tech-kern archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: [PATCH] percpu_create -- associate ctors and dtors



> Date: Thu, 30 Jan 2020 02:46:42 +0000
> From: Taylor R Campbell <campbell+netbsd-tech-kern%mumble.net@localhost>
> 
> The attached patch set [...]

Some day, perhaps I will not be bad at attachments.  That day,
however, has yet to come.
From 32542bb28f86862f91fcf874f2b7494d4df00c62 Mon Sep 17 00:00:00 2001
From: Taylor R Campbell <riastradh%NetBSD.org@localhost>
Date: Wed, 29 Jan 2020 19:49:15 +0000
Subject: [PATCH 1/4] New function percpu_create.

Associates a constructor and destructor with the percpu.  Currently
the constructor runs immediately, but in principle we could use the
same API for future CPU hotplug support.

This lets you sleep for allocation or draining users before
deallocation when setting up or tearing down a percpu -- currently we
have many abuses of percpu_foreach in tree for that purpose.
---
 distrib/sets/lists/comp/mi |  3 ++
 share/man/man9/Makefile    |  1 +
 share/man/man9/percpu.9    | 57 +++++++++++++++++++++++++-----
 sys/kern/subr_percpu.c     | 71 ++++++++++++++++++++++++++++++++------
 sys/sys/percpu.h           |  2 ++
 5 files changed, 115 insertions(+), 19 deletions(-)

diff --git a/distrib/sets/lists/comp/mi b/distrib/sets/lists/comp/mi
index 039edf3ffa87..53b544e6d95b 100644
--- a/distrib/sets/lists/comp/mi
+++ b/distrib/sets/lists/comp/mi
@@ -11570,6 +11570,7 @@
 ./usr/share/man/cat9/pcu_used_p.0		comp-sys-catman		.cat
 ./usr/share/man/cat9/percpu.0			comp-sys-catman		.cat
 ./usr/share/man/cat9/percpu_alloc.0		comp-sys-catman		.cat
+./usr/share/man/cat9/percpu_create.0		comp-sys-catman		.cat
 ./usr/share/man/cat9/percpu_foreach.0		comp-sys-catman		.cat
 ./usr/share/man/cat9/percpu_free.0		comp-sys-catman		.cat
 ./usr/share/man/cat9/percpu_getref.0		comp-sys-catman		.cat
@@ -19499,6 +19500,7 @@
 ./usr/share/man/html9/pcu_used_p.html		comp-sys-htmlman	html
 ./usr/share/man/html9/percpu.html		comp-sys-htmlman	html
 ./usr/share/man/html9/percpu_alloc.html		comp-sys-htmlman	html
+./usr/share/man/html9/percpu_create.html	comp-sys-htmlman	html
 ./usr/share/man/html9/percpu_foreach.html	comp-sys-htmlman	html
 ./usr/share/man/html9/percpu_free.html		comp-sys-htmlman	html
 ./usr/share/man/html9/percpu_getref.html	comp-sys-htmlman	html
@@ -27587,6 +27589,7 @@
 ./usr/share/man/man9/pcu_used_p.9		comp-sys-man		.man
 ./usr/share/man/man9/percpu.9			comp-sys-man		.man
 ./usr/share/man/man9/percpu_alloc.9		comp-sys-man		.man
+./usr/share/man/man9/percpu_create.9		comp-sys-man		.man
 ./usr/share/man/man9/percpu_foreach.9		comp-sys-man		.man
 ./usr/share/man/man9/percpu_free.9		comp-sys-man		.man
 ./usr/share/man/man9/percpu_getref.9		comp-sys-man		.man
diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile
index fb83c94971ec..9e179df3ab5c 100644
--- a/share/man/man9/Makefile
+++ b/share/man/man9/Makefile
@@ -663,6 +663,7 @@ MLINKS+=pcmcia.9 pcmcia_function_init.9 \
 	pcmcia.9 pcmcia_cis_read_n.9 \
 	pcmcia.9 pcmcia_scan_cis.9
 MLINKS+=percpu.9 percpu_alloc.9 \
+	percpu.9 percpu_create.9 \
 	percpu.9 percpu_free.9 \
 	percpu.9 percpu_getref.9 \
 	percpu.9 percpu_putref.9 \
diff --git a/share/man/man9/percpu.9 b/share/man/man9/percpu.9
index 79ce894fd732..b8623d865619 100644
--- a/share/man/man9/percpu.9
+++ b/share/man/man9/percpu.9
@@ -27,12 +27,13 @@
 .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 .\" POSSIBILITY OF SUCH DAMAGE.
 .\"
-.Dd May 31, 2017
+.Dd January 29, 2020
 .Dt PERCPU 9
 .Os
 .Sh NAME
 .Nm percpu ,
 .Nm percpu_alloc ,
+.Nm percpu_create ,
 .Nm percpu_free ,
 .Nm percpu_getref ,
 .Nm percpu_putref ,
@@ -43,6 +44,8 @@
 .Vt typedef void (*percpu_callback_t)(void *, void *, struct cpu_info *);
 .Ft percpu_t *
 .Fn percpu_alloc "size_t size"
+.Ft percpu_t *
+.Fn percpu_create "size_t size" "percpu_callback_t ctor" "percpu_callback_t dtor" "void *arg"
 .Ft void
 .Fn percpu_free "percpu_t *pc" "size_t size"
 .Ft void *
@@ -85,6 +88,30 @@ The storage is initialized with zeroes.
 Treat this as an expensive operation.
 .Fn percpu_alloc
 returns a handle for the per-CPU storage.
+.It Fn percpu_create "size" "ctor" "dtor" "arg"
+Like
+.Fn percpu_alloc ,
+but before returning, for each CPU, call
+.Fn "(*ctor)" p arg ci
+in the current thread, where
+.Fa p
+is the pointer to that CPU's storage and
+.Fa ci
+is the
+.Vt "struct cpu_info *"
+for that CPU.
+Further, arrange that
+.Fn percpu_free
+will do the same with
+.Fn "(*dtor)" p arg ci .
+.Pp
+.Fa ctor
+and
+.Fa dtor
+.Em MAY
+sleep, e.g. to allocate memory or to wait for users to drain before
+deallocating memory.
+Do not rely on any particular order of iteration over the CPUs.
 .It Fn percpu_free "pc" "size"
 Call this in thread context to
 return to the system the per-CPU storage held by
@@ -93,7 +120,9 @@ return to the system the per-CPU storage held by
 should match the
 .Fa size
 passed to
-.Fn percpu_alloc .
+.Fn percpu_alloc
+or
+.Fn percpu_create .
 When
 .Fn percpu_free
 returns,
@@ -111,6 +140,13 @@ Follow each
 .Fn percpu_getref
 call with a matching call to
 .Fn percpu_putref .
+.Pp
+Caller
+.Em MUST NOT
+sleep after
+.Fn percpu_getref ,
+not even on an adaptive lock, before
+.Fn percpu_putref .
 .It Fn percpu_putref "pc"
 Indicate that the thread is finished
 with the pointer returned by the matching
@@ -118,9 +154,9 @@ call to
 .Fn percpu_getref .
 Re-enables preemption.
 .It Fn percpu_foreach "pc" "cb" "arg"
-On each CPU, for
+For each CPU, with
 .Fa ci
-the corresponding
+being the corresponding
 .Vt "struct cpu_info *"
 and
 .Fa "p"
@@ -132,12 +168,17 @@ run
 .Fa "arg"
 .Fa "ci"
 .Fc .
-Call this in thread context.
+The call to
 .Fa cb
-should be non-blocking and fast.
-Do not rely on
+runs in the current thread; use
+.Xr xcall 9
+for cross-calls to run logic on other CPUs.
+.Pp
+Must be used in thread context.
 .Fa cb
-to be run on the CPUs in any particular order.
+.Em MUST NOT
+sleep except on adaptive locks, and should be fast.
+Do not rely on any particular order of iteration over the CPUs.
 .El
 .Sh CODE REFERENCES
 The
diff --git a/sys/kern/subr_percpu.c b/sys/kern/subr_percpu.c
index bcb5ccacf5ac..75d6aa8c23fb 100644
--- a/sys/kern/subr_percpu.c
+++ b/sys/kern/subr_percpu.c
@@ -47,14 +47,12 @@ __KERNEL_RCSID(0, "$NetBSD: subr_percpu.c,v 1.20 2019/12/05 03:21:08 riastradh E
 #define	PERCPU_QCACHE_MAX	0
 #define	PERCPU_IMPORT_SIZE	2048
 
-#if defined(DIAGNOSTIC)
-#define	MAGIC	0x50435055	/* "PCPU" */
-#define	percpu_encrypt(pc)	((pc) ^ MAGIC)
-#define	percpu_decrypt(pc)	((pc) ^ MAGIC)
-#else /* defined(DIAGNOSTIC) */
-#define	percpu_encrypt(pc)	(pc)
-#define	percpu_decrypt(pc)	(pc)
-#endif /* defined(DIAGNOSTIC) */
+struct percpu {
+	unsigned		pc_offset;
+	size_t			pc_size;
+	percpu_callback_t	pc_dtor;
+	void			*pc_cookie;
+};
 
 static krwlock_t	percpu_swap_lock	__cacheline_aligned;
 static kmutex_t		percpu_allocation_lock	__cacheline_aligned;
@@ -71,7 +69,7 @@ cpu_percpu(struct cpu_info *ci)
 static unsigned int
 percpu_offset(percpu_t *pc)
 {
-	const unsigned int off = percpu_decrypt((uintptr_t)pc);
+	const unsigned int off = pc->pc_offset;
 
 	KASSERT(off < percpu_nextoff);
 	return off;
@@ -252,6 +250,14 @@ percpu_init_cpu(struct cpu_info *ci)
 
 percpu_t *
 percpu_alloc(size_t size)
+{
+
+	return percpu_create(size, NULL, NULL, NULL);
+}
+
+percpu_t *
+percpu_create(size_t size, percpu_callback_t ctor, percpu_callback_t dtor,
+    void *cookie)
 {
 	vmem_addr_t offset;
 	percpu_t *pc;
@@ -259,8 +265,31 @@ percpu_alloc(size_t size)
 	ASSERT_SLEEPABLE();
 	(void)vmem_alloc(percpu_offset_arena, size, VM_SLEEP | VM_BESTFIT,
 	    &offset);
-	pc = (percpu_t *)percpu_encrypt((uintptr_t)offset);
-	percpu_zero(pc, size);
+
+	pc = kmem_alloc(sizeof(*pc), KM_SLEEP);
+	pc->pc_offset = offset;
+	pc->pc_size = size;
+	pc->pc_dtor = dtor;
+	pc->pc_cookie = cookie;
+
+	if (ctor) {
+		CPU_INFO_ITERATOR cii;
+		struct cpu_info *ci;
+		void *buf;
+
+		buf = kmem_alloc(size, KM_SLEEP);
+		for (CPU_INFO_FOREACH(cii, ci)) {
+			memset(buf, 0, size);
+			(*ctor)(buf, cookie, ci);
+			percpu_traverse_enter();
+			memcpy(percpu_getptr_remote(pc, ci), buf, size);
+			percpu_traverse_exit();
+		}
+		kmem_free(buf, size);
+	} else {
+		percpu_zero(pc, size);
+	}
+
 	return pc;
 }
 
@@ -276,7 +305,27 @@ percpu_free(percpu_t *pc, size_t size)
 {
 
 	ASSERT_SLEEPABLE();
+	KASSERT(size == pc->pc_size);
+
+	if (pc->pc_dtor) {
+		CPU_INFO_ITERATOR cii;
+		struct cpu_info *ci;
+		void *buf;
+
+		buf = kmem_alloc(size, KM_SLEEP);
+		for (CPU_INFO_FOREACH(cii, ci)) {
+			percpu_traverse_enter();
+			memcpy(buf, percpu_getptr_remote(pc, ci), size);
+			explicit_memset(percpu_getptr_remote(pc, ci), 0, size);
+			percpu_traverse_exit();
+			(*pc->pc_dtor)(buf, pc->pc_cookie, ci);
+		}
+		explicit_memset(buf, 0, size);
+		kmem_free(buf, size);
+	}
+
 	vmem_free(percpu_offset_arena, (vmem_addr_t)percpu_offset(pc), size);
+	kmem_free(pc, sizeof(*pc));
 }
 
 /*
diff --git a/sys/sys/percpu.h b/sys/sys/percpu.h
index 8513af5deebb..db1f768d87c0 100644
--- a/sys/sys/percpu.h
+++ b/sys/sys/percpu.h
@@ -41,6 +41,8 @@ void	percpu_putref(percpu_t *);
 typedef void (*percpu_callback_t)(void *, void *, struct cpu_info *);
 void	percpu_foreach(percpu_t *, percpu_callback_t, void *);
 
+percpu_t *percpu_create(size_t, percpu_callback_t, percpu_callback_t, void *);
+
 /* low-level api; don't use unless necessary */
 void	percpu_traverse_enter(void);
 void	percpu_traverse_exit(void);

From 8a4bffbc18efaf82ccd8889bd76947b6ec6bd086 Mon Sep 17 00:00:00 2001
From: Taylor R Campbell <riastradh%NetBSD.org@localhost>
Date: Wed, 29 Jan 2020 21:51:33 +0000
Subject: [PATCH 2/4] Switch arm pic allocation and initialization to
 percpu_create.

---
 sys/arch/arm/pic/pic.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/sys/arch/arm/pic/pic.c b/sys/arch/arm/pic/pic.c
index 38959b588a5c..f29e3eca0508 100644
--- a/sys/arch/arm/pic/pic.c
+++ b/sys/arch/arm/pic/pic.c
@@ -647,12 +647,8 @@ pic_add(struct pic_softc *pic, int irqbase)
 
 #if defined(__HAVE_PIC_PENDING_INTRS) && defined(MULTIPROCESSOR)
 	if (__predict_false(pic_pending_percpu == NULL)) {
-		pic_pending_percpu = percpu_alloc(sizeof(struct pic_pending));
-
-		/*
-		 * Now zero the per-cpu pending data.
-		 */
-		percpu_foreach(pic_pending_percpu, pic_pending_zero, NULL);
+		pic_pending_percpu = percpu_create(sizeof(struct pic_pending),
+		    pic_pending_zero, NULL, NULL);
 	}
 #endif /* __HAVE_PIC_PENDING_INTRS && MULTIPROCESSOR */
 
@@ -702,12 +698,8 @@ pic_add(struct pic_softc *pic, int irqbase)
 	 * corrupt the pointers in the evcnts themselves.  Remember, any
 	 * problem can be solved with sufficient indirection.
 	 */
-	pic->pic_percpu = percpu_alloc(sizeof(struct pic_percpu));
-
-	/*
-	 * Now allocate the per-cpu evcnts.
-	 */
-	percpu_foreach(pic->pic_percpu, pic_percpu_allocate, pic);
+	pic->pic_percpu = percpu_create(sizeof(struct pic_percpu),
+	    pic_percpu_allocate, NULL, pic);
 
 	pic->pic_sources = &pic_sources[sourcebase];
 	pic->pic_irqbase = irqbase;

From 8e12cd1753e921cad2f02225aae5b70074b3f863 Mon Sep 17 00:00:00 2001
From: Taylor R Campbell <riastradh%NetBSD.org@localhost>
Date: Wed, 29 Jan 2020 21:54:21 +0000
Subject: [PATCH 3/4] Switch sys/net to percpu_create.

---
 sys/net/if.c          | 20 ++++++++------------
 sys/net/if_l2tp.c     |  5 ++---
 sys/net/route.c       |  7 ++-----
 sys/netinet/wqinput.c |  4 ++--
 4 files changed, 14 insertions(+), 22 deletions(-)

diff --git a/sys/net/if.c b/sys/net/if.c
index 1b9df2a16df0..b263f1fb6e39 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -2912,17 +2912,6 @@ if_tunnel_ro_init_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
 	tro->tr_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
 }
 
-percpu_t *
-if_tunnel_alloc_ro_percpu(void)
-{
-	percpu_t *ro_percpu;
-
-	ro_percpu = percpu_alloc(sizeof(struct tunnel_ro));
-	percpu_foreach(ro_percpu, if_tunnel_ro_init_pc, NULL);
-
-	return ro_percpu;
-}
-
 static void
 if_tunnel_ro_fini_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
 {
@@ -2934,11 +2923,18 @@ if_tunnel_ro_fini_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
 	mutex_obj_free(tro->tr_lock);
 }
 
+percpu_t *
+if_tunnel_alloc_ro_percpu(void)
+{
+
+	return percpu_create(sizeof(struct tunnel_ro),
+	    if_tunnel_ro_init_pc, if_tunnel_ro_fini_pc, NULL);
+}
+
 void
 if_tunnel_free_ro_percpu(percpu_t *ro_percpu)
 {
 
-	percpu_foreach(ro_percpu, if_tunnel_ro_fini_pc, NULL);
 	percpu_free(ro_percpu, sizeof(struct tunnel_ro));
 }
 
diff --git a/sys/net/if_l2tp.c b/sys/net/if_l2tp.c
index e71d78fd406a..8f730af6a492 100644
--- a/sys/net/if_l2tp.c
+++ b/sys/net/if_l2tp.c
@@ -267,8 +267,8 @@ l2tp_clone_create(struct if_clone *ifc, int unit)
 
 	sc->l2tp_ro_percpu = if_tunnel_alloc_ro_percpu();
 
-	sc->l2tp_ifq_percpu = percpu_alloc(sizeof(struct ifqueue *));
-	percpu_foreach(sc->l2tp_ifq_percpu, l2tp_ifq_init_pc, NULL);
+	sc->l2tp_ifq_percpu = percpu_create(sizeof(struct ifqueue *),
+	    l2tp_ifq_init_pc, l2tp_ifq_fini_pc, NULL);
 	sc->l2tp_si = softint_establish(si_flags, l2tpintr_softint, sc);
 
 	mutex_enter(&l2tp_softcs.lock);
@@ -367,7 +367,6 @@ l2tp_clone_destroy(struct ifnet *ifp)
 	mutex_exit(&sc->l2tp_lock);
 
 	softint_disestablish(sc->l2tp_si);
-	percpu_foreach(sc->l2tp_ifq_percpu, l2tp_ifq_fini_pc, NULL);
 	percpu_free(sc->l2tp_ifq_percpu, sizeof(struct ifqueue *));
 
 	mutex_enter(&l2tp_softcs.lock);
diff --git a/sys/net/route.c b/sys/net/route.c
index ef19e0f837ca..744d9501d4b6 100644
--- a/sys/net/route.c
+++ b/sys/net/route.c
@@ -2231,12 +2231,9 @@ rtcache_percpu_init_cpu(void *p, void *arg __unused, struct cpu_info *ci __unuse
 percpu_t *
 rtcache_percpu_alloc(void)
 {
-	percpu_t *pc;
 
-	pc = percpu_alloc(sizeof(struct route *));
-	percpu_foreach(pc, rtcache_percpu_init_cpu, NULL);
-
-	return pc;
+	return percpu_create(sizeof(struct route *),
+	    rtcache_percpu_init_cpu, NULL, NULL);
 }
 
 const struct sockaddr *
diff --git a/sys/netinet/wqinput.c b/sys/netinet/wqinput.c
index d05d25d4eda4..b5607e91b9f7 100644
--- a/sys/netinet/wqinput.c
+++ b/sys/netinet/wqinput.c
@@ -188,8 +188,8 @@ wqinput_create(const char *name, void (*func)(struct mbuf *, int, int))
 		panic("%s: workqueue_create failed (%d)\n", __func__, error);
 	pool_init(&wqi->wqi_work_pool, sizeof(struct wqinput_work), 0, 0, 0,
 	    name, NULL, IPL_SOFTNET);
-	wqi->wqi_worklists = percpu_alloc(sizeof(struct wqinput_worklist *));
-	percpu_foreach(wqi->wqi_worklists, wqinput_percpu_init_cpu, NULL);
+	wqi->wqi_worklists = percpu_create(sizeof(struct wqinput_worklist *),
+	    wqinput_percpu_init_cpu, NULL, NULL);
 	wqi->wqi_input = func;
 
 	wqinput_sysctl_setup(name, wqi);

From df41d2119a661a8f690a32c3b33b013ebb02c4b5 Mon Sep 17 00:00:00 2001
From: Taylor R Campbell <riastradh%NetBSD.org@localhost>
Date: Thu, 30 Jan 2020 02:11:30 +0000
Subject: [PATCH 4/4] Switch opencrypto to percpu_create.

Can't sleep for allocation in percpu_foreach.
---
 sys/opencrypto/crypto.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sys/opencrypto/crypto.c b/sys/opencrypto/crypto.c
index 0685ebfc7636..47876cc89e23 100644
--- a/sys/opencrypto/crypto.c
+++ b/sys/opencrypto/crypto.c
@@ -562,8 +562,8 @@ crypto_init0(void)
 	cryptkop_cache = pool_cache_init(sizeof(struct cryptkop),
 	    coherency_unit, 0, 0, "cryptkop", NULL, IPL_NET, NULL, NULL, NULL);
 
-	crypto_crp_qs_percpu = percpu_alloc(sizeof(struct crypto_crp_qs));
-	percpu_foreach(crypto_crp_qs_percpu, crypto_crp_qs_init_pc, NULL);
+	crypto_crp_qs_percpu = percpu_create(sizeof(struct crypto_crp_qs),
+	    crypto_crp_qs_init_pc, /*XXX*/NULL, NULL);
 
 	crypto_crp_ret_qs_init();
 


Home | Main Index | Thread Index | Old Index