Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src New function percpu_create.



details:   https://anonhg.NetBSD.org/src/rev/cb9ffc63e280
branches:  trunk
changeset: 744426:cb9ffc63e280
user:      riastradh <riastradh%NetBSD.org@localhost>
date:      Sat Feb 01 12:49:02 2020 +0000

description:
New function percpu_create.

Associates a constructor and destructor with the percpu.  Currently
the constructor runs immediately, but in principle we could use the
same API for future CPU hotplug support.

This lets you sleep to allocate memory, or to wait for users to drain
before deallocation, when setting up or tearing down a percpu --
currently we have many abuses of percpu_foreach in tree for that purpose.

Proposed on tech-kern:
https://mail-index.NetBSD.org/tech-kern/2020/01/30/msg026036.html

diffstat:

 distrib/sets/lists/comp/mi |   5 ++-
 share/man/man9/Makefile    |   3 +-
 share/man/man9/percpu.9    |  59 ++++++++++++++++++++++++++----
 sys/kern/subr_percpu.c     |  86 +++++++++++++++++++++++++++++++++++++++------
 sys/sys/percpu.h           |   4 +-
 5 files changed, 132 insertions(+), 25 deletions(-)

diffs (truncated from 326 to 300 lines):

diff -r a240a40bdfb7 -r cb9ffc63e280 distrib/sets/lists/comp/mi
--- a/distrib/sets/lists/comp/mi        Sat Feb 01 12:45:05 2020 +0000
+++ b/distrib/sets/lists/comp/mi        Sat Feb 01 12:49:02 2020 +0000
@@ -1,4 +1,4 @@
-#      $NetBSD: mi,v 1.2307 2020/01/29 18:39:04 maya Exp $
+#      $NetBSD: mi,v 1.2308 2020/02/01 12:49:02 riastradh Exp $
 #
 # Note: don't delete entries from here - mark them as "obsolete" instead.
 ./etc/mtree/set.comp                           comp-sys-root
@@ -11570,6 +11570,7 @@
 ./usr/share/man/cat9/pcu_used_p.0              comp-sys-catman         .cat
 ./usr/share/man/cat9/percpu.0                  comp-sys-catman         .cat
 ./usr/share/man/cat9/percpu_alloc.0            comp-sys-catman         .cat
+./usr/share/man/cat9/percpu_create.0           comp-sys-catman         .cat
 ./usr/share/man/cat9/percpu_foreach.0          comp-sys-catman         .cat
 ./usr/share/man/cat9/percpu_free.0             comp-sys-catman         .cat
 ./usr/share/man/cat9/percpu_getref.0           comp-sys-catman         .cat
@@ -19498,6 +19499,7 @@
 ./usr/share/man/html9/pcu_used_p.html          comp-sys-htmlman        html
 ./usr/share/man/html9/percpu.html              comp-sys-htmlman        html
 ./usr/share/man/html9/percpu_alloc.html                comp-sys-htmlman        html
+./usr/share/man/html9/percpu_create.html       comp-sys-htmlman        html
 ./usr/share/man/html9/percpu_foreach.html      comp-sys-htmlman        html
 ./usr/share/man/html9/percpu_free.html         comp-sys-htmlman        html
 ./usr/share/man/html9/percpu_getref.html       comp-sys-htmlman        html
@@ -27585,6 +27587,7 @@
 ./usr/share/man/man9/pcu_used_p.9              comp-sys-man            .man
 ./usr/share/man/man9/percpu.9                  comp-sys-man            .man
 ./usr/share/man/man9/percpu_alloc.9            comp-sys-man            .man
+./usr/share/man/man9/percpu_create.9           comp-sys-man            .man
 ./usr/share/man/man9/percpu_foreach.9          comp-sys-man            .man
 ./usr/share/man/man9/percpu_free.9             comp-sys-man            .man
 ./usr/share/man/man9/percpu_getref.9           comp-sys-man            .man
diff -r a240a40bdfb7 -r cb9ffc63e280 share/man/man9/Makefile
--- a/share/man/man9/Makefile   Sat Feb 01 12:45:05 2020 +0000
+++ b/share/man/man9/Makefile   Sat Feb 01 12:49:02 2020 +0000
@@ -1,4 +1,4 @@
-#       $NetBSD: Makefile,v 1.445 2020/01/20 18:38:18 thorpej Exp $
+#       $NetBSD: Makefile,v 1.446 2020/02/01 12:49:02 riastradh Exp $
 
 #      Makefile for section 9 (kernel function and variable) manual pages.
 
@@ -663,6 +663,7 @@
        pcmcia.9 pcmcia_cis_read_n.9 \
        pcmcia.9 pcmcia_scan_cis.9
 MLINKS+=percpu.9 percpu_alloc.9 \
+       percpu.9 percpu_create.9 \
        percpu.9 percpu_free.9 \
        percpu.9 percpu_getref.9 \
        percpu.9 percpu_putref.9 \
diff -r a240a40bdfb7 -r cb9ffc63e280 share/man/man9/percpu.9
--- a/share/man/man9/percpu.9   Sat Feb 01 12:45:05 2020 +0000
+++ b/share/man/man9/percpu.9   Sat Feb 01 12:49:02 2020 +0000
@@ -1,4 +1,4 @@
-.\"     $NetBSD: percpu.9,v 1.12 2017/05/31 23:54:17 chs Exp $
+.\"     $NetBSD: percpu.9,v 1.13 2020/02/01 12:49:02 riastradh Exp $
 .\"
 .\" Copyright (c) 2010 The NetBSD Foundation, Inc.
 .\" All rights reserved.
@@ -27,12 +27,13 @@
 .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 .\" POSSIBILITY OF SUCH DAMAGE.
 .\"
-.Dd May 31, 2017
+.Dd January 29, 2020
 .Dt PERCPU 9
 .Os
 .Sh NAME
 .Nm percpu ,
 .Nm percpu_alloc ,
+.Nm percpu_create ,
 .Nm percpu_free ,
 .Nm percpu_getref ,
 .Nm percpu_putref ,
@@ -43,6 +44,8 @@
 .Vt typedef void (*percpu_callback_t)(void *, void *, struct cpu_info *);
 .Ft percpu_t *
 .Fn percpu_alloc "size_t size"
+.Ft percpu_t *
+.Fn percpu_create "size_t size" "percpu_callback_t ctor" "percpu_callback_t dtor" "void *arg"
 .Ft void
 .Fn percpu_free "percpu_t *pc" "size_t size"
 .Ft void *
@@ -85,6 +88,30 @@
 Treat this as an expensive operation.
 .Fn percpu_alloc
 returns a handle for the per-CPU storage.
+.It Fn percpu_create "size" "ctor" "dtor" "arg"
+Like
+.Fn percpu_alloc ,
+but before returning, for each CPU, call
+.Fn "(*ctor)" p arg ci
+in the current thread, where
+.Fa p
+is the pointer to that CPU's storage and
+.Fa ci
+is the
+.Vt "struct cpu_info *"
+for that CPU.
+Further, arrange that
+.Fn percpu_free
+will do the same with
+.Fn "(*dtor)" p arg ci .
+.Pp
+.Fa ctor
+and
+.Fa dtor
+.Em MAY
+sleep, e.g. to allocate memory or to wait for users to drain before
+deallocating memory.
+Do not rely on any particular order of iteration over the CPUs.
 .It Fn percpu_free "pc" "size"
 Call this in thread context to
 return to the system the per-CPU storage held by
@@ -93,7 +120,9 @@
 should match the
 .Fa size
 passed to
-.Fn percpu_alloc .
+.Fn percpu_alloc
+or
+.Fn percpu_create .
 When
 .Fn percpu_free
 returns,
@@ -111,6 +140,13 @@
 .Fn percpu_getref
 call with a matching call to
 .Fn percpu_putref .
+.Pp
+Caller
+.Em MUST NOT
+sleep after
+.Fn percpu_getref ,
+not even on an adaptive lock, before
+.Fn percpu_putref .
 .It Fn percpu_putref "pc"
 Indicate that the thread is finished
 with the pointer returned by the matching
@@ -118,9 +154,9 @@
 .Fn percpu_getref .
 Re-enables preemption.
 .It Fn percpu_foreach "pc" "cb" "arg"
-On each CPU, for
+For each CPU, with
 .Fa ci
-the corresponding
+being the corresponding
 .Vt "struct cpu_info *"
 and
 .Fa "p"
@@ -132,12 +168,17 @@
 .Fa "arg"
 .Fa "ci"
 .Fc .
-Call this in thread context.
+The call to
 .Fa cb
-should be non-blocking and fast.
-Do not rely on
+runs in the current thread; use
+.Xr xcall 9
+for cross-calls to run logic on other CPUs.
+.Pp
+Must be used in thread context.
 .Fa cb
-to be run on the CPUs in any particular order.
+.Em MUST NOT
+sleep except on adaptive locks, and should be fast.
+Do not rely on any particular order of iteration over the CPUs.
 .El
 .Sh CODE REFERENCES
 The
diff -r a240a40bdfb7 -r cb9ffc63e280 sys/kern/subr_percpu.c
--- a/sys/kern/subr_percpu.c    Sat Feb 01 12:45:05 2020 +0000
+++ b/sys/kern/subr_percpu.c    Sat Feb 01 12:49:02 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: subr_percpu.c,v 1.20 2019/12/05 03:21:08 riastradh Exp $       */
+/*     $NetBSD: subr_percpu.c,v 1.21 2020/02/01 12:49:02 riastradh Exp $       */
 
 /*-
  * Copyright (c)2007,2008 YAMAMOTO Takashi,
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: subr_percpu.c,v 1.20 2019/12/05 03:21:08 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: subr_percpu.c,v 1.21 2020/02/01 12:49:02 riastradh Exp $");
 
 #include <sys/param.h>
 #include <sys/cpu.h>
@@ -47,14 +47,12 @@
 #define        PERCPU_QCACHE_MAX       0
 #define        PERCPU_IMPORT_SIZE      2048
 
-#if defined(DIAGNOSTIC)
-#define        MAGIC   0x50435055      /* "PCPU" */
-#define        percpu_encrypt(pc)      ((pc) ^ MAGIC)
-#define        percpu_decrypt(pc)      ((pc) ^ MAGIC)
-#else /* defined(DIAGNOSTIC) */
-#define        percpu_encrypt(pc)      (pc)
-#define        percpu_decrypt(pc)      (pc)
-#endif /* defined(DIAGNOSTIC) */
+struct percpu {
+       unsigned                pc_offset;
+       size_t                  pc_size;
+       percpu_callback_t       pc_dtor;
+       void                    *pc_cookie;
+};
 
 static krwlock_t       percpu_swap_lock        __cacheline_aligned;
 static kmutex_t                percpu_allocation_lock  __cacheline_aligned;
@@ -71,7 +69,7 @@
 static unsigned int
 percpu_offset(percpu_t *pc)
 {
-       const unsigned int off = percpu_decrypt((uintptr_t)pc);
+       const unsigned int off = pc->pc_offset;
 
        KASSERT(off < percpu_nextoff);
        return off;
@@ -253,14 +251,56 @@
 percpu_t *
 percpu_alloc(size_t size)
 {
+
+       return percpu_create(size, NULL, NULL, NULL);
+}
+
+/*
+ * percpu_create: allocate percpu storage and associate ctor/dtor with it
+ *
+ * => called in thread context.
+ * => considered as an expensive and rare operation.
+ * => allocated storage is initialized by ctor, or zeros if ctor is null
+ * => percpu_free will call dtor first, if dtor is nonnull
+ * => ctor or dtor may sleep, e.g. to allocate memory or drain users
+ */
+
+percpu_t *
+percpu_create(size_t size, percpu_callback_t ctor, percpu_callback_t dtor,
+    void *cookie)
+{
        vmem_addr_t offset;
        percpu_t *pc;
 
        ASSERT_SLEEPABLE();
        (void)vmem_alloc(percpu_offset_arena, size, VM_SLEEP | VM_BESTFIT,
            &offset);
-       pc = (percpu_t *)percpu_encrypt((uintptr_t)offset);
-       percpu_zero(pc, size);
+
+       pc = kmem_alloc(sizeof(*pc), KM_SLEEP);
+       pc->pc_offset = offset;
+       pc->pc_size = size;
+       pc->pc_dtor = dtor;
+       pc->pc_cookie = cookie;
+
+       if (ctor) {
+               CPU_INFO_ITERATOR cii;
+               struct cpu_info *ci;
+               void *buf;
+
+               buf = kmem_alloc(size, KM_SLEEP);
+               for (CPU_INFO_FOREACH(cii, ci)) {
+                       memset(buf, 0, size);
+                       (*ctor)(buf, cookie, ci);
+                       percpu_traverse_enter();
+                       memcpy(percpu_getptr_remote(pc, ci), buf, size);
+                       percpu_traverse_exit();
+               }
+               explicit_memset(buf, 0, size);
+               kmem_free(buf, size);
+       } else {
+               percpu_zero(pc, size);
+       }
+
        return pc;
 }
 
@@ -276,7 +316,27 @@
 {
 
        ASSERT_SLEEPABLE();
+       KASSERT(size == pc->pc_size);
+
+       if (pc->pc_dtor) {
+               CPU_INFO_ITERATOR cii;
+               struct cpu_info *ci;
+               void *buf;
+
+               buf = kmem_alloc(size, KM_SLEEP);
+               for (CPU_INFO_FOREACH(cii, ci)) {
+                       percpu_traverse_enter();
+                       memcpy(buf, percpu_getptr_remote(pc, ci), size);
+                       explicit_memset(percpu_getptr_remote(pc, ci), 0, size);
+                       percpu_traverse_exit();
+                       (*pc->pc_dtor)(buf, pc->pc_cookie, ci);
+               }
+               explicit_memset(buf, 0, size);



Home | Main Index | Thread Index | Old Index