Source-Changes-HG archive

[src/trunk]: src/sys/kern pool(9): Membar audit.



details:   https://anonhg.NetBSD.org/src/rev/3d914fbef052
branches:  trunk
changeset: 362428:3d914fbef052
user:      riastradh <riastradh%NetBSD.org@localhost>
date:      Sun Feb 27 14:16:43 2022 +0000

description:
pool(9): Membar audit.

- Use atomic_store_release and atomic_load_consume for associating a
  freshly constructed pool_cache with its underlying pool.  The pool
  gets published in various ways before the pool cache is fully
  constructed.

  => Nix membar_sync -- no store-before-load barrier is needed here
     (see the sketch below).
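
  To illustrate the pairing, here is a minimal user-space sketch using
  C11 atomics as a stand-in for the kernel's atomic_store_release()/
  atomic_load_consume(); the toy_pool and toy_cache types here are
  hypothetical, not NetBSD's:

    /*
     * Sketch of the publish/observe pairing.  The writer finishes
     * constructing the cache before publishing the pointer; the
     * reader's consume load orders later dereferences after the load.
     */
    #include <stdatomic.h>
    #include <stddef.h>

    struct toy_cache { int pc_ncpu; };
    struct toy_pool  { struct toy_cache *_Atomic pr_cache; };

    static void
    toy_publish(struct toy_pool *pp, struct toy_cache *pc)
    {
            pc->pc_ncpu = 1;        /* finish construction first... */
            /* ...then publish: release orders the stores above first */
            atomic_store_explicit(&pp->pr_cache, pc,
                memory_order_release);
    }

    static int
    toy_observe(struct toy_pool *pp)
    {
            struct toy_cache *pc;

            /* consume: dereferences through pc come after the load */
            pc = atomic_load_explicit(&pp->pr_cache,
                memory_order_consume);
            return pc != NULL ? pc->pc_ncpu : -1;
    }

  The ordering needed is store-before-store on the writer and
  load-before-dependent-load on the reader, which release/consume
  provides; nothing here requires membar_sync's store-before-load.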

- Take pool_head_lock around the sysctl kern.pool TAILQ_FOREACH.  Then
  hold a reference count, with the lock dropped, across the copyout
  (see the sketch after this list).

  => Otherwise, pools could be partially initialized or freed while
     we're still trying to read from them -- and in the worst case,
     we might see a corrupted view of the tailq.

  => If we kept the lock held across the copyout, we could deadlock in
     memory allocation.

  => If we didn't take a reference count while releasing the lock, the
     pool could be destroyed while we're trying to traverse the list,
     sending us into oblivion instead of the next element.
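
  The resulting shape of the sysctl loop, as a minimal user-space
  sketch (pthreads stand in for pool_head_lock and the pool_busy
  condvar; struct elem and the copy_one() copyout stand-in are
  hypothetical):

    #include <pthread.h>
    #include <sys/queue.h>

    struct elem {
            TAILQ_ENTRY(elem) e_link;
            unsigned e_refcnt;      /* nonzero: a walker has it pinned */
    };

    static TAILQ_HEAD(, elem) elem_head =
        TAILQ_HEAD_INITIALIZER(elem_head);
    static pthread_mutex_t elem_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t elem_busy = PTHREAD_COND_INITIALIZER;

    /* Stand-in for copyout(9): may sleep, so no lock may be held. */
    static int
    copy_one(struct elem *e)
    {
            (void)e;
            return 0;
    }

    static int
    walk_and_copy(void)
    {
            struct elem *e;
            int error = 0;

            pthread_mutex_lock(&elem_lock);
            TAILQ_FOREACH(e, &elem_head, e_link) {
                    e->e_refcnt++;          /* pin e across the unlock */
                    pthread_mutex_unlock(&elem_lock);
                    error = copy_one(e);    /* may sleep; lock dropped */
                    pthread_mutex_lock(&elem_lock);
                    if (--e->e_refcnt == 0) /* wake a waiting destroyer */
                            pthread_cond_broadcast(&elem_busy);
                    if (error)
                            break;
            }
            pthread_mutex_unlock(&elem_lock);
            return error;
    }

  A destroyer would, under elem_lock, wait on elem_busy until
  e_refcnt drops to zero before unlinking and freeing the element, so
  the pinned element cannot vanish out from under TAILQ_FOREACH.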

diffstat:

 sys/kern/subr_pool.c |  34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

diffs (135 lines):

diff -r cb773abef49c -r 3d914fbef052 sys/kern/subr_pool.c
--- a/sys/kern/subr_pool.c      Sun Feb 27 14:16:32 2022 +0000
+++ b/sys/kern/subr_pool.c      Sun Feb 27 14:16:43 2022 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: subr_pool.c,v 1.280 2021/12/24 00:13:53 riastradh Exp $        */
+/*     $NetBSD: subr_pool.c,v 1.281 2022/02/27 14:16:43 riastradh Exp $        */
 
 /*
  * Copyright (c) 1997, 1999, 2000, 2002, 2007, 2008, 2010, 2014, 2015, 2018,
@@ -33,7 +33,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.280 2021/12/24 00:13:53 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.281 2022/02/27 14:16:43 riastradh Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_ddb.h"
@@ -1648,6 +1648,7 @@
 {
        struct pool_item_header *ph, *phnext;
        struct pool_pagelist pq;
+       struct pool_cache *pc;
        uint32_t curtime;
        bool klock;
        int rv;
@@ -1673,8 +1674,8 @@
                klock = false;
 
        /* Reclaim items from the pool's cache (if any). */
-       if (pp->pr_cache != NULL)
-               pool_cache_invalidate(pp->pr_cache);
+       if ((pc = atomic_load_consume(&pp->pr_cache)) != NULL)
+               pool_cache_invalidate(pc);
 
        if (mutex_tryenter(&pp->pr_lock) == 0) {
                if (klock) {
@@ -1883,7 +1884,7 @@
        if (skip_empty && pp->pr_nget == 0)
                return;
 
-       if ((pc = pp->pr_cache) != NULL) {
+       if ((pc = atomic_load_consume(&pp->pr_cache)) != NULL) {
                (*pr)("POOLCACHE");
        } else {
                (*pr)("POOL");
@@ -1895,7 +1896,6 @@
                    pp->pr_wchan, pp, pp->pr_size, pp->pr_align, pp->pr_npages,
                    pp->pr_nitems, pp->pr_nout, pp->pr_nget, pp->pr_nput,
                    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_nidle);
-               
                return;
        }
 
@@ -2184,8 +2184,7 @@
        if (__predict_true(!cold))
                mutex_exit(&pool_head_lock);
 
-       membar_sync();
-       pp->pr_cache = pc;
+       atomic_store_release(&pp->pr_cache, pc);
 }
 
 /*
@@ -2224,7 +2223,7 @@
 
        /* Disassociate it from the pool. */
        mutex_enter(&pp->pr_lock);
-       pp->pr_cache = NULL;
+       atomic_store_relaxed(&pp->pr_cache, NULL);
        mutex_exit(&pp->pr_lock);
 
        /* Destroy per-CPU data */
@@ -3339,6 +3338,7 @@
 
        TAILQ_FOREACH(pp, &pool_head, pr_poollist) {
                struct pool_item_header *ph;
+               struct pool_cache *pc;
                uintptr_t item;
                bool allocated = true;
                bool incache = false;
@@ -3373,8 +3373,8 @@
                        allocated = pool_allocated(pp, ph, addr);
                }
 found:
-               if (allocated && pp->pr_cache) {
-                       pool_cache_t pc = pp->pr_cache;
+               if (allocated &&
+                   (pc = atomic_load_consume(&pp->pr_cache)) != NULL) {
                        struct pool_cache_group *pcg;
                        int i;
 
@@ -3437,9 +3437,11 @@
        memset(&data, 0, sizeof(data));
        error = 0;
        written = 0;
+       mutex_enter(&pool_head_lock);
        TAILQ_FOREACH(pp, &pool_head, pr_poollist) {
                if (written + sizeof(data) > *oldlenp)
                        break;
+               pp->pr_refcnt++;
                strlcpy(data.pr_wchan, pp->pr_wchan, sizeof(data.pr_wchan));
                data.pr_pagesize = pp->pr_alloc->pa_pagesz;
                data.pr_flags = pp->pr_roflags | pp->pr_flags;
@@ -3469,9 +3471,8 @@
                data.pr_cache_nempty = 0;
                data.pr_cache_ncontended = 0;
                data.pr_cache_npartial = 0;
-               if (pp->pr_cache) {
+               if ((pc = atomic_load_consume(&pp->pr_cache)) != NULL) {
                        uint32_t nfull = 0;
-                       pc = pp->pr_cache;
                        data.pr_cache_meta_size = pc->pc_pcgsize;
                        for (i = 0; i < pc->pc_ncpu; ++i) {
                                cc = pc->pc_cpus[i];
@@ -3492,12 +3493,19 @@
                data.pr_cache_nhit_global = data.pr_cache_nmiss_pcpu -
                    data.pr_cache_nmiss_global;
 
+               if (pp->pr_refcnt == UINT_MAX) /* XXX possible? */
+                       continue;
+               mutex_exit(&pool_head_lock);
                error = sysctl_copyout(l, &data, oldp, sizeof(data));
+               mutex_enter(&pool_head_lock);
+               if (--pp->pr_refcnt == 0)
+                       cv_broadcast(&pool_busy);
                if (error)
                        break;
                written += sizeof(data);
                oldp = (char *)oldp + sizeof(data);
        }
+       mutex_exit(&pool_head_lock);
 
        *oldlenp = written;
        return error;


