Subject: Some locking issues in subr_pool.c
To: NetBSD Kernel Technical Discussion List <tech-kern@netbsd.org>
From: Jason Thorpe <thorpej@shagadelic.org>
List: tech-kern
Date: 04/29/2005 11:59:17

Folks...

I noticed some locking issues in subr_pool.c the other day:

- The locking protocol for pr_rmpage() was hideous.  It was sometimes
called with the pool locked, sometimes not, and in the "not" case it
was modifying fields normally protected by the lock.

- Fixing the above requires always deferring the freeing of the pool
page to the back-end allocator, because the pool must not be locked
when the pool_allocator is called (lock ordering is pool_allocator ->
pool; see pool_allocator_free()).  This was a mostly mechanical
change; the caller pattern is sketched after this list.

- While doing the above mechanical change, I noticed that
pool_reclaim() violates the pool_cache -> pool lock ordering.  I
didn't see a good way to avoid that problem, so pool_cache_reclaim()
has to use a trylock and bail out if that fails (also sketched
below).  This is not a huge deal, since we already trylock the pool
at the beginning of pool_reclaim(), and this is only an opportunistic
memory reclamation anyway.
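
To make the pr_rmpage() change concrete, every caller now follows
roughly this pattern (just a sketch using the names from the patch;
the real code is in the attachment):

	struct pool_pagelist pq;
	struct pool_item_header *ph;
	int s;

	LIST_INIT(&pq);

	s = splvm();
	simple_lock(&pp->pr_slock);
	/* ... pick the page(s) to release ... */
	pr_rmpage(pp, ph, &pq);		/* unlink and queue, don't free yet */
	simple_unlock(&pp->pr_slock);
	splx(s);

	/* Pool is now unlocked, so it's safe to call the back-end allocator. */
	pr_pagelist_free(pp, &pq);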
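
The pool_cache_reclaim() part boils down to this check at the top of
the function (again, just the shape of it; see the patch for the full
function):

	/*
	 * Lock order is normally pool_cache -> pool, but pool_reclaim()
	 * already holds the pool lock when it calls us, so only try-lock
	 * the cache and give up if it is contended.
	 */
	if (simple_lock_try(&pc->pc_slock) == 0)
		return;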

The following patch should fix these problems.  I have not even tried  
to compile it yet, but I would like to get comments on it.

-- thorpej


[Attachment: pool-locking-patch.txt]

--- subr_pool.c	2005-04-27 16:12:13.000000000 -0700
+++ subr_pool.c.new	2005-04-29 07:17:09.000000000 -0700
@@ -174,7 +174,7 @@ struct pool_item {
 /* The cache group pool. */
 static struct pool pcgpool;
 
-static void	pool_cache_reclaim(struct pool_cache *);
+static void	pool_cache_reclaim(struct pool_cache *, struct pool_pagelist *);
 
 static int	pool_catchup(struct pool *);
 static void	pool_prime_page(struct pool *, caddr_t,
@@ -384,6 +384,23 @@ pr_find_pagehead(struct pool *pp, caddr_
 	return ph;
 }
 
+static void
+pr_pagelist_free(struct pool *pp, struct pool_pagelist *pq)
+{
+	struct pool_item_header *ph;
+	int s;
+
+	while ((ph = LIST_FIRST(pq)) != NULL) {
+		LIST_REMOVE(ph, ph_pagelist);
+		pool_allocator_free(pp, ph->ph_page);
+		if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
+			s = splvm();
+			pool_put(pp->pr_phpool, ph);
+			splx(s);
+		}
+	}
+}
+
 /*
  * Remove a page from the pool.
  */
@@ -393,7 +410,7 @@ pr_rmpage(struct pool *pp, struct pool_i
 {
 	int s;
 
-	LOCK_ASSERT(!simple_lock_held(&pp->pr_slock) || pq != NULL);
+	LOCK_ASSERT(simple_lock_held(&pp->pr_slock));
 
 	/*
 	 * If the page was idle, decrement the idle page count.
@@ -411,21 +428,13 @@ pr_rmpage(struct pool *pp, struct pool_i
 	pp->pr_nitems -= pp->pr_itemsperpage;
 
 	/*
-	 * Unlink a page from the pool and release it (or queue it for release).
+	 * Unlink the page from the pool and queue it for release.
 	 */
 	LIST_REMOVE(ph, ph_pagelist);
 	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
 		SPLAY_REMOVE(phtree, &pp->pr_phtree, ph);
-	if (pq) {
-		LIST_INSERT_HEAD(pq, ph, ph_pagelist);
-	} else {
-		pool_allocator_free(pp, ph->ph_page);
-		if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
-			s = splvm();
-			pool_put(pp->pr_phpool, ph);
-			splx(s);
-		}
-	}
+	LIST_INSERT_HEAD(pq, ph, ph_pagelist);
+
 	pp->pr_npages--;
 	pp->pr_npagefree++;
 
@@ -696,6 +705,7 @@ pool_init(struct pool *pp, size_t size, 
 void
 pool_destroy(struct pool *pp)
 {
+	struct pool_pagelist pq;
 	struct pool_item_header *ph;
 	struct pool_cache *pc;
 	int s;
@@ -707,7 +717,13 @@ pool_destroy(struct pool *pp)
 	simple_unlock(&pp->pr_alloc->pa_slock);
 	splx(s);
 
-	/* Destroy all caches for this pool. */
+	/*
+	 * Destroy all caches for this pool.
+	 * XXXJRT We don't lock here, because the locking order is
+	 * pool_cache -> pool.  Maybe it doesn't matter because the
+	 * pool is being destroyed anyway.  Or maybe we should panic
+	 * here if the pool_caches are not already gone.
+	 */
 	while ((pc = TAILQ_FIRST(&pp->pr_cachelist)) != NULL)
 		pool_cache_destroy(pc);
 
@@ -720,8 +736,14 @@ pool_destroy(struct pool *pp)
 #endif
 
 	/* Remove all pages */
+	LIST_INIT(&pq);
+	s = splvm();
+	simple_lock(&pp->pr_slock);
 	while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
-		pr_rmpage(pp, ph, NULL);
+		pr_rmpage(pp, ph, &pq);
+	simple_unlock(&pp->pr_slock);
+	splx(s);
+	pr_pagelist_free(pp, &pq);
 	KASSERT(LIST_EMPTY(&pp->pr_fullpages));
 	KASSERT(LIST_EMPTY(&pp->pr_partpages));
 
@@ -1037,7 +1059,7 @@ pool_get(struct pool *pp, int flags)
  * Internal version of pool_put().  Pool is already locked/entered.
  */
 static void
-pool_do_put(struct pool *pp, void *v)
+pool_do_put(struct pool *pp, void *v, struct pool_pagelist *pq)
 {
 	struct pool_item *pi = v;
 	struct pool_item_header *ph;
@@ -1125,9 +1147,7 @@ pool_do_put(struct pool *pp, void *v)
 		if (pp->pr_npages > pp->pr_minpages &&
 		    (pp->pr_npages > pp->pr_maxpages ||
 		     (pp->pr_alloc->pa_flags & PA_WANT) != 0)) {
-			simple_unlock(&pp->pr_slock);
-			pr_rmpage(pp, ph, NULL);
-			simple_lock(&pp->pr_slock);
+			pr_rmpage(pp, ph, pq);
 		} else {
 			LIST_REMOVE(ph, ph_pagelist);
 			LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
@@ -1165,16 +1185,22 @@ pool_do_put(struct pool *pp, void *v)
 void
 _pool_put(struct pool *pp, void *v, const char *file, long line)
 {
+	struct pool_pagelist pq;
+
+	LIST_INIT(&pq);
 
 	simple_lock(&pp->pr_slock);
 	pr_enter(pp, file, line);
 
 	pr_log(pp, v, PRLOG_PUT, file, line);
 
-	pool_do_put(pp, v);
+	pool_do_put(pp, v, &pq);
 
 	pr_leave(pp);
 	simple_unlock(&pp->pr_slock);
+
+	if (! LIST_EMPTY(&pq))
+		pr_pagelist_free(pp, &pq);
 }
 #undef pool_put
 #endif /* POOL_DIAGNOSTIC */
@@ -1182,12 +1208,18 @@ _pool_put(struct pool *pp, void *v, cons
 void
 pool_put(struct pool *pp, void *v)
 {
+	struct pool_pagelist pq;
+
+	LIST_INIT(&pq);
 
 	simple_lock(&pp->pr_slock);
 
-	pool_do_put(pp, v);
+	pool_do_put(pp, v, &pq);
 
 	simple_unlock(&pp->pr_slock);
+
+	if (! LIST_EMPTY(&pq))
+		pr_pagelist_free(pp, &pq);
 }
 
 #ifdef POOL_DIAGNOSTIC
@@ -1469,7 +1501,7 @@ pool_reclaim(struct pool *pp)
 	 * Reclaim items from the pool's caches.
 	 */
 	TAILQ_FOREACH(pc, &pp->pr_cachelist, pc_poollist)
-		pool_cache_reclaim(pc);
+		pool_cache_reclaim(pc, &pq);
 
 	s = splclock();
 	curtime = mono_time;
@@ -1503,17 +1535,7 @@ pool_reclaim(struct pool *pp)
 	if (LIST_EMPTY(&pq))
 		return (0);
 
-	while ((ph = LIST_FIRST(&pq)) != NULL) {
-		LIST_REMOVE(ph, ph_pagelist);
-		pool_allocator_free(pp, ph->ph_page);
-		if (pp->pr_roflags & PR_PHINPAGE) {
-			continue;
-		}
-		s = splvm();
-		pool_put(pp->pr_phpool, ph);
-		splx(s);
-	}
-
+	pr_pagelist_free(pp, &pq);
 	return (1);
 }
 
@@ -2024,19 +2046,20 @@ pool_cache_destruct_object(struct pool_c
 }
 
 /*
- * pool_cache_do_invalidate:
+ * pool_cache_invalidate:
  *
- *	This internal function implements pool_cache_invalidate() and
- *	pool_cache_reclaim().
+ *	Invalidate a pool cache (destruct and release all of the
+ *	cached objects).
  */
-static void
-pool_cache_do_invalidate(struct pool_cache *pc, int free_groups,
-    void (*putit)(struct pool *, void *))
+void
+pool_cache_invalidate(struct pool_cache *pc)
 {
 	struct pool_cache_group *pcg, *npcg;
 	void *object;
 	int s;
 
+	simple_lock(&pc->pc_slock);
+
 	for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL;
 	     pcg = npcg) {
 		npcg = TAILQ_NEXT(pcg, pcg_list);
@@ -2047,32 +2070,15 @@ pool_cache_do_invalidate(struct pool_cac
 				pc->pc_allocfrom = NULL;
 			if (pc->pc_dtor != NULL)
 				(*pc->pc_dtor)(pc->pc_arg, object);
-			(*putit)(pc->pc_pool, object);
-		}
-		if (free_groups) {
-			pc->pc_ngroups--;
-			TAILQ_REMOVE(&pc->pc_grouplist, pcg, pcg_list);
-			if (pc->pc_freeto == pcg)
-				pc->pc_freeto = NULL;
-			s = splvm();
-			pool_put(&pcgpool, pcg);
-			splx(s);
+			/*
+			 * XXXJRT Could improve this by calling
+			 * pool_do_put() and freeing the entire
+			 * page list at the end.
+			 */
+			pool_put(pc->pc_pool, object);
 		}
 	}
-}
 
-/*
- * pool_cache_invalidate:
- *
- *	Invalidate a pool cache (destruct and release all of the
- *	cached objects).
- */
-void
-pool_cache_invalidate(struct pool_cache *pc)
-{
-
-	simple_lock(&pc->pc_slock);
-	pool_cache_do_invalidate(pc, 0, pool_put);
 	simple_unlock(&pc->pc_slock);
 }
 
@@ -2082,11 +2088,42 @@ pool_cache_invalidate(struct pool_cache 
  *	Reclaim a pool cache for pool_reclaim().
  */
 static void
-pool_cache_reclaim(struct pool_cache *pc)
+pool_cache_reclaim(struct pool_cache *pc, struct pool_pagelist *pq)
 {
+	struct pool_cache_group *pcg, *npcg;
+	void *object;
+	int s;
+
+	/*
+	 * We're locking in the wrong order (normally pool_cache -> pool,
+	 * but the pool is already locked when we get here), so we have
+	 * to use trylock.  If we can't lock the pool_cache, it's not really
+	 * a big deal here.
+	 */
+	if (simple_lock_try(&pc->pc_slock) == 0)
+		return;
+
+	for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL;
+	     pcg = npcg) {
+		npcg = TAILQ_NEXT(pcg, pcg_list);
+		while (pcg->pcg_avail != 0) {
+			pc->pc_nitems--;
+			object = pcg_get(pcg, NULL);
+			if (pcg->pcg_avail == 0 && pc->pc_allocfrom == pcg)
+				pc->pc_allocfrom = NULL;
+			if (pc->pc_dtor != NULL)
+				(*pc->pc_dtor)(pc->pc_arg, object);
+			pool_do_put(pc->pc_pool, object, pq);
+		}
+		pc->pc_ngroups--;
+		TAILQ_REMOVE(&pc->pc_grouplist, pcg, pcg_list);
+		if (pc->pc_freeto == pcg)
+			pc->pc_freeto = NULL;
+		s = splvm();
+		pool_put(&pcgpool, pcg);
+		splx(s);
+	}
 
-	simple_lock(&pc->pc_slock);
-	pool_cache_do_invalidate(pc, 1, pool_do_put);
 	simple_unlock(&pc->pc_slock);
 }
 
