Subject: Re: kern/32287: Processes hang in "mclpl"
To: None <tech-kern@netbsd.org>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: netbsd-bugs
Date: 12/15/2005 22:55:44
--NextPart-20051215225115-0106300
Content-Type: Text/Plain; charset=us-ascii

> >Number:         32287
> >Category:       kern
> >Synopsis:       Processes hang in "mclpl"

The attached patch fixes the "hang" part of the problem.
I'll commit it unless anyone objects.

- pool_allocator_alloc: drain the requesting pool ("org") as well,
  so that any pool_cache layered on top of it is drained too.
- pool_cache_put_paddr: destruct objects instead of caching them if the
  underlying pool is starved (i.e. PR_WANTED is set on it).
- pool_get: on KVA starvation, wake up once a second and try again.

Of course, in the longer term, it would be better to have some kind of
KVA reclaim mechanism.

YAMAMOTO Takashi

--NextPart-20051215225115-0106300
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="a.diff"

Index: subr_pool.c
===================================================================
--- subr_pool.c	(revision 1470)
+++ subr_pool.c	(revision 1471)
@@ -929,13 +929,13 @@ pool_get(struct pool *pp, int flags)
 			/*
 			 * Wait for items to be returned to this pool.
 			 *
-			 * XXX: maybe we should wake up once a second and
-			 * try again?
+			 * wake up once a second and try again,
+			 * as the check in pool_cache_put_paddr() is racy.
 			 */
 			pp->pr_flags |= PR_WANTED;
 			/* PA_WANTED is already set on the allocator. */
 			pr_leave(pp);
-			ltsleep(pp, PSWP, pp->pr_wchan, 0, &pp->pr_slock);
+			ltsleep(pp, PSWP, pp->pr_wchan, hz, &pp->pr_slock);
 			pr_enter(pp, file, line);
 			goto startover;
 		}
@@ -2019,6 +2019,10 @@ pool_cache_put_paddr(struct pool_cache *
 	struct pool_cache_group *pcg;
 	int s;
 
+	if (__predict_false((pc->pc_pool->pr_flags & PR_WANTED) != 0)) {
+		goto destruct;
+	}
+
 	simple_lock(&pc->pc_slock);
 
 	pcg = LIST_FIRST(&pc->pc_partgroups);
@@ -2040,6 +2044,7 @@ pool_cache_put_paddr(struct pool_cache *
 		pcg = pool_get(&pcgpool, PR_NOWAIT);
 		splx(s);
 		if (pcg == NULL) {
+destruct:
 
 			/*
 			 * Unable to allocate a cache group; destruct the object
@@ -2249,8 +2254,8 @@ pool_allocator_alloc(struct pool *org, i
 		}
 
 		/*
-		 * Drain all pools, except "org", that use this
-		 * allocator.  We do this to reclaim VA space.
+		 * Drain all pools, that use this allocator.
+		 * We do this to reclaim VA space.
 		 * pa_alloc is responsible for waiting for
 		 * physical memory.
 		 *
@@ -2271,8 +2276,6 @@ pool_allocator_alloc(struct pool *org, i
 		do {
 			TAILQ_REMOVE(&pa->pa_list, pp, pr_alloc_list);
 			TAILQ_INSERT_TAIL(&pa->pa_list, pp, pr_alloc_list);
-			if (pp == org)
-				continue;
 			simple_unlock(&pa->pa_slock);
 			freed = pool_reclaim(pp);
 			simple_lock(&pa->pa_slock);

--NextPart-20051215225115-0106300--