Subject: Re: Speeding up fork/wait path
To: Chuck Silvers <chuq@chuq.com>
From: Jaromir Dolecek <jdolecek@NetBSD.org>
List: tech-kern
Date: 11/02/2003 20:06:10
Chuck Silvers wrote:
> I'd rather see the child do its own vmspace_free(), and then defer the
> remaining work to another thread.  proc0 already wakes up periodically
> and does a little work, so having that thread free the uareas seems
> reasonable.  it will probably take a little fiddling to get this working
> but it doesn't seem like it should be very hard.

I implemented a similar solution.

An exiting process now:
1. pmap_deactivate()s the address space and frees the vmspace
2. runs cpu_wait() (tears down the TSS on i386 & amd64)
3. makes the process SZOMB
4. keeps KERNEL_PROC_LOCK() held (see the sketch below)
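
In code, the tail of exit1() now looks roughly like this (a condensed
sketch of the kern_exit.c hunk below; error paths, rusage and list
bookkeeping omitted):

	pmap_deactivate(l);	/* stop using the dying vmspace's pmap */
	uvm_proc_exit(p);	/* free the vmspace; may block */
	cpu_wait(l);		/* free MD resources (TSS); last blocking op */

	/* no sleeping allowed from here on */
	p->p_stat = SZOMB;	/* parent may now collect us via wait(2) */
	l->l_flag |= L_DETACHED | L_PROCEXIT;
	/* KERNEL_PROC_LOCK() stays held; lwp_exit2() drops it */
	cpu_exit(l, 1);		/* switch away; switch_exit() runs lwp_exit2() */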

lwp_exit2() now does all post-dead lwp cleanup, including 'freeing' the
u-area, and releases KERNEL_PROC_LOCK() when called as a result of process
exit. There is no deadlwp/zomblwp list anymore. uvm_uarea_free() now merely
links the 'freed' u-area onto a list; it never calls uvm_km_free().
exit2() is deleted; lwp_exit2() is now called for both 'lwp' and
'process' exit.
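
The u-area cache behind uvm_uarea_free() is just an intrusive LIFO list:
the link pointer is stored in the first word of the freed u-area itself,
so caching needs no extra allocation. These are the same few lines as in
the uvm_glue.c hunk below, with comments added here for clarity:

	static void
	uvm_uarea_free(vaddr_t uaddr)
	{
		simple_lock(&uvm_uareas_slock);
		*(void **)uaddr = uvm_uareas;	/* old list head becomes our 'next' */
		uvm_uareas = (void *)uaddr;	/* freed u-area is the new head */
		uvm_nuarea++;
		simple_unlock(&uvm_uareas_slock);
	}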

There is a new uvm_uarea_drain(), which is called from strategic
places. This routine frees any u-areas over the limit, and
obviously can block. ATM this routine is called from the pagedaemon thread,
from sys_wait4(), and during process exit if the process had
more than one lwp before the exit. There is no reaper anymore.

This is a patch for the MI parts and i386. Other archs would need some
trivial changes in cpu_exit(), notably removing pmap_deactivate() and
accounting for the demise of exit2().
It's possible to optimize the MD exit code somewhat now
that lwp_exit2() is always called - some parameters can
be removed, and some of the code can be simplified.
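
For example (a hypothetical follow-up, not part of this patch): since
lwp_exit2() is now the only exit hook, switch_exit() could call it
directly, so both the function pointer and the 'proc' argument could go
away, shrinking the i386 cpu_exit() to something like:

	void
	cpu_exit(struct lwp *l)
	{

		uvmexp.swtch++;
		switch_exit(l);		/* switch_exit() would call lwp_exit2(l) itself */
	}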

Opinions?

Jaromir

Index: arch/i386/i386/vm_machdep.c
===================================================================
RCS file: /cvsroot/src/sys/arch/i386/i386/vm_machdep.c,v
retrieving revision 1.112
diff -u -p -r1.112 vm_machdep.c
--- arch/i386/i386/vm_machdep.c	27 Oct 2003 14:11:47 -0000	1.112
+++ arch/i386/i386/vm_machdep.c	2 Nov 2003 18:52:53 -0000
@@ -279,16 +279,8 @@ cpu_exit(struct lwp *l, int proc)
 	 * pmap_destroy().
 	 */
 
-	/*
-	 * Deactivate the address space before the vmspace is
-	 * freed.  Note that we will continue to run on this
-	 * vmspace's context until the switch to the idle process
-	 * in switch_exit().
-	 */
-	pmap_deactivate(l);
-
 	uvmexp.swtch++;
-	switch_exit(l, proc ? exit2 : lwp_exit2);
+	switch_exit(l, lwp_exit2);
 }
 
 /*
@@ -303,6 +295,9 @@ cpu_wait(l)
 
 	/* Nuke the TSS. */
 	tss_free(l->l_md.md_tss_sel);
+#ifdef DEBUG
+	l->l_md.md_tss_sel = 0xdeadbeef;
+#endif
 }
 
 /*
Index: sys/lwp.h
===================================================================
RCS file: /cvsroot/src/sys/sys/lwp.h,v
retrieving revision 1.12
diff -u -p -r1.12 lwp.h
--- sys/lwp.h	2 Nov 2003 16:26:10 -0000	1.12
+++ sys/lwp.h	2 Nov 2003 18:52:53 -0000
@@ -100,8 +100,6 @@ struct	lwp {
 LIST_HEAD(lwplist, lwp);		/* a list of LWPs */
 
 extern struct lwplist alllwp;		/* List of all LWPs. */
-extern struct lwplist deadlwp;		/* */
-extern struct lwplist zomblwp;
 
 extern struct pool lwp_pool;		/* memory pool for LWPs */
 extern struct pool lwp_uc_pool;		/* memory pool for LWP startup args */
@@ -113,6 +111,7 @@ extern struct lwp lwp0;			/* LWP for pro
 #define	L_SELECT	0x00040	/* Selecting; wakeup/waiting danger. */
 #define	L_SINTR		0x00080	/* Sleep is interruptible. */
 #define	L_TIMEOUT	0x00400	/* Timing out during sleep. */
+#define	L_PROCEXIT	0x00800 /* In process exit, l_proc no longer valid */
 #define	L_BIGLOCK	0x80000	/* LWP needs kernel "big lock" to run */
 #define	L_SA		0x100000 /* Scheduler activations LWP */
 #define	L_SA_UPCALL	0x200000 /* SA upcall is pending */
Index: sys/proc.h
===================================================================
RCS file: /cvsroot/src/sys/sys/proc.h,v
retrieving revision 1.174
diff -u -p -r1.174 proc.h
--- sys/proc.h	9 Oct 2003 14:00:34 -0000	1.174
+++ sys/proc.h	2 Nov 2003 18:52:53 -0000
@@ -173,7 +173,6 @@ struct proc {
 	char		p_pad1[3];
 
 	pid_t		p_pid;		/* Process identifier. */
-	SLIST_ENTRY(proc) p_dead;	/* Processes waiting for reaper */
 	LIST_ENTRY(proc) p_pglist;	/* List of processes in pgrp. */
 	struct proc 	*p_pptr;	/* Pointer to parent process. */
 	LIST_ENTRY(proc) p_sibling;	/* List of sibling processes. */
@@ -435,9 +434,8 @@ int	ltsleep(const void *, int, const cha
 	    __volatile struct simplelock *);
 void	wakeup(const void *);
 void	wakeup_one(const void *);
-void	reaper(void *);
 void	exit1(struct lwp *, int);
-void	exit2(struct lwp *);
+#define	exit2	lwp_exit2
 int	find_stopped_child(struct proc *, pid_t, int, struct proc **);
 struct proc *proc_alloc(void);
 void	proc0_insert(struct proc *, struct lwp *, struct pgrp *, struct session *);
Index: kern/init_main.c
===================================================================
RCS file: /cvsroot/src/sys/kern/init_main.c,v
retrieving revision 1.225
diff -u -p -r1.225 init_main.c
--- kern/init_main.c	2 Nov 2003 16:42:22 -0000	1.225
+++ kern/init_main.c	2 Nov 2003 18:52:53 -0000
@@ -567,10 +567,6 @@ main(void)
 	if (kthread_create1(uvm_pageout, NULL, NULL, "pagedaemon"))
 		panic("fork pagedaemon");
 
-	/* Create the process reaper kernel thread. */
-	if (kthread_create1(reaper, NULL, NULL, "reaper"))
-		panic("fork reaper");
-
 	/* Create the filesystem syncer kernel thread. */
 	if (kthread_create1(sched_sync, NULL, NULL, "ioflush"))
 		panic("fork syncer");
Index: kern/kern_exit.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_exit.c,v
retrieving revision 1.124
diff -u -p -r1.124 kern_exit.c
--- kern/kern_exit.c	16 Sep 2003 13:46:24 -0000	1.124
+++ kern/kern_exit.c	2 Nov 2003 18:52:53 -0000
@@ -131,7 +131,7 @@ static void lwp_exit_hook(struct lwp *, 
 static void exit_psignal(struct proc *, struct proc *);
 
 /*
- * Fill in the appropriate signal information, and kill the parent.
+ * Fill in the appropriate signal information, and signal the parent.
  */
 static void
 exit_psignal(struct proc *p, struct proc *pp)
@@ -233,9 +233,15 @@ exit1(struct lwp *l, int rv)
 	p->p_sigctx.ps_sigcheck = 0;
 	timers_free(p, TIMERS_ALL);
 
-	if (sa || (p->p_nlwps > 1))
+	if (sa || (p->p_nlwps > 1)) {
 		exit_lwps(l);
 
+		/*
+		 * Collect thread u-areas.
+		 */
+		uvm_uarea_drain(FALSE);
+	}
+
 #if defined(__HAVE_RAS)
 	ras_purgeall(p);
 #endif
@@ -302,8 +308,6 @@ exit1(struct lwp *l, int rv)
 	 * Give orphaned children to init(8).
 	 */
 	q = LIST_FIRST(&p->p_children);
-	if (q)		/* only need this if any child is S_ZOMB */
-		wakeup((caddr_t)initproc);
 	for (; q != 0; q = nq) {
 		nq = LIST_NEXT(q, p_sibling);
 
@@ -326,6 +330,16 @@ exit1(struct lwp *l, int rv)
 		} else {
 			proc_reparent(q, initproc);
 		}
+
+		/*
+		 * If the child is already a zombie, notify the new
+		 * parent so that it knows it should collect it.
+		 */
+		if (q->p_stat == SZOMB) {
+			if ((q->p_flag & P_FSTRACE) == 0 && q->p_exitsig != 0)
+				exit_psignal(q, q->p_pptr);
+			wakeup(q->p_pptr);
+		}
 	}
 
 	/*
@@ -350,6 +364,37 @@ exit1(struct lwp *l, int rv)
 	}
 
 	/*
+	 * Deactivate the address space before the vmspace is
+	 * freed.  Note that we will continue to run on this
+	 * vmspace's context until the switch to the idle process
+	 * in switch_exit().
+	 */
+	pmap_deactivate(l);
+
+	/*
+	 * Free the VM resources we're still holding on to.
+	 * We must do this from a valid thread because doing
+	 * so may block. This frees vmspace, which we don't
+	 * need anymore. The only remaining lwp is the one
+	 * we run at this moment, nothing runs in userland
+	 * anymore.
+	 */
+	uvm_proc_exit(p);
+
+	/*
+	 * Give machine-dependent code a chance to free any
+	 * MD LWP resources while we can still block. This must be done
+	 * before uvm_lwp_exit(), in case these resources are in the 
+	 * PCB.
+	 * THIS IS THE LAST BLOCKING OPERATION.
+	 */
+	cpu_wait(l);
+
+	/*
+	 * NOTE: WE ARE NO LONGER ALLOWED TO SLEEP!
+	 */
+
+	/*
 	 * Save exit status and final rusage info, adding in child rusage
 	 * info and self times.
 	 * In order to pick up the time for the current execution, we must
@@ -361,24 +406,21 @@ exit1(struct lwp *l, int rv)
 	ruadd(p->p_ru, &p->p_stats->p_cru);
 
 	/*
-	 * NOTE: WE ARE NO LONGER ALLOWED TO SLEEP!
-	 */
-
-	/*
-	 * Move proc from allproc to zombproc, but do not yet
-	 * wake up the reaper.  We will put the proc on the
-	 * deadproc list later (using the p_dead member), and
-	 * wake up the reaper when we do.
-	 * Changing the state to SDEAD stops it being found by pfind().
+	 * Move the proc from allproc to zombproc; it's now ready
+	 * to be collected by the parent. Remaining lwp resources
+	 * will be freed in lwp_exit2() once we switch to the idle
+	 * context.
+	 * Changing the state to SZOMB stops it being found by pfind().
 	 */
 	s = proclist_lock_write();
-	p->p_stat = SDEAD;
-	p->p_nrlwps--;
 	l->l_stat = LSDEAD;
 	LIST_REMOVE(p, p_list);
 	LIST_INSERT_HEAD(&zombproc, p, p_list);
 	LIST_REMOVE(l, l_list);
-	l->l_flag |= L_DETACHED;
+	l->l_flag |= L_DETACHED|L_PROCEXIT;
+	p->p_stat = SZOMB;
+	p->p_nrlwps--;
+	p->p_nlwps--;
 	proclist_unlock_write(s);
 
 	/*
@@ -412,7 +454,12 @@ exit1(struct lwp *l, int rv)
 		 */
 		if (LIST_FIRST(&pp->p_children) == NULL)
 			wakeup((caddr_t)pp);
+	} else {
+		if ((p->p_flag & P_FSTRACE) == 0 && p->p_exitsig != 0)
+			exit_psignal(p, p->p_pptr);
+		wakeup(p->p_pptr);
 	}
+		
 
 	/*
 	 * Release the process's signal state.
@@ -434,15 +481,11 @@ exit1(struct lwp *l, int rv)
 	pstatsfree(p->p_stats);
 	p->p_limit = NULL;
 
-	/* This process no longer needs to hold the kernel lock. */
-	KERNEL_PROC_UNLOCK(l);
-
 	/*
 	 * Finally, call machine-dependent code to switch to a new
 	 * context (possibly the idle context).  Once we are no longer
-	 * using the dead process's vmspace and stack, exit2() will be
-	 * called to schedule those resources to be released by the
-	 * reaper thread.
+	 * using the dead lwp's stack, lwp_exit2() will be called
+	 * to arrange for the resources to be released.
 	 *
 	 * Note that cpu_exit() will end with a call equivalent to
 	 * cpu_switch(), finishing our execution (pun intended).
@@ -534,115 +577,6 @@ lwp_exit_hook(struct lwp *l, void *arg)
 	lwp_exit(l);
 }
 
-/*
- * We are called from cpu_exit() once it is safe to schedule the
- * dead process's resources to be freed (i.e., once we've switched to
- * the idle PCB for the current CPU).
- *
- * NOTE: One must be careful with locking in this routine.  It's
- * called from a critical section in machine-dependent code, so
- * we should refrain from changing any interrupt state.
- *
- * We lock the deadproc list (a spin lock), place the proc on that
- * list (using the p_dead member), and wake up the reaper.
- */
-void
-exit2(struct lwp *l)
-{
-	struct proc *p = l->l_proc;
-
-	simple_lock(&deadproc_slock);
-	SLIST_INSERT_HEAD(&deadprocs, p, p_dead);
-	simple_unlock(&deadproc_slock);
-
-	/* lwp_exit2() will wake up deadproc for us. */
-	lwp_exit2(l);
-}
-
-/*
- * Process reaper.  This is run by a kernel thread to free the resources
- * of a dead process.  Once the resources are free, the process becomes
- * a zombie, and the parent is allowed to read the undead's status.
- */
-void
-reaper(void *arg)
-{
-	struct proc *p;
-	struct lwp *l;
-
-	KERNEL_PROC_UNLOCK(curlwp);
-
-	for (;;) {
-		simple_lock(&deadproc_slock);
-		p = SLIST_FIRST(&deadprocs);
-		l = LIST_FIRST(&deadlwp);
-		if (p == NULL && l == NULL) {
-			/* No work for us; go to sleep until someone exits. */
-			(void) ltsleep(&deadprocs, PVM|PNORELOCK,
-			    "reaper", 0, &deadproc_slock);
-			continue;
-		}
-
-		if (l != NULL ) {
-			p = l->l_proc;
-
-			/* Remove lwp from the deadlwp list. */
-			LIST_REMOVE(l, l_list);
-			simple_unlock(&deadproc_slock);
-			KERNEL_PROC_LOCK(curlwp);
-			
-			/*
-			 * Give machine-dependent code a chance to free any
-			 * resources it couldn't free while still running on
-			 * that process's context.  This must be done before
-			 * uvm_lwp_exit(), in case these resources are in the 
-			 * PCB.
-			 */
-			cpu_wait(l);
-
-			/*
-			 * Free the VM resources we're still holding on to.
-			 */
-			uvm_lwp_exit(l);
-
-			l->l_stat = LSZOMB;
-			if (l->l_flag & L_DETACHED) {
-				/* Nobody waits for detached LWPs. */
-				LIST_REMOVE(l, l_sibling);
-				p->p_nlwps--;
-				pool_put(&lwp_pool, l);
-			} else {
-				p->p_nzlwps++;
-				wakeup((caddr_t)&p->p_nlwps);
-			}
-			/* XXXNJW where should this be with respect to 
-			 * the wakeup() above? */
-			KERNEL_PROC_UNLOCK(curlwp);
-		} else {
-			/* Remove proc from the deadproc list. */
-			SLIST_REMOVE_HEAD(&deadprocs, p_dead);
-			simple_unlock(&deadproc_slock);
-			KERNEL_PROC_LOCK(curlwp);
-
-			/*
-			 * Free the VM resources we're still holding on to.
-			 * We must do this from a valid thread because doing
-			 * so may block.
-			 */
-			uvm_proc_exit(p);
-			
-			/* Process is now a true zombie. */
-			p->p_stat = SZOMB;
-			
-			/* Wake up the parent so it can get exit status. */
-			if ((p->p_flag & P_FSTRACE) == 0 && p->p_exitsig != 0)
-				exit_psignal(p, p->p_pptr);
-			KERNEL_PROC_UNLOCK(curlwp);
-			wakeup((caddr_t)p->p_pptr);
-		}
-	}
-}
-
 int
 sys_wait4(struct lwp *l, void *v, register_t *retval)
 {
@@ -670,6 +604,11 @@ sys_wait4(struct lwp *l, void *v, regist
 		*retval = 0;
 		return 0;
 	}
+
+	/*
+	 * Collect child u-areas.
+	 */
+	uvm_uarea_drain(FALSE);
 
 	retval[0] = child->p_pid;
 
Index: kern/kern_lwp.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_lwp.c,v
retrieving revision 1.14
diff -u -p -r1.14 kern_lwp.c
--- kern/kern_lwp.c	30 Oct 2003 23:31:21 -0000	1.14
+++ kern/kern_lwp.c	2 Nov 2003 18:52:54 -0000
@@ -57,8 +57,6 @@ __KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v
 #include <uvm/uvm_extern.h>
 
 struct lwplist alllwp;
-struct lwplist deadlwp;
-struct lwplist zomblwp;
 
 #define LWP_DEBUG
 
@@ -369,9 +367,9 @@ lwp_wait1(struct lwp *l, lwpid_t lid, lw
 
 	struct proc *p = l->l_proc;
 	struct lwp *l2, *l3;
-	int nfound, error, s, wpri;
-	static char waitstr1[] = "lwpwait";
-	static char waitstr2[] = "lwpwait2";
+	int nfound, error, wpri;
+	static const char waitstr1[] = "lwpwait";
+	static const char waitstr2[] = "lwpwait2";
 
 	DPRINTF(("lwp_wait1: %d.%d waiting for %d.\n",
 	    p->p_pid, l->l_lid, lid));
@@ -393,10 +391,6 @@ lwp_wait1(struct lwp *l, lwpid_t lid, lw
 			if (departed)
 				*departed = l2->l_lid;
 
-			s = proclist_lock_write();
-			LIST_REMOVE(l2, l_zlist); /* off zomblwp */
-			proclist_unlock_write(s);
-
 			simple_lock(&p->p_lwplock);
 			LIST_REMOVE(l2, l_sibling);
 			p->p_nlwps--;
@@ -548,11 +542,6 @@ lwp_exit(struct lwp *l)
 
 	s = proclist_lock_write();
 	LIST_REMOVE(l, l_list);
-	if ((l->l_flag & L_DETACHED) == 0) {
-		DPRINTF(("lwp_exit: %d.%d going on zombie list\n", p->p_pid,
-		    l->l_lid));
-		LIST_INSERT_HEAD(&zomblwp, l, l_zlist);
-	}
 	proclist_unlock_write(s);
 
 	simple_lock(&p->p_lwplock);
@@ -569,16 +558,52 @@ lwp_exit(struct lwp *l)
 
 }
 
-
+/*
+ * We are called from cpu_exit() once it is safe to schedule the
+ * dead process's resources to be freed (i.e., once we've switched to
+ * the idle PCB for the current CPU).
+ *
+ * NOTE: One must be careful with locking in this routine.  It's
+ * called from a critical section in machine-dependent code, so
+ * we should refrain from changing any interrupt state.
+ */
 void
 lwp_exit2(struct lwp *l)
 {
+	struct proc *p;
 
-	simple_lock(&deadproc_slock);
-	LIST_INSERT_HEAD(&deadlwp, l, l_list);
-	simple_unlock(&deadproc_slock);
+#ifdef DEBUG
+	if (l->l_flag & L_PROCEXIT)
+		l->l_proc = NULL;
+#endif
 
-	wakeup(&deadprocs);
+	/*
+	 * Free the VM resources we're still holding on to.
+	 */
+	uvm_lwp_exit(l);
+
+	l->l_stat = LSZOMB;
+	if (l->l_flag & L_DETACHED) {
+		/* Nobody waits for detached LWPs. */
+		LIST_REMOVE(l, l_sibling);
+
+		if ((l->l_flag & L_PROCEXIT) == 0) {
+			p = l->l_proc;
+			p->p_nlwps--;
+		} else {
+			/*
+			 * Release the kernel lock before decommissioning
+			 * the LWP.
+			 */
+			KERNEL_PROC_UNLOCK(l);
+		}
+
+		pool_put(&lwp_pool, l);
+	} else {
+		p = l->l_proc;
+		p->p_nzlwps++;
+		wakeup(&p->p_nlwps);
+	}
 }
 
 /*
Index: kern/kern_proc.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_proc.c,v
retrieving revision 1.66
diff -u -p -r1.66 kern_proc.c
--- kern/kern_proc.c	16 Sep 2003 12:05:49 -0000	1.66
+++ kern/kern_proc.c	2 Nov 2003 18:52:54 -0000
@@ -134,17 +134,6 @@ struct proclist zombproc;	/* resources h
 struct lock proclist_lock;
 
 /*
- * List of processes that has called exit, but need to be reaped.
- * Locking of this proclist is special; it's accessed in a
- * critical section of process exit, and thus locking it can't
- * modify interrupt state.
- * We use a simple spin lock for this proclist.
- * Processes on this proclist are also on zombproc.
- */
-struct simplelock deadproc_slock;
-struct deadprocs deadprocs = SLIST_HEAD_INITIALIZER(deadprocs);
-
-/*
  * pid to proc lookup is done by indexing the pid_table array. 
  * Since pid numbers are only allocated when an empty slot
  * has been found, there is no need to search any lists ever.
@@ -228,8 +217,6 @@ procinit(void)
 
 	spinlockinit(&proclist_lock, "proclk", 0);
 
-	simple_lock_init(&deadproc_slock);
-
 	pid_table = malloc(INITIAL_PID_TABLE_SIZE * sizeof *pid_table,
 			    M_PROC, M_WAITOK);
 	/* Set free list running through table...
@@ -248,8 +235,6 @@ procinit(void)
 #undef LINK_EMPTY
 
 	LIST_INIT(&alllwp);
-	LIST_INIT(&deadlwp);
-	LIST_INIT(&zomblwp);
 
 	uihashtbl =
 	    hashinit(maxproc / 16, HASH_LIST, M_PROC, M_WAITOK, &uihash);
Index: uvm/uvm_extern.h
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_extern.h,v
retrieving revision 1.84
diff -u -p -r1.84 uvm_extern.h
--- uvm/uvm_extern.h	11 Aug 2003 16:33:30 -0000	1.84
+++ uvm/uvm_extern.h	2 Nov 2003 18:52:55 -0000
@@ -576,7 +576,7 @@ boolean_t		uvm_kernacc __P((caddr_t, siz
 __dead void		uvm_scheduler __P((void)) __attribute__((noreturn));
 void			uvm_swapin __P((struct lwp *));
 boolean_t		uvm_uarea_alloc(vaddr_t *);
-void			uvm_uarea_free(vaddr_t);
+void			uvm_uarea_drain(boolean_t);
 boolean_t		uvm_useracc __P((caddr_t, size_t, int));
 int			uvm_vslock __P((struct proc *, caddr_t, size_t,
 			    vm_prot_t));
Index: uvm/uvm_glue.c
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_glue.c,v
retrieving revision 1.71
diff -u -p -r1.71 uvm_glue.c
--- uvm/uvm_glue.c	2 Nov 2003 16:53:43 -0000	1.71
+++ uvm/uvm_glue.c	2 Nov 2003 18:52:55 -0000
@@ -99,6 +99,8 @@ void *uvm_uareas;
 int uvm_nuarea;
 struct simplelock uvm_uareas_slock = SIMPLELOCK_INITIALIZER;
 
+static void uvm_uarea_free(vaddr_t);
+
 /*
  * XXXCDC: do these really belong here?
  */
@@ -377,8 +379,8 @@ uvm_uarea_alloc(vaddr_t *uaddrp)
 #endif
 
 	simple_lock(&uvm_uareas_slock);
-	uaddr = (vaddr_t)uvm_uareas;
-	if (uaddr) {
+	if (uvm_nuarea > 0) {
+		uaddr = (vaddr_t)uvm_uareas;
 		uvm_uareas = *(void **)uvm_uareas;
 		uvm_nuarea--;
 		simple_unlock(&uvm_uareas_slock);
@@ -392,23 +394,43 @@ uvm_uarea_alloc(vaddr_t *uaddrp)
 }
 
 /*
- * uvm_uarea_free: free a u-area
+ * uvm_uarea_free: free a u-area; never blocks
  */
 
-void
+static void
 uvm_uarea_free(vaddr_t uaddr)
 {
+	simple_lock(&uvm_uareas_slock);
+	*(void **)uaddr = uvm_uareas;
+	uvm_uareas = (void *)uaddr;
+	uvm_nuarea++;
+	simple_unlock(&uvm_uareas_slock);
+}
+
+/*
+ * uvm_uarea_drain: return memory of u-areas over the limit
+ * back to the system
+ */
+
+void
+uvm_uarea_drain(boolean_t empty)
+{
+	int leave = empty ? 0 : UVM_NUAREA_MAX;
+	vaddr_t uaddr;
+
+	if (uvm_nuarea <= leave)
+		return;
 
 	simple_lock(&uvm_uareas_slock);
-	if (uvm_nuarea < UVM_NUAREA_MAX) {
-		*(void **)uaddr = uvm_uareas;
-		uvm_uareas = (void *)uaddr;
-		uvm_nuarea++;
-		simple_unlock(&uvm_uareas_slock);
-	} else {
+	while (uvm_nuarea > leave) {
+		uaddr = (vaddr_t)uvm_uareas;
+		uvm_uareas = *(void **)uvm_uareas;
+		uvm_nuarea--;
 		simple_unlock(&uvm_uareas_slock);
 		uvm_km_free(kernel_map, uaddr, USPACE);
+		simple_lock(&uvm_uareas_slock);
 	}
+	simple_unlock(&uvm_uareas_slock);
 }
 
 /*
Index: uvm/uvm_pdaemon.c
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_pdaemon.c,v
retrieving revision 1.55
diff -u -p -r1.55 uvm_pdaemon.c
--- uvm/uvm_pdaemon.c	26 Sep 2003 04:03:39 -0000	1.55
+++ uvm/uvm_pdaemon.c	2 Nov 2003 18:52:55 -0000
@@ -274,6 +274,12 @@ uvm_pageout(void *arg)
 		 */
 
 		pool_drain(0);
+
+		/*
+		 * free any cached u-areas we don't need
+		 */
+		uvm_uarea_drain(TRUE);
+
 	}
 	/*NOTREACHED*/
 }
-- 
Jaromir Dolecek <jdolecek@NetBSD.org>            http://www.NetBSD.cz/
-=- We should be mindful of the potential goal, but as the tantric    -=-
-=- Buddhist masters say, ``You may notice during meditation that you -=-
-=- sometimes levitate or glow.   Do not let this distract you.''     -=-