Subject: Re: kern/29652
To: None <gnats-bugs@netbsd.org>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: netbsd-bugs
Date: 08/18/2005 21:28:52
--NextPart-20050818212834-0535200
Content-Type: Text/Plain; charset=us-ascii

> >  panic: kernel diagnostic assertion "p->p_nrlwps == 0" failed: file "/usr/src/sys/kern/kern_exit.c", line 781
> 
> as p_nrlwps currently has no locking afaik, the panic is not surprising. :)
> 
> proc.h claims it's protected by p_lock, but i think
> sched_lock is more straightforward lock to use.

here's a patch.

YAMAMOTO Takashi

--NextPart-20050818212834-0535200
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="a.diff"

Index: sys/proc.h
===================================================================
--- sys/proc.h	(revision 1266)
+++ sys/proc.h	(working copy)
@@ -151,6 +151,7 @@ struct emul {
  *
  * Fields marked 'p:' are protected by the process's own p_lock.
  * Fields marked 'l:' are protected by the proclist_lock
+ * Fields marked 's:' are protected by the SCHED_LOCK.
  */
 struct proc {
 	LIST_ENTRY(proc) p_list;	/* List of all processes */
@@ -191,7 +192,7 @@ struct proc {
 #define	p_startzero	p_nlwps
 
 	int 		p_nlwps;	/* p: Number of LWPs */
-	int 		p_nrlwps;	/* p: Number of running LWPs */
+	int 		p_nrlwps;	/* s: Number of running LWPs */
 	int 		p_nzlwps;	/* p: Number of zombie LWPs */
 	int 		p_nlwpid;	/* p: Next LWP ID */
 
Index: kern/kern_lwp.c
===================================================================
--- kern/kern_lwp.c	(revision 1301)
+++ kern/kern_lwp.c	(working copy)
@@ -107,10 +107,8 @@ sys__lwp_create(struct lwp *l, void *v, 
 		SCHED_LOCK(s);
 		l2->l_stat = LSRUN;
 		setrunqueue(l2);
-		SCHED_UNLOCK(s);
-		simple_lock(&p->p_lock);
 		p->p_nrlwps++;
-		simple_unlock(&p->p_lock);
+		SCHED_UNLOCK(s);
 	} else {
 		l2->l_stat = LSSUSPENDED;
 	}
@@ -226,10 +224,8 @@ lwp_suspend(struct lwp *l, struct lwp *t
 			SCHED_LOCK(s);
 			remrunqueue(t);
 			t->l_stat = LSSUSPENDED;
-			SCHED_UNLOCK(s);
-			simple_lock(&p->p_lock);
 			p->p_nrlwps--;
-			simple_unlock(&p->p_lock);
+			SCHED_UNLOCK(s);
 			break;
 		case LSSLEEP:
 			t->l_stat = LSSUSPENDED;
@@ -562,11 +558,10 @@ lwp_exit(struct lwp *l)
 	cpu_lwp_free(l, 0);
 #endif
 
-	simple_lock(&p->p_lock);
+	SCHED_LOCK(s);
 	p->p_nrlwps--;
-	simple_unlock(&p->p_lock);
-
 	l->l_stat = LSDEAD;
+	SCHED_UNLOCK(s);
 
 	/* This LWP no longer needs to hold the kernel lock. */
 	KERNEL_PROC_UNLOCK(l);
Index: kern/kern_exit.c
===================================================================
--- kern/kern_exit.c	(revision 1300)
+++ kern/kern_exit.c	(working copy)
@@ -446,6 +446,8 @@ exit1(struct lwp *l, int rv)
 	l->l_flag |= L_DETACHED|L_PROCEXIT;	/* detached from proc too */
 	l->l_stat = LSDEAD;
 
+	KASSERT(p->p_nrlwps == 1);
+	KASSERT(p->p_nlwps == 1);
 	p->p_nrlwps--;
 	p->p_nlwps--;
 
Index: compat/mach/mach_thread.c
===================================================================
--- compat/mach/mach_thread.c	(revision 1248)
+++ compat/mach/mach_thread.c	(working copy)
@@ -232,10 +232,8 @@ mach_thread_create_running(args)
 	mctc.mctc_lwp->l_private = 0;
 	mctc.mctc_lwp->l_stat = LSRUN;
 	setrunqueue(mctc.mctc_lwp);
-	SCHED_UNLOCK(s);
-	simple_lock(&p->p_lock);
 	p->p_nrlwps++;
-	simple_unlock(&p->p_lock);
+	SCHED_UNLOCK(s);
 
 	/*
 	 * Get the child's kernel port

--NextPart-20050818212834-0535200--