Subject: Re: kern/29652
To: None <kern-bug-people@netbsd.org, gnats-admin@netbsd.org,>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: netbsd-bugs
Date: 08/18/2005 12:30:02
The following reply was made to PR kern/29652; it has been noted by GNATS.

From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
To: gnats-bugs@netbsd.org
Cc: kern-bug-people@netbsd.org, gnats-admin@netbsd.org,
	netbsd-bugs@netbsd.org
Subject: Re: kern/29652
Date: Thu, 18 Aug 2005 21:28:52 +0900

 --NextPart-20050818212834-0535200
 Content-Type: Text/Plain; charset=us-ascii
 
 > >  panic: kernel diagnostic assertion "p->p_nrlwps == 0" failed: file "/usr/src/sys/kern/kern_exit.c", line 781
 > 
 > as p_nrlwps currently has no locking afaik, the panic is not surprising. :)
 > 
 > proc.h claims it's protected by p_lock, but i think
 > sched_lock is more straightforward lock to use.
 
 here's a patch.
 
 YAMAMOTO Takashi
 
 --NextPart-20050818212834-0535200
 Content-Type: Text/Plain; charset=us-ascii
 Content-Disposition: attachment; filename="a.diff"
 
 Index: sys/proc.h
 ===================================================================
 --- sys/proc.h	(revision 1266)
 +++ sys/proc.h	(working copy)
 @@ -151,6 +151,7 @@ struct emul {
   *
   * Fields marked 'p:' are protected by the process's own p_lock.
   * Fields marked 'l:' are protected by the proclist_lock
 + * Fields marked 's:' are protected by the SCHED_LOCK.
   */
  struct proc {
  	LIST_ENTRY(proc) p_list;	/* List of all processes */
 @@ -191,7 +192,7 @@ struct proc {
  #define	p_startzero	p_nlwps
  
  	int 		p_nlwps;	/* p: Number of LWPs */
 -	int 		p_nrlwps;	/* p: Number of running LWPs */
 +	int 		p_nrlwps;	/* s: Number of running LWPs */
  	int 		p_nzlwps;	/* p: Number of zombie LWPs */
  	int 		p_nlwpid;	/* p: Next LWP ID */
  
 Index: kern/kern_lwp.c
 ===================================================================
 --- kern/kern_lwp.c	(revision 1301)
 +++ kern/kern_lwp.c	(working copy)
 @@ -107,10 +107,8 @@ sys__lwp_create(struct lwp *l, void *v, 
  		SCHED_LOCK(s);
  		l2->l_stat = LSRUN;
  		setrunqueue(l2);
 -		SCHED_UNLOCK(s);
 -		simple_lock(&p->p_lock);
  		p->p_nrlwps++;
 -		simple_unlock(&p->p_lock);
 +		SCHED_UNLOCK(s);
  	} else {
  		l2->l_stat = LSSUSPENDED;
  	}
 @@ -226,10 +224,8 @@ lwp_suspend(struct lwp *l, struct lwp *t
  			SCHED_LOCK(s);
  			remrunqueue(t);
  			t->l_stat = LSSUSPENDED;
 -			SCHED_UNLOCK(s);
 -			simple_lock(&p->p_lock);
  			p->p_nrlwps--;
 -			simple_unlock(&p->p_lock);
 +			SCHED_UNLOCK(s);
  			break;
  		case LSSLEEP:
  			t->l_stat = LSSUSPENDED;
 @@ -562,11 +558,10 @@ lwp_exit(struct lwp *l)
  	cpu_lwp_free(l, 0);
  #endif
  
 -	simple_lock(&p->p_lock);
 +	SCHED_LOCK(s);
  	p->p_nrlwps--;
 -	simple_unlock(&p->p_lock);
 -
  	l->l_stat = LSDEAD;
 +	SCHED_UNLOCK(s);
  
  	/* This LWP no longer needs to hold the kernel lock. */
  	KERNEL_PROC_UNLOCK(l);
 Index: kern/kern_exit.c
 ===================================================================
 --- kern/kern_exit.c	(revision 1300)
 +++ kern/kern_exit.c	(working copy)
 @@ -446,6 +446,8 @@ exit1(struct lwp *l, int rv)
  	l->l_flag |= L_DETACHED|L_PROCEXIT;	/* detached from proc too */
  	l->l_stat = LSDEAD;
  
 +	KASSERT(p->p_nrlwps == 1);
 +	KASSERT(p->p_nlwps == 1);
  	p->p_nrlwps--;
  	p->p_nlwps--;
  
 Index: compat/mach/mach_thread.c
 ===================================================================
 --- compat/mach/mach_thread.c	(revision 1248)
 +++ compat/mach/mach_thread.c	(working copy)
 @@ -232,10 +232,8 @@ mach_thread_create_running(args)
  	mctc.mctc_lwp->l_private = 0;
  	mctc.mctc_lwp->l_stat = LSRUN;
  	setrunqueue(mctc.mctc_lwp);
 -	SCHED_UNLOCK(s);
 -	simple_lock(&p->p_lock);
  	p->p_nrlwps++;
 -	simple_unlock(&p->p_lock);
 +	SCHED_UNLOCK(s);
  
  	/*
  	 * Get the child's kernel port
 
 --NextPart-20050818212834-0535200--