Subject: Re: PR/32962
To: None <mrg@NetBSD.org, gnats-admin@netbsd.org, netbsd-bugs@netbsd.org,>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: netbsd-bugs
Date: 11/02/2006 17:10:02
The following reply was made to PR kern/32962; it has been noted by GNATS.

From: yamt@mwd.biglobe.ne.jp (YAMAMOTO Takashi)
To: ad@netbsd.org
Cc: mrg@eterna.com.au, gnats-bugs@NetBSD.org
Subject: Re: PR/32962
Date: Fri,  3 Nov 2006 01:06:40 +0900 (JST)

 > Index: kern/kern_lwp.c
 > ===================================================================
 > RCS file: /cvsroot/src/sys/kern/kern_lwp.c,v
 > retrieving revision 1.48
 > diff -u -r1.48 kern_lwp.c
 > --- kern/kern_lwp.c	1 Nov 2006 10:17:58 -0000	1.48
 > +++ kern/kern_lwp.c	1 Nov 2006 19:06:26 -0000
 > @@ -661,8 +661,8 @@
 >  		l->l_stat = LSZOMB;
 >  		p = l->l_proc;
 >  		p->p_nzlwps++;
 > -		KERNEL_UNLOCK();
 >  		wakeup(&p->p_nlwps);
 > +		KERNEL_UNLOCK();
 >  	}
 >  }
 
 i don't think this makes differences.
 
 > Index: kern/kern_sig.c
 > ===================================================================
 > RCS file: /cvsroot/src/sys/kern/kern_sig.c,v
 > retrieving revision 1.235
 > diff -u -r1.235 kern_sig.c
 > --- kern/kern_sig.c	1 Nov 2006 10:17:58 -0000	1.235
 > +++ kern/kern_sig.c	1 Nov 2006 19:06:28 -0000
 > @@ -1494,6 +1494,8 @@
 >  	if (l->l_flag & L_SA && l->l_savp->savp_lwp != l)
 >  		return 0;
 >  
 > +	KERNEL_PROC_LOCK(l);
 > +
 >  	if (p->p_stat == SSTOP) {
 >  		/*
 >  		 * The process is stopped/stopping. Stop ourselves now that
 > @@ -1514,6 +1516,7 @@
 >  		signum = firstsig(&ss);
 >  		if (signum == 0) {		 	/* no signal to send */
 >  			p->p_sigctx.ps_sigcheck = 0;
 > +			KERNEL_PROC_UNLOCK(l);
 >  			return (0);
 >  		}
 >  							/* take the signal! */
 > @@ -1655,6 +1658,7 @@
 >  						/* leave the signal for later */
 >  	sigaddset(&p->p_sigctx.ps_siglist, signum);
 >  	CHECKSIGS(p);
 > +	KERNEL_PROC_UNLOCK(l);
 >  	return (signum);
 >  }
 
 does it mean protecting signal states by kernel_lock?
 
 > Index: kern/kern_synch.c
 > ===================================================================
 > RCS file: /cvsroot/src/sys/kern/kern_synch.c,v
 > retrieving revision 1.171
 > diff -u -r1.171 kern_synch.c
 > --- kern/kern_synch.c	1 Nov 2006 10:17:58 -0000	1.171
 > +++ kern/kern_synch.c	1 Nov 2006 19:06:29 -0000
 > @@ -112,6 +112,14 @@
 >  int	lbolt;			/* once a second sleep address */
 >  int	rrticks;		/* number of hardclock ticks per roundrobin() */
 >  
 > +#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
 > +#define	XXX_SCHED_LOCK		simple_lock(&sched_lock)
 > +#define	XXX_SCHED_UNLOCK	simple_unlock(&sched_lock)
 > +#else
 > +#define	XXX_SCHED_LOCK		/* nothing */
 > +#define	XXX_SCHED_UNLOCK	/* nothing */
 > +#endif
 > +
 >  /*
 >   * Sleep queues.
 >   *
 > @@ -535,22 +543,22 @@
 >  	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
 >  	 */
 >  	if (catch) {
 > -		SCHED_UNLOCK(s);
 > +		XXX_SCHED_UNLOCK;
 >  		l->l_flag |= L_SINTR;
 >  		if (((sig = CURSIG(l)) != 0) ||
 >  		    ((p->p_flag & P_WEXIT) && p->p_nlwps > 1)) {
 > -			SCHED_LOCK(s);
 > +			XXX_SCHED_LOCK;
 >  			if (l->l_wchan != NULL)
 >  				unsleep(l);
 >  			l->l_stat = LSONPROC;
 > -			SCHED_UNLOCK(s);
 > +			XXX_SCHED_UNLOCK;
 >  			goto resume;
 >  		}
 >  		if (l->l_wchan == NULL) {
 >  			catch = 0;
 >  			goto resume;
 >  		}
 > -		SCHED_LOCK(s);
 > +		XXX_SCHED_LOCK;
 >  	} else
 >  		sig = 0;
 >  	l->l_stat = LSSLEEP;
 
 why do you want to keep spl at splsched?
 
 > @@ -1017,6 +1025,13 @@
 >  	microtime(&l->l_cpu->ci_schedstate.spc_runtime);
 >  
 >  	/*
 > +	 * Reacquire the kernel_lock now.  We do this after we've
 > +	 * released the scheduler lock to avoid deadlock, and before
 > +	 * we reacquire the interlock.
 > +	 */
 > +	KERNEL_LOCK_ACQUIRE_COUNT(hold_count);
 > +
 > +	/*
 >  	 * Check if the process exceeds its CPU resource allocation.
 >  	 * If over max, kill it.  In any case, if it has run for more
 >  	 * than 10 minutes, reduce priority to give others a chance.
 > @@ -1039,13 +1054,6 @@
 >  		SCHED_UNLOCK(s);
 >  	}
 >  
 > -	/*
 > -	 * Reacquire the kernel_lock now.  We do this after we've
 > -	 * released the scheduler lock to avoid deadlock, and before
 > -	 * we reacquire the interlock.
 > -	 */
 > -	KERNEL_LOCK_ACQUIRE_COUNT(hold_count);
 > -
 >  	return retval;
 >  }
 
 is it necessary?
 the code in question was out of kernel_lock even before the recent changes.
 
 YAMAMOTO Takashi