Subject: re: kern/32962
To: None <mrg@NetBSD.org, gnats-admin@netbsd.org, netbsd-bugs@netbsd.org,>
From: matthew green <mrg@eterna.com.au>
List: netbsd-bugs
Date: 10/19/2006 21:15:07
The following reply was made to PR kern/32962; it has been noted by GNATS.

From: matthew green <mrg@eterna.com.au>
To: njoly@pasteur.fr, skrll@netbsd.org, kim@netbsd.org,
	gnats-bugs@netbsd.org, netbsd-bugs@netbsd.org
Cc: 
Subject: re: kern/32962 
Date: Fri, 20 Oct 2006 07:14:52 +1000

 the previous patch leaves hung processes under load.  this one should
 avoid that.  partly based on an idea from chuq.
 
 
 .mrg.
 
 
 Index: kern_sig.c
 ===================================================================
 RCS file: /cvsroot/src/sys/kern/kern_sig.c,v
 retrieving revision 1.230
 diff -p -r1.230 kern_sig.c
 *** kern_sig.c	12 Oct 2006 01:32:17 -0000	1.230
 --- kern_sig.c	19 Oct 2006 09:14:09 -0000
 *************** __KERNEL_RCSID(0, "$NetBSD: kern_sig.c,v
 *** 90,97 ****
   static int	build_corename(struct proc *, char *, const char *, size_t);
   #endif
   static void	ksiginfo_exithook(struct proc *, void *);
 ! static void	ksiginfo_put(struct proc *, const ksiginfo_t *);
 ! static ksiginfo_t *ksiginfo_get(struct proc *, int);
   static void	kpsignal2(struct proc *, const ksiginfo_t *, int);
   
   sigset_t	contsigmask, stopsigmask, sigcantmask;
 --- 90,97 ----
   static int	build_corename(struct proc *, char *, const char *, size_t);
   #endif
   static void	ksiginfo_exithook(struct proc *, void *);
 ! static void	ksiginfo_queue(struct proc *, const ksiginfo_t *, ksiginfo_t **);
 ! static ksiginfo_t *ksiginfo_dequeue(struct proc *, int);
   static void	kpsignal2(struct proc *, const ksiginfo_t *, int);
   
   sigset_t	contsigmask, stopsigmask, sigcantmask;
 *************** POOL_INIT(ksiginfo_pool, sizeof(ksiginfo
 *** 132,138 ****
    * signal, or return NULL if one not found.
    */
   static ksiginfo_t *
 ! ksiginfo_get(struct proc *p, int signo)
   {
   	ksiginfo_t *ksi;
   	int s;
 --- 132,138 ----
    * signal, or return NULL if one not found.
    */
   static ksiginfo_t *
 ! ksiginfo_dequeue(struct proc *p, int signo)
   {
   	ksiginfo_t *ksi;
   	int s;
 *************** out:
 *** 159,165 ****
    * or for non RT signals with non-existing entries.
    */
   static void
 ! ksiginfo_put(struct proc *p, const ksiginfo_t *ksi)
   {
   	ksiginfo_t *kp;
   	struct sigaction *sa = &SIGACTION_PS(p->p_sigacts, ksi->ksi_signo);
 --- 159,165 ----
    * or for non RT signals with non-existing entries.
    */
   static void
 ! ksiginfo_queue(struct proc *p, const ksiginfo_t *ksi, ksiginfo_t **newkp)
   {
   	ksiginfo_t *kp;
   	struct sigaction *sa = &SIGACTION_PS(p->p_sigacts, ksi->ksi_signo);
 *************** ksiginfo_put(struct proc *p, const ksigi
 *** 167,172 ****
 --- 167,173 ----
   
   	if ((sa->sa_flags & SA_SIGINFO) == 0)
   		return;
 + 
   	/*
   	 * If there's no info, don't save it.
   	 */
 *************** ksiginfo_put(struct proc *p, const ksigi
 *** 186,192 ****
   			}
   		}
   	}
 ! 	kp = pool_get(&ksiginfo_pool, PR_NOWAIT);
   	if (kp == NULL) {
   #ifdef DIAGNOSTIC
   		printf("Out of memory allocating siginfo for pid %d\n",
 --- 187,197 ----
   			}
   		}
   	}
 ! 	if (newkp && *newkp) {
 ! 		kp = *newkp;
 ! 		*newkp = NULL;
 ! 	} else
 ! 		kp = pool_get(&ksiginfo_pool, PR_NOWAIT);
   	if (kp == NULL) {
   #ifdef DIAGNOSTIC
   		printf("Out of memory allocating siginfo for pid %d\n",
 *************** kpsignal2(struct proc *p, const ksiginfo
 *** 1024,1029 ****
 --- 1029,1035 ----
   {
   	struct lwp *l, *suspended = NULL;
   	struct sadata_vp *vp;
 + 	ksiginfo_t *newkp;
   	int	s = 0, prop, allsusp;
   	sig_t	action;
   	int	signum = ksi->ksi_signo;
 *************** kpsignal2(struct proc *p, const ksiginfo
 *** 1058,1064 ****
   		 * caught, make sure to save any ksiginfo.
   		 */
   		if (sigismember(&p->p_sigctx.ps_sigcatch, signum))
 ! 			ksiginfo_put(p, ksi);
   	} else {
   		/*
   		 * If the signal was the result of a trap, reset it
 --- 1064,1070 ----
   		 * caught, make sure to save any ksiginfo.
   		 */
   		if (sigismember(&p->p_sigctx.ps_sigcatch, signum))
 ! 			ksiginfo_queue(p, ksi, NULL);
   	} else {
   		/*
   		 * If the signal was the result of a trap, reset it
 *************** kpsignal2(struct proc *p, const ksiginfo
 *** 1141,1149 ****
   	 */
   	if (action == SIG_HOLD &&
   	    ((prop & SA_CONT) == 0 || p->p_stat != SSTOP)) {
 ! 		ksiginfo_put(p, ksi);
   		return;
   	}
   	/* XXXSMP: works, but icky */
   	if (dolock)
   		SCHED_LOCK(s);
 --- 1147,1168 ----
   	 */
   	if (action == SIG_HOLD &&
   	    ((prop & SA_CONT) == 0 || p->p_stat != SSTOP)) {
 ! 		ksiginfo_queue(p, ksi, NULL);
 ! 		return;
 ! 	}
 ! 
 ! 	/*
 ! 	 * Allocate a ksiginfo_t incase we need to insert it with
 ! 	 * the scheduler lock held.
 ! 	 */
 ! 	newkp = pool_get(&ksiginfo_pool, PR_NOWAIT);
 ! 	if (newkp == NULL) {
 ! #ifdef DIAGNOSTIC
 ! 		printf("kpsignal2: couldn't allocated from ksiginfo_pool\n");
 ! #endif
   		return;
   	}
 + 
   	/* XXXSMP: works, but icky */
   	if (dolock)
   		SCHED_LOCK(s);
 *************** kpsignal2(struct proc *p, const ksiginfo
 *** 1161,1167 ****
   					/*NOTREACHED*/
   				} else if (l->l_flag & L_SA_YIELD) {
   					/* idle LWP is already waking up */
 ! 					goto out;
   					/*NOTREACHED*/
   				}
   			}
 --- 1180,1186 ----
   					/*NOTREACHED*/
   				} else if (l->l_flag & L_SA_YIELD) {
   					/* idle LWP is already waking up */
 ! 					goto done;
   					/*NOTREACHED*/
   				}
   			}
 *************** kpsignal2(struct proc *p, const ksiginfo
 *** 1170,1176 ****
   				if (l->l_stat == LSRUN ||
   				    l->l_stat == LSONPROC) {
   					signotify(p);
 ! 					goto out;
   					/*NOTREACHED*/
   				}
   				if (l->l_stat == LSSLEEP &&
 --- 1189,1195 ----
   				if (l->l_stat == LSRUN ||
   				    l->l_stat == LSONPROC) {
   					signotify(p);
 ! 					goto done;
   					/*NOTREACHED*/
   				}
   				if (l->l_stat == LSSLEEP &&
 *************** kpsignal2(struct proc *p, const ksiginfo
 *** 1198,1204 ****
   		/*
   		 * The signal will be noticed very soon.
   		 */
 ! 		goto out;
   		/*NOTREACHED*/
   	} else {
   		/*
 --- 1217,1223 ----
   		/*
   		 * The signal will be noticed very soon.
   		 */
 ! 		goto done;
   		/*NOTREACHED*/
   	} else {
   		/*
 *************** kpsignal2(struct proc *p, const ksiginfo
 *** 1245,1251 ****
   			 * stopping could cause deadlock.
   			 */
   			if (p->p_flag & P_PPWAIT) {
 ! 				goto out;
   			}
   			sigdelset(&p->p_sigctx.ps_siglist, signum);
   			p->p_xstat = signum;
 --- 1264,1270 ----
   			 * stopping could cause deadlock.
   			 */
   			if (p->p_flag & P_PPWAIT) {
 ! 				goto done;
   			}
   			sigdelset(&p->p_sigctx.ps_siglist, signum);
   			p->p_xstat = signum;
 *************** kpsignal2(struct proc *p, const ksiginfo
 *** 1320,1326 ****
   			l = proc_unstop(p);
   			if (l && (action == SIG_CATCH))
   				goto runfast;
 ! 			goto out;
   		}
   
   		if (prop & SA_STOP) {
 --- 1339,1345 ----
   			l = proc_unstop(p);
   			if (l && (action == SIG_CATCH))
   				goto runfast;
 ! 			goto done;
   		}
   
   		if (prop & SA_STOP) {
 *************** kpsignal2(struct proc *p, const ksiginfo
 *** 1342,1348 ****
   		 */
   		if (l)
   			goto run;
 ! 		goto out;
   	case SIDL:
   		/* Process is being created by fork */
   		/* XXX: We are not ready to receive signals yet */
 --- 1361,1367 ----
   		 */
   		if (l)
   			goto run;
 ! 		goto done;
   	case SIDL:
   		/* Process is being created by fork */
   		/* XXX: We are not ready to receive signals yet */
 *************** kpsignal2(struct proc *p, const ksiginfo
 *** 1354,1382 ****
   	/*NOTREACHED*/
   
    runfast:
 - 	if (action == SIG_CATCH) {
 - 		ksiginfo_put(p, ksi);
 - 		action = SIG_HOLD;
 - 	}
   	/*
   	 * Raise priority to at least PUSER.
   	 */
   	if (l->l_priority > PUSER)
   		l->l_priority = PUSER;
    run:
 - 	if (action == SIG_CATCH) {
 - 		ksiginfo_put(p, ksi);
 - 		action = SIG_HOLD;
 - 	}
 - 
   	setrunnable(l);		/* XXXSMP: recurse? */
 -  out:
 - 	if (action == SIG_CATCH)
 - 		ksiginfo_put(p, ksi);
    done:
   	/* XXXSMP: works, but icky */
   	if (dolock)
   		SCHED_UNLOCK(s);
   }
   
   siginfo_t *
 --- 1373,1395 ----
   	/*NOTREACHED*/
   
    runfast:
   	/*
   	 * Raise priority to at least PUSER.
   	 */
   	if (l->l_priority > PUSER)
   		l->l_priority = PUSER;
    run:
   	setrunnable(l);		/* XXXSMP: recurse? */
    done:
 + 	if (action == SIG_CATCH)
 + 		ksiginfo_queue(p, ksi, &newkp);
 + 
   	/* XXXSMP: works, but icky */
   	if (dolock)
   		SCHED_UNLOCK(s);
 + 
 + 	if (newkp)
 + 		pool_put(&ksiginfo_pool, newkp);
   }
   
   siginfo_t *
 *************** postsig(int signum)
 *** 1902,1908 ****
   		} else
   			returnmask = &p->p_sigctx.ps_sigmask;
   		p->p_stats->p_ru.ru_nsignals++;
 ! 		ksi = ksiginfo_get(p, signum);
   #ifdef KTRACE
   		if (KTRPOINT(p, KTR_PSIG))
   			ktrpsig(l, signum, action,
 --- 1915,1921 ----
   		} else
   			returnmask = &p->p_sigctx.ps_sigmask;
   		p->p_stats->p_ru.ru_nsignals++;
 ! 		ksi = ksiginfo_dequeue(p, signum);
   #ifdef KTRACE
   		if (KTRPOINT(p, KTR_PSIG))
   			ktrpsig(l, signum, action,
 *************** __sigtimedwait1(struct lwp *l, void *v, 
 *** 2419,2425 ****
   	if ((signum = firstsig(&twaitset))) {
   		/* found pending signal */
   		sigdelset(&p->p_sigctx.ps_siglist, signum);
 ! 		ksi = ksiginfo_get(p, signum);
   		if (!ksi) {
   			/* No queued siginfo, manufacture one */
   			ksi = pool_get(&ksiginfo_pool, PR_WAITOK);
 --- 2432,2438 ----
   	if ((signum = firstsig(&twaitset))) {
   		/* found pending signal */
   		sigdelset(&p->p_sigctx.ps_siglist, signum);
 ! 		ksi = ksiginfo_dequeue(p, signum);
   		if (!ksi) {
   			/* No queued siginfo, manufacture one */
   			ksi = pool_get(&ksiginfo_pool, PR_WAITOK);