Subject: Re: proposed fix for PR 30348 -- sigwait doesn't get signals from pthread_kill
To: Chuck Silvers <chuq@chuq.com>
From: Jaromir Dolecek <jdolecek@NetBSD.org>
List: tech-userlevel
Date: 10/09/2005 22:22:39
On Sun, Oct 09, 2005 at 09:09:12AM -0700, Chuck Silvers wrote:
> hi,
> 
> the attached patch fixes the interactions between sigtimedwait() and
> pthread_kill().  it allows sigtimedwait() (and its wrappers sigwait()
> and sigwaitinfo()) to return properly if a signal is received while
> the thread is sleeping or if a signal is pending already when the
> function is called.  while I was looking, I noticed another place where
> we return an error code instead of putting the error code in errno and
> returning -1, so I fixed that too.

I believe the check for already pending signals should be done before
handling the polling (timeout->tv_sec == tv_usec == 0) case. Otherwise
it might return failure, since pthread_kill() signals are not
propagated to kernel process state IIRC and thus sigtimedwait() system
call doesn't know about them.

Thanks for fixing this.

Jaromir
 
> note that this is not safe for PTHREAD_CONCURRENCY > 1, but there are
> various other problems with that already so I didn't want to make this
> more complicated than it needed to be.
> 
> anyone see any problem with this?
> 
> -Chuck

> Index: pthread_sig.c
> ===================================================================
> RCS file: /cvsroot/src/lib/libpthread/pthread_sig.c,v
> retrieving revision 1.42
> diff -u -p -r1.42 pthread_sig.c
> --- pthread_sig.c	13 Sep 2005 02:45:38 -0000	1.42
> +++ pthread_sig.c	9 Oct 2005 15:59:22 -0000
> @@ -46,6 +46,7 @@ __RCSID("$NetBSD: pthread_sig.c,v 1.42 2
>  
>  #define __EXPOSE_STACK 1
>  #include <sys/param.h>
> +#include <sys/types.h>
>  #include <errno.h>
>  #include <lwp.h>
>  #include <stdint.h>
> @@ -299,13 +300,13 @@ pthread_sigtimedwait__callback(void *arg
>  }
>  
>  int
> -sigtimedwait(const sigset_t * __restrict set, siginfo_t * __restrict info, const struct timespec * __restrict timeout)
> +sigtimedwait(const sigset_t * __restrict set, siginfo_t * __restrict info,
> +    const struct timespec * __restrict timeout)
>  {
> -	pthread_t self;
> -	int error = 0;
> -	pthread_t target;
> +	pthread_t self, target;
>  	sigset_t wset;
>  	struct timespec timo;
> +	int sig, error = 0;
>  
>  	/* if threading not started yet, just do the syscall */
>  	if (__predict_false(pthread__started == 0))
> @@ -322,8 +323,10 @@ sigtimedwait(const sigset_t * __restrict
>  	}
>  
>  	if (timeout) {
> -		if ((u_int) timeout->tv_nsec >= 1000000000)
> -			return (EINVAL);
> +		if ((u_int) timeout->tv_nsec >= 1000000000) {
> +			errno = EINVAL;
> +			return (-1);
> +		}
>  
>  		timo = *timeout;
>  	}
> @@ -331,6 +334,20 @@ sigtimedwait(const sigset_t * __restrict
>  	pthread_spinlock(self, &pt_sigwaiting_lock);
>  
>  	/*
> +	 * Check if one of the signals that we will be waiting for is
> +	 * already pending.  If so, return it immediately.
> +	 */
> +	wset = *set;
> +	__sigandset(&self->pt_siglist, &wset);
> +	sig = firstsig(&wset);
> +	if (sig) {
> +		info->si_signo = sig;
> +		pthread_spinunlock(self, &pt_sigwaiting_lock);
> +		pthread__testcancel(self);
> +		return 0;
> +	}
> +
> +	/*
>  	 * If there is already master thread running, arrange things
>  	 * to accomodate for eventual extra signals to wait for,
>  	 * and join the sigwaiting list.
> @@ -339,6 +356,8 @@ sigtimedwait(const sigset_t * __restrict
>  		struct pt_alarm_t timoalarm;
>  		struct timespec etimo;
>  
> +		SDPRINTF(("(stw %p) not master\n", self));
> +
>  		/*
>  		 * Get current time. We need it if we would become master.
>  		 */
> @@ -383,7 +402,9 @@ sigtimedwait(const sigset_t * __restrict
>  
>  		PTQ_INSERT_TAIL(&pt_sigwaiting, self, pt_sleep);
>  
> +		SDPRINTF(("(stw %p) not master blocking\n", self));
>  		pthread__block(self, &pt_sigwaiting_lock);
> +		SDPRINTF(("(stw %p) not master unblocked\n", self));
>  
>  		/* check if we got a signal we waited for */
>  		if (info->si_signo) {
> @@ -443,6 +464,7 @@ sigtimedwait(const sigset_t * __restrict
>  
>  	/* Master thread loop */
>  	pt_sigwmaster = self;
> +	SDPRINTF(("(stw %p) i am the master\n", self));
>  	for(;;) {
>  		/* Build our wait set */
>  		wset = *set;
> @@ -457,39 +479,50 @@ sigtimedwait(const sigset_t * __restrict
>  		 * We are either the only one, or wait set was setup already.
>  		 * Just do the syscall now.
>  		 */
> +		SDPRINTF(("(stw %p) master blocking\n", self));
>  		error = __sigtimedwait(&wset, info, (timeout) ? &timo : NULL);
> +		SDPRINTF(("(stw %p) master unblocking\n", self));
>  
>  		pthread_spinlock(self, &pt_sigwaiting_lock);
>  		if ((error && (errno != ECANCELED || self->pt_cancel))
>  		    || (!error && __sigismember14(set, info->si_signo)) ) {
> +
>  			/*
>  			 * Normal function return. Clear pt_sigwmaster,
>  			 * and if wait queue is nonempty, make first waiter
>  			 * new master.
>  			 */
> +			SDPRINTF(("(stw %p) master normal self %d\n",
> +				  self, info->si_signo));
>  			pt_sigwmaster = NULL;
>  			if (!PTQ_EMPTY(&pt_sigwaiting)) {
>  				pt_sigwmaster = PTQ_FIRST(&pt_sigwaiting);
> +				SDPRINTF(("(stw %p) new master %p\n", self,
> +					  pt_sigwmaster));
>  				PTQ_REMOVE(&pt_sigwaiting, pt_sigwmaster,
>  					pt_sleep);
>  				pthread__sched(self, pt_sigwmaster);
>  			}
>  
>  			pthread_spinunlock(self, &pt_sigwaiting_lock);
> -
>  			pthread__testcancel(self);
>  			return (error);
>  		}
>  
>  		if (!error) {
> +
>  			/*
>  			 * Got a signal, but not from _our_ wait set.
>  			 * Scan the queue of sigwaiters and wakeup
>  			 * the first thread waiting for this signal.
>  			 */
>  			PTQ_FOREACH(target, &pt_sigwaiting, pt_sleep) {
> -			    if (__sigismember14(target->pt_sigwait, info->si_signo)) {
> -				pthread__assert(target->pt_state == PT_STATE_BLOCKED_QUEUE);
> +			    if (__sigismember14(target->pt_sigwait,
> +						info->si_signo)) {
> +				pthread__assert(target->pt_state ==
> +						PT_STATE_BLOCKED_QUEUE);
> +				SDPRINTF(("(stw %p) master target %p\n",
> +					  self, target));
>  
>  				/* copy to waiter siginfo */
>  				memcpy(target->pt_wsig, info, sizeof(*info));
> @@ -500,6 +533,7 @@ sigtimedwait(const sigset_t * __restrict
>  			}
>  
>  			if (!target) {
> +
>  				/*
>  				 * Didn't find anyone waiting on this signal.
>  				 * Deliver signal normally. This might
> @@ -507,11 +541,14 @@ sigtimedwait(const sigset_t * __restrict
>  				 * 'their' signal arrives before the master
>  				 * thread would be scheduled after _lwp_wakeup().
>  				 */
> +				SDPRINTF(("(stw %p) master orphaned\n", self));
>  				pthread__signal(self, NULL, info);
>  			} else {
> +
>  				/*
>  				 * Signal waiter removed, adjust our wait set.
>  				 */
> +				SDPRINTF(("(stw %p) master raced\n", self));
>  				wset = *set;
>  				PTQ_FOREACH(target, &pt_sigwaiting, pt_sleep)
>  					__sigplusset(target->pt_sigwait, &wset);
> @@ -787,14 +824,38 @@ pthread__kill_self(pthread_t self, sigin
>  static void
>  pthread__kill(pthread_t self, pthread_t target, siginfo_t *si)
>  {
> +	int deliver;
>  
>  	SDPRINTF(("(pthread__kill %p) target %p sig %d code %d\n", self, target,
>  	    si->si_signo, si->si_code));
>  
>  	if (__sigismember14(&target->pt_sigmask, si->si_signo)) {
> -		/* Record the signal for later delivery. */
> -		__sigaddset14(&target->pt_siglist, si->si_signo);
> -		return;
> +		SDPRINTF(("(pthread__kill %p) target masked\n", target));
> +
> +		/*
> +		 * If the target is waiting for this signal in sigtimedwait(),
> +		 * make the target runnable but do not deliver the signal.
> +		 * Otherwise record the signal for later delivery.
> +		 * XXX not MPsafe.
> +		 */
> +		pthread_spinlock(self, &self->pt_statelock);
> +		if (target->pt_state == PT_STATE_BLOCKED_QUEUE &&
> +		    target->pt_sleepq == &pt_sigwaiting &&
> +		    __sigismember14(target->pt_sigwait, si->si_signo)) {
> +			SDPRINTF(("(pthread__kill %p) stw\n", target));
> +			target->pt_wsig->si_signo = si->si_signo;
> +			pthread_spinunlock(self, &self->pt_statelock);
> +			deliver = 0;
> +		} else {
> +			SDPRINTF(("(pthread__kill %p) deferring\n", target));
> +			pthread_spinunlock(self, &self->pt_statelock);
> +			__sigaddset14(&target->pt_siglist, si->si_signo);
> +			return;
> +		}
> +	} else {
> +		SDPRINTF(("(pthread__kill %p) target %p delivering\n",
> +			  self, target));
> +		deliver = 1;
>  	}
>  
>  	if (self == target) {
> @@ -818,6 +879,8 @@ pthread__kill(pthread_t self, pthread_t 
>  	 */
>  	pthread_spinlock(self, &target->pt_statelock);
>  	if (target->pt_blockgen != target->pt_unblockgen) {
> +		SDPRINTF(("(pthread__kill %p) target running\n", target));
> +
>  		/*
>  		 * The target is not on a queue at all, and
>  		 * won't run again for a while. Try to wake it
> @@ -833,6 +896,8 @@ pthread__kill(pthread_t self, pthread_t 
>  		_lwp_wakeup(target->pt_blockedlwp);
>  		return;
>  	}
> +	SDPRINTF(("(pthread__kill %p) target state %d\n", target,
> +		  target->pt_state));
>  	switch (target->pt_state) {
>  	case PT_STATE_SUSPENDED:
>  		pthread_spinlock(self, &pthread__runqueue_lock);
> @@ -853,7 +918,8 @@ pthread__kill(pthread_t self, pthread_t 
>  		;
>  	}
>  
> -	pthread__deliver_signal(self, target, si);
> +	if (deliver)
> +		pthread__deliver_signal(self, target, si);
>  	pthread__sched(self, target);
>  	pthread_spinunlock(self, &target->pt_statelock);
>  }


-- 
Jaromir Dolecek <jdolecek@NetBSD.org>            http://www.NetBSD.cz/
-=- We can walk our road together if our goals are all the same;     -=-
-=- We can run alone and free if we pursue a different aim.          -=-