Subject: sig*wait() implementation, RFC
To: None <tech-kern@netbsd.org>
From: Jaromir Dolecek <jdolecek@netbsd.org>
List: tech-kern
Date: 02/09/2003 12:26:23
--ELM740951443-8389-0_
Content-Transfer-Encoding: 7bit
Content-Type: text/plain; charset=US-ASCII

Here is latest patch. It includes only changes for kern/kern_sig.c
and lib/libpthread/pthread_sig.c, changes to includes and syscall tables
are not included. Only sigtimedwait() is coded; sigwait() and
sigwaitinfo() will be wrappers around sigtimedwait().

The syscall has been simplified further and made slightly more efficient
too. It now also returns ECANCELED when it's woken explicitly up
via _lwp_wakeup(). This feature is used by libpthread wrapper. The syscall
is complete as far as I'm concerned (including support for timeout).

I implemented the libpthread version using the syscall, rather than
completely userland item. IMHO this is better alternative than
informing SA app about blocked signals via upcall, and I believe
it's safer to do this way generally.  Since sig*wait() is not
performance sensitive, I think using syscall is fine in this case.

The libpthread wrapper makes sure that only one thread ('master')
is actually calling the syscall. When there are multiple threads
calling sig*wait() at the same time, the first one acts as proxy
doing the syscall, and others just wait for the 'master' to hand
them results. When a thread comes to sig*wait() while different
thread is already in kernel waiting for the signal, it adjusts the
signal wait set, signals 'master' thread via _lwp_wakeup() if any
new signal was added, and blocks in userland waiting for results.
The master thread notices the wakup condition, calls the syscall
again with new arguments, and makes sure to hand results to right
thread once it returns.

When sigtimedwait() is called with zero timeout (tv_sec == tv_usec == 0),
it would directly call the syscall rather than going through above.
This is safe and DTRT - the syscall code doesn't change any
sigwait state for process when called with zero timeout, it just
checks set of pending signals.

You might note support for timeout parameter for sigtimedwait() is
not implemented at this moment for the libpthread wrapper yet.
It's quite simple to do - the nonmaster threads would use pthread
alarms to handle the timeout, and the master thread's timeout would
be handled by the syscall. Of course, master thread must adjust
it's timeout any time it's _lwp_wakeup()ped, and nonmaster thread
has to adjust the timeout when it would become a master. This is
SMOP, but I thought it would be useful to get feedback for the
general handling while finishing this.

Opinions?

Jaromir
-- 
Jaromir Dolecek <jdolecek@NetBSD.org>            http://www.NetBSD.org/
-=- We should be mindful of the potential goal, but as the tantric    -=-
-=- Buddhist masters say, ``You may notice during meditation that you -=-
-=- sometimes levitate or glow.   Do not let this distract you.''     -=-

--ELM740951443-8389-0_
Content-Transfer-Encoding: 7bit
Content-Type: text/plain; charset=ISO-8859-2
Content-Disposition: attachment; filename=sigw.diff

Index: lib/libpthread/pthread_sig.c
===================================================================
RCS file: /cvsroot/src/lib/libpthread/pthread_sig.c,v
retrieving revision 1.7
diff -u -p -r1.7 pthread_sig.c
--- lib/libpthread/pthread_sig.c	2003/01/30 01:12:42	1.7
+++ lib/libpthread/pthread_sig.c	2003/02/09 10:43:08
@@ -88,6 +88,11 @@ static pthread_spin_t pt_sigsuspended_lo
  */
 static pthread_cond_t pt_sigsuspended_cond = PTHREAD_COND_INITIALIZER;
 
+/* Queue of threads that are waiting in sigtimedwait(). */
+static struct pthread_queue_t pt_sigwaiting;
+static pthread_spin_t pt_sigwaiting_lock;
+static pthread_t pt_sigwmaster;
+static pthread_cond_t pt_sigwaiting_cond = PTHREAD_COND_INITIALIZER;
 
 static void pthread__kill(pthread_t, pthread_t, int, int);
 static void pthread__kill_self(pthread_t, int, int);
@@ -96,6 +101,8 @@ static void 
 pthread__signal_tramp(int, int, void (*)(int, int, struct sigcontext *),
     ucontext_t *, sigset_t *);
 
+static int firstsig(const sigset_t *);
+
 __strong_alias(__libc_thr_sigsetmask,pthread_sigmask)
 
 void
@@ -103,6 +110,9 @@ pthread__signal_init(void)
 {
 	SDPRINTF(("(signal_init) setting process sigmask\n"));
 	__sigprocmask14(0, NULL, &pt_process_sigmask);
+
+	PTQ_INIT(&pt_sigsuspended);
+	PTQ_INIT(&pt_sigwaiting);
 }
 
 int
@@ -148,8 +158,8 @@ pthread_kill(pthread_t thread, int sig)
 
 
 /*
- * Interpositioning is our friend. We need to intercept sigaction() and
- * sigsuspend().
+ * Interpositioning is our friend. We need to intercept sigaction(),
+ * sigsuspend() and sigtimedwait().
  */
 
 int
@@ -222,6 +232,188 @@ __sigsuspend14(const sigset_t *sigmask)
 
 	errno = EINTR;
 	return -1;
+}
+
+/*
+ * Interpositioned sigtimedwait(2), need to multiplex all
+ * eventual callers to a single kernel lwp.
+ */
+int _sys_sigtimedwait(const sigset_t * __restrict, siginfo_t * __restrict,
+	const struct timespec * __restrict);
+
+int
+sigtimedwait(const sigset_t * __restrict set, siginfo_t * __restrict info, const struct timespec * __restrict timeout)
+{
+	pthread_t self;
+	int error = 0;
+	pthread_t target;
+	sigset_t wset;
+	extern int pthread__started;
+
+	/* if threading not started yet, just do the syscall */
+	if (__predict_false(pthread__started == 0))
+		return (_sys_sigtimedwait(set, info, timeout));
+
+	self = pthread__self();
+	pthread__testcancel(self);
+
+	/* also call syscall if timeout is zero (i.e. polling) */
+	if (timeout && timeout->tv_sec == 0 && timeout->tv_nsec == 0) {
+		error = _sys_sigtimedwait(set, info, timeout);
+		pthread__testcancel(self);
+		return (error);
+	}
+
+	pthread_spinlock(self, &pt_sigwaiting_lock);
+
+	/*
+	 * If there is already master thread running, arrange things
+	 * to accomodate for eventual extra signals to wait for,
+	 * and join the sigwaiting list.
+	 */
+	if (pt_sigwmaster) {
+		/* Check if this thread's wait set is different to master set. */
+		wset = *set;
+		__sigminusset(pt_sigwmaster->pt_sigwait, &wset);
+		if (firstsig(&wset)) {
+			/* some new signal in set, wakeup master */
+			__sigplusset(&wset, pt_sigwmaster->pt_sigwait);
+			_lwp_wakeup(pt_sigwmaster->pt_blockedlwp);
+		}
+
+		/* Save our wait set and info pointer */
+		wset = *set;
+		self->pt_sigwait = &wset;
+		self->pt_wsig = info;
+
+		/* zero to recognize when we get passed the signal from master */
+		info->si_signo = 0;
+
+		if (timeout) {
+			/* XXX Setup alarm */
+		}
+
+		pthread_spinlock(self, &self->pt_statelock);
+		self->pt_state = PT_STATE_BLOCKED_QUEUE;
+		self->pt_sleepobj = &pt_sigwaiting_cond;
+		self->pt_sleepq = &pt_sigwaiting;
+		self->pt_sleeplock = &pt_sigwaiting_lock;
+		pthread_spinunlock(self, &self->pt_statelock);
+
+		PTQ_INSERT_TAIL(&pt_sigwaiting, self, pt_sleep);
+
+		pthread__block(self, &pt_sigwaiting_lock);
+
+		/* check if we got a signal we waited for */
+		if (info->si_signo) {
+			/* got the signal from master */
+			pthread__testcancel(self);
+			return (0);
+		}
+
+		/* not signal, must have been upgraded to master */
+		pthread_spinlock(self, &pt_sigwaiting_lock);
+		assert(pt_sigwmaster == self);
+
+		/* become master */
+	}
+
+	/* Save wait set */
+	wset = *set;
+	if (!PTQ_EMPTY(&pt_sigwaiting)) {
+		PTQ_FOREACH(target, &pt_sigwaiting, pt_sleep)
+			__sigplusset(target->pt_sigwait, &wset);
+	}
+
+	self->pt_sigwait = &wset;
+	self->pt_wsig = NULL;
+
+	/* Master thread loop */
+	pt_sigwmaster = self;
+	for(;;) {
+		pthread_spinunlock(self, &pt_sigwaiting_lock);
+
+		/*
+		 * We are either the only one, or wait set was setup already.
+		 * Just do the syscall now.
+		 */
+		error = _sys_sigtimedwait(&wset, info, timeout);
+
+		pthread_spinlock(self, &pt_sigwaiting_lock);
+		if ((error && errno != ECANCELED)
+		    || (!error && __sigismember14(set, info->si_signo)) ) {
+			/*
+			 * Normal function return. Clear pt_sigwmaster,
+			 * and if wait queue is nonempty, make first waiter
+			 * new master.
+			 */
+			pt_sigwmaster = NULL;
+			if (!PTQ_EMPTY(&pt_sigwaiting)) {
+				pt_sigwmaster = PTQ_FIRST(&pt_sigwaiting);
+				PTQ_REMOVE(&pt_sigwaiting, pt_sigwmaster,
+					pt_sleep);
+				pthread__sched(self, pt_sigwmaster);
+			}
+
+			pthread_spinunlock(self, &pt_sigwaiting_lock);
+
+			pthread__testcancel(self);
+			return (error);
+		}
+
+		if (!error) {
+			/*
+			 * Got a signal, but not from _our_ wait set.
+			 * Scan the queue of sigwaiters and wakeup
+			 * the first thread waiting for this signal.
+			 */
+			PTQ_FOREACH(target, &pt_sigwaiting, pt_sleep) {
+			    if (__sigismember14(target->pt_sigwait, info->si_signo)) {
+				assert(target->pt_state==PT_STATE_BLOCKED_QUEUE);
+
+				/* copy to waiter siginfo */
+				memcpy(target->pt_wsig, info, sizeof(*info));
+				PTQ_REMOVE(&pt_sigwaiting, target, pt_sleep);
+				pthread__sched(self, target);
+				break;
+			    }
+			}
+
+			if (!target) {
+				/*
+				 * Didn't find anyone waiting on this signal.
+				 * Deliver signal normally. This might
+				 * happen if a thread times out, but
+				 * 'their' signal arrives before the master
+				 * thread would be scheduled after _lwp_wakeup().
+				 */
+				pthread__signal(self, NULL, info->si_signo,
+					info->si_code);
+			} else {
+				/*
+				 * Signal waiter removed, adjust our wait set.
+				 */
+				wset = *set;
+				PTQ_FOREACH(target, &pt_sigwaiting, pt_sleep)
+					__sigplusset(target->pt_sigwait, &wset);
+			}
+		} else {
+			/*
+			 * ECANCELED - new sigwaiter arrived and added to master
+			 * wait set, or some sigwaiter exited due to timeout
+			 * and removed from master wait set. All the work
+			 * was done already, so just update our timeout
+			 * and go back to syscall.
+			 */
+		}
+
+		if (timeout) {
+			/* XXX adjust timeout */
+		}
+	}
+
+	/* NOTREACHED */
+	return (0);
 }
 
 /*
Index: sys/kern/kern_sig.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_sig.c,v
retrieving revision 1.132
diff -u -p -r1.132 kern_sig.c
--- sys/kern/kern_sig.c	2003/02/07 09:02:14	1.132
+++ sys/kern/kern_sig.c	2003/02/09 10:43:08
@@ -356,6 +356,7 @@ siginit(struct proc *p)
 		SIGACTION_PS(ps, signum).sa_flags = SA_RESTART;
 	}
 	sigemptyset(&p->p_sigctx.ps_sigcatch);
+	p->p_sigctx.ps_sigwaited = 0;
 	p->p_flag &= ~P_NOCLDSTOP;
 
 	/*
@@ -402,6 +403,7 @@ execsigs(struct proc *p)
 		SIGACTION_PS(ps, signum).sa_flags = SA_RESTART;
 	}
 	sigemptyset(&p->p_sigctx.ps_sigcatch);
+	p->p_sigctx.ps_sigwaited = 0;
 	p->p_flag &= ~P_NOCLDSTOP;
 
 	/*
@@ -856,6 +858,27 @@ psignal1(struct proc *p, int signum,
 	p->p_sigctx.ps_sigcheck = 1;
 
 	/*
+	 * If the signal doesn't have SA_CANTMASK (no override for SIGKILL,
+	 * please!), check if anything waits on it. If yes, clear the
+	 * pending signal from siglist set, save it to ps_sigwaited,
+	 * clear sigwait list, and wakeup any sigwaiters.
+	 * The signal won't be processed further here.
+	 */
+	if ((prop & SA_CANTMASK) == 0
+	    && p->p_sigctx.ps_sigwaited < 0
+	    && sigismember(&p->p_sigctx.ps_sigwait, signum)) {
+		sigdelset(&p->p_sigctx.ps_siglist, signum);
+		p->p_sigctx.ps_sigwaited = signum;
+		sigemptyset(&p->p_sigctx.ps_sigwait);
+
+		if (dolock)
+			wakeup_one(&p->p_sigctx.ps_sigwait);
+		else
+			sched_wakeup(&p->p_sigctx.ps_sigwait);
+		return;
+	}
+
+	/*
 	 * Defer further processing for signals which are held,
 	 * except that stopped processes must be continued by SIGCONT.
 	 */
@@ -1817,6 +1840,118 @@ sys_setcontext(struct lwp *l, void *v, r
 	return (EJUSTRETURN);
 }
 
+/*
+ * sigtimedwait(2) system call, used also for implementation
+ * of sigwaitinfo() and sigwait().
+ *
+ * Note this system call is only used by single-threaded programs.
+ * libpthread handles signal waiting for threaded ones. Due to
+ * this, we don't need to care about concurrent waits or access. 
+ *
+ * XXX We currently don't actually support queued signals, so we fill
+ * only si_signo of siginfo_t, and always set si_code to SI_USER.
+ */
+int
+sys_sigtimedwait(struct lwp *l, void *v, register_t *retval)
+{
+	struct sys_sigtimedwait_args /* {
+		syscallarg(const sigset_t *) set;
+		syscallarg(siginfo_t *) info;
+		syscallarg(const struct timespec *) timeout;
+	} */ *uap = v;
+	sigset_t waitset, twaitset;
+	struct proc *p = l->l_proc;
+	int error;
+	int signum;
+	int timo = 0;
+
+	if ((error = copyin(SCARG(uap, set), &waitset, sizeof(waitset))))
+		return (error);
+
+	/*
+	 * Silently ignore SA_CANTMASK signals. psignal1() would
+	 * ignore SA_CANTMASK signals in waitset, we do this
+	 * only for the below siglist check.
+	 */
+	sigminusset(&sigcantmask, &waitset);
+
+	/*
+	 * First scan siglist and check if there is signal from
+	 * our waitset already pending.
+	 */
+	twaitset = waitset;
+	__sigandset(&p->p_sigctx.ps_siglist, &twaitset);
+	if ((signum = firstsig(&twaitset))) {
+		/* found pending signal */
+		sigdelset(&p->p_sigctx.ps_siglist, signum);
+		goto sig;
+	}
+
+	/*
+	 * Calculate timeout, if it was specified.
+	 */
+	if (SCARG(uap, timeout)) {
+		struct timespec ts;
+		uint64_t ms;
+
+		if ((error = copyin(SCARG(uap, timeout), &ts, sizeof(ts))))
+			return (error);
+
+		ms = (ts.tv_sec * 1000) + (ts.tv_nsec / 1000000);
+		timo = mstohz(ms);
+		if (timo == 0 && ts.tv_sec == 0 && ts.tv_nsec > 0)
+			timo = 1;
+		if (timo <= 0)
+			return (EAGAIN);
+	}
+
+	/*
+	 * Setup ps_sigwait list.
+	 */
+	p->p_sigctx.ps_sigwaited = -1;
+	p->p_sigctx.ps_sigwait = waitset;
+
+	/*
+	 * Wait for signal to arrive. We can either be woken up or
+	 * time out.
+	 */
+	error = tsleep(&p->p_sigctx.ps_sigwait, PPAUSE|PCATCH, "sigwait", timo);
+
+	/*
+	 * Check if a signal from our wait set has arrived, or if it
+	 * was just wakeup.
+	 */
+	if (!error) {
+		if ((signum = p->p_sigctx.ps_sigwaited) <= 0) {
+			/* wakeup via _lwp_wakeup() */
+			error = ECANCELED;
+		}
+	}
+
+	/*
+	 * On error, clear sigwait indication. psignal1() sets it
+	 * in !error case.
+	 */
+	if (error)
+		p->p_sigctx.ps_sigwaited = 0;
+
+	/*
+	 * If a signal from the wait set arrived, copy it to userland.
+	 * XXX no queued signals for now
+	 */
+	if (signum > 0) {
+		siginfo_t si;
+
+ sig:
+		memset(&si, 0, sizeof(si));
+		si.si_signo = signum;
+		si.si_code = SI_USER;
+
+		error = copyout(&si, SCARG(uap, info), sizeof(si));
+	}
+
+	return (error);
+}
 
 /*
  * Returns true if signal is ignored or masked for passed process.

--ELM740951443-8389-0_--