Subject: Re: Update: rough SA patch (works on SMP)
To: Frank van der Linden <fvdl@netbsd.org>
From: Stephan Uphoff <ups@stups.com>
List: tech-kern
Date: 06/29/2003 17:57:53
Frank van der Linden wrote:
> Yes, xmms now seems stable for me on SMP when signaled, and otherwise.
> phoenix seems okay for most of the time, though I got it to SIGBUS
> by clicking around on news.bbc.co.uk. This may just be a libpthread
> problem that was there before too, though.

I found a code path that might screw up an upcall and fixed it.
Looks like I can't blame libpthread anymore ;-)

Can you try the following incremental patch?
My SMP machine is headless, and phoenix runs using X over an ssh tunnel.
Probably because of this, I had a tough time reproducing even a single
SIGBUS before the newest patch (>30 min of intensive browsing).
Since the changes, I have been browsing for hours without a SIGBUS.

	Stephan

--- kern_sa.c.Jun28	2003-06-29 13:54:43.000000000 -0400
+++ kern_sa.c	2003-06-29 15:10:01.000000000 -0400
@@ -53,6 +53,8 @@
 
 #include <uvm/uvm_extern.h>
 
+#include <sys/kernel.h>  /* For lbolt hack */
+
 static void sa_vp_donate(struct lwp *);
 static int sa_newcachelwp(struct lwp *);
 static struct lwp *sa_vp_repossess(struct lwp *l);
@@ -884,6 +886,8 @@
 		sau = sadata_upcall_alloc(1);
 
 		while (sa->sa_nstacks == 0) {
+			int status;
+
 			/*
 			 * This should be a transient condition, so we'll just
 			 * sleep until some stacks come in; presumably, some
@@ -911,8 +915,19 @@
 
 			DPRINTFN(7, ("sa_upcall_userret(%d.%d) sleeping"
 			    " for stacks\n", l->l_proc->p_pid, l->l_lid));
-			tsleep((caddr_t) &sa->sa_nstacks, PWAIT|PCATCH, 
+			status = tsleep((caddr_t) &sa->sa_nstacks, PWAIT|PCATCH, 
 			    "sastacks", 0);
+			if(status)
+			{
+				if (p->p_flag & P_WEXIT)
+				{
+					sadata_upcall_free(sau);
+					lwp_exit(l);
+				}
+				/* Signal pending - can't sleep */
+				/* Wait a while .. things might get better */  
+				 tsleep((caddr_t) &lbolt, PWAIT, "lbolt: sastacks", 0);
+			}	
 
 			/* XXXUPSXXX NEED TO STOP THE LWP HERE ON REQUEST */
 
@@ -970,9 +985,7 @@
 
 	sau = SIMPLEQ_FIRST(&sa->sa_upcalls);
 	SIMPLEQ_REMOVE_HEAD(&sa->sa_upcalls, sau_next);
-	if (SIMPLEQ_EMPTY(&sa->sa_upcalls))
-		l->l_flag &= ~L_SA_UPCALL;
-
+	
 	if (sau->sau_flags & SAU_FLAG_DEFERRED) {
 		sa_upcall_getstate(sau,
 		    sau->sau_state.deferred.e_lwp,
@@ -1092,14 +1105,18 @@
 	cpu_upcall(l, type, nevents, nint, sapp, ap, stack, sa->sa_upcall);
 
 	SCHED_ASSERT_UNLOCKED();
+	if (SIMPLEQ_EMPTY(&sa->sa_upcalls)) 
+	{
+		l->l_flag &= ~L_SA_UPCALL;
+		sa_vp_donate(l);
+		/* May not be reached  */
+	}
 
-	sa_vp_donate(l);
 	SCHED_ASSERT_UNLOCKED();
 
 	/* May not be reached  */
 	
 	SA_LWP_STATE_UNLOCK(l,s);
-       
 	KERNEL_PROC_UNLOCK(l);
 }