Subject: rough SA patch (works on SMP)
To: tech-kern@netbsd.org
From: Stephan Uphoff <ups@stups.com>
List: tech-kern
Date: 06/23/2003 04:50:32
Hi,

I mangled Nathan's SA kernel code in the hope of squashing a few
bugs without introducing new ones, and added a modified patch from
YAMAMOTO Takashi.

The resulting patch is extremely ugly; it is only meant as a
temporary band-aid and will be trashed as soon as possible.

The signaling code still contains several race conditions.
Stopping or tracing a process might cause a kernel panic.
(This is true for both regular and SMP kernels.)

However, running phoenix on my SMP machine did not cause
any panics.

Unfortunately, phoenix will eventually dump core with a bus error
in _setcontext_u_xmm when it tries to put the IP on the stack.
I have not yet investigated whether the dump is caused by incorrect
upcall arguments or by libpthread. (The timing of upcalls has changed.)

FYI: The patch only allows a single SA LWP to be active in
     userspace; a rough sketch of the hand-off appears below.
     I have not checked whether the patches make sense for non-i386
     architectures.
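
To illustrate the hand-off: an LWP that needs the VP flags itself
and sleeps (sa_vp_repossess), and the current owner passes the VP
on and parks itself in the LWP cache (sa_vp_donate). The userspace
sketch below models that handshake with pthreads; vp_repossess(),
vp_donate() and everything else in it are made up for illustration
and are not code from the patch:

	#include <pthread.h>
	#include <sched.h>
	#include <stdio.h>

	static pthread_mutex_t vp_lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t vp_cond = PTHREAD_COND_INITIALIZER;
	static pthread_t vp_owner;	/* plays the role of sa_vp */
	static int vp_wait_count;	/* plays the role of sa_vp_wait_count */

	/* cf. sa_vp_repossess(): announce interest, sleep until we own the VP */
	static void
	vp_repossess(void)
	{
		pthread_mutex_lock(&vp_lock);
		vp_wait_count++;
		while (!pthread_equal(vp_owner, pthread_self()))
			pthread_cond_wait(&vp_cond, &vp_lock);
		pthread_mutex_unlock(&vp_lock);
	}

	/* cf. sa_vp_donate(): hand the VP to a waiter; returns 0 if none */
	static int
	vp_donate(pthread_t next)
	{
		int donated = 0;

		pthread_mutex_lock(&vp_lock);
		if (vp_wait_count > 0) {
			vp_owner = next;
			vp_wait_count--;
			pthread_cond_broadcast(&vp_cond);
			donated = 1;
		}
		pthread_mutex_unlock(&vp_lock);
		return donated;
	}

	static void *
	waiter(void *arg)
	{
		vp_repossess();
		printf("waiter owns the VP now\n");
		return arg;
	}

	int
	main(void)
	{
		pthread_t t;

		vp_owner = pthread_self();	/* main starts as the VP owner */
		pthread_create(&t, NULL, waiter, NULL);
		while (!vp_donate(t))		/* spin until the waiter registers */
			sched_yield();
		pthread_join(t, NULL);
		return 0;
	}

The kernel version differs in that sa_vp_donate() walks the LWP
list for a flagged LWP and mi_switch()es to it directly rather
than signaling a condition variable.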

Any feedback is appreciated.

	Stephan


Index: kern/kern_sa.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_sa.c,v
retrieving revision 1.16
diff -c -r1.16 kern_sa.c
*** kern/kern_sa.c	2003/05/28 22:17:20	1.16
--- kern/kern_sa.c	2003/06/23 07:33:37
***************
*** 53,58 ****
--- 53,59 ----
  
  #include <uvm/uvm_extern.h>
  
+ static void sa_vp_donate(struct lwp *);
  static int sa_newcachelwp(struct lwp *);
  static struct lwp *sa_vp_repossess(struct lwp *l);
  
***************
*** 72,77 ****
--- 73,95 ----
  #endif
  
  
+ 
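+ /*
+  * Temporarily clear L_SA across code that may sleep, so that the
+  * sleep cannot trigger a recursive upcall; SA_LWP_STATE_UNLOCK
+  * restores the bit from the saved flag word.
+  */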
+ #define SA_LWP_STATE_LOCK(l,s)		\
+     do {				\
+ 	(s) = (l)->l_flag;		\
+ 	(l)->l_flag &= ~L_SA;		\
+     } while (0)
+ 
+ #define SA_LWP_STATE_UNLOCK(l,s)	\
+     do {				\
+ 	(l)->l_flag |= ((s) & L_SA);	\
+     } while (0)
+ 
  /*
   * sadata_upcall_alloc:
   *
***************
*** 139,144 ****
--- 157,166 ----
  		simple_lock_init(&sa->sa_lock);
  		sa->sa_flag = SCARG(uap, flags) & SA_FLAG_ALL;
  		sa->sa_vp = NULL;
+ 		
+ 		sa->sa_old_lwp = NULL;
+ 		sa->sa_vp_wait_count = 0;
+ 
  		sa->sa_idle = NULL;
  		sa->sa_woken = NULL;
  		sa->sa_concurrency = 1;
***************
*** 275,281 ****
--- 297,305 ----
  void
  sa_yield(struct lwp *l)
  {
+ #if 0
  	struct lwp *l2;
+ #endif
  	struct proc *p = l->l_proc;
  	struct sadata *sa = p->p_sa;
  	int s, ret;
***************
*** 284,296 ****
--- 308,325 ----
  	 * If we're the last running LWP, stick around to recieve
  	 * signals.
  	 */
+ #if 0
  	if (p->p_nrlwps == 1) {
+ #endif
  		DPRINTFN(1,("sa_yield(%d.%d) going dormant\n",
  		    p->p_pid, l->l_lid));
  		/*
  		 * A signal will probably wake us up. Worst case, the upcall
  		 * happens and just causes the process to yield again.
  		 */
+ 
+ 		sa_vp_donate(l);
+ 
  		s = splsched();	/* Protect from timer expirations */
  		KDASSERT(sa->sa_vp == l);
  		/*
***************
*** 299,316 ****
  		 * going to sleep. It might make more sense for this to
  		 * be handled inside of tsleep....
  		 */
! 		ret = 0;
  		while  ((ret == 0) && (p->p_userret == NULL)) {
  			sa->sa_idle = l;
  			l->l_flag &= ~L_SA;
  			ret = tsleep((caddr_t) l, PUSER | PCATCH, "sawait", 0);
  			l->l_flag |= L_SA;
  			sa->sa_idle = NULL;
! 			sa->sa_vp = l;
  		}
  		splx(s);
  		DPRINTFN(1,("sa_yield(%d.%d) returned\n",
  		    p->p_pid, l->l_lid));
  	} else {
  		DPRINTFN(1,("sa_yield(%d.%d) stepping aside\n", p->p_pid, l->l_lid));
  	
--- 328,347 ----
  		 * going to sleep. It might make more sense for this to
  		 * be handled inside of tsleep....
  		 */
! 		ret = 0;	
! 
  		while  ((ret == 0) && (p->p_userret == NULL)) {
  			sa->sa_idle = l;
  			l->l_flag &= ~L_SA;
  			ret = tsleep((caddr_t) l, PUSER | PCATCH, "sawait", 0);
  			l->l_flag |= L_SA;
  			sa->sa_idle = NULL;
! 			sa_vp_donate(l);
  		}
  		splx(s);
  		DPRINTFN(1,("sa_yield(%d.%d) returned\n",
  		    p->p_pid, l->l_lid));
+ #if 0
  	} else {
  		DPRINTFN(1,("sa_yield(%d.%d) stepping aside\n", p->p_pid, l->l_lid));
  	
***************
*** 334,339 ****
--- 365,371 ----
  		KDASSERT(p->p_flag & P_WEXIT);
  		/* mostly NOTREACHED */
  	}
+ #endif
  }
  
  
***************
*** 371,381 ****
  	struct sadata_upcall *sau;
  	struct sadata *sa = l->l_proc->p_sa;
  	stack_t st;
  
! 	l->l_flag &= ~L_SA; /* XXX prevent recursive upcalls if we sleep for
! 			      memory */
  	sau = sadata_upcall_alloc(1);
! 	l->l_flag |= L_SA;
  
  	if (sa->sa_nstacks == 0) {
  		/* assign to assure that it gets freed */
--- 403,417 ----
  	struct sadata_upcall *sau;
  	struct sadata *sa = l->l_proc->p_sa;
  	stack_t st;
+ 	int s;
  
! 	/* XXX prevent recursive upcalls if we sleep for memory */
! 	SA_LWP_STATE_LOCK(l,s);
! 
  	sau = sadata_upcall_alloc(1);
! 
! 	SA_LWP_STATE_UNLOCK(l,s);
! 
  
  	if (sa->sa_nstacks == 0) {
  		/* assign to assure that it gets freed */
***************
*** 497,502 ****
--- 533,540 ----
  			 * XXX the recovery from this situation deserves
  			 * XXX more thought.
  			 */
+ 
+ 			/* XXXUPSXXX Should only happen with concurrency > 1 */
  #ifdef DIAGNOSTIC
  			printf("sa_switch(%d.%d): no cached LWP for upcall.\n",
  			    p->p_pid, l->l_lid);
***************
*** 547,552 ****
--- 585,591 ----
  
  		l->l_flag |= L_SA_BLOCKING;
  		l2->l_priority = l2->l_usrpri;
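+ 		/* The upcall LWP becomes the new owner of the virtual processor. */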
+ 		sa->sa_vp = l2;
  		setrunnable(l2);
  		PRELE(l2); /* Remove the artificial hold-count */
  
***************
*** 564,572 ****
  		 */
  		if (sa->sa_idle)
  			l2 = NULL;
! 		else
! 			l2 = sa->sa_vp;
  	} else {
  		/*
  		 * Case 3: The VP is empty. As in case 2, we were
  		 * woken up and called tsleep again, but additionally,
--- 603,616 ----
  		 */
  		if (sa->sa_idle)
  			l2 = NULL;
! 		else {
! 			l2 = sa->sa_vp; /* XXXUPSXXX Unfair advantage for l2 ? */
! 			if((l2->l_stat != LSRUN) || ((l2->l_flag & L_INMEM) == 0))
! 				l2 = NULL;
! 		}
  	} else {
+ 
+ #if 0
  		/*
  		 * Case 3: The VP is empty. As in case 2, we were
  		 * woken up and called tsleep again, but additionally,
***************
*** 585,596 ****
--- 629,657 ----
  			mi_switch(l, NULL);
  			return;
  		}
+ #else
+ 		mi_switch(l, NULL);
+ 		return;
+ #endif
+ 		
  	sa_upcall_failed:
+ #if 0
  		cpu_setfunc(l2, sa_yieldcall, l2);
  
  		l2->l_priority = l2->l_usrpri;
  		setrunnable(l2);
  		PRELE(l2); /* Remove the artificial hold-count */
+ #else
+ 		/*
+ 		 * sa_putcachelwp does not block because we have a
+ 		 * hold count on l2.
+ 		 */
+ 		sa_putcachelwp(p, l2);
+ 		PRELE(l2); /* Remove the artificial hold-count */
+ 
+ 		mi_switch(l, NULL);
+ 		return;
+ #endif
  	}
  
  
***************
*** 621,628 ****
--- 682,691 ----
  	 */
  	if (l->l_flag & L_SA_BLOCKING)
  		l->l_flag |= L_SA_UPCALL;
+ #if 0
  	else
  		sa_vp_repossess(l);
+ #endif
  }
  
  void
***************
*** 631,636 ****
--- 694,700 ----
  	struct lwp *l;
  	struct proc *p;
  	struct sadata *sa;
+ 	int s;
  
  	l = arg;
  	p = l->l_proc;
***************
*** 643,653 ****
--- 707,721 ----
  		/* Allocate the next cache LWP */
  		DPRINTFN(6,("sa_switchcall(%d.%d) allocating LWP\n",
  		    p->p_pid, l->l_lid));
+ 		SA_LWP_STATE_LOCK(l,s);
  		sa_newcachelwp(l);
+ 		SA_LWP_STATE_UNLOCK(l,s);
+ 
  	}
  	upcallret(l);
  }
  
+ #if 0
  void
  sa_yieldcall(void *arg)
  {
***************
*** 672,677 ****
--- 740,746 ----
  	sa_yield(l);
  	upcallret(l);
  }
+ #endif
  
  static int
  sa_newcachelwp(struct lwp *l)
***************
*** 768,779 ****
  	void *stack, *ap;
  	ucontext_t u, *up;
  	int i, nsas, nint, nevents, type;
  
  	p = l->l_proc;
  	sa = p->p_sa;
  
  	KERNEL_PROC_LOCK(l);
! 	l->l_flag &= ~L_SA;
  
  	DPRINTFN(7,("sa_upcall_userret(%d.%d %x) \n", p->p_pid, l->l_lid,
  	    l->l_flag));
--- 837,849 ----
  	void *stack, *ap;
  	ucontext_t u, *up;
  	int i, nsas, nint, nevents, type;
+ 	int s;
  
  	p = l->l_proc;
  	sa = p->p_sa;
  
  	KERNEL_PROC_LOCK(l);
! 	SA_LWP_STATE_LOCK(l,s);
  
  	DPRINTFN(7,("sa_upcall_userret(%d.%d %x) \n", p->p_pid, l->l_lid,
  	    l->l_flag));
***************
*** 805,818 ****
  			 * Ideally, tsleep() would have a variant that took
  			 * a LWP to switch to.
  			 */
- 			l->l_flag &= ~L_SA;
  			DPRINTFN(7, ("sa_upcall_userret(%d.%d) sleeping"
  			    " for stacks\n", l->l_proc->p_pid, l->l_lid));
  			tsleep((caddr_t) &sa->sa_nstacks, PWAIT|PCATCH, 
  			    "sastacks", 0);
  			if (p->p_flag & P_WEXIT)
  				lwp_exit(l);
- 			l->l_flag |= L_SA;
  		}
  		l2 = sa_vp_repossess(l);
  
--- 875,889 ----
  			 * Ideally, tsleep() would have a variant that took
  			 * a LWP to switch to.
  			 */
  			DPRINTFN(7, ("sa_upcall_userret(%d.%d) sleeping"
  			    " for stacks\n", l->l_proc->p_pid, l->l_lid));
  			tsleep((caddr_t) &sa->sa_nstacks, PWAIT|PCATCH, 
  			    "sastacks", 0);
+ 
+ 			/* XXXUPSXXX NEED TO STOP THE LWP HERE ON REQUEST */
+ 
  			if (p->p_flag & P_WEXIT)
  				lwp_exit(l);
  		}
  		l2 = sa_vp_repossess(l);
  
***************
*** 961,970 ****
  	    l->l_lid, type));
  
  	cpu_upcall(l, type, nevents, nint, sapp, ap, stack, sa->sa_upcall);
! 	l->l_flag |= L_SA;
  	KERNEL_PROC_UNLOCK(l);
  }
  
  static struct lwp *
  sa_vp_repossess(struct lwp *l)
  {
--- 1032,1048 ----
  	    l->l_lid, type));
  
  	cpu_upcall(l, type, nevents, nint, sapp, ap, stack, sa->sa_upcall);
! 
! 	sa_vp_donate(l);
! 
! 	/* sa_vp_donate() may not return */
! 
! 	SA_LWP_STATE_UNLOCK(l,s);
! 
  	KERNEL_PROC_UNLOCK(l);
  }
  
+ #if 0
  static struct lwp *
  sa_vp_repossess(struct lwp *l)
  {
***************
*** 977,982 ****
--- 1055,1064 ----
  	 * Put ourselves on the virtual processor and note that the
  	 * previous occupant of that position was interrupted.
  	 */
  	l2 = sa->sa_vp;
  	sa->sa_vp = l;
  	if (sa->sa_idle == l2)
***************
*** 1011,1016 ****
--- 1093,1202 ----
  	}
  	return l2;
  }
+ #endif
+ 
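+ /*
+  * Wait until this LWP is handed the virtual processor: mark the LWP
+  * as wanting the VP, wake the idle LWP if there is one (otherwise
+  * notify the process) and sleep until sa_vp_donate() passes the VP
+  * to us.  Returns the LWP that previously held the VP.
+  */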
+ static struct lwp *
+ sa_vp_repossess(struct lwp *l)
+ {
+ 	struct lwp *l2;
+ 	struct proc *p = l->l_proc;
+ 	struct sadata *sa = p->p_sa;
+ 	int s;
+ 
+ 	l->l_flag |= L_SA_WANTS_VP;
+ 	sa->sa_vp_wait_count++;
+ 
+ 	if (sa->sa_idle != NULL) {
+ 		/* XXXUPSXXX Simple but slow */
+ 		wakeup(sa->sa_idle);
+ 	} else {
+ 		SCHED_LOCK(s);
+ 		l->l_flag |= L_SA_UPCALL;
+ 		/* kick the process */
+ 		signotify(p);
+ 		SCHED_UNLOCK(s);
+ 	}
+ 
+ 	while (sa->sa_vp != l) {
+ 		tsleep((caddr_t) l, PWAIT|PCATCH, "sa processor", 0);
+ 
+ 		/* XXXUPSXXX NEED TO STOP THE LWP HERE ON REQUEST ??? */
+ 
+ 		if (p->p_flag & P_WEXIT)
+ 			lwp_exit(l);
+ 	}
+ 
+ 	l2 = sa->sa_old_lwp;
+ 	return l2;
+ }
+ 
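+ /*
+  * Hand the virtual processor to the first LWP found waiting for it:
+  * park the current LWP in the cache, make the waiter the owner of
+  * sa_vp, wake it and switch away.  Returns normally only when no
+  * waiter is found.
+  */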
+ static void
+ sa_vp_donate(struct lwp *l)
+ {
+ 	struct proc *p = l->l_proc;
+ 	struct sadata *sa = p->p_sa;
+ 	struct lwp *l2;
+ 	int s;
+ 
+ 	if (sa->sa_vp_wait_count == 0)
+ 		return;
+ 
+ 	LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
+ 		if (l2->l_flag & L_SA_WANTS_VP) {
+ 			SCHED_LOCK(s);
+ 
+ 			sa_putcachelwp(p, l);
+ 			sa->sa_vp = l2;
+ 			sa->sa_vp_wait_count--;
+ 			l2->l_flag &= ~L_SA_WANTS_VP;
+ 			sa->sa_old_lwp = l;
+ 
+ 			SCHED_UNLOCK(s);
+ 
+ 			wakeup((caddr_t) l2);
+ 
+ 			SCHED_LOCK(s);
+ 
+ 			if ((l2->l_stat == LSRUN) &&
+ 			    ((l2->l_flag & L_INMEM) != 0))
+ 				mi_switch(l, l2);
+ 			else
+ 				mi_switch(l, NULL);
+ 
+ 			/*
+ 			 * This isn't quite a NOTREACHED; we may get here if
+ 			 * the process exits before this LWP is reused. In
+ 			 * that case, we want to call lwp_exit(), which will
+ 			 * be done by the userret() hooks.
+ 			 */
+ 			SCHED_ASSERT_UNLOCKED();
+ 			splx(s);
+ 			KDASSERT(p->p_flag & P_WEXIT);
+ 			/* mostly NOTREACHED */
+ 			lwp_exit(l);
+ 		}
+ 	}
+ 
+ #ifdef DIAGNOSTIC
+ 	printf("sa_vp_donate: couldn't find anyone to donate the CPU to\n");
+ #endif
+ }
+ 
  
  
  #ifdef DEBUG
Index: kern/kern_sig.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_sig.c,v
retrieving revision 1.141
diff -c -r1.141 kern_sig.c
*** kern/kern_sig.c	2003/05/20 17:42:51	1.141
--- kern/kern_sig.c	2003/06/23 07:33:38
***************
*** 903,909 ****
  	} else {
  		/* Process is sleeping or stopped */
  		if (p->p_flag & P_SA) {
! 			l = p->p_sa->sa_idle;
  		} else {
  			/*
  			 * Find out if any of the sleeps are interruptable,
--- 903,916 ----
  	} else {
  		/* Process is sleeping or stopped */
  		if (p->p_flag & P_SA) {
! 			struct lwp *l2 = p->p_sa->sa_vp;
! 			if ((l2->l_stat == LSSLEEP) &&  (l2->l_flag & L_SINTR))
! 				l = l2; 
! 			else if (l2->l_stat == LSSUSPENDED)
! 				suspended = l2;
! 			else if ((l2->l_stat != LSZOMB) && 
! 				 (l2->l_stat != LSDEAD))
! 				allsusp = 0;
  		} else {
  			/*
  			 * Find out if any of the sleeps are interruptable,
***************
*** 1074,1079 ****
--- 1081,1091 ----
  	siginfo_t *si;	
  
  	if (p->p_flag & P_SA) {
+ 
+ 		/* XXXUPSXXX What if not on sa_vp ? */
+ 
+ 		int s = l->l_flag & L_SA;
+ 		l->l_flag &= ~L_SA; 
  		si = pool_get(&siginfo_pool, PR_WAITOK);
  		si->si_signo = sig;
  		si->si_errno = 0;
***************
*** 1086,1091 ****
--- 1098,1106 ----
  
  		sa_upcall(l, SA_UPCALL_SIGNAL | SA_UPCALL_DEFER, le, li, 
  			    sizeof(siginfo_t), si);
+ 
+ 		l->l_flag |= s;
  		return;
  	}
  
***************
*** 1139,1144 ****
--- 1154,1169 ----
  	int		s = 0, signum, prop;
  	int		dolock = (l->l_flag & L_SINTR) == 0, locked = !dolock;
  	sigset_t	ss;
+ 
+ 	if (l->l_flag & L_SA) {
+ 		struct sadata *sa = p->p_sa;	
+ 
+ 		/* Bail out if we do not own the virtual processor */
+ 		if (sa->sa_vp != l)
+ 			return 0;
+ 	}
+ 
  
  	if (p->p_stat == SSTOP) {
  		/*
Index: kern/kern_time.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_time.c,v
retrieving revision 1.70
diff -c -r1.70 kern_time.c
*** kern/kern_time.c	2003/05/28 22:27:57	1.70
--- kern/kern_time.c	2003/06/23 07:33:39
***************
*** 869,874 ****
--- 869,885 ----
  	struct ptimers *pt = (struct ptimers *)arg;
  	unsigned int i, fired, done;
  	KERNEL_PROC_LOCK(l);
+ 
+ 	{
+ 		struct proc	*p = l->l_proc;
+ 		struct sadata *sa = p->p_sa;	
+ 
+ 		/* Bail out if we do not own the virtual processor */
+ 		if (sa->sa_vp != l) {
+ 			KERNEL_PROC_UNLOCK(l);
+ 			return;
+ 		}
+ 	}
  	
  	fired = pt->pts_fired;
  	done = 0;
***************
*** 1188,1195 ****
  itimerfire(struct ptimer *pt)
  {
  	struct proc *p = pt->pt_proc;
  	int s;
! 
  	if (pt->pt_ev.sigev_notify == SIGEV_SIGNAL) {
  		/*
  		 * No RT signal infrastructure exists at this time;
--- 1199,1207 ----
  itimerfire(struct ptimer *pt)
  {
  	struct proc *p = pt->pt_proc;
+ #if 0
  	int s;
! #endif
  	if (pt->pt_ev.sigev_notify == SIGEV_SIGNAL) {
  		/*
  		 * No RT signal infrastructure exists at this time;
***************
*** 1215,1231 ****
--- 1227,1250 ----
  			 * makes testing for sa_idle alone insuffucent to
  			 * determine if we really should call setrunnable.
  			 */
+ #if 0
+ 
  		        if ((sa->sa_idle) && (p->p_stat != SSTOP)) {
  				SCHED_LOCK(s);
  				setrunnable(sa->sa_idle);
  				SCHED_UNLOCK(s);
  			}
+ #endif
  			pt->pt_poverruns = pt->pt_overruns;
  			pt->pt_overruns = 0;
  			i = 1 << pt->pt_entry;
  			p->p_timers->pts_fired = i;
  			p->p_userret = timerupcall;
  			p->p_userret_arg = p->p_timers;
+ 			
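+ 			/* Wake the idle LWP so that sa_yield() notices the pending upcall. */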
+ 			if (sa->sa_idle)
+ 				wakeup(sa->sa_idle);
+ 
  		} else if (p->p_userret == timerupcall) {
  			i = 1 << pt->pt_entry;
  			if ((p->p_timers->pts_fired & i) == 0) {
Index: sys/lwp.h
===================================================================
RCS file: /cvsroot/src/sys/sys/lwp.h,v
retrieving revision 1.6
diff -c -r1.6 lwp.h
*** sys/lwp.h	2003/02/04 13:41:48	1.6
--- sys/lwp.h	2003/06/23 07:33:39
***************
*** 117,122 ****
--- 117,123 ----
  #define	L_SA_UPCALL	0x200000 /* SA upcall is pending */
  #define	L_SA_BLOCKING	0x400000 /* Blocking in tsleep() */
  #define	L_DETACHED	0x800000 /* Won't be waited for. */
+ #define L_SA_WANTS_VP   0x1000000 /* SA LWP wants a virtual processor */
  
  /*
   * Status values.
Index: sys/savar.h
===================================================================
RCS file: /cvsroot/src/sys/sys/savar.h,v
retrieving revision 1.4
diff -c -r1.4 savar.h
*** sys/savar.h	2003/02/02 02:22:14	1.4
--- sys/savar.h	2003/06/23 07:33:39
***************
*** 75,80 ****
--- 75,83 ----
  	int	sa_flag;		/* SA_* flags */
  	sa_upcall_t	sa_upcall;	/* upcall entry point */
  	struct lwp	*sa_vp;		/* "virtual processor" allocation */
+ 	struct lwp	*sa_old_lwp;	/* XXXUPSXXX hack: lwp that used to be on sa_vp */
+ 	int	sa_vp_wait_count;	/* XXXUPSXXX hack: number of LWPs waiting on VP */
+ 
+ 
  	struct lwp	*sa_woken;	/* list of woken lwps */
  	struct lwp	*sa_idle;      	/* lwp in sawait */
  	int	sa_concurrency;		/* desired concurrency */