Subject: Pulling a fix from wrstuden-fixsa into NetBSD 4.0.1
To: None <tech-kern@netbsd.org>
From: Bill Stouder-Studenmund <wrstuden@netbsd.org>
List: tech-kern
Date: 09/28/2007 10:31:49
--E0h0CbphJD8hN+Gf
Content-Type: multipart/mixed; boundary="M2Pxvdb9QxnGd/3e"
Content-Disposition: inline


--M2Pxvdb9QxnGd/3e
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Content-Transfer-Encoding: quoted-printable

As some of you have noticed, I've been working on fixing some of the=20
issues with our SA implementation on the wrstuden-fixsa branch. This=20
branch is for fixing SA in context of NetBSD-4.

The hope is that I'll finish it all up in time for 4.1.

There is one fix however that I'd like everyone to think about for NetBSD=
=20
4.0.1. This is the fix that stops us from trying to allocate memory while=
=20
in tsleep.

The solution is to note that we never will have more than one "Blocked"=20
upcall per vp outstanding at any one time. Thus we can pre-allocate an=20
upcall event structure and stash it on the vp. When we block, we grab that=
=20
upcall event structure, set up the event, and go to sleep. When we go to=20
deliver an upcall to user space, before we free it, we check to see if our=
=20
vp is missing an upcall event structure. If it is, we just hang this event=
=20
structure off the vp instead of freeing.

Patch is attached.

I've tested this and it works fine. However I think it needs more testing=
=20
before I suggest pulling it into 4.0.1.

The main observable issue this change will fix is an occasional lost=20
wakeup. This would usually happen when the system's under memory pressure.=
=20
What happens is that we end up sleeping for memory, and whatever we wanted=
=20
to originally sleep for happens before we get the memeory. So we end up=20
waiting for an event that's already happened & thus wait forever.

Take care,

Bill

--M2Pxvdb9QxnGd/3e
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="fix-sa-4.0.diff.txt"
Content-Transfer-Encoding: quoted-printable

Index: src/sys/kern/kern_sa.c
diff -c src/sys/kern/kern_sa.c:1.87 src/sys/kern/kern_sa.c:1.87.4.1
*** src/sys/kern/kern_sa.c:1.87	Wed Nov  1 10:17:58 2006
--- src/sys/kern/kern_sa.c	Thu May 17 22:53:05 2007
***************
*** 1,4 ****
! /*	$NetBSD: kern_sa.c,v 1.87 2006/11/01 10:17:58 yamt Exp $	*/
 =20
  /*-
   * Copyright (c) 2001, 2004, 2005 The NetBSD Foundation, Inc.
--- 1,4 ----
! /*	$NetBSD: kern_sa.c,v 1.87.4.1 2007/05/17 22:53:05 wrstuden Exp $	*/
 =20
  /*-
   * Copyright (c) 2001, 2004, 2005 The NetBSD Foundation, Inc.
***************
*** 40,46 ****
 =20
  #include "opt_ktrace.h"
  #include "opt_multiprocessor.h"
! __KERNEL_RCSID(0, "$NetBSD: kern_sa.c,v 1.87 2006/11/01 10:17:58 yamt Exp=
 $");
 =20
  #include <sys/param.h>
  #include <sys/systm.h>
--- 40,46 ----
 =20
  #include "opt_ktrace.h"
  #include "opt_multiprocessor.h"
! __KERNEL_RCSID(0, "$NetBSD: kern_sa.c,v 1.87.4.1 2007/05/17 22:53:05 wrst=
uden Exp $");
 =20
  #include <sys/param.h>
  #include <sys/systm.h>
***************
*** 151,159 ****
--- 151,162 ----
  sa_newsavp(struct sadata *sa)
  {
  	struct sadata_vp *vp, *qvp;
+ 	struct sadata_upcall *sau;
 =20
  	/* Allocate virtual processor data structure */
  	vp =3D pool_get(&savp_pool, PR_WAITOK);
+ 	/* And preallocate an upcall data structure for sleeping */
+ 	sau =3D sadata_upcall_alloc(1);
  	/* Initialize. */
  	memset(vp, 0, sizeof(*vp));
  	simple_lock_init(&vp->savp_lock);
***************
*** 163,168 ****
--- 166,172 ----
  	vp->savp_ofaultaddr =3D 0;
  	LIST_INIT(&vp->savp_lwpcache);
  	vp->savp_ncached =3D 0;
+ 	vp->savp_sleeper_upcall =3D sau;
  	SIMPLEQ_INIT(&vp->savp_upcalls);
 =20
  	simple_lock(&sa->sa_lock);
***************
*** 269,274 ****
--- 273,282 ----
  	p->p_flag &=3D ~P_SA;
  	while ((vp =3D SLIST_FIRST(&p->p_sa->sa_vps)) !=3D NULL) {
  		SLIST_REMOVE_HEAD(&p->p_sa->sa_vps, savp_next);
+ 		if (vp->savp_sleeper_upcall) {
+ 			sadata_upcall_free(vp->savp_sleeper_upcall);
+ 			vp->savp_sleeper_upcall =3D NULL;
+ 		}
  		pool_put(&savp_pool, vp);
  	}
  	pool_put(&sadata_pool, sa);
***************
*** 504,509 ****
--- 512,518 ----
  	vaddr_t uaddr;
  	boolean_t inmem;
  	int addedconcurrency, error, s;
+ 	struct sadata_vp *vp;
 =20
  	p =3D l->l_proc;
  	sa =3D p->p_sa;
***************
*** 527,543 ****
  			newlwp(l, p, uaddr, inmem, 0, NULL, 0,
  			    child_return, 0, &l2);
  			l2->l_flag |=3D L_SA;
! 			l2->l_savp =3D sa_newsavp(sa);
! 			if (l2->l_savp) {
! 				l2->l_savp->savp_lwp =3D l2;
  				cpu_setfunc(l2, sa_switchcall, NULL);
  				error =3D sa_upcall(l2, SA_UPCALL_NEWPROC,
  				    NULL, NULL, 0, NULL, NULL);
  				if (error) {
  					/* free new savp */
! 					SLIST_REMOVE(&sa->sa_vps, l2->l_savp,
  					    sadata_vp, savp_next);
! 					pool_put(&savp_pool, l2->l_savp);
  				}
  			} else
  				error =3D 1;
--- 536,557 ----
  			newlwp(l, p, uaddr, inmem, 0, NULL, 0,
  			    child_return, 0, &l2);
  			l2->l_flag |=3D L_SA;
! 			l2->l_savp =3D vp =3D sa_newsavp(sa);
! 			if (vp) {
! 				vp->savp_lwp =3D l2;
  				cpu_setfunc(l2, sa_switchcall, NULL);
  				error =3D sa_upcall(l2, SA_UPCALL_NEWPROC,
  				    NULL, NULL, 0, NULL, NULL);
  				if (error) {
  					/* free new savp */
! 					SLIST_REMOVE(&sa->sa_vps, vp,
  					    sadata_vp, savp_next);
! 					if (vp->savp_sleeper_upcall) {
! 						sadata_upcall_free(
! 						    vp->savp_sleeper_upcall);
! 						vp->savp_sleeper_upcall =3D NULL;
! 					}
! 					pool_put(&savp_pool, vp);
  				}
  			} else
  				error =3D 1;
***************
*** 921,932 ****
   */
 =20
  void
! sa_switch(struct lwp *l, struct sadata_upcall *sau, int type)
  {
  	struct proc *p =3D l->l_proc;
  	struct sadata_vp *vp =3D l->l_savp;
  	struct lwp *l2;
- 	struct sadata_upcall *freesau =3D NULL;
  	int s;
 =20
  	DPRINTFN(4,("sa_switch(%d.%d type %d VP %d)\n", p->p_pid, l->l_lid,
--- 935,946 ----
   */
 =20
  void
! sa_switch(struct lwp *l, int type)
  {
  	struct proc *p =3D l->l_proc;
  	struct sadata_vp *vp =3D l->l_savp;
+ 	struct sadata_upcall *sau =3D NULL;
  	struct lwp *l2;
  	int s;
 =20
  	DPRINTFN(4,("sa_switch(%d.%d type %d VP %d)\n", p->p_pid, l->l_lid,
***************
*** 936,942 ****
 =20
  	if (p->p_flag & P_WEXIT) {
  		mi_switch(l, NULL);
- 		sadata_upcall_free(sau);
  		return;
  	}
 =20
--- 950,955 ----
***************
*** 956,962 ****
  			s =3D splsched();
  			SCHED_UNLOCK(s);
  		}
- 		sadata_upcall_free(sau);
  		return;
  	} else if (vp->savp_lwp =3D=3D l) {
  		/*
--- 969,974 ----
***************
*** 964,975 ****
--- 976,993 ----
  		 * a SA_BLOCKED upcall and allocate resources for the
  		 * UNBLOCKED upcall.
  		 */
+ 		if (vp->savp_sleeper_upcall) {
+ 			sau =3D vp->savp_sleeper_upcall;
+ 			vp->savp_sleeper_upcall =3D NULL;
+ 		}
 =20
  		if (sau =3D=3D NULL) {
  #ifdef DIAGNOSTIC
  			printf("sa_switch(%d.%d): no upcall data.\n",
  			    p->p_pid, l->l_lid);
  #endif
+ panic("Oops! Don't have a sleeper!\n");
+ 			/* XXXWRS Shouldn't we just kill the app here? */
  			mi_switch(l, NULL);
  			return;
  		}
***************
*** 1016,1022 ****
  			cpu_setfunc(l2, sa_switchcall, NULL);
  			sa_putcachelwp(p, l2); /* PHOLD from sa_getcachelwp */
  			mi_switch(l, NULL);
! 			sadata_upcall_free(sau);
  			DPRINTFN(10,("sa_switch(%d.%d) page fault resolved\n",
  				     p->p_pid, l->l_lid));
  			if (vp->savp_faultaddr =3D=3D vp->savp_ofaultaddr)
--- 1034,1047 ----
  			cpu_setfunc(l2, sa_switchcall, NULL);
  			sa_putcachelwp(p, l2); /* PHOLD from sa_getcachelwp */
  			mi_switch(l, NULL);
! 			/*
! 			 * WRS Not sure how vp->savp_sleeper_upcall !=3D NULL
! 			 * but be careful none the less
! 			 */
! 			if (vp->savp_sleeper_upcall =3D=3D NULL)
! 				vp->savp_sleeper_upcall =3D sau;
! 			else
! 				sadata_upcall_free(sau);
  			DPRINTFN(10,("sa_switch(%d.%d) page fault resolved\n",
  				     p->p_pid, l->l_lid));
  			if (vp->savp_faultaddr =3D=3D vp->savp_ofaultaddr)
***************
*** 1044,1050 ****
  		 * SA_UNBLOCKED upcall (select and poll cause this
  		 * kind of behavior a lot).
  		 */
- 		freesau =3D sau;
  		l2 =3D NULL;
  	} else {
  		/* NOTREACHED */
--- 1069,1074 ----
***************
*** 1054,1060 ****
  	DPRINTFN(4,("sa_switch(%d.%d) switching to LWP %d.\n",
  	    p->p_pid, l->l_lid, l2 ? l2->l_lid : 0));
  	mi_switch(l, l2);
- 	sadata_upcall_free(freesau);
  	DPRINTFN(4,("sa_switch(%d.%d flag %x) returned.\n",
  	    p->p_pid, l->l_lid, l->l_flag));
  	KDASSERT(l->l_wchan =3D=3D 0);
--- 1078,1083 ----
***************
*** 1062,1067 ****
--- 1085,1098 ----
  	SCHED_ASSERT_UNLOCKED();
  }
 =20
+ /*
+  * sa_switchcall
+  *
+  * We need to pass an upcall to userland. We are now
+  * running on a spare stack and need to allocate a new
+  * one. Also, if we are passed an sa upcall, we need to dispatch
+  * it to the app.
+  */
  static void
  sa_switchcall(void *arg)
  {
***************
*** 1100,1110 ****
  			SIMPLEQ_INSERT_TAIL(&vp->savp_upcalls, sau, sau_next);
  			l2->l_flag |=3D L_SA_UPCALL;
  		} else {
  #ifdef DIAGNOSTIC
  			printf("sa_switchcall(%d.%d flag %x): Not enough stacks.\n",
  			    p->p_pid, l->l_lid, l->l_flag);
  #endif
! 			sadata_upcall_free(sau);
  			PHOLD(l2);
  			SCHED_LOCK(s);
  			sa_putcachelwp(p, l2); /* sets L_SA */
--- 1131,1156 ----
  			SIMPLEQ_INSERT_TAIL(&vp->savp_upcalls, sau, sau_next);
  			l2->l_flag |=3D L_SA_UPCALL;
  		} else {
+ 			/*
+ 			 * Oops! We're in trouble. The app hasn't
+ 			 * passeed us in any stacks on which to deliver
+ 			 * the upcall.
+ 			 *
+ 			 * WRS: I think this code is wrong. If we can't
+ 			 * get a stack, we are dead. We either need
+ 			 * to block waiting for one (assuming there's a
+ 			 * live vp still in userland so it can hand back
+ 			 * stacks, or we should just kill the process
+ 			 * as we're deadlocked.
+ 			 */
  #ifdef DIAGNOSTIC
  			printf("sa_switchcall(%d.%d flag %x): Not enough stacks.\n",
  			    p->p_pid, l->l_lid, l->l_flag);
  #endif
! 			if (vp->savp_sleeper_upcall =3D=3D NULL)
! 				vp->savp_sleeper_upcall =3D sau;
! 			else
! 				sadata_upcall_free(sau);
  			PHOLD(l2);
  			SCHED_LOCK(s);
  			sa_putcachelwp(p, l2); /* sets L_SA */
***************
*** 1585,1591 ****
  	}
  	type =3D sau->sau_type;
 =20
! 	sadata_upcall_free(sau);
 =20
  	DPRINTFN(7,("sa_makeupcalls(%d.%d): type %d\n", p->p_pid,
  	    l->l_lid, type));
--- 1631,1640 ----
  	}
  	type =3D sau->sau_type;
 =20
! 	if (vp->savp_sleeper_upcall =3D=3D NULL)
! 		vp->savp_sleeper_upcall =3D sau;
! 	else
! 		sadata_upcall_free(sau);
 =20
  	DPRINTFN(7,("sa_makeupcalls(%d.%d): type %d\n", p->p_pid,
  	    l->l_lid, type));
Index: src/sys/kern/kern_synch.c
diff -c src/sys/kern/kern_synch.c:1.173 src/sys/kern/kern_synch.c:1.173.4.1
*** src/sys/kern/kern_synch.c:1.173	Fri Nov  3 20:46:00 2006
--- src/sys/kern/kern_synch.c	Thu May 17 22:53:05 2007
***************
*** 1,4 ****
! /*	$NetBSD: kern_synch.c,v 1.173 2006/11/03 20:46:00 ad Exp $	*/
 =20
  /*-
   * Copyright (c) 1999, 2000, 2004 The NetBSD Foundation, Inc.
--- 1,4 ----
! /*	$NetBSD: kern_synch.c,v 1.173.4.1 2007/05/17 22:53:05 wrstuden Exp $	*/
 =20
  /*-
   * Copyright (c) 1999, 2000, 2004 The NetBSD Foundation, Inc.
***************
*** 76,82 ****
   */
 =20
  #include <sys/cdefs.h>
! __KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.173 2006/11/03 20:46:00 ad E=
xp $");
 =20
  #include "opt_ddb.h"
  #include "opt_ktrace.h"
--- 76,82 ----
   */
 =20
  #include <sys/cdefs.h>
! __KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.173.4.1 2007/05/17 22:53:05 =
wrstuden Exp $");
 =20
  #include "opt_ddb.h"
  #include "opt_ktrace.h"
***************
*** 445,451 ****
  	struct lwp *l =3D curlwp;
  	struct proc *p =3D l ? l->l_proc : NULL;
  	struct slpque *qp;
- 	struct sadata_upcall *sau;
  	int sig, s;
  	int catch =3D priority & PCATCH;
  	int relock =3D (priority & PNORELOCK) =3D=3D 0;
--- 445,450 ----
***************
*** 482,499 ****
  		ktrcsw(l, 1, 0);
  #endif
 =20
- 	/*
- 	 * XXX We need to allocate the sadata_upcall structure here,
- 	 * XXX since we can't sleep while waiting for memory inside
- 	 * XXX sa_upcall().  It would be nice if we could safely
- 	 * XXX allocate the sadata_upcall structure on the stack, here.
- 	 */
- 	if (l->l_flag & L_SA) {
- 		sau =3D sadata_upcall_alloc(0);
- 	} else {
- 		sau =3D NULL;
- 	}
-=20
  	SCHED_LOCK(s);
 =20
  #ifdef DIAGNOSTIC
--- 481,486 ----
***************
*** 567,573 ****
  	p->p_stats->p_ru.ru_nvcsw++;
  	SCHED_ASSERT_LOCKED();
  	if (l->l_flag & L_SA)
! 		sa_switch(l, sau, SA_UPCALL_BLOCKED);
  	else
  		mi_switch(l, NULL);
 =20
--- 554,560 ----
  	p->p_stats->p_ru.ru_nvcsw++;
  	SCHED_ASSERT_LOCKED();
  	if (l->l_flag & L_SA)
! 		sa_switch(l, SA_UPCALL_BLOCKED);
  	else
  		mi_switch(l, NULL);
 =20
Index: src/sys/sys/savar.h
diff -c src/sys/sys/savar.h:1.20 src/sys/sys/savar.h:1.20.10.1
*** src/sys/sys/savar.h:1.20	Sun Jun 25 08:12:54 2006
--- src/sys/sys/savar.h	Thu May 17 22:53:07 2007
***************
*** 1,4 ****
! /*	$NetBSD: savar.h,v 1.20 2006/06/25 08:12:54 yamt Exp $	*/
 =20
  /*-
   * Copyright (c) 2001 The NetBSD Foundation, Inc.
--- 1,4 ----
! /*	$NetBSD: savar.h,v 1.20.10.1 2007/05/17 22:53:07 wrstuden Exp $	*/
 =20
  /*-
   * Copyright (c) 2001 The NetBSD Foundation, Inc.
***************
*** 110,115 ****
--- 110,116 ----
  	vaddr_t	savp_ofaultaddr;	/* old page fault address */
  	LIST_HEAD(, lwp)	savp_lwpcache; /* list of available lwps */
  	int	savp_ncached;		/* list length */
+ 	struct sadata_upcall	*savp_sleeper_upcall; /* cached upcall data */
  	SIMPLEQ_HEAD(, sadata_upcall)	savp_upcalls; /* pending upcalls */
  };
 =20
***************
*** 134,140 ****
  void	sadata_upcall_free(struct sadata_upcall *);
 =20
  void	sa_release(struct proc *);
! void	sa_switch(struct lwp *, struct sadata_upcall *, int);
  void	sa_preempt(struct lwp *);
  void	sa_yield(struct lwp *);
  int	sa_upcall(struct lwp *, int, struct lwp *, struct lwp *, size_t, void=
 *,
--- 135,141 ----
  void	sadata_upcall_free(struct sadata_upcall *);
 =20
  void	sa_release(struct proc *);
! void	sa_switch(struct lwp *, int);
  void	sa_preempt(struct lwp *);
  void	sa_yield(struct lwp *);
  int	sa_upcall(struct lwp *, int, struct lwp *, struct lwp *, size_t, void=
 *,

--M2Pxvdb9QxnGd/3e--

--E0h0CbphJD8hN+Gf
Content-Type: application/pgp-signature
Content-Disposition: inline

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.7 (NetBSD)

iD8DBQFG/TqEWz+3JHUci9cRAs7ZAJ9kPvyiZRYVY+LgRJJhHpV5s23rggCeOWai
ksiEXf7L0Ouz94ldLyaVl+Y=
=FxPB
-----END PGP SIGNATURE-----

--E0h0CbphJD8hN+Gf--