Subject: Re: deadlock with sched_lock in SA code
To: Jason Thorpe <thorpej@shagadelic.org>
From: Chuck Silvers <chuq@chuq.com>
List: tech-kern
Date: 09/18/2005 16:03:10
--r5Pyd7+fXNt84Ff3
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
On Sun, Aug 28, 2005 at 12:20:11PM -0700, Jason Thorpe wrote:
>
> On Aug 28, 2005, at 8:17 AM, Chuck Silvers wrote:
>
> >allocating pages from UVM can call wakeup(), so we must avoid that
> >while holding sched_lock. one way to do this would be to call
> >sadata_upcall_alloc() before acquiring sched_lock and passing the
> >resulting pointer to sa_switch(), instead of calling that in
> >sa_switch() itself. does anyone have any better suggestions?
> >if not, I'll fix it that way.
>
> That sounds reasonable.
>
> It would be nice to put some assertions in place to prevent this from
> happening again.
ok, the diff is attached.
it turned out that there was a similar problem with freeing a
struct sadata_upcall, pool_put() can also call wakeup(). to get around
this, I had sa_switch() free the structure after it sleeps instead of before.
this means we're hanging onto the structure for the duration of the sleep
for a lame reason, oh well.
I also took the opportunity to clean up the freeing of sau->sau_arg,
this is now done via a callback. sa_upcall0() always returns 0, so I
made it void and removed the code that handled error returns.
anyone have any comments on this before I check it in?
-Chuck
--r5Pyd7+fXNt84Ff3
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="diff.sa-deadlock.2"
Index: kern/kern_sa.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_sa.c,v
retrieving revision 1.65
diff -u -p -r1.65 kern_sa.c
--- kern/kern_sa.c 7 Sep 2005 23:31:06 -0000 1.65
+++ kern/kern_sa.c 18 Sep 2005 22:53:20 -0000
@@ -70,8 +70,8 @@ static struct lwp *sa_vp_repossess(struc
static __inline int sa_pagefault(struct lwp *, ucontext_t *);
-static int sa_upcall0(struct lwp *, int, struct lwp *, struct lwp *,
- size_t, void *, struct sadata_upcall *);
+static void sa_upcall0(struct sadata_upcall *, int, struct lwp *, struct lwp *,
+ size_t, void *, void (*)(void *));
static void sa_upcall_getstate(union sau_state *, struct lwp *);
MALLOC_DEFINE(M_SA, "sa", "Scheduler activations");
@@ -109,39 +109,30 @@ SPLAY_GENERATE(sasttree, sastack, sast_n
struct sadata_upcall *
sadata_upcall_alloc(int waitok)
{
+ struct sadata_upcall *sau;
- /* XXX zero the memory? */
- return (pool_get(&saupcall_pool, waitok ? PR_WAITOK : PR_NOWAIT));
+ sau = pool_get(&saupcall_pool, waitok ? PR_WAITOK : PR_NOWAIT);
+ if (sau) {
+ sau->sau_arg = NULL;
+ }
+ return sau;
}
/*
* sadata_upcall_free:
*
- * Free an sadata_upcall structure, and any associated
- * argument data.
+ * Free an sadata_upcall structure and any associated argument data.
*/
void
sadata_upcall_free(struct sadata_upcall *sau)
{
- extern struct pool siginfo_pool; /* XXX Ew. */
- /*
- * XXX We have to know what the origin of sau_arg is
- * XXX in order to do the right thing, here. Sucks
- * XXX to be a non-garbage-collecting kernel.
- */
+ if (sau == NULL) {
+ return;
+ }
if (sau->sau_arg) {
- switch (sau->sau_type) {
- case SA_UPCALL_SIGNAL:
- case SA_UPCALL_SIGEV:
- pool_put(&siginfo_pool, sau->sau_arg);
- break;
- default:
- panic("sadata_free: unknown type of sau_arg: %d",
- sau->sau_type);
- }
+ (*sau->sau_argfreefunc)(sau->sau_arg);
}
-
pool_put(&saupcall_pool, sau);
}
@@ -443,7 +434,7 @@ sys_sa_enable(struct lwp *l, void *v, re
if (p->p_flag & P_SA) /* Already running! */
return (EBUSY);
- error = sa_upcall(l, SA_UPCALL_NEWPROC, l, NULL, 0, NULL);
+ error = sa_upcall(l, SA_UPCALL_NEWPROC, l, NULL, 0, NULL, NULL);
if (error)
return (error);
@@ -496,7 +487,7 @@ sa_increaseconcurrency(struct lwp *l, in
l2->l_savp->savp_lwp = l2;
cpu_setfunc(l2, sa_switchcall, NULL);
error = sa_upcall(l2, SA_UPCALL_NEWPROC,
- NULL, NULL, 0, NULL);
+ NULL, NULL, 0, NULL, NULL);
if (error) {
/* free new savp */
SLIST_REMOVE(&sa->sa_vps, l2->l_savp,
@@ -587,11 +578,14 @@ sys_sa_setconcurrency(struct lwp *l, voi
vp->savp_lwp->l_flag &=
~(L_SA_IDLE|L_SA_YIELD|L_SINTR);
SCHED_UNLOCK(s);
- DPRINTFN(11,("sys_sa_concurrency(%d.%d) NEWPROC vp %d\n",
- l->l_proc->p_pid, l->l_lid, vp->savp_id));
+ DPRINTFN(11,("sys_sa_concurrency(%d.%d) "
+ "NEWPROC vp %d\n",
+ l->l_proc->p_pid, l->l_lid,
+ vp->savp_id));
cpu_setfunc(vp->savp_lwp, sa_switchcall, NULL);
- /* error = */ sa_upcall(vp->savp_lwp, SA_UPCALL_NEWPROC,
- NULL, NULL, 0, NULL);
+ /* error = */ sa_upcall(vp->savp_lwp,
+ SA_UPCALL_NEWPROC,
+ NULL, NULL, 0, NULL, NULL);
SCHED_LOCK(s);
sa->sa_concurrency++;
setrunnable(vp->savp_lwp);
@@ -721,7 +715,7 @@ sa_preempt(struct lwp *l)
*/
if (sa->sa_flag & SA_FLAG_PREEMPT)
sa_upcall(l, SA_UPCALL_PREEMPTED | SA_UPCALL_DEFER_EVENT,
- l, NULL, 0, NULL);
+ l, NULL, 0, NULL, NULL);
}
@@ -733,13 +727,13 @@ sa_preempt(struct lwp *l)
*/
int
sa_upcall(struct lwp *l, int type, struct lwp *event, struct lwp *interrupted,
- size_t argsize, void *arg)
+ size_t argsize, void *arg, void (*func)(void *))
{
struct sadata_upcall *sau;
struct sadata *sa = l->l_proc->p_sa;
struct sadata_vp *vp = l->l_savp;
struct sastack *sast;
- int error, f;
+ int f;
/* XXX prevent recursive upcalls if we sleep for memory */
SA_LWP_STATE_LOCK(l, f);
@@ -754,12 +748,7 @@ sa_upcall(struct lwp *l, int type, struc
SA_LWP_STATE_LOCK(l, f);
sau = sadata_upcall_alloc(1);
SA_LWP_STATE_UNLOCK(l, f);
- error = sa_upcall0(l, type, event, interrupted, argsize, arg, sau);
- if (error) {
- sadata_upcall_free(sau);
- sa_setstackfree(sast, sa);
- return (error);
- }
+ sa_upcall0(sau, type, event, interrupted, argsize, arg, func);
sau->sau_stack = sast->sast_stack;
SIMPLEQ_INSERT_TAIL(&vp->savp_upcalls, sau, sau_next);
@@ -768,15 +757,14 @@ sa_upcall(struct lwp *l, int type, struc
return (0);
}
-static int
-sa_upcall0(struct lwp *l, int type, struct lwp *event, struct lwp *interrupted,
- size_t argsize, void *arg, struct sadata_upcall *sau)
+static void
+sa_upcall0(struct sadata_upcall *sau, int type, struct lwp *event,
+ struct lwp *interrupted, size_t argsize, void *arg, void (*func)(void *))
{
KDASSERT((event == NULL) || (event != interrupted));
sau->sau_flags = 0;
- sau->sau_arg = 0;
if (type & SA_UPCALL_DEFER_EVENT) {
sau->sau_event.ss_deferred.ss_lwp = event;
@@ -792,8 +780,7 @@ sa_upcall0(struct lwp *l, int type, stru
sau->sau_type = type & SA_UPCALL_TYPE_MASK;
sau->sau_argsize = argsize;
sau->sau_arg = arg;
-
- return (0);
+ sau->sau_argfreefunc = func;
}
@@ -868,14 +855,15 @@ sa_pagefault(struct lwp *l, ucontext_t *
* TSLEEP() ITSELF! We are called with sched_lock held, and must
* hold it right through the mi_switch() call.
*/
+
void
-sa_switch(struct lwp *l, int type)
+sa_switch(struct lwp *l, struct sadata_upcall *sau, int type)
{
struct proc *p = l->l_proc;
struct sadata_vp *vp = l->l_savp;
- struct sadata_upcall *sau;
struct lwp *l2;
- int error, s;
+ struct sadata_upcall *freesau = NULL;
+ int s;
DPRINTFN(4,("sa_switch(%d.%d type %d VP %d)\n", p->p_pid, l->l_lid,
type, vp->savp_lwp ? vp->savp_lwp->l_lid : 0));
@@ -884,10 +872,12 @@ sa_switch(struct lwp *l, int type)
if (p->p_flag & P_WEXIT) {
mi_switch(l, NULL);
+ sadata_upcall_free(sau);
return;
}
if (l->l_flag & L_SA_YIELD) {
+
/*
* Case 0: we're blocking in sa_yield
*/
@@ -902,6 +892,7 @@ sa_switch(struct lwp *l, int type)
s = splsched();
SCHED_UNLOCK(s);
}
+ sadata_upcall_free(sau);
return;
} else if (vp->savp_lwp == l) {
/*
@@ -910,6 +901,15 @@ sa_switch(struct lwp *l, int type)
* UNBLOCKED upcall.
*/
+ if (sau == NULL) {
+#ifdef DIAGNOSTIC
+ printf("sa_switch(%d.%d): no upcall data.\n",
+ p->p_pid, l->l_lid);
+#endif
+ mi_switch(l, NULL);
+ return;
+ }
+
/*
* The process of allocating a new LWP could cause
* sleeps. We're called from inside sleep, so that
@@ -931,38 +931,12 @@ sa_switch(struct lwp *l, int type)
p->p_pid, l->l_lid);
#endif
mi_switch(l, NULL);
- return;
- }
-
- /*
- * XXX We need to allocate the sadata_upcall structure here,
- * XXX since we can't sleep while waiting for memory inside
- * XXX sa_upcall(). It would be nice if we could safely
- * XXX allocate the sadata_upcall structure on the stack, here.
- */
- sau = sadata_upcall_alloc(0);
- if (sau == NULL) {
-#ifdef DIAGNOSTIC
- printf("sa_switch(%d.%d): couldn't allocate upcall data.\n",
- p->p_pid, l->l_lid);
-#endif
- sa_putcachelwp(p, l2); /* PHOLD from sa_getcachelwp */
- mi_switch(l, NULL);
+ sadata_upcall_free(sau);
return;
}
cpu_setfunc(l2, sa_switchcall, sau);
- error = sa_upcall0(l2, SA_UPCALL_BLOCKED, l, NULL, 0, NULL,
- sau);
- if (error) {
-#ifdef DIAGNOSTIC
- printf("sa_switch(%d.%d): Error %d from sa_upcall()\n",
- p->p_pid, l->l_lid, error);
-#endif
- sa_putcachelwp(p, l2); /* PHOLD from sa_getcachelwp */
- mi_switch(l, NULL);
- return;
- }
+ sa_upcall0(sau, SA_UPCALL_BLOCKED, l, NULL, 0, NULL, NULL);
/*
* Perform the double/upcall pagefault check.
@@ -975,9 +949,9 @@ sa_switch(struct lwp *l, int type)
*/
if ((l->l_flag & L_SA_PAGEFAULT) && sa_pagefault(l,
&sau->sau_event.ss_captured.ss_ctx) != 0) {
- sadata_upcall_free(sau);
sa_putcachelwp(p, l2); /* PHOLD from sa_getcachelwp */
mi_switch(l, NULL);
+ sadata_upcall_free(sau);
DPRINTFN(10,("sa_switch(%d.%d) page fault resolved\n",
p->p_pid, l->l_lid));
if (vp->savp_faultaddr == vp->savp_ofaultaddr)
@@ -997,6 +971,7 @@ sa_switch(struct lwp *l, int type)
KDASSERT(l2 != l);
} else if (vp->savp_lwp != NULL) {
+
/*
* Case 2: We've been woken up while another LWP was
* on the VP, but we're going back to sleep without
@@ -1007,11 +982,13 @@ sa_switch(struct lwp *l, int type)
* go. If the LWP on the VP was idling, don't make it
* run again, though.
*/
+ freesau = sau;
if (vp->savp_lwp->l_flag & L_SA_YIELD)
l2 = NULL;
else {
- l2 = vp->savp_lwp; /* XXXUPSXXX Unfair advantage for l2 ? */
- if((l2->l_stat != LSRUN) || ((l2->l_flag & L_INMEM) == 0))
+ /* XXXUPSXXX Unfair advantage for l2 ? */
+ l2 = vp->savp_lwp;
+ if (l2->l_stat != LSRUN || (l2->l_flag & L_INMEM) == 0)
l2 = NULL;
}
} else {
@@ -1022,7 +999,7 @@ sa_switch(struct lwp *l, int type)
DPRINTFN(4,("sa_switch(%d.%d) switching to LWP %d.\n",
p->p_pid, l->l_lid, l2 ? l2->l_lid : 0));
mi_switch(l, l2);
-
+ sadata_upcall_free(freesau);
DPRINTFN(4,("sa_switch(%d.%d flag %x) returned.\n",
p->p_pid, l->l_lid, l->l_flag));
KDASSERT(l->l_wchan == 0);
@@ -1219,7 +1196,6 @@ sa_unblock_userret(struct lwp *l)
lwp_exit(l);
sau = sadata_upcall_alloc(1);
- sau->sau_arg = NULL;
if (p->p_flag & P_WEXIT) {
sadata_upcall_free(sau);
lwp_exit(l);
@@ -1235,19 +1211,8 @@ sa_unblock_userret(struct lwp *l)
* Defer saving the event lwp's state because a
* PREEMPT upcall could be on the queue already.
*/
- if (sa_upcall0(l, SA_UPCALL_UNBLOCKED | SA_UPCALL_DEFER_EVENT,
- l, l2, 0, NULL, sau) != 0) {
- /*
- * We were supposed to deliver an UNBLOCKED
- * upcall, but don't have resources to do so.
- */
-#ifdef DIAGNOSTIC
- printf("sa_unblock_userret: out of upcall resources"
- " for %d.%d\n", p->p_pid, l->l_lid);
-#endif
- sigexit(l, SIGABRT);
- /* NOTREACHED */
- }
+ sa_upcall0(sau, SA_UPCALL_UNBLOCKED | SA_UPCALL_DEFER_EVENT,
+ l, l2, 0, NULL, NULL);
sau->sau_stack = sast->sast_stack;
SCHED_LOCK(s);
@@ -1309,25 +1274,12 @@ sa_upcall_userret(struct lwp *l)
p->p_pid, l->l_lid, l2->l_lid));
sau = sadata_upcall_alloc(1);
- sau->sau_arg = NULL;
if (p->p_flag & P_WEXIT) {
sadata_upcall_free(sau);
lwp_exit(l);
}
- if (sa_upcall0(l, SA_UPCALL_UNBLOCKED, l2, l, 0, NULL,
- sau) != 0) {
- /*
- * We were supposed to deliver an UNBLOCKED
- * upcall, but don't have resources to do so.
- */
-#ifdef DIAGNOSTIC
- printf("sa_upcall_userret: out of upcall resources"
- " for %d.%d\n", p->p_pid, l->l_lid);
-#endif
- sigexit(l, SIGABRT);
- /* NOTREACHED */
- }
+ sa_upcall0(sau, SA_UPCALL_UNBLOCKED, l2, l, 0, NULL, NULL);
sau->sau_stack = sast->sast_stack;
SIMPLEQ_INSERT_TAIL(&vp->savp_upcalls, sau, sau_next);
Index: kern/kern_sig.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_sig.c,v
retrieving revision 1.208
diff -u -p -r1.208 kern_sig.c
--- kern/kern_sig.c 23 Jul 2005 22:02:13 -0000 1.208
+++ kern/kern_sig.c 18 Sep 2005 22:53:21 -0000
@@ -87,6 +87,7 @@ static void ksiginfo_exithook(struct pro
static void ksiginfo_put(struct proc *, const ksiginfo_t *);
static ksiginfo_t *ksiginfo_get(struct proc *, int);
static void kpsignal2(struct proc *, const ksiginfo_t *, int);
+static void sig_free_siginfo(void *);
sigset_t contsigmask, stopsigmask, sigcantmask;
@@ -1388,6 +1389,13 @@ kpsignal2(struct proc *p, const ksiginfo
SCHED_UNLOCK(s);
}
+static void
+sig_free_siginfo(void *arg)
+{
+
+ pool_put(&siginfo_pool, arg);
+}
+
void
kpsendsig(struct lwp *l, const ksiginfo_t *ksi, const sigset_t *mask)
{
@@ -1410,7 +1418,7 @@ kpsendsig(struct lwp *l, const ksiginfo_
else
li = l;
if (sa_upcall(l, SA_UPCALL_SIGNAL | SA_UPCALL_DEFER, le, li,
- sizeof(*si), si) != 0) {
+ sizeof(*si), si, sig_free_siginfo) != 0) {
pool_put(&siginfo_pool, si);
if (KSI_TRAP_P(ksi))
/* XXX What do we do here?? */;
Index: kern/kern_synch.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_synch.c,v
retrieving revision 1.149
diff -u -p -r1.149 kern_synch.c
--- kern/kern_synch.c 29 May 2005 22:24:15 -0000 1.149
+++ kern/kern_synch.c 18 Sep 2005 22:53:22 -0000
@@ -384,6 +384,7 @@ ltsleep(__volatile const void *ident, in
struct lwp *l = curlwp;
struct proc *p = l ? l->l_proc : NULL;
struct slpque *qp;
+ struct sadata_upcall *sau;
int sig, s;
int catch = priority & PCATCH;
int relock = (priority & PNORELOCK) == 0;
@@ -420,6 +421,18 @@ ltsleep(__volatile const void *ident, in
ktrcsw(p, 1, 0);
#endif
+ /*
+ * XXX We need to allocate the sadata_upcall structure here,
+ * XXX since we can't sleep while waiting for memory inside
+ * XXX sa_upcall(). It would be nice if we could safely
+ * XXX allocate the sadata_upcall structure on the stack, here.
+ */
+ if (l->l_flag & L_SA) {
+ sau = sadata_upcall_alloc(0);
+ } else {
+ sau = NULL;
+ }
+
SCHED_LOCK(s);
#ifdef DIAGNOSTIC
@@ -490,7 +503,7 @@ ltsleep(__volatile const void *ident, in
p->p_stats->p_ru.ru_nvcsw++;
SCHED_ASSERT_LOCKED();
if (l->l_flag & L_SA)
- sa_switch(l, SA_UPCALL_BLOCKED);
+ sa_switch(l, sau, SA_UPCALL_BLOCKED);
else
mi_switch(l, NULL);
Index: kern/kern_time.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_time.c,v
retrieving revision 1.92
diff -u -p -r1.92 kern_time.c
--- kern/kern_time.c 23 Jul 2005 18:54:07 -0000 1.92
+++ kern/kern_time.c 18 Sep 2005 22:53:23 -0000
@@ -100,7 +100,7 @@ __KERNEL_RCSID(0, "$NetBSD: kern_time.c,
#include <machine/cpu.h>
static void timerupcall(struct lwp *, void *);
-
+static void timer_free_siginfo(void *);
/* Time of day and interval timer support.
*
@@ -867,6 +867,14 @@ sys_timer_getoverrun(struct lwp *l, void
return (0);
}
+static void
+timer_free_siginfo(void *arg)
+{
+ extern struct pool siginfo_pool; /* XXX Ew. */
+
+ pool_put(&siginfo_pool, arg);
+}
+
/* Glue function that triggers an upcall; called from userret(). */
static void
timerupcall(struct lwp *l, void *arg)
@@ -894,7 +902,7 @@ timerupcall(struct lwp *l, void *arg)
si = pool_get(&siginfo_pool, PR_WAITOK);
si->_info = pt->pts_timers[i]->pt_info.ksi_info;
if (sa_upcall(l, SA_UPCALL_SIGEV | SA_UPCALL_DEFER, NULL, l,
- sizeof(*si), si) != 0) {
+ sizeof(*si), si, timer_free_siginfo) != 0) {
pool_put(&siginfo_pool, si);
/* XXX What do we do here?? */
} else
Index: kern/subr_pool.c
===================================================================
RCS file: /cvsroot/src/sys/kern/subr_pool.c,v
retrieving revision 1.101
diff -u -p -r1.101 subr_pool.c
--- kern/subr_pool.c 18 Jun 2005 01:34:03 -0000 1.101
+++ kern/subr_pool.c 18 Sep 2005 22:53:24 -0000
@@ -808,11 +808,10 @@ pool_get(struct pool *pp, int flags)
(flags & PR_WAITOK) != 0))
panic("pool_get: %s: must have NOWAIT", pp->pr_wchan);
-#ifdef LOCKDEBUG
if (flags & PR_WAITOK)
simple_lock_only_held(NULL, "pool_get(PR_WAITOK)");
-#endif
#endif /* DIAGNOSTIC */
+ SCHED_ASSERT_UNLOCKED();
simple_lock(&pp->pr_slock);
pr_enter(pp, file, line);
@@ -1060,6 +1059,7 @@ pool_do_put(struct pool *pp, void *v, st
int s;
LOCK_ASSERT(simple_lock_held(&pp->pr_slock));
+ SCHED_ASSERT_UNLOCKED();
page = (caddr_t)((u_long)v & pp->pr_alloc->pa_pagemask);
Index: sys/savar.h
===================================================================
RCS file: /cvsroot/src/sys/sys/savar.h,v
retrieving revision 1.15
diff -u -p -r1.15 savar.h
--- sys/savar.h 14 Mar 2004 01:08:47 -0000 1.15
+++ sys/savar.h 18 Sep 2005 22:53:26 -0000
@@ -63,6 +63,7 @@ struct sadata_upcall {
int sau_type;
size_t sau_argsize;
void *sau_arg;
+ void (*sau_argfreefunc)(void *);
stack_t sau_stack;
union sau_state sau_event;
union sau_state sau_interrupted;
@@ -131,10 +132,11 @@ struct sadata_upcall *sadata_upcall_allo
void sadata_upcall_free(struct sadata_upcall *);
void sa_release(struct proc *);
-void sa_switch(struct lwp *, int);
+void sa_switch(struct lwp *, struct sadata_upcall *, int);
void sa_preempt(struct lwp *);
void sa_yield(struct lwp *);
-int sa_upcall(struct lwp *, int, struct lwp *, struct lwp *, size_t, void *);
+int sa_upcall(struct lwp *, int, struct lwp *, struct lwp *, size_t, void *,
+ void (*)(void *));
void sa_putcachelwp(struct proc *, struct lwp *);
struct lwp *sa_getcachelwp(struct sadata_vp *);
--r5Pyd7+fXNt84Ff3--