Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys exit_lwps, lwp_wait: fix a race condition by re-trying i...



details:   https://anonhg.NetBSD.org/src/rev/df88e7e4491c
branches:  trunk
changeset: 781755:df88e7e4491c
user:      rmind <rmind%NetBSD.org@localhost>
date:      Thu Sep 27 20:43:15 2012 +0000

description:
exit_lwps, lwp_wait: fix a race condition by re-trying if p_lock was dropped
in a case of process exit.  Necessary to re-flag all LWPs for exit, as their
state might have changed or new LWPs spawned.

Should fix PR/46168 and PR/46402.

diffstat:

 sys/kern/kern_exit.c |  39 +++++++++++++--------------------------
 sys/kern/kern_lwp.c  |  31 ++++++++++++++-----------------
 sys/kern/sys_lwp.c   |  15 +++++----------
 sys/sys/lwp.h        |   6 ++----
 4 files changed, 34 insertions(+), 57 deletions(-)

diffs (217 lines):

diff -r 376c8a572fdc -r df88e7e4491c sys/kern/kern_exit.c
--- a/sys/kern/kern_exit.c      Thu Sep 27 18:28:53 2012 +0000
+++ b/sys/kern/kern_exit.c      Thu Sep 27 20:43:15 2012 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: kern_exit.c,v 1.241 2012/08/05 14:53:25 riastradh Exp $        */
+/*     $NetBSD: kern_exit.c,v 1.242 2012/09/27 20:43:15 rmind Exp $    */
 
 /*-
  * Copyright (c) 1998, 1999, 2006, 2007, 2008 The NetBSD Foundation, Inc.
@@ -67,7 +67,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_exit.c,v 1.241 2012/08/05 14:53:25 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_exit.c,v 1.242 2012/09/27 20:43:15 rmind Exp $");
 
 #include "opt_ktrace.h"
 #include "opt_perfctrs.h"
@@ -599,17 +599,14 @@
 void
 exit_lwps(struct lwp *l)
 {
-       struct proc *p;
-       struct lwp *l2;
-       int error;
-       lwpid_t waited;
+       proc_t *p = l->l_proc;
+       lwp_t *l2;
        int nlocks;
 
        KERNEL_UNLOCK_ALL(l, &nlocks);
+retry:
+       KASSERT(mutex_owned(p->p_lock));
 
-       p = l->l_proc;
-       KASSERT(mutex_owned(p->p_lock));
-retry:
        /*
         * Interrupt LWPs in interruptable sleep, unsuspend suspended
         * LWPs and then wait for everyone else to finish.
@@ -623,30 +620,20 @@
                    l2->l_stat == LSSUSPENDED || l2->l_stat == LSSTOP) {
                        /* setrunnable() will release the lock. */
                        setrunnable(l2);
-                       DPRINTF(("exit_lwps: Made %d.%d runnable\n",
-                           p->p_pid, l2->l_lid));
                        continue;
                }
                lwp_unlock(l2);
        }
+
+       /*
+        * Wait for every LWP to exit.  Note: LWPs can get suspended/slept
+        * behind us or there may even be new LWPs created.  Therefore, a
+        * full retry is required on error.
+        */
        while (p->p_nlwps > 1) {
-               DPRINTF(("exit_lwps: waiting for %d LWPs (%d zombies)\n",
-                   p->p_nlwps, p->p_nzlwps));
-               error = lwp_wait1(l, 0, &waited, LWPWAIT_EXITCONTROL);
-               if (p->p_nlwps == 1)
-                       break;
-               if (error == EDEADLK) {
-                       /*
-                        * LWPs can get suspended/slept behind us.
-                        * (eg. sa_setwoken)
-                        * kick them again and retry.
-                        */
+               if (lwp_wait(l, 0, NULL, true)) {
                        goto retry;
                }
-               if (error)
-                       panic("exit_lwps: lwp_wait1 failed with error %d",
-                           error);
-               DPRINTF(("exit_lwps: Got LWP %d from lwp_wait1()\n", waited));
        }
 
        KERNEL_LOCK(nlocks, l);
diff -r 376c8a572fdc -r df88e7e4491c sys/kern/kern_lwp.c
--- a/sys/kern/kern_lwp.c       Thu Sep 27 18:28:53 2012 +0000
+++ b/sys/kern/kern_lwp.c       Thu Sep 27 20:43:15 2012 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: kern_lwp.c,v 1.172 2012/08/30 02:26:02 matt Exp $      */
+/*     $NetBSD: kern_lwp.c,v 1.173 2012/09/27 20:43:15 rmind Exp $     */
 
 /*-
  * Copyright (c) 2001, 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
@@ -211,7 +211,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.172 2012/08/30 02:26:02 matt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.173 2012/09/27 20:43:15 rmind Exp $");
 
 #include "opt_ddb.h"
 #include "opt_lockdebug.h"
@@ -533,22 +533,21 @@
  * Must be called with p->p_lock held.
  */
 int
-lwp_wait1(struct lwp *l, lwpid_t lid, lwpid_t *departed, int flags)
+lwp_wait(struct lwp *l, lwpid_t lid, lwpid_t *departed, bool exiting)
 {
-       struct proc *p = l->l_proc;
-       struct lwp *l2;
-       int nfound, error;
-       lwpid_t curlid;
-       bool exiting;
+       const lwpid_t curlid = l->l_lid;
+       proc_t *p = l->l_proc;
+       lwp_t *l2;
+       int error;
 
        KASSERT(mutex_owned(p->p_lock));
 
        p->p_nlwpwait++;
        l->l_waitingfor = lid;
-       curlid = l->l_lid;
-       exiting = ((flags & LWPWAIT_EXITCONTROL) != 0);
 
        for (;;) {
+               int nfound;
+
                /*
                 * Avoid a race between exit1() and sigexit(): if the
                 * process is dumping core, then we need to bail out: call
@@ -558,10 +557,7 @@
                if ((p->p_sflag & PS_WCORE) != 0) {
                        mutex_exit(p->p_lock);
                        lwp_userret(l);
-#ifdef DIAGNOSTIC
-                       panic("lwp_wait1");
-#endif
-                       /* NOTREACHED */
+                       KASSERT(false);
                }
 
                /*
@@ -651,13 +647,14 @@
                }
 
                /*
-                * The kernel is careful to ensure that it can not deadlock
-                * when exiting - just keep waiting.
+                * Note: since the lock will be dropped, need to restart on
+                * wakeup to run all LWPs again, e.g. there may be new LWPs.
                 */
                if (exiting) {
                        KASSERT(p->p_nlwps > 1);
                        cv_wait(&p->p_lwpcv, p->p_lock);
-                       continue;
+                       error = EAGAIN;
+                       break;
                }
 
                /*
diff -r 376c8a572fdc -r df88e7e4491c sys/kern/sys_lwp.c
--- a/sys/kern/sys_lwp.c        Thu Sep 27 18:28:53 2012 +0000
+++ b/sys/kern/sys_lwp.c        Thu Sep 27 20:43:15 2012 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: sys_lwp.c,v 1.54 2012/05/21 14:15:19 martin Exp $      */
+/*     $NetBSD: sys_lwp.c,v 1.55 2012/09/27 20:43:15 rmind Exp $       */
 
 /*-
  * Copyright (c) 2001, 2006, 2007, 2008 The NetBSD Foundation, Inc.
@@ -35,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v 1.54 2012/05/21 14:15:19 martin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v 1.55 2012/09/27 20:43:15 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -354,19 +354,14 @@
        lwpid_t dep;
 
        mutex_enter(p->p_lock);
-       error = lwp_wait1(l, SCARG(uap, wait_for), &dep, 0);
+       error = lwp_wait(l, SCARG(uap, wait_for), &dep, false);
        mutex_exit(p->p_lock);
 
-       if (error)
-               return error;
-
-       if (SCARG(uap, departed)) {
+       if (!error && SCARG(uap, departed)) {
                error = copyout(&dep, SCARG(uap, departed), sizeof(dep));
-               if (error)
-                       return error;
        }
 
-       return 0;
+       return error;
 }
 
 int
diff -r 376c8a572fdc -r df88e7e4491c sys/sys/lwp.h
--- a/sys/sys/lwp.h     Thu Sep 27 18:28:53 2012 +0000
+++ b/sys/sys/lwp.h     Thu Sep 27 20:43:15 2012 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: lwp.h,v 1.163 2012/07/22 22:40:18 rmind Exp $  */
+/*     $NetBSD: lwp.h,v 1.164 2012/09/27 20:43:15 rmind Exp $  */
 
 /*-
  * Copyright (c) 2001, 2006, 2007, 2008, 2009, 2010
@@ -315,9 +315,7 @@
 bool   lwp_alive(lwp_t *);
 lwp_t  *lwp_find_first(proc_t *);
 
-/* Flags for _lwp_wait1 */
-#define LWPWAIT_EXITCONTROL    0x00000001
-int    lwp_wait1(lwp_t *, lwpid_t, lwpid_t *, int);
+int    lwp_wait(lwp_t *, lwpid_t, lwpid_t *, bool);
 void   lwp_continue(lwp_t *);
 void   lwp_unsleep(lwp_t *, bool);
 void   lwp_unstop(lwp_t *);



Home | Main Index | Thread Index | Old Index