NetBSD-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

lib/42900: pthread_create(3) deadlock in pthread_atfork(3)'s handler



>Number:         42900
>Category:       lib
>Synopsis:       pthread_create(3) deadlock in pthread_atfork(3)'s handler
>Confidential:   no
>Severity:       critical
>Priority:       high
>Responsible:    lib-bug-people
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Sun Feb 28 15:15:00 +0000 2010
>Originator:     Takehiko NOZAKI
>Release:        5.99.24
>Organization:
>Environment:
NetBSD vmware 5.99.24 NetBSD 5.99.24 (MONOLITHIC) #0: Mon Feb 15 02:18:10 JST 
2010  root@vmware:/usr/obj.i386/sys/arch/i386/compile/MONOLITHIC i386
>Description:
1. our pthread_create(3) internally uses pthread_atfork(3)
and set 2 child handler, pthread__child_callback() and
pthread__fork_callback(). so, the application do that:
  a) call pthread_atfork(3) and register handler
  b) call fork(2)
  c) call pthread_create(3) in a's handler
may caught deadlock by atfork_lock.

2. Linux/glibc2's pthread_atfork(3) don't gain lock
when executing their handler, so they allow to call
pthread_atfork(3) in its handler recursively
(yes it's meaningless, but unfortunately spec doesn't forbid it).
but our libc's fork gain atfork_lock, may deadlock.

these problems also exists 5.0.x, i hope fixed till 5.1.


 this bug first reported at ruby-dev ML by taca@-san.
http://redmine.ruby-lang.org/issues/show/2603

 i got many suggestion from many ruby's developer
including naruse-san and usaku-san.
 glibc2's implementation status from linux kernel developer, kosaki-san.
 NetBSD's bug is anlyzed by enami@-san.

thanks a lot for all peoples!
>How-To-Repeat:
try following sample.

if a thread has been created before fork(2) by
-DTHREAD_HAS_BEEN_CREATED, no deadlock occured.

but without it, may hangup.

--
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>


static int running;
static pthread_t thread;
static pthread_mutex_t mutex;
static pthread_cond_t cond;

static void *timer(void *);
static void init_timer(void);
static void start_timer(void);
static void stop_timer(void);

static void *
timer(void *arg)
{
        struct timeval now;
        struct timespec timeout;

        pthread_mutex_lock(&mutex);
        pthread_cond_signal(&cond);
        do {
                printf("zzz...\n");
                gettimeofday(&now, NULL);
                timeout.tv_sec  = now.tv_sec + 1;
                timeout.tv_nsec = now.tv_usec * 1000;
                pthread_cond_timedwait(&cond, &mutex, &timeout);
        } while (running);
        pthread_mutex_unlock(&mutex);
        return NULL;
}

static void
init_timer(void)
{
        running = 0;
        pthread_mutex_init(&mutex, NULL);
        pthread_cond_init(&cond, NULL);
}

static void
start_timer(void)
{
        pthread_mutex_lock(&mutex);
        running = 1;
        if (pthread_create(&thread, NULL, &timer, NULL) == 0)
                pthread_cond_wait(&cond, &mutex);
        pthread_mutex_unlock(&mutex);
}

void
stop_timer()
{
        pthread_mutex_lock(&mutex);
        running = 0;
        pthread_cond_signal(&cond);
        pthread_mutex_unlock(&mutex);
        pthread_join(thread, NULL);
}

int
main(void)
{
        int pid;

        pthread_atfork(&stop_timer, &start_timer, &init_timer);
        init_timer();
#ifdef THREAD_HAS_BEEN_CREATED
        start_timer();
#endif

        pid = fork();
        if (pid < 0)
                abort();
        if (pid == 0) {
                /* child process */
        } else {
                /* parent process */
        }
}

>Fix:
see following patch.

*** limitation ***
  1. move pthread_atfork(3) from libc to libpthread, and added fork(2)
  to libpthread. so that if you don't link libpthread, the handler may
  never executed(at leaset, other OS can't compile if don't add
  -pthread, so i never mind). 


  2. glibc2 approach, they uses memory barrier and if modified
  atfork handler queue, retry it.
  but i simply capture TAILQ_FIRST/LAST in fork(2).


Index: lib/libc/gen/pthread_atfork.c
===================================================================
RCS file: /cvsroot/src/lib/libc/gen/pthread_atfork.c,v
retrieving revision 1.8
diff -u -r1.8 pthread_atfork.c
--- lib/libc/gen/pthread_atfork.c       28 Apr 2008 20:22:59 -0000      1.8
+++ lib/libc/gen/pthread_atfork.c       28 Feb 2010 10:53:31 -0000
@@ -35,169 +35,24 @@
 #endif /* LIBC_SCCS and not lint */
 
 #include "namespace.h"
-
-#include <errno.h>
-#include <stdlib.h>
 #include <unistd.h>
-#include <sys/queue.h>
 #include "reentrant.h"
 
-#ifdef __weak_alias
 __weak_alias(pthread_atfork, _pthread_atfork)
 __weak_alias(fork, _fork)
-#endif /* __weak_alias */
-
-pid_t  __fork __P((void));     /* XXX */
-
-struct atfork_callback {
-       SIMPLEQ_ENTRY(atfork_callback) next;
-       void (*fn)(void);
-};
-
-/*
- * Hypothetically, we could protect the queues with a rwlock which is
- * write-locked by pthread_atfork() and read-locked by fork(), but
- * since the intended use of the functions is obtaining locks to hold
- * across the fork, forking is going to be serialized anyway.
- */
-static struct atfork_callback atfork_builtin;
-static mutex_t atfork_lock = MUTEX_INITIALIZER;
-SIMPLEQ_HEAD(atfork_callback_q, atfork_callback);
-
-static struct atfork_callback_q prepareq = SIMPLEQ_HEAD_INITIALIZER(prepareq);
-static struct atfork_callback_q parentq = SIMPLEQ_HEAD_INITIALIZER(parentq);
-static struct atfork_callback_q childq = SIMPLEQ_HEAD_INITIALIZER(childq);
-
-static struct atfork_callback *
-af_alloc(void)
-{
-
-       if (atfork_builtin.fn == NULL)
-               return &atfork_builtin;
-
-       return malloc(sizeof(atfork_builtin));
-}
 
-static void
-af_free(struct atfork_callback *af)
-{
-
-       if (af != &atfork_builtin)
-               free(af);
-}
+pid_t   __fork(void);   /* XXX */
 
 int
+/*ARGSUSED*/
 pthread_atfork(void (*prepare)(void), void (*parent)(void),
     void (*child)(void))
 {
-       struct atfork_callback *newprepare, *newparent, *newchild;
-
-       newprepare = newparent = newchild = NULL;
-
-       mutex_lock(&atfork_lock);
-       if (prepare != NULL) {
-               newprepare = af_alloc();
-               if (newprepare == NULL) {
-                       mutex_unlock(&atfork_lock);
-                       return ENOMEM;
-               }
-               newprepare->fn = prepare;
-       }
-
-       if (parent != NULL) {
-               newparent = af_alloc();
-               if (newparent == NULL) {
-                       if (newprepare != NULL)
-                               af_free(newprepare);
-                       mutex_unlock(&atfork_lock);
-                       return ENOMEM;
-               }
-               newparent->fn = parent;
-       }
-
-       if (child != NULL) {
-               newchild = af_alloc();
-               if (newchild == NULL) {
-                       if (newprepare != NULL)
-                               af_free(newprepare);
-                       if (newparent != NULL)
-                               af_free(newparent);
-                       mutex_unlock(&atfork_lock);
-                       return ENOMEM;
-               }
-               newchild->fn = child;
-       }
-
-       /*
-        * The order in which the functions are called is specified as
-        * LIFO for the prepare handler and FIFO for the others; insert
-        * at the head and tail as appropriate so that SIMPLEQ_FOREACH()
-        * produces the right order.
-        */
-       if (prepare)
-               SIMPLEQ_INSERT_HEAD(&prepareq, newprepare, next);
-       if (parent)
-               SIMPLEQ_INSERT_TAIL(&parentq, newparent, next);
-       if (child)
-               SIMPLEQ_INSERT_TAIL(&childq, newchild, next);
-       mutex_unlock(&atfork_lock);
-
        return 0;
 }
 
 pid_t
 fork(void)
 {
-       struct atfork_callback *iter;
-       pid_t ret;
-
-       mutex_lock(&atfork_lock);
-       SIMPLEQ_FOREACH(iter, &prepareq, next)
-               (*iter->fn)();
-
-       ret = __fork();
-
-       if (ret != 0) {
-               /*
-                * We are the parent. It doesn't matter here whether
-                * the fork call succeeded or failed.
-                */
-               SIMPLEQ_FOREACH(iter, &parentq, next)
-                       (*iter->fn)();
-               mutex_unlock(&atfork_lock);
-       } else {
-               /* We are the child */
-               SIMPLEQ_FOREACH(iter, &childq, next)
-                       (*iter->fn)();
-               /*
-                * Note: We are explicitly *not* unlocking
-                * atfork_lock.  Unlocking atfork_lock is problematic,
-                * because if any threads in the parent blocked on it
-                * between the initial lock and the fork() syscall,
-                * unlocking in the child will try to schedule
-                * threads, and either the internal mutex interlock or
-                * the runqueue spinlock could have been held at the
-                * moment of fork(). Since the other threads do not
-                * exist in this process, the spinlock will never be
-                * unlocked, and we would wedge.
-                * Instead, we reinitialize atfork_lock, since we know
-                * that the state of the atfork lists is consistent here,
-                * and that there are no other threads to be affected by
-                * the forcible cleaning of the queue.
-                * This permits double-forking to work, although
-                * it requires knowing that it's "safe" to initialize
-                * a locked mutex in this context.
-                *
-                * The problem exists for users of this interface,
-                * too, since the intented use of pthread_atfork() is
-                * to acquire locks across the fork call to ensure
-                * that the child sees consistent state. There's not
-                * much that can usefully be done in a child handler,
-                * and conventional wisdom discourages using them, but
-                * they're part of the interface, so here we are...
-                */
-               mutex_init(&atfork_lock, NULL);
-       }
-
-       return ret;
+       return __fork();
 }
Index: lib/libpthread/Makefile
===================================================================
RCS file: /cvsroot/src/lib/libpthread/Makefile,v
retrieving revision 1.56
diff -u -r1.56 Makefile
--- lib/libpthread/Makefile     16 May 2009 22:21:18 -0000      1.56
+++ lib/libpthread/Makefile     28 Feb 2010 10:53:31 -0000
@@ -39,6 +39,7 @@
 # library semantics will end up discarding potentially important objects.
 #
 SRCS=  pthread.c 
+SRCS+= pthread_atfork.c
 SRCS+= pthread_attr.c
 SRCS+= pthread_barrier.c
 SRCS+= pthread_cancelstub.c
Index: lib/libpthread/pthread.c
===================================================================
RCS file: /cvsroot/src/lib/libpthread/pthread.c,v
retrieving revision 1.113
diff -u -r1.113 pthread.c
--- lib/libpthread/pthread.c    3 Oct 2009 23:49:50 -0000       1.113
+++ lib/libpthread/pthread.c    28 Feb 2010 10:53:31 -0000
@@ -68,10 +68,7 @@
 static int     pthread__stackid_setup(void *, size_t, pthread_t *);
 static int     pthread__stackalloc(pthread_t *);
 static void    pthread__initmain(pthread_t *);
-static void    pthread__fork_callback(void);
 static void    pthread__reap(pthread_t);
-static void    pthread__child_callback(void);
-static void    pthread__start(void);
 
 void   pthread__init(void);
 
@@ -82,7 +79,7 @@
 
 static pthread_attr_t pthread_default_attr;
 static lwpctl_t pthread__dummy_lwpctl = { .lc_curcpu = LWPCTL_CPU_NONE };
-static pthread_t pthread__first;
+pthread_t pthread__first;
 
 enum {
        DIAGASSERT_ABORT =      1<<0,
@@ -228,50 +225,9 @@
 
        /* Tell libc that we're here and it should role-play accordingly. */
        pthread__first = first;
-       pthread_atfork(NULL, NULL, pthread__fork_callback);
        __isthreaded = 1;
 }
 
-static void
-pthread__fork_callback(void)
-{
-
-       /* lwpctl state is not copied across fork. */
-       if (_lwp_ctl(LWPCTL_FEATURE_CURCPU, &pthread__first->pt_lwpctl)) {
-               err(1, "_lwp_ctl");
-       }
-}
-
-static void
-pthread__child_callback(void)
-{
-
-       /*
-        * Clean up data structures that a forked child process might
-        * trip over. Note that if threads have been created (causing
-        * this handler to be registered) the standards say that the
-        * child will trigger undefined behavior if it makes any
-        * pthread_* calls (or any other calls that aren't
-        * async-signal-safe), so we don't really have to clean up
-        * much. Anything that permits some pthread_* calls to work is
-        * merely being polite.
-        */
-       pthread__started = 0;
-}
-
-static void
-pthread__start(void)
-{
-
-       /*
-        * Per-process timers are cleared by fork(); despite the
-        * various restrictions on fork() and threads, it's legal to
-        * fork() before creating any threads. 
-        */
-       pthread_atfork(NULL, NULL, pthread__child_callback);
-}
-
-
 /* General-purpose thread data structure sanitization. */
 /* ARGSUSED */
 static void
@@ -327,10 +283,8 @@
         * It's okay to check this without a lock because there can
         * only be one thread before it becomes true.
         */
-       if (pthread__started == 0) {
-               pthread__start();
+       if (pthread__started == 0)
                pthread__started = 1;
-       }
 
        if (attr == NULL)
                nattr = pthread_default_attr;
Index: lib/libpthread/pthread_atfork.c
===================================================================
RCS file: lib/libpthread/pthread_atfork.c
diff -N lib/libpthread/pthread_atfork.c
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ lib/libpthread/pthread_atfork.c     28 Feb 2010 10:53:31 -0000
@@ -0,0 +1,168 @@
+/* $NetBSD$ */
+
+/*-
+ * Copyright (c) 2002, 2010 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Nathan J. Williams, Takehiko NOZAKI.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#if defined(LIBC_SCCS) && !defined(lint)
+__RCSID("$NetBSD$");
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/param.h>
+#include <sys/mman.h>
+#include <sys/sysctl.h>
+#include <sys/lwpctl.h>
+
+#include <sys/queue.h>
+#include <err.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "pthread.h"
+#include "pthread_int.h"
+
+pid_t  __fork(void);   /* XXX */
+
+struct atfork_callback {
+       TAILQ_ENTRY(atfork_callback) entry;
+       void (*preparefn)(void);
+       void (*parentfn)(void);
+       void (*childfn)(void);
+};
+
+/*
+ * Hypothetically, we could protect the queues with a rwlock which is
+ * write-locked by pthread_atfork() and read-locked by fork(), but
+ * since the intended use of the functions is obtaining locks to hold
+ * across the fork, forking is going to be serialized anyway.
+ */
+static pthread_mutex_t atfork_lock = PTHREAD_MUTEX_INITIALIZER;
+
+TAILQ_HEAD(atfork_callback_q, atfork_callback);
+static struct atfork_callback_q atforkq = TAILQ_HEAD_INITIALIZER(atforkq);
+
+int
+pthread_atfork(void (*prepare)(void), void (*parent)(void),
+    void (*child)(void))
+{
+       struct atfork_callback *newatfork;
+
+       if (prepare != NULL || parent != NULL || child != NULL) {
+               newatfork = malloc(sizeof(struct atfork_callback));
+               if (newatfork == NULL)
+                       return ENOMEM;
+               newatfork->preparefn = prepare;
+               newatfork->parentfn  = parent;
+               newatfork->childfn   = child;
+               pthread_mutex_lock(&atfork_lock);
+               TAILQ_INSERT_TAIL(&atforkq, newatfork, entry);
+               pthread_mutex_unlock(&atfork_lock);
+       }
+
+       return 0;
+}
+
+/* XXX: FIXME */
+extern int pthread__started;
+extern pthread_t pthread__first;
+
+pid_t
+fork(void)
+{
+       struct atfork_callback *head, *tail, *iter;
+       pid_t ret;
+
+       pthread_mutex_lock(&atfork_lock);
+       head = TAILQ_FIRST(&atforkq);
+       tail = TAILQ_LAST(&atforkq, atfork_callback_q);
+       pthread_mutex_unlock(&atfork_lock);
+
+       /*
+        * The order in which the functions are called is specified as
+        * LIFO for the prepare handler and FIFO for the others.
+        */
+       if (tail != NULL) {
+               for (iter = tail; /**/;
+                    iter = TAILQ_PREV(iter, atfork_callback_q, entry)) {
+                       if (iter->preparefn)
+                               (*iter->preparefn)();
+                       if (iter == head)
+                               break;
+               }
+       }
+
+       ret = __fork();
+
+       if (ret != 0) {
+               /*
+                * We are the parent. It doesn't matter here whether
+                * the fork call succeeded or failed.
+                */
+               if (head != NULL) {
+                       for (iter = head; /**/;
+                            iter = TAILQ_NEXT(iter, entry)) {
+                               if (iter->parentfn)
+                                       (*iter->parentfn)();
+                               if (iter == tail)
+                                       break;
+                       }
+               }
+       } else {
+               /* lwpctl state is not copied across fork. */
+               if (_lwp_ctl(LWPCTL_FEATURE_CURCPU, &pthread__first->pt_lwpctl))
+                       err(1, "_lwp_ctl");
+
+               /*
+                * Clean up data structures that a forked child process might
+                * trip over. Note that if threads have been created (causing
+                * this handler to be registered) the standards say that the
+                * child will trigger undefined behavior if it makes any
+                * pthread_* calls (or any other calls that aren't
+                * async-signal-safe), so we don't really have to clean up
+                * much. Anything that permits some pthread_* calls to work is
+                * merely being polite.
+                */
+               pthread__started = 0;
+
+               /* We are the child */
+               if (head != NULL) {
+                       for (iter = head; /**/;
+                            iter = TAILQ_NEXT(iter, entry)) {
+                               if (iter->childfn)
+                                       (*iter->childfn)();
+                               if (iter == tail)
+                                       break;
+                       }
+               }
+       }
+
+       return ret;
+}



Home | Main Index | Thread Index | Old Index