NetBSD-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

kern/42772: fork from other than the main thread causes wrong pthread condition



>Number:         42772
>Category:       kern
>Synopsis:       fork from other than the main thread causes wrong pthread 
>condition
>Confidential:   no
>Severity:       non-critical
>Priority:       medium
>Responsible:    kern-bug-people
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Tue Feb 09 01:20:00 +0000 2010
>Originator:     NARUSE, Yui
>Release:        5.0.1
>Organization:
>Environment:
NetBSD kelvena 5.0_STABLE NetBSD 5.0_STABLE (GENERIC) #0: Sat Jan 16 22:36:33 
JST 2010  naruse@:/usr/obj/sys/arch/i386/compile/GENERIC i386
>Description:
1. start initial thread
2. start second thread
3. start third thread
4. stop second thread
5. fork from third thread
Then check ((struct __pthread_st *)pthread_self())->pt_lid in the child process.
It should equal _lwp_self(), which is 1, but it is 3, i.e. the third thread's
pt_lid from the parent.

Run the following code:

/*-
 * Copyright (c)2010 Takehiko NOZAKI,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/time.h>
#include <sys/tree.h>

#include <errno.h>
#include <lwp.h>
#include <pthread.h>
#include <pthread_queue.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/*
 * Array bounds used by the local struct __pthread_st declaration:
 * PTHREAD_KEYS_MAX sizes pt_specific[] and PTHREAD__UNPARK_MAX sizes
 * pt_waiters[].
 */
#define PTHREAD_KEYS_MAX        256
#define PTHREAD__UNPARK_MAX     32

/*
 * One entry on a thread's stack of cancellation cleanup handlers
 * (pt_cleanup_stack in struct __pthread_st): a handler function, its
 * argument, and the queue linkage.  The program in this file never
 * touches these fields; the type is declared only because
 * struct __pthread_st refers to it.
 *
 * The size of this structure needs to be no larger than struct
 * __pthread_cleanup_store, defined in pthread.h.
 */
struct pt_clean_t {
        PTQ_ENTRY(pt_clean_t)   ptc_next;       /* Queue linkage */
        void    (*ptc_cleanup)(void *);         /* Cleanup handler */
        void    *ptc_arg;                       /* Handler argument */
};

/*
 * Table of function pointers that each take a __cpu_simple_lock_t *.
 * struct __pthread_st embeds one instance as pt_lockops.  Nothing in
 * this file calls through the table; judging by the member names the
 * entries are init / try-lock / unlock / lock operations, but that
 * cannot be confirmed from this file.
 */
struct pthread_lock_ops {
        void    (*plo_init)(__cpu_simple_lock_t *);
        int     (*plo_try)(__cpu_simple_lock_t *);
        void    (*plo_unlock)(__cpu_simple_lock_t *);
        void    (*plo_lock)(__cpu_simple_lock_t *);
};

/*
 * Thread descriptor, declared locally so that the test program can cast
 * the value returned by pthread_self() to this type and read pt_lid.
 *
 * NOTE(review): presumably copied from libpthread's private header for
 * the release under test.  The pt_lid read is only meaningful if field
 * order and sizes match the installed library -- confirm before running
 * this against a different libpthread version.
 */
struct  __pthread_st {
        pthread_t       pt_self;        /* Must be first. */
        unsigned int    pt_magic;       /* Magic number */
        int             pt_state;       /* running, blocked, etc. */
        pthread_mutex_t pt_lock;        /* lock on state */
        int             pt_flags;       /* see PT_FLAG_* (not declared in this file) */
        int             pt_cancel;      /* Deferred cancellation */
        int             pt_errno;       /* Thread-specific errno. */
        stack_t         pt_stack;       /* Our stack */
        void            *pt_exitval;    /* Read by pthread_join() */
        char            *pt_name;       /* Thread's name, set by the app. */
        int             pt_willpark;    /* About to park */
        lwpid_t         pt_unpark;      /* Unpark this when parking */
        struct pthread_lock_ops pt_lockops;/* Cached to avoid PIC overhead */
        pthread_mutex_t *pt_droplock;   /* Drop this lock if cancelled */
        pthread_cond_t  pt_joiners;     /* Threads waiting to join. */

        /* Threads to defer waking, usually until pthread_mutex_unlock(). */
        lwpid_t         pt_waiters[PTHREAD__UNPARK_MAX];
        size_t          pt_nwaiters;

        /* Stack of cancellation cleanup handlers and their arguments */
        PTQ_HEAD(, pt_clean_t)  pt_cleanup_stack;

        /*
         * LWP ID and entry on the list of all threads.
         * pt_lid is the one field this test program actually reads: it
         * is compared against _lwp_self() in the child after fork().
         */
        lwpid_t         pt_lid;
        RB_ENTRY(__pthread_st) pt_alltree;
        PTQ_ENTRY(__pthread_st) pt_allq;
        PTQ_ENTRY(__pthread_st) pt_deadq;

        /*
         * General synchronization data.  We try to align, as threads
         * on other CPUs will access this data frequently.
         */
        int             pt_dummy1 __aligned(128);
        struct lwpctl   *pt_lwpctl;     /* Kernel/user comms area */
        volatile int    pt_blocking;    /* Blocking in userspace */
        volatile int    pt_rwlocked;    /* Handed rwlock successfully */
        volatile int    pt_signalled;   /* Received pthread_cond_signal() */
        volatile int    pt_mutexwait;   /* Waiting to acquire mutex */
        void * volatile pt_mutexnext;   /* Next thread in chain */
        void * volatile pt_sleepobj;    /* Object slept on */
        PTQ_ENTRY(__pthread_st) pt_sleep;
        void            (*pt_early)(void *);
        int             pt_dummy2 __aligned(128);

        /* Thread-specific data.  Large so it sits close to the end. */
        int             pt_havespecific;
        void            *pt_specific[PTHREAD_KEYS_MAX];

        /*
         * Context for thread creation.  At the end as it's cached
         * and then only ever passed to _lwp_create().
         */
        ucontext_t      pt_uc;
};

/* Non-zero while the timer thread should keep looping; cleared to stop it. */
static int running;

/* The timer thread and the mutex/condition pair it sleeps on. */
static pthread_t timer_thread;
static pthread_mutex_t timer_mutex;
static pthread_cond_t timer_cond;

/*
 * The thread that calls fork() ("third thread") and a second
 * mutex/condition pair that main() and my_fork() wait on.
 */
static pthread_t thread;
static pthread_mutex_t mutex;
static pthread_cond_t cond;

/* Timer helpers, defined further down in this file. */
static void init_timer(void);
static void start_timer(void);
static void stop_timer(void);
static void *timer(void *);

/*
 * Print one line with the result of _lwp_kill(lid, 0) for LWP IDs 1..4,
 * prefixed with this process's pid.  In the sample output of the report
 * a 0 lines up with LWPs that exist and -1 with ones that do not.
 */
static void
show_lwps(void)
{
        int lid = 1;

        printf("%6d: lwps: ", getpid());
        while (lid <= 4) {
                /* Signal 0 is used here only to obtain a return value. */
                printf("%2d ", _lwp_kill(lid, 0));
                lid++;
        }
        printf("\n");
}

/*
 * Body of the timer thread.
 *
 * Takes timer_mutex, signals timer_cond once (start_timer() is waiting
 * on it), then repeatedly sleeps on timer_cond with a one second
 * timeout until `running' is cleared.  A return of 0 or ETIMEDOUT from
 * pthread_cond_timedwait() is expected; any other error is printed and
 * the process aborts.  ESRCH gets its own message because it is the
 * failure this bug report is about.
 *
 * arg is unused.  Always returns NULL.
 */
static void *
timer(void *arg)
{
        struct timeval now;
        struct timespec timeout;

        (void)arg;

        pthread_mutex_lock(&timer_mutex);
        /* Let start_timer() know that this thread is up. */
        pthread_cond_signal(&timer_cond);
        do {
                int err;

                printf("%6d: zzz...\n",getpid());
                /* Absolute deadline: one second from now. */
                gettimeofday(&now, NULL);
                timeout.tv_sec  = now.tv_sec + 1;
                timeout.tv_nsec = now.tv_usec * 1000;
                err = pthread_cond_timedwait(&timer_cond, &timer_mutex,
                    &timeout);
                switch (err) {
                case 0:
                case ETIMEDOUT:
                        break;
                case ESRCH:
                        /*
                         * strerror() rather than sys_errlist[]: it is
                         * standard C and copes with out-of-range codes.
                         */
                        printf("%6d: err: ESRCH %s\n", getpid(),
                            strerror(err));
                        abort();
                        /* NOTREACHED */
                default:
                        printf("%6d: err: %d = %s\n", getpid(), err,
                            strerror(err));
                        abort();
                        /* NOTREACHED */
                }
        } while (running);
        pthread_mutex_unlock(&timer_mutex);
        return NULL;
}

/*
 * Put the timer state into its initial condition: timer_cond and
 * timer_mutex are (re)initialized with default attributes and the
 * `running' flag is cleared.  Called from main() and again in the
 * child after fork().
 */
static void
init_timer(void)
{
        pthread_cond_init(&timer_cond, NULL);
        pthread_mutex_init(&timer_mutex, NULL);
        running = 0;
}

/*
 * Create the timer thread and wait for its start-up signal.
 *
 * timer_mutex is held from before pthread_create() until the wait, and
 * timer() only signals timer_cond after it has acquired the same mutex,
 * so the signal cannot be sent before this thread is waiting.  If the
 * thread cannot be created, the function just releases the mutex.
 */
static void
start_timer(void)
{
        int rc;

        printf("%6d: starting timer\n", getpid());
        pthread_mutex_lock(&timer_mutex);
        running = 1;

        rc = pthread_create(&timer_thread, NULL, &timer, NULL);
        if (rc != 0) {
                /* No thread, so nothing will ever signal us. */
                pthread_mutex_unlock(&timer_mutex);
                return;
        }

        pthread_cond_wait(&timer_cond, &timer_mutex);
        pthread_mutex_unlock(&timer_mutex);
}

/*
 * Stop the timer thread and wait for it to finish.
 *
 * Clears `running' and signals timer_cond while holding timer_mutex, so
 * the timer thread wakes from pthread_cond_timedwait(), sees the flag
 * and leaves its loop; then joins the thread.  The join happens after
 * the mutex has been released because timer() needs the mutex to
 * return from its wait.
 *
 * Declared with (void) to match the prototype near the top of the file.
 */
static void
stop_timer(void)
{
        printf("%6d: stopping timer\n", getpid());
        pthread_mutex_lock(&timer_mutex);
        running = 0;
        pthread_cond_signal(&timer_cond);
        pthread_mutex_unlock(&timer_mutex);
        pthread_join(timer_thread, NULL);
}

/*
 * Core of the reproduction; runs in the third thread.
 *
 * Stops the timer thread (so the LWP it occupied goes away), forks, and
 * then in both processes prints _lwp_self() next to the pt_lid stored
 * in the descriptor returned by pthread_self().  The child additionally
 * reports a mismatch between the two, re-initializes the
 * synchronization objects and starts a new timer thread; the parent
 * just restarts its timer.  show_lwps() is called along the way to show
 * which LWP IDs exist at each step.
 *
 * Finally waits on `cond', which nothing in this file ever signals.
 */
static void
my_fork(void)
{
        pid_t pid;
        struct  __pthread_st *ptst;

        show_lwps();
        stop_timer();
        show_lwps();

        printf("%6d: fork() in thirdthread(_lwp_self()=%d)\n", getpid(),
            _lwp_self());
        pid = fork();
        if (pid < 0)
                abort();

        /* Looked up after fork() so that each process reports its own view. */
        ptst = (struct __pthread_st *)pthread_self();

        if (pid == 0) {
                /* child process */
                printf("%6d: CHILD _lwp_self()=%d pthread_self()->pt_lid=%d\n",
                    getpid(), _lwp_self(), ptst->pt_lid);
                if (_lwp_self() != ptst->pt_lid) {
                        printf("%6d: CHILD's self->pt_lid is wrong!\n",
                            getpid());
                }
                /*
                 * Workaround: uncommenting the following line makes the
                 * descriptor agree with the kernel again.
                 */
                /* ((struct __pthread_st *)pthread_self())->pt_lid = _lwp_self(); */
                show_lwps();

                pthread_mutex_init(&mutex, NULL);
                pthread_cond_init(&cond, NULL);
                init_timer();
                start_timer();
        } else {
                /* parent process */
                printf("%6d: PARENT _lwp_self()=%d pthread_self()->pt_lid=%d\n",
                    getpid(), _lwp_self(), ptst->pt_lid);
                show_lwps();
                start_timer();
                show_lwps();
        }

        /*
         * NOTE(review): `mutex' is never locked anywhere in this file,
         * so this wait is entered without holding it, which POSIX
         * leaves undefined.  Left unchanged so that the reproduction
         * behaves as in the report.
         */
        pthread_cond_wait(&cond, &mutex);
}

/*
 * Start routine of the thread created by main().  Announces itself,
 * runs the fork experiment in my_fork() and, if that ever returns,
 * terminates the whole process with status 0.  arg is ignored.
 */
static void *
thirdthread(void *arg)
{
        (void)arg;

        printf("%6d: starting thirdthread\n", getpid());
        my_fork();

        exit(0);
        /* NOTREACHED: kept so the function has a return statement. */
        return NULL;
}

/*
 * Entry point of the reproduction.
 *
 * Starts the timer thread first and the forking thread second (the
 * report's sample output shows the forking thread running as LWP 3),
 * then waits on `cond'.  Nothing in this file signals `cond'; the
 * forking thread calls exit() when my_fork() returns.
 */
int
main(void)
{
        int rc;

        init_timer();
        start_timer();

        pthread_mutex_init(&mutex, NULL);
        pthread_cond_init(&cond, NULL);

        rc = pthread_create(&thread, NULL, &thirdthread, NULL);
        if (rc == 0) {
                pthread_cond_wait(&cond, &mutex);
        }

        sleep(1);
        return 0;
}


It produces the following result:
 16558: starting timer
 16558: zzz...
 16558: starting thirdthread
 16558: lwps:  0  0  0 -1 
 16558: stopping timer
 16558: lwps:  0 -1  0 -1 
 16558: fork() in thirdthread(_lwp_self()=3)
 16558: PARENT _lwp_self()=3 pthread_self()->pt_lid=3
  5661: CHILD _lwp_self()=1 pthread_self()->pt_lid=3
 16558: lwps:  0 -1  0 -1 
  5661: CHILD's self->pt_lid is wrong!
 16558: starting timer
  5661: lwps:  0 -1 -1 -1 
  5661: starting timer
 16558: zzz...
  5661: zzz...
 16558: lwps:  0 -1  0  0 
  5661: err: ESRCH No such process

The last error message occurs because _lwp_park is called with a wrong argument
(note the second argument, 3):
  2570      2 a.out    CALL  _lwp_park(0xbb7ffd94,3,0x804a348,0x804a348)
  2570      2 a.out    RET   _lwp_park -1 errno 3 No such process

A workaround for the above program is to uncomment this line:
/* ((struct __pthread_st *)pthread_self())->pt_lid = _lwp_self(); */
That line resets pt_lid to the value of _lwp_self(), after which the program works.

A possible fix for NetBSD is the following, although more initialization code
may be needed.

Index: lib/libpthread/pthread.c
===================================================================
RCS file: /cvsroot/src/lib/libpthread/pthread.c,v
retrieving revision 1.106.2.2
diff -u -p -r1.106.2.2 pthread.c
--- lib/libpthread/pthread.c    11 Jan 2010 00:47:29 -0000      1.106.2.2
+++ lib/libpthread/pthread.c    8 Feb 2010 22:59:19 -0000
@@ -261,6 +261,8 @@ pthread__child_callback(void)
         * much. Anything that permits some pthread_* calls to work is
         * merely being polite.
         */
+       struct __pthread_st *self = pthread_self();
+       self->pt_lid = _lwp_self();
        pthread__started = 0;
 }
 
>How-To-Repeat:

>Fix:



Home | Main Index | Thread Index | Old Index