NetBSD-Bugs archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
kern/42772: fork from other than the main thread causes wrong pthread condition
>Number: 42772
>Category: kern
>Synopsis: fork from other than the main thread causes wrong pthread
>condition
>Confidential: no
>Severity: non-critical
>Priority: medium
>Responsible: kern-bug-people
>State: open
>Class: sw-bug
>Submitter-Id: net
>Arrival-Date: Tue Feb 09 01:20:00 +0000 2010
>Originator: NARUSE, Yui
>Release: 5.0.1
>Organization:
>Environment:
NetBSD kelvena 5.0_STABLE NetBSD 5.0_STABLE (GENERIC) #0: Sat Jan 16 22:36:33
JST 2010 naruse@:/usr/obj/sys/arch/i386/compile/GENERIC i386
>Description:
1. start initial thread
2. start second thread
3. start third thread
4. stop second thread
5. fork from third thread
Then check ((struct __pthread_st *)pthread_self())->pt_lid in the child process.
It should equal _lwp_self() (which is 1), but it is 3, which is the third
thread's pt_lid.
Run the following code:
/*-
* Copyright (c)2010 Takehiko NOZAKI,
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <errno.h>
#include <lwp.h>
#include <pthread.h>
#include <pthread_queue.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <sys/tree.h>
#include <unistd.h>
/*
* Array bounds used by struct __pthread_st in this file.
* NOTE(review): presumably these must equal the values libpthread itself
* was built with, otherwise the structure layout declared here will not
* match the library's -- confirm against the release under test.
*/
#define PTHREAD_KEYS_MAX 256 /* number of elements in pt_specific[] */
#define PTHREAD__UNPARK_MAX 32 /* number of elements in pt_waiters[] */
/*
* Record for one cancellation cleanup handler; this is the element type
* of the pt_cleanup_stack queue in struct __pthread_st.
* This program never touches these records; the type is declared only so
* that struct __pthread_st is complete.
*
* The size of this structure needs to be no larger than struct
* __pthread_cleanup_store, defined in pthread.h.
*/
struct pt_clean_t {
PTQ_ENTRY(pt_clean_t) ptc_next; /* Queue linkage */
void (*ptc_cleanup)(void *); /* Cleanup handler */
void *ptc_arg; /* Handler argument (presumably passed to ptc_cleanup) */
};
/*
* Table of function pointers operating on a __cpu_simple_lock_t.
* Embedded by value in struct __pthread_st as pt_lockops; nothing in this
* program calls through it, it is declared only to keep that structure's
* layout complete.
* NOTE(review): the member names suggest init / try-lock / unlock / lock,
* but their exact semantics are not shown in this file.
*/
struct pthread_lock_ops {
void (*plo_init)(__cpu_simple_lock_t *);
int (*plo_try)(__cpu_simple_lock_t *);
void (*plo_unlock)(__cpu_simple_lock_t *);
void (*plo_lock)(__cpu_simple_lock_t *);
};
/*
* Local declaration of the structure that pthread_t points to, so that
* my_fork() can cast pthread_self() to it and read pt_lid directly.
* NOTE(review): presumably copied from libpthread's private header for the
* release under test. Field order, types and alignment must match the
* installed library exactly, or pt_lid will be read from the wrong
* offset -- confirm before running this on another release.
*/
struct __pthread_st {
pthread_t pt_self; /* Must be first. */
unsigned int pt_magic; /* Magic number */
int pt_state; /* running, blocked, etc. */
pthread_mutex_t pt_lock; /* lock on state */
int pt_flags; /* see PT_FLAG_* below */
int pt_cancel; /* Deferred cancellation */
int pt_errno; /* Thread-specific errno. */
stack_t pt_stack; /* Our stack */
void *pt_exitval; /* Read by pthread_join() */
char *pt_name; /* Thread's name, set by the app. */
int pt_willpark; /* About to park */
lwpid_t pt_unpark; /* Unpark this when parking */
struct pthread_lock_ops pt_lockops;/* Cached to avoid PIC overhead */
pthread_mutex_t *pt_droplock; /* Drop this lock if cancelled */
pthread_cond_t pt_joiners; /* Threads waiting to join. */
/* Threads to defer waking, usually until pthread_mutex_unlock(). */
lwpid_t pt_waiters[PTHREAD__UNPARK_MAX];
size_t pt_nwaiters;
/* Stack of cancellation cleanup handlers and their arguments */
PTQ_HEAD(, pt_clean_t) pt_cleanup_stack;
/* LWP ID and entry on the list of all threads. */
lwpid_t pt_lid; /* The field this program prints and compares with _lwp_self() */
RB_ENTRY(__pthread_st) pt_alltree;
PTQ_ENTRY(__pthread_st) pt_allq;
PTQ_ENTRY(__pthread_st) pt_deadq;
/*
* General synchronization data. We try to align, as threads
* on other CPUs will access this data frequently.
*/
int pt_dummy1 __aligned(128);
struct lwpctl *pt_lwpctl; /* Kernel/user comms area */
volatile int pt_blocking; /* Blocking in userspace */
volatile int pt_rwlocked; /* Handed rwlock successfully */
volatile int pt_signalled; /* Received pthread_cond_signal() */
volatile int pt_mutexwait; /* Waiting to acquire mutex */
void * volatile pt_mutexnext; /* Next thread in chain */
void * volatile pt_sleepobj; /* Object slept on */
PTQ_ENTRY(__pthread_st) pt_sleep;
void (*pt_early)(void *);
int pt_dummy2 __aligned(128);
/* Thread-specific data. Large so it sits close to the end. */
int pt_havespecific;
void *pt_specific[PTHREAD_KEYS_MAX];
/*
* Context for thread creation. At the end as it's cached
* and then only ever passed to _lwp_create().
*/
ucontext_t pt_uc;
};
/*
 * Loop flag for the timer thread: start_timer() sets it to 1, and
 * init_timer()/stop_timer() set it to 0.  timer() keeps looping while it
 * is non-zero.
 */
static int running;

/* Thread handles: the timer thread and the thread that runs thirdthread(). */
static pthread_t timer_thread;
static pthread_t thread;

/*
 * timer_mutex/timer_cond coordinate the timer thread with start_timer()
 * and stop_timer(); mutex/cond are what main() and my_fork() block on.
 */
static pthread_mutex_t timer_mutex;
static pthread_mutex_t mutex;
static pthread_cond_t timer_cond;
static pthread_cond_t cond;

/* Forward declarations for the timer helpers defined later in the file. */
static void *timer(void *);
static void init_timer(void);
static void start_timer(void);
static void stop_timer(void);
/*
 * Print one line with this process's pid followed by the return value of
 * _lwp_kill(id, 0) for LWP ids 1 through 4.
 * NOTE(review): signal number 0 is presumably an existence probe that
 * delivers nothing -- in the sample output a live LWP shows 0 and a
 * missing one shows -1.
 */
static void
show_lwps(void)
{
	int lid = 1;

	printf("%6d: lwps: ", getpid());
	while (lid < 5) {
		printf("%2d ", _lwp_kill(lid, 0));
		lid++;
	}
	printf("\n");
}
/*
 * Body of the timer thread.
 *
 * Takes timer_mutex, signals timer_cond once so that start_timer() (which
 * waits on timer_cond after creating this thread) can return, and then
 * repeatedly waits on timer_cond with a one-second timeout for as long as
 * `running` is non-zero.  A return of 0 or ETIMEDOUT from the wait is
 * normal; any other value is printed and the process aborts.  ESRCH gets
 * its own message because it is the error this report is about.
 *
 * arg is unused.  Always returns NULL.
 */
static void *
timer(void *arg)
{
	struct timeval now;
	struct timespec timeout;

	(void)arg;

	pthread_mutex_lock(&timer_mutex);
	/* Tell start_timer() that this thread is up. */
	pthread_cond_signal(&timer_cond);
	do {
		int err;

		printf("%6d: zzz...\n",getpid());
		/* Absolute deadline: one second from now. */
		gettimeofday(&now, NULL);
		timeout.tv_sec = now.tv_sec + 1;
		timeout.tv_nsec = now.tv_usec * 1000;
		err = pthread_cond_timedwait(&timer_cond, &timer_mutex,
		    &timeout);
		switch (err) {
		case 0:
		case ETIMEDOUT:
			break;
		case ESRCH:
			/*
			 * strerror() replaces the deprecated, unchecked
			 * sys_errlist[] lookup; the text is the same.
			 */
			printf("%6d: err: ESRCH %s\n", getpid(),
			    strerror(err));
			abort();
			/* NOTREACHED */
		default:
			printf("%6d: err: %d = %s\n", getpid(), err,
			    strerror(err));
			abort();
			/* NOTREACHED */
		}
	} while (running);
	pthread_mutex_unlock(&timer_mutex);
	return NULL;
}
/*
 * Put the timer state into its initial condition: timer_cond and
 * timer_mutex are initialised with default attributes and `running` is
 * cleared.  Called from main() and again in the child after fork().
 */
static void
init_timer(void)
{
	pthread_cond_init(&timer_cond, NULL);
	pthread_mutex_init(&timer_mutex, NULL);
	running = 0;
}
/*
 * Start the timer thread.
 *
 * With timer_mutex held, sets `running` and creates timer_thread.  If the
 * creation succeeded, waits on timer_cond, which timer() signals as soon
 * as it has acquired timer_mutex.  A failed pthread_create() is silently
 * ignored.
 */
static void
start_timer(void)
{
	int created;

	printf("%6d: starting timer\n", getpid());

	pthread_mutex_lock(&timer_mutex);
	running = 1;
	created = pthread_create(&timer_thread, NULL, timer, NULL);
	if (created == 0) {
		/* Block until the new thread announces itself. */
		pthread_cond_wait(&timer_cond, &timer_mutex);
	}
	pthread_mutex_unlock(&timer_mutex);
}
/*
 * Stop the timer thread and wait for it to finish.
 *
 * With timer_mutex held, clears `running` and signals timer_cond so that
 * timer() leaves its timed wait and sees the cleared flag; then joins
 * timer_thread.  The thread's exit value is discarded.
 *
 * The parameter list is spelled (void) so the definition matches the
 * prototype declared earlier in the file.
 */
static void
stop_timer(void)
{
	printf("%6d: stopping timer\n", getpid());
	pthread_mutex_lock(&timer_mutex);
	running = 0;
	pthread_cond_signal(&timer_cond);
	pthread_mutex_unlock(&timer_mutex);
	pthread_join(timer_thread, NULL);
}
/*
 * Core of the reproduction; called from thirdthread().
 *
 * Stops the timer thread, calls fork(), and in both processes prints
 * _lwp_self() next to the pt_lid stored in the calling thread's
 * descriptor.  The child additionally reports a mismatch between the two,
 * re-initialises mutex/cond and the timer state, and starts a new timer
 * thread; the parent just restarts its timer.  Both processes then block
 * on `cond`, which nothing in this program ever signals.
 *
 * Aborts the process if fork() fails.
 */
static void
my_fork(void)
{
	pid_t pid;
	struct __pthread_st *ptst;

	show_lwps();
	stop_timer();
	show_lwps();
	printf("%6d: fork() in thirdthread(_lwp_self()=%d)\n", getpid(),
	    _lwp_self());
	pid = fork();
	if (pid < 0)
		abort();
	/* Looked up after fork() so each process reads its own descriptor. */
	ptst = (struct __pthread_st *)pthread_self();
	if (pid == 0) {
		/* child process */
		printf("%6d: CHILD _lwp_self()=%d pthread_self()->pt_lid=%d\n",
		    getpid(), _lwp_self(), ptst->pt_lid);
		if (_lwp_self() != ptst->pt_lid)
			printf("%6d: CHILD's self->pt_lid is wrong!\n", getpid());
		/*
		 * Workaround, intentionally left disabled so that the
		 * problem reproduces; enable the next line to reset pt_lid.
		 */
		/* ((struct __pthread_st *)pthread_self())->pt_lid = _lwp_self(); */
		show_lwps();
		pthread_mutex_init(&mutex, NULL);
		pthread_cond_init(&cond, NULL);
		init_timer();
		start_timer();
	} else {
		/* parent process */
		printf("%6d: PARENT _lwp_self()=%d pthread_self()->pt_lid=%d\n",
		    getpid(), _lwp_self(), ptst->pt_lid);
		show_lwps();
		start_timer();
		show_lwps();
	}
	/*
	 * pthread_cond_wait() must be entered with the mutex held by the
	 * caller; calling it on an unlocked mutex is undefined.
	 */
	pthread_mutex_lock(&mutex);
	pthread_cond_wait(&cond, &mutex);
	pthread_mutex_unlock(&mutex);
}
/*
 * Start routine of the thread created by main(): announces itself, runs
 * my_fork(), and then terminates the whole process with status 0.
 * arg is unused.  The trailing return only satisfies the signature.
 */
static void *
thirdthread(void *arg)
{
	(void)arg;

	printf("%6d: starting thirdthread\n", getpid());
	my_fork();

	exit(0);
	/* NOTREACHED */
	return NULL;
}
/*
 * Entry point of the reproduction.
 *
 * Initialises and starts the timer thread, initialises mutex/cond, creates
 * the thread that runs thirdthread(), and then blocks on `cond`.  Nothing
 * in this program signals `cond`; the process normally ends when
 * thirdthread() calls exit().  If thread creation fails, or the wait ever
 * returns, main sleeps for one second and returns 0.
 */
int
main(void)
{
	init_timer();
	start_timer();
	pthread_mutex_init(&mutex, NULL);
	pthread_cond_init(&cond, NULL);
	if (pthread_create(&thread, NULL, &thirdthread, NULL) == 0) {
		/*
		 * pthread_cond_wait() must be entered with the mutex held
		 * by the caller; calling it on an unlocked mutex is
		 * undefined.
		 */
		pthread_mutex_lock(&mutex);
		pthread_cond_wait(&cond, &mutex);
		pthread_mutex_unlock(&mutex);
	}
	sleep(1);
	return 0;
}
This produces the following result:
16558: starting timer
16558: zzz...
16558: starting thirdthread
16558: lwps: 0 0 0 -1
16558: stopping timer
16558: lwps: 0 -1 0 -1
16558: fork() in thirdthread(_lwp_self()=3)
16558: PARENT _lwp_self()=3 pthread_self()->pt_lid=3
5661: CHILD _lwp_self()=1 pthread_self()->pt_lid=3
16558: lwps: 0 -1 0 -1
5661: CHILD's self->pt_lid is wrong!
16558: starting timer
5661: lwps: 0 -1 -1 -1
5661: starting timer
16558: zzz...
5661: zzz...
16558: lwps: 0 -1 0 0
5661: err: ESRCH No such process
The last error message occurs because _lwp_park is called with a wrong argument:
2570 2 a.out CALL _lwp_park(0xbb7ffd94,3,0x804a348,0x804a348)
2570 2 a.out RET _lwp_park -1 errno 3 No such process
A workaround for the above program is to uncomment this line:
/* ((struct __pthread_st *)pthread_self())->pt_lid = _lwp_self(); */
That line resets pt_lid, after which the program works.
A fix for NetBSD may be the following, but it may need more initialization code.
Index: lib/libpthread/pthread.c
===================================================================
RCS file: /cvsroot/src/lib/libpthread/pthread.c,v
retrieving revision 1.106.2.2
diff -u -p -r1.106.2.2 pthread.c
--- lib/libpthread/pthread.c 11 Jan 2010 00:47:29 -0000 1.106.2.2
+++ lib/libpthread/pthread.c 8 Feb 2010 22:59:19 -0000
@@ -261,6 +261,8 @@ pthread__child_callback(void)
* much. Anything that permits some pthread_* calls to work is
* merely being polite.
*/
+ struct __pthread_st *self = pthread_self();
+ self->pt_lid = _lwp_self();
pthread__started = 0;
}
>How-To-Repeat:
>Fix:
Home |
Main Index |
Thread Index |
Old Index