NetBSD-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: lib/59125: pthread_once(3) races with concurrent fork



My first attempt was to count a fork generation number, and shoehorn
it into the existing pthread_once structure with a futex, so that
pthread_once can reliably distinguish between

(a) another thread in this process is initializing it, or
(b) another thread in some ancestor of this process was initializing
    it when we forked,

and handle waiting and wakeups for state transitions just as it
currently does with the fork-unsafe pthread_mutex(3).

The fork generation number itself doesn't require any synchronization:
it is only ever modified while the process is single-threaded, in the
fork child.  Of course, it will roll over after 2^30 _nested_ forks,
but most process hierarchies never come near that (and any exec resets
it, naturally).

(It turns out this is how glibc works too.)

Unfortunately, that doesn't help: the child can still observe multiple
calls to the init routine, if the fork happens anywhere from the start
of the init routine to when the pthread_once_t is marked done.  So the
attached variant of the test program which checks for repeated
initialization calls still fails -- perhaps at lower frequency than
the original test program's deadlocks, but not much lower, often
enough to trigger on my laptop in a VM after a few minutes; in any
case it rounds to `still not fork-safe'.
#include <sys/wait.h>

#include <err.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

pthread_once_t once;
int x;
long long ntrials;

/*
 * siginfo(signo)
 *
 *	Signal handler for SIGINFO/SIGINT: report how many trials have
 *	completed without a hang.  Uses snprintf_ss(3) (NetBSD's
 *	async-signal-safe snprintf) and write(2) because stdio is not
 *	safe to call from signal context.
 *
 *	Fix: ntrials is a signed long long, so the conversion must be
 *	%lld, not %llu (mismatched specifier/argument is undefined
 *	behavior).
 */
static void
siginfo(int signo)
{
	char buf[128];

	snprintf_ss(buf, sizeof(buf), "no hang after %lld trials\n", ntrials);
	(void)write(STDOUT_FILENO, buf, strlen(buf));

	/*
	 * For anything other than SIGINFO (i.e., SIGINT), restore the
	 * default disposition and re-raise so the process terminates
	 * with the conventional signal status after reporting.
	 */
	if (signo != SIGINFO) {
		(void)signal(signo, SIG_DFL);
		raise(signo);
	}
}

/*
 * ofunc_silent()
 *
 *	Once-init routine: bump the global counter, silently.  The test
 *	later verifies it ran exactly once by checking x == 1.
 */
static void
ofunc_silent(void)
{
	x += 1;
}

/*
 * fork_and_once(cookie)
 *
 *	Thread start routine: rendezvous at the barrier so fork() here
 *	races as closely as possible with pthread_once() in the main
 *	thread; then fork, have the child run the once routine, and
 *	verify the child saw it run exactly once.
 *
 *	The child exits with status x - 1, so any status other than 0
 *	means the init routine ran zero times or more than once; a
 *	SIGALRM death means the child deadlocked in pthread_once.
 *
 *	Fix: ntrials is a signed long long, so use %lld (not %llu) in
 *	the errx(3) format strings.
 */
static void *
fork_and_once(void *cookie)
{
	pthread_barrier_t *bar = cookie;
	pid_t pid, child;
	int status;

	(void)pthread_barrier_wait(bar);
	if ((pid = fork()) == -1)
		err(1, "fork");
	if (pid == 0) {
		/* Bound a possible deadlock in pthread_once. */
		(void)alarm(1);
		(void)pthread_once(&once, &ofunc_silent);
		_exit(x - 1);
	}
	if ((child = waitpid(pid, &status, 0)) == -1)
		err(1, "waitpid");
	if (child != pid) {
		errx(1, "[%lld trials] child=%lld pid=%lld", ntrials,
		    (long long)child, (long long)pid);
	}
	if (WIFSIGNALED(status)) {
		errx(1, "[%lld trials] child exited on signal %d (%s)",
		    ntrials,
		    WTERMSIG(status),
		    strsignal(WTERMSIG(status)));
	}
	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
		errx(1, "[%lld trials] child exited 0x%x", ntrials, status);
	return NULL;
}

int
main(void)
{
	static pthread_once_t once0 = PTHREAD_ONCE_INIT;
	pthread_barrier_t bar;
	sigset_t mask, omask;
	int error;

	error = pthread_barrier_init(&bar, NULL, 2);
	if (error)
		err(1, "pthread_barrier_init");

	if (sigemptyset(&mask) == -1)
		err(1, "sigemptyset");
	if (sigaddset(&mask, SIGINFO) == -1)
		err(1, "sigaddset");
	if (sigaddset(&mask, SIGINT) == -1)
		err(1, "sigaddset");
	if (signal(SIGINFO, &siginfo) == SIG_ERR)
		err(1, "signal(SIGINFO)");
	if (signal(SIGINT, &siginfo) == SIG_ERR)
		err(1, "signal(SIGINFO)");

	for (;;) {
		pthread_t t;

		if (sigprocmask(SIG_BLOCK, &mask, &omask) == -1)
			err(1, "sigprocmask(SIG_BLOCK)");

		once = once0;
		x = 0;

		error = pthread_create(&t, NULL, &fork_and_once, &bar);
		if (error)
			errc(1, error, "pthread_create");
		(void)alarm(1);
		(void)pthread_barrier_wait(&bar);
		(void)pthread_once(&once, &ofunc_silent);
		(void)alarm(0);
		error = pthread_join(t, NULL);
		if (error)
			errc(1, error, "pthread_join");

		if (x != 1)
			errx(1, "[%llu trials] x=%d\n", ntrials, x);

		ntrials++;
		if (sigprocmask(SIG_SETMASK, &omask, NULL) == -1)
			err(1, "sigprocmask(SIG_SETMASK)");
	}
	return 0;
}
# HG changeset patch
# User Taylor R Campbell <riastradh%NetBSD.org@localhost>
# Date 1741198759 0
#      Wed Mar 05 18:19:19 2025 +0000
# Branch trunk
# Node ID db0cae397ecdf2795f54954e2009a3d06fdb95ef
# Parent  cd6b61c1b621c4bb80fbe22e714ba188467be421
# EXP-Topic riastradh-pr59125-oncefork
WIP: pthread_once(3): Try to avoid fork races with futexes.

Turns out, this doesn't work!  It is still possible for a fork
concurrent with pthread_once to cause the child to see the
initialization action run twice.

PR lib/59125: pthread_once(3) races with concurrent fork

diff -r cd6b61c1b621 -r db0cae397ecd lib/libc/gen/pthread_atfork.c
--- a/lib/libc/gen/pthread_atfork.c	Wed Mar 05 15:29:16 2025 +0000
+++ b/lib/libc/gen/pthread_atfork.c	Wed Mar 05 18:19:19 2025 +0000
@@ -175,6 +175,8 @@ out:	mutex_unlock(&atfork_lock);
 	return error;
 }
 
+uint64_t __libc_forkgen;
+
 pid_t
 fork(void)
 {
@@ -199,6 +201,7 @@ fork(void)
 		mutex_unlock(&atfork_lock);
 	} else {
 		/* We are the child */
+		__libc_forkgen++;
 		_malloc_postfork_child();
 		SIMPLEQ_FOREACH(iter, &childq, next)
 			(*iter->fn)();
diff -r cd6b61c1b621 -r db0cae397ecd lib/libc/include/forkgen.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/libc/include/forkgen.h	Wed Mar 05 18:19:19 2025 +0000
@@ -0,0 +1,36 @@
+/*	$NetBSD$	*/
+
+/*-
+ * Copyright (c) 2025 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef	_LIB_LIBC_FORKGEN_H_
+#define	_LIB_LIBC_FORKGEN_H_
+
+#include <stdint.h>
+
+extern uint64_t __libc_forkgen;
+
+#endif	/* _LIB_LIBC_FORKGEN_H_ */
diff -r cd6b61c1b621 -r db0cae397ecd lib/libpthread/pthread_once.c
--- a/lib/libpthread/pthread_once.c	Wed Mar 05 15:29:16 2025 +0000
+++ b/lib/libpthread/pthread_once.c	Wed Mar 05 18:19:19 2025 +0000
@@ -1,12 +1,9 @@
-/*	$NetBSD: pthread_once.c,v 1.5 2025/03/04 00:41:00 riastradh Exp $	*/
+/*	$NetBSD$	*/
 
 /*-
- * Copyright (c) 2001, 2003 The NetBSD Foundation, Inc.
+ * Copyright (c) 2025 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
- * This code is derived from software contributed to The NetBSD Foundation
- * by Nathan J. Williams, and by Jason R. Thorpe.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -15,13 +12,6 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *        This product includes software developed by the NetBSD
- *        Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
@@ -37,41 +27,226 @@
  */
 
 #include <sys/cdefs.h>
-__RCSID("$NetBSD: pthread_once.c,v 1.5 2025/03/04 00:41:00 riastradh Exp $");
+__RCSID("$NetBSD");
 
 /* Need to use libc-private names for atomic operations. */
 #include "../../common/lib/libc/atomic/atomic_op_namespace.h"
 
+#include <sys/futex.h>
+#include <sys/syscall.h>
+
+#include <limits.h>
+#include <stdatomic.h>
+#include <unistd.h>
+
+#include "../../lib/libc/include/forkgen.h"
+
 #include "pthread.h"
 #include "pthread_int.h"
 #include "reentrant.h"
 
-static void
-once_cleanup(void *closure)
+/*****************************************************************************/
+/*
+ * BEGIN MOVE ME ELSEWHERE
+ */
+
+#define	atomic_store_relaxed(p, v)					      \
+	atomic_store_explicit(p, v, memory_order_relaxed)
+
+#define	atomic_store_release(p, v)					      \
+	atomic_store_explicit(p, v, memory_order_release)
+
+#define	atomic_load_relaxed(p)						      \
+	atomic_load_explicit(p, memory_order_relaxed)
+
+#define	atomic_load_acquire(p)						      \
+	atomic_load_explicit(p, memory_order_acquire)
+
+static int
+atomic_cas_int(volatile int *p, int o, int n)
+{
+
+	(void)atomic_compare_exchange_strong_explicit(p, &o, n,
+	    memory_order_relaxed, memory_order_relaxed);
+	return o;
+}
+
+static int
+futex(volatile int *uaddr, int op, int val, const struct timespec *timeout,
+    volatile int *uaddr2, int val2, int val3)
+{
+	return syscall(SYS___futex, uaddr, op, val, timeout, uaddr2,
+	    val2, val3);
+}
+
+static int
+futex_wait(volatile int *uaddr, int cmpval, const struct timespec *timeout)
+{
+
+	return futex(uaddr, FUTEX_WAIT, /*val*/cmpval, timeout, /*uaddr2*/NULL,
+	    /*val2*/0, /*val3*/0);
+}
+
+static int
+futex_wake(volatile int *uaddr, int nwake)
 {
 
-       pthread_mutex_unlock((pthread_mutex_t *)closure);
+	return futex(uaddr, FUTEX_WAKE, /*val*/nwake, /*timeout*/NULL,
+	    /*uaddr2*/NULL, /*val2*/0, /*val3*/0);
+}
+
+static int
+futex_wake_all(volatile int *uaddr)
+{
+
+	return futex_wake(uaddr, INT_MAX);
+}
+
+/*
+ * END MOVE ME ELSEWHERE
+ */
+/*****************************************************************************/
+
+#define	ONCE_LOCKED	((int)__BIT(31))
+#define	ONCE_WAITING	((int)__BIT(30))
+#define	ONCE_FORKGEN	((int)__BITS(29,0))
+#define	ONCE_INIT	0
+#define	ONCE_DONE	1
+
+static int
+once_forkgen(void)
+{
+
+	/*
+	 * This algorithm is limited to 2^30 - 1 nested forks before it
+	 * may deadlock in the event of a race.
+	 */
+	return __libc_forkgen & ONCE_FORKGEN;
+}
+
+static void
+once_cleanup(void *cookie)
+{
+	pthread_once_t *once = cookie;
+
+	/*
+	 * Clear the state and wake other callers so someone else can
+	 * try in ourstead.
+	 *
+	 * If concurrent fork happens before the store, the child will
+	 * see a stale fork generation and restore it to the initial
+	 * state.  If concurrent fork happens after the store, the
+	 * child will see the initial state.  Either way, the wakeups
+	 * don't matter for the child -- the thread calling fork won't
+	 * be woken up, and any other threads won't be in the child, so
+	 * it's all moot.
+	 *
+	 * Don't worry about micro-optimizing this with the
+	 * ONCE_WAITING bit -- cancellation is an unlikely scenario
+	 * that's not worth optimizing; let's keep it simple and
+	 * reliable.
+	 */
+	pthread__assert((atomic_load_relaxed(&once->pto_done) &
+		~ONCE_WAITING) == (ONCE_LOCKED|once_forkgen()));
+	atomic_store_relaxed(&once->pto_done, ONCE_INIT);
+	(void)futex_wake_all(&once->pto_done);
 }
 
 int
-pthread_once(pthread_once_t *once_control, void (*routine)(void))
+pthread_once(pthread_once_t *once, void (*init)(void))
 {
+	const int forkgen = once_forkgen();
+	int done, cancelstate;
+
 	if (__predict_false(__uselibcstub))
-		return __libc_thr_once_stub(once_control, routine);
+		return __libc_thr_once_stub(once, init);
+
+	/*
+	 * Optimistically check whether it's already done, with a
+	 * load-acquire to synchronize with the store-release in
+	 * whatever thread did the initialization.
+	 */
+	if (__predict_true(atomic_load_acquire(&once->pto_done) ==
+		ONCE_DONE))
+		return 0;
 
-	if (once_control->pto_done == 0) {
-		pthread_mutex_lock(&once_control->pto_mutex);
-		pthread_cleanup_push(&once_cleanup, &once_control->pto_mutex);
-		if (once_control->pto_done == 0) {
-			routine();
-			membar_release();
-			once_control->pto_done = 1;
-		}
-		pthread_cleanup_pop(1);
-	} else {
+	/*
+	 * Defer cancellation while we synchronize in case the caller
+	 * is configured as PTHREAD_CANCEL_ASYNCHRONOUS.
+	 */
+	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cancelstate);
+top:
+	/*
+	 * Try to claim it, marked with the current fork generation.
+	 * If it was done in the interim (unlikely, only if we lose a
+	 * race to another thread), great -- issue an acquire barrier
+	 * to synchronize with the other thread's initialization,
+	 * restore the cancellation state, and we're done.
+	 */
+	done = atomic_cas_int(&once->pto_done, ONCE_INIT,
+	    ONCE_LOCKED|forkgen);
+	if (__predict_false(done == ONCE_DONE)) {
 		membar_acquire();
+		goto out;
 	}
 
+	/*
+	 * If is held by a another thread in this process, that thread
+	 * is presumably still running -- even if it has been
+	 * cancelled, the once_cleanup routine will handle it.  Wait
+	 * for that thread to release it.
+	 *
+	 * If it was held by another thread in an ancestor of this
+	 * process, from a different fork generation, try to clear it
+	 * and start over.  We have to use atomic_cas_* to clear it
+	 * because another thread might be doing the same thing -- it
+	 * doesn't matter which of us wins, but the transition to clear
+	 * must happen only once (otherwise another thread might have
+	 * finished initialization first, at which point we must not
+	 * clear it).
+	 */
+	if (__predict_false(done != ONCE_INIT)) {
+		if ((done & ~ONCE_WAITING) == (ONCE_LOCKED|forkgen)) {
+			(void)futex_wait(&once->pto_done, done, NULL);
+		} else {
+			(void)atomic_cas_int(&once->pto_done, done,
+			    ONCE_INIT);
+		}
+		goto top;
+	}
+
+	/*
+	 * Acquired at the current fork generation.  Arrange to back
+	 * out if cancelled, and then run the init routine with the
+	 * cancellation state restored to the caller's.
+	 */
+	pthread_cleanup_push(&once_cleanup, once);
+	pthread_setcancelstate(cancelstate, NULL);
+	(*init)();
+	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cancelstate);
+	pthread_cleanup_pop(/*execute*/0);
+
+	/*
+	 * Mark it done with a store-release, to synchronize with other
+	 * users' load-acquire.  If there were other waiters, notify
+	 * other callers that we are done -- but avoid a futex syscall
+	 * if not.
+	 */
+	membar_release();
+	done = atomic_cas_int(&once->pto_done, ONCE_LOCKED|forkgen,
+	    ONCE_DONE);
+	if (__predict_false(done != (ONCE_LOCKED|forkgen))) {
+		pthread__assert(done == (ONCE_LOCKED|ONCE_WAITING|forkgen));
+		atomic_store_relaxed(&once->pto_done, ONCE_DONE);
+		(void)futex_wake_all(&once->pto_done);
+	}
+out:
+	/*
+	 * Restore the cancellation state now that we are done
+	 * synchronizing and no longer need to clean up after
+	 * ourselves.
+	 */
+	pthread_setcancelstate(cancelstate, NULL);
 	return 0;
 }
 


Home | Main Index | Thread Index | Old Index