NetBSD-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

lib/56275: pthread_cond_timedwait broken



>Number:         56275
>Category:       lib
>Synopsis:       pthread_cond_timedwait broken
>Confidential:   no
>Severity:       critical
>Priority:       high
>Responsible:    lib-bug-people
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Fri Jun 25 01:30:01 +0000 2021
>Originator:     David A. Holland
>Release:        NetBSD 9.99.85 (20210623)
>Organization:
>Environment:
System: NetBSD valkyrie 9.99.85 NetBSD 9.99.85 (VALKYRIE) #7: Wed Jun 23 18:32:25 EDT 2021  dholland@valkyrie:/usr/src/sys/arch/amd64/compile/VALKYRIE amd64
Architecture: x86_64
Machine: amd64
>Description:

I was running tests for a database engine and one failed; it was
expecting that if pthread_cond_broadcast is not called,
pthread_cond_timedwait should only time out and never return 0
(meaning it was awakened). This expectation is not strictly legitimate
according to POSIX, which allows arbitrary spontaneous wakeups, but it
is reasonable, and in fact our code appears to guarantee it. There are
three ways out of the wait loop in pthread_cond_timedwait:
   - thread cancellation (irrelevant)
   - ETIMEDOUT or some other permanent error from _lwp_park
     (results in nonzero return from pthread_cond_timedwait)
   - something setting waiter.lid to 0
     (arises from a wakeup, results in zero return)

There's only one place where ->lid is set to 0, and that's when a
mutex is passed to the next thread. (Condvar wakeups happen by
shifting the waiter queue to the associated mutex.) When no
pthread_cond_broadcast calls are happening, condvar waiter queues
should not be transferred to mutexes and nothing should ever wake up a
waiter. As far as I can tell.

Nonetheless, it was happening. The best guess I have is that the
waiter queues are getting crosslinked somehow.

I was also seeing other odd behavior like 1-second sleeps consistently
getting spurious wakeups after 0.72-0.73 seconds, short sleeps
sometimes taking twice as long as requested but _not_ reporting
timeout, and other oddities, some of which is doubtless a result of
the database engine's nontrivial behavior.

So I wrote a (comparatively) simple test program to try to isolate
what's going on better. It doesn't exhibit the same broken behavior,
but it's quite broken in its own right.

After the initialization phase (once the ----- prints) there are no
pthread_cond_broadcast calls, but there are sometimes spurious
wakeups. Also pthread_condvar_timedwait frequently returns ETIMEDOUT
after sleeping only a few us, which is definitely not permitted by
POSIX. You can get more of this by commenting out the print in the
loop in run().

And also, every time the thing deadlocks (never in the initialization
phase) -- I'm pretty sure this is not because the code is wrong, as
all the mutex calls are paired and all the condvar waits have a
timeout attached.

>How-To-Repeat:

compile with gcc -Wall -pthread sludge.c -o sludge

   --- cut here (sludge.c) ---
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <err.h>

#include <pthread.h>

#define NTHREADS 32
#define NCONDS 8
#define NLOOPS 30

static unsigned state[NCONDS];
static pthread_cond_t conds[NCONDS];
static pthread_mutex_t cmutexes[NCONDS];

static pthread_t threads[NTHREADS];

static void spin(void) {
   volatile double d;
   unsigned i;

   d = 6.3;
   for (i=0; i<100000; i++) {
      d = d * 5.4 - 3.2;
   }
}

static void do_acquire(unsigned n) {
   int e;

   e = pthread_mutex_lock(&cmutexes[n]);
   if (e) {
      errx(1, "pthread_mutex_lock: %s", strerror(e));
   }
}

static void do_release(unsigned n) {
   int e;

   e = pthread_mutex_unlock(&cmutexes[n]);
   if (e) {
      errx(1, "pthread_mutex_unlock: %s", strerror(e));
   }
}

static void do_broadcast(unsigned n) {
   int e;

   e = pthread_cond_broadcast(&conds[n]);
   if (e) {
      errx(1, "pthread_cond_broadcast: %s", strerror(e));
   }
}

static void do_timedwait(unsigned t, unsigned n, unsigned usecs) {
   struct timespec start, then, end, elapsed;
   unsigned eusecs;
   int e;

   clock_gettime(CLOCK_MONOTONIC, &start);
   then.tv_sec += ((unsigned long long)usecs * 1000 + start.tv_nsec) / 1000000000;
   then.tv_nsec = ((unsigned long long)usecs * 1000 + start.tv_nsec) % 1000000000;
   e = pthread_cond_timedwait(&conds[n], &cmutexes[n], &then);
   clock_gettime(CLOCK_MONOTONIC, &end);
   if (e != 0 && e != ETIMEDOUT) {
      errx(1, "pthread_cond_timedwait: %s", strerror(e));
   }

   if (end.tv_nsec < start.tv_nsec) {
      elapsed.tv_nsec = end.tv_nsec + 1000000000 - start.tv_nsec;
      elapsed.tv_sec = end.tv_sec - start.tv_sec - 1;
   }
   else {
      elapsed.tv_nsec = end.tv_nsec - start.tv_nsec;
      elapsed.tv_sec = end.tv_sec - start.tv_sec;
   }

   eusecs = elapsed.tv_sec * 1000000 + elapsed.tv_nsec / 1000;
   if (eusecs > 12 * usecs / 10 || eusecs < 9 * usecs / 10) {
      fprintf(stderr, "thread %u sleep %u, slept %u: %s\n",
	      t, usecs, eusecs, e == 0 ? "awoke" : "timeout");
   }
}

static void begin(unsigned t, unsigned s) {
   unsigned i, n;

   for (i=0; i<NCONDS; i++) {
      n = (i + s) % NCONDS;
      do_acquire(n);
      spin();
      do_release(n);
   }

   for (i=0; i<s; i++) {
      do_acquire(i);
      while (state[i] == 0) {
	 do_timedwait(t, i, 10000);
      }
      do_release(i);
   }
   do_acquire(s);
   if (!state[i]) {
      state[i] = 1;
      fprintf(stderr, "initialized %u\n", s);
   }
   do_broadcast(s);
   do_release(s);
   for (i=s+1; i<NCONDS; i++) {
      do_acquire(i);
      while (state[i] == 0) {
	 do_timedwait(t, i, 10000);
      }
      do_release(i);
   }
}

static void run(unsigned t, unsigned s) {
   unsigned n, k;

   n = s;
   for (k=0; k<NLOOPS; k++) {
      fprintf(stderr, "[thread %u trying cond %u]\n", t, n);
      do_acquire(n);
      do_timedwait(t, n, 100000);
      do_release(n);
      n = (n * 119) % NCONDS;
   }
}

static void *thread(void *q) {
   unsigned t, s;

   t = (uintptr_t)q;
   s = t % NCONDS;
   begin(t, s);
   if (t == 0) {
      sleep(1);
      fprintf(stderr, "--------\n");
      sleep(4);
   }
   else {
      sleep(5);
   }
   run(t, s);
   return NULL;
}

static void spawn(void) {
   unsigned i;
   int e;
   pthread_attr_t attr;

   e = pthread_attr_init(&attr);
   if (e) {
      errx(1, "pthread_attr_init: %s", strerror(e));
   }

   for (i=0; i<NTHREADS; i++) {
      e = pthread_create(&threads[i], &attr, thread, (void *)(uintptr_t)i);
      if (e) {
	 errx(1, "pthread_create: %s", strerror(e));
      }
   }
   pthread_attr_destroy(&attr);
}

static void join(void) {
   unsigned i;
   int e;
   void *result;

   for (i=0; i<NTHREADS; i++) {
      e = pthread_join(threads[i], &result);
      if (e) {
	 errx(1, "pthread_join: %s", strerror(e));
      }
      (void)result;
   }
}


static void init(void) {
   unsigned i;
   int e;
   pthread_condattr_t attr;
   pthread_mutexattr_t mattr;

   e = pthread_condattr_init(&attr);
   if (e) {
      errx(1, "pthread_condattr_init: %s", strerror(e));
   }
   e = pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);
   if (e) {
      errx(1, "pthread_condattr_setclock: %s", strerror(e));
   }

   e = pthread_mutexattr_init(&mattr);
   if (e) {
      errx(1, "pthread_mutexattr_init: %s", strerror(e));
   }

   for (i=0; i<NCONDS; i++) {
      e = pthread_cond_init(&conds[i], &attr);
      if (e) {
	 errx(1, "pthread_cond_init: %s", strerror(e));
      }

      e = pthread_mutex_init(&cmutexes[i], &mattr);
      if (e) {
	 errx(1, "pthread_mutex_init: %s", strerror(e));
      }
   }

   pthread_condattr_destroy(&attr);
   pthread_mutexattr_destroy(&mattr);
}

int main(void) {
   init();
   spawn();
   join();
   return 0;
}
   --- cut here ---
>Fix:

duct tape?



Home | Main Index | Thread Index | Old Index