NetBSD-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: PR/57437 CVS commit: src/lib/libpthread



> Date: Fri, 26 May 2023 10:29:54 +0000
> From: Taylor R Campbell <riastradh%NetBSD.org@localhost>
> 
> We could add a call to pthread__smt_wake in pthread_mutex_unlock to
> match a pthread__smt_wait in pthread__mutex_pause, but that's not
> enough on its own, because currently pthread__mutex_spin calls
> pthread__mutex_pause in a loop and might just WFE again without
> realizing it can make progress after it has been woken by SEV.
> 
> So we would need to restructure pthread__mutex_spin so that it calls
> pthread__smt_wait exactly once after it examines the owner and
> determines the owner is still running on a CPU.

The attached patch does just this for Arm, and leaves non-Arm alone.
It can be tested independently for pthread_mutex and pthread_rwlock by
defining or undefining the appropriate macro in pthread_md.h.  I don't
have the time to do performance measurements right now, but if you
feel so inclined to run some, we would certainly consider the results!
From 81aff4df7bf94b3fc8a88db755de2451711ed040 Mon Sep 17 00:00:00 2001
From: Taylor R Campbell <riastradh%NetBSD.org@localhost>
Date: Fri, 26 May 2023 10:56:00 +0000
Subject: [PATCH] WIP: Use WFE/SEV judiciously in pthread_mutex and
 pthread_rwlock.

Can be turned on or off by defining or undefining the appropriate
macro in pthread_md.h:

PTHREAD__MUTEX_SPIN_WAIT_WAKE
PTHREAD__RWLOCK_SPIN_WAIT_WAKE

PR port-arm/57437

XXX pullup-10
---
 lib/libpthread/arch/aarch64/pthread_md.h |  3 +++
 lib/libpthread/arch/arm/pthread_md.h     |  6 +++++-
 lib/libpthread/pthread_mutex.c           | 17 +++++++++++++++--
 lib/libpthread/pthread_rwlock.c          | 22 ++++++++++++++++++++++
 4 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/lib/libpthread/arch/aarch64/pthread_md.h b/lib/libpthread/arch/aarch64/pthread_md.h
index 7b79e6d347ea..c8c0b4265211 100644
--- a/lib/libpthread/arch/aarch64/pthread_md.h
+++ b/lib/libpthread/arch/aarch64/pthread_md.h
@@ -42,6 +42,9 @@ pthread__sp(void)
 	return ret;
 }
 
+#define	PTHREAD__MUTEX_SPIN_WAIT_WAKE
+#define	PTHREAD__RWLOCK_SPIN_WAIT_WAKE
+
 #define pthread__smt_wait()	__asm __volatile("wfe") /* wfe */
 #define pthread__smt_wake()	__asm __volatile("sev") /* sev */
 
diff --git a/lib/libpthread/arch/arm/pthread_md.h b/lib/libpthread/arch/arm/pthread_md.h
index 1ed6a4914cae..cc04ed4a788d 100644
--- a/lib/libpthread/arch/arm/pthread_md.h
+++ b/lib/libpthread/arch/arm/pthread_md.h
@@ -50,12 +50,16 @@ pthread__sp(void)
 }
 
 #if defined(__thumb__) && defined(_ARM_ARCH_6)
+#define	PTHREAD__MUTEX_SPIN_WAIT_WAKE
+#define	PTHREAD__RWLOCK_SPIN_WAIT_WAKE
 #define pthread__smt_wait()	__asm __volatile(".inst.n 0xbf20") /* wfe */
 #define pthread__smt_wake()	__asm __volatile(".inst.n 0xbf40") /* sev */
 #elif !defined(__thumb__)
+#define	PTHREAD__MUTEX_SPIN_WAIT_WAKE
+#define	PTHREAD__RWLOCK_SPIN_WAIT_WAKE
 #define pthread__smt_wait()	__asm __volatile(".inst 0xe320f002") /* wfe */
 #define pthread__smt_wake()	__asm __volatile(".inst 0xe320f004") /* sev */
-#else
+#else  /* defined(__thumb__) && !defined(_ARM_ARCH_6) */
 #define pthread__smt_wait()	__nothing
 #define pthread__smt_wake()	__nothing
 #endif
diff --git a/lib/libpthread/pthread_mutex.c b/lib/libpthread/pthread_mutex.c
index 00005c9eac6b..77f402ff1d12 100644
--- a/lib/libpthread/pthread_mutex.c
+++ b/lib/libpthread/pthread_mutex.c
@@ -103,7 +103,9 @@ struct waiter {
 static void	pthread__mutex_wakeup(pthread_t, struct pthread__waiter *);
 static int	pthread__mutex_lock_slow(pthread_mutex_t *,
     const struct timespec *);
+#ifndef PTHREAD__MUTEX_SPIN_WAIT_WAKE
 static void	pthread__mutex_pause(void);
+#endif
 
 int		_pthread_mutex_held_np(pthread_mutex_t *);
 pthread_t	_pthread_mutex_owner_np(pthread_mutex_t *);
@@ -238,6 +240,7 @@ pthread_mutex_timedlock(pthread_mutex_t* ptm, const struct timespec *ts)
 	return pthread__mutex_lock_slow(ptm, ts);
 }
 
+#ifndef PTHREAD__MUTEX_SPIN_WAIT_WAKE
 /* We want function call overhead. */
 NOINLINE static void
 pthread__mutex_pause(void)
@@ -245,6 +248,7 @@ pthread__mutex_pause(void)
 
 	pthread__smt_pause();
 }
+#endif
 
 /*
  * Spin while the holder is running.  'lwpctl' gives us the true
@@ -254,18 +258,24 @@ NOINLINE static void *
 pthread__mutex_spin(pthread_mutex_t *ptm, pthread_t owner)
 {
 	pthread_t thread;
-	unsigned int count, i;
+#ifndef PTHREAD__MUTEX_SPIN_WAIT_WAKE
+	unsigned int count = 2, i;
+#endif
 
-	for (count = 2;; owner = ptm->ptm_owner) {
+	for (;; owner = ptm->ptm_owner) {
 		thread = (pthread_t)MUTEX_OWNER(owner);
 		if (thread == NULL)
 			break;
 		if (thread->pt_lwpctl->lc_curcpu == LWPCTL_CPU_NONE)
 			break;
+#ifdef PTHREAD__MUTEX_SPIN_WAIT_WAKE
+		pthread__smt_wait();
+#else
 		if (count < 128)
 			count += count;
 		for (i = count; i != 0; i--)
 			pthread__mutex_pause();
+#endif
 	}
 
 	return owner;
@@ -501,6 +511,9 @@ pthread_mutex_unlock(pthread_mutex_t *ptm)
 	 */
 #ifndef PTHREAD__ATOMIC_IS_MEMBAR
 	membar_enter();
+#endif
+#ifdef PTHREAD__MUTEX_SPIN_WAIT_WAKE
+	pthread__smt_wake();
 #endif
 	if (MUTEX_OWNER(newval) == 0 && ptm->ptm_waiters != NULL) {
 		pthread__mutex_wakeup(self,
diff --git a/lib/libpthread/pthread_rwlock.c b/lib/libpthread/pthread_rwlock.c
index 90e48a14f3ab..a5c112bbdf5c 100644
--- a/lib/libpthread/pthread_rwlock.c
+++ b/lib/libpthread/pthread_rwlock.c
@@ -127,6 +127,7 @@ pthread_rwlock_destroy(pthread_rwlock_t *ptr)
 	return 0;
 }
 
+#ifndef PTHREAD__RWLOCK_SPIN_WAIT_WAKE
 /* We want function call overhead. */
 NOINLINE static void
 pthread__rwlock_pause(void)
@@ -134,12 +135,15 @@ pthread__rwlock_pause(void)
 
 	pthread__smt_pause();
 }
+#endif
 
 NOINLINE static int
 pthread__rwlock_spin(uintptr_t owner)
 {
 	pthread_t thread;
+#ifndef PTHREAD__RWLOCK_SPIN_WAIT_WAKE
 	unsigned int i;
+#endif
 
 	if ((owner & ~RW_THREAD) != RW_WRITE_LOCKED)
 		return 0;
@@ -149,8 +153,12 @@ pthread__rwlock_spin(uintptr_t owner)
 	    thread->pt_lwpctl->lc_curcpu == LWPCTL_CPU_NONE)
 		return 0;
 
+#ifdef PTHREAD__RWLOCK_SPIN_WAIT_WAKE
+	pthread__smt_wait();
+#else
 	for (i = 128; i != 0; i--)
 		pthread__rwlock_pause();
+#endif
 	return 1;
 }
 
@@ -487,6 +495,9 @@ pthread_rwlock_unlock(pthread_rwlock_t *ptr)
 			next = rw_cas(ptr, owner, new);
 			if (owner == next) {
 				/* Released! */
+#ifdef PTHREAD__RWLOCK_SPIN_WAIT_WAKE
+				pthread__smt_wake();
+#endif
 				return 0;
 			}
 			continue;
@@ -559,6 +570,17 @@ pthread_rwlock_unlock(pthread_rwlock_t *ptr)
 			ptr->ptr_nreaders = 0;
 			pthread__unpark_all(&ptr->ptr_rblocked, self,
 			    interlock);
+
+			/*
+			 * If we're doing CPU wait/wake instructions
+			 * for spin loops, there may also be other CPUs
+			 * spinning for a read lock -- wake them
+			 * promptly too; no reason to make them hang
+			 * when they could be making progress.
+			 */
+#ifdef PTHREAD__RWLOCK_SPIN_WAIT_WAKE
+			pthread__smt_wake();
+#endif
 		}
 		pthread_mutex_unlock(interlock);
 


Home | Main Index | Thread Index | Old Index