Subject: Re: [nathanw_sa] RAS locking in libpthread
To: None <tech-userlevel@netbsd.org>
From: Jason R Thorpe <thorpej@wasabisystems.com>
List: tech-userlevel
Date: 12/30/2002 14:23:35
--45Z9DzgjV8m4Oswq
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

On Sun, Dec 29, 2002 at 05:17:39PM -0800, Jason R Thorpe wrote:

 > The following adds support for using RAS locking to libpthread.  It is
 > preferred on uniprocessors where RAS is available.  Multiprocessors always
 > use atomic locking.
 > 
 > This is only minimally tested at this time -- if there are no objections,
 > I'll check it in after a more thorough round of testing.

Attached is the patch I will be committing; it fixes a few bugs
that were in the first version.
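
For anyone not familiar with the mechanism: a restartable atomic
sequence (RAS) is a small region of code registered with the kernel
via rasctl(2).  If a thread is preempted while executing inside the
region, the kernel resets its program counter to the start of the
region before resuming it, so a plain load/store pair behaves
atomically on a uniprocessor without needing atomic instructions.
Here is a minimal standalone sketch of the idea (the my_* names are
made up for illustration; they are not part of the patch):

	#include <sys/types.h>
	#include <sys/ras.h>
	#include <stdio.h>

	extern char my_ras_start[], my_ras_end[];
	static volatile int my_lock;	/* 0 = unlocked, 1 = locked */

	/*
	 * A plain load/store pair; it is effectively atomic on a
	 * uniprocessor only because the kernel restarts a preempted
	 * thread at my_ras_start.  As in the patch, this relies on
	 * the compiler not moving code across the asm labels.
	 */
	static int
	my_lock_try(void)
	{
		int old;

		__asm __volatile("my_ras_start:");
		old = my_lock;
		my_lock = 1;
		__asm __volatile("my_ras_end:");

		return (old == 0);
	}

	int
	main(void)
	{

		if (rasctl(my_ras_start, my_ras_end - my_ras_start,
		    RAS_INSTALL) == -1) {
			printf("no RAS support; fall back to atomic ops\n");
			return (1);
		}

		printf("lock try: %d\n", my_lock_try());
		return (0);
	}

The patch uses the same label trick in pthread__ras_simple_lock_try().
pthread__lockprim_init() asks sysctl(3) for hw.ncpu and installs the
RAS region only on uniprocessors; on multiprocessors the ops table
points at the machine-dependent __cpu_simple_lock_*() primitives
instead.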

-- 
        -- Jason R. Thorpe <thorpej@wasabisystems.com>

--45Z9DzgjV8m4Oswq
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=pthread-ras-patch

Index: TODO
===================================================================
RCS file: /cvsroot/src/lib/libpthread/Attic/TODO,v
retrieving revision 1.1.2.16
diff -c -r1.1.2.16 TODO
*** TODO	2002/10/30 07:35:31	1.1.2.16
--- TODO	2002/12/30 22:18:51
***************
*** 21,29 ****
  - Consider moving pthread__signal_tramp() to its own file, and building
    it with -fasync-unwind-tables, so that DWARF2 EH unwinding works through
    it.  (This is required for e.g. GCC's libjava.)
- - locks: Add support for RAS on uniprocessors.
- - pthread_lock.c: pthread_spin_lock() should use a loop around
-   __cpu_simple_lock_try() so that it is easy to add support for RAS.
  - Add locking to ld.elf_so so that multiple threads doing lazy binding
    doesn't trash things.
  - Verify the cancel stub symbol trickery.
--- 21,26 ----
Index: pthread.c
===================================================================
RCS file: /cvsroot/src/lib/libpthread/Attic/pthread.c,v
retrieving revision 1.1.2.36
diff -c -r1.1.2.36 pthread.c
*** pthread.c	2002/12/20 01:06:16	1.1.2.36
--- pthread.c	2002/12/30 22:18:52
***************
*** 125,130 ****
--- 125,133 ----
  	extern int __isthreaded;
  	extern pthread_ops_t *__libc_pthread_ops;
  
+ 	/* Initialize locks first; the rest of the setup uses them. */
+ 	pthread__lockprim_init();
+ 
  	/* Basic data structure setup */
  	pthread_attr_init(&pthread_default_attr);
  	PTQ_INIT(&allqueue);
Index: pthread_int.h
===================================================================
RCS file: /cvsroot/src/lib/libpthread/Attic/pthread_int.h,v
retrieving revision 1.1.2.36
diff -c -r1.1.2.36 pthread_int.h
*** pthread_int.h	2002/12/16 18:17:46	1.1.2.36
--- pthread_int.h	2002/12/30 22:18:52
***************
*** 151,158 ****
  #endif
  };
  
  
- 
  /* Thread types */
  #define PT_THREAD_NORMAL	1
  #define PT_THREAD_UPCALL	2
--- 151,162 ----
  #endif
  };
  
+ struct pthread_lock_ops {
+ 	void	(*plo_init)(__cpu_simple_lock_t *);
+ 	int	(*plo_try)(__cpu_simple_lock_t *);
+ 	void	(*plo_unlock)(__cpu_simple_lock_t *);
+ };
  
  /* Thread types */
  #define PT_THREAD_NORMAL	1
  #define PT_THREAD_UPCALL	2
***************
*** 235,244 ****
--- 239,255 ----
  void	pthread__alarm_process(pthread_t self, void *arg);
  
  /* Internal locking primitives */
+ void	pthread__lockprim_init(void);
  void	pthread_lockinit(pthread_spin_t *lock);
  void	pthread_spinlock(pthread_t thread, pthread_spin_t *lock);
  int	pthread_spintrylock(pthread_t thread, pthread_spin_t *lock);
  void	pthread_spinunlock(pthread_t thread, pthread_spin_t *lock);
+ 
+ extern const struct pthread_lock_ops *pthread__lock_ops;
+ 
+ #define	pthread__simple_lock_init(alp)	(*pthread__lock_ops->plo_init)(alp)
+ #define	pthread__simple_lock_try(alp)	(*pthread__lock_ops->plo_try)(alp)
+ #define	pthread__simple_unlock(alp)	(*pthread__lock_ops->plo_unlock)(alp)
  
  int	_getcontext_u(ucontext_t *);
  int	_setcontext_u(const ucontext_t *);
Index: pthread_lock.c
===================================================================
RCS file: /cvsroot/src/lib/libpthread/Attic/pthread_lock.c,v
retrieving revision 1.1.2.13
diff -c -r1.1.2.13 pthread_lock.c
*** pthread_lock.c	2002/12/18 22:53:14	1.1.2.13
--- pthread_lock.c	2002/12/30 22:18:52
***************
*** 36,41 ****
--- 36,45 ----
   * POSSIBILITY OF SUCH DAMAGE.
   */
  
+ #include <sys/param.h>
+ #include <sys/ras.h>
+ #include <sys/sysctl.h>
+ 
  #include <assert.h>
  #include <errno.h>
  #include <unistd.h>
***************
*** 56,66 ****
  
  static int nspins = NSPINS;
  
  void
  pthread_lockinit(pthread_spin_t *lock)
  {
  
! 	__cpu_simple_lock_init(lock);
  }
  
  void
--- 60,170 ----
  
  static int nspins = NSPINS;
  
+ extern char pthread__lock_ras_start[], pthread__lock_ras_end[];
+ 
+ static void
+ pthread__ras_simple_lock_init(__cpu_simple_lock_t *alp)
+ {
+ 
+ 	*alp = __SIMPLELOCK_UNLOCKED;
+ }
+ 
+ static int
+ pthread__ras_simple_lock_try(__cpu_simple_lock_t *alp)
+ {
+ 	__cpu_simple_lock_t old;
+ 
+ 	/* The restartable sequence; atomic on a uniprocessor. */
+ 	__asm __volatile("pthread__lock_ras_start:");
+ 	old = *alp;
+ 	*alp = __SIMPLELOCK_LOCKED;
+ 	__asm __volatile("pthread__lock_ras_end:");
+ 
+ 	return (old == __SIMPLELOCK_UNLOCKED);
+ }
+ 
+ static void
+ pthread__ras_simple_unlock(__cpu_simple_lock_t *alp)
+ {
+ 
+ 	*alp = __SIMPLELOCK_UNLOCKED;
+ }
+ 
+ static const struct pthread_lock_ops pthread__lock_ops_ras = {
+ 	pthread__ras_simple_lock_init,
+ 	pthread__ras_simple_lock_try,
+ 	pthread__ras_simple_unlock,
+ };
+ 
+ static void
+ pthread__atomic_simple_lock_init(__cpu_simple_lock_t *alp)
+ {
+ 
+ 	__cpu_simple_lock_init(alp);
+ }
+ 
+ static int
+ pthread__atomic_simple_lock_try(__cpu_simple_lock_t *alp)
+ {
+ 
+ 	return (__cpu_simple_lock_try(alp));
+ }
+ 
+ static void
+ pthread__atomic_simple_unlock(__cpu_simple_lock_t *alp)
+ {
+ 
+ 	__cpu_simple_unlock(alp);
+ }
+ 
+ static const struct pthread_lock_ops pthread__lock_ops_atomic = {
+ 	pthread__atomic_simple_lock_init,
+ 	pthread__atomic_simple_lock_try,
+ 	pthread__atomic_simple_unlock,
+ };
+ 
+ /*
+  * We default to the RAS primitives because locks may be needed early,
+  * before main() starts and thus before the RAS region is installed.
+  * This is safe: no other threads are active in the process yet, so
+  * atomicity is not actually required.
+  */
+ const struct pthread_lock_ops *pthread__lock_ops = &pthread__lock_ops_ras;
+ 
+ /*
+  * Initialize the locking primitives.  On uniprocessors, we use
+  * Restartable Atomic Sequences when they are available; otherwise
+  * we fall back on the machine-dependent atomic lock primitives.
+  */
+ void
+ pthread__lockprim_init(void)
+ {
+ 	int mib[2];
+ 	size_t len; 
+ 	int ncpu = 0;	/* select atomic ops if the sysctl fails */
+  
+ 	mib[0] = CTL_HW;
+ 	mib[1] = HW_NCPU; 
+  
+ 	len = sizeof(ncpu);
+ 	sysctl(mib, 2, &ncpu, &len, NULL, 0);
+ 
+ 	if (ncpu == 1 &&
+ 	    rasctl(pthread__lock_ras_start,
+ 	    	   (caddr_t)pthread__lock_ras_end -
+ 	    	   (caddr_t)pthread__lock_ras_start, RAS_INSTALL) == 0) {
+ 		pthread__lock_ops = &pthread__lock_ops_ras;
+ 		return;
+ 	}
+ 
+ 	pthread__lock_ops = &pthread__lock_ops_atomic;
+ }
+ 
  void
  pthread_lockinit(pthread_spin_t *lock)
  {
  
! 	pthread__simple_lock_init(lock);
  }
  
  void
***************
*** 80,86 ****
  	++thread->pt_spinlocks;
  
  	do {
! 		while (((ret = __cpu_simple_lock_try(lock)) == 0) && --count)
  			;
  
  		if (ret == 1)
--- 184,190 ----
  	++thread->pt_spinlocks;
  
  	do {
! 		while (((ret = pthread__simple_lock_try(lock)) == 0) && --count)
  			;
  
  		if (ret == 1)
***************
*** 126,132 ****
  		thread, thread->pt_spinlocks));
  	++thread->pt_spinlocks;
  
! 	ret = __cpu_simple_lock_try(lock);
  	
  	if (ret == 0) {
  	SDPRINTF(("(pthread_spintrylock %p) decrementing spinlock from %d\n",
--- 230,236 ----
  		thread, thread->pt_spinlocks));
  	++thread->pt_spinlocks;
  
! 	ret = pthread__simple_lock_try(lock);
  	
  	if (ret == 0) {
  	SDPRINTF(("(pthread_spintrylock %p) decrementing spinlock from %d\n",
***************
*** 147,153 ****
  pthread_spinunlock(pthread_t thread, pthread_spin_t *lock)
  {
  
! 	__cpu_simple_unlock(lock);
  	SDPRINTF(("(pthread_spinunlock %p) decrementing spinlock %p (count %d)\n",
  		thread, lock, thread->pt_spinlocks));
  	--thread->pt_spinlocks;
--- 251,257 ----
  pthread_spinunlock(pthread_t thread, pthread_spin_t *lock)
  {
  
! 	pthread__simple_unlock(lock);
  	SDPRINTF(("(pthread_spinunlock %p) decrementing spinlock %p (count %d)\n",
  		thread, lock, thread->pt_spinlocks));
  	--thread->pt_spinlocks;
***************
*** 228,234 ****
  		return EINVAL;
  #endif
  
! 	__cpu_simple_lock(&lock->pts_spin);
  
  	return 0;
  }
--- 332,339 ----
  		return EINVAL;
  #endif
  
! 	while (pthread__simple_lock_try(&lock->pts_spin) == 0)
! 		/* spin */ ;
  
  	return 0;
  }
***************
*** 242,248 ****
  		return EINVAL;
  #endif
  
! 	if (__cpu_simple_lock_try(&lock->pts_spin) == 0)
  		return EBUSY;
  
  	return 0;
--- 347,353 ----
  		return EINVAL;
  #endif
  
! 	if (pthread__simple_lock_try(&lock->pts_spin) == 0)
  		return EBUSY;
  
  	return 0;
***************
*** 257,263 ****
  		return EINVAL;
  #endif
  
! 	__cpu_simple_unlock(&lock->pts_spin);
  
  	return 0;
  }
--- 362,368 ----
  		return EINVAL;
  #endif
  
! 	pthread__simple_unlock(&lock->pts_spin);
  
  	return 0;
  }
Index: pthread_mutex.c
===================================================================
RCS file: /cvsroot/src/lib/libpthread/Attic/pthread_mutex.c,v
retrieving revision 1.1.2.15
diff -c -r1.1.2.15 pthread_mutex.c
*** pthread_mutex.c	2002/10/26 02:17:44	1.1.2.15
--- pthread_mutex.c	2002/12/30 22:18:52
***************
*** 89,96 ****
   * same mutex.
   * 
   * A memory barrier after a lock and before an unlock will provide
!  * this behavior. This code relies on __cpu_simple_lock_try() to issue
!  * a barrier after obtaining a lock, and on __cpu_simple_unlock() to
   * issue a barrier before releasing a lock.
   */
  
--- 89,96 ----
   * same mutex.
   * 
   * A memory barrier after a lock and before an unlock will provide
!  * this behavior. This code relies on pthread__simple_lock_try() to issue
!  * a barrier after obtaining a lock, and on pthread__simple_unlock() to
   * issue a barrier before releasing a lock.
   */
  
***************
*** 103,109 ****
  		return EINVAL;
  #endif
  
! 	if (__predict_false(__cpu_simple_lock_try(&mutex->ptm_lock) == 0))
  		pthread_mutex_lock_slow(mutex);
  
  	/* We have the lock! */
--- 103,109 ----
  		return EINVAL;
  #endif
  
! 	if (__predict_false(pthread__simple_lock_try(&mutex->ptm_lock) == 0))
  		pthread_mutex_lock_slow(mutex);
  
  	/* We have the lock! */
***************
*** 121,127 ****
  	self = pthread__self();
  
  	while (/*CONSTCOND*/1) {
! 		if (__cpu_simple_lock_try(&mutex->ptm_lock))
  		    break; /* got it! */
  		
  		/* Okay, didn't look free. Get the interlock... */
--- 121,127 ----
  	self = pthread__self();
  
  	while (/*CONSTCOND*/1) {
! 		if (pthread__simple_lock_try(&mutex->ptm_lock))
  		    break; /* got it! */
  		
  		/* Okay, didn't look free. Get the interlock... */
***************
*** 169,175 ****
  		return EINVAL;
  #endif
  
! 	if (__cpu_simple_lock_try(&mutex->ptm_lock) == 0)
  		return EBUSY;
  
  #ifdef ERRORCHECK
--- 169,175 ----
  		return EINVAL;
  #endif
  
! 	if (pthread__simple_lock_try(&mutex->ptm_lock) == 0)
  		return EBUSY;
  
  #ifdef ERRORCHECK
***************
*** 201,207 ****
  #ifdef ERRORCHECK
  	mutex->ptm_owner = NULL;
  #endif
! 	__cpu_simple_unlock(&mutex->ptm_lock);
  	pthread_spinunlock(self, &mutex->ptm_interlock);
  
  	/* Give the head of the blocked queue another try. */
--- 201,207 ----
  #ifdef ERRORCHECK
  	mutex->ptm_owner = NULL;
  #endif
! 	pthread__simple_unlock(&mutex->ptm_lock);
  	pthread_spinunlock(self, &mutex->ptm_interlock);
  
  	/* Give the head of the blocked queue another try. */
Index: pthread_sa.c
===================================================================
RCS file: /cvsroot/src/lib/libpthread/Attic/pthread_sa.c,v
retrieving revision 1.1.2.34
diff -c -r1.1.2.34 pthread_sa.c
*** pthread_sa.c	2002/12/16 18:32:21	1.1.2.34
--- pthread_sa.c	2002/12/30 22:18:53
***************
*** 381,387 ****
  						 */
  						lock = victim->pt_heldlock;
  						victim->pt_heldlock = NULL;
! 						__cpu_simple_unlock(lock);
  						victim->pt_uc = 
  						    victim->pt_sleepuc;
  						victim->pt_sleepuc = NULL;
--- 381,387 ----
  						 */
  						lock = victim->pt_heldlock;
  						victim->pt_heldlock = NULL;
! 						pthread__simple_unlock(lock);
  						victim->pt_uc = 
  						    victim->pt_sleepuc;
  						victim->pt_sleepuc = NULL;
Index: pthread_types.h
===================================================================
RCS file: /cvsroot/src/lib/libpthread/Attic/pthread_types.h,v
retrieving revision 1.1.2.9
diff -c -r1.1.2.9 pthread_types.h
*** pthread_types.h	2002/10/28 00:06:09	1.1.2.9
--- pthread_types.h	2002/12/30 22:18:53
***************
*** 89,95 ****
  	unsigned int	ptm_magic;
  
  	/* Not a real spinlock; will never be spun on. Locked with
! 	 * __cpu_simple_lock_try() or not at all. Therefore, not
  	 * subject to preempted-spinlock-continuation.
  	 * 
  	 * Open research question: Would it help threaded applications if
--- 89,95 ----
  	unsigned int	ptm_magic;
  
  	/* Not a real spinlock; will never be spun on. Locked with
! 	 * pthread__simple_lock_try() or not at all. Therefore, not
  	 * subject to preempted-spinlock-continuation.
  	 * 
  	 * Open research question: Would it help threaded applications if

--45Z9DzgjV8m4Oswq--