Subject: hppa alignment restrictions on __cpu_simple_lock_t
To: None <tech-kern@netbsd.org>
From: Nick Hudson <skrll@netbsd.org>
List: tech-kern
Date: 06/27/2007 11:55:35

Hi,

It won't come as a surprise to a lot of you that hppa is special.

The ldcw (load and clear word) instruction on PA 1.1 requires the word it
acts upon to be 16-byte aligned (its behaviour on an unaligned word is
undefined). This presents a problem for __cpu_simple_lock_t, where this
alignment restriction must be observed.
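
For those not familiar with it, ldcw atomically loads the addressed word and
clears it to zero; this is why on hppa a free lock contains 1 and a held
lock contains 0. In C the semantics are roughly (a sketch only; the hardware
performs the load and the clear as one atomic operation):

	/*
	 * Conceptual model of ldcw; on PA 1.1 ptr must be 16-byte
	 * aligned.
	 */
	static inline unsigned long
	ldcw_semantics(volatile unsigned long *ptr)
	{
		unsigned long old = *ptr;	/* fetch the old value... */

		*ptr = 0;			/* ...and clear the word */
		return old;	/* 1: lock acquired, 0: already held */
	}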

There are a couple of potential solutions. The first, and the one I've 
implemented in the attached patch, is to define

	typedef volatile struct {
		volatile unsigned long csl_lock[4];
	} __cpu_simple_lock_t;

and update the functions acting on the __cpu_simple_lock_t to pick out the
aligned word. New inlines __SIMPLELOCK_LOCKED_P and __SIMPLELOCK_UNLOCKED_P
are provided to replace the current comparisons against __SIMPLELOCK_LOCKED
and __SIMPLELOCK_UNLOCKED. Also, __cpu_simple_lock_{set,clear} are provided
for the !MP case.
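
Since csl_lock is 16 bytes long and at least word-aligned, the distance from
its start to the next 16-byte boundary is at most 12 bytes, so an aligned
word always falls within the array. This is what the __SIMPLELOCK_ALIGN
macro in the patch computes; as a standalone sketch:

	#include <stdint.h>

	#define HPPA_LDCW_ALIGN	16

	/*
	 * Round the lock address up to the next 16-byte boundary; the
	 * result is the word ldcw actually operates on. E.g. a lock at
	 * 0x1004 yields the word at 0x1010, i.e. csl_lock[3].
	 */
	static inline volatile unsigned long *
	simplelock_align(volatile void *p)
	{

		return (volatile unsigned long *)
		    (((uintptr_t)p + HPPA_LDCW_ALIGN - 1) &
		    ~(uintptr_t)(HPPA_LDCW_ALIGN - 1));
	}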

The second potential solution is to specify the alignment restriction on the 
struct, e.g.

	typedef struct {
		volatile unsigned long csl_lock;
	} __cpu_simple_lock_t __aligned(16);

but this places a requirement on memory allocators, in both the kernel and
userland (as __cpu_simple_lock_t is also used in libpthread), to always
return suitably aligned pointers. I'm not sure this is workable, and it is
definitely wasteful of memory.
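
To illustrate the allocator problem: the compiler assumes the declared
alignment wherever the type is embedded, but malloc(3), for instance, is
only required to align for standard types (typically 8 bytes). A
hypothetical struct shows the hazard:

	#include <stdlib.h>

	struct foo {
		__cpu_simple_lock_t f_lock;	/* assumed 16-byte aligned */
		int f_refs;
	};

	void
	unsafe(void)
	{
		/*
		 * malloc may return only 8-byte-aligned memory, so f_lock
		 * can violate the alignment the compiler (and ldcw) assume.
		 */
		struct foo *f = malloc(sizeof(*f));

		__cpu_simple_lock(&f->f_lock);	/* possibly misaligned! */
	}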

Thoughts?

Thanks,
Nick

[Attachment: lock.diff (text/x-diff)]

Index: sys/kern/kern_lock.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_lock.c,v
retrieving revision 1.116
diff -u -p -u -r1.116 kern_lock.c
--- sys/kern/kern_lock.c	18 Jun 2007 21:37:32 -0000	1.116
+++ sys/kern/kern_lock.c	27 Jun 2007 10:33:37 -0000
@@ -1150,7 +1150,7 @@ simple_lock_init(volatile struct simplel
 #if defined(MULTIPROCESSOR) /* { */
 	__cpu_simple_lock_init(&alp->lock_data);
 #else
-	alp->lock_data = __SIMPLELOCK_UNLOCKED;
+	__cpu_simple_lock_clear(&alp->lock_data);
 #endif /* } */
 	alp->lock_file = NULL;
 	alp->lock_line = 0;
@@ -1171,7 +1171,7 @@ _simple_lock(volatile struct simplelock 
 	 * MULTIPROCESSOR case: This is `safe' since if it's not us, we
 	 * don't take any action, and just fall into the normal spin case.
 	 */
-	if (alp->lock_data == __SIMPLELOCK_LOCKED) {
+	if (__SIMPLELOCK_LOCKED_P(&alp->lock_data)) {
 #if defined(MULTIPROCESSOR) /* { */
 		if (alp->lock_holder == cpu_num) {
 			SLOCK_WHERE("simple_lock: locking against myself\n",
@@ -1190,7 +1190,7 @@ _simple_lock(volatile struct simplelock 
 	__cpu_simple_lock(&alp->lock_data);
 	s = splhigh();
 #else
-	alp->lock_data = __SIMPLELOCK_LOCKED;
+	__cpu_simple_lock_set(&alp->lock_data);
 #endif /* } */
 
 	if (alp->lock_holder != LK_NOCPU) {
@@ -1227,7 +1227,7 @@ _simple_lock_held(volatile struct simple
 	else
 		__cpu_simple_unlock(&alp->lock_data);
 #else
-	if (alp->lock_data == __SIMPLELOCK_LOCKED) {
+	if (__SIMPLELOCK_LOCKED_P(&alp->lock_data)) {
 		locked = 1;
 		KASSERT(alp->lock_holder == cpu_num);
 	}
@@ -1258,11 +1258,11 @@ _simple_lock_try(volatile struct simplel
 		goto out;
 	}
 #else
-	if (alp->lock_data == __SIMPLELOCK_LOCKED) {
+	if (__SIMPLELOCK_LOCKED_P(&alp->lock_data)) {
 		SLOCK_WHERE("simple_lock_try: lock held\n", alp, id, l);
 		goto out;
 	}
-	alp->lock_data = __SIMPLELOCK_LOCKED;
+	__cpu_simple_lock_set(&alp->lock_data);
 #endif /* MULTIPROCESSOR */ /* } */
 
 	/*
@@ -1297,7 +1297,7 @@ _simple_unlock(volatile struct simpleloc
 	 * MULTIPROCESSOR case: This is `safe' because we think we hold
 	 * the lock, and if we don't, we don't take any action.
 	 */
-	if (alp->lock_data == __SIMPLELOCK_UNLOCKED) {
+	if (__SIMPLELOCK_UNLOCKED_P(&alp->lock_data)) {
 		SLOCK_WHERE("simple_unlock: lock not held\n",
 		    alp, id, l);
 		goto out;
@@ -1320,7 +1320,7 @@ _simple_unlock(volatile struct simpleloc
 	/* Now that we've modified all fields, release the lock. */
 	__cpu_simple_unlock(&alp->lock_data);
 #else
-	alp->lock_data = __SIMPLELOCK_UNLOCKED;
+	__cpu_simple_lock_clear(&alp->lock_data);
 	KASSERT(alp->lock_holder == cpu_number());
 	alp->lock_holder = LK_NOCPU;
 #endif /* } */
Index: sys/kern/kern_mutex.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_mutex.c,v
retrieving revision 1.14
diff -u -p -u -r1.14 kern_mutex.c
--- sys/kern/kern_mutex.c	17 May 2007 14:51:40 -0000	1.14
+++ sys/kern/kern_mutex.c	27 Jun 2007 10:33:38 -0000
@@ -350,7 +350,7 @@ mutex_destroy(kmutex_t *mtx)
 		MUTEX_ASSERT(mtx, !MUTEX_OWNED(mtx->mtx_owner) &&
 		    !MUTEX_HAS_WAITERS(mtx));
 	} else {
-		MUTEX_ASSERT(mtx, mtx->mtx_lock != __SIMPLELOCK_LOCKED);
+		MUTEX_ASSERT(mtx, !__SIMPLELOCK_LOCKED_P(&mtx->mtx_lock));
 	}
 
 	LOCKDEBUG_FREE(mtx, MUTEX_GETID(mtx));
@@ -451,7 +451,7 @@ mutex_vector_enter(kmutex_t *mtx)
 		do {
 			if (panicstr != NULL)
 				break;
-			while (mtx->mtx_lock == __SIMPLELOCK_LOCKED) {
+			while (__SIMPLELOCK_LOCKED_P(&mtx->mtx_lock)) {
 				SPINLOCK_BACKOFF(count); 
 #ifdef LOCKDEBUG
 				if (SPINLOCK_SPINOUT(spins))
@@ -684,7 +684,7 @@ mutex_vector_exit(kmutex_t *mtx)
 
 	if (MUTEX_SPIN_P(mtx)) {
 #ifdef FULL
-		if (mtx->mtx_lock != __SIMPLELOCK_LOCKED)
+		if (!__SIMPLELOCK_LOCKED_P(&mtx->mtx_lock))
 			MUTEX_ABORT(mtx, "exiting unheld spin mutex");
 		MUTEX_UNLOCKED(mtx);
 		__cpu_simple_unlock(&mtx->mtx_lock);
@@ -758,7 +758,7 @@ mutex_owned(kmutex_t *mtx)
 	if (MUTEX_ADAPTIVE_P(mtx))
 		return MUTEX_OWNER(mtx->mtx_owner) == (uintptr_t)curlwp;
 #ifdef FULL
-	return mtx->mtx_lock == __SIMPLELOCK_LOCKED;
+	return __SIMPLELOCK_LOCKED_P(&mtx->mtx_lock);
 #else
 	return 1;
 #endif
@@ -852,7 +852,7 @@ mutex_spin_retry(kmutex_t *mtx)
 	do {
 		if (panicstr != NULL)
 			break;
-		while (mtx->mtx_lock == __SIMPLELOCK_LOCKED) {
+		while (__SIMPLELOCK_LOCKED_P(&mtx->mtx_lock)) {
 			SPINLOCK_BACKOFF(count); 
 #ifdef LOCKDEBUG
 			if (SPINLOCK_SPINOUT(spins))
Index: sys/arch/hppa/include/lock.h
===================================================================
RCS file: /cvsroot/src/sys/arch/hppa/include/lock.h,v
retrieving revision 1.11
diff -u -p -u -r1.11 lock.h
--- sys/arch/hppa/include/lock.h	9 Feb 2007 21:55:04 -0000	1.11
+++ sys/arch/hppa/include/lock.h	27 Jun 2007 10:33:38 -0000
@@ -44,8 +44,28 @@
 #ifndef _HPPA_LOCK_H_
 #define	_HPPA_LOCK_H_
 
+#define HPPA_LDCW_ALIGN	16
+
+#define __SIMPLELOCK_ALIGN(p) \
+    (volatile unsigned long *)(((uintptr_t)(p) + HPPA_LDCW_ALIGN - 1) & ~(HPPA_LDCW_ALIGN - 1))
+
+#define __SIMPLELOCK_RAW_LOCKED		0
+#define __SIMPLELOCK_RAW_UNLOCKED	1
+
+static __inline int
+__SIMPLELOCK_LOCKED_P(__cpu_simple_lock_t *__ptr)
+{
+	return *__SIMPLELOCK_ALIGN(__ptr) == __SIMPLELOCK_RAW_LOCKED;
+}
+
+static __inline int
+__SIMPLELOCK_UNLOCKED_P(__cpu_simple_lock_t *__ptr)
+{
+	return *__SIMPLELOCK_ALIGN(__ptr) == __SIMPLELOCK_RAW_UNLOCKED;
+}
+
 static __inline int
-__ldcw(__cpu_simple_lock_t *__ptr)
+__ldcw(volatile unsigned long *__ptr)
 {
 	int __val;
 
@@ -69,14 +89,16 @@ __sync(void)
 static __inline void
 __cpu_simple_lock_init(__cpu_simple_lock_t *alp)
 {
+	__cpu_simple_lock_t ul = __SIMPLELOCK_UNLOCKED;
 
-	*alp = __SIMPLELOCK_UNLOCKED;
+	*alp = ul;
 	__sync();
 }
 
 static __inline void
 __cpu_simple_lock(__cpu_simple_lock_t *alp)
 {
+	volatile unsigned long *__aptr = __SIMPLELOCK_ALIGN(alp);
 
 	/*
 	 * Note, if we detect that the lock is held when
@@ -85,23 +107,42 @@ __cpu_simple_lock(__cpu_simple_lock_t *a
 	 * some work.
 	 */
 
-	while (__ldcw(alp) == __SIMPLELOCK_LOCKED)
-		while (*alp == __SIMPLELOCK_LOCKED)
+	while (__ldcw(__aptr) == __SIMPLELOCK_RAW_LOCKED)
+		while (*__aptr == __SIMPLELOCK_RAW_LOCKED)
 			;
 }
 
 static __inline int
 __cpu_simple_lock_try(__cpu_simple_lock_t *alp)
 {
+	volatile unsigned long *__aptr = __SIMPLELOCK_ALIGN(alp);
 
-	return (__ldcw(alp) != __SIMPLELOCK_LOCKED);
+	return (__ldcw(__aptr) != __SIMPLELOCK_RAW_LOCKED);
 }
 
 static __inline void
 __cpu_simple_unlock(__cpu_simple_lock_t *alp)
 {
+	volatile unsigned long *__aptr = __SIMPLELOCK_ALIGN(alp);
+
 	__sync();
-	*alp = __SIMPLELOCK_UNLOCKED;
+	*__aptr = __SIMPLELOCK_RAW_UNLOCKED;
+}
+
+static __inline void
+__cpu_simple_lock_set(__cpu_simple_lock_t *alp)
+{
+	volatile unsigned long *__aptr = __SIMPLELOCK_ALIGN(alp);
+
+	*__aptr = __SIMPLELOCK_RAW_LOCKED;
+}
+
+static __inline void
+__cpu_simple_lock_clear(__cpu_simple_lock_t *alp)
+{
+	volatile unsigned long *__aptr = __SIMPLELOCK_ALIGN(alp);
+
+	*__aptr = __SIMPLELOCK_RAW_UNLOCKED;
 }
 
 static __inline void
Index: sys/arch/hppa/include/mutex.h
===================================================================
RCS file: /cvsroot/src/sys/arch/hppa/include/mutex.h,v
retrieving revision 1.5
diff -u -p -u -r1.5 mutex.h
--- sys/arch/hppa/include/mutex.h	15 May 2007 18:00:34 -0000	1.5
+++ sys/arch/hppa/include/mutex.h	27 Jun 2007 10:33:38 -0000
@@ -54,23 +54,22 @@
 struct kmutex {
 	union {
 		/*
-		 * Only the low 4 bytes of the lock will be used by
-		 * __cpu_simple_lock(), but it must be aligned on a
-		 * 16-byte boundary.  See hppa/lock.h
+		 * Only the 16-byte-aligned word within __cpu_simple_lock_t
+		 * will be used. It's 16 bytes to simplify the allocation.
+		 * See hppa/lock.h
 		 */
 #ifdef __MUTEX_PRIVATE
-		__cpu_simple_lock_t	mtxu_lock;		/* 0-15 */
 		struct {
-			volatile uint32_t	mtxs_lockword;	/* 0-3 */
-			volatile uint32_t	mtxs_owner;	/* 4-7 */
-			ipl_cookie_t		mtxs_ipl;	/* 8-11 */
-			volatile uint8_t	mtxs_waiters;	/* 12 */
+			__cpu_simple_lock_t	mtxu_lock;	/* 0-15 */
+			volatile uint32_t	mtxs_owner;	/* 16-19 */
+			ipl_cookie_t		mtxs_ipl;	/* 20-23 */
+			volatile uint8_t	mtxs_waiters;	/* 24 */
 
 			/* For LOCKDEBUG */
-			uint8_t			mtxs_id[3];	/* 13-15 */
+			uint8_t			mtxs_id[3];	/* 25-27 */
 		} s;
 #endif
-		uint8_t			mtxu_pad[16];		/* 0-15 */
+		uint8_t			mtxu_pad[32];	/* 0-31 */
 	} u;
 } __aligned (16);
 #endif
@@ -79,7 +78,7 @@ struct kmutex {
 
 #define	__HAVE_MUTEX_STUBS	1
 
-#define	mtx_lock	u.mtxu_lock
+#define	mtx_lock	u.s.mtxu_lock
 #define	mtx_owner	u.s.mtxs_owner
 #define	mtx_ipl		u.s.mtxs_ipl
 #define	mtx_waiters	u.s.mtxs_waiters
Index: sys/arch/hppa/include/types.h
===================================================================
RCS file: /cvsroot/src/sys/arch/hppa/include/types.h,v
retrieving revision 1.14
diff -u -p -u -r1.14 types.h
--- sys/arch/hppa/include/types.h	17 May 2007 14:51:20 -0000	1.14
+++ sys/arch/hppa/include/types.h	27 Jun 2007 10:33:38 -0000
@@ -36,6 +36,7 @@
 #ifndef	_HPPA_TYPES_H_
 #define	_HPPA_TYPES_H_
 
+#include <sys/cdefs.h>
 #include <sys/featuretest.h>
 
 #if defined(_NETBSD_SOURCE)
@@ -63,10 +64,13 @@ typedef unsigned long vm_size_t;
 /*
  * Semaphores must be aligned on 16-byte boundaries on the PA-RISC.
  */
-typedef volatile unsigned long __cpu_simple_lock_t;
+typedef volatile struct {
+	volatile unsigned long csl_lock[4];
+} __cpu_simple_lock_t;
 
-#define __SIMPLELOCK_LOCKED	0
-#define __SIMPLELOCK_UNLOCKED	1
+
+#define __SIMPLELOCK_LOCKED	{ { 0, 0, 0, 0 } }
+#define __SIMPLELOCK_UNLOCKED	{ { 1, 1, 1, 1 } }
 
 typedef int			register_t;
 
