Subject: hppa alignment restrictions on __cpu_simple_lock_t
To: None <tech-kern@netbsd.org>
From: Nick Hudson <skrll@netbsd.org>
List: tech-kern
Date: 06/27/2007 11:55:35
Hi,
It won't come as a surprise to a lot of you that hppa is special.
The ldcw (load and clear word) instruction on PA 1.1 requires a 16-byte-aligned
word to act upon (use of an unaligned word is undefined). This presents a
problem for __cpu_simple_lock_t, where this alignment restriction needs to be
observed.
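For anyone who hasn't met the instruction, here's a rough C-level sketch of
what ldcw does (the real thing is a single atomic instruction; the function
name below is mine, purely for illustration). It loads the word and clears it
to zero in one step, which is why 0 means locked and 1 means unlocked:

static inline unsigned long
ldcw_sketch(volatile unsigned long *p)  /* p must be 16-byte aligned */
{
        unsigned long old = *p;         /* load the word...             */
        *p = 0;                         /* ...and clear it              */
        return old;                     /* 1 means we took a free lock  */
}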
There are a couple of potential solutions. The first, and the one I've
implemented in the attached patch, is to define
typedef volatile struct {
        volatile unsigned long csl_lock[4];
} __cpu_simple_lock_t;
and update the functions acting on the __cpu_simple_lock_t to pick out the
aligned word. New inlines __SIMPLELOCK_LOCKED_P and __SIMPLELOCK_UNLOCKED_P
are provided to replace the current comparisons against __SIMPLELOCK_LOCKED
and __SIMPLELOCK_UNLOCKED. Also, __cpu_simple_lock_{set,clear} are provided for
the !MP case.
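To spell out how the aligned word is picked out (the patch does this with a
__SIMPLELOCK_ALIGN macro; the helper below is only for illustration):
csl_lock[] is 16 bytes and the struct itself is at least word-aligned, so
rounding its address up to the next 16-byte boundary always lands on a word
that is still inside the array.

#include <stdint.h>

#define HPPA_LDCW_ALIGN 16

/* Round p up to the next 16-byte boundary.  With a word-aligned
 * 16-byte array this moves forward at most 12 bytes, so the aligned
 * word always fits inside csl_lock[4]. */
static inline volatile unsigned long *
csl_aligned_word(volatile unsigned long *p)
{
        return (volatile unsigned long *)
            (((uintptr_t)p + HPPA_LDCW_ALIGN - 1) &
            ~(uintptr_t)(HPPA_LDCW_ALIGN - 1));
}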
The second potential solution is to specify the alignment restriction on the
struct, e.g.
typedef struct {
        volatile unsigned long csl_lock;
} __cpu_simple_lock_t __aligned(16);
but this places a requirement on the memory allocators, in both the kernel and
userland (as __cpu_simple_lock_t is used in libpthread), to always return
correctly aligned pointers. I'm not sure this is workable, and it is
definitely wasteful of memory.
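To illustrate the allocator problem, here's a quick userland sketch (the type
name is made up, and malloc's exact alignment guarantee varies by platform):
the compiler takes care of static and stack instances, but anything living on
the heap is only as aligned as the allocator chooses to make it.

#include <sys/cdefs.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct {
        volatile unsigned long csl_lock;
} aligned_lock_t __aligned(16);         /* sizeof is padded to 16 */

int
main(void)
{
        static aligned_lock_t sl;       /* compiler aligns this itself */
        aligned_lock_t *hl = malloc(sizeof(*hl)); /* allocator may not */

        printf("static %% 16 = %lu\n", (unsigned long)((uintptr_t)&sl % 16));
        printf("heap   %% 16 = %lu\n", (unsigned long)((uintptr_t)hl % 16));
        return 0;
}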
Thoughts?
Thanks,
Nick
[attachment: lock.diff]
Index: sys/kern/kern_lock.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_lock.c,v
retrieving revision 1.116
diff -u -p -u -r1.116 kern_lock.c
--- sys/kern/kern_lock.c 18 Jun 2007 21:37:32 -0000 1.116
+++ sys/kern/kern_lock.c 27 Jun 2007 10:33:37 -0000
@@ -1150,7 +1150,7 @@ simple_lock_init(volatile struct simplel
#if defined(MULTIPROCESSOR) /* { */
__cpu_simple_lock_init(&alp->lock_data);
#else
- alp->lock_data = __SIMPLELOCK_UNLOCKED;
+ __cpu_simple_lock_clear(&alp->lock_data);
#endif /* } */
alp->lock_file = NULL;
alp->lock_line = 0;
@@ -1171,7 +1171,7 @@ _simple_lock(volatile struct simplelock
* MULTIPROCESSOR case: This is `safe' since if it's not us, we
* don't take any action, and just fall into the normal spin case.
*/
- if (alp->lock_data == __SIMPLELOCK_LOCKED) {
+ if (__SIMPLELOCK_LOCKED_P(&alp->lock_data)) {
#if defined(MULTIPROCESSOR) /* { */
if (alp->lock_holder == cpu_num) {
SLOCK_WHERE("simple_lock: locking against myself\n",
@@ -1190,7 +1190,7 @@ _simple_lock(volatile struct simplelock
__cpu_simple_lock(&alp->lock_data);
s = splhigh();
#else
- alp->lock_data = __SIMPLELOCK_LOCKED;
+ __cpu_simple_lock_set(&alp->lock_data);
#endif /* } */
if (alp->lock_holder != LK_NOCPU) {
@@ -1227,7 +1227,7 @@ _simple_lock_held(volatile struct simple
else
__cpu_simple_unlock(&alp->lock_data);
#else
- if (alp->lock_data == __SIMPLELOCK_LOCKED) {
+ if (__SIMPLELOCK_LOCKED_P(&alp->lock_data)) {
locked = 1;
KASSERT(alp->lock_holder == cpu_num);
}
@@ -1258,11 +1258,11 @@ _simple_lock_try(volatile struct simplel
goto out;
}
#else
- if (alp->lock_data == __SIMPLELOCK_LOCKED) {
+ if (__SIMPLELOCK_LOCKED_P(&alp->lock_data)) {
SLOCK_WHERE("simple_lock_try: lock held\n", alp, id, l);
goto out;
}
- alp->lock_data = __SIMPLELOCK_LOCKED;
+ __cpu_simple_lock_set(&alp->lock_data);
#endif /* MULTIPROCESSOR */ /* } */
/*
@@ -1297,7 +1297,7 @@ _simple_unlock(volatile struct simpleloc
* MULTIPROCESSOR case: This is `safe' because we think we hold
* the lock, and if we don't, we don't take any action.
*/
- if (alp->lock_data == __SIMPLELOCK_UNLOCKED) {
+ if (__SIMPLELOCK_UNLOCKED_P(&alp->lock_data)) {
SLOCK_WHERE("simple_unlock: lock not held\n",
alp, id, l);
goto out;
@@ -1320,7 +1320,7 @@ _simple_unlock(volatile struct simpleloc
/* Now that we've modified all fields, release the lock. */
__cpu_simple_unlock(&alp->lock_data);
#else
- alp->lock_data = __SIMPLELOCK_UNLOCKED;
+ __cpu_simple_lock_clear(&alp->lock_data);
KASSERT(alp->lock_holder == cpu_number());
alp->lock_holder = LK_NOCPU;
#endif /* } */
Index: sys/kern/kern_mutex.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_mutex.c,v
retrieving revision 1.14
diff -u -p -u -r1.14 kern_mutex.c
--- sys/kern/kern_mutex.c 17 May 2007 14:51:40 -0000 1.14
+++ sys/kern/kern_mutex.c 27 Jun 2007 10:33:38 -0000
@@ -350,7 +350,7 @@ mutex_destroy(kmutex_t *mtx)
MUTEX_ASSERT(mtx, !MUTEX_OWNED(mtx->mtx_owner) &&
!MUTEX_HAS_WAITERS(mtx));
} else {
- MUTEX_ASSERT(mtx, mtx->mtx_lock != __SIMPLELOCK_LOCKED);
+ MUTEX_ASSERT(mtx, !__SIMPLELOCK_LOCKED_P(&mtx->mtx_lock));
}
LOCKDEBUG_FREE(mtx, MUTEX_GETID(mtx));
@@ -451,7 +451,7 @@ mutex_vector_enter(kmutex_t *mtx)
do {
if (panicstr != NULL)
break;
- while (mtx->mtx_lock == __SIMPLELOCK_LOCKED) {
+ while (__SIMPLELOCK_LOCKED_P(&mtx->mtx_lock)) {
SPINLOCK_BACKOFF(count);
#ifdef LOCKDEBUG
if (SPINLOCK_SPINOUT(spins))
@@ -684,7 +684,7 @@ mutex_vector_exit(kmutex_t *mtx)
if (MUTEX_SPIN_P(mtx)) {
#ifdef FULL
- if (mtx->mtx_lock != __SIMPLELOCK_LOCKED)
+ if (!__SIMPLELOCK_LOCKED_P(&mtx->mtx_lock))
MUTEX_ABORT(mtx, "exiting unheld spin mutex");
MUTEX_UNLOCKED(mtx);
__cpu_simple_unlock(&mtx->mtx_lock);
@@ -758,7 +758,7 @@ mutex_owned(kmutex_t *mtx)
if (MUTEX_ADAPTIVE_P(mtx))
return MUTEX_OWNER(mtx->mtx_owner) == (uintptr_t)curlwp;
#ifdef FULL
- return mtx->mtx_lock == __SIMPLELOCK_LOCKED;
+ return __SIMPLELOCK_LOCKED_P(&mtx->mtx_lock);
#else
return 1;
#endif
@@ -852,7 +852,7 @@ mutex_spin_retry(kmutex_t *mtx)
do {
if (panicstr != NULL)
break;
- while (mtx->mtx_lock == __SIMPLELOCK_LOCKED) {
+ while (__SIMPLELOCK_LOCKED_P(&mtx->mtx_lock)) {
SPINLOCK_BACKOFF(count);
#ifdef LOCKDEBUG
if (SPINLOCK_SPINOUT(spins))
Index: sys/arch/hppa/include/lock.h
===================================================================
RCS file: /cvsroot/src/sys/arch/hppa/include/lock.h,v
retrieving revision 1.11
diff -u -p -u -r1.11 lock.h
--- sys/arch/hppa/include/lock.h 9 Feb 2007 21:55:04 -0000 1.11
+++ sys/arch/hppa/include/lock.h 27 Jun 2007 10:33:38 -0000
@@ -44,8 +44,28 @@
#ifndef _HPPA_LOCK_H_
#define _HPPA_LOCK_H_
+#define HPPA_LDCW_ALIGN 16
+
+#define __SIMPLELOCK_ALIGN(p) \
+ (unsigned long *)(((uintptr_t)(p) + HPPA_LDCW_ALIGN - 1) & ~(HPPA_LDCW_ALIGN - 1))
+
+#define __SIMPLELOCK_RAW_LOCKED 0
+#define __SIMPLELOCK_RAW_UNLOCKED 1
+
+static __inline int
+__SIMPLELOCK_LOCKED_P(__cpu_simple_lock_t *__ptr)
+{
+ return *__SIMPLELOCK_ALIGN(__ptr) == __SIMPLELOCK_RAW_LOCKED;
+}
+
+static __inline int
+__SIMPLELOCK_UNLOCKED_P(__cpu_simple_lock_t *__ptr)
+{
+ return *__SIMPLELOCK_ALIGN(__ptr) == __SIMPLELOCK_RAW_UNLOCKED;
+}
+
static __inline int
-__ldcw(__cpu_simple_lock_t *__ptr)
+__ldcw(unsigned long *__ptr)
{
int __val;
@@ -69,14 +89,16 @@ __sync(void)
static __inline void
__cpu_simple_lock_init(__cpu_simple_lock_t *alp)
{
+ __cpu_simple_lock_t ul = __SIMPLELOCK_UNLOCKED;
- *alp = __SIMPLELOCK_UNLOCKED;
+ *alp = ul;
__sync();
}
static __inline void
__cpu_simple_lock(__cpu_simple_lock_t *alp)
{
+ unsigned long *__aptr = __SIMPLELOCK_ALIGN(alp);
/*
* Note, if we detect that the lock is held when
@@ -85,23 +107,42 @@ __cpu_simple_lock(__cpu_simple_lock_t *a
* some work.
*/
- while (__ldcw(alp) == __SIMPLELOCK_LOCKED)
- while (*alp == __SIMPLELOCK_LOCKED)
+ while (__ldcw(__aptr) == __SIMPLELOCK_RAW_LOCKED)
+ while (*__aptr == __SIMPLELOCK_RAW_LOCKED)
;
}
static __inline int
__cpu_simple_lock_try(__cpu_simple_lock_t *alp)
{
+ unsigned long *__aptr = __SIMPLELOCK_ALIGN(alp);
- return (__ldcw(alp) != __SIMPLELOCK_LOCKED);
+ return (__ldcw(__aptr) != __SIMPLELOCK_RAW_LOCKED);
}
static __inline void
__cpu_simple_unlock(__cpu_simple_lock_t *alp)
{
+ unsigned long *__aptr = __SIMPLELOCK_ALIGN(alp);
+
__sync();
- *alp = __SIMPLELOCK_UNLOCKED;
+ *__aptr = __SIMPLELOCK_RAW_UNLOCKED;
+}
+
+static __inline void
+__cpu_simple_lock_set(__cpu_simple_lock_t *alp)
+{
+ unsigned long *__aptr = __SIMPLELOCK_ALIGN(alp);
+
+ *__aptr = __SIMPLELOCK_RAW_LOCKED;
+}
+
+static __inline void
+__cpu_simple_lock_clear(__cpu_simple_lock_t *alp)
+{
+ unsigned long *__aptr = __SIMPLELOCK_ALIGN(alp);
+
+ *__aptr = __SIMPLELOCK_RAW_UNLOCKED;
}
static __inline void
Index: sys/arch/hppa/include/mutex.h
===================================================================
RCS file: /cvsroot/src/sys/arch/hppa/include/mutex.h,v
retrieving revision 1.5
diff -u -p -u -r1.5 mutex.h
--- sys/arch/hppa/include/mutex.h 15 May 2007 18:00:34 -0000 1.5
+++ sys/arch/hppa/include/mutex.h 27 Jun 2007 10:33:38 -0000
@@ -54,23 +54,22 @@
struct kmutex {
union {
/*
- * Only the low 4 bytes of the lock will be used by
- * __cpu_simple_lock(), but it must be aligned on a
- * 16-byte boundary. See hppa/lock.h
+ * Only the 16-byte-aligned word of __cpu_simple_lock_t will
+ * be used. It's 16 bytes to simplify the allocation.
+ * See hppa/lock.h
*/
#ifdef __MUTEX_PRIVATE
- __cpu_simple_lock_t mtxu_lock; /* 0-15 */
struct {
- volatile uint32_t mtxs_lockword; /* 0-3 */
- volatile uint32_t mtxs_owner; /* 4-7 */
- ipl_cookie_t mtxs_ipl; /* 8-11 */
- volatile uint8_t mtxs_waiters; /* 12 */
+ __cpu_simple_lock_t mtxu_lock; /* 0-15 */
+ volatile uint32_t mtxs_owner; /* 16-19 */
+ ipl_cookie_t mtxs_ipl; /* 20-23 */
+ volatile uint8_t mtxs_waiters; /* 24 */
/* For LOCKDEBUG */
- uint8_t mtxs_id[3]; /* 13-15 */
+ uint8_t mtxs_id[3]; /* 25-27 */
} s;
#endif
- uint8_t mtxu_pad[16]; /* 0-15 */
+ uint8_t mtxu_pad[32]; /* 0-31 */
} u;
} __aligned (16);
#endif
@@ -79,7 +78,7 @@ struct kmutex {
#define __HAVE_MUTEX_STUBS 1
-#define mtx_lock u.mtxu_lock
+#define mtx_lock u.s.mtxu_lock
#define mtx_owner u.s.mtxs_owner
#define mtx_ipl u.s.mtxs_ipl
#define mtx_waiters u.s.mtxs_waiters
Index: sys/arch/hppa/include/types.h
===================================================================
RCS file: /cvsroot/src/sys/arch/hppa/include/types.h,v
retrieving revision 1.14
diff -u -p -u -r1.14 types.h
--- sys/arch/hppa/include/types.h 17 May 2007 14:51:20 -0000 1.14
+++ sys/arch/hppa/include/types.h 27 Jun 2007 10:33:38 -0000
@@ -36,6 +36,7 @@
#ifndef _HPPA_TYPES_H_
#define _HPPA_TYPES_H_
+#include <sys/cdefs.h>
#include <sys/featuretest.h>
#if defined(_NETBSD_SOURCE)
@@ -63,10 +64,13 @@ typedef unsigned long vm_size_t;
/*
* Semaphores must be aligned on 16-byte boundaries on the PA-RISC.
*/
-typedef volatile unsigned long __cpu_simple_lock_t;
+typedef volatile struct {
+ volatile unsigned long csl_lock[4];
+} __cpu_simple_lock_t;
-#define __SIMPLELOCK_LOCKED 0
-#define __SIMPLELOCK_UNLOCKED 1
+
+#define __SIMPLELOCK_LOCKED { { 0, 0, 0, 0} }
+#define __SIMPLELOCK_UNLOCKED { { 1, 1, 1, 1} }
typedef int register_t;