Source-Changes-HG archive
[src/trunk]: src/sys/arch/mips/include mips: Brush up __cpu_simple_lock.
details: https://anonhg.NetBSD.org/src/rev/7771dbdf76bb
branches: trunk
changeset: 361158:7771dbdf76bb
user: riastradh <riastradh%NetBSD.org@localhost>
date: Sat Feb 12 17:10:02 2022 +0000
description:
mips: Brush up __cpu_simple_lock.
- Eradicate the last vestiges of the mb_* barriers.

- In __cpu_simple_lock_init, omit the needless barrier.  It is the
  caller's responsibility to ensure __cpu_simple_lock_init happens
  before any other operations on the lock anyway, so there was never
  any need for a barrier here.

- In __cpu_simple_lock_try, leave comments about the memory ordering
  guarantees of the kernel's _atomic_cas_uint, which are inexplicably
  different from those of the non-underscored atomic_cas_uint.

- In __cpu_simple_unlock, use membar_exit instead of mb_memory, and
  do it unconditionally.

  This ensures that in __cpu_simple_lock/.../__cpu_simple_unlock, all
  memory operations in the ellipsis happen before the store that
  releases the lock.  (A portable sketch of this acquire/release
  pairing follows the description below.)

  - On Octeon, the barrier was omitted altogether, which is a bug --
    it needs to be there, or else there is no happens-before relation
    and whoever takes the lock next might see stale stored values or
    even stomp over the unlocking CPU's delayed loads.

  - On non-Octeon, the mb_memory was sync.  Using membar_exit
    preserves this.

  XXX On Octeon, membar_exit only issues syncw.  This seems wrong:
  syncw orders only store-before-store, not load/store-before-store,
  unless the CNMIPS architecture guarantees it is sufficient here, as
  in SPARCv8/v9 PSO (`Partial Store Order').

- Leave an essay with citations about why we have an apparently
  pointless syncw _after_ releasing a lock, to work around a design
  bug^W^Wquirk in cnmips which sometimes buffers stores for hundreds
  of thousands of cycles for fun unless you issue syncw.
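
[Editorial sketch] For readers following the memory-ordering argument
above, here is a minimal portable illustration of the acquire/release
pairing this change establishes, written with C11 atomics.  The names
(simple_lock_t, lock_init, lock_try, lock_unlock) are hypothetical
stand-ins, not the MIPS code in the diff below: the real kernel path
relies on _atomic_cas_uint issuing sync on success and on membar_exit
before the releasing store.

	#include <stdatomic.h>
	#include <stdbool.h>

	typedef atomic_uint simple_lock_t;	/* hypothetical stand-in */

	#define	LOCK_UNLOCKED	0u
	#define	LOCK_LOCKED	1u

	/* No barrier: the caller must publish the initialized lock safely. */
	static inline void
	lock_init(simple_lock_t *lp)
	{
		atomic_init(lp, LOCK_UNLOCKED);
	}

	/*
	 * Acquire on success: loads and stores in the critical section
	 * cannot be reordered before the winning compare-and-swap.
	 */
	static inline bool
	lock_try(simple_lock_t *lp)
	{
		unsigned int expected = LOCK_UNLOCKED;

		return (atomic_compare_exchange_strong_explicit(lp, &expected,
		    LOCK_LOCKED, memory_order_acquire, memory_order_relaxed));
	}

	/*
	 * Release: all memory operations in the critical section happen
	 * before the store that releases the lock, pairing with the
	 * acquire in lock_try.
	 */
	static inline void
	lock_unlock(simple_lock_t *lp)
	{
		atomic_store_explicit(lp, LOCK_UNLOCKED, memory_order_release);
	}
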
diffstat:
 common/lib/libc/arch/mips/atomic/membar_ops.S |  12 +--
 sys/arch/mips/include/lock.h                  | 116 ++++++++++++-----------
 2 files changed, 64 insertions(+), 64 deletions(-)
diffs (189 lines):
diff -r 40e67b2ca44d -r 7771dbdf76bb common/lib/libc/arch/mips/atomic/membar_ops.S
--- a/common/lib/libc/arch/mips/atomic/membar_ops.S Sat Feb 12 17:09:43 2022 +0000
+++ b/common/lib/libc/arch/mips/atomic/membar_ops.S Sat Feb 12 17:10:02 2022 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: membar_ops.S,v 1.10 2020/08/10 14:37:38 skrll Exp $ */
+/* $NetBSD: membar_ops.S,v 1.11 2022/02/12 17:10:02 riastradh Exp $ */
/*-
* Copyright (c) 2006, 2007 The NetBSD Foundation, Inc.
@@ -46,16 +46,6 @@
END(_membar_producer)
#endif
-#ifdef _KERNEL
-STRONG_ALIAS(mb_read, _membar_sync)
-#ifdef __OCTEON__
-STRONG_ALIAS(mb_write, _membar_producer)
-#else
-STRONG_ALIAS(mb_write, _membar_sync)
-#endif
-STRONG_ALIAS(mb_memory, _membar_sync)
-#endif
-
ATOMIC_OP_ALIAS(membar_sync,_membar_sync)
ATOMIC_OP_ALIAS(membar_enter,_membar_sync)
STRONG_ALIAS(_membar_enter,_membar_sync)
diff -r 40e67b2ca44d -r 7771dbdf76bb sys/arch/mips/include/lock.h
--- a/sys/arch/mips/include/lock.h Sat Feb 12 17:09:43 2022 +0000
+++ b/sys/arch/mips/include/lock.h Sat Feb 12 17:10:02 2022 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: lock.h,v 1.21 2020/08/05 05:24:44 simonb Exp $ */
+/* $NetBSD: lock.h,v 1.22 2022/02/12 17:10:02 riastradh Exp $ */
/*-
* Copyright (c) 2001, 2007 The NetBSD Foundation, Inc.
@@ -41,6 +41,8 @@
#include <sys/param.h>
+#include <sys/atomic.h>
+
static __inline int
__SIMPLELOCK_LOCKED_P(const __cpu_simple_lock_t *__ptr)
{
@@ -98,63 +100,30 @@
return (v0 != 0);
}
-#ifdef MIPS1
-static __inline void
-mb_read(void)
-{
- __insn_barrier();
-}
-
-static __inline void
-mb_write(void)
-{
- __insn_barrier();
-}
-
-static __inline void
-mb_memory(void)
-{
- __insn_barrier();
-}
-#else /* MIPS1*/
-static __inline void
-mb_read(void)
-{
- __asm volatile(
- " .set push \n"
- " .set mips2 \n"
- " sync \n"
- " .set pop"
- ::: "memory"
- );
-}
-
-static __inline void
-mb_write(void)
-{
- mb_read();
-}
-
-static __inline void
-mb_memory(void)
-{
- mb_read();
-}
-#endif /* MIPS1 */
-
#else /* !_HARDKERNEL */
u_int _atomic_cas_uint(volatile u_int *, u_int, u_int);
u_long _atomic_cas_ulong(volatile u_long *, u_long, u_long);
void * _atomic_cas_ptr(volatile void *, void *, void *);
-void mb_read(void);
-void mb_write(void);
-void mb_memory(void);
static __inline int
__cpu_simple_lock_try(__cpu_simple_lock_t *lp)
{
+ /*
+ * Successful _atomic_cas_uint functions as a load-acquire --
+ * on MP systems, it issues sync after the LL/SC CAS succeeds;
+ * on non-MP systems every load is a load-acquire so it's moot.
+ * This pairs with the membar_exit and store sequence in
+ * __cpu_simple_unlock that functions as a store-release
+ * operation.
+ *
+ * NOTE: This applies only to _atomic_cas_uint (with the
+ * underscore), in sys/arch/mips/mips/lock_stubs_*.S. Not true
+ * for atomic_cas_uint (without the underscore), from
+ * common/lib/libc/arch/mips/atomic/atomic_cas.S which does not
+ * imply a load-acquire. It is unclear why these disagree.
+ */
return _atomic_cas_uint(lp,
__SIMPLELOCK_UNLOCKED, __SIMPLELOCK_LOCKED) ==
__SIMPLELOCK_UNLOCKED;
@@ -167,7 +136,6 @@
{
*lp = __SIMPLELOCK_UNLOCKED;
- mb_memory();
}
static __inline void
@@ -184,12 +152,54 @@
__cpu_simple_unlock(__cpu_simple_lock_t *lp)
{
-#ifndef _MIPS_ARCH_OCTEONP
- mb_memory();
-#endif
+ /*
+ * The membar_exit and then store functions as a store-release
+ * operation that pairs with the load-acquire operation in
+ * successful __cpu_simple_lock_try.
+ *
+ * Can't use atomic_store_release here because that's not
+ * available in userland at the moment.
+ */
+ membar_exit();
*lp = __SIMPLELOCK_UNLOCKED;
+
#ifdef _MIPS_ARCH_OCTEONP
- mb_write();
+ /*
+ * On Cavium's recommendation, we issue an extra SYNCW that is
+ * not necessary for correct ordering because apparently stores
+ * can get stuck in Octeon store buffers for hundreds of
+ * thousands of cycles, according to the following note:
+ *
+ * Programming Notes:
+ * [...]
+ * Core A (writer)
+ * SW R1, DATA
+ * LI R2, 1
+ * SYNCW
+ * SW R2, FLAG
+ * SYNCW
+ * [...]
+ *
+ * The second SYNCW instruction executed by core A is not
+ * necessary for correctness, but has very important
+ * performance effects on OCTEON. Without it, the store
+ * to FLAG may linger in core A's write buffer before it
+ * becomes visible to other cores. (If core A is not
+ * performing many stores, this may add hundreds of
+ * thousands of cycles to the flag release time since the
+ * OCTEON core normally retains stores to attempt to merge
+ * them before sending the store on the CMB.)
+ * Applications should include this second SYNCW
+ * instruction after flag or lock releases.
+ *
+ * Cavium Networks OCTEON Plus CN50XX Hardware Reference
+ * Manual, July 2008, Appendix A, p. 943.
+ * https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/hactive/CN50XX-HRM-V0.99E.pdf
+ *
+ * XXX It might be prudent to put this into
+ * atomic_store_release itself.
+ */
+ __asm volatile("syncw" ::: "memory");
#endif
}
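
[Editorial sketch] As a closing illustration, here is a hedged view of
what the Octeon release path above boils down to once membar_exit is
expanded, assuming (per the XXX note in the description) that
membar_exit emits syncw on cnmips.  The function name octeon_unlock is
hypothetical; this is not code from the tree.

	/* Hypothetical flattened view of the cnmips unlock path. */
	static inline void
	octeon_unlock(volatile unsigned int *lp)
	{

		/* membar_exit: order prior stores before the releasing
		 * store (store-before-store only on cnmips). */
		__asm volatile("syncw" ::: "memory");
		*lp = 0;		/* release the lock */
		/*
		 * Extra syncw, per Cavium's programming note quoted in
		 * the diff: flush the write buffer so the releasing
		 * store becomes visible promptly instead of lingering
		 * for hundreds of thousands of cycles.
		 */
		__asm volatile("syncw" ::: "memory");
	}
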