Source-Changes-HG archive


[src/trunk]: src/sys/arch/alpha Optimized fast-paths for rw_enter() / rw_tryenter() / rw_exit()



details:   https://anonhg.NetBSD.org/src/rev/95082251ed9c
branches:  trunk
changeset: 984536:95082251ed9c
user:      thorpej <thorpej%NetBSD.org@localhost>
date:      Sun Jul 11 01:58:41 2021 +0000

description:
Optimized fast-paths for rw_enter() / rw_tryenter() / rw_exit().
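
For readers who don't speak Alpha assembly, the shape of the new
read-acquire fast path can be restated in C.  The sketch below is
illustrative only, not code from this commit: the helper name
rw_read_enter_sketch() and the use of C11 atomics in place of the
LDQ_L/STQ_C pair are assumptions, and of the flag values only
RW_READER == 0 and RW_HAS_WAITERS == 1 are actually pinned down by
the new assertions in machdep.c below.

    #include <stdatomic.h>
    #include <stdint.h>

    /*
     * Lock-word bits as in <sys/rwlock.h>.  Exact values here are
     * assumptions, except RW_READER == 0 and RW_HAS_WAITERS == 1,
     * which the new __CTASSERTs in machdep.c pin down.
     */
    #define RW_READER           0
    #define RW_HAS_WAITERS      0x01UL
    #define RW_WRITE_WANTED     0x02UL
    #define RW_WRITE_LOCKED     0x04UL
    #define RW_READ_COUNT_SHIFT 5
    #define RW_READ_INCR        (1UL << RW_READ_COUNT_SHIFT)

    typedef struct { _Atomic uintptr_t rw_owner; } krwlock_t;

    void rw_vector_enter(krwlock_t *, int);     /* slow path */

    /*
     * Hypothetical C rendering of the rw_enter() read fast path.
     * The assembly's LDQ_L/STQ_C pair behaves like a weak CAS:
     * both can fail spuriously, and both retry via the same loop.
     */
    static void
    rw_read_enter_sketch(krwlock_t *rwl)
    {
        for (;;) {
            uintptr_t owner = atomic_load_explicit(&rwl->rw_owner,
                memory_order_relaxed);

            /* Contended if a writer holds or wants the lock. */
            if (owner & (RW_WRITE_LOCKED | RW_WRITE_WANTED)) {
                rw_vector_enter(rwl, RW_READER);
                return;
            }
            /* Bump the read count; acquire order stands in for MB. */
            if (atomic_compare_exchange_weak_explicit(&rwl->rw_owner,
                &owner, owner + RW_READ_INCR,
                memory_order_acquire, memory_order_relaxed))
                return;
            /* CAS failed, as STQ_C can; loop and retry. */
        }
    }

Note the design choice visible in the assembly: any contention jumps
straight to rw_vector_enter() rather than spinning in the stub, which
keeps the fast path short.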

diffstat:

 sys/arch/alpha/alpha/genassym.cf  |   11 ++-
 sys/arch/alpha/alpha/lock_stubs.s |  174 ++++++++++++++++++++++++++++++++++++-
 sys/arch/alpha/alpha/machdep.c    |   11 +-
 sys/arch/alpha/include/rwlock.h   |   38 ++++++++-
 4 files changed, 221 insertions(+), 13 deletions(-)

diffs (truncated from 345 to 300 lines):

diff -r c1a294667c8a -r 95082251ed9c sys/arch/alpha/alpha/genassym.cf
--- a/sys/arch/alpha/alpha/genassym.cf  Sun Jul 11 01:55:51 2021 +0000
+++ b/sys/arch/alpha/alpha/genassym.cf  Sun Jul 11 01:58:41 2021 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: genassym.cf,v 1.28 2021/07/11 01:54:42 thorpej Exp $
+# $NetBSD: genassym.cf,v 1.29 2021/07/11 01:58:41 thorpej Exp $
 
 #
 # Copyright (c) 1982, 1990, 1993
@@ -66,12 +66,15 @@
 #      from: @(#)genassym.c    8.3 (Berkeley) 1/4/94
 #
 
+quote #define __RWLOCK_PRIVATE
+
 include <sys/param.h>
 include <sys/buf.h>
 include <sys/proc.h>
 include <sys/sched.h>
 include <sys/mbuf.h>
 include <sys/msgbuf.h>
+include <sys/rwlock.h>
 include <sys/syscall.h>
 
 include <machine/cpu.h>
@@ -196,3 +199,9 @@
 define CPU_INFO_SSIR           offsetof(struct cpu_info, ci_ssir)
 define CPU_INFO_MTX_COUNT      offsetof(struct cpu_info, ci_mtx_count)
 define CPU_INFO_SIZEOF         sizeof(struct cpu_info)
+
+# Bits in lock fields
+define RW_WRITE_WANTED         RW_WRITE_WANTED
+define RW_WRITE_LOCKED         RW_WRITE_LOCKED
+define RW_READ_INCR            RW_READ_INCR
+define RW_READ_COUNT_SHIFT     RW_READ_COUNT_SHIFT
diff -r c1a294667c8a -r 95082251ed9c sys/arch/alpha/alpha/lock_stubs.s
--- a/sys/arch/alpha/alpha/lock_stubs.s Sun Jul 11 01:55:51 2021 +0000
+++ b/sys/arch/alpha/alpha/lock_stubs.s Sun Jul 11 01:58:41 2021 +0000
@@ -1,11 +1,11 @@
-/*     $NetBSD: lock_stubs.s,v 1.4 2020/09/04 02:54:56 thorpej Exp $   */
+/*     $NetBSD: lock_stubs.s,v 1.5 2021/07/11 01:58:41 thorpej Exp $   */
 
 /*-
- * Copyright (c) 2007 The NetBSD Foundation, Inc.
+ * Copyright (c) 2007, 2021 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
- * by Andrew Doran.
+ * by Andrew Doran, and by Jason R. Thorpe.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -34,7 +34,7 @@
 
 #include <machine/asm.h>
 
-__KERNEL_RCSID(0, "$NetBSD: lock_stubs.s,v 1.4 2020/09/04 02:54:56 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lock_stubs.s,v 1.5 2021/07/11 01:58:41 thorpej Exp $");
 
 #include "assym.h"
 
@@ -63,7 +63,7 @@
        RET
 3:
        br      1b
-END(_lock_cas)
+       END(_lock_cas)
 
 #if !defined(LOCKDEBUG)
 
@@ -72,7 +72,7 @@
  */
 LEAF(mutex_enter, 1)
        LDGP(pv)
-       GET_CURLWP
+       GET_CURLWP      /* Note: GET_CURLWP clobbers v0, t0, t8...t11. */
 1:
        mov     v0, t1
        ldq_l   t2, 0(a0)
@@ -86,7 +86,7 @@
        jmp     (t12)
 3:
        br      1b
-END(mutex_enter)
+       END(mutex_enter)
 
 /*
  * void mutex_exit(kmutex_t *mtx);
@@ -94,7 +94,7 @@
 LEAF(mutex_exit, 1)
        LDGP(pv)
        MB
-       GET_CURLWP
+       GET_CURLWP      /* Note: GET_CURLWP clobbers v0, t0, t8...t11. */
        mov     zero, t3
 1:
        ldq_l   t2, 0(a0)
@@ -108,6 +108,162 @@
        jmp     (t12)
 3:
        br      1b
-END(mutex_exit)
+       END(mutex_exit)
+
+/*
+ * void rw_enter(krwlock_t *rwl, krw_t op);
+ *
+ * Acquire one hold on a RW lock.
+ */
+LEAF(rw_enter, 2)
+       LDGP(pv)
+
+       /*
+        * RW_READER == 0 (we have a compile-time assert in machdep.c
+        * to ensure this).
+        *
+        * Acquire for read is the most common case.
+        */
+       bne     a1, 3f
+
+       /* Acquiring for read. */
+1:     ldq_l   t0, 0(a0)
+       and     t0, (RW_WRITE_LOCKED|RW_WRITE_WANTED), t1
+       addq    t0, RW_READ_INCR, t2
+       bne     t1, 4f          /* contended */
+       stq_c   t2, 0(a0)
+       beq     t2, 2f          /* STQ_C failed; retry */
+       MB
+       RET
+
+2:     br      1b
+
+3:     /* Acquiring for write. */
+       GET_CURLWP      /* Note: GET_CURLWP clobbers v0, t0, t8...t11. */
+       ldq_l   t0, 0(a0)
+       or      v0, RW_WRITE_LOCKED, t2
+       bne     t0, 4f          /* contended */
+       stq_c   t2, 0(a0)
+       beq     t2, 4f          /* STQ_C failed; consider it contended */
+       MB
+       RET
+
+4:     lda     pv, rw_vector_enter
+       jmp     (pv)
+       END(rw_enter)
+
+/*
+ * int rw_tryenter(krwlock_t *rwl, krw_t op);
+ *
+ * Try to acquire one hold on a RW lock.
+ */
+LEAF(rw_tryenter, 2)
+       LDGP(pv)
+
+       /* See above. */
+       bne     a1, 3f
+
+       /* Acquiring for read. */
+1:     ldq_l   t0, 0(a0)
+       and     t0, (RW_WRITE_LOCKED|RW_WRITE_WANTED), t1
+       addq    t0, RW_READ_INCR, v0
+       bne     t1, 4f          /* contended */
+       stq_c   v0, 0(a0)
+       beq     v0, 2f          /* STQ_C failed; retry */
+       MB
+       RET                     /* v0 contains non-zero LOCK_FLAG from STQ_C */
+
+2:     br      1b
+
+       /* Acquiring for write. */
+3:     GET_CURLWP      /* Note: GET_CURLWP clobbers v0, t0, t8...t11. */
+       ldq_l   t0, 0(a0)
+       or      v0, RW_WRITE_LOCKED, v0
+       bne     t0, 4f          /* contended */
+       stq_c   v0, 0(a0)
+       /*
+        * v0 now contains the LOCK_FLAG value from STQ_C, which is either
+        * 0 for failure, or non-zero for success.  In either case, v0's
+        * value is correct.  Go ahead and perform the memory barrier even
+        * in the failure case because we expect it to be rare and it saves
+        * a branch-not-taken instruction in the success case.
+        */
+       MB
+       RET
+
+4:     mov     zero, v0        /* return 0 (failure) */
+       RET
+       END(rw_tryenter)
+
+/*
+ * void rw_exit(krwlock_t *rwl);
+ *
+ * Release one hold on a RW lock.
+ */
+LEAF(rw_exit, 1)
+       LDGP(pv)
+       MB
+
+       /*
+        * Check for write-lock release, and get the owner/count field
+        * on its own for sanity-checking against expected values.
+        */
+       ldq     a1, 0(a0)
+       and     a1, RW_WRITE_LOCKED, t1
+       srl     a1, RW_READ_COUNT_SHIFT, a2
+       bne     t1, 3f
+
+       /*
+        * Releasing a read-lock.  Make sure the count is non-zero.
+        * If it is zero, take the slow path where the juicy diagnostic
+        * checks are located.
+        */
+       beq     a2, 4f
+
+       /*
+        * We use the following trick to check whether we're releasing
+        * the last read-count and there are waiters:
+        *
+        *      1. Set v0 to 1.
+        *      2. Shift the new read count into t1.
+        *      3. Conditionally move t1 to v0 based on low-bit-set of t0
+        *         (RW_HAS_WAITERS).  If RW_HAS_WAITERS is not set, then
+        *         the move will not take place, and v0 will remain 1.
+        *         Otherwise, v0 will contain the updated read count.
+        *      4. Jump to slow path if v0 == 0.
+        */
+1:     ldq_l   t0, 0(a0)
+       ldiq    v0, 1
+       subq    t0, RW_READ_INCR, t2
+       srl     t2, RW_READ_COUNT_SHIFT, t1
+       cmovlbs t0, t1, v0
+       beq     v0, 4f
+       stq_c   t2, 0(a0)
+       beq     t2, 2f          /* STQ_C failed; try again */
+       RET
+
+2:     br      1b
+
+       /*
+        * Releasing a write-lock.  Make sure the owner field points
+        * to our LWP.  If it does not, take the slow path where the
+        * juicy diagnostic checks are located.  a2 contains the owner
+        * field shifted down.  Shift it back up to compare to curlwp;
+        * this conveniently discards the bits we don't want to compare.
+        */
+3:     GET_CURLWP      /* Note: GET_CURLWP clobbers v0, t0, t8...t11. */
+       sll     a2, RW_READ_COUNT_SHIFT, a2
+       mov     zero, t2        /* fast-path write-unlock stores NULL */
+       cmpeq   v0, a2, v0      /* v0 = (owner == curlwp) */
+       ldq_l   t0, 0(a0)
+       beq     v0, 4f          /* owner field mismatch; need slow path */
+       blbs    t0, 4f          /* RW_HAS_WAITERS set; need slow-path */
+       stq_c   t2, 0(a0)
+       beq     t2, 4f          /* STQ_C failed; need slow-path */
+       RET
+
+4:     lda     pv, rw_vector_exit
+       jmp     (pv)
+       END(rw_exit)
 
 #endif /* !LOCKDEBUG */
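
The CMOVLBS trick described in the rw_exit() comment above is compact
enough that a C restatement may help.  This is a sketch under the same
assumptions as the earlier one (hypothetical helper name, C11 CAS
standing in for LDQ_L/STQ_C): the slow path is taken exactly when the
lock has waiters (RW_HAS_WAITERS, the low bit) and this release would
drop the read count to zero.  The zero-count sanity check (beq a2, 4f)
happens before this loop in the assembly and is omitted here.

    void rw_vector_exit(krwlock_t *);           /* slow path */

    /* Hypothetical C rendering of the rw_exit() read-release path. */
    static void
    rw_read_exit_sketch(krwlock_t *rwl)
    {
        for (;;) {
            uintptr_t old = atomic_load_explicit(&rwl->rw_owner,
                memory_order_relaxed);
            uintptr_t new = old - RW_READ_INCR;

            /*
             * CMOVLBS in C: only when the low bit (RW_HAS_WAITERS)
             * is set does the new read count matter.  A waiter on
             * the last release forces the slow path, where the
             * wakeup and the juicy diagnostic checks live.
             */
            if ((old & RW_HAS_WAITERS) != 0 &&
                (new >> RW_READ_COUNT_SHIFT) == 0) {
                rw_vector_exit(rwl);
                return;
            }
            /* Release order stands in for the MB on entry. */
            if (atomic_compare_exchange_weak_explicit(&rwl->rw_owner,
                &old, new, memory_order_release, memory_order_relaxed))
                return;
            /* STQ_C-style failure; retry, as the BR 1b does. */
        }
    }
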
diff -r c1a294667c8a -r 95082251ed9c sys/arch/alpha/alpha/machdep.c
--- a/sys/arch/alpha/alpha/machdep.c    Sun Jul 11 01:55:51 2021 +0000
+++ b/sys/arch/alpha/alpha/machdep.c    Sun Jul 11 01:58:41 2021 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: machdep.c,v 1.373 2021/07/04 22:42:35 thorpej Exp $ */
+/* $NetBSD: machdep.c,v 1.374 2021/07/11 01:58:41 thorpej Exp $ */
 
 /*-
  * Copyright (c) 1998, 1999, 2000, 2019, 2020 The NetBSD Foundation, Inc.
@@ -65,9 +65,11 @@
 #include "opt_dec_3000_500.h"
 #include "opt_execfmt.h"
 
+#define        __RWLOCK_PRIVATE 
+
 #include <sys/cdefs.h>                 /* RCS ID & Copyright macro defns */
 
-__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.373 2021/07/04 22:42:35 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.374 2021/07/11 01:58:41 thorpej Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -95,6 +97,7 @@
 #include <sys/kauth.h>
 #include <sys/atomic.h>
 #include <sys/cpu.h>
+#include <sys/rwlock.h>
 
 #include <machine/kcore.h>
 #include <machine/fpu.h>
@@ -133,6 +136,10 @@
 int sigpid = 0;
 #endif
 
+/* Assert some assumptions made in lock_stubs.s */
+__CTASSERT(RW_READER == 0);
+__CTASSERT(RW_HAS_WAITERS == 1);
+


