Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/arch/aarch64 Part II of ad's aarch64 performance improve...



details:   https://anonhg.NetBSD.org/src/rev/640b7a5ea62e
branches:  trunk
changeset: 937228:640b7a5ea62e
user:      skrll <skrll%NetBSD.org@localhost>
date:      Wed Aug 12 13:19:35 2020 +0000

description:
Part II of ad's aarch64 performance improvements (cpuswitch.S bugs are
all mine)

- Use tpidr_el1 to hold curlwp and not curcpu, because curlwp is accessed
  much more often by MI code.  It also makes curlwp preemption safe and
  allows aarch64_curlwp() to be a const function (curcpu must be volatile).

- Make ASTs operate per-LWP rather than per-CPU; otherwise LWPs can
  sometimes see spurious ASTs (this doesn't cause a problem, it just means
  some time may be wasted).

- Use plain stores to set/clear ASTs.  Make sure ASTs are always set on the
  same CPU as the target LWP, and delivered via IPI if posted from a remote
  CPU so that they are resolved quickly.

- Add some cache line padding to struct cpu_info, to match x86.

- Add a memory barrier in a couple of places where ci_curlwp is set.  This
  is needed whenever an LWP that is resuming on the CPU could hold an
  adaptive mutex.  The barrier needs to drain the CPU's store buffer, so
  that the update to ci_curlwp becomes globally visible before the LWP can
  resume and call mutex_exit().  By my reading of the ARM docs it looks like
  the instruction I used (dmb st) will do the right thing, but I'm not 100%
  sure.

diffstat:

 sys/arch/aarch64/aarch64/copyinout.S    |   7 +--
 sys/arch/aarch64/aarch64/cpu_machdep.c  |  23 ++++++++++++--
 sys/arch/aarch64/aarch64/cpuswitch.S    |  52 ++++++++++++++++++--------------
 sys/arch/aarch64/aarch64/db_machdep.c   |   6 +--
 sys/arch/aarch64/aarch64/fusu.S         |   7 +--
 sys/arch/aarch64/aarch64/genassym.cf    |   4 +-
 sys/arch/aarch64/aarch64/idle_machdep.S |  10 +++--
 sys/arch/aarch64/aarch64/locore.S       |  25 ++++++++-------
 sys/arch/aarch64/aarch64/vectors.S      |   5 +-
 sys/arch/aarch64/include/cpu.h          |  48 +++++++++++++++++++++--------
 sys/arch/aarch64/include/proc.h         |   3 +-
 11 files changed, 115 insertions(+), 75 deletions(-)

diffs (truncated from 545 to 300 lines):

diff -r 4f77423b78b4 -r 640b7a5ea62e sys/arch/aarch64/aarch64/copyinout.S
--- a/sys/arch/aarch64/aarch64/copyinout.S      Wed Aug 12 12:59:57 2020 +0000
+++ b/sys/arch/aarch64/aarch64/copyinout.S      Wed Aug 12 13:19:35 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: copyinout.S,v 1.14 2020/08/06 06:49:55 ryo Exp $ */
+/* $NetBSD: copyinout.S,v 1.15 2020/08/12 13:19:35 skrll Exp $ */
 
 /*-
  * Copyright (c) 2014 The NetBSD Foundation, Inc.
@@ -33,7 +33,7 @@
 #include <aarch64/asm.h>
 #include "assym.h"
 
-RCSID("$NetBSD: copyinout.S,v 1.14 2020/08/06 06:49:55 ryo Exp $");
+RCSID("$NetBSD: copyinout.S,v 1.15 2020/08/12 13:19:35 skrll Exp $");
 
 #ifdef ARMV81_PAN
 #define PAN_ENABLE     \
@@ -80,8 +80,7 @@
 
        .macro exit_cpu_onfault
        /* curlwp->l_md.md_onfault = NULL */
-       mrs     x0, tpidr_el1                   /* curcpu */
-       ldr     x0, [x0, #CI_CURLWP]            /* x0 = curlwp */
+       mrs     x0, tpidr_el1                   /* x0 = curlwp */
        str     xzr, [x0, #L_MD_ONFAULT]        /* lwp->l_md_onfault = NULL */
 9:
        PAN_ENABLE                              /* enable PAN */
diff -r 4f77423b78b4 -r 640b7a5ea62e sys/arch/aarch64/aarch64/cpu_machdep.c
--- a/sys/arch/aarch64/aarch64/cpu_machdep.c    Wed Aug 12 12:59:57 2020 +0000
+++ b/sys/arch/aarch64/aarch64/cpu_machdep.c    Wed Aug 12 13:19:35 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: cpu_machdep.c,v 1.10 2020/05/21 05:41:40 ryo Exp $ */
+/* $NetBSD: cpu_machdep.c,v 1.11 2020/08/12 13:19:35 skrll Exp $ */
 
 /*-
  * Copyright (c) 2014, 2019 The NetBSD Foundation, Inc.
@@ -31,7 +31,7 @@
 
 #include <sys/cdefs.h>
 
-__KERNEL_RCSID(1, "$NetBSD: cpu_machdep.c,v 1.10 2020/05/21 05:41:40 ryo Exp $");
+__KERNEL_RCSID(1, "$NetBSD: cpu_machdep.c,v 1.11 2020/08/12 13:19:35 skrll Exp $");
 
 #include "opt_multiprocessor.h"
 
@@ -261,7 +261,7 @@
                intr_ipi_send(ci->ci_kcpuset, IPI_AST);
 #endif
        } else {
-               setsoftast(ci); /* force call to ast() */
+               l->l_md.md_astpending = 1;
        }
 }
 
@@ -272,7 +272,22 @@
        KASSERT(l->l_cpu == curcpu());
 
        l->l_pflag |= LP_OWEUPC;
-       setsoftast(l->l_cpu);
+       l->l_md.md_astpending = 1;
+}
+
+void
+cpu_signotify(struct lwp *l)
+{
+
+       KASSERT(kpreempt_disabled());
+
+       if (l->l_cpu != curcpu()) {
+#ifdef MULTIPROCESSOR
+               intr_ipi_send(l->l_cpu->ci_kcpuset, IPI_AST);
+#endif
+       } else {
+               l->l_md.md_astpending = 1;
+       }
 }
 
 #ifdef __HAVE_PREEMPTION
diff -r 4f77423b78b4 -r 640b7a5ea62e sys/arch/aarch64/aarch64/cpuswitch.S
--- a/sys/arch/aarch64/aarch64/cpuswitch.S      Wed Aug 12 12:59:57 2020 +0000
+++ b/sys/arch/aarch64/aarch64/cpuswitch.S      Wed Aug 12 13:19:35 2020 +0000
@@ -1,7 +1,7 @@
-/* $NetBSD: cpuswitch.S,v 1.24 2020/08/06 06:49:55 ryo Exp $ */
+/* $NetBSD: cpuswitch.S,v 1.25 2020/08/12 13:19:35 skrll Exp $ */
 
 /*-
- * Copyright (c) 2014 The NetBSD Foundation, Inc.
+ * Copyright (c) 2014, 2020 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -38,7 +38,7 @@
 #include "opt_ddb.h"
 #include "opt_kasan.h"
 
-RCSID("$NetBSD: cpuswitch.S,v 1.24 2020/08/06 06:49:55 ryo Exp $")
+RCSID("$NetBSD: cpuswitch.S,v 1.25 2020/08/12 13:19:35 skrll Exp $")
 
        ARMV8_DEFINE_OPTIONS
 
@@ -83,11 +83,9 @@
 #endif
        ldr     x5, [x1, #L_MD_CPACR]   /* get cpacr_el1 */
 
-       mrs     x3, tpidr_el1
        DISABLE_INTERRUPT
        mov     sp, x4                  /* restore stack pointer */
        msr     cpacr_el1, x5           /* restore cpacr_el1 */
-       str     x1, [x3, #CI_CURLWP]    /* switch curlwp to new lwp */
 
 #ifdef ARMV83_PAC
        /* Switch the PAC key. */
@@ -118,6 +116,10 @@
 1:
 #endif
 
+       msr     tpidr_el1, x1           /* switch curlwp to new lwp */
+       ldr     x3, [x1, #L_CPU]
+       str     x1, [x3, #CI_CURLWP]    /* switch curlwp to new lwp */
+       dmb     st                      /* see comments in kern_mutex.c */
        ENABLE_INTERRUPT
 
        /*
@@ -161,8 +163,7 @@
        stp     x27, x28, [sp, #TF_X27]
        stp     x29, x2, [sp, #TF_X29]  /* tf->lr = softint_cleanup; */
 
-       mrs     x20, tpidr_el1          /* x20 := curcpu() */
-       ldr     x19, [x20, #CI_CURLWP]  /* x19 := curcpu()->ci_curlwp */
+       mrs     x19, tpidr_el1          /* x19 := curlwp */
        mov     x4, sp
 
        mrs     x5, cpacr_el1
@@ -180,9 +181,13 @@
        ldr     x4, [x0, #L_MD_UTF]
 
        DISABLE_INTERRUPT
+       ldr     x20, [x19, #L_CPU]      /* x20 := curlwp->l_cpu */
+
        /* onto new stack */
        sub     sp, x4, #TF_SIZE        /* new sp := softlwp->l_md_utf - 1 */
+       msr     tpidr_el1, x0           /* curlwp = softlwp; */
        str     x0, [x20, #CI_CURLWP]   /* curcpu()->ci_curlwp = softlwp; */
+                                       /* no need for memory barrier here */
 
        mov     x5, #CPACR_FPEN_NONE
        msr     cpacr_el1, x5           /* cpacr_el1 = CPACR_FPEN_NONE */
@@ -203,7 +208,6 @@
        mov     x0, x19                 /* x0 := pinned_lwp */
        bl      _C_LABEL(softint_dispatch)
 
-       mrs     x20, tpidr_el1
        ldr     x6, [x19, #L_PCB]       /* x6 = lwp_getpcb(curlwp) */
        ldr     x4, [x6, #PCB_TF]       /* x4 := pinned_lwp->l_addr->pcb_tf */
 #ifdef DDB
@@ -212,7 +216,11 @@
        ldr     x5, [x19, #L_MD_CPACR]  /* x5 := pinned_lwp->l_md_cpacr */
 
        DISABLE_INTERRUPT
-       str     x19, [x20, #CI_CURLWP]  /* curcpu()->ci_curlwp := x19 */
+       msr     tpidr_el1, x19          /* curlwp = pinned_lwp */
+       ldr     x3, [x19, #L_CPU]       /* x3 = curlwp->l_cpu */
+       str     x19, [x3, #CI_CURLWP]   /* curlwp->l_cpu->ci_curlwp := x19 */
+       dmb     st                      /* see comments in kern_mutex.c */
+
        mov     sp, x4                  /* restore pinned_lwp sp */
        msr     cpacr_el1, x5           /* restore pinned_lwp cpacr */
 
@@ -249,10 +257,11 @@
 ENTRY_NP(softint_cleanup)
        mov     lr, x20                 /* restore original lr */
 
-       mrs     x20, tpidr_el1          /* curcpu() */
-       ldr     w2, [x20, #CI_MTX_COUNT]/* ->ci_mtx_count */
+       mrs     x20, tpidr_el1          /* curlwp */
+       ldr     x3, [x20, #L_CPU]       /* curcpu */
+       ldr     w2, [x3, #CI_MTX_COUNT] /* ->ci_mtx_count */
        add     w2, w2, #1
-       str     w2, [x20, #CI_MTX_COUNT]
+       str     w2, [x3, #CI_MTX_COUNT]
 
        msr     daif, x19               /* restore interrupt mask */
        ldp     x19, x20, [sp], #16     /* restore */
@@ -366,15 +375,13 @@
 ENTRY_NP(el0_trap_exit)
        DISABLE_INTERRUPT               /* make sure I|F marked */
 1:
-       /* while (curcpu()->ci_astpending & __BIT(0)) { */
+       /* while (curlwp->l_md.md_astpending != 0) { */
        mrs     x8, tpidr_el1
-       ldr     w9, [x8, #CI_ASTPENDING]
-       tbz     w9, #0, 9f
+       ldr     w9, [x8, #L_MD_ASTPENDING]
+       cbz     w9, 9f
 
-       /*  atomic_and_uint(&curcpu()->ci_astpending, ~__BIT(0)); */
-       mov     w1, #~__BIT(0)
-       add     x0, x8, #CI_ASTPENDING
-       bl      _C_LABEL(atomic_and_uint);
+       /* curlwp->l_md.md_astpending = 0; */
+       str     xzr, [x8, #L_MD_ASTPENDING]
 
        /*  trap_doast(tf); */
        ENABLE_INTERRUPT
@@ -384,8 +391,8 @@
        b       1b
        /* } */
 9:
-       mrs     x8, tpidr_el1
-       ldr     x9, [x8, #CI_CURLWP]
+
+       mrs     x9, tpidr_el1
        ldr     x23, [x9, #L_MD_CPACR]
        msr     cpacr_el1, x23          /* FP unit EL0 handover */
        isb                             /* necessary? */
@@ -446,8 +453,7 @@
  * int cpu_set_onfault(struct faultbuf *fb)
  */
 ENTRY_NP(cpu_set_onfault)
-       mrs     x3, tpidr_el1
-       ldr     x2, [x3, #CI_CURLWP]    /* curlwp = curcpu()->ci_curlwp */
+       mrs     x2, tpidr_el1           /* x2 = curlwp */
        str     x0, [x2, #L_MD_ONFAULT] /* l_md.md_onfault = fb */
 
        stp     x19, x20, [x0, #(FB_X19 * 8)]
diff -r 4f77423b78b4 -r 640b7a5ea62e sys/arch/aarch64/aarch64/db_machdep.c
--- a/sys/arch/aarch64/aarch64/db_machdep.c     Wed Aug 12 12:59:57 2020 +0000
+++ b/sys/arch/aarch64/aarch64/db_machdep.c     Wed Aug 12 13:19:35 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: db_machdep.c,v 1.25 2020/07/02 11:10:48 jmcneill Exp $ */
+/* $NetBSD: db_machdep.c,v 1.26 2020/08/12 13:19:35 skrll Exp $ */
 
 /*-
  * Copyright (c) 2014 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: db_machdep.c,v 1.25 2020/07/02 11:10:48 jmcneill Exp $");
+__KERNEL_RCSID(0, "$NetBSD: db_machdep.c,v 1.26 2020/08/12 13:19:35 skrll Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_compat_netbsd32.h"
@@ -322,8 +322,6 @@
            &ci->ci_cpl, cpuid, cpuinfobuf.ci_cpl);
        db_printf("%p cpu[%lu].ci_softints     = 0x%08x\n",
            &ci->ci_softints, cpuid, cpuinfobuf.ci_softints);
-       db_printf("%p cpu[%lu].ci_astpending   = 0x%08x\n",
-           &ci->ci_astpending, cpuid, cpuinfobuf.ci_astpending);
        db_printf("%p cpu[%lu].ci_intr_depth   = %u\n",
            &ci->ci_intr_depth, cpuid, cpuinfobuf.ci_intr_depth);
        db_printf("%p cpu[%lu].ci_biglock_count = %u\n",
diff -r 4f77423b78b4 -r 640b7a5ea62e sys/arch/aarch64/aarch64/fusu.S
--- a/sys/arch/aarch64/aarch64/fusu.S   Wed Aug 12 12:59:57 2020 +0000
+++ b/sys/arch/aarch64/aarch64/fusu.S   Wed Aug 12 13:19:35 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: fusu.S,v 1.9 2020/08/06 06:49:55 ryo Exp $     */
+/*     $NetBSD: fusu.S,v 1.10 2020/08/12 13:19:35 skrll Exp $  */
 
 /*-
  * Copyright (c) 2014, 2019 The NetBSD Foundation, Inc.
@@ -32,7 +32,7 @@
 #include <aarch64/asm.h>
 #include "assym.h"
 
-RCSID("$NetBSD: fusu.S,v 1.9 2020/08/06 06:49:55 ryo Exp $");
+RCSID("$NetBSD: fusu.S,v 1.10 2020/08/12 13:19:35 skrll Exp $");
 
 #ifdef ARMV81_PAN
 #define PAN_ENABLE     \
@@ -73,8 +73,7 @@
 
        .macro exit_cpu_onfault
        /* curlwp->l_md.md_onfault = NULL */
-       mrs     x1, tpidr_el1                   /* curcpu */
-       ldr     x1, [x1, #CI_CURLWP]            /* x1 = curlwp */
+       mrs     x1, tpidr_el1                   /* x1 = curlwp */
        str     xzr, [x1, #L_MD_ONFAULT]        /* lwp->l_md_onfault = NULL */
 9:
        PAN_ENABLE                              /* enable PAN */
diff -r 4f77423b78b4 -r 640b7a5ea62e sys/arch/aarch64/aarch64/genassym.cf
--- a/sys/arch/aarch64/aarch64/genassym.cf      Wed Aug 12 12:59:57 2020 +0000
+++ b/sys/arch/aarch64/aarch64/genassym.cf      Wed Aug 12 13:19:35 2020 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: genassym.cf,v 1.29 2020/08/06 06:49:55 ryo Exp $
+# $NetBSD: genassym.cf,v 1.30 2020/08/12 13:19:35 skrll Exp $
 #-
 # Copyright (c) 2014 The NetBSD Foundation, Inc.
 # All rights reserved.
@@ -154,6 +154,7 @@
 define L_MD_UTF                offsetof(struct lwp, l_md.md_utf)
 define L_MD_CPACR              offsetof(struct lwp, l_md.md_cpacr)
 define L_MD_ONFAULT            offsetof(struct lwp, l_md.md_onfault)
+define L_MD_ASTPENDING         offsetof(struct lwp, l_md.md_astpending)
 define L_MD_IA_KERN            offsetof(struct lwp, l_md.md_ia_kern)
 define L_MD_IA_USER            offsetof(struct lwp, l_md.md_ia_user)
 define L_MD_IB_USER            offsetof(struct lwp, l_md.md_ib_user)
@@ -288,7 +289,6 @@
 define CI_CPUID                offsetof(struct cpu_info, ci_cpuid)



Home | Main Index | Thread Index | Old Index