Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/arch/aarch64/aarch64 Use separate cacheline aligned arrays for mbox and hatched as before.



details:   https://anonhg.NetBSD.org/src/rev/7330494ade1f
branches:  trunk
changeset: 1004276:7330494ade1f
user:      jmcneill <jmcneill%NetBSD.org@localhost>
date:      Sun Oct 20 14:03:51 2019 +0000

description:
Use separate cacheline-aligned arrays for mbox and hatched, as before.

diffstat:

 sys/arch/aarch64/aarch64/cpu.c    |  42 +++++++++++++++++++-------------------
 sys/arch/aarch64/aarch64/locore.S |  31 +++++++++++++++++++---------
 2 files changed, 42 insertions(+), 31 deletions(-)

diffs (173 lines):

diff -r 66000dcc9d1c -r 7330494ade1f sys/arch/aarch64/aarch64/cpu.c
--- a/sys/arch/aarch64/aarch64/cpu.c    Sun Oct 20 12:25:43 2019 +0000
+++ b/sys/arch/aarch64/aarch64/cpu.c    Sun Oct 20 14:03:51 2019 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: cpu.c,v 1.24 2019/10/20 11:17:41 jmcneill Exp $ */
+/* $NetBSD: cpu.c,v 1.25 2019/10/20 14:03:51 jmcneill Exp $ */
 
 /*
  * Copyright (c) 2017 Ryo Shimizu <ryo%nerv.org@localhost>
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: cpu.c,v 1.24 2019/10/20 11:17:41 jmcneill Exp $");
+__KERNEL_RCSID(1, "$NetBSD: cpu.c,v 1.25 2019/10/20 14:03:51 jmcneill Exp $");
 
 #include "locators.h"
 #include "opt_arm_debug.h"
@@ -69,9 +69,8 @@
 #ifdef MULTIPROCESSOR
 uint64_t cpu_mpidr[MAXCPUS];
 
-volatile u_int aarch64_cpu_mbox[MAXCPUS] __cacheline_aligned = { 0 };
-#define CPU_MBOX_HATCHED       __BIT(0)
-#define        CPU_MBOX_START          __BIT(1)
+volatile u_int aarch64_cpu_mbox[howmany(MAXCPUS, sizeof(u_int))] __cacheline_aligned = { 0 };
+volatile u_int aarch64_cpu_hatched[howmany(MAXCPUS, sizeof(u_int))] __cacheline_aligned = { 0 };
 u_int arm_cpu_max = 1;
 
 static kmutex_t cpu_hatch_lock;
@@ -500,7 +499,7 @@
 void
 cpu_boot_secondary_processors(void)
 {
-       u_int cpuno;
+       u_int n, bit;
 
        if ((boothowto & RB_MD1) != 0)
                return;
@@ -510,22 +509,20 @@
        VPRINTF("%s: starting secondary processors\n", __func__);
 
        /* send mbox to have secondary processors do cpu_hatch() */
-       for (cpuno = 1; cpuno < ncpu; cpuno++) {
-               if (cpu_hatched_p(cpuno) == false)
-                       continue;
-               atomic_or_uint(&aarch64_cpu_mbox[cpuno], CPU_MBOX_START);
-       }
+       for (n = 0; n < __arraycount(aarch64_cpu_mbox); n++)
+               atomic_or_uint(&aarch64_cpu_mbox[n], aarch64_cpu_hatched[n]);
        __asm __volatile ("sev; sev; sev");
 
        /* wait all cpus have done cpu_hatch() */
-       for (cpuno = 1; cpuno < ncpu; cpuno++) {
-               if (cpu_hatched_p(cpuno) == 0)
-                       continue;
-               while (membar_consumer(), aarch64_cpu_mbox[cpuno] & CPU_MBOX_START) {
+       for (n = 0; n < __arraycount(aarch64_cpu_mbox); n++) {
+               while (membar_consumer(), aarch64_cpu_mbox[n] & aarch64_cpu_hatched[n]) {
                        __asm __volatile ("wfe");
                }
-               /* Add processor to kcpuset */
-               kcpuset_set(kcpuset_attached, cpuno);
+               /* Add processors to kcpuset */
+               for (bit = 0; bit < 32; bit++) {
+                       if (aarch64_cpu_hatched[n] & __BIT(bit))
+                               kcpuset_set(kcpuset_attached, n * 32 + bit);
+               }
        }
 
        VPRINTF("%s: secondary processors hatched\n", __func__);
@@ -563,15 +560,18 @@
         * ci_index are each cpu0=0, cpu1=1, cpu2=undef, cpu3=2.
         * therefore we have to use device_unit instead of ci_index for mbox.
         */
-       const u_int cpuno = device_unit(ci->ci_dev);
-       atomic_and_uint(&aarch64_cpu_mbox[cpuno], ~(u_int)CPU_MBOX_START);
+       const u_int off = device_unit(ci->ci_dev) / 32;
+       const u_int bit = device_unit(ci->ci_dev) % 32;
+       atomic_and_uint(&aarch64_cpu_mbox[off], ~__BIT(bit));
        __asm __volatile ("sev; sev; sev");
 }
 
 bool
 cpu_hatched_p(u_int cpuindex)
 {
-       aarch64_dcache_inv_range((vaddr_t)&aarch64_cpu_mbox[cpuindex], 4);
-       return (aarch64_cpu_mbox[cpuindex] & CPU_MBOX_HATCHED) != 0;
+       const u_int off = cpuindex / 32;
+       const u_int bit = cpuindex % 32;
+       membar_consumer();
+       return (aarch64_cpu_hatched[off] & __BIT(bit)) != 0;
 }
 #endif /* MULTIPROCESSOR */
diff -r 66000dcc9d1c -r 7330494ade1f sys/arch/aarch64/aarch64/locore.S
--- a/sys/arch/aarch64/aarch64/locore.S Sun Oct 20 12:25:43 2019 +0000
+++ b/sys/arch/aarch64/aarch64/locore.S Sun Oct 20 14:03:51 2019 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: locore.S,v 1.43 2019/10/20 12:25:43 skrll Exp $        */
+/*     $NetBSD: locore.S,v 1.44 2019/10/20 14:03:51 jmcneill Exp $     */
 
 /*
  * Copyright (c) 2017 Ryo Shimizu <ryo%nerv.org@localhost>
@@ -38,7 +38,7 @@
 #include <aarch64/hypervisor.h>
 #include "assym.h"
 
-RCSID("$NetBSD: locore.S,v 1.43 2019/10/20 12:25:43 skrll Exp $")
+RCSID("$NetBSD: locore.S,v 1.44 2019/10/20 14:03:51 jmcneill Exp $")
 
 
 /*#define DEBUG_LOCORE                 /* debug print */
@@ -361,19 +361,16 @@
        mov     x1, xzr
 1:
        add     x1, x1, #1
-       cmp     x1, MAXCPUS             /* cpuindex >= MAXCPUS ? */
+       cmp     x1, #MAXCPUS            /* cpuindex >= MAXCPUS ? */
        bge     toomanycpus
        ldr     x2, [x0, x1, lsl #3]    /* cpu_mpidr[cpuindex] */
        cmp     x2, x3                  /* == mpidr_el1 & MPIDR_AFF ? */
        bne     1b
 
        mov     x27, x1                 /* x27 = cpuindex */
-       ADDR    x0, _C_LABEL(aarch64_cpu_mbox)
-       add     x28, x0, x27, lsl #2    /* x28 = &aarch64_cpu_mbox[cpuindex] */
 
        /*
         * x27 = cpuindex
-        * x28 = &aarch64_cpu_mbox[cpuindex]
         */
 
        /* set stack pointer for boot */
@@ -445,21 +442,35 @@
        mrs     x1, mpidr_el1
        str     x1, [x0, #CI_MPIDR]     /* curcpu()->ci_mpidr = mpidr_el1 */
 
+       mov     x0, #32
+       udiv    x1, x27, x0
+       ADDR    x0, _C_LABEL(aarch64_cpu_hatched)
+       add     x28, x0, x1, lsl #2     /* x28 = &aarch64_cpu_hatched[cpuindex/32] */
+       mov     x0, #1
+       mov     x2, #32
+       msub    x1, x1, x2, x27
+       lsl     x29, x0, x1             /* x29 = 1 << (cpuindex % 32) */
+
        /*
-        * atomic_or_uint(&aarch64_cpu_mbox[cpuindex], 1)
+        * atomic_or_uint(&aarch64_cpu_hatched[cpuindex/32], 1<<cpuindex%32)
         * to tell my activity to primary processor.
         */
        mov     x0, x28
-       mov     x1, #1
+       mov     x1, x29
        bl      _C_LABEL(atomic_or_uint)        /* hatched! */
        dsb     sy
        sev
 
+       mov     x0, #32
+       udiv    x1, x27, x0
+       ADDR    x0, _C_LABEL(aarch64_cpu_mbox)
+       add     x28, x0, x1, lsl #2     /* x28 = &aarch64_cpu_mbox[cpuindex/32] */
+
        /* wait for the mailbox start bit to become true */
 1:
        dmb     sy
-       ldr     x20, [x28]
-       tst     x20, #2
+       ldr     w20, [x28]
+       tst     w20, w29
        bne     9f
        wfe
        b       1b



Home | Main Index | Thread Index | Old Index