Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/arch/arm/arm Recode armv7_dcache_wbinv_all in asm. Add ...



details:   https://anonhg.NetBSD.org/src/rev/b66a682fd780
branches:  trunk
changeset: 781255:b66a682fd780
user:      matt <matt%NetBSD.org@localhost>
date:      Wed Aug 29 18:37:14 2012 +0000

description:
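Recode armv7_dcache_wbinv_all in assembly, moving it from cpufunc.c to
cpufunc_asm_armv7.S.  Add armv7_dcache_inv_all and armv7_icache_inv_all
as well.
Use the dsb/dmb/isb barrier instructions in place of the CP15
"drain write buffer" operation (mcr p15, 0, rX, c7, c10, 4).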

diffstat:

 sys/arch/arm/arm/cpufunc.c           |   51 +-----
 sys/arch/arm/arm/cpufunc_asm_armv7.S |  314 ++++++++++++++++++++++++++++------
 2 files changed, 258 insertions(+), 107 deletions(-)

diffs (truncated from 461 to 300 lines):

diff -r 7b31a0d10392 -r b66a682fd780 sys/arch/arm/arm/cpufunc.c
--- a/sys/arch/arm/arm/cpufunc.c        Wed Aug 29 18:29:04 2012 +0000
+++ b/sys/arch/arm/arm/cpufunc.c        Wed Aug 29 18:37:14 2012 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: cpufunc.c,v 1.112 2012/08/29 18:29:04 matt Exp $       */
+/*     $NetBSD: cpufunc.c,v 1.113 2012/08/29 18:37:14 matt Exp $       */
 
 /*
  * arm7tdmi support code Copyright (c) 2001 John Fremlin
@@ -49,7 +49,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpufunc.c,v 1.112 2012/08/29 18:29:04 matt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpufunc.c,v 1.113 2012/08/29 18:37:14 matt Exp $");
 
 #include "opt_compat_netbsd.h"
 #include "opt_cpuoptions.h"
@@ -2835,53 +2835,6 @@
        curcpu()->ci_ctrl = cpuctrl;
        cpu_control(0xffffffff, cpuctrl);
 }
-
-/* Clean the data cache to the level of coherency. Slow. */
-void
-armv7_dcache_wbinv_all(void)
-{
-       u_int clidr, loc, level;
-
-       /* Cache Level ID Register */
-       __asm volatile("mrc\tp15, 1, %0, c0, c0, 1" : "=r" (clidr));
-
-       loc = (clidr >> 24) & 7; /* Level of Coherency */
-
-       for (level = 0; level <= loc; level++) {
-               u_int ctype, csid;
-               int line_size, ways, nsets, wayshift, setshift;
-
-               ctype = (clidr >> (level * 3)) & 7;
-               /* We're supposed to stop when ctype == 0, but we
-                * trust that loc isn't larger than necesssary. */
-               if (ctype < 2) continue; /* no cache / only icache */
-
-               csid = get_cachesize_cp15(level << 1);
-               line_size = CPU_CSID_LEN(csid);
-               ways = CPU_CSID_ASSOC(csid);
-               nsets = (csid >> 13) & 0x7fff;
-
-               wayshift = __builtin_clz(ways); /* leading zeros */
-               setshift = line_size + 4;
-
-               for (; nsets >= 0; nsets--) {
-                       int way;
-
-                       for (way = ways; way >= 0; way--) {
-                               /* Clean by set/way */
-                               const u_int sw = (way << wayshift)
-                                   | (nsets << setshift)
-                                   | (level << 1);
-
-                               __asm volatile("mcr\tp15, 0, %0, c7, c10, 2"
-                                   :: "r"(sw));
-                       }
-               }
-       }
-
-       __asm volatile("dsb");
-       __asm volatile("isb");
-}
 #endif /* CPU_CORTEX */
 
 
diff -r 7b31a0d10392 -r b66a682fd780 sys/arch/arm/arm/cpufunc_asm_armv7.S
--- a/sys/arch/arm/arm/cpufunc_asm_armv7.S      Wed Aug 29 18:29:04 2012 +0000
+++ b/sys/arch/arm/arm/cpufunc_asm_armv7.S      Wed Aug 29 18:37:14 2012 +0000
@@ -31,50 +31,55 @@
 #include <machine/cpu.h>
 #include <machine/asm.h>
 
-#define entrysize              #32
-
        .arch   armv7a
 
-
 ENTRY(armv7_cpu_sleep)
-       tst     r0, #0x00000000         @shouldn't sleep 0
-       wfi
-       RET
+       tst     r0, #0x00000000         @ shouldn't sleep 0
+       wfene                           @ this can cheaper when doing MP
+       bx      lr
 END(armv7_cpu_sleep)
 
 ENTRY(armv7_wait)
-       mrc     p15, 0, r0, c2, c0, 0   @arbitrary read of CP15
-       add     r0, r0, #0              @a stall
-       RET
+       mrc     p15, 0, r0, c2, c0, 0   @ arbitrary read of CP15
+       add     r0, r0, #0              @ a stall
+       bx      lr
 END(armv7_wait)
 
 ENTRY(armv7_context_switch)
-       mcr     p15, 0, r0, c7, c10, 4  @drain the write buffer
-       mcr     p15, 0, r0, c2, c0, 0   @set the new TTB
-       mcr     p15, 0, r0, c8, c7, 0   @flush the I+D
-       RET
+       dsb                             @ data synchronization barrier
+       mcr     p15, 0, r0, c2, c0, 0   @ set the new TTB
+#ifdef MULTIPROCESSOR
+       mcr     p15, 0, r0, c8, c3, 0   @ flush I+D tlb single entry
+#else
+       mcr     p15, 0, r0, c8, c7, 0   @ flush the I+D
+#endif
+       dsb
+       isb
+       bx      lr
 END(armv7_context_switch)
 
 ENTRY(armv7_tlb_flushID_SE)
-       mcr     p15, 0, r0, c8, c7, 1   @flush I+D tlb single entry
-       mcr     p15, 0, r0, c7, c10, 4  @drain write buffer
-       RET
+#ifdef MULTIPROCESSOR
+       mcr     p15, 0, r0, c8, c3, 1   @ flush I+D tlb single entry
+#else
+       mcr     p15, 0, r0, c8, c7, 1   @ flush I+D tlb single entry
+#endif
+       dsb                             @ data synchronization barrier
+       isb
+       bx      lr
 END(armv7_tlb_flushID_SE)
 
 
 ENTRY(armv7_setttb)
-/* Does this even exist on armv7? */
-#ifdef PMAP_CACHE_VIVT
-       stmdb   sp!, {r0, lr}
-       bl      _C_LABEL(armv7_idcache_wbinv_all) @clean the D cache
-       ldmia   sp!, {r0, lr}
+       mcr     p15, 0, r0, c2, c0, 0   @ load new TTB
+#ifdef MULTIPROCESSOR
+       mcr     p15, 0, r0, c8, c3, 0   @ invalidate all I+D TLBs
+#else
+       mcr     p15, 0, r0, c8, c7, 0   @ invalidate all I+D TLBs
 #endif
-
-       mcr     p15, 0, r0, c2, c0, 0   @load new TTB
-       mcr     p15, 0, r0, c8, c7, 0   @invalidate I+D TLBs
-       mcr     p15, 0, r0, c7, c10, 4  @drain the write buffer
-
-       RET
+       dsb                             @ data synchronization barrier
+       isb
+       bx      lr
 END(armv7_setttb)
 
 /* Cache operations. */
@@ -82,14 +87,20 @@
 /* LINTSTUB: void armv7_icache_sync_range(vaddr_t, vsize_t); */
 ENTRY_NP(armv7_icache_sync_range)
 1:
-       mcr     p15, 0, r0, c7, c5, 1   @invalidate the I-Cache line
-       mcr     p15, 0, r0, c7, c10, 1  @wb the D-Cache line
-       add     r0, r0, entrysize
-       subs    r1, r1, entrysize
+       mcr     p15, 0, r0, c7, c5, 1   @ invalidate the I-Cache line
+       mcr     p15, 0, r0, c7, c10, 1  @ wb the D-Cache line
+       mrc     p15, 1, r2, c0, c0, 0   @ read CCSIDR
+       and     r2, r2, #7              @ get line size (log2(size)-4)
+       add     r2, r2, #4              @ adjust
+       mov     ip, #1                  @ make a bit mask
+       lsl     r2, ip, r2              @ and shift into position
+       add     r0, r0, r2
+       subs    r1, r1, r2
        bhi     1b
 
-       mcr     p15, 0, r0, c7, c10, 4  @drain the write buffer, BSB 
-       RET
+       dsb                             @ data synchronization barrier
+       isb
+       bx      lr
 END(armv7_icache_sync_range)
 
 /* LINTSTUB: void armv7_icache_sync_all(void); */
@@ -102,56 +113,80 @@
        stmdb   sp!, {r0, lr}
        bl      _C_LABEL(armv7_idcache_wbinv_all) @clean the D cache
        ldmia   sp!, {r0, lr}
-       mcr     p15, 0, r0, c7, c10, 4  @drain the write buffer, BSB
-       RET
+       dsb                             @ data synchronization barrier
+       isb
+       bx      lr
 END(armv7_icache_sync_all)
 
 ENTRY(armv7_dcache_wb_range)
 1:
-       mcr     p15, 0, r0, c7, c10, 1  @wb the D-Cache
-       add     r0, r0, entrysize
-       subs    r1, r1, entrysize
+       mcr     p15, 0, r0, c7, c10, 1  @ wb the D-Cache
+       mrc     p15, 1, r2, c0, c0, 0   @ read CCSIDR
+       and     r2, r2, #7              @ get line size (log2(size)-4)
+       add     r2, r2, #4              @ adjust
+       mov     ip, #1                  @ make a bit mask
+       lsl     r2, ip, r2              @ and shift into position
+       add     r0, r0, r2
+       subs    r1, r1, r2
        bhi     1b
-       mcr     p15, 0, r0, c7, c10, 4  @drain the write buffer, BSB 
-       RET
+       dsb                             @ data synchronization barrier
+       bx      lr
 END(armv7_dcache_wb_range)
 
 /* LINTSTUB: void armv7_dcache_wbinv_range(vaddr_t, vsize_t); */
 ENTRY(armv7_dcache_wbinv_range)
 1:
-       mcr     p15, 0, r0, c7, c14, 1  @wb and inv the D-Cache line
-       add     r0, r0, entrysize
-       subs    r1, r1, entrysize
+       mcr     p15, 0, r0, c7, c14, 1  @ wb and inv the D-Cache line
+       mrc     p15, 1, r2, c0, c0, 0   @ read CCSIDR
+       and     r2, r2, #7              @ get line size (log2(size)-4)
+       add     r2, r2, #4              @ adjust
+       mov     ip, #1                  @ make a bit mask
+       lsl     r2, ip, r2              @ and shift into position
+       add     r0, r0, r2
+       subs    r1, r1, r2
        bhi     1b
-       mcr     p15, 0, r0, c7, c10, 4  @drain the write buffer, BSB 
-       RET
+       dsb                             @ data synchronization barrier
+       bx      lr
 END(armv7_dcache_wbinv_range)
 
 /* * LINTSTUB: void armv7_dcache_inv_range(vaddr_t, vsize_t); */
 ENTRY(armv7_dcache_inv_range)
 1:
-       mcr     p15, 0, r0, c7, c6, 1   @invalidate the D-Cache line  
-       add     r0, r0, entrysize 
-       subs    r1, r1, entrysize
+       mcr     p15, 0, r0, c7, c6, 1   @ invalidate the D-Cache line  
+       mrc     p15, 1, r2, c0, c0, 0   @ read CCSIDR
+       and     r2, r2, #7              @ get line size (log2(size)-4)
+       add     r2, r2, #4              @ adjust
+       mov     ip, #1                  @ make a bit mask
+       lsl     r2, ip, r2              @ and shift into position
+       add     r0, r0, r2 
+       subs    r1, r1, r2
        bhi     1b
 
-       mcr     p15, 0, r0, c7, c10, 4  @drain the write buffer, BSB 
-       RET
+       dsb                             @ data synchronization barrier
+       bx      lr
 END(armv7_dcache_inv_range)
 
 
+/* * LINTSTUB: void armv7_idcache_wbinv_range(vaddr_t, vsize_t); */
 ENTRY(armv7_idcache_wbinv_range)
 1:
-       mcr     p15, 0, r0, c7, c5, 1   @invalidate the I-Cache line
-       mcr     p15, 0, r0, c7, c14, 1  @wb and inv the D-Cache line
-       add     r0, r0, entrysize
-       subs    r1, r1, entrysize
+       mcr     p15, 0, r0, c7, c5, 1   @ invalidate the I-Cache line
+       mcr     p15, 0, r0, c7, c14, 1  @ wb and inv the D-Cache line
+       mrc     p15, 1, r2, c0, c0, 0   @ read CCSIDR
+       and     r2, r2, #7              @ get line size (log2(size)-4)
+       add     r2, r2, #4              @ adjust
+       mov     ip, #1                  @ make a bit mask
+       lsl     r2, ip, r2              @ and shift into position
+       add     r0, r0, r2
+       subs    r1, r1, r2
        bhi     1b
 
-       mcr     p15, 0, r0, c7, c10, 4  @drain the write buffer, BSB 
-       RET
+       dsb                             @ data synchronization barrier
+       isb
+       bx      lr
 END(armv7_idcache_wbinv_range)
 
+/* * LINTSTUB: void armv7_idcache_wbinv_all(void); */
 ENTRY_NP(armv7_idcache_wbinv_all)
        /*
         * We assume that the code here can never be out of sync with the
@@ -164,6 +199,169 @@
 END(armv7_idcache_wbinv_all)
 
 /*
- * armv7_dcache_wbinv_all is in cpufunc.c. It's really too long to
- * write in assembler.
+ * These work very hard to not push registers onto the stack and to limit themselves
+ * to use r0-r3 and ip.
  */
+/* * LINTSTUB: void armv7_icache_inv_all(void); */
+ENTRY_NP(armv7_icache_inv_all)



Home | Main Index | Thread Index | Old Index