Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/crypto/chacha/arch/arm Adjust sp, not fp, to allocate a 32-byte temporary.
details:   https://anonhg.NetBSD.org/src/rev/eae60a39af75
branches:  trunk
changeset: 937686:eae60a39af75
user:      riastradh <riastradh%NetBSD.org@localhost>
date:      Sun Aug 23 16:39:06 2020 +0000
description:
Adjust sp, not fp, to allocate a 32-byte temporary.
Costs a couple more MOV instructions, but we can't skimp on this --
there's no red zone below sp for interrupts on arm, so we can't touch
anything there: whatever we store below sp may be overwritten when an
interrupt arrives.  So just use fp to save sp and then adjust sp itself,
rather than using fp as a temporary register to point just below sp.
Should fix PR port-arm/55598 -- previously the ChaCha self-test
failed in 33/10000 trials triggered by sysctl on a running system;
with the patch it has failed in 0/10000 trials.
(Presumably it happened more often at boot time, leading to 5/26
failures in the test bed, because we just enabled interrupts and some
devices are starting to deliver interrupts.)
diffstat:
 sys/crypto/chacha/arch/arm/chacha_neon_32.S |  36 ++++++++++++++++------------
 1 files changed, 20 insertions(+), 16 deletions(-)
diffs (140 lines):
diff -r fa6eca80fc95 -r eae60a39af75 sys/crypto/chacha/arch/arm/chacha_neon_32.S
--- a/sys/crypto/chacha/arch/arm/chacha_neon_32.S       Sun Aug 23 16:18:12 2020 +0000
+++ b/sys/crypto/chacha/arch/arm/chacha_neon_32.S       Sun Aug 23 16:39:06 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: chacha_neon_32.S,v 1.3 2020/08/08 14:47:01 riastradh Exp $     */
+/*     $NetBSD: chacha_neon_32.S,v 1.4 2020/08/23 16:39:06 riastradh Exp $     */
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include <machine/asm.h>
 
-RCSID("$NetBSD: chacha_neon_32.S,v 1.3 2020/08/08 14:47:01 riastradh Exp $")
+RCSID("$NetBSD: chacha_neon_32.S,v 1.4 2020/08/23 16:39:06 riastradh Exp $")
 
        .fpu    neon
 
@@ -54,7 +54,7 @@
  */
 
 .macro ROUNDLD a0,a1,a2,a3, b0,b1,b2,b3, c0,c1,c2,c3, d0,d1,d2,d3
-       vld1.8          {\c2-\c3}, [fp, :256]
+       vld1.8          {\c2-\c3}, [sp, :256]
 .endm
 
 .macro ROUND   a0,a1,a2,a3, b0,b1,b2,b3, c0,c1,c2,c3, d0,d1,d2,d3, c0l, d0l,d0h,d1l,d1h,d2l,d2h,d3l,d3h
@@ -80,7 +80,7 @@
        vadd.u32        \c2, \c2, \d2
        vadd.u32        \c3, \c3, \d3
 
-       vst1.8          {\c0-\c1}, [fp, :256]   /* free c0 and c1 as temps */
+       vst1.8          {\c0-\c1}, [sp, :256]   /* free c0 and c1 as temps */
 
        veor            \c0, \b0, \c0
        veor            \c1, \b1, \c1
@@ -118,7 +118,7 @@
        vtbl.8          \d3l, {\d3l}, \c0l
        vtbl.8          \d3h, {\d3h}, \c0l
 
-       vld1.8          {\c0-\c1}, [fp, :256]   /* restore c0 and c1 */
+       vld1.8          {\c0-\c1}, [sp, :256]   /* restore c0 and c1 */
 
        /* c += d; b ^= c; b <<<= 7 */
        vadd.u32        \c2, \c2, \d2
@@ -126,7 +126,7 @@
        vadd.u32        \c0, \c0, \d0
        vadd.u32        \c1, \c1, \d1
 
-       vst1.8          {\c2-\c3}, [fp, :256]   /* free c2 and c3 as temps */
+       vst1.8          {\c2-\c3}, [sp, :256]   /* free c2 and c3 as temps */
 
        veor            \c2, \b2, \c2
        veor            \c3, \b3, \c3
@@ -160,17 +160,18 @@
        /* save callee-saves registers */
        push    {r4, r5, r6, r7, r8, r10, fp, lr}
        vpush   {d8-d15}
+       mov     fp, sp
 
        /* r7 := .Lconstants - .Lconstants_addr, r6 := .Lconstants_addr */
        ldr     r7, .Lconstants_addr
        adr     r6, .Lconstants_addr
 
        /* reserve space for two 128-bit/16-byte q registers */
-       sub     fp, sp, #0x20
-       bic     fp, fp, #0x1f   /* align */
+       sub     sp, sp, #0x20
+       bic     sp, sp, #0x1f   /* align */
 
        /* get parameters */
-       add     ip, sp, #96
+       add     ip, fp, #96
        add     r7, r7, r6      /* r7 := .Lconstants (= v0123) */
        ldm     ip, {r4, r5}    /* r4 := const, r5 := nr */
        ldm     r2, {r6, r8, r10}       /* (r6, r8, r10) := nonce[0:12) */
@@ -311,7 +312,7 @@
        vadd.u32 q3, q3, q8
        vadd.u32 q7, q7, q8
 
-       vld1.8  {q8-q9}, [fp, :256]     /* restore q8-q9 */
+       vld1.8  {q8-q9}, [sp, :256]     /* restore q8-q9 */
 
        vst1.8  {q0-q1}, [r0]!
        vld1.8  {q0}, [r3]      /* q0 := key[16:32) */
@@ -354,9 +355,10 @@
        /* zero temporary space on the stack */
        vmov.i32 q0, #0
        vmov.i32 q1, #0
-       vst1.8  {q0-q1}, [fp, :256]
+       vst1.8  {q0-q1}, [sp, :256]
 
        /* restore callee-saves registers and stack */
+       mov     sp, fp
        vpop    {d8-d15}
        pop     {r4, r5, r6, r7, r8, r10, fp, lr}
        bx      lr
@@ -374,17 +376,18 @@
        /* save callee-saves registers */
        push    {r4, r5, r6, r7, r8, r10, fp, lr}
        vpush   {d8-d15}
+       mov     fp, sp
 
        /* r7 := .Lconstants - .Lconstants_addr, r6 := .Lconstants_addr */
        ldr     r7, .Lconstants_addr
        adr     r6, .Lconstants_addr
 
        /* reserve space for two 128-bit/16-byte q registers */
-       sub     fp, sp, #0x20
-       bic     fp, fp, #0x1f   /* align */
+       sub     sp, sp, #0x20
+       bic     sp, sp, #0x1f   /* align */
 
        /* get parameters */
-       add     ip, sp, #96
+       add     ip, fp, #96
        add     r7, r7, r6      /* r7 := .Lconstants (= v0123) */
        ldm     ip, {r4, r5, ip}        /* r4 := key, r5 := const, ip := nr */
        ldm     r3, {r6, r8, r10}       /* (r6, r8, r10) := nonce[0:12) */
@@ -475,7 +478,7 @@
        veor    q0, q0, q8      /* compute ciphertext bytes [0:32) */
        veor    q1, q1, q9
 
-       vld1.8  {q8-q9}, [fp, :256]     /* restore q8-q9 */
+       vld1.8  {q8-q9}, [sp, :256]     /* restore q8-q9 */
 
        vst1.8  {q0-q1}, [r0]!  /* store ciphertext bytes [0:32) */
        vld1.8  {q0}, [r4]      /* q0 := key[16:32) */
@@ -552,9 +555,10 @@
        /* zero temporary space on the stack */
        vmov.i32 q0, #0
        vmov.i32 q1, #0
-       vst1.8  {q0-q1}, [fp, :256]
+       vst1.8  {q0-q1}, [sp, :256]
 
        /* restore callee-saves registers and stack */
+       mov     sp, fp
        vpop    {d8-d15}
        pop     {r4, r5, r6, r7, r8, r10, fp, lr}
        bx      lr
Home |
Main Index |
Thread Index |
Old Index