Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/crypto Align critical-path loops in AES and ChaCha.



details:   https://anonhg.NetBSD.org/src/rev/d088e7634a8b
branches:  trunk
changeset: 936482:d088e7634a8b
user:      riastradh <riastradh%NetBSD.org@localhost>
date:      Mon Jul 27 20:53:22 2020 +0000

description:
Align critical-path loops in AES and ChaCha.

diffstat:

 sys/crypto/aes/arch/arm/aes_armv8_64.S      |  18 +++++++++++++++++-
 sys/crypto/aes/arch/arm/aes_neon_32.S       |   4 +++-
 sys/crypto/aes/arch/x86/aes_ni_64.S         |  18 +++++++++++++++++-
 sys/crypto/chacha/arch/arm/chacha_neon_64.S |   4 +++-
 4 files changed, 40 insertions(+), 4 deletions(-)

diffs (truncated from 324 to 300 lines):

diff -r 93504fc6c172 -r d088e7634a8b sys/crypto/aes/arch/arm/aes_armv8_64.S
--- a/sys/crypto/aes/arch/arm/aes_armv8_64.S    Mon Jul 27 20:52:10 2020 +0000
+++ b/sys/crypto/aes/arch/arm/aes_armv8_64.S    Mon Jul 27 20:53:22 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: aes_armv8_64.S,v 1.8 2020/07/25 22:33:04 riastradh Exp $       */
+/*     $NetBSD: aes_armv8_64.S,v 1.9 2020/07/27 20:53:22 riastradh Exp $       */
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -440,6 +440,7 @@
 ENTRY(aesarmv8_enctodec)
        ldr     q0, [x0, x2, lsl #4]    /* load last round key */
        b       2f
+       _ALIGN_TEXT
 1:     aesimc  v0.16b, v0.16b  /* convert encryption to decryption */
 2:     str     q0, [x1], #0x10 /* store round key */
        subs    x2, x2, #1      /* count down round */
@@ -503,6 +504,7 @@
        mov     x9, x0                  /* x9 := enckey */
        mov     x10, x3                 /* x10 := nbytes */
        ldr     q0, [x4]                /* q0 := chaining value */
+       _ALIGN_TEXT
 1:     ldr     q1, [x1], #0x10         /* q1 := plaintext block */
        eor     v0.16b, v0.16b, v1.16b  /* q0 := cv ^ ptxt */
        mov     x0, x9                  /* x0 := enckey */
@@ -539,6 +541,7 @@
        ldr     q0, [x1, #-0x10]!       /* q0 := last ciphertext block */
        str     q0, [x4]                /* update iv */
        b       2f
+       _ALIGN_TEXT
 1:     ldr     q31, [x1, #-0x10]!      /* q31 := chaining value */
        eor     v0.16b, v0.16b, v31.16b /* q0 := plaintext block */
        str     q0, [x2, #-0x10]!       /* store plaintext block */
@@ -576,6 +579,7 @@
        ldp     q6, q7, [x1, #-0x20]!   /* q6, q7 := last ciphertext blocks */
        str     q7, [x4]                /* update iv */
        b       2f
+       _ALIGN_TEXT
 1:     ldp     q6, q7, [x1, #-0x20]!
        eor     v0.16b, v0.16b, v7.16b  /* q0 := pt0 */
        stp     q0, q1, [x2, #-0x20]!
@@ -629,6 +633,7 @@
        mov     x9, x0                  /* x9 := enckey */
        mov     x10, x3                 /* x10 := nbytes */
        ldr     q31, [x4]               /* q31 := tweak */
+       _ALIGN_TEXT
 1:     ldr     q0, [x1], #0x10         /* q0 := ptxt */
        mov     x0, x9                  /* x0 := enckey */
        mov     x3, x5                  /* x3 := nrounds */
@@ -661,6 +666,7 @@
        mov     x9, x0                  /* x9 := enckey */
        mov     x10, x3                 /* x10 := nbytes */
        ldr     q31, [x4]               /* q31 := tweak */
+       _ALIGN_TEXT
 1:     mov     v24.16b, v31.16b        /* q24 := tweak[0] */
        bl      aesarmv8_xts_mulx       /* q31 *= x; trash x0/q0/q1 */
        mov     v25.16b, v31.16b        /* q25 := tweak[1] */
@@ -729,6 +735,7 @@
        mov     x9, x0                  /* x9 := deckey */
        mov     x10, x3                 /* x10 := nbytes */
        ldr     q31, [x4]               /* q31 := tweak */
+       _ALIGN_TEXT
 1:     ldr     q0, [x1], #0x10         /* q0 := ctxt */
        mov     x0, x9                  /* x0 := deckey */
        mov     x3, x5                  /* x3 := nrounds */
@@ -761,6 +768,7 @@
        mov     x9, x0                  /* x9 := deckey */
        mov     x10, x3                 /* x10 := nbytes */
        ldr     q31, [x4]               /* q31 := tweak */
+       _ALIGN_TEXT
 1:     mov     v24.16b, v31.16b        /* q24 := tweak[0] */
        bl      aesarmv8_xts_mulx       /* q31 *= x; trash x0/q0/q1 */
        mov     v25.16b, v31.16b        /* q25 := tweak[1] */
@@ -879,6 +887,7 @@
        ldr     q0, [x3]                /* q0 := initial authenticator */
        mov     x9, x0                  /* x9 := enckey */
        mov     x5, x3                  /* x5 := &auth (enc1 trashes x3) */
+       _ALIGN_TEXT
 1:     ldr     q1, [x1], #0x10         /* q1 := plaintext block */
        mov     x0, x9                  /* x0 := enckey */
        mov     x3, x4                  /* x3 := nrounds */
@@ -913,6 +922,7 @@
 #if _BYTE_ORDER == _LITTLE_ENDIAN
        rev32   v2.16b, v2.16b          /* q2 := ctr (host-endian) */
 #endif
+       _ALIGN_TEXT
 1:     ldr     q3, [x1], #0x10         /* q3 := plaintext block */
        add     v2.4s, v2.4s, v5.4s     /* increment ctr (32-bit) */
        mov     x0, x9                  /* x0 := enckey */
@@ -972,6 +982,7 @@
        bl      aesarmv8_enc1           /* q0 := pad; trash x0/x3/q16 */
        b       2f
 
+       _ALIGN_TEXT
 1:     /*
         * Authenticate the last block and decrypt the next block
         * simultaneously.
@@ -1031,6 +1042,7 @@
 aesarmv8_enc1:
        ldr     q16, [x0], #0x10        /* load round key */
        b       2f
+       _ALIGN_TEXT
 1:     /* q0 := MixColumns(q0) */
        aesmc   v0.16b, v0.16b
 2:     subs    x3, x3, #1
@@ -1056,6 +1068,7 @@
 aesarmv8_enc2:
        ldr     q16, [x0], #0x10        /* load round key */
        b       2f
+       _ALIGN_TEXT
 1:     /* q[i] := MixColumns(q[i]) */
        aesmc   v0.16b, v0.16b
        aesmc   v1.16b, v1.16b
@@ -1085,6 +1098,7 @@
 aesarmv8_enc8:
        ldr     q16, [x0], #0x10        /* load round key */
        b       2f
+       _ALIGN_TEXT
 1:     /* q[i] := MixColumns(q[i]) */
        aesmc   v0.16b, v0.16b
        aesmc   v1.16b, v1.16b
@@ -1131,6 +1145,7 @@
 aesarmv8_dec1:
        ldr     q16, [x0], #0x10        /* load round key */
        b       2f
+       _ALIGN_TEXT
 1:     /* q0 := InMixColumns(q0) */
        aesimc  v0.16b, v0.16b
 2:     subs    x3, x3, #1
@@ -1157,6 +1172,7 @@
 aesarmv8_dec8:
        ldr     q16, [x0], #0x10        /* load round key */
        b       2f
+       _ALIGN_TEXT
 1:     /* q[i] := InMixColumns(q[i]) */
        aesimc  v0.16b, v0.16b
        aesimc  v1.16b, v1.16b
diff -r 93504fc6c172 -r d088e7634a8b sys/crypto/aes/arch/arm/aes_neon_32.S
--- a/sys/crypto/aes/arch/arm/aes_neon_32.S     Mon Jul 27 20:52:10 2020 +0000
+++ b/sys/crypto/aes/arch/arm/aes_neon_32.S     Mon Jul 27 20:53:22 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: aes_neon_32.S,v 1.2 2020/07/27 20:52:10 riastradh Exp $        */
+/*     $NetBSD: aes_neon_32.S,v 1.3 2020/07/27 20:53:22 riastradh Exp $        */
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -316,6 +316,7 @@
 
        b       2f
 
+       _ALIGN_TEXT
 1:     vld1.64 {d28-d29}, [r0 :128]!   /* q14 = *rk++ */
 
        /* q0 := A = rk[i] + sb1_0(io) + sb1_1(jo) */
@@ -535,6 +536,7 @@
 
        b       2f
 
+       _ALIGN_TEXT
 1:     /* load dsbd */
        add     r4, r12, #(dsbd_0 - .Lconstants)
        vld1.64 {d16-d17}, [r4 :128]!   /* q8 := dsbd[0] */
diff -r 93504fc6c172 -r d088e7634a8b sys/crypto/aes/arch/x86/aes_ni_64.S
--- a/sys/crypto/aes/arch/x86/aes_ni_64.S       Mon Jul 27 20:52:10 2020 +0000
+++ b/sys/crypto/aes/arch/x86/aes_ni_64.S       Mon Jul 27 20:53:22 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: aes_ni_64.S,v 1.4 2020/07/25 22:29:06 riastradh Exp $  */
+/*     $NetBSD: aes_ni_64.S,v 1.5 2020/07/27 20:53:22 riastradh Exp $  */
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -523,6 +523,7 @@
        movdqa  (%rdi,%rdx),%xmm0       /* load last round key */
        movdqa  %xmm0,(%rsi)    /* store last round key verbatim */
        jmp     2f
+       _ALIGN_TEXT
 1:     movdqa  (%rdi,%rdx),%xmm0       /* load round key */
        aesimc  %xmm0,%xmm0     /* convert encryption to decryption */
        movdqa  %xmm0,(%rsi)    /* store round key */
@@ -580,6 +581,7 @@
        jz      2f
        mov     %rcx,%r10               /* r10 := nbytes */
        movdqu  (%r8),%xmm0             /* xmm0 := chaining value */
+       _ALIGN_TEXT
 1:     movdqu  (%rsi),%xmm1            /* xmm1 := plaintext block */
        lea     0x10(%rsi),%rsi
        pxor    %xmm1,%xmm0             /* xmm0 := cv ^ ptxt */
@@ -615,6 +617,7 @@
        movdqu  -0x10(%rsi,%r10),%xmm0  /* xmm0 := last ciphertext block */
        movdqu  %xmm0,(%r8)             /* update iv */
        jmp     2f
+       _ALIGN_TEXT
 1:     movdqu  -0x10(%rsi,%r10),%xmm8  /* xmm8 := chaining value */
        pxor    %xmm8,%xmm0             /* xmm0 := ptxt */
        movdqu  %xmm0,(%rdx,%r10)       /* store plaintext block */
@@ -650,6 +653,7 @@
        movdqu  -0x10(%rsi,%r10),%xmm7  /* xmm7 := ciphertext block[n-1] */
        movdqu  %xmm7,(%r8)             /* update iv */
        jmp     2f
+       _ALIGN_TEXT
 1:     movdqu  -0x10(%rsi,%r10),%xmm7  /* xmm7 := cv[0] */
        pxor    %xmm7,%xmm0             /* xmm0 := ptxt[0] */
        movdqu  %xmm0,(%rdx,%r10)       /* store plaintext block */
@@ -706,6 +710,7 @@
 ENTRY(aesni_xts_enc1)
        mov     %rcx,%r10               /* r10 := nbytes */
        movdqu  (%r8),%xmm15            /* xmm15 := tweak */
+       _ALIGN_TEXT
 1:     movdqu  (%rsi),%xmm0            /* xmm0 := ptxt */
        lea     0x10(%rsi),%rsi         /* advance rsi to next block */
        pxor    %xmm15,%xmm0            /* xmm0 := ptxt ^ tweak */
@@ -738,6 +743,7 @@
        sub     $0x10,%rsp
        mov     %rcx,%r10               /* r10 := nbytes */
        movdqu  (%r8),%xmm15            /* xmm15 := tweak[0] */
+       _ALIGN_TEXT
 1:     movdqa  %xmm15,%xmm8            /* xmm8 := tweak[0] */
        call    aesni_xts_mulx          /* xmm15 := tweak[1] */
        movdqa  %xmm15,%xmm9            /* xmm9 := tweak[1] */
@@ -812,6 +818,7 @@
 ENTRY(aesni_xts_dec1)
        mov     %rcx,%r10               /* r10 := nbytes */
        movdqu  (%r8),%xmm15            /* xmm15 := tweak */
+       _ALIGN_TEXT
 1:     movdqu  (%rsi),%xmm0            /* xmm0 := ctxt */
        lea     0x10(%rsi),%rsi         /* advance rsi to next block */
        pxor    %xmm15,%xmm0            /* xmm0 := ctxt ^ tweak */
@@ -844,6 +851,7 @@
        sub     $0x10,%rsp
        mov     %rcx,%r10               /* r10 := nbytes */
        movdqu  (%r8),%xmm15            /* xmm15 := tweak[0] */
+       _ALIGN_TEXT
 1:     movdqa  %xmm15,%xmm8            /* xmm8 := tweak[0] */
        call    aesni_xts_mulx          /* xmm15 := tweak[1] */
        movdqa  %xmm15,%xmm9            /* xmm9 := tweak[1] */
@@ -964,6 +972,7 @@
        movdqu  (%rcx),%xmm0            /* xmm0 := auth */
        mov     %rdx,%r10               /* r10 := nbytes */
        mov     %rcx,%rdx               /* rdx := &auth */
+       _ALIGN_TEXT
 1:     pxor    (%rsi),%xmm0            /* xmm0 ^= plaintext block */
        lea     0x10(%rsi),%rsi
        mov     %r8d,%ecx               /* ecx := nrounds */
@@ -992,6 +1001,7 @@
        movdqa  ctr32_inc(%rip),%xmm5   /* xmm5 := (0,0,0,1) (le) */
        movdqu  (%r8),%xmm0             /* xmm0 := auth */
        pshufb  %xmm4,%xmm2             /* xmm2 := ctr (le) */
+       _ALIGN_TEXT
 1:     movdqu  (%rsi),%xmm3            /* xmm3 := plaintext block */
        paddd   %xmm5,%xmm2             /* increment ctr (32-bit) */
        lea     0x10(%rsi),%rsi
@@ -1040,6 +1050,7 @@
        call    aesni_enc1              /* xmm0 := pad; trash rax/rcx/xmm8 */
        jmp     2f
 
+       _ALIGN_TEXT
 1:     /*
         * Authenticate the last block and decrypt the next block
         * simultaneously.
@@ -1103,6 +1114,7 @@
        lea     0x10(%rdi,%rcx),%rax    /* rax := end of round key array */
        neg     %rcx            /* rcx := byte offset of round key from end */
        jmp     2f
+       _ALIGN_TEXT
 1:     aesenc  %xmm8,%xmm0
 2:     movdqa  (%rax,%rcx),%xmm8       /* load round key */
        add     $0x10,%rcx
@@ -1130,6 +1142,7 @@
        pxor    %xmm8,%xmm0     /* xor in first round key */
        pxor    %xmm8,%xmm1
        jmp     2f
+       _ALIGN_TEXT
 1:     aesenc  %xmm8,%xmm0
        aesenc  %xmm8,%xmm1
 2:     movdqa  (%rax,%rcx),%xmm8       /* load round key */
@@ -1165,6 +1178,7 @@
        lea     0x10(%rdi,%rcx),%rax    /* rax := end of round key array */
        neg     %rcx            /* rcx := byte offset of round key from end */
        jmp     2f
+       _ALIGN_TEXT
 1:     aesenc  %xmm8,%xmm0
        aesenc  %xmm8,%xmm1
        aesenc  %xmm8,%xmm2
@@ -1204,6 +1218,7 @@
        lea     0x10(%rdi,%rcx),%rax    /* rax := pointer to round key */
        neg     %rcx            /* rcx := byte offset of round key from end */
        jmp     2f
+       _ALIGN_TEXT
 1:     aesdec  %xmm8,%xmm0
 2:     movdqa  (%rax,%rcx),%xmm8       /* load round key */
        add     $0x10,%rcx
@@ -1237,6 +1252,7 @@
        lea     0x10(%rdi,%rcx),%rax    /* rax := pointer to round key */
        neg     %rcx            /* rcx := byte offset of round key from end */
        jmp     2f
+       _ALIGN_TEXT
 1:     aesdec  %xmm8,%xmm0
        aesdec  %xmm8,%xmm1
        aesdec  %xmm8,%xmm2
diff -r 93504fc6c172 -r d088e7634a8b sys/crypto/chacha/arch/arm/chacha_neon_64.S



Home | Main Index | Thread Index | Old Index