Source-Changes-HG archive


[src/trunk]: src/sys/crypto/aes/arch/arm Issue aese/aesmc and aesd/aesimc in pairs



details:   https://anonhg.NetBSD.org/src/rev/ff14f4c2bf80
branches:  trunk
changeset: 936483:ff14f4c2bf80
user:      riastradh <riastradh@NetBSD.org>
date:      Mon Jul 27 20:54:11 2020 +0000

description:
Issue aese/aesmc and aesd/aesimc in pairs.

Advised by the aarch64 optimization guide; increases cgd throughput
by about 10%.
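
The pairing matters because ARM's software optimization guides (e.g. for
Cortex-A57 and later cores) describe instruction fusion: an aese
immediately followed by an aesmc on the same register (and likewise
aesd/aesimc) can issue as a single fused operation, hiding the MixColumns
latency.  A minimal sketch of the scheduling difference, not part of the
commit:

	/* Before this change (one loop iteration): aese is separated
	 * from the next iteration's aesmc by the key load and the
	 * branch, so the pair cannot fuse. */
	aese	v0.16b, v16.16b		/* AddRoundKey+SubBytes+ShiftRows */
	ldr	q16, [x0], #0x10	/* load next round key */
	b.ne	1b			/* ...aesmc runs at the top of 1: */

	/* After: back-to-back pair on the same register, eligible for
	 * fusion on fusion-capable cores. */
	aese	v0.16b, v16.16b
	aesmc	v0.16b, v0.16b		/* MixColumns */
	ldr	q16, [x0], #0x10	/* load next round key */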

diffstat:

 sys/crypto/aes/arch/arm/aes_armv8_64.S |  88 +++++++++++++++++++++++----------
 1 file changed, 60 insertions(+), 28 deletions(-)

diffs (176 lines):

diff -r d088e7634a8b -r ff14f4c2bf80 sys/crypto/aes/arch/arm/aes_armv8_64.S
--- a/sys/crypto/aes/arch/arm/aes_armv8_64.S    Mon Jul 27 20:53:22 2020 +0000
+++ b/sys/crypto/aes/arch/arm/aes_armv8_64.S    Mon Jul 27 20:54:11 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: aes_armv8_64.S,v 1.9 2020/07/27 20:53:22 riastradh Exp $       */
+/*     $NetBSD: aes_armv8_64.S,v 1.10 2020/07/27 20:54:11 riastradh Exp $      */
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -1041,15 +1041,18 @@
        .type   aesarmv8_enc1,@function
 aesarmv8_enc1:
        ldr     q16, [x0], #0x10        /* load round key */
-       b       2f
+       sub     x3, x3, #1
        _ALIGN_TEXT
-1:     /* q0 := MixColumns(q0) */
+1:     /* q0 := MixColumns(ShiftRows(SubBytes(AddRoundKey_q16(q0)))) */
+       aese    v0.16b, v16.16b
        aesmc   v0.16b, v0.16b
-2:     subs    x3, x3, #1
+       ldr     q16, [x0], #0x10
+       subs    x3, x3, #1
+       b.ne    1b
        /* q0 := ShiftRows(SubBytes(AddRoundKey_q16(q0))) */
        aese    v0.16b, v16.16b
-       ldr     q16, [x0], #0x10                /* load next round key */
-       b.ne    1b
+       ldr     q16, [x0]               /* load last round key */
+       /* q0 := AddRoundKey_q16(q0) */
        eor     v0.16b, v0.16b, v16.16b
        ret
 END(aesarmv8_enc1)
@@ -1067,17 +1070,21 @@
        .type   aesarmv8_enc2,@function
 aesarmv8_enc2:
        ldr     q16, [x0], #0x10        /* load round key */
-       b       2f
+       sub     x3, x3, #1
        _ALIGN_TEXT
-1:     /* q[i] := MixColumns(q[i]) */
+1:     /* q[i] := MixColumns(ShiftRows(SubBytes(AddRoundKey_q16(q[i])))) */
+       aese    v0.16b, v16.16b
        aesmc   v0.16b, v0.16b
+       aese    v1.16b, v16.16b
        aesmc   v1.16b, v1.16b
-2:     subs    x3, x3, #1
+       ldr     q16, [x0], #0x10        /* load next round key */
+       subs    x3, x3, #1
+       b.ne    1b
        /* q[i] := ShiftRows(SubBytes(AddRoundKey_q16(q[i]))) */
        aese    v0.16b, v16.16b
        aese    v1.16b, v16.16b
-       ldr     q16, [x0], #0x10                /* load next round key */
-       b.ne    1b
+       ldr     q16, [x0]               /* load last round key */
+       /* q[i] := AddRoundKey_q16(q[i]) */
        eor     v0.16b, v0.16b, v16.16b
        eor     v1.16b, v1.16b, v16.16b
        ret
@@ -1097,18 +1104,28 @@
        .type   aesarmv8_enc8,@function
 aesarmv8_enc8:
        ldr     q16, [x0], #0x10        /* load round key */
-       b       2f
+       sub     x3, x3, #1
        _ALIGN_TEXT
-1:     /* q[i] := MixColumns(q[i]) */
+1:     /* q[i] := MixColumns(ShiftRows(SubBytes(AddRoundKey_q16(q[i])))) */
+       aese    v0.16b, v16.16b
        aesmc   v0.16b, v0.16b
+       aese    v1.16b, v16.16b
        aesmc   v1.16b, v1.16b
+       aese    v2.16b, v16.16b
        aesmc   v2.16b, v2.16b
+       aese    v3.16b, v16.16b
        aesmc   v3.16b, v3.16b
+       aese    v4.16b, v16.16b
        aesmc   v4.16b, v4.16b
+       aese    v5.16b, v16.16b
        aesmc   v5.16b, v5.16b
+       aese    v6.16b, v16.16b
        aesmc   v6.16b, v6.16b
+       aese    v7.16b, v16.16b
        aesmc   v7.16b, v7.16b
-2:     subs    x3, x3, #1
+       ldr     q16, [x0], #0x10        /* load next round key */
+       subs    x3, x3, #1
+       b.ne    1b
        /* q[i] := ShiftRows(SubBytes(AddRoundKey_q16(q[i]))) */
        aese    v0.16b, v16.16b
        aese    v1.16b, v16.16b
@@ -1118,9 +1135,9 @@
        aese    v5.16b, v16.16b
        aese    v6.16b, v16.16b
        aese    v7.16b, v16.16b
-       ldr     q16, [x0], #0x10        /* load next round key */
-       b.ne    1b
-       eor     v0.16b, v0.16b, v16.16b /* AddRoundKey */
+       ldr     q16, [x0]               /* load last round key */
+       /* q[i] := AddRoundKey_q16(q[i]) */
+       eor     v0.16b, v0.16b, v16.16b
        eor     v1.16b, v1.16b, v16.16b
        eor     v2.16b, v2.16b, v16.16b
        eor     v3.16b, v3.16b, v16.16b
@@ -1144,15 +1161,19 @@
        .type   aesarmv8_dec1,@function
 aesarmv8_dec1:
        ldr     q16, [x0], #0x10        /* load round key */
-       b       2f
+       sub     x3, x3, #1
        _ALIGN_TEXT
-1:     /* q0 := InMixColumns(q0) */
+1:     /* q0 := InSubBytes(InShiftRows(AddRoundKey_q16(q0))) */
+       aesd    v0.16b, v16.16b
+       /* q0 := InMixColumns(q0) */
        aesimc  v0.16b, v0.16b
-2:     subs    x3, x3, #1
+       ldr     q16, [x0], #0x10        /* load next round key */
+       subs    x3, x3, #1
+       b.ne    1b
        /* q0 := InSubBytes(InShiftRows(AddRoundKey_q16(q0))) */
        aesd    v0.16b, v16.16b
-       ldr     q16, [x0], #0x10        /* load next round key */
-       b.ne    1b
+       ldr     q16, [x0]               /* load last round key */
+       /* q0 := AddRoundKey_q16(q0) */
        eor     v0.16b, v0.16b, v16.16b
        ret
 END(aesarmv8_dec1)
@@ -1171,18 +1192,29 @@
        .type   aesarmv8_dec8,@function
 aesarmv8_dec8:
        ldr     q16, [x0], #0x10        /* load round key */
-       b       2f
+       sub     x3, x3, #1
        _ALIGN_TEXT
-1:     /* q[i] := InMixColumns(q[i]) */
+1:     /* q[i] := InSubBytes(InShiftRows(AddRoundKey_q16(q[i]))) */
+       aesd    v0.16b, v16.16b
+       /* q[i] := InMixColumns(q[i]) */
        aesimc  v0.16b, v0.16b
+       aesd    v1.16b, v16.16b
        aesimc  v1.16b, v1.16b
+       aesd    v2.16b, v16.16b
        aesimc  v2.16b, v2.16b
+       aesd    v3.16b, v16.16b
        aesimc  v3.16b, v3.16b
+       aesd    v4.16b, v16.16b
        aesimc  v4.16b, v4.16b
+       aesd    v5.16b, v16.16b
        aesimc  v5.16b, v5.16b
+       aesd    v6.16b, v16.16b
        aesimc  v6.16b, v6.16b
+       aesd    v7.16b, v16.16b
        aesimc  v7.16b, v7.16b
-2:     subs    x3, x3, #1
+       ldr     q16, [x0], #0x10        /* load next round key */
+       subs    x3, x3, #1
+       b.ne    1b
        /* q[i] := InSubBytes(InShiftRows(AddRoundKey_q16(q[i]))) */
        aesd    v0.16b, v16.16b
        aesd    v1.16b, v16.16b
@@ -1192,9 +1224,9 @@
        aesd    v5.16b, v16.16b
        aesd    v6.16b, v16.16b
        aesd    v7.16b, v16.16b
-       ldr     q16, [x0], #0x10        /* load next round key */
-       b.ne    1b
-       eor     v0.16b, v0.16b, v16.16b /* AddRoundKey */
+       ldr     q16, [x0]               /* load last round key */
+       /* q[i] := AddRoundKey_q16(q[i]) */
+       eor     v0.16b, v0.16b, v16.16b
        eor     v1.16b, v1.16b, v16.16b
        eor     v2.16b, v2.16b, v16.16b
        eor     v3.16b, v3.16b, v16.16b
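
For reference, the rewritten loop shape, condensed here from aesarmv8_enc1
above (the enc2/enc8 and dec variants follow the same pattern): the first
Nr-1 rounds run inside the loop with each aese adjacent to its aesmc, and
the final round, which has no MixColumns step in AES, is peeled off after
the loop and followed by a plain eor for the last AddRoundKey:

	sub	x3, x3, #1		/* loop runs Nr-1 full rounds */
1:	aese	v0.16b, v16.16b		/* AddRoundKey+SubBytes+ShiftRows */
	aesmc	v0.16b, v0.16b		/* MixColumns: fusible with aese */
	ldr	q16, [x0], #0x10	/* load next round key */
	subs	x3, x3, #1
	b.ne	1b
	aese	v0.16b, v16.16b		/* final round: no MixColumns */
	ldr	q16, [x0]		/* load last round key */
	eor	v0.16b, v0.16b, v16.16b	/* final AddRoundKey */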


