Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src-draft/trunk]: src/sys New permutation-based AES implementation using ARM...



details:   https://anonhg.NetBSD.org/src-all/rev/8dac1e356fa8
branches:  trunk
changeset: 935244:8dac1e356fa8
user:      Taylor R Campbell <riastradh%NetBSD.org@localhost>
date:      Sun Jun 28 02:00:56 2020 +0000

description:
New permutation-based AES implementation using ARM NEON.

Work in progress -- clang support requires either adapting the hokey
intrinsic stubs or making the real arm_neon.h available during the
kernel build.

In principle this should work on armv7, but in practice there are
some barriers:

- need to implement fpu_kern_enter/leave
- need to figure out how to pacify the linker:

  armv7--netbsdelf-eabihf-ld: error: aes_neon.o uses VFP register arguments, netbsd does not
  armv7--netbsdelf-eabihf-ld: failed to merge target specific data of file aes_neon.o

- need to find the right place to call aes_md_init(&aes_neon_impl)

diffstat:

 sys/arch/aarch64/aarch64/cpu.c              |   17 +-
 sys/arch/aarch64/conf/files.aarch64         |    3 +
 sys/arch/arm/conf/Makefile.arm              |    2 +-
 sys/arch/arm/conf/files.arm                 |    4 +
 sys/crypto/aes/arch/arm/aes_neon.c          |  618 ++++++++++++++++++++++++++++
 sys/crypto/aes/arch/arm/aes_neon.h          |   36 +
 sys/crypto/aes/arch/arm/aes_neon_impl.c     |  181 ++++++++
 sys/crypto/aes/arch/arm/aes_neon_internal.h |   43 +
 sys/crypto/aes/arch/arm/aes_neon_subr.c     |  217 +++++++++
 sys/crypto/aes/arch/arm/aes_neon_subr.h     |   60 ++
 sys/crypto/aes/arch/arm/arm_neon.h          |  311 ++++++++++++++
 sys/crypto/aes/arch/arm/files.aesneon       |   13 +
 12 files changed, 1500 insertions(+), 5 deletions(-)

diffs (truncated from 1584 to 300 lines):

diff -r 72c11f4ce91b -r 8dac1e356fa8 sys/arch/aarch64/aarch64/cpu.c
--- a/sys/arch/aarch64/aarch64/cpu.c    Fri Jun 26 23:17:30 2020 +0000
+++ b/sys/arch/aarch64/aarch64/cpu.c    Sun Jun 28 02:00:56 2020 +0000
@@ -45,6 +45,7 @@
 #include <sys/systm.h>
 
 #include <crypto/aes/arch/arm/aes_armv8.h>
+#include <crypto/aes/arch/arm/aes_neon.h>
 
 #include <aarch64/armreg.h>
 #include <aarch64/cpu.h>
@@ -601,16 +602,24 @@
 {
        struct aarch64_sysctl_cpu_id *id = &ci->ci_id;
 
-       /* Verify that it is supported.  */
+       /* Check for ARMv8.0-AES support.  */
        switch (__SHIFTOUT(id->ac_aa64isar0, ID_AA64ISAR0_EL1_AES)) {
        case ID_AA64ISAR0_EL1_AES_AES:
        case ID_AA64ISAR0_EL1_AES_PMUL:
-               break;
+               aes_md_init(&aes_armv8_impl);
+               return;
        default:
-               return;
+               break;
        }
 
-       aes_md_init(&aes_armv8_impl);
+       /* Failing that, check for SIMD support.  */
+       switch (__SHIFTOUT(id->ac_aa64pfr0, ID_AA64PFR0_EL1_ADVSIMD)) {
+       case ID_AA64PFR0_EL1_ADV_SIMD_IMPL:
+               aes_md_init(&aes_neon_impl);
+               return;
+       default:
+               break;
+       }
 }
 
 #ifdef MULTIPROCESSOR
diff -r 72c11f4ce91b -r 8dac1e356fa8 sys/arch/aarch64/conf/files.aarch64
--- a/sys/arch/aarch64/conf/files.aarch64       Fri Jun 26 23:17:30 2020 +0000
+++ b/sys/arch/aarch64/conf/files.aarch64       Sun Jun 28 02:00:56 2020 +0000
@@ -141,3 +141,6 @@
 
 # ARMv8.0-AES
 include "crypto/aes/arch/arm/files.aesarmv8"
+
+# vpaes with ARM NEON
+include "crypto/aes/arch/arm/files.aesneon"
diff -r 72c11f4ce91b -r 8dac1e356fa8 sys/arch/arm/conf/Makefile.arm
--- a/sys/arch/arm/conf/Makefile.arm    Fri Jun 26 23:17:30 2020 +0000
+++ b/sys/arch/arm/conf/Makefile.arm    Sun Jun 28 02:00:56 2020 +0000
@@ -47,7 +47,7 @@
 AFLAGS.fusu.S+=                -marm
 AFLAGS.irq_dispatch.S+=        -marm ${CLANG_OBSOLETE_MULTI_ST}
 AFLAGS.locore.S+=      -marm ${CLANG_OBSOLETE_MULTI_ST}
-CFLAGS+=               -mfloat-abi=soft
+CFLAGS+=               ${FLOATABI.${.IMPSRC:T}:U-mfloat-abi=soft}
 
 # This files use instructions deprecated for ARMv7+, but still
 # included in kernel that build with higher -mcpu=... settings.
diff -r 72c11f4ce91b -r 8dac1e356fa8 sys/arch/arm/conf/files.arm
--- a/sys/arch/arm/conf/files.arm       Fri Jun 26 23:17:30 2020 +0000
+++ b/sys/arch/arm/conf/files.arm       Sun Jun 28 02:00:56 2020 +0000
@@ -262,3 +262,7 @@
 
 # profiling support
 file   dev/tprof/tprof_armv7.c                 tprof
+
+# vpaes with ARM NEON -- disabled for now pending arm32 kernel fpu
+# support and ctf
+#include "crypto/aes/arch/arm/files.aesneon"
diff -r 72c11f4ce91b -r 8dac1e356fa8 sys/crypto/aes/arch/arm/aes_neon.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/crypto/aes/arch/arm/aes_neon.c        Sun Jun 28 02:00:56 2020 +0000
@@ -0,0 +1,618 @@
+/*     $NetBSD$        */
+
+/*-
+ * Copyright (c) 2020 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Permutation-based AES using NEON, derived from Mike Hamburg's VPAES
+ * software, at <https://crypto.stanford.edu/vpaes/>, described in
+ *
+ *     Mike Hamburg, `Accelerating AES with Vector Permute
+ *     Instructions', in Christophe Clavier and Kris Gaj (eds.),
+ *     Cryptographic Hardware and Embedded Systems -- CHES 2009,
+ *     Springer LNCS 5747, pp. 18-32.
+ *
+ *     https://link.springer.com/chapter/10.1007/978-3-642-04138-9_2
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(1, "$NetBSD$");
+
+#include <sys/types.h>
+
+#include <sys/systm.h>
+
+#include "aes_neon_internal.h"
+
+static const uint8x16_t
+mc_forward[4] = {
+       {0x01,0x02,0x03,0x00,0x05,0x06,0x07,0x04,
+        0x09,0x0A,0x0B,0x08,0x0D,0x0E,0x0F,0x0C},
+       {0x05,0x06,0x07,0x04,0x09,0x0A,0x0B,0x08,
+        0x0D,0x0E,0x0F,0x0C,0x01,0x02,0x03,0x00},
+       {0x09,0x0A,0x0B,0x08,0x0D,0x0E,0x0F,0x0C,
+        0x01,0x02,0x03,0x00,0x05,0x06,0x07,0x04},
+       {0x0D,0x0E,0x0F,0x0C,0x01,0x02,0x03,0x00,
+        0x05,0x06,0x07,0x04,0x09,0x0A,0x0B,0x08},
+},
+mc_backward[4] = {
+       {0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06,
+        0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E},
+       {0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02,
+        0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A},
+       {0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E,
+        0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06},
+       {0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A,
+        0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02},
+},
+ipt[2] = {
+       {0x00,0x70,0x2A,0x5A,0x98,0xE8,0xB2,0xC2,
+        0x08,0x78,0x22,0x52,0x90,0xE0,0xBA,0xCA},
+       {0x00,0x4D,0x7C,0x31,0x7D,0x30,0x01,0x4C,
+        0x81,0xCC,0xFD,0xB0,0xFC,0xB1,0x80,0xCD},
+},
+opt[2] = {
+       {0x00,0x60,0xB6,0xD6,0x29,0x49,0x9F,0xFF,
+        0x08,0x68,0xBE,0xDE,0x21,0x41,0x97,0xF7},
+       {0x00,0xEC,0xBC,0x50,0x51,0xBD,0xED,0x01,
+        0xE0,0x0C,0x5C,0xB0,0xB1,0x5D,0x0D,0xE1},
+},
+dipt[2] = {
+       {0x00,0x5F,0x54,0x0B,0x04,0x5B,0x50,0x0F,
+        0x1A,0x45,0x4E,0x11,0x1E,0x41,0x4A,0x15},
+       {0x00,0x65,0x05,0x60,0xE6,0x83,0xE3,0x86,
+        0x94,0xF1,0x91,0xF4,0x72,0x17,0x77,0x12},
+},
+sb1[2] = {
+       {0x00,0x3E,0x50,0xCB,0x8F,0xE1,0x9B,0xB1,
+        0x44,0xF5,0x2A,0x14,0x6E,0x7A,0xDF,0xA5},
+       {0x00,0x23,0xE2,0xFA,0x15,0xD4,0x18,0x36,
+        0xEF,0xD9,0x2E,0x0D,0xC1,0xCC,0xF7,0x3B},
+},
+sb2[2] = {
+       {0x00,0x24,0x71,0x0B,0xC6,0x93,0x7A,0xE2,
+        0xCD,0x2F,0x98,0xBC,0x55,0xE9,0xB7,0x5E},
+       {0x00,0x29,0xE1,0x0A,0x40,0x88,0xEB,0x69,
+        0x4A,0x23,0x82,0xAB,0xC8,0x63,0xA1,0xC2},
+},
+sbo[2] = {
+       {0x00,0xC7,0xBD,0x6F,0x17,0x6D,0xD2,0xD0,
+        0x78,0xA8,0x02,0xC5,0x7A,0xBF,0xAA,0x15},
+       {0x00,0x6A,0xBB,0x5F,0xA5,0x74,0xE4,0xCF,
+        0xFA,0x35,0x2B,0x41,0xD1,0x90,0x1E,0x8E},
+},
+dsb9[2] = {
+       {0x00,0xD6,0x86,0x9A,0x53,0x03,0x1C,0x85,
+        0xC9,0x4C,0x99,0x4F,0x50,0x1F,0xD5,0xCA},
+       {0x00,0x49,0xD7,0xEC,0x89,0x17,0x3B,0xC0,
+        0x65,0xA5,0xFB,0xB2,0x9E,0x2C,0x5E,0x72},
+},
+dsbd[2] = {
+       {0x00,0xA2,0xB1,0xE6,0xDF,0xCC,0x57,0x7D,
+        0x39,0x44,0x2A,0x88,0x13,0x9B,0x6E,0xF5},
+       {0x00,0xCB,0xC6,0x24,0xF7,0xFA,0xE2,0x3C,
+        0xD3,0xEF,0xDE,0x15,0x0D,0x18,0x31,0x29},
+},
+dsbb[2] = {
+       {0x00,0x42,0xB4,0x96,0x92,0x64,0x22,0xD0,
+        0x04,0xD4,0xF2,0xB0,0xF6,0x46,0x26,0x60},
+       {0x00,0x67,0x59,0xCD,0xA6,0x98,0x94,0xC1,
+        0x6B,0xAA,0x55,0x32,0x3E,0x0C,0xFF,0xF3},
+},
+dsbe[2] = {
+       {0x00,0xD0,0xD4,0x26,0x96,0x92,0xF2,0x46,
+        0xB0,0xF6,0xB4,0x64,0x04,0x60,0x42,0x22},
+       {0x00,0xC1,0xAA,0xFF,0xCD,0xA6,0x55,0x0C,
+        0x32,0x3E,0x59,0x98,0x6B,0xF3,0x67,0x94},
+},
+dsbo[2] = {
+       {0x00,0x40,0xF9,0x7E,0x53,0xEA,0x87,0x13,
+        0x2D,0x3E,0x94,0xD4,0xB9,0x6D,0xAA,0xC7},
+       {0x00,0x1D,0x44,0x93,0x0F,0x56,0xD7,0x12,
+        0x9C,0x8E,0xC5,0xD8,0x59,0x81,0x4B,0xCA},
+},
+dks1[2] = {
+       {0x00,0xA7,0xD9,0x7E,0xC8,0x6F,0x11,0xB6,
+        0xFC,0x5B,0x25,0x82,0x34,0x93,0xED,0x4A},
+       {0x00,0x33,0x14,0x27,0x62,0x51,0x76,0x45,
+        0xCE,0xFD,0xDA,0xE9,0xAC,0x9F,0xB8,0x8B},
+},
+dks2[2] = {
+       {0x00,0x64,0xA8,0xCC,0xEB,0x8F,0x43,0x27,
+        0x61,0x05,0xC9,0xAD,0x8A,0xEE,0x22,0x46},
+       {0x00,0xDD,0x92,0x4F,0xCE,0x13,0x5C,0x81,
+        0xF2,0x2F,0x60,0xBD,0x3C,0xE1,0xAE,0x73},
+},
+dks3[2] = {
+       {0x00,0xC7,0xC6,0x01,0x02,0xC5,0xC4,0x03,
+        0xFB,0x3C,0x3D,0xFA,0xF9,0x3E,0x3F,0xF8},
+       {0x00,0xF7,0xCF,0x38,0xD6,0x21,0x19,0xEE,
+        0x4B,0xBC,0x84,0x73,0x9D,0x6A,0x52,0xA5},
+},
+dks4[2] = {
+       {0x00,0x20,0x73,0x53,0xB0,0x90,0xC3,0xE3,
+        0x43,0x63,0x30,0x10,0xF3,0xD3,0x80,0xA0},
+       {0xE8,0x82,0x69,0x03,0x4B,0x21,0xCA,0xA0,
+        0x67,0x0D,0xE6,0x8C,0xC4,0xAE,0x45,0x2F},
+},
+deskew[2] = {
+       {0x00,0xE3,0xA4,0x47,0x40,0xA3,0xE4,0x07,
+        0x1A,0xF9,0xBE,0x5D,0x5A,0xB9,0xFE,0x1D},
+       {0x00,0x69,0xEA,0x83,0xDC,0xB5,0x36,0x5F,
+        0x77,0x1E,0x9D,0xF4,0xAB,0xC2,0x41,0x28},
+},
+sr[4] = {
+       {0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
+        0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F},
+       {0x00,0x05,0x0A,0x0F,0x04,0x09,0x0E,0x03,
+        0x08,0x0D,0x02,0x07,0x0C,0x01,0x06,0x0B},
+       {0x00,0x09,0x02,0x0B,0x04,0x0D,0x06,0x0F,
+        0x08,0x01,0x0A,0x03,0x0C,0x05,0x0E,0x07},
+       {0x00,0x0D,0x0A,0x07,0x04,0x01,0x0E,0x0B,
+        0x08,0x05,0x02,0x0F,0x0C,0x09,0x06,0x03},
+},
+rcon = {0xB6,0xEE,0x9D,0xAF,0xB9,0x91,0x83,0x1F,
+       0x81,0x7D,0x7C,0x4D,0x08,0x98,0x2A,0x70},
+s63 =  {0x5B,0x5B,0x5B,0x5B,0x5B,0x5B,0x5B,0x5B,
+       0x5B,0x5B,0x5B,0x5B,0x5B,0x5B,0x5B,0x5B},
+of =   {0x0F,0x0F,0x0F,0x0F,0x0F,0x0F,0x0F,0x0F,
+       0x0F,0x0F,0x0F,0x0F,0x0F,0x0F,0x0F,0x0F},
+inv =  {0x80,0x01,0x08,0x0D,0x0F,0x06,0x05,0x0E,
+       0x02,0x0C,0x0B,0x0A,0x09,0x03,0x07,0x04},
+inva = {0x80,0x07,0x0B,0x0F,0x06,0x0A,0x04,0x01,
+       0x09,0x08,0x05,0x02,0x0C,0x0E,0x0D,0x03};
+
+static inline uint8x16_t
+loadroundkey(const void *rkp)
+{
+       return vld1q_u8(rkp);
+}
+
+static inline void
+storeroundkey(void *rkp, uint8x16_t rk)
+{
+       vst1q_u8(rkp, rk);
+}
+
+/* Given abcdefgh, set *lo = 0b0d0f0h and *hi = 0a0c0e0g.  */
+static inline void
+bytes2nybbles(uint8x16_t *restrict lo, uint8x16_t *restrict hi, uint8x16_t x)
+{
+
+       *lo = x & of;
+       *hi = vshrq_n_u8(x & ~of, 4);
+}
+
+/* Given 0p0q0r0s, return 0x0y0z0w where x = a/p, y = a/q, &c.  */
+static inline uint8x16_t
+gf16_inva(uint8x16_t x)
+{
+       return vqtbl1q_u8(inva, x);
+}
+
+/* Given 0p0q0r0s, return 0x0y0z0w where x = 1/p, y = 1/q, &c.  */
+static inline uint8x16_t
+gf16_inv(uint8x16_t x)
+{
+       return vqtbl1q_u8(inv, x);
+}
+
+/*



Home | Main Index | Thread Index | Old Index