Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src-draft/trunk]: src/sys/crypto/aes/arch/x86 Split SSE2 logic into separate...



details:   https://anonhg.NetBSD.org/src-all/rev/1f9f3f817515
branches:  trunk
changeset: 935164:1f9f3f817515
user:      Taylor R Campbell <riastradh%NetBSD.org@localhost>
date:      Fri Jun 26 21:15:43 2020 +0000

description:
Split SSE2 logic into separate units.

Ensure that there are no paths into files compiled with -msse -msse2
at all except via fpu_kern_enter.

I didn't run into a practical problem with this, but let's not leave
a ticking time bomb for subsequent toolchain changes.

diffstat:

 sys/crypto/aes/arch/x86/aes_sse2.c          |    2 +-
 sys/crypto/aes/arch/x86/aes_sse2_dec.c      |    2 +-
 sys/crypto/aes/arch/x86/aes_sse2_enc.c      |    2 +-
 sys/crypto/aes/arch/x86/aes_sse2_impl.c     |  541 ++-------------------------
 sys/crypto/aes/arch/x86/aes_sse2_impl.h     |   47 --
 sys/crypto/aes/arch/x86/aes_sse2_internal.h |   50 ++
 sys/crypto/aes/arch/x86/aes_sse2_subr.c     |  525 +++++++++++++++++++++++++++
 sys/crypto/aes/arch/x86/aes_sse2_subr.h     |   59 +++
 sys/crypto/aes/arch/x86/files.aessse2       |    9 +-
 9 files changed, 692 insertions(+), 545 deletions(-)

diffs (truncated from 1378 to 300 lines):

diff -r d81ebf8d7a16 -r 1f9f3f817515 sys/crypto/aes/arch/x86/aes_sse2.c
--- a/sys/crypto/aes/arch/x86/aes_sse2.c        Sat Jun 20 02:02:41 2020 +0000
+++ b/sys/crypto/aes/arch/x86/aes_sse2.c        Fri Jun 26 21:15:43 2020 +0000
@@ -29,7 +29,7 @@
 
 #include <lib/libkern/libkern.h>
 
-#include "aes_sse2_impl.h"
+#include "aes_sse2_internal.h"
 
 static void
 br_range_dec32le(uint32_t *p32, size_t nwords, const void *v)
diff -r d81ebf8d7a16 -r 1f9f3f817515 sys/crypto/aes/arch/x86/aes_sse2_dec.c
--- a/sys/crypto/aes/arch/x86/aes_sse2_dec.c    Sat Jun 20 02:02:41 2020 +0000
+++ b/sys/crypto/aes/arch/x86/aes_sse2_dec.c    Fri Jun 26 21:15:43 2020 +0000
@@ -27,7 +27,7 @@
 
 #include <sys/types.h>
 
-#include "aes_sse2_impl.h"
+#include "aes_sse2_internal.h"
 
 /* see inner.h */
 void
diff -r d81ebf8d7a16 -r 1f9f3f817515 sys/crypto/aes/arch/x86/aes_sse2_enc.c
--- a/sys/crypto/aes/arch/x86/aes_sse2_enc.c    Sat Jun 20 02:02:41 2020 +0000
+++ b/sys/crypto/aes/arch/x86/aes_sse2_enc.c    Fri Jun 26 21:15:43 2020 +0000
@@ -27,7 +27,7 @@
 
 #include <sys/types.h>
 
-#include "aes_sse2_impl.h"
+#include "aes_sse2_internal.h"
 
 static inline void
 add_round_key(__m128i q[static 4], const uint64_t sk[static 8])
diff -r d81ebf8d7a16 -r 1f9f3f817515 sys/crypto/aes/arch/x86/aes_sse2_impl.c
--- a/sys/crypto/aes/arch/x86/aes_sse2_impl.c   Sat Jun 20 02:02:41 2020 +0000
+++ b/sys/crypto/aes/arch/x86/aes_sse2_impl.c   Fri Jun 26 21:15:43 2020 +0000
@@ -31,7 +31,6 @@
 
 #include <sys/types.h>
 #include <sys/endian.h>
-#include <sys/systm.h>
 
 #include <crypto/aes/aes.h>
 #include <crypto/aes/arch/x86/aes_sse2.h>
@@ -41,532 +40,101 @@
 #include <x86/fpu.h>
 #include <x86/specialreg.h>
 
-#include "aes_sse2_impl.h"
+#include "aes_sse2_subr.h"
 
 static void
-aes_sse2_setkey(uint64_t rk[static 30], const void *key, uint32_t nrounds)
+aes_sse2_setenckey_impl(struct aesenc *enc, const uint8_t *key,
+    uint32_t nrounds)
 {
-       size_t key_len;
-
-       switch (nrounds) {
-       case 10:
-               key_len = 16;
-               break;
-       case 12:
-               key_len = 24;
-               break;
-       case 14:
-               key_len = 32;
-               break;
-       default:
-               panic("invalid AES nrounds: %u", nrounds);
-       }
 
        fpu_kern_enter();
-       aes_sse2_keysched(rk, key, key_len);
+       aes_sse2_setkey(enc->aese_aes.aes_rk64, key, nrounds);
        fpu_kern_leave();
 }
 
 static void
-aes_sse2_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds)
+aes_sse2_setdeckey_impl(struct aesdec *dec, const uint8_t *key,
+    uint32_t nrounds)
 {
 
-       aes_sse2_setkey(enc->aese_aes.aes_rk64, key, nrounds);
-}
-
-static void
-aes_sse2_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds)
-{
-
+       fpu_kern_enter();
        /*
         * BearSSL computes InvMixColumns on the fly -- no need for
         * distinct decryption round keys.
         */
        aes_sse2_setkey(dec->aesd_aes.aes_rk64, key, nrounds);
-}
-
-static void
-aes_sse2_enc(const struct aesenc *enc, const uint8_t in[static 16],
-    uint8_t out[static 16], uint32_t nrounds)
-{
-       uint64_t sk_exp[120];
-       __m128i q[4];
-
-       fpu_kern_enter();
-
-       /* Expand round keys for bitslicing.  */
-       aes_sse2_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk64);
-
-       /* Load input block interleaved with garbage blocks.  */
-       q[0] = aes_sse2_interleave_in(_mm_loadu_epi8(in));
-       q[1] = q[2] = q[3] = _mm_setzero_si128();
-
-       /* Transform to bitslice, decrypt, transform from bitslice.  */
-       aes_sse2_ortho(q);
-       aes_sse2_bitslice_encrypt(nrounds, sk_exp, q);
-       aes_sse2_ortho(q);
-
-       /* Store output block.  */
-       _mm_storeu_epi8(out, aes_sse2_interleave_out(q[0]));
-
-       /* Paranoia: Zero temporary buffers.  */
-       explicit_memset(sk_exp, 0, sizeof sk_exp);
-       explicit_memset(q, 0, sizeof q);
-
        fpu_kern_leave();
 }
 
 static void
-aes_sse2_dec(const struct aesdec *dec, const uint8_t in[static 16],
+aes_sse2_enc_impl(const struct aesenc *enc, const uint8_t in[static 16],
     uint8_t out[static 16], uint32_t nrounds)
 {
-       uint64_t sk_exp[120];
-       __m128i q[4];
 
        fpu_kern_enter();
-
-       /* Expand round keys for bitslicing.  */
-       aes_sse2_skey_expand(sk_exp, nrounds, dec->aesd_aes.aes_rk64);
-
-       /* Load input block interleaved with garbage blocks.  */
-       q[0] = aes_sse2_interleave_in(_mm_loadu_epi8(in));
-       q[1] = q[2] = q[3] = _mm_setzero_si128();
-
-       /* Transform to bitslice, decrypt, transform from bitslice.  */
-       aes_sse2_ortho(q);
-       aes_sse2_bitslice_decrypt(nrounds, sk_exp, q);
-       aes_sse2_ortho(q);
-
-       /* Store output block.  */
-       _mm_storeu_epi8(out, aes_sse2_interleave_out(q[0]));
-
-       /* Paranoia: Zero temporary buffers.  */
-       explicit_memset(sk_exp, 0, sizeof sk_exp);
-       explicit_memset(q, 0, sizeof q);
-
+       aes_sse2_enc(enc, in, out, nrounds);
        fpu_kern_leave();
 }
 
 static void
-aes_sse2_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16],
-    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
-    uint32_t nrounds)
+aes_sse2_dec_impl(const struct aesdec *dec, const uint8_t in[static 16],
+    uint8_t out[static 16], uint32_t nrounds)
 {
-       uint64_t sk_exp[120];
-       __m128i q[4];
-       __m128i cv;
-
-       KASSERT(nbytes % 16 == 0);
-
-       /* Skip if there's nothing to do.  */
-       if (nbytes == 0)
-               return;
 
        fpu_kern_enter();
-
-       /* Expand round keys for bitslicing.  */
-       aes_sse2_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk64);
-
-       /* Load the IV.  */
-       cv = _mm_loadu_epi8(iv);
-
-       for (; nbytes; nbytes -= 16, in += 16, out += 16) {
-               /* Load input block and apply CV.  */
-               q[0] = aes_sse2_interleave_in(cv ^ _mm_loadu_epi8(in));
-
-               /* Transform to bitslice, encrypt, transform from bitslice.  */
-               aes_sse2_ortho(q);
-               aes_sse2_bitslice_encrypt(nrounds, sk_exp, q);
-               aes_sse2_ortho(q);
-
-               /* Remember ciphertext as CV and store output block.  */
-               cv = aes_sse2_interleave_out(q[0]);
-               _mm_storeu_epi8(out, cv);
-       }
-
-       /* Store updated IV.  */
-       _mm_storeu_epi8(iv, cv);
-
-       /* Paranoia: Zero temporary buffers.  */
-       explicit_memset(sk_exp, 0, sizeof sk_exp);
-       explicit_memset(q, 0, sizeof q);
-
+       aes_sse2_dec(dec, in, out, nrounds);
        fpu_kern_leave();
 }
 
 static void
-aes_sse2_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16],
-    uint8_t out[static 16], size_t nbytes, uint8_t ivp[static 16],
+aes_sse2_cbc_enc_impl(const struct aesenc *enc, const uint8_t in[static 16],
+    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
     uint32_t nrounds)
 {
-       uint64_t sk_exp[120];
-       __m128i q[4];
-       __m128i cv, iv, w;
 
-       KASSERT(nbytes % 16 == 0);
-
-       /* Skip if there's nothing to do.  */
        if (nbytes == 0)
                return;
-
        fpu_kern_enter();
-
-       /* Expand round keys for bitslicing.  */
-       aes_sse2_skey_expand(sk_exp, nrounds, dec->aesd_aes.aes_rk64);
-
-       /* Load the IV.  */
-       iv = _mm_loadu_epi8(ivp);
-
-       /* Load the last cipher block.  */
-       cv = _mm_loadu_epi8(in + nbytes - 16);
-
-       /* Store the updated IV.  */
-       _mm_storeu_epi8(ivp, cv);
-
-       /* Process the last blocks if not an even multiple of four.  */
-       if (nbytes % (4*16)) {
-               unsigned n = (nbytes/16) % 4;
-
-               KASSERT(n > 0);
-               KASSERT(n < 4);
-
-               q[1] = q[2] = q[3] = _mm_setzero_si128();
-               q[n - 1] = aes_sse2_interleave_in(cv);
-               switch (nbytes % 64) {
-               case 48:
-                       w = _mm_loadu_epi8(in + nbytes - 32);
-                       q[1] = aes_sse2_interleave_in(w);
-                       /*FALLTHROUGH*/
-               case 32:
-                       w = _mm_loadu_epi8(in + nbytes - 48);
-                       q[0] = aes_sse2_interleave_in(w);
-                       /*FALLTHROUGH*/
-               case 16:
-                       break;
-               }
-
-               /* Decrypt.  */
-               aes_sse2_ortho(q);
-               aes_sse2_bitslice_decrypt(nrounds, sk_exp, q);
-               aes_sse2_ortho(q);
-
-               do {
-                       n--;
-                       w = aes_sse2_interleave_out(q[n]);
-                       if ((nbytes -= 16) == 0)
-                               goto out;
-                       cv = _mm_loadu_epi8(in + nbytes - 16);
-                       _mm_storeu_epi8(out + nbytes, w ^ cv);
-               } while (n);
-       }
-
-       for (;;) {
-               KASSERT(nbytes >= 64);
-               nbytes -= 64;
-
-               /*
-                * 1. Set up upper cipher block from cv.
-                * 2. Load lower cipher block into cv and set it up.
-                * 3. Decrypt.
-                */
-               q[3] = aes_sse2_interleave_in(cv);
-
-               w = _mm_loadu_epi8(in + nbytes + 4*8);
-               q[2] = aes_sse2_interleave_in(w);
-
-               w = _mm_loadu_epi8(in + nbytes + 4*4);
-               q[1] = aes_sse2_interleave_in(w);



Home | Main Index | Thread Index | Old Index