Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/crypto/external/bsd/openssl/lib/libcrypto/arch/m68k Implemen...



details:   https://anonhg.NetBSD.org/src/rev/0b04d0ab3f58
branches:  trunk
changeset: 821467:0b04d0ab3f58
user:      isaki <isaki%NetBSD.org@localhost>
date:      Tue Feb 07 11:18:43 2017 +0000

description:
Implement m68k assembly version of AES.
It's approx 1.4 times faster than the original one.

diffstat:

 crypto/external/bsd/openssl/lib/libcrypto/arch/m68k/aes-m68k.S |  1745 ++++++++++
 crypto/external/bsd/openssl/lib/libcrypto/arch/m68k/aes.inc    |     4 +
 2 files changed, 1749 insertions(+), 0 deletions(-)

diffs (truncated from 1757 to 300 lines):

diff -r 84aae5dfc815 -r 0b04d0ab3f58 crypto/external/bsd/openssl/lib/libcrypto/arch/m68k/aes-m68k.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/crypto/external/bsd/openssl/lib/libcrypto/arch/m68k/aes-m68k.S    Tue Feb 07 11:18:43 2017 +0000
@@ -0,0 +1,1745 @@
+|      $NetBSD: aes-m68k.S,v 1.1 2017/02/07 11:18:43 isaki Exp $
+
+| Copyright (C) 2016 Tetsuya Isaki. All rights reserved.
+| Copyright (C) 2016 Y.Sugahara (moveccr). All rights reserved.
+|
+| Redistribution and use in source and binary forms, with or without
+| modification, are permitted provided that the following conditions
+| are met:
+| 1. Redistributions of source code must retain the above copyright
+|    notice, this list of conditions and the following disclaimer.
+| 2. Redistributions in binary form must reproduce the above copyright
+|    notice, this list of conditions and the following disclaimer in the
+|    documentation and/or other materials provided with the distribution.
+|
+| THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+| IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+| OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+| IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+| INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+| BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+| LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+| AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+| OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+| SUCH DAMAGE.
+
+| Byte offsets of the AES_KEY fields that the code below reaches via %a3@(...).
+| rd_key: start of the round-key words (address of rk[0]).
+| rounds: slot written as key->rounds; 60 * 4 presumably skips 60 32-bit
+|         round-key words -- TODO confirm against the AES_KEY definition.
+#define rd_key (0)
+#define rounds (60 * 4)
+
+| int
+| private_AES_set_encrypt_key(const unsigned char *userKey,
+|  const int bits,
+|  AES_KEY *key)
+.global private_AES_set_encrypt_key
+private_AES_set_encrypt_key:
+_private_AES_set_encrypt_key:
+       moveml  %d2-%d7/%a2-%a6,%sp@-
+       moveal  %sp@(44+4),%a0          | userKey
+       moveal  %sp@(44+8),%a1          | bits
+       moveal  %sp@(44+12),%a3         | key
+
+       tstl    %a0
+       beq     pek_return1             | return -1 if userKey == NULL
+       tstl    %a3
+       beq     pek_return1             | return -1 if key == NULL
+
+       cmpaw   #128,%a1
+       bne     pek192_check            | unless bits == 128
+pek128:
+       | %d0-%d3 rk[0..3] -> rk[4..7] (updated in place), %d4-%d7 work
+       | %a0 userKey, Te0
+       | %a1          Te1
+       | %a2          Te2
+       | %a3 key,     Te3
+       | %a4 rcon_byte
+       | %a5 &rk[4]
+       | %a6 end of rcon_byte
+
+       moveq   #10,%d0
+       movel   %d0,%a3@(rounds)        | key->rounds = 10
+
+       lea     %a3@(rd_key),%a5        | &rk[0]
+
+       | rk[0] = GETU32(userKey     );
+       | rk[1] = GETU32(userKey +  4);
+       | rk[2] = GETU32(userKey +  8);
+       | rk[3] = GETU32(userKey + 12);
+       moveml  %a0@,%d0-%d3
+       movel   %d0,%a5@+
+       movel   %d1,%a5@+
+       movel   %d2,%a5@+
+       movel   %d3,%a5@+
+
+       lea     %pc@(Te0),%a0           | %a0 = Te0
+       lea     %a0@(256*4),%a1         | %a1 = Te1
+       lea     %a1@(256*4),%a2         | %a2 = Te2
+       lea     %a2@(256*4),%a3         | %a3 = Te3
+
+       moveq   #0,%d7
+       lea     %pc@(rcon_byte),%a4
+       lea     %a4@(10),%a6
+
+pek128_loop:
+                                       | d5 holds four Te index bytes; {a,b,c,d} = Te table each byte indexes
+       movel   %d3,%d5                 | d5=rk[3] as {1,2,3,0}
+       moveb   %d5,%d7                 | d7=temp
+       moveb   %a0@(2,%d7:w:4),%d4     | d4=$xxxxxx00
+       swap    %d5                     | d5={3,0,1,2}
+       lsll    #8,%d4                  | d4=$xxxx00xx
+       moveb   %d5,%d7                 | d7=temp>>16
+       moveb   %a2@(0,%d7:w:4),%d6     | d6=$xxxxxx22
+       lsrl    #8,%d5                  | d5={x,3,0,1}
+       moveb   %a4@+,%d7               | LSByte ^= rcon[i]
+       eorb    %d7,%d6
+       lsll    #8,%d6                  | d6=$xxxx22xx
+       moveb   %d5,%d7                 | d7=temp>>24
+       moveb   %a1@(3,%d7:w:4),%d4     | d4=$xxxx0011
+       swap    %d5                     | d5={0,1,x,3}
+       moveb   %d5,%d7                 | d7=temp>>8
+       moveb   %a3@(1,%d7:w:4),%d6     | d6=$xxxx2233
+       swap    %d6                     | d6=$2233xxxx
+       movew   %d4,%d6                 | d6=$22330011
+
+       eorl    %d6,%d0                 | rk[4] = rk[0]^ (Te..)
+       movel   %d0,%a5@+
+       eorl    %d0,%d1                 | rk[5] = rk[1] ^ rk[4];
+       movel   %d1,%a5@+
+       eorl    %d1,%d2                 | rk[6] = rk[2] ^ rk[5];
+       movel   %d2,%a5@+
+       eorl    %d2,%d3                 | rk[7] = rk[3] ^ rk[6];
+       movel   %d3,%a5@+
+
+       cmpal   %a4,%a6
+       bne     pek128_loop             |if (++i == 10) return 0;
+
+pek_return0:
+       moveql  #0,%d0
+pek_return:
+       moveml  %sp@+,%d2-%d7/%a2-%a6
+       rts
+
+pek192_check:
+       | %a0 userKey
+       | %a1 bits
+       | %a3 key
+       cmpaw   #192,%a1
+       bne     pek256_check            | unless bits == 192
+pek192:
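+       | %a0 Te0 + 2 (byte 2 of each entry)
+       | %a1 Te1 + 3 (byte 3 of each entry)
+       | %a2 Te2 + 0 (byte 0 of each entry)
+       | %a3 Te3 + 1 (byte 1 of each entry)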
+       | %a4 rcon_byte
+       | %a5 &rk[6]
+       | %a6 end of rcon_byte
+       moveq   #12,%d0
+       movel   %d0,%a3@(rounds)        | key->rounds = 12
+
+       lea     %a3@(rd_key),%a5        | &rk[0]
+
+       | rk[0] = GETU32(userKey     );
+       | rk[1] = GETU32(userKey +  4);
+       | rk[2] = GETU32(userKey +  8);
+       | rk[3] = GETU32(userKey + 12);
+       | rk[4] = GETU32(userKey + 16);
+       | rk[5] = GETU32(userKey + 20);
+       moveml  %a0@,%d0-%d5            | copy userKey[0..23]
+       movel   %d0,%a5@+
+       movel   %d1,%a5@+
+       movel   %d2,%a5@+
+       movel   %d3,%a5@+
+       movel   %d4,%a5@+
+       movel   %d5,%a5@                | read again later
+
+       lea     %pc@(Te0   +2),%a0      | %a0 = Te0 + 2
+       lea     %a0@(256*4 -2+3),%a1    | %a1 = Te1 + 3
+       lea     %a1@(256*4 -3+0),%a2    | %a2 = Te2 + 0
+       lea     %a2@(256*4 +0+1),%a3    | %a3 = Te3 + 1
+
+       moveq   #0,%d7
+       lea     %pc@(rcon_byte),%a4
+       lea     %a4@(8),%a6
+       bra     pek192_loop_start
+
+pek192_loop:
+       eorl    %d3,%d4                 | rk[10] = rk[4] ^ rk[9]
+       movel   %d4,%a5@+               | %d4 is rk[10]
+       eorl    %d4,%d5                 | rk[11] = rk[5] ^ rk[10]
+       movel   %d5,%a5@                | %d5 is rk[11]
+
+pek192_loop_start:
+       | %d0..%d5 = rk[0..5]
+       | (in the d6=$.. notes below, 00/11/22/33 number the lookups in order)
+       | temp = rk[5];
+       | rk[6]  = (Te0[(temp      ) & 0xff] & 0x0000ff00)
+       | rk[6] |= (Te3[(temp >>  8) & 0xff] & 0x00ff0000)
+       | rk[6] |= (Te2[(temp >> 16) & 0xff] & 0xff000000)
+       |         ^ rcon[i]
+       | rk[6] |= (Te1[(temp >> 24)       ] & 0x000000ff)
+       moveb   %d5,%d7                 | temp >> 0
+       moveb   %a0@(%d7:w:4),%d6       | d6=$xxxxxx00
+       rorl    #8,%d6                  | d6=$00xxxxxx
+       lsrl    #8,%d5                  | temp >> 8
+       moveb   %d5,%d7
+       moveb   %a3@(%d7:w:4),%d6       | d6=$00xxxx11
+       rorl    #8,%d6                  | d6=$1100xxxx
+       lsrl    #8,%d5                  | temp >> 16
+       moveb   %d5,%d7
+       moveb   %a2@(%d7:w:4),%d6       | d6=$1100xx22
+       moveb   %a4@+,%d7               | LSByte ^= rcon[i]
+       eorb    %d7,%d6
+       rorl    #8,%d6                  | d6=$221100xx
+       lsrl    #8,%d5                  | temp >> 24
+       moveb   %a1@(%d5:w:4),%d6       | d6=$22110033
+
+       movel   %a5@+,%d5               | read rk[5] again
+                                       | (faster than keeping %a5)
+
+       eorl    %d6,%d0                 | rk[6] ^= rk[0]
+       movel   %d0,%a5@+               | %d0 is rk[6]
+       eorl    %d0,%d1                 | rk[7] = rk[1] ^ rk[6]
+       movel   %d1,%a5@+               | %d1 is rk[7]
+       eorl    %d1,%d2                 | rk[8] = rk[2] ^ rk[7]
+       movel   %d2,%a5@+               | %d2 is rk[8]
+       eorl    %d2,%d3                 | rk[9] = rk[3] ^ rk[8]
+       movel   %d3,%a5@+               | %d3 is rk[9]
+
+       cmpal   %a4,%a6
+       bne     pek192_loop
+       bra     pek_return0
+
+
+pek256_check:
+       | %a0 userKey
+       | %a1 bits
+       | %a3 key
+       cmpaw   #256,%a1
+       bne     pek_return2             | otherwise return -2
+pek256:
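+       | %a0 Te0 + 2 (byte 2 of each entry)
+       | %a1 Te1 + 3 (byte 3 of each entry)
+       | %a2 Te2 + 0 (byte 0 of each entry)
+       | %a3 Te3 + 1 (byte 1 of each entry)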
+       | %a4 rcon_byte
+       | %a5 &rk[8]
+       | %a6 end of rcon_byte
+       moveq   #14,%d0
+       movel   %d0,%a3@(rounds)        | key->rounds = 14
+
+       lea     %a3@(rd_key),%a5        | &rk[0]
+
+       | rk[0] = GETU32(userKey     );
+       | rk[1] = GETU32(userKey +  4);
+       | rk[2] = GETU32(userKey +  8);
+       | rk[3] = GETU32(userKey + 12);
+       | rk[4] = GETU32(userKey + 16);
+       | rk[5] = GETU32(userKey + 20);
+       | rk[6] = GETU32(userKey + 24);
+       | rk[7] = GETU32(userKey + 28);
+       moveml  %a0@,%d0-%d7            | copy userKey[0..31]
+       movel   %d0,%a5@+
+       movel   %d1,%a5@+
+       movel   %d2,%a5@+
+       movel   %d3,%a5@+
+       movel   %d4,%a5@+
+       movel   %d5,%a5@+
+       movel   %d6,%a5@+
+       movel   %d7,%a5@+
+
+       lea     %pc@(Te0   +2),%a0      | %a0 = Te0 + 2
+       lea     %a0@(256*4 -2+3),%a1    | %a1 = Te1 + 3
+       lea     %a1@(256*4 -3+0),%a2    | %a2 = Te2 + 0
+       lea     %a2@(256*4 +0+1),%a3    | %a3 = Te3 + 1
+
+       lea     %pc@(rcon_byte),%a4
+       lea     %a4@(7),%a6
+       bra     pek256_loop_start
+
+pek256_loop:
+       | %d0: rk[8]  -> work
+       | %d1: rk[9]  -> work
+       | %d2: rk[10]
+       | %d3: rk[11] -> work
+       | %d4: rk[4]
+       | %d5: work   -> rk[5]
+       | %d6: work   -> rk[6]
+       | %d7: work   -> rk[7]
+       | %a5 = &rk[12]
+       |
+       | temp = rk[11];
+       | rk[12]  = (Te1[(temp      ) & 0xff] & 0x000000ff);
+       | rk[12] |= (Te0[(temp >>  8) & 0xff] & 0x0000ff00);
+       | rk[12] |= (Te3[(temp >> 16) & 0xff] & 0x00ff0000);
+       | rk[12] |= (Te2[(temp >> 24)       ] & 0xff000000);
+       | rk[12] ^= rk[ 4];
+
+       moveml  %a5@(-7*4),%d5-%d7      | %d5..%d7 = rk[5..7]
+
+       moveq   #0,%d0
+       moveb   %d3,%d0                 | temp >> 0
+       moveb   %a1@(%d0:w:4),%d1       | d1=$xxxxxx00
+       rorl    #8,%d1                  | d1=$00xxxxxx
+       lsrl    #8,%d3                  | temp >> 8
+       moveb   %d3,%d0
+       moveb   %a0@(%d0:w:4),%d1       | d1=$00xxxx11
+       rorl    #8,%d1                  | d1=$1100xxxx
+       lsrl    #8,%d3                  | temp >> 16
+       moveb   %d3,%d0
+       moveb   %a3@(%d0:w:4),%d1       | d1=$1100xx22
+       rorl    #8,%d1                  | d1=$221100xx
+       lsrl    #8,%d3                  | temp >> 24
+       moveb   %a2@(%d3:w:4),%d1       | d1=$22110033
+       rorl    #8,%d1                  | d1=$33221100
+
+       eorl    %d1,%d4                 | rk[12] ^= rk[4]
+       movel   %d4,%a5@+               | %d4 is rk[12]



Home | Main Index | Thread Index | Old Index