Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/crypto/external/bsd/openssl/lib/libcrypto/arch/sparc64 Fix a...



details:   https://anonhg.NetBSD.org/src/rev/d4e41fa34968
branches:  trunk
changeset: 793966:d4e41fa34968
user:      nakayama <nakayama%NetBSD.org@localhost>
date:      Sun Mar 02 08:50:34 2014 +0000

description:
Fix assembler code generation: pass option -m64 properly, and
generate more code.

diffstat:

 crypto/external/bsd/openssl/lib/libcrypto/arch/sparc64/Makefile         |     8 +-
 crypto/external/bsd/openssl/lib/libcrypto/arch/sparc64/des_enc-sparc.S  |  5280 ++++++++++
 crypto/external/bsd/openssl/lib/libcrypto/arch/sparc64/ghash-sparcv9.S  |     6 +-
 crypto/external/bsd/openssl/lib/libcrypto/arch/sparc64/sha1-sparcv9.S   |     6 +-
 crypto/external/bsd/openssl/lib/libcrypto/arch/sparc64/sha1-sparcv9a.S  |     8 +-
 crypto/external/bsd/openssl/lib/libcrypto/arch/sparc64/sha512-sparcv9.S |  4041 ++++---
 6 files changed, 7510 insertions(+), 1839 deletions(-)

diffs (truncated from 9460 to 300 lines):

diff -r b25cd1fe4d0a -r d4e41fa34968 crypto/external/bsd/openssl/lib/libcrypto/arch/sparc64/Makefile
--- a/crypto/external/bsd/openssl/lib/libcrypto/arch/sparc64/Makefile   Sun Mar 02 08:20:09 2014 +0000
+++ b/crypto/external/bsd/openssl/lib/libcrypto/arch/sparc64/Makefile   Sun Mar 02 08:50:34 2014 +0000
@@ -1,4 +1,4 @@
-#      $NetBSD: Makefile,v 1.3 2012/07/31 10:33:45 christos Exp $
+#      $NetBSD: Makefile,v 1.4 2014/03/02 08:50:34 nakayama Exp $
 
 .include "bsd.own.mk"
 
@@ -9,8 +9,12 @@
        for i in $$(find ${OPENSSLSRC} -name \*sparcv9\*.pl); do \
                j=$$(basename $$i .pl).S; \
                case $$j in \
-               ghash*|sha*) perl $$i > $$j;; \
+               ghash*|sha*) perl $$i $$j -m64;; \
                *) perl $$i -m64 > $$j;; \
                esac; \
        done
+       #cp ${OPENSSLSRC}/crypto/bn/asm/sparcv8plus.S bn-sparcv9.S
+       m4 ${OPENSSLSRC}/crypto/des/asm/des_enc.m4 | \
+               sed 's,OPENSSL_SYSNAME_ULTRASPARC,__sparc_v9__,g' | \
+               sed 's,\.PIC\.DES_SPtrans,_PIC_DES_SPtrans,g' > des_enc-sparc.S
 foo:
diff -r b25cd1fe4d0a -r d4e41fa34968 crypto/external/bsd/openssl/lib/libcrypto/arch/sparc64/des_enc-sparc.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/crypto/external/bsd/openssl/lib/libcrypto/arch/sparc64/des_enc-sparc.S    Sun Mar 02 08:50:34 2014 +0000
@@ -0,0 +1,5280 @@
+!  des_enc.m4
+!  des_enc.S  (generated from des_enc.m4)
+!
+!  UltraSPARC assembler version of the LibDES/SSLeay/OpenSSL des_enc.c file.
+!
+!  Version 1.0. 32-bit version.
+!
+!  June 8, 2000.
+!
+!  Version 2.0. 32/64-bit, PIC-ification, blended CPU adaptation
+!              by Andy Polyakov.
+!
+!  January 1, 2003.
+!
+!  Assembler version: Copyright Svend Olaf Mikkelsen.
+!
+!  Original C code: Copyright Eric A. Young.
+!
+!  This code can be freely used by LibDES/SSLeay/OpenSSL users.
+!
+!  The LibDES/SSLeay/OpenSSL copyright notices must be respected.
+!
+!  This version can be redistributed.
+!
+!  To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S
+!
+!  Global registers 1 to 5 are used. This is the same as done by the
+!  cc compiler. The UltraSPARC load/store little endian feature is used.
+!
+!  Instruction grouping often refers to one CPU cycle.
+!
+!  Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S
+!
+!  Assemble through cc:  cc -c -xarch=v8plusa -o des_enc.o des_enc.S
+!
+!  Performance improvement according to './apps/openssl speed des'
+!
+!      32-bit build:
+!              23%  faster than cc-5.2 -xarch=v8plus -xO5
+!              115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5
+!      64-bit build:
+!              50%  faster than cc-5.2 -xarch=v9 -xO5
+!              100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5
+!
+
+.ident "des_enc.m4 2.1"
+.file  "des_enc-sparc.S"
+
+#if defined(__SUNPRO_C) && defined(__sparcv9)
+# define ABI64  /* They've said -xarch=v9 at command line */
+#elif defined(__GNUC__) && defined(__arch64__)
+# define ABI64  /* They've said -m64 at command line */
+#endif
+
+#ifdef ABI64
+  .register    %g2,#scratch
+  .register    %g3,#scratch
+# define       FRAME   -192
+# define       BIAS    2047
+# define       LDPTR   ldx
+# define       STPTR   stx
+# define       ARG0    128
+# define       ARGSZ   8
+# ifndef __sparc_v9__
+# define __sparc_v9__
+# endif
+#else
+# define       FRAME   -96
+# define       BIAS    0
+# define       LDPTR   ld
+# define       STPTR   st
+# define       ARG0    68
+# define       ARGSZ   4
+#endif
+
+#define LOOPS 7
+
+#define global0 %g0
+#define global1 %g1
+#define global2 %g2
+#define global3 %g3
+#define global4 %g4
+#define global5 %g5
+
+#define local0 %l0
+#define local1 %l1
+#define local2 %l2
+#define local3 %l3
+#define local4 %l4
+#define local5 %l5
+#define local7 %l6
+#define local6 %l7
+
+#define in0 %i0
+#define in1 %i1
+#define in2 %i2
+#define in3 %i3
+#define in4 %i4
+#define in5 %i5
+#define in6 %i6
+#define in7 %i7
+
+#define out0 %o0
+#define out1 %o1
+#define out2 %o2
+#define out3 %o3
+#define out4 %o4
+#define out5 %o5
+#define out6 %o6
+#define out7 %o7
+
+#define stub stb
+
+
+
+
+! Macro definitions:
+
+
+! ip_macro
+!
+! The logic used in initial and final permutations is the same as in
+! the C code. The permutations are done with a clever shift, xor, and
+! technique.
+!
+! The macro also loads address sbox 1 to 5 to global 1 to 5, address
+! sbox 6 to local6, and address sbox 8 to out3.
+!
+! Rotates the halves 3 left to bring the sbox bits into convenient positions.
+!
+! Loads key first round from address in parameter 5 to out0, out1.
+!
+! After the original LibDES initial permutation, the resulting left
+! is in the variable initially used for right and vice versa. The macro
+! implements the possibility to keep the halves in the original registers.
+!
+! parameter 1  left
+! parameter 2  right
+! parameter 3  result left (modify in first round)
+! parameter 4  result right (use in first round)
+! parameter 5  key address
+! parameter 6  1/2 for include encryption/decryption
+! parameter 7  1 for move in1 to in3
+! parameter 8  1 for move in3 to in4, 2 for move in4 to in3
+! parameter 9  1 for load ks3 and ks2 to in4 and in3
+
+
+
+
+! rounds_macro
+!
+! The logic used in the DES rounds is the same as in the C code,
+! except that calculations for sbox 1 and sbox 5 begin before
+! the previous round is finished.
+!
+! In each round one half (work) is modified based on key and the
+! other half (use).
+!
+! In this version we do two rounds in a loop repeated 7 times
+! and two rounds separately.
+!
+! One half has the bits for the sboxes in the following positions:
+!
+!      777777xx555555xx333333xx111111xx
+!
+!      88xx666666xx444444xx222222xx8888
+!
+! The bits for each sbox are xor-ed with the key bits for that box.
+! The above xx bits are cleared, and the result used for lookup in
+! the sbox table. Each sbox entry contains the 4 output bits permuted
+! into 32 bits according to the P permutation.
+!
+! In the description of DES, left and right are switched after
+! each round, except after last round. In this code the original
+! left and right are kept in the same register in all rounds, meaning
+! that after the 16 rounds the result for right is in the register
+! originally used for left.
+!
+! parameter 1  first work (left in first round)
+! parameter 2  first use (right in first round)
+! parameter 3  enc/dec  1/-1
+! parameter 4  loop label
+! parameter 5  key address register
+! parameter 6  optional address for key next encryption/decryption
+! parameter 7  not empty for include retl
+!
+! also compares in2 to 8
+
+
+
+
+! fp_macro
+!
+!  parameter 1   right (original left)
+!  parameter 2   left (original right)
+!  parameter 3   1 for optional store to [in0]
+!  parameter 4   1 for load input/output address to local5/7
+!
+!  The final permutation logic switches the halves, meaning that
+!  left and right end up in the registers originally used.
+
+
+
+
+! fp_ip_macro
+!
+! Does initial permutation for next block mixed with
+! final permutation for current block.
+!
+! parameter 1   original left
+! parameter 2   original right
+! parameter 3   left ip
+! parameter 4   right ip
+! parameter 5   1: load ks1/ks2 to in3/in4, add 120 to in4
+!                2: mov in4 to in3
+!
+! also adds -8 to length in2 and loads loop counter to out4
+
+
+
+
+
+! load_little_endian
+!
+! parameter 1  address
+! parameter 2  destination left
+! parameter 3  destination right
+! parameter 4  temporary
+! parameter 5  label
+
+
+
+
+! load_little_endian_inc
+!
+! parameter 1  address
+! parameter 2  destination left
+! parameter 3  destination right
+! parameter 4  temporary
+! parameter 5  label
+!
+! adds 8 to address
+
+
+
+
+! load_n_bytes
+!
+! Loads 1 to 7 bytes little endian
+! Remaining bytes are zeroed.
+!
+! parameter 1  address
+! parameter 2  length
+! parameter 3  destination register left
+! parameter 4  destination register right
+! parameter 5  temp
+! parameter 6  temp2
+! parameter 7  label
+! parameter 8  return label
+
+
+
+
+! store_little_endian
+!
+! parameter 1  address
+! parameter 2  source left
+! parameter 3  source right
+! parameter 4  temporary
+
+
+
+



Home | Main Index | Thread Index | Old Index