Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/lib/libcrypto/arch/i386 Add support for building the assembl...



details:   https://anonhg.NetBSD.org/src/rev/165c07b01744
branches:  trunk
changeset: 495571:165c07b01744
user:      thorpej <thorpej%NetBSD.org@localhost>
date:      Mon Jul 31 19:57:30 2000 +0000

description:
Add support for building the assembly versions of some BIGNUM
routines from OpenSSL.  Speeds up DSA significantly.  A similar
gain should also be seen for RSA.

Before:
Doing 512 bit sign dsa's for 10s: 965 512 bit DSA signs in 9.97s
Doing 512 bit verify dsa's for 10s: 766 512 bit DSA verify in 9.93s
Doing 1024 bit sign dsa's for 10s: 276 1024 bit DSA signs in 9.99s
Doing 1024 bit verify dsa's for 10s: 217 1024 bit DSA verify in 9.93s
                  sign    verify    sign/s verify/s
dsa  512 bits   0.0103s   0.0130s     96.8     77.1
dsa 1024 bits   0.0362s   0.0458s     27.6     21.9

After:
Doing 512 bit sign dsa's for 10s: 3742 512 bit DSA signs in 9.88s
Doing 512 bit verify dsa's for 10s: 3065 512 bit DSA verify in 9.92s
Doing 1024 bit sign dsa's for 10s: 1357 1024 bit DSA signs in 9.99s
Doing 1024 bit verify dsa's for 10s: 1094 1024 bit DSA verify in 9.83s
                  sign    verify    sign/s verify/s
dsa  512 bits   0.0026s   0.0032s    378.7    309.0
dsa 1024 bits   0.0074s   0.0090s    135.8    111.3

diffstat:

 lib/libcrypto/arch/i386/bn.inc         |    16 +
 lib/libcrypto/arch/i386/bn_asm_586.S   |   909 ++++++++++++++++++++++
 lib/libcrypto/arch/i386/bn_comba_586.S |  1301 ++++++++++++++++++++++++++++++++
 3 files changed, 2226 insertions(+), 0 deletions(-)

diffs (truncated from 2238 to 300 lines):

diff -r 3f6d91a952e4 -r 165c07b01744 lib/libcrypto/arch/i386/bn.inc
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/libcrypto/arch/i386/bn.inc    Mon Jul 31 19:57:30 2000 +0000
@@ -0,0 +1,16 @@
+#      $NetBSD: bn.inc,v 1.1 2000/07/31 19:57:30 thorpej Exp $
+#
+#      @(#) Copyright (c) 1995 Simon J. Gerraty
+#
+#      SRCS extracted from /home/current/src/lib/libcrypto/../../crypto/dist/openssl/crypto/bn/Makefile.ssl
+#
+# Add the OpenSSL bn directory and this port's arch/i386 directory to make's source search path.
+.PATH: ${OPENSSLSRC}/crypto/bn
+.PATH: ${.CURDIR}/arch/i386
+# Add the OpenSSL bn directory to the preprocessor include path.
+CPPFLAGS+=     -I${OPENSSLSRC}/crypto/bn
+# Sources for the bn part: bn_asm_586.S and bn_comba_586.S are assembly, the remaining entries are C.
+SRCS+= bn_add.c bn_div.c bn_exp.c bn_lib.c bn_ctx.c bn_mul.c \
+       bn_print.c bn_rand.c bn_shift.c bn_word.c bn_blind.c \
+       bn_gcd.c bn_prime.c bn_err.c bn_sqr.c bn_asm_586.S bn_comba_586.S \
+       bn_recp.c bn_mont.c bn_mpi.c bn_exp2.c
diff -r 3f6d91a952e4 -r 165c07b01744 lib/libcrypto/arch/i386/bn_asm_586.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/libcrypto/arch/i386/bn_asm_586.S      Mon Jul 31 19:57:30 2000 +0000
@@ -0,0 +1,909 @@
+/*     $NetBSD: bn_asm_586.S,v 1.1 2000/07/31 19:57:31 thorpej Exp $   */
+
+/* Copyright (C) 1995-1998 Eric Young (eay%cryptsoft.com@localhost)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay%cryptsoft.com@localhost).
+ * The implementation was written so as to conform with Netscapes SSL.
+ * 
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to.  The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh%cryptsoft.com@localhost).
+ * 
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *    "This product includes cryptographic software written by
+ *     Eric Young (eay%cryptsoft.com@localhost)"
+ *    The word 'cryptographic' can be left out if the rouines from the library
+ *    being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from 
+ *    the apps directory (application code) you must include an acknowledgement:
+ *    "This product includes software written by Tim Hudson (tjh%cryptsoft.com@localhost)"
+ * 
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * 
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed.  i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+
+/*
+ * Modified from the output of `perl bn-586.pl elf' by
+ * Jason R. Thorpe <thorpej%zembu.com@localhost>.
+ */
+
+#include <machine/asm.h>
+
+ENTRY(bn_mul_add_words)         /* dst[i] += src[i] * w for each word, carrying between words; final carry returned in %eax */
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %esi
+       pushl   %edi
+       /* Four registers saved above (16 bytes) plus the return address: stack arguments start at 20(%esp). */
+       /* NOTE(review): presumably OpenSSL's bn_mul_add_words(rp, ap, num, w) - confirm against the bn headers. */
+       xorl    %esi,           %esi    /* esi = running carry word, starts at 0 */
+       movl    20(%esp),       %edi    /* edi = arg 1: array that is read, accumulated into and written back */
+       movl    28(%esp),       %ecx    /* ecx = arg 3: number of words */
+       movl    24(%esp),       %ebx    /* ebx = arg 2: array that is only read */
+       andl    $4294967288,    %ecx    /* mask 0xfffffff8: count rounded down to a multiple of 8; ZF set if that is 0 */
+       movl    32(%esp),       %ebp    /* ebp = arg 4: the multiplier word */
+       pushl   %ecx                    /* stack slot for the loop counter; argument offsets grow by 4 from here on */
+       jz      L000maw_finish          /* tests the andl result: the movl and pushl in between do not modify flags */
+L001maw_loop:                          /* unrolled main loop: 8 words per iteration */
+       movl    %ecx,           (%esp)  /* park the remaining count in the stack slot */
+       /* Round 0 (the "Round N" labels in this loop are byte offsets, not word indexes) */
+       movl    (%ebx),         %eax    /* eax = source word */
+       mull    %ebp                    /* edx:eax = source word * multiplier */
+       addl    %esi,           %eax    /* add carry from the previous word */
+       movl    (%edi),         %esi    /* esi = current destination word (movl leaves CF intact) */
+       adcl    $0,             %edx    /* fold the carry-out of the previous addl into the high word */
+       addl    %esi,           %eax    /* add the destination word */
+       adcl    $0,             %edx    /* fold that carry-out into the high word as well */
+       movl    %eax,           (%edi)  /* store the low word of the result */
+       movl    %edx,           %esi    /* high word becomes the carry for the next round */
+       /* Round 4: same sequence as Round 0 for the word at byte offset 4 */
+       movl    4(%ebx),        %eax
+       mull    %ebp
+       addl    %esi,           %eax
+       movl    4(%edi),        %esi
+       adcl    $0,             %edx
+       addl    %esi,           %eax
+       adcl    $0,             %edx
+       movl    %eax,           4(%edi)
+       movl    %edx,           %esi
+       /* Round 8: same sequence for the word at byte offset 8 */
+       movl    8(%ebx),        %eax
+       mull    %ebp
+       addl    %esi,           %eax
+       movl    8(%edi),        %esi
+       adcl    $0,             %edx
+       addl    %esi,           %eax
+       adcl    $0,             %edx
+       movl    %eax,           8(%edi)
+       movl    %edx,           %esi
+       /* Round 12: same sequence for the word at byte offset 12 */
+       movl    12(%ebx),       %eax
+       mull    %ebp
+       addl    %esi,           %eax
+       movl    12(%edi),       %esi
+       adcl    $0,             %edx
+       addl    %esi,           %eax
+       adcl    $0,             %edx
+       movl    %eax,           12(%edi)
+       movl    %edx,           %esi
+       /* Round 16: same sequence for the word at byte offset 16 */
+       movl    16(%ebx),       %eax
+       mull    %ebp
+       addl    %esi,           %eax
+       movl    16(%edi),       %esi
+       adcl    $0,             %edx
+       addl    %esi,           %eax
+       adcl    $0,             %edx
+       movl    %eax,           16(%edi)
+       movl    %edx,           %esi
+       /* Round 20: same sequence for the word at byte offset 20 */
+       movl    20(%ebx),       %eax
+       mull    %ebp
+       addl    %esi,           %eax
+       movl    20(%edi),       %esi
+       adcl    $0,             %edx
+       addl    %esi,           %eax
+       adcl    $0,             %edx
+       movl    %eax,           20(%edi)
+       movl    %edx,           %esi
+       /* Round 24: same sequence for the word at byte offset 24 */
+       movl    24(%ebx),       %eax
+       mull    %ebp
+       addl    %esi,           %eax
+       movl    24(%edi),       %esi
+       adcl    $0,             %edx
+       addl    %esi,           %eax
+       adcl    $0,             %edx
+       movl    %eax,           24(%edi)
+       movl    %edx,           %esi
+       /* Round 28: same sequence for the word at byte offset 28 */
+       movl    28(%ebx),       %eax
+       mull    %ebp
+       addl    %esi,           %eax
+       movl    28(%edi),       %esi
+       adcl    $0,             %edx
+       addl    %esi,           %eax
+       adcl    $0,             %edx
+       movl    %eax,           28(%edi)
+       movl    %edx,           %esi
+       /* Loop bookkeeping: 8 words (32 bytes) of each array have been processed. */
+       movl    (%esp),         %ecx    /* reload the remaining count from the stack slot */
+       addl    $32,            %ebx    /* advance the source pointer by 8 words */
+       addl    $32,            %edi    /* advance the destination pointer by 8 words */
+       subl    $8,             %ecx
+       jnz     L001maw_loop            /* repeat while full groups of 8 remain */
+L000maw_finish:
+       movl    32(%esp),       %ecx    /* arg 3 again: it was at 28(%esp) before the extra pushl */
+       andl    $7,             %ecx    /* ecx = leftover words (0..7) */
+       jnz     L002maw_finish2
+       jmp     L003maw_end             /* nothing left over */
+_ALIGN_TEXT                            /* NOTE(review): alignment macro, presumably from <machine/asm.h> - confirm */
+L002maw_finish2:                       /* tail: up to 7 words, one round each, same pointers as left by the loop */
+       /* Tail Round 0 (tail rounds are labelled by word index; same arithmetic as the main loop) */
+       movl    (%ebx),         %eax
+       mull    %ebp
+       addl    %esi,           %eax
+       movl    (%edi),         %esi
+       adcl    $0,             %edx
+       addl    %esi,           %eax
+       adcl    $0,             %edx
+       decl    %ecx                    /* one leftover word consumed; sets ZF for the jz below */
+       movl    %eax,           (%edi)
+       movl    %edx,           %esi
+       jz      L003maw_end             /* tests the decl result: the two movl in between do not modify flags */
+       /* Tail Round 1: word at byte offset 4 */
+       movl    4(%ebx),        %eax
+       mull    %ebp
+       addl    %esi,           %eax
+       movl    4(%edi),        %esi
+       adcl    $0,             %edx
+       addl    %esi,           %eax
+       adcl    $0,             %edx
+       decl    %ecx
+       movl    %eax,           4(%edi)
+       movl    %edx,           %esi
+       jz      L003maw_end
+       /* Tail Round 2: word at byte offset 8 */
+       movl    8(%ebx),        %eax
+       mull    %ebp
+       addl    %esi,           %eax
+       movl    8(%edi),        %esi
+       adcl    $0,             %edx
+       addl    %esi,           %eax
+       adcl    $0,             %edx
+       decl    %ecx
+       movl    %eax,           8(%edi)
+       movl    %edx,           %esi
+       jz      L003maw_end
+       /* Tail Round 3: word at byte offset 12 */
+       movl    12(%ebx),       %eax
+       mull    %ebp
+       addl    %esi,           %eax
+       movl    12(%edi),       %esi
+       adcl    $0,             %edx
+       addl    %esi,           %eax
+       adcl    $0,             %edx
+       decl    %ecx
+       movl    %eax,           12(%edi)
+       movl    %edx,           %esi
+       jz      L003maw_end
+       /* Tail Round 4: word at byte offset 16 */
+       movl    16(%ebx),       %eax
+       mull    %ebp
+       addl    %esi,           %eax
+       movl    16(%edi),       %esi
+       adcl    $0,             %edx
+       addl    %esi,           %eax
+       adcl    $0,             %edx
+       decl    %ecx
+       movl    %eax,           16(%edi)
+       movl    %edx,           %esi
+       jz      L003maw_end
+       /* Tail Round 5: word at byte offset 20 */
+       movl    20(%ebx),       %eax
+       mull    %ebp
+       addl    %esi,           %eax
+       movl    20(%edi),       %esi
+       adcl    $0,             %edx
+       addl    %esi,           %eax
+       adcl    $0,             %edx
+       decl    %ecx
+       movl    %eax,           20(%edi)
+       movl    %edx,           %esi
+       jz      L003maw_end
+       /* Tail Round 6: last possible leftover word (count was masked to 0..7), so no decl/jz follows */
+       movl    24(%ebx),       %eax
+       mull    %ebp
+       addl    %esi,           %eax
+       movl    24(%edi),       %esi
+       adcl    $0,             %edx
+       addl    %esi,           %eax
+       adcl    $0,             %edx
+       movl    %eax,           24(%edi)
+       movl    %edx,           %esi
+L003maw_end:
+       movl    %esi,           %eax    /* return value = final carry word */
+       popl    %ecx                    /* discard the loop-counter stack slot */
+       popl    %edi                    /* restore saved registers in reverse push order */
+       popl    %esi
+       popl    %ebx
+       popl    %ebp
+       ret
+
+ENTRY(bn_mul_words)
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %esi
+       pushl   %edi
+
+
+       xorl    %esi,           %esi
+       movl    20(%esp),       %edi



Home | Main Index | Thread Index | Old Index