Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/lib/libc/arch/i386/string A faster implementation.



details:   https://anonhg.NetBSD.org/src/rev/740088340336
branches:  trunk
changeset: 573635:740088340336
user:      dsl <dsl%NetBSD.org@localhost>
date:      Thu Feb 03 22:05:01 2005 +0000

description:
A faster implementation.
'rep stos' is slow to set up on modern processors, so don't use it to
align the transfer.
Also note that 8 byte alignment is faster on Intel processors.

diffstat:

 lib/libc/arch/i386/string/bzero.S  |   48 +---------------
 lib/libc/arch/i386/string/memset.S |  111 +++++++++++++++++++++++++++---------
 2 files changed, 85 insertions(+), 74 deletions(-)

diffs (197 lines):

diff -r 9bcb0a4ffb3f -r 740088340336 lib/libc/arch/i386/string/bzero.S
--- a/lib/libc/arch/i386/string/bzero.S Thu Feb 03 21:54:49 2005 +0000
+++ b/lib/libc/arch/i386/string/bzero.S Thu Feb 03 22:05:01 2005 +0000
@@ -1,46 +1,4 @@
-/*
- * Written by J.T. Conklin <jtc%NetBSD.org@localhost>.
- * Public domain.
- */
-
-#include <machine/asm.h>
-
-#if defined(LIBC_SCCS)
-       RCSID("$NetBSD: bzero.S,v 1.9 2003/07/26 19:24:33 salo Exp $")
-#endif
-
-ENTRY(bzero)
-       pushl   %edi
-       movl    8(%esp),%edi
-       movl    12(%esp),%edx
-
-       cld                             /* set fill direction forward */
-       xorl    %eax,%eax               /* set fill data to 0 */
+/*     $NetBSD: bzero.S,v 1.10 2005/02/03 22:05:01 dsl Exp $   */
 
-       /*
-        * if the string is too short, it's really not worth the overhead
-        * of aligning to word boundries, etc.  So we jump to a plain
-        * unaligned set.
-        */
-       cmpl    $16,%edx
-       jb      L1
-
-       movl    %edi,%ecx               /* compute misalignment */
-       negl    %ecx
-       andl    $3,%ecx
-       subl    %ecx,%edx
-       rep                             /* zero until word aligned */
-       stosb
-
-       movl    %edx,%ecx               /* zero by words */
-       shrl    $2,%ecx
-       andl    $3,%edx
-       rep
-       stosl
-
-L1:    movl    %edx,%ecx               /* zero remainder by bytes */
-       rep
-       stosb
-
-       popl    %edi
-       ret
+#define BZERO
+#include "memset.S"
diff -r 9bcb0a4ffb3f -r 740088340336 lib/libc/arch/i386/string/memset.S
--- a/lib/libc/arch/i386/string/memset.S        Thu Feb 03 21:54:49 2005 +0000
+++ b/lib/libc/arch/i386/string/memset.S        Thu Feb 03 22:05:01 2005 +0000
@@ -1,21 +1,59 @@
-/*
- * Written by J.T. Conklin <jtc%NetBSD.org@localhost>.
- * Public domain.
+/*     $NetBSD: memset.S,v 1.10 2005/02/03 22:05:01 dsl Exp $  */
+
+/*-
+ * Copyright (c) 2003 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by David Laight.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <machine/asm.h>
 
 #if defined(LIBC_SCCS)
-       RCSID("$NetBSD: memset.S,v 1.9 2003/07/26 19:24:34 salo Exp $")
+       RCSID("$NetBSD: memset.S,v 1.10 2005/02/03 22:05:01 dsl Exp $")
 #endif
 
+#ifdef BZERO
+ENTRY(bzero)
+#else
 ENTRY(memset)
+#endif
+#ifdef BZERO
+       movl    8(%esp),%ecx
+       xor     %eax,%eax
+#else
+       movl    12(%esp),%ecx
+       movzbl  8(%esp),%eax            /* unsigned char, zero extend */
+#endif
+       cmpl    $0x0f,%ecx              /* avoid mispredicted branch... */
+
        pushl   %edi
-       pushl   %ebx
-       movl    12(%esp),%edi
-       movzbl  16(%esp),%eax           /* unsigned char, zero extend */
-       movl    20(%esp),%ecx
-       pushl   %edi                    /* push address of buffer */
+       movl    8(%esp),%edi
 
        cld                             /* set fill direction forward */
 
@@ -23,36 +61,51 @@
         * if the string is too short, it's really not worth the overhead
         * of aligning to word boundries, etc.  So we jump to a plain
         * unaligned set.
+        *
+        * NB aligning the transfer is actually pointless on my athlon 700,
+        * It does make a difference to a PII though.
+        *
+        * The PII, PIII and PIV all seem to have a massive performance
+        * drop when the initial target address is an odd multiple of 4.
         */
-       cmpl    $0x0f,%ecx
-       jle     L1
+       jbe     by_bytes
 
+#ifndef BZERO
        movb    %al,%ah                 /* copy char to all bytes in word */
        movl    %eax,%edx
        sall    $16,%eax
        orl     %edx,%eax
+#endif
 
-       movl    %edi,%edx               /* compute misalignment */
-       negl    %edx
-       andl    $3,%edx
-       movl    %ecx,%ebx
-       subl    %edx,%ebx
+       movl    %edi,%edx               /* detect misalignment */
+       neg     %edx
+       andl    $7,%edx
+       jnz     align
+aligned:
+       movl    %eax,-4(%edi,%ecx)      /* zap last 4 bytes */
+       shrl    $2,%ecx                 /* zero by words */
+       rep
+       stosl
+done:
+#ifndef BZERO
+       movl    8(%esp),%eax            /* return address of buffer */
+#endif
+       pop     %edi
+       ret
 
-       movl    %edx,%ecx               /* set until word aligned */
+align:
+       movl    %eax,(%edi)             /* zap first 8 bytes */
+       movl    %eax,4(%edi)
+       subl    %edx,%ecx               /* remove from main count */
+       add     %edx,%edi
+       jmp     aligned
+
+by_bytes:
        rep
        stosb
 
-       movl    %ebx,%ecx
-       shrl    $2,%ecx                 /* set by words */
-       rep
-       stosl
-
-       movl    %ebx,%ecx               /* set remainder by bytes */
-       andl    $3,%ecx
-L1:    rep
-       stosb
-
-       popl    %eax                    /* pop address of buffer */
-       popl    %ebx
+#ifndef BZERO
+       movl    8(%esp),%eax            /* return address of buffer */
+#endif
        popl    %edi
        ret



Home | Main Index | Thread Index | Old Index