Source-Changes-HG archive


[src/trunk]: src/lib/libc/arch/sparc64/string Improved version of bzero.



details:   https://anonhg.NetBSD.org/src/rev/c2682a289e35
branches:  trunk
changeset: 513486:c2682a289e35
user:      eeh <eeh%NetBSD.org@localhost>
date:      Thu Aug 02 01:17:28 2001 +0000

description:
Improved version of bzero.
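
In C terms, the restructured routine behaves roughly like the sketch below: bzero now reaches memset's code with a zero pattern instead of memset branching into bzero, the destination is byte-aligned before the pattern is widened, and the widening is skipped entirely when the pattern is zero. All names here are invented for illustration, the real code is SPARC64 assembly, and the VIS block-store path taken for lengths of 256 bytes or more is elided (see the sketches after the diff).

#include <stddef.h>
#include <stdint.h>

/* Rough C model of the shared bzero/memset path; bzero enters with
 * pat == 0 after moving the length into place. */
static void *memset_model(void *addr, unsigned pat, size_t len)
{
    uint8_t *p = addr;

    /* Byte stores until the pointer is 8-byte aligned
     * (the new Lbzero_internal loop). */
    while (((uintptr_t)p & 7) != 0 && len != 0) {
        *p++ = (uint8_t)pat;
        len--;
    }

    /* Widen the pattern to 64 bits only after aligning, and skip
     * the dependent shift/or chain entirely for zero. */
    uint64_t pat64 = pat & 0xff;
    if (pat64 != 0) {
        pat64 |= pat64 << 8;
        pat64 |= pat64 << 16;
        pat64 |= pat64 << 32;
    }

    /* len >= 256 would take the VIS block-store path here. */
    while (len >= 8) {
        *(uint64_t *)(void *)p = pat64; /* stx, one longword at a time */
        p += 8;
        len -= 8;
    }

    /* Tail: at most 7 bytes, handled 4, 2, 1 at a time. */
    if (len & 4) { *(uint32_t *)(void *)p = (uint32_t)pat64; p += 4; }
    if (len & 2) { *(uint16_t *)(void *)p = (uint16_t)pat64; p += 2; }
    if (len & 1) { *p = (uint8_t)pat64; }
    return addr;
}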

diffstat:

 lib/libc/arch/sparc64/string/memset.S |  177 +++++++++++----------------------
 1 files changed, 60 insertions(+), 117 deletions(-)

diffs (260 lines):

diff -r 3e155412603d -r c2682a289e35 lib/libc/arch/sparc64/string/memset.S
--- a/lib/libc/arch/sparc64/string/memset.S     Wed Aug 01 20:54:16 2001 +0000
+++ b/lib/libc/arch/sparc64/string/memset.S     Thu Aug 02 01:17:28 2001 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: memset.S,v 1.3 2001/08/01 16:45:20 eeh Exp $   */
+/*     $NetBSD: memset.S,v 1.4 2001/08/02 01:17:28 eeh Exp $   */
 
 /*
  * Copyright (c) 2001, Eduardo E. Horvath
@@ -47,27 +47,10 @@
 #include <machine/psl.h>
 
 #if defined(LIBC_SCCS) && !defined(lint)
-       RCSID("$NetBSD: memset.S,v 1.3 2001/08/01 16:45:20 eeh Exp $")
+       RCSID("$NetBSD: memset.S,v 1.4 2001/08/02 01:17:28 eeh Exp $")
 #endif  /* LIBC_SCCS and not lint */
 
 /*
- * memset(addr, c, len)
- *
- * Duplicate the pattern so it fills 64-bits, then swap around the
- * arguments and call bzero.
- */
-ENTRY(memset)
-       and     %o1, 0x0ff, %o3
-       mov     %o2, %o1
-       sllx    %o3, 8, %o2
-       or      %o2, %o3, %o2
-       mov     %o0, %o4                ! Save original pointer
-       sllx    %o2, 16, %o3
-       or      %o2, %o3, %o2
-       sllx    %o2, 32, %o3
-       ba,pt   %icc, Lbzero_internal
-        or     %o2, %o3, %o2
-/*
  * bzero(addr, len)
  *
  * We want to use VIS instructions if we're clearing out more than
@@ -76,122 +59,86 @@
  * to keep track of the current owner of the FPU, hence the different
  * code.
  *
+ * XXXXX To produce more efficient code, we do not allow lengths
+ * greater than 0x8000000000000000, which are negative numbers.
+ * This should not really be an issue since the VA hole should
+ * cause any such ranges to fail anyway.
  */
 ENTRY(bzero)
        ! %o0 = addr, %o1 = len
-       clr     %o2                     ! Initialize our pattern
+       mov     %o1, %o2
+       clr     %o1                     ! Initialize our pattern
+/*
+ * memset(addr, c, len)
+ *
+ */
+ENTRY(memset)
+       ! %o0 = addr, %o1 = pattern, %o2 = len
+       mov     %o0, %o4                ! Save original pointer
+
 Lbzero_internal:
-       brz,pn  %o1, Lbzero_done        ! No bytes to copy??
-        cmp    %o1, 16                 ! <16 bytes?  use byte ops.
-       bge,pn  %xcc, 1f
-        nop
-0:     
-       stb     %o2, [%o0]              ! Small clear.
-       inc     %o0
-       deccc   %o1
-       bg,pt   %icc, 0b
-        nop
-       ba,pt   %icc, Lbzero_done
-        nop
-       
-1:     
-       btst    7, %o0                  ! 64-bit aligned?  Optimization
-       bz,pt   %xcc, 2f
-        nop
-       btst    3, %o0                  ! 32-bit aligned?
-       bz,pt   %xcc, 1f
-        nop
-       btst    1, %o0                  ! 16-bit aligned?
-       bz,pt   %xcc, 0f
+       btst    7, %o0                  ! Word aligned?
+       bz,pn   %xcc, 0f
         nop
-       
-       !! unaligned -- store 1 byte
-       stb     %o2, [%o0]
-       dec     1, %o1                  ! Record storing 1 byte
        inc     %o0
-       cmp     %o1, 2
-       bl,a,pn %icc, 7f                ! 1 or 0 left
-        dec    8, %o1                  ! Fixup count -8
-0:
-       btst    3, %o0
-       bz,pt   %xcc, 1f
-        btst   7, %o0                  ! 64-bit aligned?
+       deccc   %o2                     ! Store up to 7 bytes
+       bge,a,pt        %xcc, Lbzero_internal
+        stb    %o1, [%o0 - 1]
 
-       !! 16-bit aligned -- store half word
-       sth     %o2, [%o0]
-       dec     2, %o1                  ! Prepare to store 2 bytes
-       inc     2, %o0
-       cmp     %o1, 4
-       bl,a,pn %icc, 5f                ! Less than 4 left
-        dec    8, %o1                  ! Fixup count -8
-1:
-       btst    7, %o0                  ! 64-bit aligned?
-       bz,pt   %xcc, 2f
-        nop
-       !! 32-bit aligned -- store word
-       stw     %o2, [%o0]
-       dec     4, %o1
-       inc     4, %o0  
-       cmp     %o1, 8
-       bl,a,pn %icc, Lbzero_cleanup    ! Less than 8 left
-        dec    8, %o1                  ! Fixup count -8
-2:
+       retl                            ! Duplicate Lbzero_done
+        mov    %o4, %o0
+0:
+       /*
+        * Duplicate the pattern so it fills 64-bits.
+        */
+       andcc   %o1, 0x0ff, %o1         ! No need to extend zero
+       bz,pt   %icc, 1f
+        sllx   %o1, 8, %o3             ! sigh.  all dependent insns.
+       or      %o1, %o3, %o1
+       sllx    %o1, 16, %o3
+       or      %o1, %o3, %o1
+       sllx    %o1, 32, %o3
+        or     %o1, %o3, %o1
+1:     
 #if 1
        !! Now we are 64-bit aligned
-       cmp     %o1, 256                ! Use block clear if len > 256
+       cmp     %o2, 256                ! Use block clear if len > 256
        bge,pt  %xcc, Lbzero_block      ! use block store insns
 #endif 
-        deccc  8, %o1
+        deccc  8, %o2
 Lbzero_longs:
        bl,pn   %xcc, Lbzero_cleanup    ! Less than 8 bytes left
         nop
 3:     
-       stx     %o2, [%o0]              ! Do 1 longword at a time
-       deccc   8, %o1
+       inc     8, %o0
+       deccc   8, %o2
        bge,pt  %xcc, 3b
-        inc    8, %o0
+        stx    %o1, [%o0 - 8]          ! Do 1 longword at a time
 
        /*
         * Len is in [-8..-1] where -8 => done, -7 => 1 byte to zero,
         * -6 => two bytes, etc.  Mop up this remainder, if any.
         */
 Lbzero_cleanup:        
-       btst    4, %o1
-       bz,pt   %xcc, 6f                ! if (len & 4) {
-        btst   2, %o1
-       stw     %o2, [%o0]              !       *(int *)addr = 0;
+       btst    4, %o2
+       bz,pt   %xcc, 5f                ! if (len & 4) {
+        nop
+       stw     %o1, [%o0]              !       *(int *)addr = 0;
        inc     4, %o0                  !       addr += 4;
 5:     
-       btst    2, %o1
-6:
-       bz,pt   %xcc, 8f                ! if (len & 2) {
-        btst   1, %o1
-       sth     %o2, [%o0]              !       *(short *)addr = 0;
+       btst    2, %o2
+       bz,pt   %xcc, 7f                ! if (len & 2) {
+        nop
+       sth     %o1, [%o0]              !       *(short *)addr = 0;
        inc     2, %o0                  !       addr += 2;
 7:     
-       btst    1, %o1
-8:     
+       btst    1, %o2
        bnz,a   %icc, Lbzero_done       ! if (len & 1)
-        stb    %o2, [%o0]              !       *addr = 0;
+        stb    %o1, [%o0]              !       *addr = 0;
 Lbzero_done:
        retl
         mov    %o4, %o0                ! Restore pointer for memset (ugh)
 
-       /*
-        * Len is in [-8..-1] where -8 => done, -7 => 1 byte to zero,
-        * -6 => two bytes, etc. but we're potentially unaligned.
-        * Do byte stores since it's easiest.
-        */
-Lbzero_small:
-       inccc   8, %o1
-       bz,pn   %icc, Lbzero_done
-1:     
-        deccc  %o1
-       stb     %o2, [%o0]
-       bge,pt  %icc, 1b
-        inc    %o0
-       ba,a,pt %icc, Lbzero_done
-        nop                            ! XXX spitfire bug?
 #if 1  
 Lbzero_block:
 /*
@@ -209,17 +156,17 @@
        bz,pt   %xcc, 2f
         nop
 1:     
-       stx     %o2, [%o0]
+       stx     %o1, [%o0]
        inc     8, %o0
        btst    63, %o0
        bnz,pt  %xcc, 1b
-        dec    8, %o1
+        dec    8, %o2
 
 2:
-       brz     %o2, 3f                                 ! Skip the memory op
+       brz     %o1, 3f                                 ! Skip the memory op
         fzero  %f0                                     ! for bzero
        
-       stx     %o2, [%o0]                              ! Flush this puppy to RAM
+       stx     %o1, [%o0]                              ! Flush this puppy to RAM
        membar  #StoreLoad
        ldd     [%o0], %f0
 3:     
@@ -232,22 +179,18 @@
        fmovd   %f0, %f14
        
        !! Remember: we were 8 bytes too far
-       dec     56, %o1                                 ! Go one iteration too far
+       dec     56, %o2                                 ! Go one iteration too far
 5:
        stda    %f0, [%o0] ASI_BLK_P                    ! Store 64 bytes
-       deccc   64, %o1
-       ble,pn  %xcc, 6f
-        inc    64, %o0
-
-       stda    %f0, [%o0] ASI_BLK_P                    ! Store 64 bytes
-       deccc   64, %o1
+       deccc   64, %o2
        bg,pn   %xcc, 5b
         inc    64, %o0
-6:
+       
+       membar  #Sync
 /*
  * Now we're done we need to load the FPU state from where
  * we put it.
  */
        ba,pt   %xcc, Lbzero_longs      ! Finish up the remainder
-        addcc  %o1, 56, %o1            ! Restore the count
+        inccc  56, %o2         ! Restore the count
 #endif
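
The subtlest part of the new block path is the count bookkeeping: the count arrives already reduced by 8 (the deccc before Lbzero_longs), is reduced by a further 56 so the loop deliberately goes "one iteration too far" and can simply test the sign after each 64-byte store, and the 56 is added back before rejoining the longword loop. A minimal C rendering under those assumptions, with block_store_64 as an invented stand-in for the stda/ASI_BLK_P store:

#include <stdint.h>
#include <string.h>

/* Stand-in for "stda %f0, [%o0] ASI_BLK_P": write one 64-byte block. */
static void block_store_64(uint8_t *p, uint64_t pat)
{
    for (int i = 0; i < 8; i++)
        memcpy(p + 8 * i, &pat, 8);
}

static uint8_t *block_loop_sketch(uint8_t *p, uint64_t pat, int64_t *lenp)
{
    int64_t len = *lenp;        /* arrives as "remaining - 8" */

    /* Biasing by 56 more makes one whole 64-byte block ("Go one
     * iteration too far"): the loop then only tests the sign. */
    len -= 56;
    do {
        block_store_64(p, pat); /* 64 bytes per iteration */
        len -= 64;
        p += 64;
    } while (len > 0);

    /* The assembly issues membar #Sync here, then undoes the bias
     * (inccc 56, %o2) and rejoins Lbzero_longs to mop up. */
    *lenp = len + 56;           /* back to "remaining - 8" form */
    return p;
}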


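Likewise, the cleanup's btst 4/2/1 tests work on the biased count directly: with r bytes left (0 <= r <= 7) the register holds r - 8, and in two's complement (r - 8) & 7 == r & 7, so the low bits are unchanged. A short C rendering (names invented; pat64 is the widened pattern, and the pointer is 8-byte aligned by this point):

#include <stdint.h>

/* biased is in [-8 .. -1]; its low three bits equal those of the
 * true remainder, so these tests match the btst 4/2/1 sequence. */
static void tail_sketch(uint8_t *p, uint64_t pat64, int64_t biased)
{
    if (biased & 4) { *(uint32_t *)(void *)p = (uint32_t)pat64; p += 4; }
    if (biased & 2) { *(uint16_t *)(void *)p = (uint16_t)pat64; p += 2; }
    if (biased & 1) { *p = (uint8_t)pat64; }
}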
