Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/common/lib/libc/arch/sparc64/string Use a single copy of the...
details:   https://anonhg.NetBSD.org/src/rev/40594510ec17
branches:  trunk
changeset: 785500:40594510ec17
user:      christos <christos%NetBSD.org@localhost>
date:      Sun Mar 17 00:42:31 2013 +0000
description:
Use a single copy of the source.
diffstat:
 common/lib/libc/arch/sparc64/string/memcpy.S    |  1624 +++++++++++++++++++++++
 common/lib/libc/arch/sparc64/string/memset.S    |   214 +++
 common/lib/libc/arch/sparc64/string/strmacros.h |   119 +
 3 files changed, 1957 insertions(+), 0 deletions(-)
diffs (truncated from 1969 to 300 lines):
diff -r f59b84f1659e -r 40594510ec17 common/lib/libc/arch/sparc64/string/memcpy.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/common/lib/libc/arch/sparc64/string/memcpy.S      Sun Mar 17 00:42:31 2013 +0000
@@ -0,0 +1,1624 @@
+/*     $NetBSD: memcpy.S,v 1.1 2013/03/17 00:42:31 christos Exp $      */
+
+/*
+ * Copyright (c) 1996-2002 Eduardo Horvath
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR  ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR  BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+#include "strmacros.h"
+
+/*
+ * kernel memcpy
+ * Assumes regions do not overlap; has no useful return value.
+ *
+ * Must not use %g7 (see copyin/copyout in the sparc64 kernel sources).
+ */
+ENTRY(memcpy) /* dest, src, size */
+       /*
+        * Swap args for bcopy.  Gcc generates calls to memcpy for
+        * structure assignments.
+        */
+       mov     %o0, %o3
+       mov     %o1, %o0
+       mov     %o3, %o1
+#if !defined(_KERNEL) || defined(_RUMPKERNEL)
+ENTRY(bcopy) /* src, dest, size */
+#endif
+#ifdef DEBUG
+#if defined(_KERNEL) && !defined(_RUMPKERNEL)
+       set     pmapdebug, %o4
+       ld      [%o4], %o4
+       btst    0x80, %o4       ! PDB_COPY
+       bz,pt   %icc, 3f
+        nop
+#endif
+       save    %sp, -CC64FSZ, %sp
+       mov     %i0, %o1
+       set     2f, %o0
+       mov     %i1, %o2
+       call    printf
+        mov    %i2, %o3
+!      ta      1; nop
+       restore
+       .data
+2:     .asciz  "memcpy(%p<-%p,%x)\n"
+       _ALIGN
+       .text
+3:
+#endif
+
+       cmp     %o2, BCOPY_SMALL
+
+Lmemcpy_start:
+       bge,pt  CCCR, 2f        ! if >= this many, go be fancy.
+        cmp    %o2, 256
+
+       mov     %o1, %o5        ! Save memcpy return value
+       /*
+        * Not much to copy, just do it a byte at a time.
+        */
+       deccc   %o2             ! while (--len >= 0)
+       bl      1f
+        .empty
+0:
+       inc     %o0
+       ldsb    [%o0 - 1], %o4  !       (++dst)[-1] = *src++;
+       stb     %o4, [%o1]
+       deccc   %o2
+       bge     0b
+        inc    %o1
+1:
+       retl
+        mov    %o5, %o0
+       NOTREACHED
+
+       /*
+        * Plenty of data to copy, so try to do it optimally.
+        */
+2:
+#ifdef USE_BLOCK_STORE_LOAD
+       ! If it is big enough, use VIS instructions
+       bge     Lmemcpy_block
+        nop
+#endif /* USE_BLOCK_STORE_LOAD */
+Lmemcpy_fancy:
+
+       !!
+       !! First align the output to an 8-byte entity
+       !! 
+
+       save    %sp, -CC64FSZ, %sp
+       
+       mov     %i0, %l0
+       mov     %i1, %l1
+       
+       mov     %i2, %l2
+       btst    1, %l1
+       
+       bz,pt   %icc, 4f
+        btst   2, %l1
+       ldub    [%l0], %l4                              ! Load 1st byte
+       
+       deccc   1, %l2
+       ble,pn  CCCR, Lmemcpy_finish                    ! XXXX
+        inc    1, %l0
+       
+       stb     %l4, [%l1]                              ! Store 1st byte
+       inc     1, %l1                                  ! Update address
+       btst    2, %l1
+4:     
+       bz,pt   %icc, 4f
+       
+        btst   1, %l0
+       bz,a    1f
+        lduh   [%l0], %l4                              ! Load short
+
+       ldub    [%l0], %l4                              ! Load bytes
+       
+       ldub    [%l0+1], %l3
+       sllx    %l4, 8, %l4
+       or      %l3, %l4, %l4
+       
+1:     
+       deccc   2, %l2
+       ble,pn  CCCR, Lmemcpy_finish                    ! XXXX
+        inc    2, %l0
+       sth     %l4, [%l1]                              ! Store 1st short
+       
+       inc     2, %l1
+4:
+       btst    4, %l1
+       bz,pt   CCCR, 4f
+       
+        btst   3, %l0
+       bz,a,pt CCCR, 1f
+        lduw   [%l0], %l4                              ! Load word -1
+
+       btst    1, %l0
+       bz,a,pt %icc, 2f
+        lduh   [%l0], %l4
+       
+       ldub    [%l0], %l4
+       
+       lduh    [%l0+1], %l3
+       sllx    %l4, 16, %l4
+       or      %l4, %l3, %l4
+       
+       ldub    [%l0+3], %l3
+       sllx    %l4, 8, %l4
+       ba,pt   %icc, 1f
+        or     %l4, %l3, %l4
+       
+2:
+       lduh    [%l0+2], %l3
+       sllx    %l4, 16, %l4
+       or      %l4, %l3, %l4
+       
+1:     
+       deccc   4, %l2
+       ble,pn  CCCR, Lmemcpy_finish            ! XXXX
+        inc    4, %l0
+       
+       st      %l4, [%l1]                              ! Store word
+       inc     4, %l1
+4:
+       !!
+       !! We are now 64-bit (8-byte) aligned in the dest.
+       !!
+Lmemcpy_common:        
+
+       and     %l0, 7, %l4                             ! Shift amount
+       andn    %l0, 7, %l0                             ! Source addr
+       
+       brz,pt  %l4, Lmemcpy_noshift8                   ! No shift version...
+
+        sllx   %l4, 3, %l4                             ! In bits
+       mov     8<<3, %l3
+       
+       ldx     [%l0], %o0                              ! Load word -1
+       sub     %l3, %l4, %l3                           ! Reverse shift
+       deccc   12*8, %l2                               ! Have enough room?
+       
+       sllx    %o0, %l4, %o0
+       bl,pn   CCCR, 2f
+        and    %l3, 0x38, %l3
+Lmemcpy_unrolled8:
+
+       /*
+        * This is about as close to optimal as you can get, since
+        * the shifts require EU0 and cannot be paired, and you have
+        * 3 dependent operations on the data.
+        */ 
+
+!      ldx     [%l0+0*8], %o0                          ! Already done
+!      sllx    %o0, %l4, %o0                           ! Already done
+       ldx     [%l0+1*8], %o1
+       ldx     [%l0+2*8], %o2
+       ldx     [%l0+3*8], %o3
+       ldx     [%l0+4*8], %o4
+       ba,pt   %icc, 1f
+        ldx    [%l0+5*8], %o5
+       .align  8
+1:
+       srlx    %o1, %l3, %g1
+       inc     6*8, %l0
+       
+       sllx    %o1, %l4, %o1
+       or      %g1, %o0, %g6
+       ldx     [%l0+0*8], %o0
+       
+       stx     %g6, [%l1+0*8]
+       srlx    %o2, %l3, %g1
+
+       sllx    %o2, %l4, %o2
+       or      %g1, %o1, %g6
+       ldx     [%l0+1*8], %o1
+       
+       stx     %g6, [%l1+1*8]
+       srlx    %o3, %l3, %g1
+       
+       sllx    %o3, %l4, %o3
+       or      %g1, %o2, %g6
+       ldx     [%l0+2*8], %o2
+       
+       stx     %g6, [%l1+2*8]
+       srlx    %o4, %l3, %g1
+       
+       sllx    %o4, %l4, %o4   
+       or      %g1, %o3, %g6
+       ldx     [%l0+3*8], %o3
+       
+       stx     %g6, [%l1+3*8]
+       srlx    %o5, %l3, %g1
+       
+       sllx    %o5, %l4, %o5
+       or      %g1, %o4, %g6
+       ldx     [%l0+4*8], %o4
+
+       stx     %g6, [%l1+4*8]
+       srlx    %o0, %l3, %g1
+       deccc   6*8, %l2                                ! Have enough room?
+
+       sllx    %o0, %l4, %o0                           ! Next loop
+       or      %g1, %o5, %g6
+       ldx     [%l0+5*8], %o5
+       
+       stx     %g6, [%l1+5*8]
+       bge,pt  CCCR, 1b
+        inc    6*8, %l1
+
+Lmemcpy_unrolled8_cleanup:     
+       !!
+       !! Finished 8 byte block, unload the regs.
+       !! 
+       srlx    %o1, %l3, %g1
+       inc     5*8, %l0
+       
+       sllx    %o1, %l4, %o1
+       or      %g1, %o0, %g6
+               
+       stx     %g6, [%l1+0*8]
+       srlx    %o2, %l3, %g1
+       
+       sllx    %o2, %l4, %o2
+       or      %g1, %o1, %g6
+               
+       stx     %g6, [%l1+1*8]
+       srlx    %o3, %l3, %g1
+       
+       sllx    %o3, %l4, %o3
+       or      %g1, %o2, %g6
+               
+       stx     %g6, [%l1+2*8]
+       srlx    %o4, %l3, %g1
+       
+       sllx    %o4, %l4, %o4   
+       or      %g1, %o3, %g6
+               
+       stx     %g6, [%l1+3*8]
Home |
Main Index |
Thread Index |
Old Index