Source-Changes-HG archive


[src/trunk]: src/lib/libc/arch/powerpc Add PPC assembler routines for ffs(3), ...



details:   https://anonhg.NetBSD.org/src/rev/3deecf145e85
branches:  trunk
changeset: 518121:3deecf145e85
user:      mjl <mjl%NetBSD.org@localhost>
date:      Sun Nov 25 01:09:58 2001 +0000

description:
Add PPC assembler routines for ffs(3), bzero(3) and memset(3).
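
For reference, the new assembly implements the standard semantics of these
routines. A rough C sketch of that behaviour (illustrative only, not the
libc C sources being replaced) looks like this:

#include <stddef.h>

/* Illustrative sketches of the standard semantics. */
void *
memset(void *b, int c, size_t len)
{
        unsigned char *p = b;

        while (len-- > 0)
                *p++ = (unsigned char)c;
        return b;
}

void
bzero(void *b, size_t len)
{
        memset(b, 0, len);              /* bzero is memset with c == 0 */
}

int
ffs(int mask)
{
        int bit;                        /* 1-based index of lowest set bit */

        if (mask == 0)
                return 0;
        for (bit = 1; (mask & 1) == 0; bit++)
                mask = (int)((unsigned int)mask >> 1);
        return bit;
}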

diffstat:

 lib/libc/arch/powerpc/Makefile.inc        |    4 +-
 lib/libc/arch/powerpc/string/Makefile.inc |    6 +-
 lib/libc/arch/powerpc/string/bzero.S      |  320 ++++++++++++++++++++++++++++++
 lib/libc/arch/powerpc/string/ffs.S        |   41 +++
 4 files changed, 367 insertions(+), 4 deletions(-)

diffs (truncated from 396 to 300 lines):

diff -r 2c9f6f8a316e -r 3deecf145e85 lib/libc/arch/powerpc/Makefile.inc
--- a/lib/libc/arch/powerpc/Makefile.inc        Sun Nov 25 00:42:11 2001 +0000
+++ b/lib/libc/arch/powerpc/Makefile.inc        Sun Nov 25 01:09:58 2001 +0000
@@ -1,7 +1,7 @@
-#      $NetBSD: Makefile.inc,v 1.3 2001/06/18 17:05:32 simonb Exp $
+#      $NetBSD: Makefile.inc,v 1.4 2001/11/25 01:09:58 mjl Exp $
 
 KMINCLUDES=
-KMSRCS=
+KMSRCS=        arch/powerpc/string/ffs.S
 
 .if (${MKSOFTFLOAT} != "no")
 CPPFLAGS+= -DSOFTFLOAT_NEED_FIXUNS
diff -r 2c9f6f8a316e -r 3deecf145e85 lib/libc/arch/powerpc/string/Makefile.inc
--- a/lib/libc/arch/powerpc/string/Makefile.inc Sun Nov 25 00:42:11 2001 +0000
+++ b/lib/libc/arch/powerpc/string/Makefile.inc Sun Nov 25 01:09:58 2001 +0000
@@ -1,6 +1,8 @@
-#      $NetBSD: Makefile.inc,v 1.1 1997/03/29 20:56:01 thorpej Exp $
+#      $NetBSD: Makefile.inc,v 1.2 2001/11/25 01:09:59 mjl Exp $
 
-SRCS+= bcmp.c bcopy.c bzero.c ffs.c index.c memchr.c memcmp.c memset.c \
+SRCS+= bzero.S ffs.S
+
+SRCS+= bcmp.c bcopy.c index.c memchr.c memcmp.c \
        rindex.c strcat.c strcmp.c strcpy.c strcspn.c strlen.c \
        strncat.c strncmp.c strncpy.c strpbrk.c strsep.c \
        strspn.c strstr.c swab.c
diff -r 2c9f6f8a316e -r 3deecf145e85 lib/libc/arch/powerpc/string/bzero.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/libc/arch/powerpc/string/bzero.S      Sun Nov 25 01:09:58 2001 +0000
@@ -0,0 +1,320 @@
+/*     $NetBSD: bzero.S,v 1.1 2001/11/25 01:09:59 mjl Exp $ */
+
+/*-
+ * Copyright (C) 2001  Martin J. Laubach <mjl%netbsd.org@localhost>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/*----------------------------------------------------------------------*/
+
+#include <machine/asm.h>
+
+#define USE_STSWX 0    /* don't use stswx; slower than a trivial copy loop */
+
+/*----------------------------------------------------------------------*/
+/*
+     void bzero(void *b r3, size_t len r4);
+     void * memset(void *b r3, int c r4, size_t len r5);
+*/
+/*----------------------------------------------------------------------*/
+
+#define r_dst  r3
+#define r_len  r4
+#define r_val  r0
+
+               .text
+               .align 4
+ENTRY(bzero)
+               li      r_val, 0                /* Value to stuff in */
+               b       cb_memset
+
+ENTRY(memset)
+               cmplwi  cr1, r5, 0
+               mr.     r0, r4
+               mr      r8, r3
+               beqlr-  cr1                     /* Nothing to do */
+
+               rlwimi  r0, r4, 8, 16, 23       /* word extend fill value */
+               rlwimi  r0, r0, 16, 0, 15
+               mr      r4, r5
+               bne-    simple_fill             /* != 0, use trivial fill */
+cb_memset:
+
+/*----------------------------------------------------------------------*/
+               /* First find out cache line size */
+#ifdef PIC
+               mflr    r9
+               bl      _GLOBAL_OFFSET_TABLE_@local-4
+               mflr    r10
+               mtlr    r9
+               lwz     r5,cache_size@got(r10)
+#else
+               lis     r5,cache_size@h
+               ori     r5,r5,cache_size@l
+#endif
+               lwz     r6, 0(r5)
+               cmpwi   r6, -1
+               bne+    cb_cacheline_known
+
+/*----------------------------------------------------------------------*/
+#define CTL_MACHDEP    7
+#define CPU_CACHELINE  1
+
+#define STKFRAME_SZ    48
+#define MIB            8
+#define OLDPLEN                16
+#define R3_SAVE                20
+#define R4_SAVE                24
+#define R0_SAVE                28
+#define R8_SAVE                32
+
+               mflr    r6
+               stw     r6, 4(r1)
+               stwu    r1, -STKFRAME_SZ(r1)
+
+               stw     r8, R8_SAVE(r1)
+               stw     r3, R3_SAVE(r1)
+               stw     r4, R4_SAVE(r1)
+               stw     r0, R0_SAVE(r1)
+
+               li      r0, CTL_MACHDEP         /* Construct MIB */
+               stw     r0, MIB(r1)
+               li      r0, CPU_CACHELINE
+               stw     r0, MIB+4(r1)
+
+               li      r0, 4                   /* Oldlenp := 4 */
+               stw     r0, OLDPLEN(r1)
+
+               addi    r3, r1, MIB
+               li      r4, 2                   /* namelen */
+               /* r5 already contains &cache_size */
+               addi    r6, r1, OLDPLEN
+               li      r7, 0
+               li      r8, 0
+               bl      PIC_PLT(_C_LABEL(sysctl))
+
+               lwz     r8, R8_SAVE(r1)
+               lwz     r3, R3_SAVE(r1)
+               lwz     r4, R4_SAVE(r1)
+               lwz     r0, R0_SAVE(r1)
+
+#ifdef PIC
+               bl      _GLOBAL_OFFSET_TABLE_@local-4
+               mflr    r10
+               lwz     r9, cache_size@got(r10)
+               lwz     r9, 0(r9)
+#else
+               lis     r5, cache_size@ha
+               lwz     r9, cache_size@l(r5)
+#endif
+               la      r1, STKFRAME_SZ(r1)
+               lwz     r5, 4(r1)
+               mtlr    r5
+
+               cntlzw  r6, r9                  /* compute shift value */
+               li      r5, 31
+               subf    r5, r6, r5
+
+#ifdef PIC
+               lwz     r6, cache_sh@got(r10)
+               stw     r5, 0(r6)
+#else
+               lis     r6, cache_sh@ha
+               stw     r5, cache_sh@l(r6)
+#endif
+/*----------------------------------------------------------------------*/
+/* Okay, we know the cache line size (r9) and shift value (r10) */
+cb_cacheline_known:
+#ifdef PIC
+               lwz     r5, cache_size@got(r10)
+               lwz     r9, 0(r5)
+               lwz     r5, cache_sh@got(r10)
+               lwz     r10, 0(r5)
+#else
+               lis     r9, cache_size@ha
+               lwz     r9, cache_size@l(r9)
+               lis     r10, cache_sh@ha
+               lwz     r10, cache_sh@l(r10)
+#endif
+               /* Back in memory filling business */
+               
+               cmplwi  cr1, r_len, 0           /* Nothing to do? */
+               add     r5, r9, r9
+               cmplw   r_len, r5               /* <= 2*CL bytes to move? */
+               beqlr-  cr1                     /* then do nothing */
+
+               blt+    simple_fill             /* a trivial fill routine */
+
+               /* Word align the block, fill bytewise until dst is word aligned */
+               
+               andi.   r5, r_dst, 0x03 
+               li      r6, 4
+               beq+    cb_aligned_w            /* already aligned to word? */
+
+               subf    r5, r5, r6              /* bytes to fill to align4 */
+#if USE_STSWX
+               mtxer   r5
+               stswx   r0, 0, r_dst
+               add     r_dst, r5, r_dst
+#else
+               mtctr   r5
+
+               subi    r_dst, r_dst, 1
+1:             stbu    r_val, 1(r_dst)         /* Fill bytewise */
+               bdnz    1b
+
+               addi    r_dst, r_dst, 1
+#endif
+               subf    r_len, r5, r_len
+
+cb_aligned_w:  /* Cache block align, fill wordwise until dst aligned */
+
+               /* There is work to do since r_len was > 2*CL initially, */
+               /* so no need to check for r_len == 0 */
+
+               rlwinm. r5, r_dst, 30, 29, 31
+               srwi    r6, r9, 2
+               beq     cb_aligned_cb           /* already on CL boundary? */
+
+               subf    r5, r5, r6              /* words to fill to alignment */
+               mtctr   r5
+               slwi    r5, r5, 2
+               subf    r_len, r5, r_len
+
+               subi    r_dst, r_dst, 4
+1:             stwu    r_val, 4(r_dst)         /* Fill wordwise */
+               bdnz    1b
+               addi    r_dst, r_dst, 4
+
+cb_aligned_cb: /* no need to check r_len, see above */
+               
+               srw.    r5, r_len, r10          /* Number of cache blocks */
+               mtctr   r5
+               beq     cblocks_done
+
+               slw     r5, r5, r10
+               subf    r_len, r5, r_len
+
+1:             dcbz    0, r_dst                /* Clear blockwise */
+               add     r_dst, r_dst, r9
+               bdnz    1b
+
+cblocks_done:  /* still CL aligned, but less than CL bytes left */
+               cmplwi  cr1, r_len, 0
+               cmplwi  r_len, 8
+               beq-    cr1, sf_return
+
+               blt-    sf_bytewise             /* <8 remaining? */
+               b       sf_aligned_w
+
+/*----------------------------------------------------------------------*/
+wbzero:                li      r_val, 0
+
+               cmplwi  r_len, 0
+               beqlr-                          /* Nothing to do */
+
+simple_fill:
+#if USE_STSWX
+               cmplwi  cr1, r_len, 12          /* < 12 bytes to move? */
+#else
+               cmplwi  cr1, r_len, 8           /* < 8 bytes to move? */
+#endif
+               andi.   r5, r_dst, 0x03         /* bytes to fill to align4 */
+               blt     cr1, sf_bytewise        /* trivial byte mover */
+
+               li      r6, 4
+               subf    r5, r5, r6
+               beq+    sf_aligned_w            /* dest is word aligned */
+
+#if USE_STSWX
+               mtxer   r5
+               stswx   r0, 0, r_dst
+               add     r_dst, r5, r_dst
+#else
+               mtctr   r5                      /* nope, then fill bytewise */
+               subi    r_dst, r_dst, 1         /* until it is */
+1:             stbu    r_val, 1(r_dst)         
+               bdnz    1b
+
+               addi    r_dst, r_dst, 1
+#endif
+               subf    r_len, r5, r_len
+
+sf_aligned_w:  /* no need to check r_len since it was >= 8 bytes initially */
+#if USE_STSWX
+               mr      r6, r0
+               mr      r7, r0
+
+               srwi    r5, r_len, 3



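A side note on the technique in bzero.S above: the cache-aware path queries
the CPU cache line size once at run time via sysctl(CTL_MACHDEP, CPU_CACHELINE),
caches the result, and then clears whole cache blocks with dcbz. A rough C
equivalent of that lookup (the helper name is illustrative; the MIB values are
the ones the assembly #defines):

#include <sys/param.h>
#include <sys/sysctl.h>

static int cache_size = -1;             /* -1 = not yet queried, as in bzero.S */

static int
get_cache_line_size(void)
{
        int mib[2] = { CTL_MACHDEP, 1 /* CPU_CACHELINE, as in bzero.S */ };
        size_t oldlen = sizeof(cache_size);

        if (cache_size != -1)
                return cache_size;      /* already known */
        if (sysctl(mib, 2, &cache_size, &oldlen, NULL, 0) == -1)
                return -1;              /* unknown; caller falls back to the simple fill */
        return cache_size;
}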