Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/lib/libkern/arch/sh3 Assembly version of bzero()/memset().



details:   https://anonhg.NetBSD.org/src/rev/c337d32c9fea
branches:  trunk
changeset: 539531:c337d32c9fea
user:      itohy <itohy%NetBSD.org@localhost>
date:      Wed Nov 20 09:51:52 2002 +0000

description:
Assembly version of bzero()/memset().
Written by SHIMIZU Ryo.

diffstat:

 sys/lib/libkern/arch/sh3/bzero.S  |    5 +
 sys/lib/libkern/arch/sh3/memset.S |  294 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 299 insertions(+), 0 deletions(-)

diffs (truncated from 307 to 300 lines):

diff -r 57459c39d712 -r c337d32c9fea sys/lib/libkern/arch/sh3/bzero.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/lib/libkern/arch/sh3/bzero.S  Wed Nov 20 09:51:52 2002 +0000
@@ -0,0 +1,5 @@
+/*     $NetBSD: bzero.S,v 1.1 2002/11/20 09:51:52 itohy Exp $  */
+/* bzero() shares its code with memset(): defining BZERO makes memset.S emit ENTRY(bzero) with a fixed zero fill value. */
+#define BZERO
+#include "memset.S"
+
diff -r 57459c39d712 -r c337d32c9fea sys/lib/libkern/arch/sh3/memset.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/lib/libkern/arch/sh3/memset.S Wed Nov 20 09:51:52 2002 +0000
@@ -0,0 +1,294 @@
+/*     $NetBSD: memset.S,v 1.1 2002/11/20 09:51:53 itohy Exp $ */
+
+/*-
+ * Copyright (c) 2002 SHIMIZU Ryo.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+
+#define        REG_PTR                         r0
+#define        REG_TMP1                        r1
+/* Register roles: REG_DST = destination, REG_LEN = byte count, REG_C = fill value (forced to 0 for bzero), REG_DST0 = copy of dst that memset returns, REG_PTR = end pointer used by the large path, REG_TMP1 = scratch. */
+#ifdef BZERO
+# define       REG_C                   r2
+# define       REG_DST                 r4
+# define       REG_LEN                 r5
+#else
+# define       REG_DST0                r3
+# define       REG_DST                 r4
+# define       REG_C                   r5
+# define       REG_LEN                 r6
+#endif
+/* Entry point: dispatch on len -- len >= 28 goes to "large", 12..27 to "small", 0..11 is filled inline below. */
+#ifdef BZERO
+ENTRY(bzero)
+#else
+ENTRY(memset)
+       mov     REG_DST,REG_DST0        /* for return value */
+#endif
+       /* small amount to fill ? */
+       mov     #28,REG_TMP1
+       cmp/hs  REG_TMP1,REG_LEN        /* if (len >= 28) goto large; */
+       bt/s    large                   /* the mov #12 below executes in the delay slot */
+       mov     #12,REG_TMP1            /* if (len >= 12) goto small; */
+       cmp/hs  REG_TMP1,REG_LEN
+       bt/s    small                   /* delay slot: mov #0 (bzero) or the tst below (memset) */
+#ifdef BZERO
+       mov     #0,REG_C
+#endif
+       /* tiny fill (0 ~ 11 bytes): bytes are stored backwards from dst+len */
+       tst     REG_LEN,REG_LEN         /* T = (len == 0) */
+       add     REG_DST,REG_LEN         /* REG_LEN = dst + len (end pointer) */
+       bt/s    done                    /* len == 0: nothing to store */
+       add     #1,REG_DST              /* REG_DST = dst + 1; end == REG_DST means one byte is left */
+
+       /* loop unrolled 4 times; T from each cmp/eq is tested after the next store, so the last byte is written before leaving */
+       cmp/eq  REG_DST,REG_LEN
+1:     mov.b   REG_C,@-REG_LEN
+       bt/s    done
+       cmp/eq  REG_DST,REG_LEN
+       mov.b   REG_C,@-REG_LEN
+       bt/s    done
+       cmp/eq  REG_DST,REG_LEN
+       mov.b   REG_C,@-REG_LEN
+       bt/s    done
+       cmp/eq  REG_DST,REG_LEN
+       mov.b   REG_C,@-REG_LEN
+       bf/s    1b
+       cmp/eq  REG_DST,REG_LEN
+done:
+#ifdef BZERO
+       rts
+       nop
+#else
+       rts
+       mov     REG_DST0,r0             /* delay slot: return the original dst */
+#endif
+
+
+small:                                 /* 12 <= len < 28: byte stores through a computed jump unless dst is 2-byte aligned */
+       mov     REG_DST,r0              /* copy to r0 for the immediate tst below */
+       tst     #1,r0                   /* T = dst is 2-byte aligned */
+       bt/s    small_aligned
+       mov     REG_DST,REG_TMP1        /* delay slot */
+       shll    REG_LEN                 /* len *= 2: every store insn in the table is 2 bytes long */
+       mova    1f,r0                   /* 1f must be 4-byte aligned (mova requirement) */
+       add     #16,REG_TMP1            /* REG_TMP1 = dst+16; */
+       sub     REG_LEN,r0              /* r0 = 1f - 2*len: entry point that executes exactly len stores */
+       jmp     @r0
+       mov     REG_C,r0                /* delay slot: r0 = fill value, the source register of every store below */
+
+       .align  2                       /* table start; its 32 two-byte insns keep 1: 4-byte aligned */
+       mov.b   r0,@(15,REG_TMP1)       /* dst+31 ... dst+16 via REG_TMP1 */
+       mov.b   r0,@(14,REG_TMP1)
+       mov.b   r0,@(13,REG_TMP1)
+       mov.b   r0,@(12,REG_TMP1)
+       mov.b   r0,@(11,REG_TMP1)
+       mov.b   r0,@(10,REG_TMP1)
+       mov.b   r0,@(9,REG_TMP1)
+       mov.b   r0,@(8,REG_TMP1)
+       mov.b   r0,@(7,REG_TMP1)
+       mov.b   r0,@(6,REG_TMP1)
+       mov.b   r0,@(5,REG_TMP1)
+       mov.b   r0,@(4,REG_TMP1)
+       mov.b   r0,@(3,REG_TMP1)
+       mov.b   r0,@(2,REG_TMP1)
+       mov.b   r0,@(1,REG_TMP1)
+       mov.b   r0,@REG_TMP1
+       mov.b   r0,@(15,REG_DST)        /* dst+15 ... dst+1 via REG_DST */
+       mov.b   r0,@(14,REG_DST)
+       mov.b   r0,@(13,REG_DST)
+       mov.b   r0,@(12,REG_DST)
+       mov.b   r0,@(11,REG_DST)
+       mov.b   r0,@(10,REG_DST)
+       mov.b   r0,@(9,REG_DST)
+       mov.b   r0,@(8,REG_DST)
+       mov.b   r0,@(7,REG_DST)
+       mov.b   r0,@(6,REG_DST)
+       mov.b   r0,@(5,REG_DST)
+       mov.b   r0,@(4,REG_DST)
+       mov.b   r0,@(3,REG_DST)
+       mov.b   r0,@(2,REG_DST)
+       mov.b   r0,@(1,REG_DST)
+#ifdef BZERO
+       rts
+1:     mov.b   r0,@REG_DST             /* delay slot of rts: stores the first byte */
+#else
+       mov.b   r0,@REG_DST
+1:     rts
+       mov     REG_DST0,r0             /* delay slot: return the original dst */
+#endif
+
+
+/* small fill (12..27 bytes) for a 2-byte aligned dst: one trailing byte if len is odd, then 16-bit stores through a computed jump */
+small_aligned:
+#ifndef BZERO
+       extu.b  REG_C,REG_TMP1          /* REG_C = ??????xx, REG_TMP1 = ????00xx */
+       shll8   REG_C                   /* REG_C = ????xx00, REG_TMP1 = ????00xx */
+       or      REG_TMP1,REG_C          /* REG_C = ????xxxx */
+#endif
+
+       mov     REG_LEN,r0
+       tst     #1,r0                   /* is len even? */
+       bt/s    1f
+       add     #-1,r0                  /* delay slot: r0 = len - 1 */
+       mov.b   REG_C,@(r0,REG_DST)     /* odd len: fill the last byte */
+       mov     r0,REG_LEN              /* len is now even */
+1:
+
+       mova    1f,r0                   /* 1f must be 4-byte aligned (mova requirement) */
+       sub     REG_LEN,r0              /* each mov.w is 2 bytes long and stores 2 bytes, so 1f - len executes len/2 stores */
+       jmp     @r0
+       mov     REG_C,r0                /* delay slot: r0 = 16-bit fill pattern, the source register of every store below */
+
+       .align  2                       /* table start; its 16 two-byte insns keep 1: 4-byte aligned */
+       mov.w   r0,@(30,REG_DST)
+       mov.w   r0,@(28,REG_DST)
+       mov.w   r0,@(26,REG_DST)
+       mov.w   r0,@(24,REG_DST)
+       mov.w   r0,@(22,REG_DST)
+       mov.w   r0,@(20,REG_DST)
+       mov.w   r0,@(18,REG_DST)
+       mov.w   r0,@(16,REG_DST)
+       mov.w   r0,@(14,REG_DST)
+       mov.w   r0,@(12,REG_DST)
+       mov.w   r0,@(10,REG_DST)
+       mov.w   r0,@(8,REG_DST)
+       mov.w   r0,@(6,REG_DST)
+       mov.w   r0,@(4,REG_DST)
+       mov.w   r0,@(2,REG_DST)
+#ifdef BZERO
+       rts
+1:     mov.w   r0,@REG_DST             /* delay slot of rts: stores the first halfword */
+#else
+       mov.w   r0,@REG_DST
+1:     rts
+       mov     REG_DST0,r0             /* delay slot: return the original dst */
+#endif
+
+
+
+       .align  2
+large:                                 /* len >= 28: build a 32-bit pattern, fix up alignment, then fill with longword stores */
+#ifdef BZERO
+       mov     #0,REG_C
+#else
+       extu.b  REG_C,REG_TMP1          /* REG_C = ??????xx, REG_TMP1 = ????00xx */
+       shll8   REG_C                   /* REG_C = ????xx00, REG_TMP1 = ????00xx */
+       or      REG_C,REG_TMP1          /* REG_C = ????xx00, REG_TMP1 = ????xxxx */
+       swap.w  REG_TMP1,REG_C          /* REG_C = xxxx????, REG_TMP1 = ????xxxx */
+       xtrct   REG_TMP1,REG_C          /* REG_C = xxxxxxxx */
+#endif
+
+       mov     #3,REG_TMP1             /* 4-byte alignment mask */
+       tst     REG_TMP1,REG_DST        /* T = dst is 4-byte aligned */
+       mov     REG_DST,REG_PTR
+       bf/s    unaligned_dst
+       add     REG_LEN,REG_PTR         /* REG_PTR = dst + len; */
+       tst     REG_TMP1,REG_LEN        /* T = len is a multiple of 4 */
+       bf/s    unaligned_len           /* the mov #32 below executes in the delay slot */
+
+aligned:
+       /* fill 32-byte chunks, working backwards from REG_PTR, while len >= 32 */
+       mov     #32,REG_TMP1
+       cmp/hi  REG_LEN,REG_TMP1        /* T = (32 > len) */
+       bt      9f
+       .align  2
+1:     sub     REG_TMP1,REG_PTR
+       mov.l   REG_C,@REG_PTR
+       sub     REG_TMP1,REG_LEN
+       mov.l   REG_C,@(4,REG_PTR)
+       cmp/hi  REG_LEN,REG_TMP1        /* T = (32 > remaining len), tested by the bf/s below */
+       mov.l   REG_C,@(8,REG_PTR)
+       mov.l   REG_C,@(12,REG_PTR)
+       mov.l   REG_C,@(16,REG_PTR)
+       mov.l   REG_C,@(20,REG_PTR)
+       mov.l   REG_C,@(24,REG_PTR)
+       bf/s    1b
+       mov.l   REG_C,@(28,REG_PTR)     /* delay slot: last longword of the chunk */
+9:
+
+       /* fill the remaining 4*n bytes, backwards until REG_PTR reaches dst */
+       cmp/eq  REG_DST,REG_PTR
+       bt      9f
+       add     #4,REG_DST              /* REG_PTR == dst+4 now means one longword is left */
+       cmp/eq  REG_DST,REG_PTR
+1:     mov.l   REG_C,@-REG_PTR
+       bt/s    9f
+       cmp/eq  REG_DST,REG_PTR
+       mov.l   REG_C,@-REG_PTR
+       bt/s    9f
+       cmp/eq  REG_DST,REG_PTR
+       mov.l   REG_C,@-REG_PTR
+       bt/s    9f
+       cmp/eq  REG_DST,REG_PTR
+       mov.l   REG_C,@-REG_PTR
+       bf/s    1b
+       cmp/eq  REG_DST,REG_PTR
+9:
+#ifdef BZERO
+       rts
+       nop
+#else
+       rts
+       mov     REG_DST0,r0             /* delay slot: return the original dst */
+#endif
+
+
+unaligned_dst:
+       mov     #1,REG_TMP1
+       tst     REG_TMP1,REG_DST        /* if (dst & 1) {               */
+       add     #1,REG_TMP1
+       bt/s    2f
+       tst     REG_TMP1,REG_DST
+       mov.b   REG_C,@REG_DST          /*   *dst++ = c;                */
+       add     #1,REG_DST
+       tst     REG_TMP1,REG_DST
+2:                                     /* }                            */
+                                       /* if (dst & 2) {               */
+       bt      4f
+       mov.w   REG_C,@REG_DST          /*   *(u_int16_t*)dst++ = c;    */
+       add     #2,REG_DST
+4:                                     /* }                            */
+
+
+       tst     #3,REG_PTR              /* if (ptr & 3) {               */
+       bt/s    4f                      /*                              */
+unaligned_len:
+       tst     #1,REG_PTR              /*   if (ptr & 1) {             */
+       bt/s    2f
+       tst     #2,REG_PTR
+       mov.b   REG_C,@-REG_PTR         /*     --ptr = c;               */
+2:                                     /*   }                          */
+                                       /*   if (ptr & 2) {             */
+       bt      4f
+       mov.w   REG_C,@-REG_PTR         /*     *--(u_int16_t*)ptr = c;  */



Home | Main Index | Thread Index | Old Index