Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/common/lib/libc/arch/arm/string Add ARM optimized version of...



details:   https://anonhg.NetBSD.org/src/rev/ae022fcd171e
branches:  trunk
changeset: 784028:ae022fcd171e
user:      matt <matt%NetBSD.org@localhost>
date:      Tue Jan 15 08:52:27 2013 +0000

description:
Add ARM optimized version of strrchr.

diffstat:

 common/lib/libc/arch/arm/string/strrchr_arm.S |  145 ++++++++++++++++++++++++++
 1 files changed, 145 insertions(+), 0 deletions(-)

diffs (149 lines):

diff -r a7d2dc8f77d3 -r ae022fcd171e common/lib/libc/arch/arm/string/strrchr_arm.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/common/lib/libc/arch/arm/string/strrchr_arm.S     Tue Jan 15 08:52:27 2013 +0000
@@ -0,0 +1,145 @@
+/*-
+ * Copyright (c) 2013 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Matt Thomas of 3am Software Foundry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+
+RCSID("$NetBSD: strrchr_arm.S,v 1.1 2013/01/15 08:52:27 matt Exp $")
+
+#ifdef __ARMEL__       /* little-endian: lowest-addressed byte is the LSB */
+#define        BYTE0   0x000000ff      /* mask: byte at word address + 0 */
+#define        BYTE1   0x0000ff00      /* mask: byte at word address + 1 */
+#define        BYTE2   0x00ff0000      /* mask: byte at word address + 2 */
+#define        BYTE3   0xff000000      /* mask: byte at word address + 3 */
+#define        lshi    lsl             /* shift toward higher-addressed lanes */
+#else                  /* big-endian: lowest-addressed byte is the MSB */
+#define        BYTE0   0xff000000      /* mask: byte at word address + 0 */
+#define        BYTE1   0x00ff0000      /* mask: byte at word address + 1 */
+#define        BYTE2   0x0000ff00      /* mask: byte at word address + 2 */
+#define        BYTE3   0x000000ff      /* mask: byte at word address + 3 */
+#define        lshi    lsr             /* shift toward higher-addressed lanes */
+#endif /* __ARMEL__ */
+
+       .text
+ENTRY(strrchr)                         /* char *strrchr(const char *s, int c) */
+       mov     ip, r0                  /* ip = cursor; r0 becomes the return value */
+       mov     r0, #0                  /* return NULL unless a match is found */
+       and     r2, r1, #0xff           /* r2 = c restricted to a byte value */
+1:     tst     ip, #3                  /* is the cursor word aligned yet? */
+       beq     .Lpre_main_loop         /*   yes, switch to word-at-a-time */
+       ldrb    r3, [ip], #1            /* load a byte, advance the cursor */
+       cmp     r3, r2                  /* did it match? */
+       subeq   r0, ip, #1              /*   yes, remember where (undo post-inc) */
+       teq     r3, #0                  /* was it NUL? */
+       bne     1b                      /*   no, try next byte */
+       RET                             /* NUL before alignment; r4/r5 untouched */
+.Lpre_main_loop:
+       push    {r4, r5}                /* save the scratch registers used below */
+#if defined(_ARM_ARCH_7)
+       movw    r1, #0xfefe             /* magic constant; 254 in each byte */
+       movt    r1, #0xfefe             /* magic constant; 254 in each byte */
+#elif defined(_ARM_ARCH_6)
+       mov     r1, #0xfe               /* put 254 in low byte */
+       orr     r1, r1, r1, lsl #8      /* move to next byte */
+       orr     r1, r1, r1, lsl #16     /* move to next halfword */
+#endif /* _ARM_ARCH_6 */
+       orr     r2, r2, r2, lsl #8      /* replicate c into the low halfword */
+       orr     r2, r2, r2, lsl #16     /* replicate c into all four bytes */
+.Lmain_loop:
+       ldr     r3, [ip], #4            /* load next word, advance the cursor */
+#if defined(_ARM_ARCH_6)
+       /*
+        * Add 254 to each byte using the UQADD8 (unsigned saturating add 8)
+        * instruction.  Every non-zero byte saturates to 255; a zero byte
+        * becomes 254.  Complementing therefore leaves 0x01 in exactly the
+        * lanes that were zero: NUL lanes for r4, matching lanes for r5.
+        */
+       uqadd8  r4, r3, r1              /* NUL detection happens here */
+       usub8   r3, r3, r2              /* matching bytes become zero */
+       uqadd8  r5, r3, r1              /* char detection happens here */
+       and     r3, r4, r5              /* merge results */
+       mvns    r3, r3                  /* is the complement non-0? */
+       beq     .Lmain_loop             /*   no, then keep going */
+
+       mvns    r5, r5                  /* did we find any matching bytes? */
+       beq     .Ldone                  /*   no, then we hit the end, return */
+       mvns    r4, r4                  /* did we encounter a NUL? */
+       beq     .Lfind_match            /*   no, find matching byte */
+       /*
+        * Copy the NUL bit to the following byte lanes.  Then clear any match
+        * bits in those byte lanes to prevent false positives in those bytes.
+        */
+       movs    r3, r4, lshi #8         /* shift up a byte */
+       orrnes  r3, r3, r3, lshi #8     /* if non 0, copy up to next byte */
+       orrnes  r3, r3, r3, lshi #8     /* if non 0, copy up to last byte */
+       bics    r5, r5, r3              /* clear match bits */
+       beq     .Ldone                  /*   no remaining matches, we're done */
+.Lfind_match:
+#ifdef __ARMEL__
+       rev     r5, r5                  /* we want this in BE for the CLZ */
+#endif
+       /*
+        * If we have multiple matches, we want to select the "last" match
+        * in the word, which will be the lowest bit set.
+        */
+       sub     r3, r5, #1              /* subtract 1 */
+       and     r3, r3, r5              /* r3 = mask with its lowest bit cleared */
+       eor     r5, r5, r3              /* only have the lowest bit set left */
+       clz     r5, r5                  /* count how many leading zeros */
+       add     r0, ip, r5, lsr #3      /* divide that by 8 and add to cursor */
+       sub     r0, r0, #4              /* compensate for the post-inc */
+       teq     r4, #0                  /* did we read any NULs? */
+       beq     .Lmain_loop             /*   no, get next word */
+#else
+       /*
+        * No fancy shortcuts, so test each byte lane in turn.  The match is
+        * checked before the NUL so that strrchr(s, 0) finds the terminator.
+        */
+       eor     r4, r3, r2              /* matching lanes become zero in r4 */
+       tst     r4, #BYTE0              /* is byte 0 a match? */
+       subeq   r0, ip, #4              /*   yes, remember its location */
+       tst     r3, #BYTE0              /* is byte 0 a NUL? */
+       beq     .Ldone                  /*   yes, then we're done */
+       tst     r4, #BYTE1              /* is byte 1 a match? */
+       subeq   r0, ip, #3              /*   yes, remember its location */
+       tst     r3, #BYTE1              /* is byte 1 a NUL? */
+       beq     .Ldone                  /*   yes, then we're done */
+       tst     r4, #BYTE2              /* is byte 2 a match? */
+       subeq   r0, ip, #2              /*   yes, remember its location */
+       tst     r3, #BYTE2              /* is byte 2 a NUL? */
+       beq     .Ldone                  /*   yes, then we're done */
+       tst     r4, #BYTE3              /* is byte 3 a match? */
+       subeq   r0, ip, #1              /*   yes, remember its location */
+       tst     r3, #BYTE3              /* is byte 3 a NUL? */
+       beq     .Ldone                  /*   yes, then we're done */
+       b       .Lmain_loop             /* no NUL in this word, get the next */
+#endif /* _ARM_ARCH_6 */
+.Ldone:
+       pop     {r4, r5}                /* restore the registers saved above */
+       RET
+END(strrchr)



Home | Main Index | Thread Index | Old Index