Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/common/lib/libc/arch/arm/string strlen implementation for ar...
details: https://anonhg.NetBSD.org/src/rev/e235a252cb6c
branches: trunk
changeset: 783542:e235a252cb6c
user: matt <matt%NetBSD.org@localhost>
date: Fri Dec 28 07:10:41 2012 +0000
description:
strlen implementation for armv6 and later. Uses clz and uqadd8 to really
speed up the search for NUL. As fast as the normal strlen at a length of about
6 or 7, and 2-3 times faster starting around 10.
diffstat:
common/lib/libc/arch/arm/string/strlen_armv6.S | 86 ++++++++++++++++++++++++++
1 files changed, 86 insertions(+), 0 deletions(-)
diffs (90 lines):
diff -r 11f9541c2383 -r e235a252cb6c common/lib/libc/arch/arm/string/strlen_armv6.S
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/common/lib/libc/arch/arm/string/strlen_armv6.S Fri Dec 28 07:10:41 2012 +0000
@@ -0,0 +1,86 @@
+/*-
+ * Copyright (c) 2012 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Matt Thomas of 3am Software Foundry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+
+RCSID("$NetBSD: strlen_armv6.S,v 1.1 2012/12/28 07:10:41 matt Exp $")
+
+ .text
+ENTRY(strlen) /* r0 = string pointer on entry, byte count on return */
+ ands r1, r0, #3 /* r1 = misalignment (0-3); Z set if aligned */
+ bic ip, r0, #3 /* ip = pointer rounded down to a word boundary */
+ ldr r3, [ip], #4 /* load first word, advance ip to the next word */
+ neg r0, r1 /* start count at -misalignment (flags untouched) */
+ beq .Lpre_main_loop /* already aligned? then skip the masking */
+ /*
+ * Misaligned string: the word just loaded also holds bytes that precede
+ * the string. Force them to 0xff so none of them is mistaken for the NUL.
+ */
+ mvn r2, #0 /* r2 = 0xffffffff, the initial mask */
+ and r1, r0, #3 /* r1 = 4 - misalignment = string bytes in word */
+ mov r1, r1, lsl #3 /* bytes -> bits */
+#ifdef __ARMEL__
+ mov r2, r2, lsr r1 /* keep 0xff only in the low (leading) bytes */
+#else
+ mov r2, r2, lsl r1 /* keep 0xff only in the high (leading) bytes */
+#endif
+ orr r3, r3, r2 /* make the bytes before the string non-NUL */
+.Lpre_main_loop:
+#ifdef _ARM_ARCH_7
+ movw r1, #0xfefe /* magic constant; 254 in each byte */
+#else
+ mov r1, #0xfe /* put 254 in low byte */
+ orr r1, r1, r1, lsl #8 /* copy to next byte: 0x0000fefe */
+#endif
+ orr r1, r1, r1, lsl #16 /* copy to upper halfword: 0xfefefefe */
+.Lmain_loop:
+ /*
+ * UQADD8 (unsigned saturating add, per byte) adds 254 to each byte:
+ * every non-NUL byte saturates to 255 while a NUL byte becomes 254.
+ * Complementing turns those into 0x00 and 0x01 respectively, so a
+ * non-zero word after MVNS means this word contains a NUL.
+ */
+ uqadd8 r3, r3, r1 /* 0xff per non-NUL byte, 0xfe per NUL byte */
+ mvns r3, r3 /* 0x01 per NUL byte; Z set if there is none */
+ bne .Lreturn /* found a NUL, go compute the final length */
+ add r0, r0, #4 /* no NUL: count all 4 bytes of this word */
+ ldr r3, [ip], #4 /* load next word */
+ b .Lmain_loop /* and go */
+.Lreturn:
+ /*
+ * r3 has 0x01 in each NUL byte position. With the first byte in memory
+ * as the most significant, CLZ / 8 is the count of bytes before the NUL.
+ */
+#ifdef __ARMEL__
+ rev r3, r3 /* put the first-in-memory byte on top for CLZ */
+#endif
+ clz r3, r3 /* count how many leading zeros */
+ add r0, r0, r3, lsr #3 /* divide that by 8 and add to count */
+ RET
+END(strlen)
Home |
Main Index |
Thread Index |
Old Index