Subject: port-arm/36513: Pre-cache load length exceeds source memory area in memcpy
To: None <port-arm-maintainer@netbsd.org, gnats-admin@netbsd.org,>
From: Hiroki Doshita <doshita@iij.ad.jp>
List: netbsd-bugs
Date: 06/20/2007 02:15:00
>Number: 36513
>Category: port-arm
>Synopsis: Pre-cache load length exceeds source memory area in memcpy
>Confidential: no
>Severity: non-critical
>Priority: medium
>Responsible: port-arm-maintainer
>State: open
>Class: sw-bug
>Submitter-Id: net
>Arrival-Date: Wed Jun 20 02:15:00 +0000 2007
>Originator: Hiroki Doshita
>Release: NetBSD 3.1
>Organization:
Internet Initiative Japan, Inc.
>Environment:
Architecture: arm
Machine: armeb
>Description:
In memcpy, the pre-cache load (preload) reaches past the end of the
source memory area. As a result, an unrelated area is written back to
memory, which causes problems.
>How-To-Repeat:
>Fix:
Index: memcpy_xscale.S
===================================================================
RCS file: /cvs/cvsroot/src/common/lib/libc/arch/arm/string/memcpy_xscale.S,v
retrieving revision 1.1
diff -u -r1.1 memcpy_xscale.S
--- memcpy_xscale.S 20 Dec 2005 19:28:49 -0000 1.1
+++ memcpy_xscale.S 20 Jun 2007 02:08:52 -0000
@@ -247,13 +247,50 @@
str r5, [r3], #0x04
str r6, [r3], #0x04
str r7, [r3], #0x04
-.Lmemcpy_bad1:
subs r2, r2, #0x10
+
+.Lmemcpy_bad1:
+ cmp r2, #0x20
bge .Lmemcpy_bad1_loop16
+ cmp r2, #0x10
+ blt .Lmemcpy_bad1_loop16_short
- adds r2, r2, #0x10
+ /* copy last 16 bytes (without preload) */
+#ifdef __ARMEB__
+ mov r4, ip, lsl #8
+#else
+ mov r4, ip, lsr #8
+#endif
+ ldr r5, [r1], #0x04
+ ldr r6, [r1], #0x04
+ ldr r7, [r1], #0x04
+ ldr ip, [r1], #0x04
+#ifdef __ARMEB__
+ orr r4, r4, r5, lsr #24
+ mov r5, r5, lsl #8
+ orr r5, r5, r6, lsr #24
+ mov r6, r6, lsl #8
+ orr r6, r6, r7, lsr #24
+ mov r7, r7, lsl #8
+ orr r7, r7, ip, lsr #24
+#else
+ orr r4, r4, r5, lsl #24
+ mov r5, r5, lsr #8
+ orr r5, r5, r6, lsl #24
+ mov r6, r6, lsr #8
+ orr r6, r6, r7, lsl #24
+ mov r7, r7, lsr #8
+ orr r7, r7, ip, lsl #24
+#endif
+ str r4, [r3], #0x04
+ str r5, [r3], #0x04
+ str r6, [r3], #0x04
+ str r7, [r3], #0x04
+ subs r2, r2, #0x10
ldmeqfd sp!, {r4-r7}
bxeq lr /* Return now if done */
+
+.Lmemcpy_bad1_loop16_short:
subs r2, r2, #0x04
sublt r1, r1, #0x03
blt .Lmemcpy_bad_done
@@ -308,13 +345,50 @@
str r5, [r3], #0x04
str r6, [r3], #0x04
str r7, [r3], #0x04
-.Lmemcpy_bad2:
subs r2, r2, #0x10
+
+.Lmemcpy_bad2:
+ cmp r2, #0x20
bge .Lmemcpy_bad2_loop16
+ cmp r2, #0x10
+ blt .Lmemcpy_bad2_loop16_short
- adds r2, r2, #0x10
+ /* copy last 16 bytes (without preload) */
+#ifdef __ARMEB__
+ mov r4, ip, lsl #16
+#else
+ mov r4, ip, lsr #16
+#endif
+ ldr r5, [r1], #0x04
+ ldr r6, [r1], #0x04
+ ldr r7, [r1], #0x04
+ ldr ip, [r1], #0x04
+#ifdef __ARMEB__
+ orr r4, r4, r5, lsr #16
+ mov r5, r5, lsl #16
+ orr r5, r5, r6, lsr #16
+ mov r6, r6, lsl #16
+ orr r6, r6, r7, lsr #16
+ mov r7, r7, lsl #16
+ orr r7, r7, ip, lsr #16
+#else
+ orr r4, r4, r5, lsl #16
+ mov r5, r5, lsr #16
+ orr r5, r5, r6, lsl #16
+ mov r6, r6, lsr #16
+ orr r6, r6, r7, lsl #16
+ mov r7, r7, lsr #16
+ orr r7, r7, ip, lsl #16
+#endif
+ str r4, [r3], #0x04
+ str r5, [r3], #0x04
+ str r6, [r3], #0x04
+ str r7, [r3], #0x04
+ subs r2, r2, #0x10
ldmeqfd sp!, {r4-r7}
bxeq lr /* Return now if done */
+
+.Lmemcpy_bad2_loop16_short:
subs r2, r2, #0x04
sublt r1, r1, #0x02
blt .Lmemcpy_bad_done
@@ -369,13 +443,50 @@
str r5, [r3], #0x04
str r6, [r3], #0x04
str r7, [r3], #0x04
-.Lmemcpy_bad3:
subs r2, r2, #0x10
+
+.Lmemcpy_bad3:
+ cmp r2, #0x20
bge .Lmemcpy_bad3_loop16
+ cmp r2, #0x10
+ blt .Lmemcpy_bad3_loop16_short
- adds r2, r2, #0x10
+ /* copy last 16 bytes (without preload) */
+#ifdef __ARMEB__
+ mov r4, ip, lsl #24
+#else
+ mov r4, ip, lsr #24
+#endif
+ ldr r5, [r1], #0x04
+ ldr r6, [r1], #0x04
+ ldr r7, [r1], #0x04
+ ldr ip, [r1], #0x04
+#ifdef __ARMEB__
+ orr r4, r4, r5, lsr #8
+ mov r5, r5, lsl #24
+ orr r5, r5, r6, lsr #8
+ mov r6, r6, lsl #24
+ orr r6, r6, r7, lsr #8
+ mov r7, r7, lsl #24
+ orr r7, r7, ip, lsr #8
+#else
+ orr r4, r4, r5, lsl #8
+ mov r5, r5, lsr #24
+ orr r5, r5, r6, lsl #8
+ mov r6, r6, lsr #24
+ orr r6, r6, r7, lsl #8
+ mov r7, r7, lsr #24
+ orr r7, r7, ip, lsl #8
+#endif
+ str r4, [r3], #0x04
+ str r5, [r3], #0x04
+ str r6, [r3], #0x04
+ str r7, [r3], #0x04
+ subs r2, r2, #0x10
ldmeqfd sp!, {r4-r7}
bxeq lr /* Return now if done */
+
+.Lmemcpy_bad3_loop16_short:
subs r2, r2, #0x04
sublt r1, r1, #0x01
blt .Lmemcpy_bad_done