Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/arch New, improved version of copyin(), copyout(), and kcopy()



details:   https://anonhg.NetBSD.org/src/rev/794917671dfc
branches:  trunk
changeset: 535195:794917671dfc
user:      bjh21 <bjh21%NetBSD.org@localhost>
date:      Sun Aug 11 21:19:12 2002 +0000

description:
New, improved version of copyin(), copyout(), and kcopy() by Allen Briggs.
This version works on both 26-bit and 32-bit machines.  For large copies,
it's up to three times as fast as the old arm32 version and five times as
fast as the old arm26 version.  For small copies it seems to be even faster
(getrusage() is apparently over ten times faster on an ARM610).

Hooray for Allen!

diffstat:

 sys/arch/acorn26/acorn26/copyinout.S |  103 +-----
 sys/arch/arm/arm/bcopyinout.S        |  673 +++++++++++++++++++++++++++++++++++
 sys/arch/arm/arm32/bcopyinout.S      |  245 ------------
 sys/arch/arm/conf/files.arm          |    4 +-
 4 files changed, 677 insertions(+), 348 deletions(-)

diffs (truncated from 1072 to 300 lines):

diff -r 49e6533f517c -r 794917671dfc sys/arch/acorn26/acorn26/copyinout.S
--- a/sys/arch/acorn26/acorn26/copyinout.S      Sun Aug 11 20:50:39 2002 +0000
+++ b/sys/arch/acorn26/acorn26/copyinout.S      Sun Aug 11 21:19:12 2002 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: copyinout.S,v 1.2 2002/03/24 23:37:42 bjh21 Exp $ */
+/* $NetBSD: copyinout.S,v 1.3 2002/08/11 21:19:15 bjh21 Exp $ */
 
 /*-
  * Copyright (c) 2000 Ben Harris
@@ -32,110 +32,11 @@
 
 #include <machine/asm.h>
 
-RCSID("$NetBSD: copyinout.S,v 1.2 2002/03/24 23:37:42 bjh21 Exp $")
+RCSID("$NetBSD: copyinout.S,v 1.3 2002/08/11 21:19:15 bjh21 Exp $")
 
 #include <sys/errno.h>
 #include "assym.h"
 
-/*
- * int copyin(const void *ua, void *ka, size_t len);
- * int copyout(const void *ka, void *ua, size_t len);
- * int kcopy(const void *src, void *dst, size_t len);
- */
-
-/*
- * memcpy isn't currently data-abort-safe (it uses R14).  This is much
- * slower, but safer.
- */
-
-/* LINTSTUB: Func: int copyin(const void *uaddr, void *kaddr, size_t len) */
-ENTRY(copyin)
-       mov     ip, sp
-       stmfd   sp!, {r4, fp, ip, lr, pc}
-       sub     fp, ip, #4
-       adr     r3, Lcopyfault
-       ldr     r4, Lcurproc
-       ldr     r4, [r4]
-       ldr     r4, [r4, #P_ADDR]
-       str     r3, [r4, #(U_PCB + PCB_ONFAULT)]
-       teq     r2, #0
-       beq     Lcopyinskip
-Lcopyinloop:
-       ldrbt   r3, [r0], #1
-       strb    r3, [r1], #1
-       subs    r2, r2, #1
-       bne     Lcopyinloop
-Lcopyinskip:
-       mov     r0, #0
-       str     r0, [r4, #(U_PCB + PCB_ONFAULT)]
-#ifdef __APCS_26__
-       ldmdb   fp, {r4, fp, sp, pc}^
-#else
-       ldmdb   fp, {r4, fp, sp, pc}
-#endif
-
-/* LINTSTUB: Func: int copyout(const void *kaddr, void *uaddr, size_t len) */
-ENTRY(copyout)
-       mov     ip, sp
-       stmfd   sp!, {r4, fp, ip, lr, pc}
-       sub     fp, ip, #4
-       adr     r3, Lcopyfault
-       ldr     r4, Lcurproc
-       ldr     r4, [r4]
-       ldr     r4, [r4, #P_ADDR]
-       str     r3, [r4, #(U_PCB + PCB_ONFAULT)]
-       teq     r2, #0
-       beq     Lcopyoutskip
-Lcopyoutloop:
-       ldrb    r3, [r0], #1
-       strbt   r3, [r1], #1
-       subs    r2, r2, #1
-       bne     Lcopyoutloop
-Lcopyoutskip:
-       mov     r0, #0
-       str     r0, [r4, #(U_PCB + PCB_ONFAULT)]
-#ifdef __APCS_26__
-       ldmdb   fp, {r4, fp, sp, pc}^
-#else
-       ldmdb   fp, {r4, fp, sp, pc}
-#endif
-
-/* LINTSTUB: Func: int kcopy(const void *kfaddr, void *kdaddr, size_t len) */
-ENTRY(kcopy)
-       mov     ip, sp
-       stmfd   sp!, {r4, fp, ip, lr, pc}
-       sub     fp, ip, #4
-       adr     r3, Lcopyfault
-       ldr     r4, Lcurproc
-       ldr     r4, [r4]
-       ldr     r4, [r4, #P_ADDR]
-       str     r3, [r4, #(U_PCB + PCB_ONFAULT)]
-       teq     r2, #0
-       beq     Lkcopyskip
-Lkcopyloop:
-       ldrb    r3, [r0], #1
-       strb    r3, [r1], #1
-       subs    r2, r2, #1
-       bne     Lkcopyloop
-Lkcopyskip:
-       mov     r0, #0
-       str     r0, [r4, #(U_PCB + PCB_ONFAULT)]
-#ifdef __APCS_26__
-       ldmdb   fp, {r4, fp, sp, pc}^
-#else
-       ldmdb   fp, {r4, fp, sp, pc}
-#endif
-
-Lcopyfault:
-       mov     r1, #0
-       str     r1, [r4, #(U_PCB + PCB_ONFAULT)]
-       /* Return value is provided by fault handler. */
-#ifdef __APCS_26__
-       ldmdb   fp, {r4, fp, sp, pc}^
-#else
-       ldmdb   fp, {r4, fp, sp, pc}
-#endif
-
 /* LINTSTUB: Func: int copyinstr(const void *uaddr, void *kaddr, size_t len, size_t *done) */
 ENTRY(copyinstr)
        mov     ip, sp
diff -r 49e6533f517c -r 794917671dfc sys/arch/arm/arm/bcopyinout.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/arm/arm/bcopyinout.S     Sun Aug 11 21:19:12 2002 +0000
@@ -0,0 +1,673 @@
+/*     $NetBSD: bcopyinout.S,v 1.1 2002/08/11 21:19:12 bjh21 Exp $     */
+
+/*
+ * Copyright (c) 2002 Wasabi Systems, Inc.
+ * All rights reserved.
+ *
+ * Written by Allen Briggs for Wasabi Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed for the NetBSD Project by
+ *      Wasabi Systems, Inc.
+ * 4. The name of Wasabi Systems, Inc. may not be used to endorse
+ *    or promote products derived from this software without specific prior
+ *    written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "assym.h"
+
+#include <machine/asm.h>
+#include <sys/errno.h>
+
+       .text
+       .align  0
+
+Lcurpcb:
+       .word _C_LABEL(curpcb)
+
+#ifdef __PROG32
+#define SAVE_REGS      stmfd   sp!, {r4-r11}
+#define RESTORE_REGS   ldmfd   sp!, {r4-r11}
+#else
+/*
+ * Need to save R14_svc because it'll get trampled if we take a page fault.
+ * This variant therefore pushes/pops r14 in addition to r4-r11.
+ */
+#define SAVE_REGS      stmfd   sp!, {r4-r11, r14}
+#define RESTORE_REGS   ldmfd   sp!, {r4-r11, r14}
+#endif
+               
+#if 0 && defined(__XSCALE__)   /* "0 &&" keeps the pld variant compiled out */
+#define HELLOCPP #
+#define PREFETCH(rx,o) pld     [ rx , HELLOCPP (o) ]
+#else  /* PREFETCH(rx,o) expands to nothing */
+#define PREFETCH(rx,o)
+#endif
+
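+/*
+ * r0 = user space address (source)
+ * r1 = kernel space address (destination)
+ * r2 = length in bytes
+ *
+ * Copies bytes from user space to kernel space.
+ *
+ * r4-r11 are used as scratch registers, so they are saved on entry and
+ * restored before returning (see SAVE_REGS/RESTORE_REGS).
+ */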
+ENTRY(copyin)
+       /* Quick exit if length is zero */      
+       teq     r2, #0
+       moveq   r0, #0
+       moveq   pc, lr
+
+       SAVE_REGS
+       ldr     r4, Lcurpcb
+       ldr     r4, [r4]
+
+       ldr     r5, [r4, #PCB_ONFAULT]
+       add     r3, pc, #Lcopyfault - . - 8
+       str     r3, [r4, #PCB_ONFAULT]
+
+       PREFETCH(r0, 0)
+       PREFETCH(r1, 0)
+
+       /*
+        * If not too many bytes, take the slow path.
+        */
+       cmp     r2, #0x08
+       blt     Licleanup
+
+       /*
+        * Align destination to word boundary.
+        *
+        * r6 = r1 & 3 is the destination's misalignment, so 4 - r6 bytes
+        * have to be copied to reach the next word boundary:
+        *   misalignment 1 -> 3 bytes (enter at Lial3)
+        *   misalignment 2 -> 2 bytes (enter at Lial2)
+        *   misalignment 3 -> 1 byte  (enter at Lial1)
+        * The LialN entry points fall through into one another, so
+        * entering at LialN copies exactly N bytes.
+        *
+        * The ldr sees pc as its own address + 8, i.e. the first .word of
+        * the table; the branch in between only fills that gap and is
+        * never executed.
+        *
+        * r2 is at least 8 here (smaller lengths already branched to
+        * Licleanup), so subtracting up to 3 cannot underflow.
+        */
+       and     r6, r1, #0x3
+       ldr     pc, [pc, r6, lsl #2]
+       b       Lialend
+       .word   Lialend
+       .word   Lial3
+       .word   Lial2
+       .word   Lial1
+Lial3: ldrbt   r6, [r0], #1
+       sub     r2, r2, #1
+       strb    r6, [r1], #1
+Lial2: ldrbt   r7, [r0], #1
+       sub     r2, r2, #1
+       strb    r7, [r1], #1
+Lial1: ldrbt   r6, [r0], #1
+       sub     r2, r2, #1
+       strb    r6, [r1], #1
+Lialend:
+
+       /*
+        * If few bytes left, finish slow.
+        */
+       cmp     r2, #0x08
+       blt     Licleanup
+
+       /*
+        * If source is not aligned, finish slow.
+        */
+       ands    r3, r0, #0x03
+       bne     Licleanup
+
+       cmp     r2, #0x60       /* Must be > 0x5f for unrolled cacheline */
+       blt     Licleanup8
+
+       /*
+        * Align destination to cacheline boundary.
+        * If source and destination are nicely aligned, this can be a big
+        * win.  If not, it's still cheaper to copy in groups of 32 even if
+        * we don't get the nice cacheline alignment.
+        *
+        * r6 = r1 & 0x1f is the destination's offset within a 32-byte
+        * line.  The preceding step is meant to leave the destination
+        * word-aligned, so r6 is expected to be a multiple of 4 and is
+        * used directly as the byte offset into the table below (the ldr
+        * sees pc as its own address + 8, i.e. the first .word; the
+        * branch in between is only filler).
+        *
+        * Reaching the next line boundary takes 32 - r6 bytes, so offset
+        * 4 enters at Lical28, offset 8 at Lical24, ... offset 28 at
+        * Lical4.  The LicalN entry points fall through into one another,
+        * so entering at LicalN copies exactly N bytes, one word at a time.
+        */
+       and     r6, r1, #0x1f
+       ldr     pc, [pc, r6]
+       b       Licaligned
+       .word   Licaligned
+       .word   Lical28
+       .word   Lical24
+       .word   Lical20
+       .word   Lical16
+       .word   Lical12
+       .word   Lical8
+       .word   Lical4
+Lical28:ldrt   r6, [r0], #4
+       sub     r2, r2, #4
+       str     r6, [r1], #4
+Lical24:ldrt   r7, [r0], #4
+       sub     r2, r2, #4
+       str     r7, [r1], #4
+Lical20:ldrt   r6, [r0], #4
+       sub     r2, r2, #4
+       str     r6, [r1], #4
+Lical16:ldrt   r7, [r0], #4
+       sub     r2, r2, #4
+       str     r7, [r1], #4
+Lical12:ldrt   r6, [r0], #4
+       sub     r2, r2, #4
+       str     r6, [r1], #4
+Lical8:        ldrt    r7, [r0], #4
+       sub     r2, r2, #4
+       str     r7, [r1], #4
+Lical4:        ldrt    r6, [r0], #4
+       sub     r2, r2, #4
+       str     r6, [r1], #4
+
+       /*
+        * We start with > 0x40 bytes to copy (>= 0x60 got us into this
+        * part of the code, and we may have knocked that down by as much



Home | Main Index | Thread Index | Old Index