Subject: port-m68k/3641: Polishing zeropage/m68k.
To: None <gnats-bugs@gnats.netbsd.org>
From: Hiroshi HORIMOTO <horimoto@cs-aoi.cs.sist.ac.jp>
List: netbsd-bugs
Date: 05/18/1997 08:07:26
>Number:         3641
>Category:       port-m68k
>Synopsis:       Optimizing `zeropage'.
>Confidential:   no
>Severity:       non-critical
>Priority:       low
>Responsible:    gnats-admin (GNATS administrator)
>State:          open
>Class:          change-request
>Submitter-Id:   net
>Arrival-Date:   Sat May 17 16:20:01 1997
>Last-Modified:
>Originator:     Hiroshi HORIMOTO
>Organization:
	Shizuoka Institute of Science and Technology, JAPAN.
>Release:        NetBSD/x68k 1.2D (May 3, 1997)
>Environment:
	Machine: X68030 with MC68030RC40, MC68882FN33
	Target: all m68k-based machines' kernel (src/sys/arch/m68k/m68k/copypage.s)
System: NetBSD silpheed.faf.mil 1.2D NetBSD 1.2D (SILPHEED) #7: Thu May 8 07:34:53 JST 1997 root@silpheed.faf.mil:/usr/src/sys/arch/x68k/compile/SILPHEED x68k


>Description:
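	Optimize `zeropage' in src/sys/arch/m68k/m68k/copypage.s by using the
	movem.l instruction to store many zeroed registers per instruction
	(512 bytes per loop iteration when NBPG >= 1024).  The patch also
	loads the loop counters in copypage040, copypage and zeropage with
	moveq where the count fits (movw otherwise), and spells dbf as the
	equivalent dbra mnemonic.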
>How-To-Repeat:
>Fix:
	The patch follows.  Please review it and apply it.

--- ./copypage.s.org	Sun May 18 07:48:33 1997
+++ ./copypage.s	Sun Mar 23 07:07:14 1997
@@ -56,11 +56,15 @@
 ENTRY(copypage040)
 	movl	sp@(4),a0		| source address
 	movl	sp@(8),a1		| destiniation address
-	movl	#NBPG/32-1,d0		| number of 32 byte chunks - 1
+#if NBPG <= 4096
+	movq	#NBPG/32-1,d0		| number of 32 byte chunks - 1
+#else
+	movw	#NBPG/32-1,d0
+#endif
 Lm16loop:
 	.long	0xf6209000		| move16 a0@+,a1@+
 	.long	0xf6209000		| move16 a0@+,a1@+
-	dbf	d0,Lm16loop
+	dbra	d0,Lm16loop
 	rts
 #endif /* M68040 || M68060 */
 
@@ -72,7 +76,11 @@
 ENTRY(copypage)
 	movl	sp@(4),a0		| source address
 	movl	sp@(8),a1		| destiniation address
-	movl	#NBPG/32-1,d0		| number of 32 byte chunks - 1
+#if NBPG <= 4096
+	movq	#NBPG/32-1,d0		| number of 32 byte chunks - 1
+#else
+	movw	#NBPG/32-1,d0
+#endif
 Lmlloop:
 	movl	a0@+,a1@+
 	movl	a0@+,a1@+
@@ -82,7 +90,7 @@
 	movl	a0@+,a1@+
 	movl	a0@+,a1@+
 	movl	a0@+,a1@+
-	dbf	d0,Lmlloop
+	dbra	d0,Lmlloop
 	rts
 
 /*
@@ -91,8 +99,48 @@
  * Optimized version of bzero for a single page-aligned NBPG byte zero.
  */
 ENTRY(zeropage)
+#if NBPG >= 1024
+	movml	#0x3f3e,sp@-		| push d2-d7/a2-a6
+	movl	sp@(48),a0		| dest address
+#if NBPG > 65536
+	movw	#NBPG/512-1,d0		| number of 512 byte chunks - 1
+#else
+	movq	#NBPG/512-1,d0
+#endif
+	movq	#0,d1
+	movq	#0,d2
+	movq	#0,d3
+	movq	#0,d4
+	movq	#0,d5
+	movq	#0,d6
+	movq	#0,d7
+	movl	d7,a1
+	movl	d7,a2
+	movl	d7,a3
+	movl	d7,a4
+	movl	d7,a5
+	movl	d7,a6
+#if NBPG < 32768
+	lea	a0@(NBPG),a0
+#else
+	addl	#NBPG,a0
+#endif
+Lzzloop:
+	movml	#0x7f7e,a0@-		|  52 bytes (d1-d7/a1-a6)
+	movml	#0x7f7e,a0@-		| 104 bytes
+	movml	#0x7f7e,a0@-		| 156 bytes
+	movml	#0x7f7e,a0@-		| 208 bytes
+	movml	#0x7f7e,a0@-		| 260 bytes
+	movml	#0x7f7e,a0@-		| 312 bytes
+	movml	#0x7f7e,a0@-		| 364 bytes
+	movml	#0x7f7e,a0@-		| 416 bytes
+	movml	#0x7f7e,a0@-		| 468 bytes
+	movml	#0x7f78,a0@-		| 512 bytes (d1-d7/a1-a4)
+	dbra	d0,Lzzloop
+	movml	sp@+,#0x7cfc
+#else
 	movl	sp@(4),a0		| dest address
-	movl	#NBPG/32-1,d0		| number of 32 byte chunks - 1
+	movq	#NBPG/32-1,d0		| number of 32 byte chunks - 1
 	movq	#0,d1
 Lzloop:
 	movl	d1,a0@+
@@ -103,5 +151,6 @@
 	movl	d1,a0@+
 	movl	d1,a0@+
 	movl	d1,a0@+
-	dbf	d0,Lzloop
+	dbra	d0,Lzloop
+#endif
 	rts
>Audit-Trail:
>Unformatted: