Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/lib/libkern/arch/m68k Add m68k assembler version of __mu...



details:   https://anonhg.NetBSD.org/src/rev/375fde11f83f
branches:  trunk
changeset: 933921:375fde11f83f
user:      rin <rin%NetBSD.org@localhost>
date:      Sun May 31 11:43:37 2020 +0000

description:
Add m68k assembler version of __muldi3().

This is intended for 68060:
  - GCC does not emit __muldi3() for 68020-40, that have 32 * 32 --> 64 mulul
  - mulsl (and moveml), used in this code, are not implemented for 68010

In comparison with that from compiler_rt, this version saves:
  - 12% of processing time
  - 12 bytes of stack
  - 50 bytes of code size
It is also slightly faster, uses less memory, and is smaller than the libgcc version.

Measurements with evcnt(9) show that the kernel invokes __muldi3() more than
1000 times per second, which justifies introducing an assembler version of
this function.

diffstat:

 common/lib/libc/arch/m68k/gen/muldi3.S |  115 +++++++++++++++++++++++++++++++++
 lib/libc/arch/m68k/gen/Makefile.inc    |    5 +-
 sys/lib/libkern/arch/m68k/Makefile.inc |    5 +-
 3 files changed, 122 insertions(+), 3 deletions(-)

diffs (157 lines):

diff -r 4cd6bbcc9025 -r 375fde11f83f common/lib/libc/arch/m68k/gen/muldi3.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/common/lib/libc/arch/m68k/gen/muldi3.S    Sun May 31 11:43:37 2020 +0000
@@ -0,0 +1,115 @@
+/*     $NetBSD: muldi3.S,v 1.1 2020/05/31 11:43:37 rin Exp $   */
+
+/*
+ * Copyright (c) 2020 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Rin Okuyama.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+
+RCSID("$NetBSD: muldi3.S,v 1.1 2020/05/31 11:43:37 rin Exp $")
+
+| int64_t __muldi3(int64_t X, int64_t Y);
+|
+| * Return lower 64bit of (X * Y) into %d0:%d1.
+|
+| * Intended for 68060:
+|   - GCC does not emit __muldi3() for 68020-40, that have 32 * 32 --> 64 mulul.
+|   - mulsl (and moveml) are not implemented for 68010.
+|
+| * Notation:
+|   - H32:L32 --> higher:lower 32bit of variable
+|   - H:L     --> higher:lower 16bit of variable/register
+
+#ifdef __mc68010__
+#error "not for 68010"
+#endif
+
+#define X_H32 (4 * 4)          /* past return address + saved %d2-%d4 */
+#define X_L32 (X_H32 + 4)
+#define Y_H32 (X_L32 + 4)
+#define Y_L32 (Y_H32 + 4)
+
+ENTRY(__muldi3)
+       moveml  %d2-%d4, -(%sp) | push %d2-%d4
+
+| First, calculate (X_L32 * Y_L32) as a 64bit integer.
+
+       movel   X_L32(%sp), %a0 | save X_L32
+       movel   Y_L32(%sp), %a1 | save Y_L32
+
+       movel   %a0, %d2        | prepare for X_L32(H) in L
+       movel   %a1, %d3        | prepare for Y_L32(H) in L
+
+       movel   %a0, %d4        | X_L32(L) in L
+       movel   %a1, %d1        | Y_L32(L) in L
+       movel   %a0, %d0        | X_L32(L) in L
+
+       swap    %d2             | X_L32(H) in L
+       swap    %d3             | Y_L32(H) in L
+
+       muluw   %d1, %d4        | A = X_L32(L) * Y_L32(L)
+       muluw   %d2, %d1        | B = X_L32(H) * Y_L32(L)
+       muluw   %d3, %d2        | C = X_L32(H) * Y_L32(H)
+       muluw   %d0, %d3        | D = X_L32(L) * Y_L32(H)
+
+       movel   %d4, %d0        | extract A(H)
+       clrw    %d0             | drop A(L)
+       swap    %d0             | %d0 = A(H) in L, H cleared
+
+       addl    %d0, %d1        | B += A(H) (no carry; max 0xffff0000)
+
+       addl    %d3, %d1        | B += D
+       bccs    1f              | if (carry)
+       addil   #0x10000, %d2   |       C += 0x10000
+
+1:     swap    %d1             | B(H) <--> B(L)
+
+| (%d0), (%d1), %d2 = C, %d3 = free, %d4 = A
+
+       clrl    %d3             | extract B(H)
+       movew   %d1, %d3        | %d3 = B(H), zero-extended
+
+       movew   %d4, %d1        | %d1 = (B(L) << 16) + A(L)
+
+       addl    %d3, %d2        | C += B(H)
+
+| We have (X_L32 * Y_L32) in %d2:%d1. Lower 32bit was completed.
+| Add (X_L32 * Y_H32 + X_H32 * Y_L32) to higher 32bit.
+|
+| (%d0), (%d1), %d2 = C, %d3 = free, %d4 = free
+
+       movel   %a0, %d0        | restore X_L32
+       movel   %a1, %d3        | restore Y_L32
+       mulsl   Y_H32(%sp), %d0 | E = X_L32 * Y_H32
+       mulsl   X_H32(%sp), %d3 | F = X_H32 * Y_L32
+       addl    %d2, %d0        | E += C
+       addl    %d3, %d0        | %d0 = E + F
+
+       moveml  (%sp)+, %d2-%d4 | pop %d2-%d4
+       rts
+END(__muldi3)
diff -r 4cd6bbcc9025 -r 375fde11f83f lib/libc/arch/m68k/gen/Makefile.inc
--- a/lib/libc/arch/m68k/gen/Makefile.inc       Sun May 31 11:28:52 2020 +0000
+++ b/lib/libc/arch/m68k/gen/Makefile.inc       Sun May 31 11:43:37 2020 +0000
@@ -1,4 +1,4 @@
-#      $NetBSD: Makefile.inc,v 1.37 2020/04/22 11:28:56 rin Exp $
+#      $NetBSD: Makefile.inc,v 1.38 2020/05/31 11:43:37 rin Exp $
 
 SRCS+= alloca.S fabs.S
 
@@ -42,6 +42,9 @@
 
 .if ${MACHINE_ARCH} == "m68k"
 SRCS+= mulsi3.S umulsi3.S
+SRCS+= muldi3.S
+.else
+muldi3.o muldi3.po muldi3.pico muldi3.d: muldi3.c
 .endif
 
 SRCS+= setjmp.S longjmp.c
diff -r 4cd6bbcc9025 -r 375fde11f83f sys/lib/libkern/arch/m68k/Makefile.inc
--- a/sys/lib/libkern/arch/m68k/Makefile.inc    Sun May 31 11:28:52 2020 +0000
+++ b/sys/lib/libkern/arch/m68k/Makefile.inc    Sun May 31 11:43:37 2020 +0000
@@ -1,4 +1,4 @@
-#      $NetBSD: Makefile.inc,v 1.37 2015/07/30 15:29:52 tsutsui Exp $
+#      $NetBSD: Makefile.inc,v 1.38 2020/05/31 11:43:37 rin Exp $
 
 SRCS+= bswap16.S bswap32.S bswap64.S
 SRCS+= memcmp.S memcpy.S memmove.S memset.S
@@ -12,7 +12,8 @@
 SRCS+= mulsi3.S divsi3.S udivsi3.S modsi3.S umodsi3.S
 .endif
 .if defined(MACHINE_ARCH) && ${MACHINE_ARCH} == "m68k"
-SRCS+= random.S
+SRCS+= muldi3.S random.S
 .else
+muldi3.o muldi3.po muldi3.pico muldi3.d: muldi3.c
 random.o random.po random.pico random.d: random.c
 .endif



Home | Main Index | Thread Index | Old Index