[src/trunk]: src/sys/arch/m68k/fpe Introduce the CORDIC algorithm.

To: source-changes-hg%NetBSD.org@localhost
Subject: [src/trunk]: src/sys/arch/m68k/fpe Introduce the CORDIC algorithm.
From: isaki <isaki%NetBSD.org@localhost>
Date: Tue, 07 Apr 2020 00:29:57 +0000
details:   https://anonhg.NetBSD.org/src/rev/63ce6cd6a19c
branches:  trunk
changeset: 786189:63ce6cd6a19c
user:      isaki <isaki%NetBSD.org@localhost>
date:      Fri Apr 19 13:31:11 2013 +0000

description:
Introduce the CORDIC algorithm.
o sine and cosine (e.g., FSIN, FCOS and FSINCOS instructions) is now
  calculated in the CORDIC instead of Taylor expansion.
o tangent (FTAN) is not touched from a viewpoint of the code size.
o The CORDIC is applicable for hyperbolic functions (e.g., FSINH,
  FCOSH, FTANH instructions), but I didn't use it because its working
  range is poor.
o The CORDIC is also usable for inverse trigonometric functions,
  I will commit it at next phase.
o The code size becomes a bit big.  I cannot evaluate speed on m68k
  for some reasons, but in test on i386 the CORDIC is approximately
  100 times faster in sin/cos.

diffstat:

 sys/arch/m68k/fpe/files.fpe     |    3 +-
 sys/arch/m68k/fpe/fpu_cordic.c  |  646 ++++++++++++++++++++++++++++++++++++++++
 sys/arch/m68k/fpe/fpu_emulate.h |   11 +-
 sys/arch/m68k/fpe/fpu_hyperb.c  |   71 ++++-
 sys/arch/m68k/fpe/fpu_trig.c    |  177 +---------
 5 files changed, 751 insertions(+), 157 deletions(-)

diffs (truncated from 1050 to 300 lines):

diff -r a59683227822 -r 63ce6cd6a19c sys/arch/m68k/fpe/files.fpe
--- a/sys/arch/m68k/fpe/files.fpe       Fri Apr 19 13:14:10 2013 +0000
+++ b/sys/arch/m68k/fpe/files.fpe       Fri Apr 19 13:31:11 2013 +0000
@@ -1,10 +1,11 @@
-# $NetBSD: files.fpe,v 1.2 1995/11/03 04:51:51 briggs Exp $
+# $NetBSD: files.fpe,v 1.3 2013/04/19 13:31:11 isaki Exp $
 
 # Config(.new) file for m68k floating point emulator.
 # Included by ports that need it.
 
 file   arch/m68k/fpe/fpu_add.c                 fpu_emulate
 file   arch/m68k/fpe/fpu_calcea.c              fpu_emulate
+file   arch/m68k/fpe/fpu_cordic.c              fpu_emulate
 file   arch/m68k/fpe/fpu_div.c                 fpu_emulate
 file   arch/m68k/fpe/fpu_emulate.c             fpu_emulate
 file   arch/m68k/fpe/fpu_exp.c                 fpu_emulate
diff -r a59683227822 -r 63ce6cd6a19c sys/arch/m68k/fpe/fpu_cordic.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/m68k/fpe/fpu_cordic.c    Fri Apr 19 13:31:11 2013 +0000
@@ -0,0 +1,646 @@
+/*     $NetBSD: fpu_cordic.c,v 1.1 2013/04/19 13:31:11 isaki Exp $     */
+
+/*
+ * Copyright (c) 2013 Tetsuya Isaki. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: fpu_cordic.c,v 1.1 2013/04/19 13:31:11 isaki Exp $");
+
+#include <machine/ieee.h>
+
+#include "fpu_emulate.h"
+
+/*
+ * sfpn = shoftened fp number; the idea is from fpu_log.c but not the same.
+ * The most significant byte of sp_m0 is EXP (signed byte) and the rest
+ * of sp_m0 is fp_mant[0].
+ */
+struct sfpn {
+       uint32_t sp_m0;
+       uint32_t sp_m1;
+       uint32_t sp_m2;
+};
+
+#if defined(CORDIC_BOOTSTRAP)
+/*
+ * This is a bootstrap code to generate a pre-calculated tables such as
+ * atan_table[] and atanh_table[].  However, it's just for reference.
+ * If you want to run the bootstrap, you will define CORDIC_BOOTSTRAP
+ * and modify these files as a userland application.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <float.h>
+
+static void prepare_cordic_const(struct fpemu *);
+static struct fpn *fpu_gain1_cordic(struct fpemu *);
+static struct fpn *fpu_gain2_cordic(struct fpemu *);
+static struct fpn *fpu_atan_tayler(struct fpemu *);
+static void printf_fpn(const struct fpn *);
+static void printf_sfpn(const struct sfpn *);
+static void fpn_to_sfpn(struct sfpn *, const struct fpn *);
+
+static struct sfpn atan_table[EXT_FRACBITS];
+static struct sfpn atanh_table[EXT_FRACBITS];
+static struct fpn inv_gain1;
+static struct fpn inv_gain2;
+
+int
+main(int argc, char *argv[])
+{
+       struct fpemu dummyfe;
+       int i;
+       struct fpn fp;
+
+       memset(&dummyfe, 0, sizeof(dummyfe));
+       prepare_cordic_const(&dummyfe);
+
+       /* output as source code */
+       printf("static const struct sfpn atan_table[] = {\n");
+       for (i = 0; i < EXT_FRACBITS; i++) {
+               printf("\t");
+               printf_sfpn(&atan_table[i]);
+               printf(",\n");
+       }
+       printf("};\n\n");
+
+       printf("static const struct sfpn atanh_table[] = {\n");
+       for (i = 0; i < EXT_FRACBITS; i++) {
+               printf("\t");
+               printf_sfpn(&atanh_table[i]);
+               printf(",\n");
+       }
+       printf("};\n\n");
+
+       printf("const struct fpn fpu_cordic_inv_gain1 =\n\t");
+       printf_fpn(&inv_gain1);
+       printf(";\n\n");
+
+       printf("const struct fpn fpu_cordic_inv_gain2 =\n\t");
+       printf_fpn(&inv_gain2);
+       printf(";\n\n");
+}
+
+/*
+ * This routine uses fpu_const(), fpu_add(), fpu_div(), fpu_logn()
+ * and fpu_atan_tayler() as bootstrap.
+ */
+static void
+prepare_cordic_const(struct fpemu *fe)
+{
+       struct fpn t;
+       struct fpn x;
+       struct fpn *r;
+       int i;
+
+       /* atan_table and atanh_table */
+       fpu_const(&t, FPU_CONST_1);
+       for (i = 0; i < EXT_FRACBITS; i++) {
+               /* atan(t) */
+               CPYFPN(&fe->fe_f2, &t);
+               r = fpu_atan_tayler(fe);
+               fpn_to_sfpn(&atan_table[i], r);
+
+               /* t /= 2 */
+               t.fp_exp--;
+
+               /* (1-t) */
+               fpu_const(&fe->fe_f1, FPU_CONST_1);
+               CPYFPN(&fe->fe_f2, &t);
+               fe->fe_f2.fp_sign = 1;
+               r = fpu_add(fe);
+               CPYFPN(&x, r);
+
+               /* (1+t) */
+               fpu_const(&fe->fe_f1, FPU_CONST_1);
+               CPYFPN(&fe->fe_f2, &t);
+               r = fpu_add(fe);
+
+               /* r = (1+t)/(1-t) */
+               CPYFPN(&fe->fe_f1, r);
+               CPYFPN(&fe->fe_f2, &x);
+               r = fpu_div(fe);
+
+               /* r = log(r) */
+               CPYFPN(&fe->fe_f2, r);
+               r = fpu_logn(fe);
+
+               /* r /= 2 */
+               r->fp_exp--;
+
+               fpn_to_sfpn(&atanh_table[i], r);
+       }
+
+       /* inv_gain1 = 1 / gain1cordic() */
+       r = fpu_gain1_cordic(fe);
+       CPYFPN(&fe->fe_f2, r);
+       fpu_const(&fe->fe_f1, FPU_CONST_1);
+       r = fpu_div(fe);
+       CPYFPN(&inv_gain1, r);
+
+       /* inv_gain2 = 1 / gain2cordic() */
+       r = fpu_gain2_cordic(fe);
+       CPYFPN(&fe->fe_f2, r);
+       fpu_const(&fe->fe_f1, FPU_CONST_1);
+       r = fpu_div(fe);
+       CPYFPN(&inv_gain2, r);
+}
+
+static struct fpn *
+fpu_gain1_cordic(struct fpemu *fe)
+{
+       struct fpn x;
+       struct fpn y;
+       struct fpn z;
+       struct fpn v;
+
+       fpu_const(&x, FPU_CONST_1);
+       fpu_const(&y, FPU_CONST_0);
+       fpu_const(&z, FPU_CONST_0);
+       CPYFPN(&v, &x);
+       v.fp_sign = !v.fp_sign;
+
+       fpu_cordit1(fe, &x, &y, &z, &v);
+       CPYFPN(&fe->fe_f2, &x);
+       return &fe->fe_f2;
+}
+
+static struct fpn *
+fpu_gain2_cordic(struct fpemu *fe)
+{
+       struct fpn x;
+       struct fpn y;
+       struct fpn z;
+       struct fpn v;
+
+       fpu_const(&x, FPU_CONST_1);
+       fpu_const(&y, FPU_CONST_0);
+       fpu_const(&z, FPU_CONST_0);
+       CPYFPN(&v, &x);
+       v.fp_sign = !v.fp_sign;
+
+       fpu_cordit2(fe, &x, &y, &z, &v);
+       CPYFPN(&fe->fe_f2, &x);
+       return &fe->fe_f2;
+}
+
+/*
+ * arctan(x) = pi/4 (for |x| = 1)
+ *
+ *                 x^3   x^5   x^7
+ * arctan(x) = x - --- + --- - --- + ...   (for |x| < 1)
+ *                  3     5     7
+ */
+static struct fpn *
+fpu_atan_tayler(struct fpemu *fe)
+{
+       struct fpn res;
+       struct fpn x2;
+       struct fpn s0;
+       struct fpn *s1;
+       struct fpn *r;
+       uint32_t k;
+
+       /* arctan(1) is pi/4 */
+       if (fe->fe_f2.fp_exp == 0) {
+               fpu_const(&fe->fe_f2, FPU_CONST_PI);
+               fe->fe_f2.fp_exp -= 2;
+               return &fe->fe_f2;
+       }
+
+       /* s0 := x */
+       CPYFPN(&s0, &fe->fe_f2);
+
+       /* res := x */
+       CPYFPN(&res, &fe->fe_f2);
+
+       /* x2 := x * x */
+       CPYFPN(&fe->fe_f1, &fe->fe_f2);
+       r = fpu_mul(fe);
+       CPYFPN(&x2, r);
+
+       k = 3;
+       for (;;) {
+               /* s1 := -s0 * x2 */
+               CPYFPN(&fe->fe_f1, &s0);
+               CPYFPN(&fe->fe_f2, &x2);
+               s1 = fpu_mul(fe);
+               s1->fp_sign ^= 1;
+               CPYFPN(&fe->fe_f1, s1);
+
+               /* s0 := s1 for next loop */
+               CPYFPN(&s0, s1);
+
+               /* s1 := s1 / k */
+               fpu_explode(fe, &fe->fe_f2, FTYPE_LNG, &k);
+               s1 = fpu_div(fe);
+
+               /* break if s1 is enough small */
+               if (ISZERO(s1))
+                       break;
+               if (res.fp_exp - s1->fp_exp >= FP_NMANT)
+                       break;
+
+               /* res += s1 */
+               CPYFPN(&fe->fe_f2, s1);
+               CPYFPN(&fe->fe_f1, &res);
+               r = fpu_add(fe);
+               CPYFPN(&res, r);
+
+               k += 2;
+       }
+
+       CPYFPN(&fe->fe_f2, &res);
+       return &fe->fe_f2;
+}
+
Prev by Date: [src/trunk]: src/lib/libc/arch/arm/gen Actually fail if the magic number is f...
Next by Date: [src/trunk]: src/sys/arch/m68k/fpe Implement inverse trigonometric functions ...
Previous by Thread: [src/trunk]: src/lib/libc/arch/arm/gen Actually fail if the magic number is f...
Next by Thread: [src/trunk]: src/sys/arch/m68k/fpe Implement inverse trigonometric functions ...
Indexes:
Home | Main Index | Thread Index | Old Index