Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/arch/m68k/fpe Improve the exponential and hyperbolic fun...



details:   https://anonhg.NetBSD.org/src/rev/eecd80a61dd7
branches:  trunk
changeset: 349305:eecd80a61dd7
user:      isaki <isaki%NetBSD.org@localhost>
date:      Mon Dec 05 15:31:01 2016 +0000

description:
Improve the exponential and hyperbolic function's performance
10..100 times faster.
PR port-m68k/51645 from rin@ (and modified by me)

diffstat:

 sys/arch/m68k/fpe/fpu_exp.c    |   62 +++++++++++++++----
 sys/arch/m68k/fpe/fpu_hyperb.c |  124 ++++++++++++++++------------------------
 2 files changed, 97 insertions(+), 89 deletions(-)

diffs (273 lines):

diff -r 8275caec8459 -r eecd80a61dd7 sys/arch/m68k/fpe/fpu_exp.c
--- a/sys/arch/m68k/fpe/fpu_exp.c       Mon Dec 05 13:17:28 2016 +0000
+++ b/sys/arch/m68k/fpe/fpu_exp.c       Mon Dec 05 15:31:01 2016 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: fpu_exp.c,v 1.8 2013/04/20 04:54:22 isaki Exp $        */
+/*     $NetBSD: fpu_exp.c,v 1.9 2016/12/05 15:31:01 isaki Exp $        */
 
 /*
  * Copyright (c) 1995  Ken Nakata
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: fpu_exp.c,v 1.8 2013/04/20 04:54:22 isaki Exp $");
+__KERNEL_RCSID(0, "$NetBSD: fpu_exp.c,v 1.9 2016/12/05 15:31:01 isaki Exp $");
 
 #include <machine/ieee.h>
 
@@ -100,12 +100,16 @@
 }
 
 /*
- * exp(x)
+ * exp(x) = 2^k * exp(r) with k = round(x / ln2) and r = x - k * ln2
+ *
+ * Algorithm partially taken from libm, where exp(r) is approximated by a
+ * rational function of r. We use the Taylor expansion instead.
  */
 struct fpn *
 fpu_etox(struct fpemu *fe)
 {
-       struct fpn *fp;
+       struct fpn x, *fp;
+       int j, k;
 
        if (ISNAN(&fe->fe_f2))
                return &fe->fe_f2;
@@ -115,19 +119,47 @@
                return &fe->fe_f2;
        }
 
-       if (fe->fe_f2.fp_sign == 0) {
-               /* exp(x) */
-               fp = fpu_etox_taylor(fe);
-       } else {
-               /* 1/exp(-x) */
-               fe->fe_f2.fp_sign = 0;
+       CPYFPN(&x, &fe->fe_f2);
+
+       /* k = round(x / ln2) */
+       CPYFPN(&fe->fe_f1, &fe->fe_f2);
+       fpu_const(&fe->fe_f2, FPU_CONST_LN_2);
+       fp = fpu_div(fe);
+       CPYFPN(&fe->fe_f2, fp);
+       fp = fpu_int(fe);
+       if (ISZERO(fp)) {
+               /* k = 0 */
+               CPYFPN(&fe->fe_f2, &x);
                fp = fpu_etox_taylor(fe);
+               return fp;
+       }
+       /* extract k as integer format from fpn format */
+       j = FP_LG - fp->fp_exp;
+       if (j < 0) {
+               if (fp->fp_sign)
+                       fp->fp_class = FPC_ZERO;                /* k < -2^18 */
+               else
+                       fp->fp_class = FPC_INF;                 /* k >  2^18 */
+               return fp;
+       }
+       k = fp->fp_mant[0] >> j;
+       if (fp->fp_sign)
+               k *= -1;
 
-               CPYFPN(&fe->fe_f2, fp);
-               fpu_const(&fe->fe_f1, FPU_CONST_1);
-               fp = fpu_div(fe);
-       }
-       
+       /* exp(r) = exp(x - k * ln2) */
+       CPYFPN(&fe->fe_f1, fp);
+       fpu_const(&fe->fe_f2, FPU_CONST_LN_2);
+       fp = fpu_mul(fe);
+       fp->fp_sign = !fp->fp_sign;
+       CPYFPN(&fe->fe_f1, fp);
+       CPYFPN(&fe->fe_f2, &x);
+       fp = fpu_add(fe);
+       CPYFPN(&fe->fe_f2, fp);
+       fp = fpu_etox_taylor(fe);
+
+       /* 2^k */
+       fp->fp_exp += k;
+
        return fp;
 }
 
diff -r 8275caec8459 -r eecd80a61dd7 sys/arch/m68k/fpe/fpu_hyperb.c
--- a/sys/arch/m68k/fpe/fpu_hyperb.c    Mon Dec 05 13:17:28 2016 +0000
+++ b/sys/arch/m68k/fpe/fpu_hyperb.c    Mon Dec 05 15:31:01 2016 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: fpu_hyperb.c,v 1.16 2013/10/11 03:37:08 isaki Exp $    */
+/*     $NetBSD: fpu_hyperb.c,v 1.17 2016/12/05 15:31:01 isaki Exp $    */
 
 /*
  * Copyright (c) 1995  Ken Nakata
@@ -57,15 +57,12 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: fpu_hyperb.c,v 1.16 2013/10/11 03:37:08 isaki Exp $");
+__KERNEL_RCSID(0, "$NetBSD: fpu_hyperb.c,v 1.17 2016/12/05 15:31:01 isaki Exp $");
 
 #include <machine/ieee.h>
 
 #include "fpu_emulate.h"
 
-/* The number of items to terminate the Taylor expansion */
-#define MAX_ITEMS      (2000)
-
 /*
  * fpu_hyperb.c: defines the following functions
  *
@@ -137,71 +134,14 @@
 }
 
 /*
- * taylor expansion used by sinh(), cosh().
+ *            exp(x) + exp(-x)
+ * cosh(x) = ------------------
+ *                   2
  */
-static struct fpn *
-__fpu_sinhcosh_taylor(struct fpemu *fe, struct fpn *s0, uint32_t f)
-{
-       struct fpn res;
-       struct fpn x2;
-       struct fpn *s1;
-       struct fpn *r;
-       int sign;
-       uint32_t k;
-
-       /* x2 := x * x */
-       CPYFPN(&fe->fe_f1, &fe->fe_f2);
-       r = fpu_mul(fe);
-       CPYFPN(&x2, r);
-
-       /* res := s0 */
-       CPYFPN(&res, s0);
-
-       sign = 1;       /* sign := (-1)^n */
-
-       for (; f < (2 * MAX_ITEMS); ) {
-               /* (f1 :=) s0 * x^2 */
-               CPYFPN(&fe->fe_f1, s0);
-               CPYFPN(&fe->fe_f2, &x2);
-               r = fpu_mul(fe);
-               CPYFPN(&fe->fe_f1, r);
-
-               /*
-                * for sinh(),  s1 := s0 * x^2 / (2n+1)2n
-                * for cosh(),  s1 := s0 * x^2 / 2n(2n-1)
-                */
-               k = f * (f + 1);
-               fpu_explode(fe, &fe->fe_f2, FTYPE_LNG, &k);
-               s1 = fpu_div(fe);
-
-               /* break if s1 is enough small */
-               if (ISZERO(s1))
-                       break;
-               if (res.fp_exp - s1->fp_exp >= EXT_FRACBITS)
-                       break;
-
-               /* s0 := s1 for next loop */
-               CPYFPN(s0, s1);
-
-               /* res += s1 */
-               CPYFPN(&fe->fe_f2, s1);
-               CPYFPN(&fe->fe_f1, &res);
-               r = fpu_add(fe);
-               CPYFPN(&res, r);
-
-               f += 2;
-               sign ^= 1;
-       }
-
-       CPYFPN(&fe->fe_f2, &res);
-       return &fe->fe_f2;
-}
-
 struct fpn *
 fpu_cosh(struct fpemu *fe)
 {
-       struct fpn s0;
-       struct fpn *r;
+       struct fpn x, *fp;
 
        if (ISNAN(&fe->fe_f2))
                return &fe->fe_f2;
@@ -211,17 +151,37 @@
                return &fe->fe_f2;
        }
 
-       fpu_const(&s0, FPU_CONST_1);
-       r = __fpu_sinhcosh_taylor(fe, &s0, 1);
+       /* if x is +0/-0, return 1 */ /* XXX is this necessary? */
+       if (ISZERO(&fe->fe_f2)) {
+               fpu_const(&fe->fe_f2, FPU_CONST_1);
+               return &fe->fe_f2;
+       }
+
+       fp = fpu_etox(fe);
+       CPYFPN(&x, fp);
 
-       return r;
+       fpu_const(&fe->fe_f1, FPU_CONST_1);
+       CPYFPN(&fe->fe_f2, fp);
+       fp = fpu_div(fe);
+
+       CPYFPN(&fe->fe_f1, fp);
+       CPYFPN(&fe->fe_f2, &x);
+       fp = fpu_add(fe);
+
+       fp->fp_exp--;
+
+       return fp;
 }
 
+/*
+ *            exp(x) - exp(-x)
+ * sinh(x) = ------------------
+ *                   2
+ */
 struct fpn *
 fpu_sinh(struct fpemu *fe)
 {
-       struct fpn s0;
-       struct fpn *r;
+       struct fpn x, *fp;
 
        if (ISNAN(&fe->fe_f2))
                return &fe->fe_f2;
@@ -232,12 +192,28 @@
        if (ISZERO(&fe->fe_f2))
                return &fe->fe_f2;
 
-       CPYFPN(&s0, &fe->fe_f2);
-       r = __fpu_sinhcosh_taylor(fe, &s0, 2);
+       fp = fpu_etox(fe);
+       CPYFPN(&x, fp);
+
+       fpu_const(&fe->fe_f1, FPU_CONST_1);
+       CPYFPN(&fe->fe_f2, fp);
+       fp = fpu_div(fe);
 
-       return r;
+       fp->fp_sign = 1;
+       CPYFPN(&fe->fe_f1, fp);
+       CPYFPN(&fe->fe_f2, &x);
+       fp = fpu_add(fe);
+
+       fp->fp_exp--;
+
+       return fp;
 }
 
+/*
+ *            sinh(x)
+ * tanh(x) = ---------
+ *            cosh(x)
+ */
 struct fpn *
 fpu_tanh(struct fpemu *fe)
 {



Home | Main Index | Thread Index | Old Index