Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src When hardware subnormal support is available, disable flush-to-zero.



details:   https://anonhg.NetBSD.org/src/rev/acb9a4cab946
branches:  trunk
changeset: 836801:acb9a4cab946
user:      riastradh <riastradh%NetBSD.org@localhost>
date:      Wed Nov 07 06:47:38 2018 +0000

description:
When hardware subnormal support is available, disable flush-to-zero.

Similarly, when hardware NaN propagation is available, disable
default-NaN substitution.

This enables IEEE 754 semantics by default on any hardware that
supports them.  Programs that want flush-to-zero or default-NaN
substitution can enable them explicitly.

ok ryo@

diffstat:

 lib/libm/arch/aarch64/fenv.c      |   6 +-
 sys/arch/aarch64/aarch64/fpu.c    |  60 ++++++++++++++++++++++++++++++++++++--
 sys/arch/aarch64/include/armreg.h |   5 ++-
 3 files changed, 63 insertions(+), 8 deletions(-)

diffs (139 lines):

diff -r cfa38e0ef858 -r acb9a4cab946 lib/libm/arch/aarch64/fenv.c
--- a/lib/libm/arch/aarch64/fenv.c      Wed Nov 07 04:00:12 2018 +0000
+++ b/lib/libm/arch/aarch64/fenv.c      Wed Nov 07 06:47:38 2018 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: fenv.c,v 1.3 2017/03/22 23:11:08 chs Exp $ */
+/* $NetBSD: fenv.c,v 1.4 2018/11/07 06:47:38 riastradh Exp $ */
 
 /*-
  * Copyright (c) 2014 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__RCSID("$NetBSD: fenv.c,v 1.3 2017/03/22 23:11:08 chs Exp $");
+__RCSID("$NetBSD: fenv.c,v 1.4 2018/11/07 06:47:38 riastradh Exp $");
 
 #include "namespace.h"
 
@@ -63,7 +63,7 @@
 
 const fenv_t __fe_dfl_env = {
        .__fpsr = 0,
-       .__fpcr = FPCR_FZ|FPCR_DN|FPCR_RN,
+       .__fpcr = __SHIFTIN(FPCR_RN, FPCR_RMODE),
 };
 
 /*
diff -r cfa38e0ef858 -r acb9a4cab946 sys/arch/aarch64/aarch64/fpu.c
--- a/sys/arch/aarch64/aarch64/fpu.c    Wed Nov 07 04:00:12 2018 +0000
+++ b/sys/arch/aarch64/aarch64/fpu.c    Wed Nov 07 06:47:38 2018 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: fpu.c,v 1.2 2018/04/01 04:35:03 ryo Exp $ */
+/* $NetBSD: fpu.c,v 1.3 2018/11/07 06:47:38 riastradh Exp $ */
 
 /*-
  * Copyright (c) 2014 The NetBSD Foundation, Inc.
@@ -31,7 +31,7 @@
 
 #include <sys/cdefs.h>
 
-__KERNEL_RCSID(1, "$NetBSD: fpu.c,v 1.2 2018/04/01 04:35:03 ryo Exp $");
+__KERNEL_RCSID(1, "$NetBSD: fpu.c,v 1.3 2018/11/07 06:47:38 riastradh Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -75,10 +75,62 @@
        KASSERT(l == curlwp);
 
        if (__predict_false((flags & PCU_VALID) == 0)) {
+               uint64_t mvfr1 = reg_mvfr1_el1_read();
+               bool fp16 = false;
+               uint32_t fpcr = 0;
+
+               /*
+                * Determine whether ARMv8.2-FP16 binary16
+                * floating-point arithmetic is supported.
+                */
+               switch (__SHIFTOUT(mvfr1, MVFR1_FPHP)) {
+               case MVFR1_FPHP_HALF_ARITH:
+                       fp16 = true;
+                       break;
+               }
+
+               /* Rounding mode: round to nearest, ties to even.  */
+               fpcr |= __SHIFTIN(FPCR_RN, FPCR_RMODE);
+
+               /* NaN propagation or default NaN.   */
+               switch (__SHIFTOUT(mvfr1, MVFR1_FPDNAN)) {
+               case MVFR1_FPDNAN_NAN:
+                       /*
+                        * IEEE 754 NaN propagation supported.  Don't
+                        * enable default NaN mode.
+                        */
+                       break;
+               default:
+                       /*
+                        * IEEE 754 NaN propagation not supported, so
+                        * enable default NaN mode.
+                        */
+                       fpcr |= FPCR_DN;
+               }
+
+               /* Subnormal arithmetic or flush-to-zero.  */
+               switch (__SHIFTOUT(mvfr1, MVFR1_FPFTZ)) {
+               case MVFR1_FPFTZ_DENORMAL:
+                       /*
+                        * IEEE 754 subnormal arithmetic supported.
+                        * Don't enable flush-to-zero mode.
+                        */
+                       break;
+               default:
+                       /*
+                        * IEEE 754 subnormal arithmetic not supported,
+                        * so enable flush-to-zero mode.  If FP16 is
+                        * supported, also enable flush-to-zero for
+                        * binary16 arithmetic.
+                        */
+                       fpcr |= FPCR_FZ;
+                       if (fp16)
+                               fpcr |= FPCR_FZ16;
+               }
+
                /* initialize fpregs */
                memset(&pcb->pcb_fpregs, 0, sizeof(pcb->pcb_fpregs));
-               pcb->pcb_fpregs.fpcr =
-                   FPCR_DN | FPCR_FZ | __SHIFTIN(FPCR_RN, FPCR_RMODE);
+               pcb->pcb_fpregs.fpcr = fpcr;
 
                curcpu()->ci_vfp_use.ev_count++;
        } else {
diff -r cfa38e0ef858 -r acb9a4cab946 sys/arch/aarch64/include/armreg.h
--- a/sys/arch/aarch64/include/armreg.h Wed Nov 07 04:00:12 2018 +0000
+++ b/sys/arch/aarch64/include/armreg.h Wed Nov 07 06:47:38 2018 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: armreg.h,v 1.19 2018/10/12 01:28:58 ryo Exp $ */
+/* $NetBSD: armreg.h,v 1.20 2018/11/07 06:47:38 riastradh Exp $ */
 
 /*-
  * Copyright (c) 2014 The NetBSD Foundation, Inc.
@@ -102,6 +102,7 @@
 #define         FPCR_RM                2               //  Round towards Minus infinity
 #define         FPCR_RZ                3               //  Round towards Zero
 #define        FPCR_STRIDE             __BITS(21,20)
+#define        FPCR_FZ16               __BIT(19)       // Flush-To-Zero for FP16
 #define        FPCR_LEN                __BITS(18,16)
 #define        FPCR_IDE                __BIT(15)       // Input Denormal Exception enable
 #define        FPCR_IXE                __BIT(12)       // IneXact Exception enable
@@ -303,9 +304,11 @@
 #define         MVFR1_FPHP_NONE         0
 #define         MVFR1_FPHP_HALF_SINGLE  1
 #define         MVFR1_FPHP_HALF_DOUBLE  2
+#define         MVFR1_FPHP_HALF_ARITH   3
 #define        MVFR1_SIMDHP            __BITS(23,20)
 #define         MVFR1_SIMDHP_NONE       0
 #define         MVFR1_SIMDHP_HALF       1
+#define         MVFR1_SIMDHP_HALF_ARITH 3
 #define        MVFR1_SIMDSP            __BITS(19,16)
 #define         MVFR1_SIMDSP_NONE       0
 #define         MVFR1_SIMDSP_SINGLE     1



Home | Main Index | Thread Index | Old Index