tech-userlevel archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: Support for non-default rounding modes in the softfloat layer



On Apr 30, 2013, at 3:17 AM, Joerg Sonnenberger 
<joerg%britannica.bec.de@localhost> wrote:

> Hi all,
> one major difference between the existing softfloat support in libc and
> compiler-rt's softfloat implementation is the (non)support for
> rounding modes. libc supports some global variables for that, which is
> of limited usefulness in a multi-threaded world. Making them fully
> thread-local on the other hand is quite expensive. A discussion with the
> responsible developer of the compiler-rt code and some LLVM folks
> resulted in the suggestion of providing an alternative set of entry
> points, where the float environment is an explicit argument, if this
> feature is really desirable. Long story short, does anyone use
> non-default rounding modes on platforms with softfloat?

Locally for earm, I have a diff which does provide per-thread
rounding, sticky, and mask support for softfloat.

It wasn't that difficult and seems to work reasonably well.

Index: lib/libc/arch/arm/softfloat/softfloat.h
===================================================================
RCS file: /cvsroot/src/lib/libc/arch/arm/softfloat/softfloat.h,v
retrieving revision 1.10
diff -u -p -r1.10 softfloat.h
--- lib/libc/arch/arm/softfloat/softfloat.h     24 Apr 2013 18:04:46 -0000      1.10
+++ lib/libc/arch/arm/softfloat/softfloat.h     30 Apr 2013 15:58:32 -0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: softfloat.h,v 1.10 2013/04/24 18:04:46 matt Exp $      */
+/*     $NetBSD: softfloat.h,v 1.9 2011/07/07 07:14:57 matt Exp $       */
 
 /* This is a derivative work. */
 
@@ -45,7 +45,10 @@ the `FLOAT128' macro and the quadruple-p
 /* #define FLOAT128 */
 
 #include <stdint.h>
-#include <machine/ieeefp.h>
+#include <arm/ieeefp.h>
+#ifdef __ARM_EABI__
+#include <arm/vfpreg.h>
+#endif
 
 /*
 -------------------------------------------------------------------------------
@@ -84,7 +87,53 @@ enum {
 Software IEC/IEEE floating-point rounding mode.
 -------------------------------------------------------------------------------
 */
+#ifdef __ARM_EABI__
+__CTASSERT(__SHIFTOUT(VFP_FPSCR_RN, VFP_FPSCR_RMODE) == FP_RN);
+__CTASSERT(__SHIFTOUT(VFP_FPSCR_RP, VFP_FPSCR_RMODE) == FP_RP);
+__CTASSERT(__SHIFTOUT(VFP_FPSCR_RM, VFP_FPSCR_RMODE) == FP_RM);
+__CTASSERT(__SHIFTOUT(VFP_FPSCR_RZ, VFP_FPSCR_RMODE) == FP_RZ);
+
+static inline uint32_t
+__arm_getfpscr(void)  /* read the FPSCR with an MRC on coprocessor p10 */
+{
+       uint32_t fpscr;
+        __asm __volatile("mrc p10, 7, %0, c1, c0, 0" : "=r" (fpscr));
+       return fpscr;  /* raw register contents; callers extract fields themselves */
+}
+
+static inline void
+__arm_setfpscr(uint32_t fpscr)  /* write `fpscr' to the FPSCR with an MCR on coprocessor p10 */
+{
+        __asm __volatile("mcr p10, 7, %0, c1, c0, 0" :: "r" (fpscr));
+}
+
+static inline uint32_t
+__arm_clrsetfpscr(uint32_t mask, uint32_t new)  /* replace the FPSCR field `mask' with field value `new' */
+{
+       uint32_t fpscr = __arm_getfpscr();  /* snapshot, so the old field can be returned */
+       __arm_setfpscr((fpscr & ~mask) | (__SHIFTIN(new, mask) & mask));  /* shift IN to the field; never spill past it */
+       return __SHIFTOUT(fpscr, mask);  /* previous field value, shifted down */
+}
+
+static inline fp_rnd
+__arm_fpgetround(void)  /* current rounding mode: the RMODE field of the FPSCR */
+{
+       return __SHIFTOUT(__arm_getfpscr(), VFP_FPSCR_RMODE);
+}
+
+static inline fp_rnd
+__arm_fpsetround(fp_rnd new)  /* set the RMODE field; returns the RMODE value read before the write */
+{
+       return __arm_clrsetfpscr(VFP_FPSCR_RMODE, (uint32_t)new);
+}
+
+#undef float_rounding_mode
+#undef set_float_rounding_mode
+#define float_rounding_mode            __arm_fpgetround()
+#define set_float_rounding_mode(x)     __arm_fpsetround(x)
+#else
 extern fp_rnd float_rounding_mode;
+#endif
 #define float_round_nearest_even FP_RN
 #define float_round_to_zero      FP_RZ
 #define float_round_down         FP_RM
@@ -95,8 +144,63 @@ extern fp_rnd float_rounding_mode;
 Software IEC/IEEE floating-point exception flags.
 -------------------------------------------------------------------------------
 */
+#ifdef __ARM_EABI__
+__CTASSERT(VFP_FPSCR_IXC == FP_X_IMP);
+__CTASSERT(VFP_FPSCR_UFC == FP_X_UFL);
+__CTASSERT(VFP_FPSCR_OFC == FP_X_OFL);
+__CTASSERT(VFP_FPSCR_DZC == FP_X_DZ);
+__CTASSERT(VFP_FPSCR_IOC == FP_X_INV);
+
+static inline fp_except
+__arm_fpgetexcept(void)  /* exception flags: the CSUM bits of the FPSCR */
+{
+       return __SHIFTOUT(__arm_getfpscr(), VFP_FPSCR_CSUM);
+}
+
+static inline fp_except
+__arm_fpgetmask(void)  /* exception mask: the ESUM field of the FPSCR, shifted down */
+{
+       return __SHIFTOUT(__arm_getfpscr(), VFP_FPSCR_ESUM);
+}
+
+static inline fp_except
+__arm_fpsetexcept(fp_except new, int replace)  /* OR `new' into the CSUM flags; returns the old flags */
+{  /* NOTE(review): replace != 0 keeps the old CSUM bits, 0 clears them first -- looks inverted, confirm with callers */
+       const uint32_t fpscr = __arm_getfpscr();
+       const uint32_t nfpscr = (fpscr & ~(replace ? 0 : VFP_FPSCR_CSUM))
+           | __SHIFTOUT(new, VFP_FPSCR_CSUM);  /* masks `new' to CSUM; flags are already in register position per the CTASSERTs */
+       if (fpscr != nfpscr)  /* skip the register write when nothing changes */
+               __arm_setfpscr(nfpscr);
+       return __SHIFTOUT(fpscr, VFP_FPSCR_CSUM);  /* only the old flag bits, not the whole FPSCR */
+}
+
+static inline fp_except
+__arm_fpsetmask(fp_except new)  /* set the ESUM field; returns the ESUM value read before the write */
+{
+       return __arm_clrsetfpscr(VFP_FPSCR_ESUM, (uint32_t)new);
+}
+
+static inline void
+__arm_bisfpscr(uint32_t new)  /* set (OR in) the bits of `new' in the FPSCR */
+{
+       uint32_t fpscr = __arm_getfpscr();
+       if ((fpscr | new) != fpscr)  /* write only if some requested bit is still clear */
+               __arm_setfpscr(fpscr | new);
+}
+
+#undef float_exception_flags
+#undef float_exception_mask
+#define float_exception_flags          __arm_fpgetexcept()
+#define float_exception_mask           __arm_fpgetmask()
+#undef set_float_exception_flags
+#undef set_float_exception_mask
+#define set_float_exception_flags(x, n)        __arm_fpsetexcept((x), (n))
+#define set_float_exception_mask(x)    __arm_fpsetmask(x)
+#define set_float_exception_inexact_flag() __arm_bisfpscr(VFP_FPSCR_IXC)
+#else
 extern fp_except float_exception_flags;
 extern fp_except float_exception_mask;
+#endif
 enum {
     float_flag_inexact   = FP_X_IMP,
     float_flag_underflow = FP_X_UFL,


Home | Main Index | Thread Index | Old Index