From ffc51161f327bd77a9650f4bc70908297343d5fa Mon Sep 17 00:00:00 2001 From: Lioncash Date: Mon, 11 May 2015 14:46:31 -0400 Subject: vfp: Handle flush-to-zero mode. --- src/core/arm/skyeye_common/vfp/vfp_helper.h | 117 +++++++++++++++------------ src/core/arm/skyeye_common/vfp/vfpdouble.cpp | 35 ++++---- src/core/arm/skyeye_common/vfp/vfpsingle.cpp | 30 +++---- 3 files changed, 98 insertions(+), 84 deletions(-) (limited to 'src/core/arm/skyeye_common') diff --git a/src/core/arm/skyeye_common/vfp/vfp_helper.h b/src/core/arm/skyeye_common/vfp/vfp_helper.h index 6b3dae28..ccc0212a 100644 --- a/src/core/arm/skyeye_common/vfp/vfp_helper.h +++ b/src/core/arm/skyeye_common/vfp/vfp_helper.h @@ -35,6 +35,7 @@ #include #include "common/common_types.h" #include "core/arm/skyeye_common/armdefs.h" +#include "core/arm/skyeye_common/vfp/asm_vfp.h" #define do_div(n, base) {n/=base;} @@ -236,33 +237,6 @@ struct vfp_single { #define vfp_single_packed_exponent(v) (((v) >> VFP_SINGLE_MANTISSA_BITS) & ((1 << VFP_SINGLE_EXPONENT_BITS) - 1)) #define vfp_single_packed_mantissa(v) ((v) & ((1 << VFP_SINGLE_MANTISSA_BITS) - 1)) -// Unpack a single-precision float. Note that this returns the magnitude -// of the single-precision float mantissa with the 1. if necessary, -// aligned to bit 30. -static inline void vfp_single_unpack(vfp_single* s, s32 val) -{ - u32 significand; - - s->sign = vfp_single_packed_sign(val) >> 16, - s->exponent = vfp_single_packed_exponent(val); - - significand = (u32) val; - significand = (significand << (32 - VFP_SINGLE_MANTISSA_BITS)) >> 2; - if (s->exponent && s->exponent != 255) - significand |= 0x40000000; - s->significand = significand; -} - -// Re-pack a single-precision float. This assumes that the float is -// already normalised such that the MSB is bit 30, _not_ bit 31. -static inline s32 vfp_single_pack(vfp_single* s) -{ - u32 val = (s->sign << 16) + - (s->exponent << VFP_SINGLE_MANTISSA_BITS) + - (s->significand >> VFP_SINGLE_LOW_BITS); - return (s32)val; -} - enum : u32 { VFP_NUMBER = (1 << 0), VFP_ZERO = (1 << 1), @@ -294,6 +268,39 @@ static inline int vfp_single_type(vfp_single* s) return type; } +// Unpack a single-precision float. Note that this returns the magnitude +// of the single-precision float mantissa with the 1. if necessary, +// aligned to bit 30. +static inline void vfp_single_unpack(vfp_single* s, s32 val, u32* fpscr) +{ + s->sign = vfp_single_packed_sign(val) >> 16, + s->exponent = vfp_single_packed_exponent(val); + + u32 significand = ((u32)val << (32 - VFP_SINGLE_MANTISSA_BITS)) >> 2; + if (s->exponent && s->exponent != 255) + significand |= 0x40000000; + s->significand = significand; + + // If flush-to-zero mode is enabled, turn the denormal into zero. + // On a VFPv2 architecture, the sign of the zero is always positive. + if ((*fpscr & FPSCR_FLUSH_TO_ZERO) != 0 && (vfp_single_type(s) & VFP_DENORMAL) != 0) { + s->sign = 0; + s->exponent = 0; + s->significand = 0; + *fpscr |= FPSCR_IDC; + } +} + +// Re-pack a single-precision float. This assumes that the float is +// already normalised such that the MSB is bit 30, _not_ bit 31. +static inline s32 vfp_single_pack(vfp_single* s) +{ + u32 val = (s->sign << 16) + + (s->exponent << VFP_SINGLE_MANTISSA_BITS) + + (s->significand >> VFP_SINGLE_LOW_BITS); + return (s32)val; +} + u32 vfp_single_normaliseround(ARMul_State* state, int sd, vfp_single* vs, u32 fpscr, u32 exceptions, const char* func); @@ -328,24 +335,49 @@ struct vfp_double { #define vfp_double_packed_exponent(v) (((v) >> VFP_DOUBLE_MANTISSA_BITS) & ((1 << VFP_DOUBLE_EXPONENT_BITS) - 1)) #define vfp_double_packed_mantissa(v) ((v) & ((1ULL << VFP_DOUBLE_MANTISSA_BITS) - 1)) +static inline int vfp_double_type(vfp_double* s) +{ + int type = VFP_NUMBER; + if (s->exponent == 2047) { + if (s->significand == 0) + type = VFP_INFINITY; + else if (s->significand & VFP_DOUBLE_SIGNIFICAND_QNAN) + type = VFP_QNAN; + else + type = VFP_SNAN; + } else if (s->exponent == 0) { + if (s->significand == 0) + type |= VFP_ZERO; + else + type |= VFP_DENORMAL; + } + return type; +} + // Unpack a double-precision float. Note that this returns the magnitude // of the double-precision float mantissa with the 1. if necessary, // aligned to bit 62. -static inline void vfp_double_unpack(vfp_double* s, s64 val) +static inline void vfp_double_unpack(vfp_double* s, s64 val, u32* fpscr) { - u64 significand; - s->sign = vfp_double_packed_sign(val) >> 48; s->exponent = vfp_double_packed_exponent(val); - significand = (u64) val; - significand = (significand << (64 - VFP_DOUBLE_MANTISSA_BITS)) >> 2; + u64 significand = ((u64)val << (64 - VFP_DOUBLE_MANTISSA_BITS)) >> 2; if (s->exponent && s->exponent != 2047) significand |= (1ULL << 62); s->significand = significand; + + // If flush-to-zero mode is enabled, turn the denormal into zero. + // On a VFPv2 architecture, the sign of the zero is always positive. + if ((*fpscr & FPSCR_FLUSH_TO_ZERO) != 0 && (vfp_double_type(s) & VFP_DENORMAL) != 0) { + s->sign = 0; + s->exponent = 0; + s->significand = 0; + *fpscr |= FPSCR_IDC; + } } -// Re-pack a double-precision float. This assumes that the float is +// Re-pack a double-precision float. This assumes that the float is // already normalised such that the MSB is bit 30, _not_ bit 31. static inline s64 vfp_double_pack(vfp_double* s) { @@ -355,25 +387,6 @@ static inline s64 vfp_double_pack(vfp_double* s) return (s64)val; } -static inline int vfp_double_type(vfp_double* s) -{ - int type = VFP_NUMBER; - if (s->exponent == 2047) { - if (s->significand == 0) - type = VFP_INFINITY; - else if (s->significand & VFP_DOUBLE_SIGNIFICAND_QNAN) - type = VFP_QNAN; - else - type = VFP_SNAN; - } else if (s->exponent == 0) { - if (s->significand == 0) - type |= VFP_ZERO; - else - type |= VFP_DENORMAL; - } - return type; -} - u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand); // A special flag to tell the normalisation code not to normalise. diff --git a/src/core/arm/skyeye_common/vfp/vfpdouble.cpp b/src/core/arm/skyeye_common/vfp/vfpdouble.cpp index d76d37fd..ab9fec39 100644 --- a/src/core/arm/skyeye_common/vfp/vfpdouble.cpp +++ b/src/core/arm/skyeye_common/vfp/vfpdouble.cpp @@ -291,7 +291,8 @@ static u32 vfp_double_fsqrt(ARMul_State* state, int dd, int unused, int dm, u32 vfp_double vdm, vdd, *vdp; int ret, tm; - vfp_double_unpack(&vdm, vfp_get_double(state, dm)); + vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr); + tm = vfp_double_type(&vdm); if (tm & (VFP_NAN|VFP_INFINITY)) { vdp = &vdd; @@ -473,7 +474,7 @@ static u32 vfp_double_fcvts(ARMul_State* state, int sd, int unused, int dm, u32 u32 exceptions = 0; LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__); - vfp_double_unpack(&vdm, vfp_get_double(state, dm)); + vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr); tm = vfp_double_type(&vdm); @@ -543,7 +544,7 @@ static u32 vfp_double_ftoui(ARMul_State* state, int sd, int unused, int dm, u32 int tm; LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__); - vfp_double_unpack(&vdm, vfp_get_double(state, dm)); + vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr); /* * Do we have a denormalised number? @@ -624,7 +625,7 @@ static u32 vfp_double_ftosi(ARMul_State* state, int sd, int unused, int dm, u32 int tm; LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__); - vfp_double_unpack(&vdm, vfp_get_double(state, dm)); + vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr); vfp_double_dump("VDM", &vdm); /* @@ -896,11 +897,11 @@ vfp_double_multiply_accumulate(ARMul_State* state, int dd, int dn, int dm, u32 f struct vfp_double vdd, vdp, vdn, vdm; u32 exceptions; - vfp_double_unpack(&vdn, vfp_get_double(state, dn)); + vfp_double_unpack(&vdn, vfp_get_double(state, dn), &fpscr); if (vdn.exponent == 0 && vdn.significand) vfp_double_normalise_denormal(&vdn); - vfp_double_unpack(&vdm, vfp_get_double(state, dm)); + vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr); if (vdm.exponent == 0 && vdm.significand) vfp_double_normalise_denormal(&vdm); @@ -908,7 +909,7 @@ vfp_double_multiply_accumulate(ARMul_State* state, int dd, int dn, int dm, u32 f if (negate & NEG_MULTIPLY) vdp.sign = vfp_sign_negate(vdp.sign); - vfp_double_unpack(&vdn, vfp_get_double(state, dd)); + vfp_double_unpack(&vdn, vfp_get_double(state, dd), &fpscr); if (vdn.exponent == 0 && vdn.significand != 0) vfp_double_normalise_denormal(&vdn); @@ -969,11 +970,11 @@ static u32 vfp_double_fmul(ARMul_State* state, int dd, int dn, int dm, u32 fpscr u32 exceptions; LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__); - vfp_double_unpack(&vdn, vfp_get_double(state, dn)); + vfp_double_unpack(&vdn, vfp_get_double(state, dn), &fpscr); if (vdn.exponent == 0 && vdn.significand) vfp_double_normalise_denormal(&vdn); - vfp_double_unpack(&vdm, vfp_get_double(state, dm)); + vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr); if (vdm.exponent == 0 && vdm.significand) vfp_double_normalise_denormal(&vdm); @@ -990,11 +991,11 @@ static u32 vfp_double_fnmul(ARMul_State* state, int dd, int dn, int dm, u32 fpsc u32 exceptions; LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__); - vfp_double_unpack(&vdn, vfp_get_double(state, dn)); + vfp_double_unpack(&vdn, vfp_get_double(state, dn), &fpscr); if (vdn.exponent == 0 && vdn.significand) vfp_double_normalise_denormal(&vdn); - vfp_double_unpack(&vdm, vfp_get_double(state, dm)); + vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr); if (vdm.exponent == 0 && vdm.significand) vfp_double_normalise_denormal(&vdm); @@ -1013,11 +1014,11 @@ static u32 vfp_double_fadd(ARMul_State* state, int dd, int dn, int dm, u32 fpscr u32 exceptions; LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__); - vfp_double_unpack(&vdn, vfp_get_double(state, dn)); + vfp_double_unpack(&vdn, vfp_get_double(state, dn), &fpscr); if (vdn.exponent == 0 && vdn.significand) vfp_double_normalise_denormal(&vdn); - vfp_double_unpack(&vdm, vfp_get_double(state, dm)); + vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr); if (vdm.exponent == 0 && vdm.significand) vfp_double_normalise_denormal(&vdm); @@ -1035,11 +1036,11 @@ static u32 vfp_double_fsub(ARMul_State* state, int dd, int dn, int dm, u32 fpscr u32 exceptions; LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__); - vfp_double_unpack(&vdn, vfp_get_double(state, dn)); + vfp_double_unpack(&vdn, vfp_get_double(state, dn), &fpscr); if (vdn.exponent == 0 && vdn.significand) vfp_double_normalise_denormal(&vdn); - vfp_double_unpack(&vdm, vfp_get_double(state, dm)); + vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr); if (vdm.exponent == 0 && vdm.significand) vfp_double_normalise_denormal(&vdm); @@ -1063,8 +1064,8 @@ static u32 vfp_double_fdiv(ARMul_State* state, int dd, int dn, int dm, u32 fpscr int tm, tn; LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__); - vfp_double_unpack(&vdn, vfp_get_double(state, dn)); - vfp_double_unpack(&vdm, vfp_get_double(state, dm)); + vfp_double_unpack(&vdn, vfp_get_double(state, dn), &fpscr); + vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr); vdd.sign = vdn.sign ^ vdm.sign; diff --git a/src/core/arm/skyeye_common/vfp/vfpsingle.cpp b/src/core/arm/skyeye_common/vfp/vfpsingle.cpp index a78bdc43..4dfe0254 100644 --- a/src/core/arm/skyeye_common/vfp/vfpsingle.cpp +++ b/src/core/arm/skyeye_common/vfp/vfpsingle.cpp @@ -330,7 +330,7 @@ static u32 vfp_single_fsqrt(ARMul_State* state, int sd, int unused, s32 m, u32 f struct vfp_single vsm, vsd, *vsp; int ret, tm; - vfp_single_unpack(&vsm, m); + vfp_single_unpack(&vsm, m, &fpscr); tm = vfp_single_type(&vsm); if (tm & (VFP_NAN|VFP_INFINITY)) { vsp = &vsd; @@ -498,7 +498,7 @@ static u32 vfp_single_fcvtd(ARMul_State* state, int dd, int unused, s32 m, u32 f int tm; u32 exceptions = 0; - vfp_single_unpack(&vsm, m); + vfp_single_unpack(&vsm, m, &fpscr); tm = vfp_single_type(&vsm); @@ -563,7 +563,7 @@ static u32 vfp_single_ftoui(ARMul_State* state, int sd, int unused, s32 m, u32 f int rmode = fpscr & FPSCR_RMODE_MASK; int tm; - vfp_single_unpack(&vsm, m); + vfp_single_unpack(&vsm, m, &fpscr); vfp_single_dump("VSM", &vsm); /* @@ -643,7 +643,7 @@ static u32 vfp_single_ftosi(ARMul_State* state, int sd, int unused, s32 m, u32 f int rmode = fpscr & FPSCR_RMODE_MASK; int tm; - vfp_single_unpack(&vsm, m); + vfp_single_unpack(&vsm, m, &fpscr); vfp_single_dump("VSM", &vsm); /* @@ -925,11 +925,11 @@ vfp_single_multiply_accumulate(ARMul_State* state, int sd, int sn, s32 m, u32 fp v = vfp_get_float(state, sn); LOG_DEBUG(Core_ARM11, "s%u = %08x", sn, v); - vfp_single_unpack(&vsn, v); + vfp_single_unpack(&vsn, v, &fpscr); if (vsn.exponent == 0 && vsn.significand) vfp_single_normalise_denormal(&vsn); - vfp_single_unpack(&vsm, m); + vfp_single_unpack(&vsm, m, &fpscr); if (vsm.exponent == 0 && vsm.significand) vfp_single_normalise_denormal(&vsm); @@ -940,7 +940,7 @@ vfp_single_multiply_accumulate(ARMul_State* state, int sd, int sn, s32 m, u32 fp v = vfp_get_float(state, sd); LOG_DEBUG(Core_ARM11, "s%u = %08x", sd, v); - vfp_single_unpack(&vsn, v); + vfp_single_unpack(&vsn, v, &fpscr); if (vsn.exponent == 0 && vsn.significand != 0) vfp_single_normalise_denormal(&vsn); @@ -1004,11 +1004,11 @@ static u32 vfp_single_fmul(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr) LOG_DEBUG(Core_ARM11, "s%u = %08x", sn, n); - vfp_single_unpack(&vsn, n); + vfp_single_unpack(&vsn, n, &fpscr); if (vsn.exponent == 0 && vsn.significand) vfp_single_normalise_denormal(&vsn); - vfp_single_unpack(&vsm, m); + vfp_single_unpack(&vsm, m, &fpscr); if (vsm.exponent == 0 && vsm.significand) vfp_single_normalise_denormal(&vsm); @@ -1027,11 +1027,11 @@ static u32 vfp_single_fnmul(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr LOG_DEBUG(Core_ARM11, "s%u = %08x", sn, n); - vfp_single_unpack(&vsn, n); + vfp_single_unpack(&vsn, n, &fpscr); if (vsn.exponent == 0 && vsn.significand) vfp_single_normalise_denormal(&vsn); - vfp_single_unpack(&vsm, m); + vfp_single_unpack(&vsm, m, &fpscr); if (vsm.exponent == 0 && vsm.significand) vfp_single_normalise_denormal(&vsm); @@ -1054,11 +1054,11 @@ static u32 vfp_single_fadd(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr) /* * Unpack and normalise denormals. */ - vfp_single_unpack(&vsn, n); + vfp_single_unpack(&vsn, n, &fpscr); if (vsn.exponent == 0 && vsn.significand) vfp_single_normalise_denormal(&vsn); - vfp_single_unpack(&vsm, m); + vfp_single_unpack(&vsm, m, &fpscr); if (vsm.exponent == 0 && vsm.significand) vfp_single_normalise_denormal(&vsm); @@ -1094,8 +1094,8 @@ static u32 vfp_single_fdiv(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr) LOG_DEBUG(Core_ARM11, "s%u = %08x", sn, n); - vfp_single_unpack(&vsn, n); - vfp_single_unpack(&vsm, m); + vfp_single_unpack(&vsn, n, &fpscr); + vfp_single_unpack(&vsm, m, &fpscr); vsd.sign = vsn.sign ^ vsm.sign; -- cgit v1.2.3