Flush denorm half floats to zero.

I think we convinced ourselves that denorms, while a good chunk of half floats, cover a rather small fraction of the representable range, which is always close enough to zero to flush.

This makes both paths of the conversion to or from float considerably simpler.

These functions now work for zero-or-normal half floats (excluding infinite, NaN). I'm not aware of a term for this class so I've called them "ordinary".

A handful of GMs and SKPs draw differently in --config f16, but all imperceptibly.

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2256023002

Review-Url: https://codereview.chromium.org/2256023002
author: mtklein <mtklein@chromium.org> 2016-08-22 13:20:18 -0700
committer: Commit bot <commit-bot@chromium.org> 2016-08-22 13:20:18 -0700
commit: 8ae991e433d2c0814ea5579613f00173805ff057 (patch)
tree: 03948b83317a26c3a1c176c3bfd102a044b0beca /src/core/SkHalf.h
parent: 4f3a0ca85d28a8fc7fcc1ac5c4a1864c66bdea14 (diff)
1 files changed, 18 insertions, 30 deletions
diff --git a/src/core/SkHalf.h b/src/core/SkHalf.h
index bc9dd7940f..cc57823cb7 100644
--- a/src/core/SkHalf.h
+++ b/src/core/SkHalf.h
@@ -26,9 +26,10 @@ float SkHalfToFloat(SkHalf h);
 SkHalf SkFloatToHalf(float f);
 
 // Convert between half and single precision floating point,
-// assuming inputs and outputs are both finite.
-static inline Sk4f SkHalfToFloat_finite(uint64_t);
-static inline Sk4h SkFloatToHalf_finite(const Sk4f&);
+// assuming inputs and outputs are both finite, and
+// flushing values which would be denormal half floats to zero.
+static inline Sk4f SkHalfToFloat_finite_ftz(uint64_t);
+static inline Sk4h SkFloatToHalf_finite_ftz(const Sk4f&);
 
 // ~~~~~~~~~~~ impl ~~~~~~~~~~~~~~ //
 
@@ -37,7 +38,7 @@ static inline Sk4h SkFloatToHalf_finite(const Sk4f&);
 
 // GCC 4.9 lacks the intrinsics to use ARMv8 f16<->f32 instructions, so we use inline assembly.
 
-static inline Sk4f SkHalfToFloat_finite(const Sk4h& hs) {
+static inline Sk4f SkHalfToFloat_finite_ftz(const Sk4h& hs) {
 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64)
     float32x4_t fs;
     asm ("fcvtl %[fs].4s, %[hs].4h   \n"   // vcvt_f32_f16(...)
@@ -45,54 +46,41 @@ static inline Sk4f SkHalfToFloat_finite(const Sk4h& hs) {
         : [hs] "w" (hs.fVec));             //  w: read-only NEON register
     return fs;
 #else
-    Sk4i bits      = SkNx_cast<int>(hs),   // Expand to 32 bit.
-         sign      = bits & 0x00008000,    // Save the sign bit for later...
-         positive  = bits ^ sign,          // ...but strip it off for now.
-         is_denorm = positive < (1<<10);   // Exponent == 0?
+    Sk4i bits     = SkNx_cast<int>(hs),  // Expand to 32 bit.
+         sign     = bits & 0x00008000,   // Save the sign bit for later...
+         positive = bits ^ sign,         // ...but strip it off for now.
+         is_norm  = 0x03ff < positive;   // Exponent > 0?
 
     // For normal half floats, extend the mantissa by 13 zero bits,
     // then adjust the exponent from 15 bias to 127 bias.
     Sk4i norm = (positive << 13) + ((127 - 15) << 23);
 
-    // For denorm half floats, mask in the exponent-only float K that turns our
-    // denorm value V*2^-14 into a normalized float K + V*2^-14.  Then subtract off K.
-    const Sk4i K = ((127-15) + (23-10) + 1) << 23;
-    Sk4i mask_K = positive | K;
-    Sk4f denorm = Sk4f::Load(&mask_K) - Sk4f::Load(&K);
-
-    Sk4i merged = (sign << 16) | is_denorm.thenElse(Sk4i::Load(&denorm), norm);
+    Sk4i merged = (sign << 16) | (norm & is_norm);
     return Sk4f::Load(&merged);
 #endif
 }
 
-static inline Sk4f SkHalfToFloat_finite(uint64_t hs) {
-    return SkHalfToFloat_finite(Sk4h::Load(&hs));
+static inline Sk4f SkHalfToFloat_finite_ftz(uint64_t hs) {
+    return SkHalfToFloat_finite_ftz(Sk4h::Load(&hs));
 }
 
-static inline Sk4h SkFloatToHalf_finite(const Sk4f& fs) {
+static inline Sk4h SkFloatToHalf_finite_ftz(const Sk4f& fs) {
 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64)
     float32x4_t vec = fs.fVec;
     asm ("fcvtn %[vec].4h, %[vec].4s  \n"   // vcvt_f16_f32(vec)
         : [vec] "+w" (vec));                // +w: read-write NEON register
     return vreinterpret_u16_f32(vget_low_f32(vec));
 #else
-    Sk4i bits           = Sk4i::Load(&fs),
-         sign           = bits & 0x80000000,              // Save the sign bit for later...
-         positive       = bits ^ sign,                    // ...but strip it off for now.
-         will_be_denorm = positive < ((127-15+1) << 23);  // positve < smallest normal half?
+    Sk4i bits         = Sk4i::Load(&fs),
+         sign         = bits & 0x80000000,      // Save the sign bit for later...
+         positive     = bits ^ sign,            // ...but strip it off for now.
+         will_be_norm = 0x387fdfff < positive;  // greater than largest denorm half?
 
     // For normal half floats, adjust the exponent from 127 bias to 15 bias,
     // then drop the bottom 13 mantissa bits.
     Sk4i norm = (positive - ((127 - 15) << 23)) >> 13;
 
-    // This mechanically inverts the denorm half -> normal float conversion above.
-    // Knowning that and reading its explanation will leave you feeling more confident
-    // than reading my best attempt at explaining this directly.
-    const Sk4i K = ((127-15) + (23-10) + 1) << 23;
-    Sk4f plus_K = Sk4f::Load(&positive) + Sk4f::Load(&K);
-    Sk4i denorm = Sk4i::Load(&plus_K) ^ K;
-
-    Sk4i merged = (sign >> 16) | will_be_denorm.thenElse(denorm, norm);
+    Sk4i merged = (sign >> 16) | (will_be_norm & norm);
     return SkNx_cast<uint16_t>(merged);
 #endif
 }
author	mtklein <mtklein@chromium.org>	2016-08-22 13:20:18 -0700
committer	Commit bot <commit-bot@chromium.org>	2016-08-22 13:20:18 -0700
commit	8ae991e433d2c0814ea5579613f00173805ff057 (patch)
tree	03948b83317a26c3a1c176c3bfd102a044b0beca /src/core/SkHalf.h
parent	4f3a0ca85d28a8fc7fcc1ac5c4a1864c66bdea14 (diff)