diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/core/SkBitmap.cpp | 2 | ||||
-rw-r--r-- | src/core/SkColorSpaceXform.cpp | 18 | ||||
-rw-r--r-- | src/core/SkHalf.h | 48 | ||||
-rw-r--r-- | src/core/SkLinearBitmapPipeline_sample.h | 2 | ||||
-rw-r--r-- | src/core/SkMipMap.cpp | 4 | ||||
-rw-r--r-- | src/core/SkRasterPipelineBlitter.cpp | 16 | ||||
-rw-r--r-- | src/core/SkSpanProcs.cpp | 2 | ||||
-rw-r--r-- | src/core/SkXfermodeF16.cpp | 42 | ||||
-rw-r--r-- | src/effects/gradients/Sk4fGradientPriv.h | 4 |
9 files changed, 63 insertions, 75 deletions
diff --git a/src/core/SkBitmap.cpp b/src/core/SkBitmap.cpp index e73217ea50..26630860ac 100644 --- a/src/core/SkBitmap.cpp +++ b/src/core/SkBitmap.cpp @@ -600,7 +600,7 @@ SkColor SkBitmap::getColor(int x, int y) const { } case kRGBA_F16_SkColorType: { const uint64_t* addr = (const uint64_t*)fPixels + y * (fRowBytes >> 3) + x; - Sk4f p4 = SkHalfToFloat_finite(addr[0]); + Sk4f p4 = SkHalfToFloat_finite_ftz(addr[0]); if (p4[3]) { float inva = 1 / p4[3]; p4 = p4 * Sk4f(inva, inva, inva, 1); diff --git a/src/core/SkColorSpaceXform.cpp b/src/core/SkColorSpaceXform.cpp index 3795d2d8d6..f445939267 100644 --- a/src/core/SkColorSpaceXform.cpp +++ b/src/core/SkColorSpaceXform.cpp @@ -876,25 +876,25 @@ static inline void store_2dot2_1(void* dst, const uint32_t* src, static inline void store_f16(void* dst, const uint32_t* src, Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f& da, const uint8_t* const[3], SwapRB) { - Sk4h_store4(dst, SkFloatToHalf_finite(dr), - SkFloatToHalf_finite(dg), - SkFloatToHalf_finite(db), - SkFloatToHalf_finite(da)); + Sk4h_store4(dst, SkFloatToHalf_finite_ftz(dr), + SkFloatToHalf_finite_ftz(dg), + SkFloatToHalf_finite_ftz(db), + SkFloatToHalf_finite_ftz(da)); } static inline void store_f16_1(void* dst, const uint32_t* src, Sk4f& rgba, const Sk4f& a, const uint8_t* const[3], SwapRB kSwapRB) { rgba = Sk4f(rgba[0], rgba[1], rgba[2], a[3]); - SkFloatToHalf_finite(rgba).store((uint64_t*) dst); + SkFloatToHalf_finite_ftz(rgba).store((uint64_t*) dst); } static inline void store_f16_opaque(void* dst, const uint32_t* src, Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f& da, const uint8_t* const[3], SwapRB) { - Sk4h_store4(dst, SkFloatToHalf_finite(dr), - SkFloatToHalf_finite(dg), - SkFloatToHalf_finite(db), + Sk4h_store4(dst, SkFloatToHalf_finite_ftz(dr), + SkFloatToHalf_finite_ftz(dg), + SkFloatToHalf_finite_ftz(db), SK_Half1); } @@ -902,7 +902,7 @@ static inline void store_f16_1_opaque(void* dst, const uint32_t* src, Sk4f& rgba, const Sk4f& a, const uint8_t* const[3], SwapRB kSwapRB) { uint64_t tmp; - SkFloatToHalf_finite(rgba).store(&tmp); + SkFloatToHalf_finite_ftz(rgba).store(&tmp); tmp |= static_cast<uint64_t>(SK_Half1) << 48; *((uint64_t*) dst) = tmp; } diff --git a/src/core/SkHalf.h b/src/core/SkHalf.h index bc9dd7940f..cc57823cb7 100644 --- a/src/core/SkHalf.h +++ b/src/core/SkHalf.h @@ -26,9 +26,10 @@ float SkHalfToFloat(SkHalf h); SkHalf SkFloatToHalf(float f); // Convert between half and single precision floating point, -// assuming inputs and outputs are both finite. -static inline Sk4f SkHalfToFloat_finite(uint64_t); -static inline Sk4h SkFloatToHalf_finite(const Sk4f&); +// assuming inputs and outputs are both finite, and +// flushing values which would be denormal half floats to zero. +static inline Sk4f SkHalfToFloat_finite_ftz(uint64_t); +static inline Sk4h SkFloatToHalf_finite_ftz(const Sk4f&); // ~~~~~~~~~~~ impl ~~~~~~~~~~~~~~ // @@ -37,7 +38,7 @@ static inline Sk4h SkFloatToHalf_finite(const Sk4f&); // GCC 4.9 lacks the intrinsics to use ARMv8 f16<->f32 instructions, so we use inline assembly. -static inline Sk4f SkHalfToFloat_finite(const Sk4h& hs) { +static inline Sk4f SkHalfToFloat_finite_ftz(const Sk4h& hs) { #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64) float32x4_t fs; asm ("fcvtl %[fs].4s, %[hs].4h \n" // vcvt_f32_f16(...) @@ -45,54 +46,41 @@ static inline Sk4f SkHalfToFloat_finite(const Sk4h& hs) { : [hs] "w" (hs.fVec)); // w: read-only NEON register return fs; #else - Sk4i bits = SkNx_cast<int>(hs), // Expand to 32 bit. - sign = bits & 0x00008000, // Save the sign bit for later... - positive = bits ^ sign, // ...but strip it off for now. - is_denorm = positive < (1<<10); // Exponent == 0? + Sk4i bits = SkNx_cast<int>(hs), // Expand to 32 bit. + sign = bits & 0x00008000, // Save the sign bit for later... + positive = bits ^ sign, // ...but strip it off for now. + is_norm = 0x03ff < positive; // Exponent > 0? // For normal half floats, extend the mantissa by 13 zero bits, // then adjust the exponent from 15 bias to 127 bias. Sk4i norm = (positive << 13) + ((127 - 15) << 23); - // For denorm half floats, mask in the exponent-only float K that turns our - // denorm value V*2^-14 into a normalized float K + V*2^-14. Then subtract off K. - const Sk4i K = ((127-15) + (23-10) + 1) << 23; - Sk4i mask_K = positive | K; - Sk4f denorm = Sk4f::Load(&mask_K) - Sk4f::Load(&K); - - Sk4i merged = (sign << 16) | is_denorm.thenElse(Sk4i::Load(&denorm), norm); + Sk4i merged = (sign << 16) | (norm & is_norm); return Sk4f::Load(&merged); #endif } -static inline Sk4f SkHalfToFloat_finite(uint64_t hs) { - return SkHalfToFloat_finite(Sk4h::Load(&hs)); +static inline Sk4f SkHalfToFloat_finite_ftz(uint64_t hs) { + return SkHalfToFloat_finite_ftz(Sk4h::Load(&hs)); } -static inline Sk4h SkFloatToHalf_finite(const Sk4f& fs) { +static inline Sk4h SkFloatToHalf_finite_ftz(const Sk4f& fs) { #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64) float32x4_t vec = fs.fVec; asm ("fcvtn %[vec].4h, %[vec].4s \n" // vcvt_f16_f32(vec) : [vec] "+w" (vec)); // +w: read-write NEON register return vreinterpret_u16_f32(vget_low_f32(vec)); #else - Sk4i bits = Sk4i::Load(&fs), - sign = bits & 0x80000000, // Save the sign bit for later... - positive = bits ^ sign, // ...but strip it off for now. - will_be_denorm = positive < ((127-15+1) << 23); // positve < smallest normal half? + Sk4i bits = Sk4i::Load(&fs), + sign = bits & 0x80000000, // Save the sign bit for later... + positive = bits ^ sign, // ...but strip it off for now. + will_be_norm = 0x387fdfff < positive; // greater than largest denorm half? // For normal half floats, adjust the exponent from 127 bias to 15 bias, // then drop the bottom 13 mantissa bits. Sk4i norm = (positive - ((127 - 15) << 23)) >> 13; - // This mechanically inverts the denorm half -> normal float conversion above. - // Knowning that and reading its explanation will leave you feeling more confident - // than reading my best attempt at explaining this directly. - const Sk4i K = ((127-15) + (23-10) + 1) << 23; - Sk4f plus_K = Sk4f::Load(&positive) + Sk4f::Load(&K); - Sk4i denorm = Sk4i::Load(&plus_K) ^ K; - - Sk4i merged = (sign >> 16) | will_be_denorm.thenElse(denorm, norm); + Sk4i merged = (sign >> 16) | (will_be_norm & norm); return SkNx_cast<uint16_t>(merged); #endif } diff --git a/src/core/SkLinearBitmapPipeline_sample.h b/src/core/SkLinearBitmapPipeline_sample.h index 78f46ff797..78e8d673ed 100644 --- a/src/core/SkLinearBitmapPipeline_sample.h +++ b/src/core/SkLinearBitmapPipeline_sample.h @@ -185,7 +185,7 @@ public: PixelConverter(const SkPixmap& srcPixmap) { } Sk4f toSk4f(const Element pixel) const { - return SkHalfToFloat_finite(pixel); + return SkHalfToFloat_finite_ftz(pixel); } }; diff --git a/src/core/SkMipMap.cpp b/src/core/SkMipMap.cpp index cb9cc85a98..c5bd3ac3b7 100644 --- a/src/core/SkMipMap.cpp +++ b/src/core/SkMipMap.cpp @@ -85,11 +85,11 @@ struct ColorTypeFilter_8 { struct ColorTypeFilter_F16 { typedef uint64_t Type; // SkHalf x4 static Sk4f Expand(uint64_t x) { - return SkHalfToFloat_finite(x); + return SkHalfToFloat_finite_ftz(x); } static uint64_t Compact(const Sk4f& x) { uint64_t r; - SkFloatToHalf_finite(x).store(&r); + SkFloatToHalf_finite_ftz(x).store(&r); return r; } }; diff --git a/src/core/SkRasterPipelineBlitter.cpp b/src/core/SkRasterPipelineBlitter.cpp index 0fae6dd5a2..618e646da4 100644 --- a/src/core/SkRasterPipelineBlitter.cpp +++ b/src/core/SkRasterPipelineBlitter.cpp @@ -196,17 +196,17 @@ SK_RASTER_STAGE(load_d_f16) { Sk4h rh, gh, bh, ah; Sk4h_load4(ptr, &rh, &gh, &bh, &ah); - dr = SkHalfToFloat_finite(rh); - dg = SkHalfToFloat_finite(gh); - db = SkHalfToFloat_finite(bh); - da = SkHalfToFloat_finite(ah); + dr = SkHalfToFloat_finite_ftz(rh); + dg = SkHalfToFloat_finite_ftz(gh); + db = SkHalfToFloat_finite_ftz(bh); + da = SkHalfToFloat_finite_ftz(ah); } // Load 1 F16 pixel. SK_RASTER_STAGE(load_d_f16_1) { auto ptr = (const uint64_t*)ctx + x; - auto p0 = SkHalfToFloat_finite(ptr[0]); + auto p0 = SkHalfToFloat_finite_ftz(ptr[0]); dr = { p0[0],0,0,0 }; dg = { p0[1],0,0,0 }; db = { p0[2],0,0,0 }; @@ -217,15 +217,15 @@ SK_RASTER_STAGE(load_d_f16_1) { SK_RASTER_STAGE(store_f16) { auto ptr = (uint64_t*)ctx + x; - Sk4h_store4(ptr, SkFloatToHalf_finite(r), SkFloatToHalf_finite(g), - SkFloatToHalf_finite(b), SkFloatToHalf_finite(a)); + Sk4h_store4(ptr, SkFloatToHalf_finite_ftz(r), SkFloatToHalf_finite_ftz(g), + SkFloatToHalf_finite_ftz(b), SkFloatToHalf_finite_ftz(a)); } // Store 1 F16 pixel. SK_RASTER_STAGE(store_f16_1) { auto ptr = (uint64_t*)ctx + x; - SkFloatToHalf_finite({r[0], g[0], b[0], a[0]}).store(ptr); + SkFloatToHalf_finite_ftz({r[0], g[0], b[0], a[0]}).store(ptr); } // Load 4 8-bit sRGB pixels from SkPMColor order to RGBA. diff --git a/src/core/SkSpanProcs.cpp b/src/core/SkSpanProcs.cpp index b2e9914a17..32237d5f03 100644 --- a/src/core/SkSpanProcs.cpp +++ b/src/core/SkSpanProcs.cpp @@ -42,7 +42,7 @@ static void load_f16(const SkPixmap& src, int x, int y, SkPM4f span[], int count SkASSERT(src.addr64(x + count - 1, y)); for (int i = 0; i < count; ++i) { - SkHalfToFloat_finite(addr[i]).store(span[i].fVec); + SkHalfToFloat_finite_ftz(addr[i]).store(span[i].fVec); } } diff --git a/src/core/SkXfermodeF16.cpp b/src/core/SkXfermodeF16.cpp index 219e91188e..9cf7254d1f 100644 --- a/src/core/SkXfermodeF16.cpp +++ b/src/core/SkXfermodeF16.cpp @@ -22,16 +22,16 @@ static void xfer_1(const SkXfermode* xfer, uint64_t dst[], const SkPM4f* src, in SkPM4f d; if (aa) { for (int i = 0; i < count; ++i) { - Sk4f d4 = SkHalfToFloat_finite(dst[i]); + Sk4f d4 = SkHalfToFloat_finite_ftz(dst[i]); d4.store(d.fVec); Sk4f r4 = Sk4f::Load(proc(*src, d).fVec); - SkFloatToHalf_finite(lerp_by_coverage(r4, d4, aa[i])).store(&dst[i]); + SkFloatToHalf_finite_ftz(lerp_by_coverage(r4, d4, aa[i])).store(&dst[i]); } } else { for (int i = 0; i < count; ++i) { - SkHalfToFloat_finite(dst[i]).store(d.fVec); + SkHalfToFloat_finite_ftz(dst[i]).store(d.fVec); Sk4f r4 = Sk4f::Load(proc(*src, d).fVec); - SkFloatToHalf_finite(r4).store(&dst[i]); + SkFloatToHalf_finite_ftz(r4).store(&dst[i]); } } } @@ -42,16 +42,16 @@ static void xfer_n(const SkXfermode* xfer, uint64_t dst[], const SkPM4f src[], i SkPM4f d; if (aa) { for (int i = 0; i < count; ++i) { - Sk4f d4 = SkHalfToFloat_finite(dst[i]); + Sk4f d4 = SkHalfToFloat_finite_ftz(dst[i]); d4.store(d.fVec); Sk4f r4 = Sk4f::Load(proc(src[i], d).fVec); - SkFloatToHalf_finite(lerp_by_coverage(r4, d4, aa[i])).store(&dst[i]); + SkFloatToHalf_finite_ftz(lerp_by_coverage(r4, d4, aa[i])).store(&dst[i]); } } else { for (int i = 0; i < count; ++i) { - SkHalfToFloat_finite(dst[i]).store(d.fVec); + SkHalfToFloat_finite_ftz(dst[i]).store(d.fVec); Sk4f r4 = Sk4f::Load(proc(src[i], d).fVec); - SkFloatToHalf_finite(r4).store(&dst[i]); + SkFloatToHalf_finite_ftz(r4).store(&dst[i]); } } } @@ -64,8 +64,8 @@ static void clear(const SkXfermode*, uint64_t dst[], const SkPM4f*, int count, c if (aa) { for (int i = 0; i < count; ++i) { if (aa[i]) { - const Sk4f d4 = SkHalfToFloat_finite(dst[i]); - SkFloatToHalf_finite(d4 * Sk4f((255 - aa[i]) * 1.0f/255)).store(&dst[i]); + const Sk4f d4 = SkHalfToFloat_finite_ftz(dst[i]); + SkFloatToHalf_finite_ftz(d4 * Sk4f((255 - aa[i]) * 1.0f/255)).store(&dst[i]); } } } else { @@ -82,12 +82,12 @@ static void src_1(const SkXfermode*, uint64_t dst[], const SkPM4f* src, int coun const Sk4f s4 = Sk4f::Load(src->fVec); if (aa) { for (int i = 0; i < count; ++i) { - const Sk4f d4 = SkHalfToFloat_finite(dst[i]); - SkFloatToHalf_finite(lerp_by_coverage(s4, d4, aa[i])).store(&dst[i]); + const Sk4f d4 = SkHalfToFloat_finite_ftz(dst[i]); + SkFloatToHalf_finite_ftz(lerp_by_coverage(s4, d4, aa[i])).store(&dst[i]); } } else { uint64_t s4h; - SkFloatToHalf_finite(s4).store(&s4h); + SkFloatToHalf_finite_ftz(s4).store(&s4h); sk_memset64(dst, s4h, count); } } @@ -97,13 +97,13 @@ static void src_n(const SkXfermode*, uint64_t dst[], const SkPM4f src[], int cou if (aa) { for (int i = 0; i < count; ++i) { const Sk4f s4 = Sk4f::Load(src[i].fVec); - const Sk4f d4 = SkHalfToFloat_finite(dst[i]); - SkFloatToHalf_finite(lerp_by_coverage(s4, d4, aa[i])).store(&dst[i]); + const Sk4f d4 = SkHalfToFloat_finite_ftz(dst[i]); + SkFloatToHalf_finite_ftz(lerp_by_coverage(s4, d4, aa[i])).store(&dst[i]); } } else { for (int i = 0; i < count; ++i) { const Sk4f s4 = Sk4f::Load(src[i].fVec); - SkFloatToHalf_finite(s4).store(&dst[i]); + SkFloatToHalf_finite_ftz(s4).store(&dst[i]); } } } @@ -123,12 +123,12 @@ static void srcover_1(const SkXfermode*, uint64_t dst[], const SkPM4f* src, int const Sk4f s4 = Sk4f::Load(src->fVec); const Sk4f dst_scale = Sk4f(1 - get_alpha(s4)); for (int i = 0; i < count; ++i) { - const Sk4f d4 = SkHalfToFloat_finite(dst[i]); + const Sk4f d4 = SkHalfToFloat_finite_ftz(dst[i]); const Sk4f r4 = s4 + d4 * dst_scale; if (aa) { - SkFloatToHalf_finite(lerp_by_coverage(r4, d4, aa[i])).store(&dst[i]); + SkFloatToHalf_finite_ftz(lerp_by_coverage(r4, d4, aa[i])).store(&dst[i]); } else { - SkFloatToHalf_finite(r4).store(&dst[i]); + SkFloatToHalf_finite_ftz(r4).store(&dst[i]); } } } @@ -137,12 +137,12 @@ static void srcover_n(const SkXfermode*, uint64_t dst[], const SkPM4f src[], int const SkAlpha aa[]) { for (int i = 0; i < count; ++i) { Sk4f s = Sk4f::Load(src+i), - d = SkHalfToFloat_finite(dst[i]), + d = SkHalfToFloat_finite_ftz(dst[i]), r = s + d*(1.0f - SkNx_shuffle<3,3,3,3>(s)); if (aa) { r = lerp_by_coverage(r, d, aa[i]); } - SkFloatToHalf_finite(r).store(&dst[i]); + SkFloatToHalf_finite_ftz(r).store(&dst[i]); } } diff --git a/src/effects/gradients/Sk4fGradientPriv.h b/src/effects/gradients/Sk4fGradientPriv.h index 6542683eb0..65fa821e85 100644 --- a/src/effects/gradients/Sk4fGradientPriv.h +++ b/src/effects/gradients/Sk4fGradientPriv.h @@ -141,12 +141,12 @@ struct DstTraits<DstType::F16, premul> { } static void store(const Sk4f& c, Type* dst) { - SkFloatToHalf_finite(PM::apply(c)).store(dst); + SkFloatToHalf_finite_ftz(PM::apply(c)).store(dst); } static void store(const Sk4f& c, Type* dst, int n) { uint64_t color; - SkFloatToHalf_finite(PM::apply(c)).store(&color); + SkFloatToHalf_finite_ftz(PM::apply(c)).store(&color); sk_memset64(dst, color, n); } |