diff options
-rw-r--r-- | bench/SkBlend_optsBench.cpp | 48 | ||||
-rw-r--r-- | src/core/SkColor.cpp | 22 | ||||
-rw-r--r-- | src/core/SkColorMatrixFilterRowMajor255.cpp | 2 | ||||
-rw-r--r-- | src/core/SkLinearBitmapPipeline_sample.h | 9 | ||||
-rw-r--r-- | src/core/SkPM4fPriv.h | 145 | ||||
-rw-r--r-- | src/core/SkSpanProcs.cpp | 4 | ||||
-rw-r--r-- | src/core/SkXfermode4f.cpp | 157 | ||||
-rw-r--r-- | src/effects/gradients/Sk4fGradientPriv.h | 8 | ||||
-rw-r--r-- | src/effects/gradients/Sk4fLinearGradient.cpp | 11 | ||||
-rw-r--r-- | src/opts/SkBlend_opts.h | 26 | ||||
-rw-r--r-- | tests/SkBlend_optsTest.cpp | 9 |
11 files changed, 159 insertions, 282 deletions
diff --git a/bench/SkBlend_optsBench.cpp b/bench/SkBlend_optsBench.cpp index c290714128..184e93335e 100644 --- a/bench/SkBlend_optsBench.cpp +++ b/bench/SkBlend_optsBench.cpp @@ -19,13 +19,39 @@ #define INNER_LOOPS 10 +static inline void brute_srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) { + auto d = Sk4f_fromS32(*dst), + s = Sk4f_fromS32( src); + *dst = Sk4f_toS32(s + d * (1.0f - s[3])); +} + +static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) { + if (src >= 0xFF000000) { + *dst = src; + return; + } + brute_srcover_srgb_srgb_1(dst, src); +} + static void brute_force_srcover_srgb_srgb( uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { while (ndst > 0) { int n = SkTMin(ndst, nsrc); for (int i = 0; i < n; i++) { - srcover_blend_srgb8888_srgb_1(dst++, srgb_to_linear(to_4f(src[i]))); + brute_srcover_srgb_srgb_1(dst++, src[i]); + } + ndst -= n; + } +} + +static void trivial_srcover_srgb_srgb( + uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { + while (ndst > 0) { + int n = SkTMin(ndst, nsrc); + + for (int i = 0; i < n; i++) { + srcover_srgb_srgb_1(dst++, src[i]); } ndst -= n; } @@ -36,8 +62,8 @@ static void best_non_simd_srcover_srgb_srgb( uint64_t* ddst = reinterpret_cast<uint64_t*>(dst); auto srcover_srgb_srgb_2 = [](uint32_t* dst, const uint32_t* src) { - srcover_srgb8888_srgb_1(dst++, *src++); - srcover_srgb8888_srgb_1(dst, *src); + srcover_srgb_srgb_1(dst++, *src++); + srcover_srgb_srgb_1(dst, *src); }; while (ndst >0) { @@ -62,24 +88,12 @@ static void best_non_simd_srcover_srgb_srgb( } while (dsrc < end); if ((count & 1) != 0) { - srcover_srgb8888_srgb_1(reinterpret_cast<uint32_t*>(ddst), - *reinterpret_cast<const uint32_t*>(dsrc)); + srcover_srgb_srgb_1(reinterpret_cast<uint32_t*>(ddst), + *reinterpret_cast<const uint32_t*>(dsrc)); } } } -static void trivial_srcover_srgb_srgb( - uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { - while (ndst > 0) { - int n = SkTMin(ndst, nsrc); - - for (int i = 0; i < n; i++) { - srcover_srgb8888_srgb_1(dst++, src[i]); - } - ndst -= n; - } -} - class SrcOverVSkOptsBruteForce { public: static SkString Name() { return SkString{"VSkOptsBruteForce"}; } diff --git a/src/core/SkColor.cpp b/src/core/SkColor.cpp index a1404a27e3..39e9aafa92 100644 --- a/src/core/SkColor.cpp +++ b/src/core/SkColor.cpp @@ -106,10 +106,7 @@ SkColor SkHSVToColor(U8CPU a, const SkScalar hsv[3]) { #include "SkHalf.h" SkPM4f SkPM4f::FromPMColor(SkPMColor c) { - Sk4f value = to_4f_rgba(c); - SkPM4f c4; - (value * Sk4f(1.0f / 255)).store(&c4); - return c4; + return From4f(swizzle_rb_if_bgra(Sk4f_fromL32(c))); } SkColor4f SkPM4f::unpremul() const { @@ -152,21 +149,14 @@ void SkPM4f::assertIsUnit() const { /////////////////////////////////////////////////////////////////////////////////////////////////// -SkColor4f SkColor4f::FromColor(SkColor c) { - Sk4f value = SkNx_shuffle<2,1,0,3>(SkNx_cast<float>(Sk4b::Load(&c))); - SkColor4f c4; - (value * Sk4f(1.0f / 255)).store(&c4); - c4.fR = srgb_to_linear(c4.fR); - c4.fG = srgb_to_linear(c4.fG); - c4.fB = srgb_to_linear(c4.fB); - return c4; +SkColor4f SkColor4f::FromColor(SkColor bgra) { + SkColor4f rgba; + swizzle_rb(Sk4f_fromS32(bgra)).store(rgba.vec()); + return rgba; } SkColor SkColor4f::toSkColor() const { - SkColor result; - Sk4f value = Sk4f(linear_to_srgb(fB), linear_to_srgb(fG), linear_to_srgb(fR), fA); - SkNx_cast<uint8_t>(value * Sk4f(255) + Sk4f(0.5f)).store(&result); - return result; + return Sk4f_toS32(swizzle_rb(Sk4f::Load(this->vec()))); } SkColor4f SkColor4f::Pin(float r, float g, float b, float a) { diff --git a/src/core/SkColorMatrixFilterRowMajor255.cpp b/src/core/SkColorMatrixFilterRowMajor255.cpp index cdfd1df496..29a3f107b8 100644 --- a/src/core/SkColorMatrixFilterRowMajor255.cpp +++ b/src/core/SkColorMatrixFilterRowMajor255.cpp @@ -126,7 +126,7 @@ struct SkPMColorAdaptor { return round(swizzle_rb_if_bgra(c4)); } static Sk4f To4f(SkPMColor c) { - return to_4f(c) * Sk4f(1.0f/255); + return Sk4f_fromL32(c); } }; void SkColorMatrixFilterRowMajor255::filterSpan(const SkPMColor src[], int count, SkPMColor dst[]) const { diff --git a/src/core/SkLinearBitmapPipeline_sample.h b/src/core/SkLinearBitmapPipeline_sample.h index 86ad6e146f..759075b3e5 100644 --- a/src/core/SkLinearBitmapPipeline_sample.h +++ b/src/core/SkLinearBitmapPipeline_sample.h @@ -170,11 +170,10 @@ public: PixelConverter(const SkPixmap& srcPixmap) { } Sk4f toSk4f(Element pixel) const { - float gray = pixel * (1.0f/255.0f); - Sk4f result = Sk4f{gray, gray, gray, 1.0f}; - return gammaType == kSRGB_SkGammaType - ? srgb_to_linear(result) - : result; + float gray = (gammaType == kSRGB_SkGammaType) + ? sk_linear_from_srgb[pixel] + : pixel * (1/255.0f); + return {gray, gray, gray, 1.0f}; } }; diff --git a/src/core/SkPM4fPriv.h b/src/core/SkPM4fPriv.h index 57a44c1cb2..89a0caeb70 100644 --- a/src/core/SkPM4fPriv.h +++ b/src/core/SkPM4fPriv.h @@ -10,142 +10,65 @@ #include "SkColorPriv.h" #include "SkPM4f.h" +#include "SkSRGB.h" -static inline float get_alpha(const Sk4f& f4) { - return f4[SkPM4f::A]; +static inline Sk4f set_alpha(const Sk4f& px, float alpha) { + return { px[0], px[1], px[2], alpha }; } -static inline Sk4f set_alpha(const Sk4f& f4, float alpha) { - static_assert(3 == SkPM4f::A, ""); - return Sk4f(f4[0], f4[1], f4[2], alpha); +static inline float get_alpha(const Sk4f& px) { + return px[3]; } -static inline uint32_t to_4b(const Sk4f& f4) { - uint32_t b4; - SkNx_cast<uint8_t>(f4).store((uint8_t*)&b4); - return b4; -} - -static inline Sk4f to_4f(uint32_t b4) { - return SkNx_cast<float>(Sk4b::Load((const uint8_t*)&b4)); -} - -static inline Sk4f to_4f_rgba(uint32_t b4) { - return swizzle_rb_if_bgra(to_4f(b4)); -} - -static inline Sk4f srgb_to_linear(const Sk4f& s4) { - return set_alpha(s4 * s4, get_alpha(s4)); -} - -static inline Sk4f linear_to_srgb(const Sk4f& l4) { - return set_alpha(l4.rsqrt().invert(), get_alpha(l4)); -} - -static inline float srgb_to_linear(float x) { - return x * x; -} - -static inline float linear_to_srgb(float x) { - return sqrtf(x); -} - -static void assert_unit(float x) { - SkASSERT(x >= 0 && x <= 1); -} - -static inline float exact_srgb_to_linear(float x) { - assert_unit(x); - float linear; - if (x <= 0.04045) { - linear = x / 12.92f; - } else { - linear = powf((x + 0.055f) / 1.055f, 2.4f); - } - assert_unit(linear); - return linear; -} -static inline float exact_linear_to_srgb(float x) { - assert_unit(x); - float srgb; - if (x <= 0.0031308f) { - srgb = x * 12.92f; - } else { - srgb = 1.055f * powf(x, 0.41666667f) - 0.055f; - } - assert_unit(srgb); - return srgb; +static inline Sk4f Sk4f_fromL32(uint32_t px) { + return SkNx_cast<float>(Sk4b::Load(&px)) * (1/255.0f); } -static inline Sk4f exact_srgb_to_linear(const Sk4f& x) { - Sk4f linear(exact_srgb_to_linear(x[0]), - exact_srgb_to_linear(x[1]), - exact_srgb_to_linear(x[2]), 1); - return set_alpha(linear, get_alpha(x)); +static inline Sk4f Sk4f_fromS32(uint32_t px) { + return { sk_linear_from_srgb[(px >> 0) & 0xff], + sk_linear_from_srgb[(px >> 8) & 0xff], + sk_linear_from_srgb[(px >> 16) & 0xff], + (1/255.0f) * (px >> 24) }; } -static inline Sk4f exact_linear_to_srgb(const Sk4f& x) { - Sk4f srgb(exact_linear_to_srgb(x[0]), - exact_linear_to_srgb(x[1]), - exact_linear_to_srgb(x[2]), 1); - return set_alpha(srgb, get_alpha(x)); +static inline uint32_t Sk4f_toL32(const Sk4f& px) { + uint32_t l32; + SkNx_cast<uint8_t>(Sk4f_round(px * 255.0f)).store(&l32); + return l32; } -/////////////////////////////////////////////////////////////////////////////////////////////////// +static inline uint32_t Sk4f_toS32(const Sk4f& px) { + Sk4i rgb = sk_linear_to_srgb(px), + srgb = { rgb[0], rgb[1], rgb[2], (int)(255.0f * px[3] + 0.5f) }; -static inline Sk4f Sk4f_fromL32(uint32_t src) { - return to_4f(src) * Sk4f(1.0f/255); + uint32_t s32; + SkNx_cast<uint8_t>(srgb).store(&s32); + return s32; } -static inline Sk4f Sk4f_fromS32(uint32_t src) { - return srgb_to_linear(to_4f(src) * Sk4f(1.0f/255)); -} -// Color handling: +// SkColor handling: // SkColor has an ordering of (b, g, r, a) if cast to an Sk4f, so the code swizzles r and b to // produce the needed (r, g, b, a) ordering. static inline Sk4f Sk4f_from_SkColor(SkColor color) { return swizzle_rb(Sk4f_fromS32(color)); } -static inline uint32_t Sk4f_toL32(const Sk4f& x4) { - return to_4b(x4 * Sk4f(255) + Sk4f(0.5f)); +static inline void assert_unit(float x) { + SkASSERT(0 <= x && x <= 1); } -static inline uint32_t Sk4f_toS32(const Sk4f& x4) { - return to_4b(linear_to_srgb(x4) * Sk4f(255) + Sk4f(0.5f)); -} - -static inline Sk4f exact_Sk4f_fromS32(uint32_t src) { - return exact_srgb_to_linear(to_4f(src) * Sk4f(1.0f/255)); -} -static inline uint32_t exact_Sk4f_toS32(const Sk4f& x4) { - return to_4b(exact_linear_to_srgb(x4) * Sk4f(255) + Sk4f(0.5f)); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// An implementation of SrcOver from bytes to bytes in linear space that takes advantage of the -// observation that the 255's cancel. -// invA = 1 - (As / 255); -// -// R = 255 * sqrt((Rs/255)^2 + (Rd/255)^2 * invA) -// => R = 255 * sqrt((Rs^2 + Rd^2 * invA)/255^2) -// => R = sqrt(Rs^2 + Rd^2 * invA) -// Note: src is assumed to be linear. -static inline void srcover_blend_srgb8888_srgb_1(uint32_t* dst, const Sk4f& src) { - Sk4f d = srgb_to_linear(to_4f(*dst)); - Sk4f invAlpha = 1.0f - Sk4f{src[SkPM4f::A]} * (1.0f / 255.0f); - Sk4f r = linear_to_srgb(src + d * invAlpha) + 0.5f; - *dst = to_4b(r); -} - -static inline void srcover_srgb8888_srgb_1(uint32_t* dst, const uint32_t pixel) { - if ((~pixel & 0xFF000000) == 0) { - *dst = pixel; - } else if ((pixel & 0xFF000000) != 0) { - srcover_blend_srgb8888_srgb_1(dst, srgb_to_linear(to_4f(pixel))); +static inline float exact_srgb_to_linear(float srgb) { + assert_unit(srgb); + float linear; + if (srgb <= 0.04045) { + linear = srgb / 12.92f; + } else { + linear = powf((srgb + 0.055f) / 1.055f, 2.4f); } + assert_unit(linear); + return linear; } #endif diff --git a/src/core/SkSpanProcs.cpp b/src/core/SkSpanProcs.cpp index 87dcbc0ee7..b2e9914a17 100644 --- a/src/core/SkSpanProcs.cpp +++ b/src/core/SkSpanProcs.cpp @@ -22,7 +22,7 @@ static void load_l32(const SkPixmap& src, int x, int y, SkPM4f span[], int count SkASSERT(src.addr32(x + count - 1, y)); for (int i = 0; i < count; ++i) { - (to_4f_rgba(addr[i]) * Sk4f(1.0f/255)).store(span[i].fVec); + swizzle_rb_if_bgra(Sk4f_fromL32(addr[i])).store(span[i].fVec); } } @@ -32,7 +32,7 @@ static void load_s32(const SkPixmap& src, int x, int y, SkPM4f span[], int count SkASSERT(src.addr32(x + count - 1, y)); for (int i = 0; i < count; ++i) { - srgb_to_linear(to_4f_rgba(addr[i]) * Sk4f(1.0f/255)).store(span[i].fVec); + swizzle_rb_if_bgra(Sk4f_fromS32(addr[i])).store(span[i].fVec); } } diff --git a/src/core/SkXfermode4f.cpp b/src/core/SkXfermode4f.cpp index d861973dbf..87a12a7df6 100644 --- a/src/core/SkXfermode4f.cpp +++ b/src/core/SkXfermode4f.cpp @@ -35,44 +35,40 @@ template <DstType D> Sk4f load_dst(SkPMColor dstC) { return (D == kSRGB_Dst) ? Sk4f_fromS32(dstC) : Sk4f_fromL32(dstC); } -static Sk4f srgb_4b_to_linear_unit(SkPMColor dstC) { - return Sk4f_fromS32(dstC); -} - template <DstType D> uint32_t store_dst(const Sk4f& x4) { return (D == kSRGB_Dst) ? Sk4f_toS32(x4) : Sk4f_toL32(x4); } -static Sk4f linear_unit_to_srgb_255f(const Sk4f& l4) { - return linear_to_srgb(l4) * Sk4f(255) + Sk4f(0.5f); -} +static Sk4x4f load_4_srgb(const void* vptr) { + auto ptr = (const uint32_t*)vptr; + + Sk4x4f rgba; + + rgba.r = { sk_linear_from_srgb[(ptr[0] >> 0) & 0xff], + sk_linear_from_srgb[(ptr[1] >> 0) & 0xff], + sk_linear_from_srgb[(ptr[2] >> 0) & 0xff], + sk_linear_from_srgb[(ptr[3] >> 0) & 0xff] }; -// Load 4 interlaced 8888 sRGB pixels as an Sk4x4f, transposed and converted to float. -static Sk4x4f load_4_srgb(const void* ptr) { - auto p = Sk4x4f::Transpose((const uint8_t*)ptr); + rgba.g = { sk_linear_from_srgb[(ptr[0] >> 8) & 0xff], + sk_linear_from_srgb[(ptr[1] >> 8) & 0xff], + sk_linear_from_srgb[(ptr[2] >> 8) & 0xff], + sk_linear_from_srgb[(ptr[3] >> 8) & 0xff] }; - // Scale to [0,1]. - p.r *= 1/255.0f; - p.g *= 1/255.0f; - p.b *= 1/255.0f; - p.a *= 1/255.0f; + rgba.b = { sk_linear_from_srgb[(ptr[0] >> 16) & 0xff], + sk_linear_from_srgb[(ptr[1] >> 16) & 0xff], + sk_linear_from_srgb[(ptr[2] >> 16) & 0xff], + sk_linear_from_srgb[(ptr[3] >> 16) & 0xff] }; - // Apply approximate sRGB gamma correction to convert to linear (as if gamma were 2). - p.r *= p.r; - p.g *= p.g; - p.b *= p.b; + rgba.a = SkNx_cast<float>((Sk4i::Load(ptr) >> 24) & 0xff) * (1/255.0f); - return p; + return rgba; } -// Store an Sk4x4f back to 4 interlaced 8888 sRGB pixels. static void store_4_srgb(void* ptr, const Sk4x4f& p) { - // Convert back to sRGB and [0,255], again approximating sRGB as gamma == 2. - auto r = p.r.rsqrt().invert() * 255.0f + 0.5f, - g = p.g.rsqrt().invert() * 255.0f + 0.5f, - b = p.b.rsqrt().invert() * 255.0f + 0.5f, - a = p.a * 255.0f + 0.5f; - Sk4x4f{r,g,b,a}.transpose((uint8_t*)ptr); + ( sk_linear_to_srgb(p.r) << 0 + | sk_linear_to_srgb(p.g) << 8 + | sk_linear_to_srgb(p.b) << 16 + | Sk4f_round(255.0f*p.a) << 24).store(ptr); } /////////////////////////////////////////////////////////////////////////////////////////////////// @@ -197,41 +193,24 @@ template <DstType D> void src_1(const SkXfermode*, uint32_t dst[], const Sk4f s4 = src->to4f_pmorder(); if (aa) { - if (D == kLinear_Dst) { - // operate in bias-255 space for src and dst - const Sk4f& s4_255 = s4 * Sk4f(255); - while (count >= 4) { - Sk4f aa4 = SkNx_cast<float>(Sk4b::Load(aa)) * Sk4f(1/255.f); - Sk4f r0 = lerp(s4_255, to_4f(dst[0]), Sk4f(aa4[0])) + Sk4f(0.5f); - Sk4f r1 = lerp(s4_255, to_4f(dst[1]), Sk4f(aa4[1])) + Sk4f(0.5f); - Sk4f r2 = lerp(s4_255, to_4f(dst[2]), Sk4f(aa4[2])) + Sk4f(0.5f); - Sk4f r3 = lerp(s4_255, to_4f(dst[3]), Sk4f(aa4[3])) + Sk4f(0.5f); - Sk4f_ToBytes((uint8_t*)dst, r0, r1, r2, r3); - - dst += 4; - aa += 4; - count -= 4; - } - } else { // kSRGB - SkPMColor srcColor = store_dst<D>(s4); - while (count-- > 0) { - SkAlpha cover = *aa++; - switch (cover) { - case 0xFF: { - *dst++ = srcColor; - break; - } - case 0x00: { - dst++; - break; - } - default: { - Sk4f d4 = load_dst<D>(*dst); - *dst++ = store_dst<D>(lerp(s4, d4, cover)); - } + SkPMColor srcColor = store_dst<D>(s4); + while (count-- > 0) { + SkAlpha cover = *aa++; + switch (cover) { + case 0xFF: { + *dst++ = srcColor; + break; + } + case 0x00: { + dst++; + break; + } + default: { + Sk4f d4 = load_dst<D>(*dst); + *dst++ = store_dst<D>(lerp(s4, d4, cover)); } } - } // kSRGB + } } else { sk_memset32(dst, store_dst<D>(s4), count); } @@ -274,18 +253,15 @@ template <DstType D> void srcover_n(const SkXfermode*, uint32_t dst[], } else { while (count >= 4 && D == kSRGB_Dst) { auto d = load_4_srgb(dst); - auto s = Sk4x4f::Transpose(src->fVec); #if defined(SK_PMCOLOR_IS_BGRA) SkTSwap(s.r, s.b); #endif - auto invSA = 1.0f - s.a; auto r = s.r + d.r * invSA, g = s.g + d.g * invSA, b = s.b + d.b * invSA, a = s.a + d.a * invSA; - store_4_srgb(dst, Sk4x4f{r,g,b,a}); count -= 4; dst += 4; @@ -322,23 +298,9 @@ static void srcover_linear_dst_1(const SkXfermode*, uint32_t dst[], dst[i] = Sk4f_toL32(r4); } } else { - const Sk4f s4_255 = s4 * Sk4f(255) + Sk4f(0.5f); // +0.5 to pre-bias for rounding - while (count >= 4) { - Sk4f d0 = to_4f(dst[0]); - Sk4f d1 = to_4f(dst[1]); - Sk4f d2 = to_4f(dst[2]); - Sk4f d3 = to_4f(dst[3]); - Sk4f_ToBytes((uint8_t*)dst, - s4_255 + d0 * dst_scale, - s4_255 + d1 * dst_scale, - s4_255 + d2 * dst_scale, - s4_255 + d3 * dst_scale); - dst += 4; - count -= 4; - } for (int i = 0; i < count; ++i) { - Sk4f d4 = to_4f(dst[i]); - dst[i] = to_4b(s4_255 + d4 * dst_scale); + Sk4f d4 = Sk4f_fromL32(dst[i]); + dst[i] = Sk4f_toL32(s4 + d4 * dst_scale); } } } @@ -354,7 +316,8 @@ static void srcover_srgb_dst_1(const SkXfermode*, uint32_t dst[], if (0 == a) { continue; } - Sk4f d4 = srgb_4b_to_linear_unit(dst[i]); + + Sk4f d4 = Sk4f_fromS32(dst[i]); Sk4f r4; if (a != 0xFF) { const Sk4f s4_aa = scale_by_coverage(s4, a); @@ -362,30 +325,27 @@ static void srcover_srgb_dst_1(const SkXfermode*, uint32_t dst[], } else { r4 = s4 + d4 * dst_scale; } - dst[i] = to_4b(linear_unit_to_srgb_255f(r4)); + dst[i] = Sk4f_toS32(r4); } } else { while (count >= 4) { auto d = load_4_srgb(dst); - auto s = Sk4x4f{{ src->r() }, { src->g() }, { src->b() }, { src->a() }}; #if defined(SK_PMCOLOR_IS_BGRA) SkTSwap(s.r, s.b); #endif - auto invSA = 1.0f - s.a; auto r = s.r + d.r * invSA, g = s.g + d.g * invSA, b = s.b + d.b * invSA, a = s.a + d.a * invSA; - store_4_srgb(dst, Sk4x4f{r,g,b,a}); count -= 4; dst += 4; } for (int i = 0; i < count; ++i) { - Sk4f d4 = srgb_4b_to_linear_unit(dst[i]); - dst[i] = to_4b(linear_unit_to_srgb_255f(s4 + d4 * dst_scale)); + Sk4f d4 = Sk4f_fromS32(dst[i]); + dst[i] = Sk4f_toS32(s4 + d4 * dst_scale); } } } @@ -443,26 +403,13 @@ template <DstType D> void src_1_lcd(uint32_t dst[], const SkPM4f* src, int count, const uint16_t lcd[]) { const Sk4f s4 = src->to4f_pmorder(); - if (D == kLinear_Dst) { - // operate in bias-255 space for src and dst - const Sk4f s4bias = s4 * Sk4f(255); - for (int i = 0; i < count; ++i) { - uint16_t rgb = lcd[i]; - if (0 == rgb) { - continue; - } - Sk4f d4bias = to_4f(dst[i]); - dst[i] = to_4b(lerp(s4bias, d4bias, lcd16_to_unit_4f(rgb))) | (SK_A32_MASK << SK_A32_SHIFT); - } - } else { // kSRGB - for (int i = 0; i < count; ++i) { - uint16_t rgb = lcd[i]; - if (0 == rgb) { - continue; - } - Sk4f d4 = load_dst<D>(dst[i]); - dst[i] = store_dst<D>(lerp(s4, d4, lcd16_to_unit_4f(rgb))) | (SK_A32_MASK << SK_A32_SHIFT); + for (int i = 0; i < count; ++i) { + uint16_t rgb = lcd[i]; + if (0 == rgb) { + continue; } + Sk4f d4 = load_dst<D>(dst[i]); + dst[i] = store_dst<D>(lerp(s4, d4, lcd16_to_unit_4f(rgb))) | (SK_A32_MASK << SK_A32_SHIFT); } } diff --git a/src/effects/gradients/Sk4fGradientPriv.h b/src/effects/gradients/Sk4fGradientPriv.h index 9745119fd4..6542683eb0 100644 --- a/src/effects/gradients/Sk4fGradientPriv.h +++ b/src/effects/gradients/Sk4fGradientPriv.h @@ -109,18 +109,16 @@ struct DstTraits<DstType::S32, premul> { using Type = SkPMColor; static Sk4f load(const SkPM4f& c) { - // Prescaling by (255^2, 255^2, 255^2, 255) on load, to avoid a 255 multiply on - // each store (S32 conversion yields a uniform 255 factor). - return c.to4f_pmorder() * Sk4f(255 * 255, 255 * 255, 255 * 255, 255); + return c.to4f_pmorder(); } static void store(const Sk4f& c, Type* dst) { // FIXME: this assumes opaque colors. Handle unpremultiplication. - *dst = to_4b(linear_to_srgb(PM::apply(c))); + *dst = Sk4f_toS32(PM::apply(c)); } static void store(const Sk4f& c, Type* dst, int n) { - sk_memset32(dst, to_4b(linear_to_srgb(PM::apply(c))), n); + sk_memset32(dst, Sk4f_toS32(PM::apply(c)), n); } static void store4x(const Sk4f& c0, const Sk4f& c1, diff --git a/src/effects/gradients/Sk4fLinearGradient.cpp b/src/effects/gradients/Sk4fLinearGradient.cpp index d22dbff8a1..dc6e530a0c 100644 --- a/src/effects/gradients/Sk4fLinearGradient.cpp +++ b/src/effects/gradients/Sk4fLinearGradient.cpp @@ -53,13 +53,10 @@ void ramp<DstType::S32, ApplyPremul::False>(const Sk4f& c, const Sk4f& dc, SkPMC Sk4x4f c4x = Sk4x4f::Transpose(c, c + dc, c + dc * 2, c + dc * 3); while (n >= 4) { - const Sk4x4f cx4s32 = { - c4x.r.rsqrt().invert(), - c4x.g.rsqrt().invert(), - c4x.b.rsqrt().invert(), - c4x.a - }; - cx4s32.transpose((uint8_t*)dst); + ( sk_linear_to_srgb(c4x.r) << 0 + | sk_linear_to_srgb(c4x.g) << 8 + | sk_linear_to_srgb(c4x.b) << 16 + | Sk4f_round(255.0f*c4x.a) << 24).store(dst); c4x.r += dc4x.r; c4x.g += dc4x.g; diff --git a/src/opts/SkBlend_opts.h b/src/opts/SkBlend_opts.h index 4f6d1e9195..1da4c4fb04 100644 --- a/src/opts/SkBlend_opts.h +++ b/src/opts/SkBlend_opts.h @@ -21,19 +21,21 @@ ninja -C out/Release dm nanobench ; and ./out/Release/dm --match Blend_opts ; an namespace SK_OPTS_NS { -static inline void srcover_srgb8888_srgb_1(uint32_t* dst, const uint32_t pixel) { - if ((~pixel & 0xFF000000) == 0) { - *dst = pixel; - } else if ((pixel & 0xFF000000) != 0) { - srcover_blend_srgb8888_srgb_1(dst, srgb_to_linear(to_4f(pixel))); +static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) { + if (src >= 0xFF000000) { + *dst = src; + return; } + auto d = Sk4f_fromS32(*dst), + s = Sk4f_fromS32( src); + *dst = Sk4f_toS32(s + d * (1.0f - s[3])); } static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) { - srcover_srgb8888_srgb_1(dst++, *src++); - srcover_srgb8888_srgb_1(dst++, *src++); - srcover_srgb8888_srgb_1(dst++, *src++); - srcover_srgb8888_srgb_1(dst, *src); + srcover_srgb_srgb_1(dst++, *src++); + srcover_srgb_srgb_1(dst++, *src++); + srcover_srgb_srgb_1(dst++, *src++); + srcover_srgb_srgb_1(dst , *src ); } #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 @@ -87,7 +89,7 @@ static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) { count = count & 3; while (count-- > 0) { - srcover_srgb8888_srgb_1(dst++, *src++); + srcover_srgb_srgb_1(dst++, *src++); } } } @@ -159,7 +161,7 @@ static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) { count = count & 3; while (count-- > 0) { - srcover_srgb8888_srgb_1(dst++, *src++); + srcover_srgb_srgb_1(dst++, *src++); } } } @@ -172,7 +174,7 @@ static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) { int n = SkTMin(ndst, nsrc); for (int i = 0; i < n; i++) { - srcover_srgb8888_srgb_1(dst++, src[i]); + srcover_srgb_srgb_1(dst++, src[i]); } ndst -= n; } diff --git a/tests/SkBlend_optsTest.cpp b/tests/SkBlend_optsTest.cpp index 7665a2d5de..e681374852 100644 --- a/tests/SkBlend_optsTest.cpp +++ b/tests/SkBlend_optsTest.cpp @@ -19,13 +19,19 @@ typedef void (*Blender)(uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc); +static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) { + auto d = Sk4f_fromS32(*dst), + s = Sk4f_fromS32( src); + *dst = Sk4f_toS32(s + d * (1.0f - s[3])); +} + static void brute_force_srcover_srgb_srgb( uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { while (ndst > 0) { int n = SkTMin(ndst, nsrc); for (int i = 0; i < n; i++) { - srcover_blend_srgb8888_srgb_1(dst++, srgb_to_linear(to_4f(src[i]))); + srcover_srgb_srgb_1(dst++, src[i]); } ndst -= n; } @@ -63,6 +69,7 @@ static void test_blender(std::string resourceName, skiatest::Reporter* reporter) SkAutoTArray<uint32_t> testDst(width); for (int y = 0; y < pixmap.height(); y++) { + // TODO: zero is not the most interesting dst to test srcover... sk_bzero(correctDst.get(), width * sizeof(uint32_t)); sk_bzero(testDst.get(), width * sizeof(uint32_t)); brute_force_srcover_srgb_srgb(correctDst.get(), src, width, width); |