about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
-rw-r--r-- bench/SkBlend_optsBench.cpp | 48
-rw-r--r-- src/core/SkColor.cpp | 22
-rw-r--r-- src/core/SkColorMatrixFilterRowMajor255.cpp | 2
-rw-r--r-- src/core/SkLinearBitmapPipeline_sample.h | 9
-rw-r--r-- src/core/SkPM4fPriv.h | 145
-rw-r--r-- src/core/SkSpanProcs.cpp | 4
-rw-r--r-- src/core/SkXfermode4f.cpp | 157
-rw-r--r-- src/effects/gradients/Sk4fGradientPriv.h | 8
-rw-r--r-- src/effects/gradients/Sk4fLinearGradient.cpp | 11
-rw-r--r-- src/opts/SkBlend_opts.h | 26
-rw-r--r-- tests/SkBlend_optsTest.cpp | 9
11 files changed, 159 insertions, 282 deletions
diff --git a/bench/SkBlend_optsBench.cpp b/bench/SkBlend_optsBench.cpp
index c290714128..184e93335e 100644
--- a/bench/SkBlend_optsBench.cpp
+++ b/bench/SkBlend_optsBench.cpp
@@ -19,13 +19,39 @@
#define INNER_LOOPS 10
+static inline void brute_srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) {
+ auto d = Sk4f_fromS32(*dst),
+ s = Sk4f_fromS32( src);
+ *dst = Sk4f_toS32(s + d * (1.0f - s[3]));
+}
+
+static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) {
+ if (src >= 0xFF000000) {
+ *dst = src;
+ return;
+ }
+ brute_srcover_srgb_srgb_1(dst, src);
+}
+
static void brute_force_srcover_srgb_srgb(
uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
while (ndst > 0) {
int n = SkTMin(ndst, nsrc);
for (int i = 0; i < n; i++) {
- srcover_blend_srgb8888_srgb_1(dst++, srgb_to_linear(to_4f(src[i])));
+ brute_srcover_srgb_srgb_1(dst++, src[i]);
+ }
+ ndst -= n;
+ }
+}
+
+static void trivial_srcover_srgb_srgb(
+ uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
+ while (ndst > 0) {
+ int n = SkTMin(ndst, nsrc);
+
+ for (int i = 0; i < n; i++) {
+ srcover_srgb_srgb_1(dst++, src[i]);
}
ndst -= n;
}
@@ -36,8 +62,8 @@ static void best_non_simd_srcover_srgb_srgb(
uint64_t* ddst = reinterpret_cast<uint64_t*>(dst);
auto srcover_srgb_srgb_2 = [](uint32_t* dst, const uint32_t* src) {
- srcover_srgb8888_srgb_1(dst++, *src++);
- srcover_srgb8888_srgb_1(dst, *src);
+ srcover_srgb_srgb_1(dst++, *src++);
+ srcover_srgb_srgb_1(dst, *src);
};
while (ndst >0) {
@@ -62,24 +88,12 @@ static void best_non_simd_srcover_srgb_srgb(
} while (dsrc < end);
if ((count & 1) != 0) {
- srcover_srgb8888_srgb_1(reinterpret_cast<uint32_t*>(ddst),
- *reinterpret_cast<const uint32_t*>(dsrc));
+ srcover_srgb_srgb_1(reinterpret_cast<uint32_t*>(ddst),
+ *reinterpret_cast<const uint32_t*>(dsrc));
}
}
}
-static void trivial_srcover_srgb_srgb(
- uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
- while (ndst > 0) {
- int n = SkTMin(ndst, nsrc);
-
- for (int i = 0; i < n; i++) {
- srcover_srgb8888_srgb_1(dst++, src[i]);
- }
- ndst -= n;
- }
-}
-
class SrcOverVSkOptsBruteForce {
public:
static SkString Name() { return SkString{"VSkOptsBruteForce"}; }
diff --git a/src/core/SkColor.cpp b/src/core/SkColor.cpp
index a1404a27e3..39e9aafa92 100644
--- a/src/core/SkColor.cpp
+++ b/src/core/SkColor.cpp
@@ -106,10 +106,7 @@ SkColor SkHSVToColor(U8CPU a, const SkScalar hsv[3]) {
#include "SkHalf.h"
SkPM4f SkPM4f::FromPMColor(SkPMColor c) {
- Sk4f value = to_4f_rgba(c);
- SkPM4f c4;
- (value * Sk4f(1.0f / 255)).store(&c4);
- return c4;
+ return From4f(swizzle_rb_if_bgra(Sk4f_fromL32(c)));
}
SkColor4f SkPM4f::unpremul() const {
@@ -152,21 +149,14 @@ void SkPM4f::assertIsUnit() const {
///////////////////////////////////////////////////////////////////////////////////////////////////
-SkColor4f SkColor4f::FromColor(SkColor c) {
- Sk4f value = SkNx_shuffle<2,1,0,3>(SkNx_cast<float>(Sk4b::Load(&c)));
- SkColor4f c4;
- (value * Sk4f(1.0f / 255)).store(&c4);
- c4.fR = srgb_to_linear(c4.fR);
- c4.fG = srgb_to_linear(c4.fG);
- c4.fB = srgb_to_linear(c4.fB);
- return c4;
+SkColor4f SkColor4f::FromColor(SkColor bgra) {
+ SkColor4f rgba;
+ swizzle_rb(Sk4f_fromS32(bgra)).store(rgba.vec());
+ return rgba;
}
SkColor SkColor4f::toSkColor() const {
- SkColor result;
- Sk4f value = Sk4f(linear_to_srgb(fB), linear_to_srgb(fG), linear_to_srgb(fR), fA);
- SkNx_cast<uint8_t>(value * Sk4f(255) + Sk4f(0.5f)).store(&result);
- return result;
+ return Sk4f_toS32(swizzle_rb(Sk4f::Load(this->vec())));
}
SkColor4f SkColor4f::Pin(float r, float g, float b, float a) {
diff --git a/src/core/SkColorMatrixFilterRowMajor255.cpp b/src/core/SkColorMatrixFilterRowMajor255.cpp
index cdfd1df496..29a3f107b8 100644
--- a/src/core/SkColorMatrixFilterRowMajor255.cpp
+++ b/src/core/SkColorMatrixFilterRowMajor255.cpp
@@ -126,7 +126,7 @@ struct SkPMColorAdaptor {
return round(swizzle_rb_if_bgra(c4));
}
static Sk4f To4f(SkPMColor c) {
- return to_4f(c) * Sk4f(1.0f/255);
+ return Sk4f_fromL32(c);
}
};
void SkColorMatrixFilterRowMajor255::filterSpan(const SkPMColor src[], int count, SkPMColor dst[]) const {
diff --git a/src/core/SkLinearBitmapPipeline_sample.h b/src/core/SkLinearBitmapPipeline_sample.h
index 86ad6e146f..759075b3e5 100644
--- a/src/core/SkLinearBitmapPipeline_sample.h
+++ b/src/core/SkLinearBitmapPipeline_sample.h
@@ -170,11 +170,10 @@ public:
PixelConverter(const SkPixmap& srcPixmap) { }
Sk4f toSk4f(Element pixel) const {
- float gray = pixel * (1.0f/255.0f);
- Sk4f result = Sk4f{gray, gray, gray, 1.0f};
- return gammaType == kSRGB_SkGammaType
- ? srgb_to_linear(result)
- : result;
+ float gray = (gammaType == kSRGB_SkGammaType)
+ ? sk_linear_from_srgb[pixel]
+ : pixel * (1/255.0f);
+ return {gray, gray, gray, 1.0f};
}
};
diff --git a/src/core/SkPM4fPriv.h b/src/core/SkPM4fPriv.h
index 57a44c1cb2..89a0caeb70 100644
--- a/src/core/SkPM4fPriv.h
+++ b/src/core/SkPM4fPriv.h
@@ -10,142 +10,65 @@
#include "SkColorPriv.h"
#include "SkPM4f.h"
+#include "SkSRGB.h"
-static inline float get_alpha(const Sk4f& f4) {
- return f4[SkPM4f::A];
+static inline Sk4f set_alpha(const Sk4f& px, float alpha) {
+ return { px[0], px[1], px[2], alpha };
}
-static inline Sk4f set_alpha(const Sk4f& f4, float alpha) {
- static_assert(3 == SkPM4f::A, "");
- return Sk4f(f4[0], f4[1], f4[2], alpha);
+static inline float get_alpha(const Sk4f& px) {
+ return px[3];
}
-static inline uint32_t to_4b(const Sk4f& f4) {
- uint32_t b4;
- SkNx_cast<uint8_t>(f4).store((uint8_t*)&b4);
- return b4;
-}
-
-static inline Sk4f to_4f(uint32_t b4) {
- return SkNx_cast<float>(Sk4b::Load((const uint8_t*)&b4));
-}
-
-static inline Sk4f to_4f_rgba(uint32_t b4) {
- return swizzle_rb_if_bgra(to_4f(b4));
-}
-
-static inline Sk4f srgb_to_linear(const Sk4f& s4) {
- return set_alpha(s4 * s4, get_alpha(s4));
-}
-
-static inline Sk4f linear_to_srgb(const Sk4f& l4) {
- return set_alpha(l4.rsqrt().invert(), get_alpha(l4));
-}
-
-static inline float srgb_to_linear(float x) {
- return x * x;
-}
-
-static inline float linear_to_srgb(float x) {
- return sqrtf(x);
-}
-
-static void assert_unit(float x) {
- SkASSERT(x >= 0 && x <= 1);
-}
-
-static inline float exact_srgb_to_linear(float x) {
- assert_unit(x);
- float linear;
- if (x <= 0.04045) {
- linear = x / 12.92f;
- } else {
- linear = powf((x + 0.055f) / 1.055f, 2.4f);
- }
- assert_unit(linear);
- return linear;
-}
-static inline float exact_linear_to_srgb(float x) {
- assert_unit(x);
- float srgb;
- if (x <= 0.0031308f) {
- srgb = x * 12.92f;
- } else {
- srgb = 1.055f * powf(x, 0.41666667f) - 0.055f;
- }
- assert_unit(srgb);
- return srgb;
+static inline Sk4f Sk4f_fromL32(uint32_t px) {
+ return SkNx_cast<float>(Sk4b::Load(&px)) * (1/255.0f);
}
-static inline Sk4f exact_srgb_to_linear(const Sk4f& x) {
- Sk4f linear(exact_srgb_to_linear(x[0]),
- exact_srgb_to_linear(x[1]),
- exact_srgb_to_linear(x[2]), 1);
- return set_alpha(linear, get_alpha(x));
+static inline Sk4f Sk4f_fromS32(uint32_t px) {
+ return { sk_linear_from_srgb[(px >> 0) & 0xff],
+ sk_linear_from_srgb[(px >> 8) & 0xff],
+ sk_linear_from_srgb[(px >> 16) & 0xff],
+ (1/255.0f) * (px >> 24) };
}
-static inline Sk4f exact_linear_to_srgb(const Sk4f& x) {
- Sk4f srgb(exact_linear_to_srgb(x[0]),
- exact_linear_to_srgb(x[1]),
- exact_linear_to_srgb(x[2]), 1);
- return set_alpha(srgb, get_alpha(x));
+static inline uint32_t Sk4f_toL32(const Sk4f& px) {
+ uint32_t l32;
+ SkNx_cast<uint8_t>(Sk4f_round(px * 255.0f)).store(&l32);
+ return l32;
}
-///////////////////////////////////////////////////////////////////////////////////////////////////
+static inline uint32_t Sk4f_toS32(const Sk4f& px) {
+ Sk4i rgb = sk_linear_to_srgb(px),
+ srgb = { rgb[0], rgb[1], rgb[2], (int)(255.0f * px[3] + 0.5f) };
-static inline Sk4f Sk4f_fromL32(uint32_t src) {
- return to_4f(src) * Sk4f(1.0f/255);
+ uint32_t s32;
+ SkNx_cast<uint8_t>(srgb).store(&s32);
+ return s32;
}
-static inline Sk4f Sk4f_fromS32(uint32_t src) {
- return srgb_to_linear(to_4f(src) * Sk4f(1.0f/255));
-}
-// Color handling:
+// SkColor handling:
// SkColor has an ordering of (b, g, r, a) if cast to an Sk4f, so the code swizzles r and b to
// produce the needed (r, g, b, a) ordering.
static inline Sk4f Sk4f_from_SkColor(SkColor color) {
return swizzle_rb(Sk4f_fromS32(color));
}
-static inline uint32_t Sk4f_toL32(const Sk4f& x4) {
- return to_4b(x4 * Sk4f(255) + Sk4f(0.5f));
+static inline void assert_unit(float x) {
+ SkASSERT(0 <= x && x <= 1);
}
-static inline uint32_t Sk4f_toS32(const Sk4f& x4) {
- return to_4b(linear_to_srgb(x4) * Sk4f(255) + Sk4f(0.5f));
-}
-
-static inline Sk4f exact_Sk4f_fromS32(uint32_t src) {
- return exact_srgb_to_linear(to_4f(src) * Sk4f(1.0f/255));
-}
-static inline uint32_t exact_Sk4f_toS32(const Sk4f& x4) {
- return to_4b(exact_linear_to_srgb(x4) * Sk4f(255) + Sk4f(0.5f));
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-// An implementation of SrcOver from bytes to bytes in linear space that takes advantage of the
-// observation that the 255's cancel.
-// invA = 1 - (As / 255);
-//
-// R = 255 * sqrt((Rs/255)^2 + (Rd/255)^2 * invA)
-// => R = 255 * sqrt((Rs^2 + Rd^2 * invA)/255^2)
-// => R = sqrt(Rs^2 + Rd^2 * invA)
-// Note: src is assumed to be linear.
-static inline void srcover_blend_srgb8888_srgb_1(uint32_t* dst, const Sk4f& src) {
- Sk4f d = srgb_to_linear(to_4f(*dst));
- Sk4f invAlpha = 1.0f - Sk4f{src[SkPM4f::A]} * (1.0f / 255.0f);
- Sk4f r = linear_to_srgb(src + d * invAlpha) + 0.5f;
- *dst = to_4b(r);
-}
-
-static inline void srcover_srgb8888_srgb_1(uint32_t* dst, const uint32_t pixel) {
- if ((~pixel & 0xFF000000) == 0) {
- *dst = pixel;
- } else if ((pixel & 0xFF000000) != 0) {
- srcover_blend_srgb8888_srgb_1(dst, srgb_to_linear(to_4f(pixel)));
+static inline float exact_srgb_to_linear(float srgb) {
+ assert_unit(srgb);
+ float linear;
+ if (srgb <= 0.04045) {
+ linear = srgb / 12.92f;
+ } else {
+ linear = powf((srgb + 0.055f) / 1.055f, 2.4f);
}
+ assert_unit(linear);
+ return linear;
}
#endif
diff --git a/src/core/SkSpanProcs.cpp b/src/core/SkSpanProcs.cpp
index 87dcbc0ee7..b2e9914a17 100644
--- a/src/core/SkSpanProcs.cpp
+++ b/src/core/SkSpanProcs.cpp
@@ -22,7 +22,7 @@ static void load_l32(const SkPixmap& src, int x, int y, SkPM4f span[], int count
SkASSERT(src.addr32(x + count - 1, y));
for (int i = 0; i < count; ++i) {
- (to_4f_rgba(addr[i]) * Sk4f(1.0f/255)).store(span[i].fVec);
+ swizzle_rb_if_bgra(Sk4f_fromL32(addr[i])).store(span[i].fVec);
}
}
@@ -32,7 +32,7 @@ static void load_s32(const SkPixmap& src, int x, int y, SkPM4f span[], int count
SkASSERT(src.addr32(x + count - 1, y));
for (int i = 0; i < count; ++i) {
- srgb_to_linear(to_4f_rgba(addr[i]) * Sk4f(1.0f/255)).store(span[i].fVec);
+ swizzle_rb_if_bgra(Sk4f_fromS32(addr[i])).store(span[i].fVec);
}
}
diff --git a/src/core/SkXfermode4f.cpp b/src/core/SkXfermode4f.cpp
index d861973dbf..87a12a7df6 100644
--- a/src/core/SkXfermode4f.cpp
+++ b/src/core/SkXfermode4f.cpp
@@ -35,44 +35,40 @@ template <DstType D> Sk4f load_dst(SkPMColor dstC) {
return (D == kSRGB_Dst) ? Sk4f_fromS32(dstC) : Sk4f_fromL32(dstC);
}
-static Sk4f srgb_4b_to_linear_unit(SkPMColor dstC) {
- return Sk4f_fromS32(dstC);
-}
-
template <DstType D> uint32_t store_dst(const Sk4f& x4) {
return (D == kSRGB_Dst) ? Sk4f_toS32(x4) : Sk4f_toL32(x4);
}
-static Sk4f linear_unit_to_srgb_255f(const Sk4f& l4) {
- return linear_to_srgb(l4) * Sk4f(255) + Sk4f(0.5f);
-}
+static Sk4x4f load_4_srgb(const void* vptr) {
+ auto ptr = (const uint32_t*)vptr;
+
+ Sk4x4f rgba;
+
+ rgba.r = { sk_linear_from_srgb[(ptr[0] >> 0) & 0xff],
+ sk_linear_from_srgb[(ptr[1] >> 0) & 0xff],
+ sk_linear_from_srgb[(ptr[2] >> 0) & 0xff],
+ sk_linear_from_srgb[(ptr[3] >> 0) & 0xff] };
-// Load 4 interlaced 8888 sRGB pixels as an Sk4x4f, transposed and converted to float.
-static Sk4x4f load_4_srgb(const void* ptr) {
- auto p = Sk4x4f::Transpose((const uint8_t*)ptr);
+ rgba.g = { sk_linear_from_srgb[(ptr[0] >> 8) & 0xff],
+ sk_linear_from_srgb[(ptr[1] >> 8) & 0xff],
+ sk_linear_from_srgb[(ptr[2] >> 8) & 0xff],
+ sk_linear_from_srgb[(ptr[3] >> 8) & 0xff] };
- // Scale to [0,1].
- p.r *= 1/255.0f;
- p.g *= 1/255.0f;
- p.b *= 1/255.0f;
- p.a *= 1/255.0f;
+ rgba.b = { sk_linear_from_srgb[(ptr[0] >> 16) & 0xff],
+ sk_linear_from_srgb[(ptr[1] >> 16) & 0xff],
+ sk_linear_from_srgb[(ptr[2] >> 16) & 0xff],
+ sk_linear_from_srgb[(ptr[3] >> 16) & 0xff] };
- // Apply approximate sRGB gamma correction to convert to linear (as if gamma were 2).
- p.r *= p.r;
- p.g *= p.g;
- p.b *= p.b;
+ rgba.a = SkNx_cast<float>((Sk4i::Load(ptr) >> 24) & 0xff) * (1/255.0f);
- return p;
+ return rgba;
}
-// Store an Sk4x4f back to 4 interlaced 8888 sRGB pixels.
static void store_4_srgb(void* ptr, const Sk4x4f& p) {
- // Convert back to sRGB and [0,255], again approximating sRGB as gamma == 2.
- auto r = p.r.rsqrt().invert() * 255.0f + 0.5f,
- g = p.g.rsqrt().invert() * 255.0f + 0.5f,
- b = p.b.rsqrt().invert() * 255.0f + 0.5f,
- a = p.a * 255.0f + 0.5f;
- Sk4x4f{r,g,b,a}.transpose((uint8_t*)ptr);
+ ( sk_linear_to_srgb(p.r) << 0
+ | sk_linear_to_srgb(p.g) << 8
+ | sk_linear_to_srgb(p.b) << 16
+ | Sk4f_round(255.0f*p.a) << 24).store(ptr);
}
///////////////////////////////////////////////////////////////////////////////////////////////////
@@ -197,41 +193,24 @@ template <DstType D> void src_1(const SkXfermode*, uint32_t dst[],
const Sk4f s4 = src->to4f_pmorder();
if (aa) {
- if (D == kLinear_Dst) {
- // operate in bias-255 space for src and dst
- const Sk4f& s4_255 = s4 * Sk4f(255);
- while (count >= 4) {
- Sk4f aa4 = SkNx_cast<float>(Sk4b::Load(aa)) * Sk4f(1/255.f);
- Sk4f r0 = lerp(s4_255, to_4f(dst[0]), Sk4f(aa4[0])) + Sk4f(0.5f);
- Sk4f r1 = lerp(s4_255, to_4f(dst[1]), Sk4f(aa4[1])) + Sk4f(0.5f);
- Sk4f r2 = lerp(s4_255, to_4f(dst[2]), Sk4f(aa4[2])) + Sk4f(0.5f);
- Sk4f r3 = lerp(s4_255, to_4f(dst[3]), Sk4f(aa4[3])) + Sk4f(0.5f);
- Sk4f_ToBytes((uint8_t*)dst, r0, r1, r2, r3);
-
- dst += 4;
- aa += 4;
- count -= 4;
- }
- } else { // kSRGB
- SkPMColor srcColor = store_dst<D>(s4);
- while (count-- > 0) {
- SkAlpha cover = *aa++;
- switch (cover) {
- case 0xFF: {
- *dst++ = srcColor;
- break;
- }
- case 0x00: {
- dst++;
- break;
- }
- default: {
- Sk4f d4 = load_dst<D>(*dst);
- *dst++ = store_dst<D>(lerp(s4, d4, cover));
- }
+ SkPMColor srcColor = store_dst<D>(s4);
+ while (count-- > 0) {
+ SkAlpha cover = *aa++;
+ switch (cover) {
+ case 0xFF: {
+ *dst++ = srcColor;
+ break;
+ }
+ case 0x00: {
+ dst++;
+ break;
+ }
+ default: {
+ Sk4f d4 = load_dst<D>(*dst);
+ *dst++ = store_dst<D>(lerp(s4, d4, cover));
}
}
- } // kSRGB
+ }
} else {
sk_memset32(dst, store_dst<D>(s4), count);
}
@@ -274,18 +253,15 @@ template <DstType D> void srcover_n(const SkXfermode*, uint32_t dst[],
} else {
while (count >= 4 && D == kSRGB_Dst) {
auto d = load_4_srgb(dst);
-
auto s = Sk4x4f::Transpose(src->fVec);
#if defined(SK_PMCOLOR_IS_BGRA)
SkTSwap(s.r, s.b);
#endif
-
auto invSA = 1.0f - s.a;
auto r = s.r + d.r * invSA,
g = s.g + d.g * invSA,
b = s.b + d.b * invSA,
a = s.a + d.a * invSA;
-
store_4_srgb(dst, Sk4x4f{r,g,b,a});
count -= 4;
dst += 4;
@@ -322,23 +298,9 @@ static void srcover_linear_dst_1(const SkXfermode*, uint32_t dst[],
dst[i] = Sk4f_toL32(r4);
}
} else {
- const Sk4f s4_255 = s4 * Sk4f(255) + Sk4f(0.5f); // +0.5 to pre-bias for rounding
- while (count >= 4) {
- Sk4f d0 = to_4f(dst[0]);
- Sk4f d1 = to_4f(dst[1]);
- Sk4f d2 = to_4f(dst[2]);
- Sk4f d3 = to_4f(dst[3]);
- Sk4f_ToBytes((uint8_t*)dst,
- s4_255 + d0 * dst_scale,
- s4_255 + d1 * dst_scale,
- s4_255 + d2 * dst_scale,
- s4_255 + d3 * dst_scale);
- dst += 4;
- count -= 4;
- }
for (int i = 0; i < count; ++i) {
- Sk4f d4 = to_4f(dst[i]);
- dst[i] = to_4b(s4_255 + d4 * dst_scale);
+ Sk4f d4 = Sk4f_fromL32(dst[i]);
+ dst[i] = Sk4f_toL32(s4 + d4 * dst_scale);
}
}
}
@@ -354,7 +316,8 @@ static void srcover_srgb_dst_1(const SkXfermode*, uint32_t dst[],
if (0 == a) {
continue;
}
- Sk4f d4 = srgb_4b_to_linear_unit(dst[i]);
+
+ Sk4f d4 = Sk4f_fromS32(dst[i]);
Sk4f r4;
if (a != 0xFF) {
const Sk4f s4_aa = scale_by_coverage(s4, a);
@@ -362,30 +325,27 @@ static void srcover_srgb_dst_1(const SkXfermode*, uint32_t dst[],
} else {
r4 = s4 + d4 * dst_scale;
}
- dst[i] = to_4b(linear_unit_to_srgb_255f(r4));
+ dst[i] = Sk4f_toS32(r4);
}
} else {
while (count >= 4) {
auto d = load_4_srgb(dst);
-
auto s = Sk4x4f{{ src->r() }, { src->g() }, { src->b() }, { src->a() }};
#if defined(SK_PMCOLOR_IS_BGRA)
SkTSwap(s.r, s.b);
#endif
-
auto invSA = 1.0f - s.a;
auto r = s.r + d.r * invSA,
g = s.g + d.g * invSA,
b = s.b + d.b * invSA,
a = s.a + d.a * invSA;
-
store_4_srgb(dst, Sk4x4f{r,g,b,a});
count -= 4;
dst += 4;
}
for (int i = 0; i < count; ++i) {
- Sk4f d4 = srgb_4b_to_linear_unit(dst[i]);
- dst[i] = to_4b(linear_unit_to_srgb_255f(s4 + d4 * dst_scale));
+ Sk4f d4 = Sk4f_fromS32(dst[i]);
+ dst[i] = Sk4f_toS32(s4 + d4 * dst_scale);
}
}
}
@@ -443,26 +403,13 @@ template <DstType D>
void src_1_lcd(uint32_t dst[], const SkPM4f* src, int count, const uint16_t lcd[]) {
const Sk4f s4 = src->to4f_pmorder();
- if (D == kLinear_Dst) {
- // operate in bias-255 space for src and dst
- const Sk4f s4bias = s4 * Sk4f(255);
- for (int i = 0; i < count; ++i) {
- uint16_t rgb = lcd[i];
- if (0 == rgb) {
- continue;
- }
- Sk4f d4bias = to_4f(dst[i]);
- dst[i] = to_4b(lerp(s4bias, d4bias, lcd16_to_unit_4f(rgb))) | (SK_A32_MASK << SK_A32_SHIFT);
- }
- } else { // kSRGB
- for (int i = 0; i < count; ++i) {
- uint16_t rgb = lcd[i];
- if (0 == rgb) {
- continue;
- }
- Sk4f d4 = load_dst<D>(dst[i]);
- dst[i] = store_dst<D>(lerp(s4, d4, lcd16_to_unit_4f(rgb))) | (SK_A32_MASK << SK_A32_SHIFT);
+ for (int i = 0; i < count; ++i) {
+ uint16_t rgb = lcd[i];
+ if (0 == rgb) {
+ continue;
}
+ Sk4f d4 = load_dst<D>(dst[i]);
+ dst[i] = store_dst<D>(lerp(s4, d4, lcd16_to_unit_4f(rgb))) | (SK_A32_MASK << SK_A32_SHIFT);
}
}
diff --git a/src/effects/gradients/Sk4fGradientPriv.h b/src/effects/gradients/Sk4fGradientPriv.h
index 9745119fd4..6542683eb0 100644
--- a/src/effects/gradients/Sk4fGradientPriv.h
+++ b/src/effects/gradients/Sk4fGradientPriv.h
@@ -109,18 +109,16 @@ struct DstTraits<DstType::S32, premul> {
using Type = SkPMColor;
static Sk4f load(const SkPM4f& c) {
- // Prescaling by (255^2, 255^2, 255^2, 255) on load, to avoid a 255 multiply on
- // each store (S32 conversion yields a uniform 255 factor).
- return c.to4f_pmorder() * Sk4f(255 * 255, 255 * 255, 255 * 255, 255);
+ return c.to4f_pmorder();
}
static void store(const Sk4f& c, Type* dst) {
// FIXME: this assumes opaque colors. Handle unpremultiplication.
- *dst = to_4b(linear_to_srgb(PM::apply(c)));
+ *dst = Sk4f_toS32(PM::apply(c));
}
static void store(const Sk4f& c, Type* dst, int n) {
- sk_memset32(dst, to_4b(linear_to_srgb(PM::apply(c))), n);
+ sk_memset32(dst, Sk4f_toS32(PM::apply(c)), n);
}
static void store4x(const Sk4f& c0, const Sk4f& c1,
diff --git a/src/effects/gradients/Sk4fLinearGradient.cpp b/src/effects/gradients/Sk4fLinearGradient.cpp
index d22dbff8a1..dc6e530a0c 100644
--- a/src/effects/gradients/Sk4fLinearGradient.cpp
+++ b/src/effects/gradients/Sk4fLinearGradient.cpp
@@ -53,13 +53,10 @@ void ramp<DstType::S32, ApplyPremul::False>(const Sk4f& c, const Sk4f& dc, SkPMC
Sk4x4f c4x = Sk4x4f::Transpose(c, c + dc, c + dc * 2, c + dc * 3);
while (n >= 4) {
- const Sk4x4f cx4s32 = {
- c4x.r.rsqrt().invert(),
- c4x.g.rsqrt().invert(),
- c4x.b.rsqrt().invert(),
- c4x.a
- };
- cx4s32.transpose((uint8_t*)dst);
+ ( sk_linear_to_srgb(c4x.r) << 0
+ | sk_linear_to_srgb(c4x.g) << 8
+ | sk_linear_to_srgb(c4x.b) << 16
+ | Sk4f_round(255.0f*c4x.a) << 24).store(dst);
c4x.r += dc4x.r;
c4x.g += dc4x.g;
diff --git a/src/opts/SkBlend_opts.h b/src/opts/SkBlend_opts.h
index 4f6d1e9195..1da4c4fb04 100644
--- a/src/opts/SkBlend_opts.h
+++ b/src/opts/SkBlend_opts.h
@@ -21,19 +21,21 @@ ninja -C out/Release dm nanobench ; and ./out/Release/dm --match Blend_opts ; an
namespace SK_OPTS_NS {
-static inline void srcover_srgb8888_srgb_1(uint32_t* dst, const uint32_t pixel) {
- if ((~pixel & 0xFF000000) == 0) {
- *dst = pixel;
- } else if ((pixel & 0xFF000000) != 0) {
- srcover_blend_srgb8888_srgb_1(dst, srgb_to_linear(to_4f(pixel)));
+static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) {
+ if (src >= 0xFF000000) {
+ *dst = src;
+ return;
}
+ auto d = Sk4f_fromS32(*dst),
+ s = Sk4f_fromS32( src);
+ *dst = Sk4f_toS32(s + d * (1.0f - s[3]));
}
static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) {
- srcover_srgb8888_srgb_1(dst++, *src++);
- srcover_srgb8888_srgb_1(dst++, *src++);
- srcover_srgb8888_srgb_1(dst++, *src++);
- srcover_srgb8888_srgb_1(dst, *src);
+ srcover_srgb_srgb_1(dst++, *src++);
+ srcover_srgb_srgb_1(dst++, *src++);
+ srcover_srgb_srgb_1(dst++, *src++);
+ srcover_srgb_srgb_1(dst , *src );
}
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
@@ -87,7 +89,7 @@ static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) {
count = count & 3;
while (count-- > 0) {
- srcover_srgb8888_srgb_1(dst++, *src++);
+ srcover_srgb_srgb_1(dst++, *src++);
}
}
}
@@ -159,7 +161,7 @@ static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) {
count = count & 3;
while (count-- > 0) {
- srcover_srgb8888_srgb_1(dst++, *src++);
+ srcover_srgb_srgb_1(dst++, *src++);
}
}
}
@@ -172,7 +174,7 @@ static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) {
int n = SkTMin(ndst, nsrc);
for (int i = 0; i < n; i++) {
- srcover_srgb8888_srgb_1(dst++, src[i]);
+ srcover_srgb_srgb_1(dst++, src[i]);
}
ndst -= n;
}
diff --git a/tests/SkBlend_optsTest.cpp b/tests/SkBlend_optsTest.cpp
index 7665a2d5de..e681374852 100644
--- a/tests/SkBlend_optsTest.cpp
+++ b/tests/SkBlend_optsTest.cpp
@@ -19,13 +19,19 @@
typedef void (*Blender)(uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc);
+static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) {
+ auto d = Sk4f_fromS32(*dst),
+ s = Sk4f_fromS32( src);
+ *dst = Sk4f_toS32(s + d * (1.0f - s[3]));
+}
+
static void brute_force_srcover_srgb_srgb(
uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
while (ndst > 0) {
int n = SkTMin(ndst, nsrc);
for (int i = 0; i < n; i++) {
- srcover_blend_srgb8888_srgb_1(dst++, srgb_to_linear(to_4f(src[i])));
+ srcover_srgb_srgb_1(dst++, src[i]);
}
ndst -= n;
}
@@ -63,6 +69,7 @@ static void test_blender(std::string resourceName, skiatest::Reporter* reporter)
SkAutoTArray<uint32_t> testDst(width);
for (int y = 0; y < pixmap.height(); y++) {
+ // TODO: zero is not the most interesting dst to test srcover...
sk_bzero(correctDst.get(), width * sizeof(uint32_t));
sk_bzero(testDst.get(), width * sizeof(uint32_t));
brute_force_srcover_srgb_srgb(correctDst.get(), src, width, width);