aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/core/SkXfermode4f.cpp
diff options
context:
space:
mode:
authorGravatar mtklein <mtklein@chromium.org>2016-03-22 18:30:44 -0700
committerGravatar Commit bot <commit-bot@chromium.org>2016-03-22 18:30:45 -0700
commit0dfe89248972f701a0a96812ffed801bcc0387b0 (patch)
tree1b99cd8fb5d2c0660ef2794afe836c622188ba1a /src/core/SkXfermode4f.cpp
parent5b1a2ca8c5c7b33aee7c1260d1161ba6c8cd6820 (diff)
Use Sk4x4f in srcover_srgb_dst_1.
I've also pulled out the common parts shared with sRGB srcover_n, and rearranged to make the similarities a bit more clear. This speeds up about 25%. BUG=skia: GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1829513002 Review URL: https://codereview.chromium.org/1829513002
Diffstat (limited to 'src/core/SkXfermode4f.cpp')
-rw-r--r--src/core/SkXfermode4f.cpp140
1 files changed, 67 insertions, 73 deletions
diff --git a/src/core/SkXfermode4f.cpp b/src/core/SkXfermode4f.cpp
index 883cd12757..267985d8a2 100644
--- a/src/core/SkXfermode4f.cpp
+++ b/src/core/SkXfermode4f.cpp
@@ -47,6 +47,34 @@ static Sk4f linear_unit_to_srgb_255f(const Sk4f& l4) {
return linear_to_srgb(l4) * Sk4f(255) + Sk4f(0.5f);
}
+// Load 4 interlaced 8888 sRGB pixels as an Sk4x4f, transposed and converted to float.
+static Sk4x4f load_4_srgb(const void* ptr) {
+ auto p = Sk4x4f::Transpose((const uint8_t*)ptr);
+
+ // Scale to [0,1].
+ p.r *= 1/255.0f;
+ p.g *= 1/255.0f;
+ p.b *= 1/255.0f;
+ p.a *= 1/255.0f;
+
+ // Apply approximate sRGB gamma correction to convert to linear (as if gamma were 2).
+ p.r *= p.r;
+ p.g *= p.g;
+ p.b *= p.b;
+
+ return p;
+}
+
+// Store an Sk4x4f back to 4 interlaced 8888 sRGB pixels.
+static void store_4_srgb(void* ptr, const Sk4x4f& p) {
+ // Convert back to sRGB and [0,255], again approximating sRGB as gamma == 2.
+ auto r = p.r.sqrt() * 255.0f + 0.5f,
+ g = p.g.sqrt() * 255.0f + 0.5f,
+ b = p.b.sqrt() * 255.0f + 0.5f,
+ a = p.a * 255.0f + 0.5f;
+ Sk4x4f{r,g,b,a}.transpose((uint8_t*)ptr);
+}
+
///////////////////////////////////////////////////////////////////////////////////////////////////
template <DstType D> void general_1(const SkXfermode* xfer, uint32_t dst[],
@@ -235,60 +263,6 @@ const SkXfermode::D32Proc gProcs_Dst[] = {
///////////////////////////////////////////////////////////////////////////////////////////////////
-static void srcover_n_srgb_bw(uint32_t dst[], const SkPM4f src[], int count) {
- while (count >= 4) {
- // Load 4 sRGB RGBA/BGRA 8888 dst pixels.
- // We'll write most of this as if they're RGBA, and just swizzle the src pixels to match.
- auto d = Sk4x4f::Transpose((const uint8_t*)dst);
-
- // Scale to [0,1].
- d.r *= 1/255.0f;
- d.g *= 1/255.0f;
- d.b *= 1/255.0f;
- d.a *= 1/255.0f;
-
- // Apply approximate sRGB gamma correction to convert to linear (as if gamma were 2).
- d.r *= d.r;
- d.g *= d.g;
- d.b *= d.b;
-
- // Load 4 linear float src pixels.
- auto s = Sk4x4f::Transpose(src->fVec);
-
- // Match color order with destination, if necessary.
- #if defined(SK_PMCOLOR_IS_BGRA)
- SkTSwap(s.r, s.b);
- #endif
-
- // Now, the meat of what we wanted to do... perform the srcover blend.
- auto invSA = 1.0f - s.a;
- auto r = s.r + d.r * invSA,
- g = s.g + d.g * invSA,
- b = s.b + d.b * invSA,
- a = s.a + d.a * invSA;
-
- // Convert back to sRGB and [0,255], again approximating sRGB as gamma == 2.
- r = r.sqrt() * 255.0f + 0.5f;
- g = g.sqrt() * 255.0f + 0.5f;
- b = b.sqrt() * 255.0f + 0.5f;
- a = a * 255.0f + 0.5f;
-
- Sk4x4f{r,g,b,a}.transpose((uint8_t*)dst);
-
- count -= 4;
- dst += 4;
- src += 4;
- }
-
- // This should look just like the non-specialized case in srcover_n.
- for (int i = 0; i < count; ++i) {
- Sk4f s4 = src[i].to4f_pmorder();
- Sk4f d4 = load_dst<kSRGB_Dst>(dst[i]);
- Sk4f r4 = s4 + d4 * Sk4f(1 - get_alpha(s4));
- dst[i] = store_dst<kSRGB_Dst>(r4);
- }
-}
-
template <DstType D> void srcover_n(const SkXfermode*, uint32_t dst[],
const SkPM4f src[], int count, const SkAlpha aa[]) {
if (aa) {
@@ -306,15 +280,30 @@ template <DstType D> void srcover_n(const SkXfermode*, uint32_t dst[],
dst[i] = store_dst<D>(r4);
}
} else {
- if (D == kSRGB_Dst) {
- srcover_n_srgb_bw(dst, src, count);
- } else {
- for (int i = 0; i < count; ++i) {
- Sk4f s4 = src[i].to4f_pmorder();
- Sk4f d4 = load_dst<D>(dst[i]);
- Sk4f r4 = s4 + d4 * Sk4f(1 - get_alpha(s4));
- dst[i] = store_dst<D>(r4);
- }
+ while (count >= 4 && D == kSRGB_Dst) {
+ auto d = load_4_srgb(dst);
+
+ auto s = Sk4x4f::Transpose(src->fVec);
+ #if defined(SK_PMCOLOR_IS_BGRA)
+ SkTSwap(s.r, s.b);
+ #endif
+
+ auto invSA = 1.0f - s.a;
+ auto r = s.r + d.r * invSA,
+ g = s.g + d.g * invSA,
+ b = s.b + d.b * invSA,
+ a = s.a + d.a * invSA;
+
+ store_4_srgb(dst, Sk4x4f{r,g,b,a});
+ count -= 4;
+ dst += 4;
+ src += 4;
+ }
+ for (int i = 0; i < count; ++i) {
+ Sk4f s4 = src[i].to4f_pmorder();
+ Sk4f d4 = load_dst<D>(dst[i]);
+ Sk4f r4 = s4 + d4 * Sk4f(1 - get_alpha(s4));
+ dst[i] = store_dst<D>(r4);
}
}
}
@@ -385,17 +374,22 @@ static void srcover_srgb_dst_1(const SkXfermode*, uint32_t dst[],
}
} else {
while (count >= 4) {
- Sk4f d0 = srgb_4b_to_linear_unit(dst[0]);
- Sk4f d1 = srgb_4b_to_linear_unit(dst[1]);
- Sk4f d2 = srgb_4b_to_linear_unit(dst[2]);
- Sk4f d3 = srgb_4b_to_linear_unit(dst[3]);
- Sk4f_ToBytes((uint8_t*)dst,
- linear_unit_to_srgb_255f(s4 + d0 * dst_scale),
- linear_unit_to_srgb_255f(s4 + d1 * dst_scale),
- linear_unit_to_srgb_255f(s4 + d2 * dst_scale),
- linear_unit_to_srgb_255f(s4 + d3 * dst_scale));
- dst += 4;
+ auto d = load_4_srgb(dst);
+
+ auto s = Sk4x4f{{ src->r() }, { src->g() }, { src->b() }, { src->a() }};
+ #if defined(SK_PMCOLOR_IS_BGRA)
+ SkTSwap(s.r, s.b);
+ #endif
+
+ auto invSA = 1.0f - s.a;
+ auto r = s.r + d.r * invSA,
+ g = s.g + d.g * invSA,
+ b = s.b + d.b * invSA,
+ a = s.a + d.a * invSA;
+
+ store_4_srgb(dst, Sk4x4f{r,g,b,a});
count -= 4;
+ dst += 4;
}
for (int i = 0; i < count; ++i) {
Sk4f d4 = srgb_4b_to_linear_unit(dst[i]);