aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/core/SkXfermode4f.cpp
diff options
context:
space:
mode:
authorGravatar reed <reed@google.com>2016-02-02 11:00:55 -0800
committerGravatar Commit bot <commit-bot@chromium.org>2016-02-02 11:00:55 -0800
commitbf907e628fe245305d0f987aed8b8ecff8356374 (patch)
treea4ee1424e4080ba03abd9f44fc8a73cbf18772a8 /src/core/SkXfermode4f.cpp
parent5923535a0445767bb287b1868dd285d6656413de (diff)
unroll srcover_1 for blending a single color
Before:
curr/maxrss loops min median mean max stddev samples config bench
8/8 MB 1 1.59ms 1.82ms 1.89ms 2.59ms 14% ▁█▃▃▃▃▃▃▃▃ nonrendering xfer4f_srcover_1_alpha_linear
8/8 MB 1 3.25ms 4.25ms 4.16ms 5.87ms 21% ▁▅▂▁▁▄█▄▅▂ nonrendering xfer4f_srcover_1_alpha_srgb

After:
curr/maxrss loops min median mean max stddev samples config bench
8/8 MB 1 915µs 915µs 946µs 1.02ms 4% █▄▇▁▁▁▆▁▁▁ nonrendering xfer4f_srcover_1_alpha_linear
8/8 MB 1 2.69ms 3.08ms 3.03ms 3.63ms 10% ▁▃▂▁▁█▄▄▄▆ nonrendering xfer4f_srcover_1_alpha_srgb

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1653943002
Review URL: https://codereview.chromium.org/1653943002
Diffstat (limited to 'src/core/SkXfermode4f.cpp')
-rw-r--r--src/core/SkXfermode4f.cpp93
1 files changed, 80 insertions, 13 deletions
diff --git a/src/core/SkXfermode4f.cpp b/src/core/SkXfermode4f.cpp
index 0485a5e6ed..1bf66a2b69 100644
--- a/src/core/SkXfermode4f.cpp
+++ b/src/core/SkXfermode4f.cpp
@@ -31,10 +31,22 @@ template <DstType D> Sk4f load_dst(SkPMColor dstC) {
return (D == kSRGB_Dst) ? Sk4f_fromS32(dstC) : Sk4f_fromL32(dstC);
}
+static Sk4f srgb_4b_to_linear_unit(SkPMColor dstC) { // unpack one 32-bit pixel with Sk4f_fromS32, the conversion load_dst<kSRGB_Dst> uses
+ return Sk4f_fromS32(dstC); // NOTE(review): presumably yields linear floats in 0..1 (per the name) - confirm against Sk4f_fromS32
+}
+
template <DstType D> uint32_t store_dst(const Sk4f& x4) {
return (D == kSRGB_Dst) ? Sk4f_toS32(x4) : Sk4f_toL32(x4);
}
+static uint32_t linear_unit_to_srgb_32(const Sk4f& l4) { // pack a blended Sk4f into one 32-bit pixel for an sRGB destination
+ return Sk4f_toS32(l4); // must match store_dst<kSRGB_Dst>; Sk4f_toL32 here would store without the sRGB conversion
+}
+
+static Sk4f linear_unit_to_srgb_255f(const Sk4f& l4) { // float-valued result; callers hand it to to_4b / Sk4f_ToBytes
+ return linear_to_srgb(l4) * Sk4f(255) + Sk4f(0.5f); // apply linear_to_srgb, scale by 255, add 0.5 (rounding pre-bias)
+}
+
///////////////////////////////////////////////////////////////////////////////////////////////////
static Sk4f scale_255_round(const SkPM4f& pm4) {
@@ -254,10 +266,53 @@ template <DstType D> void srcover_n(const SkXfermode::PM4fState& state, uint32_t
}
}
-template <DstType D> void srcover_1(const SkXfermode::PM4fState& state, uint32_t dst[],
- const SkPM4f& src, int count, const SkAlpha aa[]) {
+static void srcover_linear_dst_1(const SkXfermode::PM4fState& state, uint32_t dst[], // src-over blend of ONE color onto `count` L32 dst pixels
+ const SkPM4f& src, int count, const SkAlpha aa[]) { // aa: optional per-pixel coverage; null means full coverage
+ Sk4f s4 = Sk4f::Load(src.fVec);
+ Sk4f dst_scale = Sk4f(1 - get_alpha(s4)); // dst weight at full coverage: 1 - src alpha
+
+ if (aa) {
+ for (int i = 0; i < count; ++i) {
+ unsigned a = aa[i];
+ if (0 == a) {
+ continue; // zero coverage: dst[i] is left untouched
+ }
+ Sk4f d4 = Sk4f_fromL32(dst[i]);
+ Sk4f r4;
+ if (a != 0xFF) {
+ Sk4f s4_aa = scale_by_coverage(s4, a); // per-pixel copy: s4 must stay unscaled for the following pixels
+ r4 = s4_aa + d4 * Sk4f(1 - get_alpha(s4_aa)); // dst weight recomputed from the coverage-scaled alpha
+ } else {
+ r4 = s4 + d4 * dst_scale;
+ }
+ dst[i] = Sk4f_toL32(r4);
+ }
+ } else {
+ s4 = s4 * Sk4f(255) + Sk4f(0.5f); // +0.5 to pre-bias for rounding
+ while (count >= 4) { // unrolled: four pixels per iteration
+ Sk4f d0 = to_4f(dst[0]); // NOTE(review): to_4f presumably yields 0..255 floats, matching the 255-scaled s4 - confirm
+ Sk4f d1 = to_4f(dst[1]);
+ Sk4f d2 = to_4f(dst[2]);
+ Sk4f d3 = to_4f(dst[3]);
+ Sk4f_ToBytes((uint8_t*)dst,
+ s4 + d0 * dst_scale,
+ s4 + d1 * dst_scale,
+ s4 + d2 * dst_scale,
+ s4 + d3 * dst_scale);
+ dst += 4;
+ count -= 4;
+ }
+ for (int i = 0; i < count; ++i) { // tail: the remaining 0..3 pixels
+ Sk4f d4 = to_4f(dst[i]);
+ dst[i] = to_4b(s4 + d4 * dst_scale);
+ }
+ }
+}
+
+static void srcover_srgb_dst_1(const SkXfermode::PM4fState& state, uint32_t dst[],
+ const SkPM4f& src, int count, const SkAlpha aa[]) {
Sk4f s4 = Sk4f::Load(src.fVec);
- Sk4f scale = Sk4f(1 - get_alpha(s4));
+ Sk4f dst_scale = Sk4f(1 - get_alpha(s4));
if (aa) {
for (int i = 0; i < count; ++i) {
@@ -265,30 +320,42 @@ template <DstType D> void srcover_1(const SkXfermode::PM4fState& state, uint32_t
if (0 == a) {
continue;
}
- Sk4f d4 = load_dst<D>(dst[i]);
+ Sk4f d4 = srgb_4b_to_linear_unit(dst[i]);
Sk4f r4;
if (a != 0xFF) {
s4 = scale_by_coverage(s4, a);
r4 = s4 + d4 * Sk4f(1 - get_alpha(s4));
} else {
- r4 = s4 + d4 * scale;
+ r4 = s4 + d4 * dst_scale;
}
- dst[i] = store_dst<D>(r4);
+ dst[i] = linear_unit_to_srgb_32(r4);
}
} else {
+ while (count >= 4) {
+ Sk4f d0 = srgb_4b_to_linear_unit(dst[0]);
+ Sk4f d1 = srgb_4b_to_linear_unit(dst[1]);
+ Sk4f d2 = srgb_4b_to_linear_unit(dst[2]);
+ Sk4f d3 = srgb_4b_to_linear_unit(dst[3]);
+ Sk4f_ToBytes((uint8_t*)dst,
+ linear_unit_to_srgb_255f(s4 + d0 * dst_scale),
+ linear_unit_to_srgb_255f(s4 + d1 * dst_scale),
+ linear_unit_to_srgb_255f(s4 + d2 * dst_scale),
+ linear_unit_to_srgb_255f(s4 + d3 * dst_scale));
+ dst += 4;
+ count -= 4;
+ }
for (int i = 0; i < count; ++i) {
- Sk4f d4 = load_dst<D>(dst[i]);
- Sk4f r4 = s4 + d4 * scale;
- dst[i] = store_dst<D>(r4);
+ Sk4f d4 = srgb_4b_to_linear_unit(dst[i]);
+ dst[i] = to_4b(linear_unit_to_srgb_255f(s4 + d4 * dst_scale));
}
}
}
const XferProcPair gProcs_SrcOver[] = {
- { srcover_1<kLinear_Dst>, srcover_n<kLinear_Dst> }, // linear alpha
- { src_1<kLinear_Dst>, src_n<kLinear_Dst> }, // linear opaque [ we are src-mode ]
- { srcover_1<kSRGB_Dst>, srcover_n<kSRGB_Dst> }, // srgb alpha
- { src_1<kSRGB_Dst>, src_n<kSRGB_Dst> }, // srgb opaque [ we are src-mode ]
+ { srcover_linear_dst_1, srcover_n<kLinear_Dst> }, // linear alpha
+ { src_1<kLinear_Dst>, src_n<kLinear_Dst> }, // linear opaque [ we are src-mode ]
+ { srcover_srgb_dst_1, srcover_n<kSRGB_Dst> }, // srgb alpha
+ { src_1<kSRGB_Dst>, src_n<kSRGB_Dst> }, // srgb opaque [ we are src-mode ]
};
///////////////////////////////////////////////////////////////////////////////////////////////////