aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/opts
diff options
context:
space:
mode:
authorGravatar mtklein <mtklein@chromium.org>2016-05-02 11:48:42 -0700
committerGravatar Commit bot <commit-bot@chromium.org>2016-05-02 11:48:42 -0700
commitc5091b5b6c4b8a7aef8c12db9ea2a85e907b01c4 (patch)
treeb100bb8389c0a4851a387da99e978b04a61e1fd0 /src/opts
parentffc2aea3cb6981a5cc26f6c0f2ebf889ca5eb73f (diff)
Add a hook for CPU-optimized sRGB-sRGB srcover.
Herb's really starting to get serious about tweaking this, which becomes a lot easier when you've got SkOpts' runtime CPU detection. We should be able to optimize this usefully for SSSE3, SSE4.1, AVX, AVX2, or NEON. (We can of course implement a subset.)
This function takes two counts to give us flexibility to write src patterns:
  nsrc >= ndst -> the usual srcover function
  nsrc <  ndst -> repeat src until it fills dst
  nsrc << ndst -> possibly preprocess src into registers
  nsrc == 1    -> equivalent of blitrow_color32, srcover_1, etc.
BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1939783003
Review-Url: https://codereview.chromium.org/1939783003
Diffstat (limited to 'src/opts')
-rw-r--r--src/opts/SkBlend_opts.h58
1 files changed, 58 insertions, 0 deletions
diff --git a/src/opts/SkBlend_opts.h b/src/opts/SkBlend_opts.h
new file mode 100644
index 0000000000..a1067407be
--- /dev/null
+++ b/src/opts/SkBlend_opts.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2016 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkBlend_opts_DEFINED
+#define SkBlend_opts_DEFINED
+
+namespace SK_OPTS_NS {
+
+#if 0
+
+#else
+
+ // Blends the single pixel `src` over `*dst` in place, treating the three
+ // low channels as sRGB-encoded and approximating the sRGB curve as gamma 2.0.
+ //
+ // The top byte of `src` (src >> 24) is taken as alpha and doubles as a fast
+ // path selector: 0x00 leaves `*dst` untouched, 0xff overwrites it outright.
+ //
+ // NOTE(review): the blend is s + d*(1 - alpha), which looks like it expects
+ // premultiplied pixels -- confirm against callers.
+ // NOTE(review): lane 3 of the byte-wise load is used as alpha while the fast
+ // paths read alpha from bits 24..31; these agree only when the top byte is
+ // stored last in memory (little-endian) -- confirm for all targets.
+ static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) {
+     const uint32_t srcAlpha = src >> 24;
+     if (srcAlpha == 0x00) {
+         // Nothing to composite.
+         return;
+     }
+     if (srcAlpha == 0xff) {
+         // The source fully replaces the destination.
+         *dst = src;
+         return;
+     }
+
+     // Widen both pixels from four bytes to four floats.
+     Sk4f dstPx = SkNx_cast<float>(Sk4b::Load( dst));
+     Sk4f srcPx = SkNx_cast<float>(Sk4b::Load(&src));
+
+     // Decode with the gamma 2.0 approximation: square the first three lanes,
+     // pass lane 3 (alpha) through unchanged.
+     Sk4f dstSquared = dstPx*dstPx;
+     Sk4f srcSquared = srcPx*srcPx;
+     Sk4f dstLinear = Sk4f{dstSquared[0], dstSquared[1], dstSquared[2], dstPx[3]};
+     Sk4f srcLinear = Sk4f{srcSquared[0], srcSquared[1], srcSquared[2], srcPx[3]};
+
+     // SrcOver: result = src + dst * (1 - srcAlpha/255).
+     Sk4f dstScale = 1.0f - srcLinear[3]*(1/255.0f);
+     Sk4f blended = srcLinear + dstLinear * dstScale;
+
+     // Re-encode with the inverse approximation: square root of the first
+     // three lanes, alpha lane again passed through.
+     Sk4f blendedRoot = blended.sqrt();
+     Sk4f encoded = Sk4f{blendedRoot[0], blendedRoot[1], blendedRoot[2], blended[3]};
+
+     // Narrow back to bytes and write the pixel.
+     SkNx_cast<uint8_t>(encoded).store(dst);
+ }
+
+ // Blends `src` pixels over `ndst` destination pixels in place, one pixel at a
+ // time via srcover_srgb_srgb_1().
+ //
+ //   dst   destination pixels; advanced once per blended pixel.
+ //   src   source pixels; read from index 0 again for every chunk.
+ //   ndst  number of destination pixels to blend.
+ //   nsrc  number of source pixels available.
+ //
+ // Destination pixels are processed in chunks of min(ndst remaining, nsrc), so
+ // when nsrc < ndst the source run is repeated until dst is filled, and when
+ // nsrc >= ndst only the first ndst source pixels are read.
+ //
+ // Does nothing when ndst <= 0 or nsrc <= 0.
+ static inline void srcover_srgb_srgb(uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
+     // Without any source pixels the chunk size below would never be positive,
+     // ndst would never shrink, and the loop would not terminate.
+     if (nsrc <= 0) {
+         return;
+     }
+
+     while (ndst > 0) {
+         int n = SkTMin(ndst, nsrc);
+
+         for (int i = 0; i < n; i++) {
+             srcover_srgb_srgb_1(dst++, src[i]);
+         }
+         ndst -= n;
+     }
+ }
+
+#endif
+
+} // namespace SK_OPTS_NS
+
+#endif//SkBlend_opts_DEFINED