diff options
author | mtklein <mtklein@chromium.org> | 2016-05-02 11:48:42 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2016-05-02 11:48:42 -0700 |
commit | c5091b5b6c4b8a7aef8c12db9ea2a85e907b01c4 (patch) | |
tree | b100bb8389c0a4851a387da99e978b04a61e1fd0 /src/opts | |
parent | ffc2aea3cb6981a5cc26f6c0f2ebf889ca5eb73f (diff) |
Add a hook for CPU-optimized sRGB-sRGB srcover.
Herb's really starting to get serious about tweaking this, which becomes
a lot easier when you've got SkOpts' runtime CPU detection. We should be
able to optimize this usefully for SSSE3, SSE4.1, AVX, AVX2, or NEON.
(We can of course implement a subset.)
This function takes two counts to give us flexibility to write src patterns:
nsrc >= ndst -> the usual srcover function
nsrc < ndst -> repeat src until it fills dst
nsrc << ndst -> possibly preprocess src into registers
nsrc == 1 -> equivalent of blitrow_color32, srcover_1, etc.
BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1939783003
Review-Url: https://codereview.chromium.org/1939783003
Diffstat (limited to 'src/opts')
-rw-r--r-- | src/opts/SkBlend_opts.h | 58 |
1 files changed, 58 insertions, 0 deletions
diff --git a/src/opts/SkBlend_opts.h b/src/opts/SkBlend_opts.h new file mode 100644 index 0000000000..a1067407be --- /dev/null +++ b/src/opts/SkBlend_opts.h @@ -0,0 +1,58 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkBlend_opts_DEFINED +#define SkBlend_opts_DEFINED + +namespace SK_OPTS_NS { + +#if 0 + +#else + + static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) { + switch (src >> 24) { + case 0x00: return; + case 0xff: *dst = src; return; + } + + Sk4f d = SkNx_cast<float>(Sk4b::Load( dst)), + s = SkNx_cast<float>(Sk4b::Load(&src)); + + // Approximate sRGB gamma as 2.0. + Sk4f d_sq = d*d, + s_sq = s*s; + d = Sk4f{d_sq[0], d_sq[1], d_sq[2], d[3]}; + s = Sk4f{s_sq[0], s_sq[1], s_sq[2], s[3]}; + + // SrcOver. + Sk4f invA = 1.0f - s[3]*(1/255.0f); + d = s + d * invA; + + // Re-apply approximate sRGB gamma. + Sk4f d_sqrt = d.sqrt(); + d = Sk4f{d_sqrt[0], d_sqrt[1], d_sqrt[2], d[3]}; + + SkNx_cast<uint8_t>(d).store(dst); + } + + static inline void srcover_srgb_srgb(uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { + while (ndst > 0) { + int n = SkTMin(ndst, nsrc); + + for (int i = 0; i < n; i++) { + srcover_srgb_srgb_1(dst++, src[i]); + } + ndst -= n; + } + } + +#endif + +} // namespace SK_OPTS_NS + +#endif//SkBlend_opts_DEFINED |