diff options
Diffstat (limited to 'src/opts')
-rw-r--r-- | src/opts/SkBlend_opts.h | 235 | ||||
-rw-r--r-- | src/opts/SkOpts_sse41.cpp | 9 |
2 files changed, 31 insertions, 213 deletions
diff --git a/src/opts/SkBlend_opts.h b/src/opts/SkBlend_opts.h index 93946438e5..a1067407be 100644 --- a/src/opts/SkBlend_opts.h +++ b/src/opts/SkBlend_opts.h @@ -5,233 +5,52 @@ * found in the LICENSE file. */ -/* -ninja -C out/Release dm nanobench ; and ./out/Release/dm --match Blend_opts ; and ./out/Release/nanobench --samples 300 --nompd --match LinearSrcOver -q - */ - #ifndef SkBlend_opts_DEFINED #define SkBlend_opts_DEFINED -#include "SkNx.h" -#include "SkPM4fPriv.h" - namespace SK_OPTS_NS { -// An implementation of SrcOver from bytes to bytes in linear space that takes advantage of the -// observation that the 255's cancel. -// invA = 1 - (As / 255); -// -// R = 255 * sqrt((Rs/255)^2 + (Rd/255)^2 * invA) -// => R = 255 * sqrt((Rs^2 + Rd^2 * invA)/255^2) -// => R = sqrt(Rs^2 + Rd^2 * invA) -static inline void blend_srgb_srgb_1(uint32_t* dst, const uint32_t pixel) { - Sk4f s = srgb_to_linear(to_4f(pixel)); - Sk4f d = srgb_to_linear(to_4f(*dst)); - Sk4f invAlpha = 1.0f - Sk4f{s[SkPM4f::A]} * (1.0f / 255.0f); - Sk4f r = linear_to_srgb(s + d * invAlpha) + 0.5f; - *dst = to_4b(r); -} - -static inline void srcover_srgb_srgb_1(uint32_t* dst, const uint32_t pixel) { - if ((~pixel & 0xFF000000) == 0) { - *dst = pixel; - } else if ((pixel & 0xFF000000) != 0) { - blend_srgb_srgb_1(dst, pixel); - } -} - -static inline void srcover_srgb_srgb_2(uint32_t* dst, const uint32_t* src) { - srcover_srgb_srgb_1(dst++, *src++); - srcover_srgb_srgb_1(dst, *src); -} - -static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) { - srcover_srgb_srgb_1(dst++, *src++); - srcover_srgb_srgb_1(dst++, *src++); - srcover_srgb_srgb_1(dst++, *src++); - srcover_srgb_srgb_1(dst, *src); -} - -void best_non_simd_srcover_srgb_srgb( - uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { - uint64_t* ddst = reinterpret_cast<uint64_t*>(dst); +#if 0 - while (ndst >0) { - int count = SkTMin(ndst, nsrc); - ndst -= count; - const uint64_t* dsrc = reinterpret_cast<const uint64_t*>(src); - const uint64_t* end = dsrc + (count >> 1); - do { - if ((~*dsrc & 0xFF000000FF000000) == 0) { - do { - *ddst++ = *dsrc++; - } while (dsrc < end && (~*dsrc & 0xFF000000FF000000) == 0); - } else if ((*dsrc & 0xFF000000FF000000) == 0) { - do { - dsrc++; - ddst++; - } while (dsrc < end && (*dsrc & 0xFF000000FF000000) == 0); - } else { - srcover_srgb_srgb_2(reinterpret_cast<uint32_t*>(ddst++), - reinterpret_cast<const uint32_t*>(dsrc++)); - } - } while (dsrc < end); - - if ((count & 1) != 0) { - srcover_srgb_srgb_1(reinterpret_cast<uint32_t*>(ddst), - *reinterpret_cast<const uint32_t*>(dsrc)); - } - } -} - -void brute_force_srcover_srgb_srgb( - uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { - while (ndst > 0) { - int n = SkTMin(ndst, nsrc); +#else - for (int i = 0; i < n; i++) { - blend_srgb_srgb_1(dst++, src[i]); + static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) { + switch (src >> 24) { + case 0x00: return; + case 0xff: *dst = src; return; } - ndst -= n; - } -} -void trivial_srcover_srgb_srgb( - uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { - while (ndst > 0) { - int n = SkTMin(ndst, nsrc); + Sk4f d = SkNx_cast<float>(Sk4b::Load( dst)), + s = SkNx_cast<float>(Sk4b::Load(&src)); - for (int i = 0; i < n; i++) { - srcover_srgb_srgb_1(dst++, src[i]); - } - ndst -= n; - } -} + // Approximate sRGB gamma as 2.0. + Sk4f d_sq = d*d, + s_sq = s*s; + d = Sk4f{d_sq[0], d_sq[1], d_sq[2], d[3]}; + s = Sk4f{s_sq[0], s_sq[1], s_sq[2], s[3]}; -#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 + // SrcOver. + Sk4f invA = 1.0f - s[3]*(1/255.0f); + d = s + d * invA; - static inline __m128i load(const uint32_t* p) { - return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p)); - } + // Re-apply approximate sRGB gamma. + Sk4f d_sqrt = d.sqrt(); + d = Sk4f{d_sqrt[0], d_sqrt[1], d_sqrt[2], d[3]}; - static inline void store(uint32_t* p, __m128i v) { - _mm_storeu_si128(reinterpret_cast<__m128i*>(p), v); + SkNx_cast<uint8_t>(d).store(dst); } - #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 - - void srcover_srgb_srgb( - uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc) { - const __m128i alphaMask = _mm_set1_epi32(0xFF000000); - while (ndst > 0) { - int count = SkTMin(ndst, nsrc); - ndst -= count; - const uint32_t* src = srcStart; - const uint32_t* end = src + (count & ~3); + static inline void srcover_srgb_srgb(uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { + while (ndst > 0) { + int n = SkTMin(ndst, nsrc); - while (src < end) { - __m128i pixels = load(src); - if (_mm_testc_si128(pixels, alphaMask)) { - do { - store(dst, pixels); - dst += 4; - src += 4; - } while (src < end && _mm_testc_si128(pixels = load(src), alphaMask)); - } else if (_mm_testz_si128(pixels, alphaMask)) { - do { - dst += 4; - src += 4; - } while (src < end && _mm_testz_si128(pixels = load(src), alphaMask)); - } else { - do { - srcover_srgb_srgb_4(dst, src); - dst += 4; - src += 4; - } while (src < end && _mm_testnzc_si128(pixels = load(src), alphaMask)); - } - } - - count = count & 3; - while (count-- > 0) { - srcover_srgb_srgb_1(dst++, *src++); - } + for (int i = 0; i < n; i++) { + srcover_srgb_srgb_1(dst++, src[i]); } + ndst -= n; } - #else - // SSE2 versions - static inline bool check_opaque_alphas(__m128i pixels) { - int mask = - _mm_movemask_epi8( - _mm_cmpeq_epi32( - _mm_andnot_si128(pixels, _mm_set1_epi32(0xFF000000)), - _mm_setzero_si128())); - return mask == 0xFFFF; - } - - static inline bool check_transparent_alphas(__m128i pixels) { - int mask = - _mm_movemask_epi8( - _mm_cmpeq_epi32( - _mm_and_si128(pixels, _mm_set1_epi32(0xFF000000)), - _mm_setzero_si128())); - return mask == 0xFFFF; - } - - static inline bool check_partial_alphas(__m128i pixels) { - __m128i alphas = _mm_and_si128(pixels, _mm_set1_epi32(0xFF000000)); - int mask = - _mm_movemask_epi8( - _mm_cmpeq_epi8( - _mm_srai_epi32(alphas, 8), - alphas)); - return mask == 0xFFFF; - } - - void srcover_srgb_srgb( - uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc) { - while (ndst > 0) { - int count = SkTMin(ndst, nsrc); - ndst -= count; - const uint32_t* src = srcStart; - const uint32_t* end = src + (count & ~3); - - __m128i pixels = load(src); - do { - if (check_opaque_alphas(pixels)) { - do { - store(dst, pixels); - dst += 4; - src += 4; - } while (src < end && check_opaque_alphas(pixels = load(src))); - } else if (check_transparent_alphas(pixels)) { - const uint32_t* start = src; - do { - src += 4; - } while (src < end && check_transparent_alphas(pixels = load(src))); - dst += src - start; - } else { - do { - srcover_srgb_srgb_4(dst, src); - dst += 4; - src += 4; - } while (src < end && check_partial_alphas(pixels = load(src))); - } - } while (src < end); - - count = count & 3; - while (count-- > 0) { - srcover_srgb_srgb_1(dst++, *src++); - } - } - } - #endif -#else - - void srcover_srgb_srgb( - uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { - trivial_srcover_srgb_srgb(dst, src, ndst, nsrc); } - + #endif } // namespace SK_OPTS_NS diff --git a/src/opts/SkOpts_sse41.cpp b/src/opts/SkOpts_sse41.cpp index f0561a69c6..34b078c2ca 100644 --- a/src/opts/SkOpts_sse41.cpp +++ b/src/opts/SkOpts_sse41.cpp @@ -10,7 +10,6 @@ #define SK_OPTS_NS sk_sse41 #include "SkBlurImageFilter_opts.h" #include "SkBlitRow_opts.h" -#include "SkBlend_opts.h" #ifndef SK_SUPPORT_LEGACY_X86_BLITS @@ -212,16 +211,16 @@ static void blit_mask_d32_a8(SkPMColor* dst, size_t dstRB, } } } + } // namespace sk_sse41_new #endif namespace SkOpts { void Init_sse41() { - box_blur_xx = sk_sse41::box_blur_xx; - box_blur_xy = sk_sse41::box_blur_xy; - box_blur_yx = sk_sse41::box_blur_yx; - srcover_srgb_srgb = sk_sse41::srcover_srgb_srgb; + box_blur_xx = sk_sse41::box_blur_xx; + box_blur_xy = sk_sse41::box_blur_xy; + box_blur_yx = sk_sse41::box_blur_yx; #ifndef SK_SUPPORT_LEGACY_X86_BLITS blit_row_color32 = sk_sse41_new::blit_row_color32; |