diff options
-rw-r--r-- | bench/SkBlend_optsBench.cpp | 167 | ||||
-rw-r--r-- | resources/iconstrip.png | bin | 55635 -> 0 bytes | |||
-rw-r--r-- | src/opts/SkBlend_opts.h | 235 | ||||
-rw-r--r-- | src/opts/SkOpts_sse41.cpp | 9 | ||||
-rw-r--r-- | tests/SkBlend_optsTest.cpp | 120 |
5 files changed, 31 insertions, 500 deletions
diff --git a/bench/SkBlend_optsBench.cpp b/bench/SkBlend_optsBench.cpp deleted file mode 100644 index 24f1f85229..0000000000 --- a/bench/SkBlend_optsBench.cpp +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Copyright 2016 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#include <tuple> - -#include "Benchmark.h" -#include "Resources.h" -#include "SkCpu.h" -#include "SkImage.h" -#include "SkImage_Base.h" -#include "SkNx.h" -#include "SkOpts.h" -#include "SkString.h" - -#define INNER_LOOPS 10 - -namespace sk_default { -extern void brute_force_srcover_srgb_srgb( - uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc); -} - -class SrcOverVSkOptsBruteForce { -public: - static SkString Name() { return SkString{"VSkOptsBruteForce"}; } - static bool WorksOnCpu() { return true; } - static void BlendN(uint32_t* dst, int count, const uint32_t* src) { - sk_default::brute_force_srcover_srgb_srgb(dst, src, count, count); - } -}; - -namespace sk_default { -extern void trivial_srcover_srgb_srgb( - uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc); -} - -class SrcOverVSkOptsTrivial { -public: - static SkString Name() { return SkString{"VSkOptsTrivial"}; } - static bool WorksOnCpu() { return true; } - static void BlendN(uint32_t* dst, int count, const uint32_t* src) { - sk_default::trivial_srcover_srgb_srgb(dst, src, count, count); - } -}; - -namespace sk_default { -extern void best_non_simd_srcover_srgb_srgb( - uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc); -} - -class SrcOverVSkOptsNonSimdCore { -public: - static SkString Name() { return SkString{"VSkOptsNonSimdCore"}; } - static bool WorksOnCpu() { return true; } - static void BlendN(uint32_t* dst, int count, const uint32_t* src) { - sk_default::best_non_simd_srcover_srgb_srgb(dst, src, count, count); - } -}; - -namespace sk_default { -extern void srcover_srgb_srgb( - uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc); -} - -class SrcOverVSkOptsDefault { -public: - static SkString Name() { return SkString{"VSkOptsDefault"}; } - static bool WorksOnCpu() { return true; } - static void BlendN(uint32_t* dst, int count, const uint32_t* src) { - sk_default::srcover_srgb_srgb(dst, src, count, count); - } -}; - -namespace sk_sse41 { - extern void srcover_srgb_srgb( - uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc); -} - -class SrcOverVSkOptsSSE41 { -public: - static SkString Name() { return SkString{"VSkOptsSSE41"}; } - static bool WorksOnCpu() { return SkCpu::Supports(SkCpu::SSE41); } - static void BlendN(uint32_t* dst, int count, const uint32_t* src) { - sk_sse41::srcover_srgb_srgb(dst, src, count, count); - } -}; - -/////////////////////////////////////////////////////////////////////////////////////////////////// - -template <typename Blender> -class LinearSrcOverBench : public Benchmark { -public: - LinearSrcOverBench(const char* fileName) { - fName = "LinearSrcOver"; - fName.append(fileName); - fName.append(Blender::Name()); - - sk_sp<SkImage> image = GetResourceAsImage(fileName); - SkBitmap bm; - if (!as_IB(image)->getROPixels(&bm)) { - SkFAIL("Could not read resource"); - } - bm.peekPixels(&fPixmap); - fCount = fPixmap.rowBytesAsPixels(); - fDst.reset(fCount); - - } - -protected: - bool isSuitableFor(Backend backend) override { - return backend == kNonRendering_Backend && Blender::WorksOnCpu(); - } - const char* onGetName() override { return fName.c_str(); } - void onDraw(int loops, SkCanvas*) override { - SkASSERT(fPixmap.colorType() == kN32_SkColorType); - - const int width = fPixmap.rowBytesAsPixels(); - - for (int i = 0; i < loops * INNER_LOOPS; ++i) { - const uint32_t* src = fPixmap.addr32(); - for (int y = 0; y < fPixmap.height(); y++) { - Blender::BlendN(fDst.get(), width, src); - src += width; - } - } - } - - void onPostDraw(SkCanvas*) override { - // Make sure the compiler does not optimize away the operation. - volatile uint32_t v = 0; - for (int i = 0; i < fCount; i++) { - v ^= fDst[i]; - } - } - -private: - int fCount; - SkAutoTArray<uint32_t> fDst; - SkString fName; - SkPixmap fPixmap; - - typedef Benchmark INHERITED; -}; - -#if defined(SK_CPU_X86) && !defined(SK_BUILD_FOR_IOS) -#define BENCHES(fileName) \ -DEF_BENCH( return new LinearSrcOverBench<SrcOverVSkOptsBruteForce>(fileName); ) \ -DEF_BENCH( return new LinearSrcOverBench<SrcOverVSkOptsTrivial>(fileName); ) \ -DEF_BENCH( return new LinearSrcOverBench<SrcOverVSkOptsNonSimdCore>(fileName); ) \ -DEF_BENCH( return new LinearSrcOverBench<SrcOverVSkOptsDefault>(fileName); ) \ -DEF_BENCH( return new LinearSrcOverBench<SrcOverVSkOptsSSE41>(fileName); ) -#else -#define BENCHES(fileName) \ -DEF_BENCH( return new LinearSrcOverBench<SrcOverVSkOptsBruteForce>(fileName); ) \ -DEF_BENCH( return new LinearSrcOverBench<SrcOverVSkOptsTrivial>(fileName); ) \ -DEF_BENCH( return new LinearSrcOverBench<SrcOverVSkOptsNonSimdCore>(fileName); ) \ -DEF_BENCH( return new LinearSrcOverBench<SrcOverVSkOptsDefault>(fileName); ) -#endif - -BENCHES("yellow_rose.png") -BENCHES("baby_tux.png") -BENCHES("plane.png") -BENCHES("mandrill_512.png") -BENCHES("iconstrip.png") diff --git a/resources/iconstrip.png b/resources/iconstrip.png Binary files differdeleted file mode 100644 index 10be277afd..0000000000 --- a/resources/iconstrip.png +++ /dev/null diff --git a/src/opts/SkBlend_opts.h b/src/opts/SkBlend_opts.h index 59bf8c2908..a1067407be 100644 --- a/src/opts/SkBlend_opts.h +++ b/src/opts/SkBlend_opts.h @@ -5,233 +5,52 @@ * found in the LICENSE file. */ -/* -ninja -C out/Release dm nanobench ; and ./out/Release/dm --match Blend_opts ; and ./out/Release/nanobench --samples 300 --nompd --match LinearSrcOver -q - */ - #ifndef SkBlend_opts_DEFINED #define SkBlend_opts_DEFINED -#include "SkNx.h" -#include "SkPM4fPriv.h" - namespace SK_OPTS_NS { -// An implementation of SrcOver from bytes to bytes in linear space that takes advantage of the -// observation that the 255's cancel. -// invA = 1 - (As / 255); -// -// R = 255 * sqrt((Rs/255)^2 + (Rd/255)^2 * invA) -// => R = 255 * sqrt((Rs^2 + Rd^2 * invA)/255^2) -// => R = sqrt(Rs^2 + Rd^2 * invA) -static inline void blend_srgb_srgb_1(uint32_t* dst, const uint32_t pixel) { - Sk4f s = srgb_to_linear(to_4f(pixel)); - Sk4f d = srgb_to_linear(to_4f(*dst)); - Sk4f invAlpha = 1.0f - Sk4f{s[SkPM4f::A]} * (1.0f / 255.0f); - Sk4f r = linear_to_srgb(s + d * invAlpha); - *dst = to_4b(r); -} - -static inline void srcover_srgb_srgb_1(uint32_t* dst, const uint32_t pixel) { - if ((~pixel & 0xFF000000) == 0) { - *dst = pixel; - } else if ((pixel & 0xFF000000) != 0) { - blend_srgb_srgb_1(dst, pixel); - } -} - -static inline void srcover_srgb_srgb_2(uint32_t* dst, const uint32_t* src) { - srcover_srgb_srgb_1(dst++, *src++); - srcover_srgb_srgb_1(dst, *src); -} - -static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) { - srcover_srgb_srgb_1(dst++, *src++); - srcover_srgb_srgb_1(dst++, *src++); - srcover_srgb_srgb_1(dst++, *src++); - srcover_srgb_srgb_1(dst, *src); -} - -void best_non_simd_srcover_srgb_srgb( - uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { - uint64_t* ddst = reinterpret_cast<uint64_t*>(dst); +#if 0 - while (ndst >0) { - int count = SkTMin(ndst, nsrc); - ndst -= count; - const uint64_t* dsrc = reinterpret_cast<const uint64_t*>(src); - const uint64_t* end = dsrc + (count >> 1); - do { - if ((~*dsrc & 0xFF000000FF000000) == 0) { - do { - *ddst++ = *dsrc++; - } while (dsrc < end && (~*dsrc & 0xFF000000FF000000) == 0); - } else if ((*dsrc & 0xFF000000FF000000) == 0) { - do { - dsrc++; - ddst++; - } while (dsrc < end && (*dsrc & 0xFF000000FF000000) == 0); - } else { - srcover_srgb_srgb_2(reinterpret_cast<uint32_t*>(ddst++), - reinterpret_cast<const uint32_t*>(dsrc++)); - } - } while (dsrc < end); - - if ((count & 1) != 0) { - srcover_srgb_srgb_1(reinterpret_cast<uint32_t*>(ddst), - *reinterpret_cast<const uint32_t*>(dsrc)); - } - } -} - -void brute_force_srcover_srgb_srgb( - uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { - while (ndst > 0) { - int n = SkTMin(ndst, nsrc); +#else - for (int i = 0; i < n; i++) { - blend_srgb_srgb_1(dst++, src[i]); + static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) { + switch (src >> 24) { + case 0x00: return; + case 0xff: *dst = src; return; } - ndst -= n; - } -} -void trivial_srcover_srgb_srgb( - uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { - while (ndst > 0) { - int n = SkTMin(ndst, nsrc); + Sk4f d = SkNx_cast<float>(Sk4b::Load( dst)), + s = SkNx_cast<float>(Sk4b::Load(&src)); - for (int i = 0; i < n; i++) { - srcover_srgb_srgb_1(dst++, src[i]); - } - ndst -= n; - } -} + // Approximate sRGB gamma as 2.0. + Sk4f d_sq = d*d, + s_sq = s*s; + d = Sk4f{d_sq[0], d_sq[1], d_sq[2], d[3]}; + s = Sk4f{s_sq[0], s_sq[1], s_sq[2], s[3]}; -#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 + // SrcOver. + Sk4f invA = 1.0f - s[3]*(1/255.0f); + d = s + d * invA; - static inline __m128i load(const uint32_t* p) { - return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p)); - } + // Re-apply approximate sRGB gamma. + Sk4f d_sqrt = d.sqrt(); + d = Sk4f{d_sqrt[0], d_sqrt[1], d_sqrt[2], d[3]}; - static inline void store(uint32_t* p, __m128i v) { - _mm_storeu_si128(reinterpret_cast<__m128i*>(p), v); + SkNx_cast<uint8_t>(d).store(dst); } - #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 - - void srcover_srgb_srgb( - uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc) { - const __m128i alphaMask = _mm_set1_epi32(0xFF000000); - while (ndst > 0) { - int count = SkTMin(ndst, nsrc); - ndst -= count; - const uint32_t* src = srcStart; - const uint32_t* end = src + (count & ~3); + static inline void srcover_srgb_srgb(uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { + while (ndst > 0) { + int n = SkTMin(ndst, nsrc); - while (src < end) { - __m128i pixels = load(src); - if (_mm_testc_si128(pixels, alphaMask)) { - do { - store(dst, pixels); - dst += 4; - src += 4; - } while (src < end && _mm_testc_si128(pixels = load(src), alphaMask)); - } else if (_mm_testz_si128(pixels, alphaMask)) { - do { - dst += 4; - src += 4; - } while (src < end && _mm_testz_si128(pixels = load(src), alphaMask)); - } else { - do { - srcover_srgb_srgb_4(dst, src); - dst += 4; - src += 4; - } while (src < end && _mm_testnzc_si128(pixels = load(src), alphaMask)); - } - } - - count = count & 3; - while (count-- > 0) { - srcover_srgb_srgb_1(dst++, *src++); - } + for (int i = 0; i < n; i++) { + srcover_srgb_srgb_1(dst++, src[i]); } + ndst -= n; } - #else - // SSE2 versions - static inline bool check_opaque_alphas(__m128i pixels) { - int mask = - _mm_movemask_epi8( - _mm_cmpeq_epi32( - _mm_andnot_si128(pixels, _mm_set1_epi32(0xFF000000)), - _mm_setzero_si128())); - return mask == 0xFFFF; - } - - static inline bool check_transparent_alphas(__m128i pixels) { - int mask = - _mm_movemask_epi8( - _mm_cmpeq_epi32( - _mm_and_si128(pixels, _mm_set1_epi32(0xFF000000)), - _mm_setzero_si128())); - return mask == 0xFFFF; - } - - static inline bool check_partial_alphas(__m128i pixels) { - __m128i alphas = _mm_and_si128(pixels, _mm_set1_epi32(0xFF000000)); - int mask = - _mm_movemask_epi8( - _mm_cmpeq_epi8( - _mm_srai_epi32(alphas, 8), - alphas)); - return mask == 0xFFFF; - } - - void srcover_srgb_srgb( - uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc) { - while (ndst > 0) { - int count = SkTMin(ndst, nsrc); - ndst -= count; - const uint32_t* src = srcStart; - const uint32_t* end = src + (count & ~3); - - __m128i pixels = load(src); - do { - if (check_opaque_alphas(pixels)) { - do { - store(dst, pixels); - dst += 4; - src += 4; - } while (src < end && check_opaque_alphas(pixels = load(src))); - } else if (check_transparent_alphas(pixels)) { - const uint32_t* start = src; - do { - src += 4; - } while (src < end && check_transparent_alphas(pixels = load(src))); - dst += src - start; - } else { - do { - srcover_srgb_srgb_4(dst, src); - dst += 4; - src += 4; - } while (src < end && check_partial_alphas(pixels = load(src))); - } - } while (src < end); - - count = count & 3; - while (count-- > 0) { - srcover_srgb_srgb_1(dst++, *src++); - } - } - } - #endif -#else - - void srcover_srgb_srgb( - uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { - trivial_srcover_srgb_srgb(dst, src, ndst, nsrc); } - + #endif } // namespace SK_OPTS_NS diff --git a/src/opts/SkOpts_sse41.cpp b/src/opts/SkOpts_sse41.cpp index f0561a69c6..34b078c2ca 100644 --- a/src/opts/SkOpts_sse41.cpp +++ b/src/opts/SkOpts_sse41.cpp @@ -10,7 +10,6 @@ #define SK_OPTS_NS sk_sse41 #include "SkBlurImageFilter_opts.h" #include "SkBlitRow_opts.h" -#include "SkBlend_opts.h" #ifndef SK_SUPPORT_LEGACY_X86_BLITS @@ -212,16 +211,16 @@ static void blit_mask_d32_a8(SkPMColor* dst, size_t dstRB, } } } + } // namespace sk_sse41_new #endif namespace SkOpts { void Init_sse41() { - box_blur_xx = sk_sse41::box_blur_xx; - box_blur_xy = sk_sse41::box_blur_xy; - box_blur_yx = sk_sse41::box_blur_yx; - srcover_srgb_srgb = sk_sse41::srcover_srgb_srgb; + box_blur_xx = sk_sse41::box_blur_xx; + box_blur_xy = sk_sse41::box_blur_xy; + box_blur_yx = sk_sse41::box_blur_yx; #ifndef SK_SUPPORT_LEGACY_X86_BLITS blit_row_color32 = sk_sse41_new::blit_row_color32; diff --git a/tests/SkBlend_optsTest.cpp b/tests/SkBlend_optsTest.cpp deleted file mode 100644 index 10359002f4..0000000000 --- a/tests/SkBlend_optsTest.cpp +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright 2016 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#include <string> -#include <tuple> -#include <vector> -#include "Resources.h" -#include "SkCpu.h" -#include "SkImage.h" -#include "SkImage_Base.h" -#include "SkOpts.h" -#include "Test.h" -#include "../include/core/SkImageInfo.h" - -typedef void (*Blender)(uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc); - -namespace sk_default { -extern void brute_force_srcover_srgb_srgb( - uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc); -} - -namespace sk_default { -extern void trivial_srcover_srgb_srgb( - uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc); - -extern void best_non_simd_srcover_srgb_srgb( - uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc); - -extern void srcover_srgb_srgb( - uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc); -} - -#if defined(SK_CPU_X86) && !defined(SK_BUILD_FOR_IOS) -namespace sk_sse41 { -extern void srcover_srgb_srgb( - uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc); -} -#endif - -static SkString missmatch_message(std::string resourceName, std::string name, int x, int y, - uint32_t src, uint32_t good, uint32_t bad) { - return SkStringPrintf( - "%s - %s missmatch at %d, %d src: %08x good: %08x bad: %08x", - resourceName.c_str(), name.c_str(), x, y, src, good, bad); -} - -using Spec = std::tuple<Blender, std::string>; - -static void test_blender( - Spec spec, - std::string resourceName, - skiatest::Reporter* reporter) -{ - Blender blender; - std::string name; - std::tie(blender, name) = spec; - - std::string fileName = resourceName + ".png"; - sk_sp<SkImage> image = GetResourceAsImage(fileName.c_str()); - SkASSERT(image != nullptr); - if (image == nullptr) { - SkFAIL("image is NULL"); - } - SkBitmap bm; - if (!as_IB(image)->getROPixels(&bm)) { - SkFAIL("Could not read resource"); - } - - SkPixmap pixmap; - bm.peekPixels(&pixmap); - SkASSERTF(pixmap.colorType() == kN32_SkColorType, "colorType: %d", pixmap.colorType()); - SkASSERT(pixmap.alphaType() != kUnpremul_SkAlphaType); - const uint32_t* src = pixmap.addr32(); - const int width = pixmap.rowBytesAsPixels(); - SkASSERT(width > 0); - SkASSERT(width < 4000); - SkAutoTArray<uint32_t> correctDst(width); - SkAutoTArray<uint32_t> testDst(width); - - for (int y = 0; y < pixmap.height(); y++) { - memset(correctDst.get(), 0, width * sizeof(uint32_t)); - memset(testDst.get(), 0, width * sizeof(uint32_t)); - sk_default::brute_force_srcover_srgb_srgb(correctDst.get(), src, width, width); - blender(testDst.get(), src, width, width); - for (int x = 0; x < width; x++) { - REPORTER_ASSERT_MESSAGE( - reporter, correctDst[x] == testDst[x], - missmatch_message(resourceName, name, x, y, src[x], correctDst[x], testDst[x])); - if (correctDst[x] != testDst[x]) break; - } - src += width; - } -} - -DEF_TEST(SkBlend_optsCheck, reporter) { - std::vector<Spec> specs = { - Spec{sk_default::trivial_srcover_srgb_srgb, "trivial"}, - Spec{sk_default::best_non_simd_srcover_srgb_srgb, "best_non_simd"}, - Spec{sk_default::srcover_srgb_srgb, "default"}, - }; - #if defined(SK_CPU_X86) && !defined(SK_BUILD_FOR_IOS) - if (SkCpu::Supports(SkCpu::SSE41)) { - specs.push_back(Spec{sk_sse41::srcover_srgb_srgb, "sse41", }); - } - #endif - - std::vector<std::string> testResources = { - "yellow_rose", "baby_tux", "plane", "mandrill_512", "iconstrip" - }; - - for (auto& spec : specs) { - for (auto& resourceName : testResources) { - test_blender(spec, resourceName, reporter); - } - } -} |