about | summary | refs | log | tree | commit | diff | homepage
path: root/src/opts
diff options
context:
space:
mode:
author scroggo <scroggo@chromium.org> 2016-05-11 06:40:32 -0700
committer Commit bot <commit-bot@chromium.org> 2016-05-11 06:40:32 -0700
commit fbb3102d0e5f4c7d4b89d710f806b163ae27c3c8 (patch)
tree 2b8d5d06e384e9b5b539a6cee737f8d46d75bd7b /src/opts
parent 8b7451aaf6b1c71e9d343a4df107893db277b6aa (diff)
Revert "Add tests and benches to support the sRGB blitter for SkOpts"
This reverts commit 554784cd85029c05d9ed04b1aeb71520d196153a and 1956b4ae1c9a47833b174f31c054d347ea04db09

Reason for revert - MSAN failures, e.g. from https://uberchromegw.corp.google.com/i/client.skia/builders/Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Debug-MSAN/builds/2233/steps/perf_skia%20on%20Ubuntu/logs/stdio :

Uninitialized value was created by a heap allocation
  0 0x7f69aa96f799 in operator new[](unsigned long) /b/work/skia/third_party/externals/llvm/out/../projects/compiler-rt/lib/msan/msan_new_delete.cc:37
  1 0x7f69aaa315c1 in SkAutoTArray<unsigned int>::reset(int) /b/work/skia/out/Build-Ubuntu-GCC-x86_64-Debug-MSAN/Debug/../../../include/private/../private/SkTemplates.h:137:22
  2 0x7f69aaa34ee9 in LinearSrcOverBench<SrcOverVSkOptsSSE41>::LinearSrcOverBench(char const*) /b/work/skia/out/Build-Ubuntu-GCC-x86_64-Debug-MSAN/Debug/../../../bench/SkBlend_optsBench.cpp:108:9
  3 0x7f69aaa30cf2 in $_24::operator()(void*) const /b/work/skia/out/Build-Ubuntu-GCC-x86_64-Debug-MSAN/Debug/../../../bench/SkBlend_optsBench.cpp:167:1
  4 0x7f69aaa30c87 in $_24::__invoke(void*) /b/work/skia/out/Build-Ubuntu-GCC-x86_64-Debug-MSAN/Debug/../../../bench/SkBlend_optsBench.cpp:167:1
  5 0x7f69aaa68856 in BenchmarkStream::rawNext() /b/work/skia/out/Build-Ubuntu-GCC-x86_64-Debug-MSAN/Debug/../../../bench/nanobench.cpp:653:32
  6 0x7f69aaa61467 in BenchmarkStream::next() /b/work/skia/out/Build-Ubuntu-GCC-x86_64-Debug-MSAN/Debug/../../../bench/nanobench.cpp:642:25
  7 0x7f69aaa5b703 in nanobench_main() /b/work/skia/out/Build-Ubuntu-GCC-x86_64-Debug-MSAN/Debug/../../../bench/nanobench.cpp:1119:27
  8 0x7f69aaa5e10d in main /b/work/skia/out/Build-Ubuntu-GCC-x86_64-Debug-MSAN/Debug/../../../bench/nanobench.cpp:1290:12
  9 0x7f69a8c95ec4 in __libc_start_main /build/buildd/eglibc-2.19/csu/libc-start.c:287

TBR=herb@google.com
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1969803002
CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot

Review-Url: https://codereview.chromium.org/1969803002
Diffstat (limited to 'src/opts')
-rw-r--r--  src/opts/SkBlend_opts.h   235
-rw-r--r--  src/opts/SkOpts_sse41.cpp   9
2 files changed, 31 insertions, 213 deletions
diff --git a/src/opts/SkBlend_opts.h b/src/opts/SkBlend_opts.h
index 59bf8c2908..a1067407be 100644
--- a/src/opts/SkBlend_opts.h
+++ b/src/opts/SkBlend_opts.h
@@ -5,233 +5,52 @@
* found in the LICENSE file.
*/
-/*
-ninja -C out/Release dm nanobench ; and ./out/Release/dm --match Blend_opts ; and ./out/Release/nanobench --samples 300 --nompd --match LinearSrcOver -q
- */
-
#ifndef SkBlend_opts_DEFINED
#define SkBlend_opts_DEFINED
-#include "SkNx.h"
-#include "SkPM4fPriv.h"
-
namespace SK_OPTS_NS {
-// An implementation of SrcOver from bytes to bytes in linear space that takes advantage of the
-// observation that the 255's cancel.
-// invA = 1 - (As / 255);
-//
-// R = 255 * sqrt((Rs/255)^2 + (Rd/255)^2 * invA)
-// => R = 255 * sqrt((Rs^2 + Rd^2 * invA)/255^2)
-// => R = sqrt(Rs^2 + Rd^2 * invA)
-static inline void blend_srgb_srgb_1(uint32_t* dst, const uint32_t pixel) {
- Sk4f s = srgb_to_linear(to_4f(pixel));
- Sk4f d = srgb_to_linear(to_4f(*dst));
- Sk4f invAlpha = 1.0f - Sk4f{s[SkPM4f::A]} * (1.0f / 255.0f);
- Sk4f r = linear_to_srgb(s + d * invAlpha);
- *dst = to_4b(r);
-}
-
-static inline void srcover_srgb_srgb_1(uint32_t* dst, const uint32_t pixel) {
- if ((~pixel & 0xFF000000) == 0) {
- *dst = pixel;
- } else if ((pixel & 0xFF000000) != 0) {
- blend_srgb_srgb_1(dst, pixel);
- }
-}
-
-static inline void srcover_srgb_srgb_2(uint32_t* dst, const uint32_t* src) {
- srcover_srgb_srgb_1(dst++, *src++);
- srcover_srgb_srgb_1(dst, *src);
-}
-
-static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) {
- srcover_srgb_srgb_1(dst++, *src++);
- srcover_srgb_srgb_1(dst++, *src++);
- srcover_srgb_srgb_1(dst++, *src++);
- srcover_srgb_srgb_1(dst, *src);
-}
-
-void best_non_simd_srcover_srgb_srgb(
- uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
- uint64_t* ddst = reinterpret_cast<uint64_t*>(dst);
+#if 0
- while (ndst >0) {
- int count = SkTMin(ndst, nsrc);
- ndst -= count;
- const uint64_t* dsrc = reinterpret_cast<const uint64_t*>(src);
- const uint64_t* end = dsrc + (count >> 1);
- do {
- if ((~*dsrc & 0xFF000000FF000000) == 0) {
- do {
- *ddst++ = *dsrc++;
- } while (dsrc < end && (~*dsrc & 0xFF000000FF000000) == 0);
- } else if ((*dsrc & 0xFF000000FF000000) == 0) {
- do {
- dsrc++;
- ddst++;
- } while (dsrc < end && (*dsrc & 0xFF000000FF000000) == 0);
- } else {
- srcover_srgb_srgb_2(reinterpret_cast<uint32_t*>(ddst++),
- reinterpret_cast<const uint32_t*>(dsrc++));
- }
- } while (dsrc < end);
-
- if ((count & 1) != 0) {
- srcover_srgb_srgb_1(reinterpret_cast<uint32_t*>(ddst),
- *reinterpret_cast<const uint32_t*>(dsrc));
- }
- }
-}
-
-void brute_force_srcover_srgb_srgb(
- uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
- while (ndst > 0) {
- int n = SkTMin(ndst, nsrc);
+#else
- for (int i = 0; i < n; i++) {
- blend_srgb_srgb_1(dst++, src[i]);
+ static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) {
+ switch (src >> 24) {
+ case 0x00: return;
+ case 0xff: *dst = src; return;
}
- ndst -= n;
- }
-}
-void trivial_srcover_srgb_srgb(
- uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
- while (ndst > 0) {
- int n = SkTMin(ndst, nsrc);
+ Sk4f d = SkNx_cast<float>(Sk4b::Load( dst)),
+ s = SkNx_cast<float>(Sk4b::Load(&src));
- for (int i = 0; i < n; i++) {
- srcover_srgb_srgb_1(dst++, src[i]);
- }
- ndst -= n;
- }
-}
+ // Approximate sRGB gamma as 2.0.
+ Sk4f d_sq = d*d,
+ s_sq = s*s;
+ d = Sk4f{d_sq[0], d_sq[1], d_sq[2], d[3]};
+ s = Sk4f{s_sq[0], s_sq[1], s_sq[2], s[3]};
-#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
+ // SrcOver.
+ Sk4f invA = 1.0f - s[3]*(1/255.0f);
+ d = s + d * invA;
- static inline __m128i load(const uint32_t* p) {
- return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p));
- }
+ // Re-apply approximate sRGB gamma.
+ Sk4f d_sqrt = d.sqrt();
+ d = Sk4f{d_sqrt[0], d_sqrt[1], d_sqrt[2], d[3]};
- static inline void store(uint32_t* p, __m128i v) {
- _mm_storeu_si128(reinterpret_cast<__m128i*>(p), v);
+ SkNx_cast<uint8_t>(d).store(dst);
}
- #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
-
- void srcover_srgb_srgb(
- uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc) {
- const __m128i alphaMask = _mm_set1_epi32(0xFF000000);
- while (ndst > 0) {
- int count = SkTMin(ndst, nsrc);
- ndst -= count;
- const uint32_t* src = srcStart;
- const uint32_t* end = src + (count & ~3);
+ static inline void srcover_srgb_srgb(uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
+ while (ndst > 0) {
+ int n = SkTMin(ndst, nsrc);
- while (src < end) {
- __m128i pixels = load(src);
- if (_mm_testc_si128(pixels, alphaMask)) {
- do {
- store(dst, pixels);
- dst += 4;
- src += 4;
- } while (src < end && _mm_testc_si128(pixels = load(src), alphaMask));
- } else if (_mm_testz_si128(pixels, alphaMask)) {
- do {
- dst += 4;
- src += 4;
- } while (src < end && _mm_testz_si128(pixels = load(src), alphaMask));
- } else {
- do {
- srcover_srgb_srgb_4(dst, src);
- dst += 4;
- src += 4;
- } while (src < end && _mm_testnzc_si128(pixels = load(src), alphaMask));
- }
- }
-
- count = count & 3;
- while (count-- > 0) {
- srcover_srgb_srgb_1(dst++, *src++);
- }
+ for (int i = 0; i < n; i++) {
+ srcover_srgb_srgb_1(dst++, src[i]);
}
+ ndst -= n;
}
- #else
- // SSE2 versions
- static inline bool check_opaque_alphas(__m128i pixels) {
- int mask =
- _mm_movemask_epi8(
- _mm_cmpeq_epi32(
- _mm_andnot_si128(pixels, _mm_set1_epi32(0xFF000000)),
- _mm_setzero_si128()));
- return mask == 0xFFFF;
- }
-
- static inline bool check_transparent_alphas(__m128i pixels) {
- int mask =
- _mm_movemask_epi8(
- _mm_cmpeq_epi32(
- _mm_and_si128(pixels, _mm_set1_epi32(0xFF000000)),
- _mm_setzero_si128()));
- return mask == 0xFFFF;
- }
-
- static inline bool check_partial_alphas(__m128i pixels) {
- __m128i alphas = _mm_and_si128(pixels, _mm_set1_epi32(0xFF000000));
- int mask =
- _mm_movemask_epi8(
- _mm_cmpeq_epi8(
- _mm_srai_epi32(alphas, 8),
- alphas));
- return mask == 0xFFFF;
- }
-
- void srcover_srgb_srgb(
- uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc) {
- while (ndst > 0) {
- int count = SkTMin(ndst, nsrc);
- ndst -= count;
- const uint32_t* src = srcStart;
- const uint32_t* end = src + (count & ~3);
-
- __m128i pixels = load(src);
- do {
- if (check_opaque_alphas(pixels)) {
- do {
- store(dst, pixels);
- dst += 4;
- src += 4;
- } while (src < end && check_opaque_alphas(pixels = load(src)));
- } else if (check_transparent_alphas(pixels)) {
- const uint32_t* start = src;
- do {
- src += 4;
- } while (src < end && check_transparent_alphas(pixels = load(src)));
- dst += src - start;
- } else {
- do {
- srcover_srgb_srgb_4(dst, src);
- dst += 4;
- src += 4;
- } while (src < end && check_partial_alphas(pixels = load(src)));
- }
- } while (src < end);
-
- count = count & 3;
- while (count-- > 0) {
- srcover_srgb_srgb_1(dst++, *src++);
- }
- }
- }
- #endif
-#else
-
- void srcover_srgb_srgb(
- uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
- trivial_srcover_srgb_srgb(dst, src, ndst, nsrc);
}
-
+
#endif
} // namespace SK_OPTS_NS
diff --git a/src/opts/SkOpts_sse41.cpp b/src/opts/SkOpts_sse41.cpp
index f0561a69c6..34b078c2ca 100644
--- a/src/opts/SkOpts_sse41.cpp
+++ b/src/opts/SkOpts_sse41.cpp
@@ -10,7 +10,6 @@
#define SK_OPTS_NS sk_sse41
#include "SkBlurImageFilter_opts.h"
#include "SkBlitRow_opts.h"
-#include "SkBlend_opts.h"
#ifndef SK_SUPPORT_LEGACY_X86_BLITS
@@ -212,16 +211,16 @@ static void blit_mask_d32_a8(SkPMColor* dst, size_t dstRB,
}
}
}
+
} // namespace sk_sse41_new
#endif
namespace SkOpts {
void Init_sse41() {
- box_blur_xx = sk_sse41::box_blur_xx;
- box_blur_xy = sk_sse41::box_blur_xy;
- box_blur_yx = sk_sse41::box_blur_yx;
- srcover_srgb_srgb = sk_sse41::srcover_srgb_srgb;
+ box_blur_xx = sk_sse41::box_blur_xx;
+ box_blur_xy = sk_sse41::box_blur_xy;
+ box_blur_yx = sk_sse41::box_blur_yx;
#ifndef SK_SUPPORT_LEGACY_X86_BLITS
blit_row_color32 = sk_sse41_new::blit_row_color32;