aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
authorGravatar Mike Klein <mtklein@chromium.org>2017-05-30 14:58:03 -0400
committerGravatar Skia Commit-Bot <skia-commit-bot@chromium.org>2017-05-30 19:16:46 +0000
commit836e6c1f7daee7aa02c0779dce2cfd59b6c1d830 (patch)
treee0ba662c1f7693e52caedb3edcfacac819077281 /src
parentb391f12e78008cb61be32591829de3e95d4fed63 (diff)
remove sse2::srcover_srgb_srgb
Change-Id: Icc570d8a8f1df1dea202e1d234433491122b9b67 Reviewed-on: https://skia-review.googlesource.com/18135 Reviewed-by: Herb Derby <herb@google.com> Commit-Queue: Mike Klein <mtklein@chromium.org>
Diffstat (limited to 'src')
-rw-r--r--src/opts/SkBlend_opts.h152
1 files changed, 39 insertions, 113 deletions
diff --git a/src/opts/SkBlend_opts.h b/src/opts/SkBlend_opts.h
index 1da4c4fb04..15636e9914 100644
--- a/src/opts/SkBlend_opts.h
+++ b/src/opts/SkBlend_opts.h
@@ -15,7 +15,7 @@ ninja -C out/Release dm nanobench ; and ./out/Release/dm --match Blend_opts ; an
#include "SkNx.h"
#include "SkPM4fPriv.h"
-#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
#include <immintrin.h>
#endif
@@ -38,7 +38,7 @@ static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) {
srcover_srgb_srgb_1(dst , *src );
}
-#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
static inline __m128i load(const uint32_t* p) {
return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p));
@@ -48,124 +48,50 @@ static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) {
_mm_storeu_si128(reinterpret_cast<__m128i*>(p), v);
}
- #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
-
- static void srcover_srgb_srgb(
+ static void srcover_srgb_srgb(
uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc) {
- const __m128i alphaMask = _mm_set1_epi32(0xFF000000);
- while (ndst > 0) {
- int count = SkTMin(ndst, nsrc);
- ndst -= count;
- const uint32_t* src = srcStart;
- const uint32_t* end = dst + (count & ~3);
- ptrdiff_t delta = src - dst;
-
- while (dst < end) {
- __m128i pixels = load(src);
- if (_mm_testc_si128(pixels, alphaMask)) {
- uint32_t* start = dst;
- do {
- store(dst, pixels);
- dst += 4;
- } while (dst < end
- && _mm_testc_si128(pixels = load(dst + delta), alphaMask));
- src += dst - start;
- } else if (_mm_testz_si128(pixels, alphaMask)) {
- do {
- dst += 4;
- src += 4;
- } while (dst < end
- && _mm_testz_si128(pixels = load(src), alphaMask));
- } else {
- uint32_t* start = dst;
- do {
- srcover_srgb_srgb_4(dst, dst + delta);
- dst += 4;
- } while (dst < end
- && _mm_testnzc_si128(pixels = load(dst + delta), alphaMask));
- src += dst - start;
- }
- }
+ const __m128i alphaMask = _mm_set1_epi32(0xFF000000);
+ while (ndst > 0) {
+ int count = SkTMin(ndst, nsrc);
+ ndst -= count;
+ const uint32_t* src = srcStart;
+ const uint32_t* end = dst + (count & ~3);
+ ptrdiff_t delta = src - dst;
- count = count & 3;
- while (count-- > 0) {
- srcover_srgb_srgb_1(dst++, *src++);
+ while (dst < end) {
+ __m128i pixels = load(src);
+ if (_mm_testc_si128(pixels, alphaMask)) {
+ uint32_t* start = dst;
+ do {
+ store(dst, pixels);
+ dst += 4;
+ } while (dst < end
+ && _mm_testc_si128(pixels = load(dst + delta), alphaMask));
+ src += dst - start;
+ } else if (_mm_testz_si128(pixels, alphaMask)) {
+ do {
+ dst += 4;
+ src += 4;
+ } while (dst < end
+ && _mm_testz_si128(pixels = load(src), alphaMask));
+ } else {
+ uint32_t* start = dst;
+ do {
+ srcover_srgb_srgb_4(dst, dst + delta);
+ dst += 4;
+ } while (dst < end
+ && _mm_testnzc_si128(pixels = load(dst + delta), alphaMask));
+ src += dst - start;
}
}
- }
- #else
- // SSE2 versions
-
- // Note: In the next three comparisons a group of 4 pixels is converted to a group of
- // "signed" pixels because the sse2 does not have an unsigned comparison.
- // Make it so that we can use the signed comparison operators by biasing
- // 0x00xxxxxx to 0x80xxxxxxx which is the smallest values and biasing 0xffxxxxxx to
- // 0x7fxxxxxx which is the largest set of values.
- static inline bool check_opaque_alphas(__m128i pixels) {
- __m128i signedPixels = _mm_xor_si128(pixels, _mm_set1_epi32(0x80000000));
- int mask =
- _mm_movemask_epi8(
- _mm_cmplt_epi32(signedPixels, _mm_set1_epi32(0x7F000000)));
- return mask == 0;
- }
-
- static inline bool check_transparent_alphas(__m128i pixels) {
- __m128i signedPixels = _mm_xor_si128(pixels, _mm_set1_epi32(0x80000000));
- int mask =
- _mm_movemask_epi8(
- _mm_cmpgt_epi32(signedPixels, _mm_set1_epi32(0x80FFFFFF)));
- return mask == 0;
- }
- static inline bool check_partial_alphas(__m128i pixels) {
- __m128i signedPixels = _mm_xor_si128(pixels, _mm_set1_epi32(0x80000000));
- __m128i opaque = _mm_cmplt_epi32(signedPixels, _mm_set1_epi32(0x7F000000));
- __m128i transparent = _mm_cmpgt_epi32(signedPixels, _mm_set1_epi32(0x80FFFFFF));
- int mask = _mm_movemask_epi8(_mm_xor_si128(opaque, transparent));
- return mask == 0;
- }
-
- static void srcover_srgb_srgb(
- uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc) {
- while (ndst > 0) {
- int count = SkTMin(ndst, nsrc);
- ndst -= count;
- const uint32_t* src = srcStart;
- const uint32_t* end = dst + (count & ~3);
- const ptrdiff_t delta = src - dst;
-
- __m128i pixels = load(src);
- do {
- if (check_opaque_alphas(pixels)) {
- uint32_t* start = dst;
- do {
- store(dst, pixels);
- dst += 4;
- } while (dst < end && check_opaque_alphas((pixels = load(dst + delta))));
- src += dst - start;
- } else if (check_transparent_alphas(pixels)) {
- const uint32_t* start = dst;
- do {
- dst += 4;
- } while (dst < end && check_transparent_alphas(pixels = load(dst + delta)));
- src += dst - start;
- } else {
- const uint32_t* start = dst;
- do {
- srcover_srgb_srgb_4(dst, dst + delta);
- dst += 4;
- } while (dst < end && check_partial_alphas(pixels = load(dst + delta)));
- src += dst - start;
- }
- } while (dst < end);
-
- count = count & 3;
- while (count-- > 0) {
- srcover_srgb_srgb_1(dst++, *src++);
- }
+ count = count & 3;
+ while (count-- > 0) {
+ srcover_srgb_srgb_1(dst++, *src++);
}
}
- #endif
+ }
+
#else
static void srcover_srgb_srgb(