Sk4px blit mask.

Local SKP nanobenching on SSE shows ratios ranging between 1.05x and 0.87x, much more heavily weighted toward <1.0x ratios (speedups). I profiled the top five regressions (1.05x-1.01x) and they look like noise. Will follow up after broad bot results. NEON looks similar to, but less extreme than, the SSE changes, ranging between 1.02x and 0.95x, again mostly speedups in the 0.99x-0.97x range. The old code trifurcated into black, opaque-but-not-black, and general versions as a function of the constant src color. I did not see a significant difference between general and opaque-but-not-black, and I don't think a black version would be faster using SIMD. So we have here just one version of the code, the general version. Somewhat fantastically, I see no pixel diffs on GMs or SKPs. I will be following up with more CLs for the other procs called by SkBlitMask. BUG=skia: Review URL: https://codereview.chromium.org/1278253003
author: mtklein <mtklein@chromium.org> 2015-08-10 12:58:17 -0700
committer: Commit bot <commit-bot@chromium.org> 2015-08-10 12:58:17 -0700
commit: 4977983510028712528743aa877f6da83781b381 (patch)
tree: d56b58c866da7e86a80b38d0e85f03abd3f7d87c /src/opts/SkBlitRow_opts_SSE2.cpp
parent: c699873ac7c6b21bbca96053cdb9720c80f69916 (diff)
1 files changed, 0 insertions, 48 deletions
diff --git a/src/opts/SkBlitRow_opts_SSE2.cpp b/src/opts/SkBlitRow_opts_SSE2.cpp
index 7f5b6779cf..c017f7e4b3 100644
--- a/src/opts/SkBlitRow_opts_SSE2.cpp
+++ b/src/opts/SkBlitRow_opts_SSE2.cpp
@@ -301,54 +301,6 @@ void Color32A_D565_SSE2(uint16_t dst[], SkPMColor src, int count, int x, int y)
     }
 }
 
-void SkARGB32_A8_BlitMask_SSE2(void* device, size_t dstRB, const void* maskPtr,
-                               size_t maskRB, SkColor origColor,
-                               int width, int height) {
-    SkPMColor color = SkPreMultiplyColor(origColor);
-    size_t dstOffset = dstRB - (width << 2);
-    size_t maskOffset = maskRB - width;
-    SkPMColor* dst = (SkPMColor *)device;
-    const uint8_t* mask = (const uint8_t*)maskPtr;
-    do {
-        int count = width;
-        if (count >= 4) {
-            while (((size_t)dst & 0x0F) != 0 && (count > 0)) {
-                *dst = SkBlendARGB32(color, *dst, *mask);
-                mask++;
-                dst++;
-                count--;
-            }
-            __m128i *d = reinterpret_cast<__m128i*>(dst);
-            __m128i src_pixel = _mm_set1_epi32(color);
-            while (count >= 4) {
-                // Load 4 dst pixels
-                __m128i dst_pixel = _mm_load_si128(d);
-
-                // Set the alpha value
-                __m128i alpha_wide = _mm_cvtsi32_si128(*reinterpret_cast<const uint32_t*>(mask));
-                alpha_wide = _mm_unpacklo_epi8(alpha_wide, _mm_setzero_si128());
-                alpha_wide = _mm_unpacklo_epi16(alpha_wide, _mm_setzero_si128());
-
-                __m128i result = SkBlendARGB32_SSE2(src_pixel, dst_pixel, alpha_wide);
-                _mm_store_si128(d, result);
-                // Load the next 4 dst pixels and alphas
-                mask = mask + 4;
-                d++;
-                count -= 4;
-            }
-            dst = reinterpret_cast<SkPMColor*>(d);
-        }
-        while (count > 0) {
-            *dst= SkBlendARGB32(color, *dst, *mask);
-            dst += 1;
-            mask++;
-            count --;
-        }
-        dst = (SkPMColor *)((char*)dst + dstOffset);
-        mask += maskOffset;
-    } while (--height != 0);
-}
-
 // The following (left) shifts cause the top 5 bits of the mask components to
 // line up with the corresponding components in an SkPMColor.
 // Note that the mask's RGB16 order may differ from the SkPMColor order.
author	mtklein <mtklein@chromium.org>	2015-08-10 12:58:17 -0700
committer	Commit bot <commit-bot@chromium.org>	2015-08-10 12:58:17 -0700
commit	4977983510028712528743aa877f6da83781b381 (patch)
tree	d56b58c866da7e86a80b38d0e85f03abd3f7d87c /src/opts/SkBlitRow_opts_SSE2.cpp
parent	c699873ac7c6b21bbca96053cdb9720c80f69916 (diff)