aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/opts/SkBlurImage_opts_SSE2.cpp
diff options
context:
space:
mode:
authorGravatar henrik.smiding <henrik.smiding@intel.com>2014-07-04 04:23:17 -0700
committerGravatar Commit bot <commit-bot@chromium.org>2014-07-04 04:23:17 -0700
commit 2830632ce93c97ed7647b13348365ea92e4ea665 (patch)
tree 5373e37ddeb54ee601b8d1cc067cf132d2c8fbd5 /src/opts/SkBlurImage_opts_SSE2.cpp
parent 3df05015efce95c306fb79c21efc77c79f1ac1ba (diff)
Add SSE4 version of BlurImage optimizations.
Adds an SSE4.1 version of the existing BlurImage optimizations. Performance of blur_image_filter_* benchmarks shows a 10-50% improvement on Linux/Ubuntu Core i7. Signed-off-by: Henrik Smiding <henrik.smiding@intel.com> R=mtklein@google.com Author: henrik.smiding@intel.com Review URL: https://codereview.chromium.org/366593004
Diffstat (limited to 'src/opts/SkBlurImage_opts_SSE2.cpp')
-rw-r--r-- src/opts/SkBlurImage_opts_SSE2.cpp 8
1 files changed, 2 insertions, 6 deletions
diff --git a/src/opts/SkBlurImage_opts_SSE2.cpp b/src/opts/SkBlurImage_opts_SSE2.cpp
index bbc6a66462..d2f8882726 100644
--- a/src/opts/SkBlurImage_opts_SSE2.cpp
+++ b/src/opts/SkBlurImage_opts_SSE2.cpp
@@ -55,17 +55,13 @@ void SkBoxBlur_SSE2(const SkPMColor* src, int srcStride, SkPMColor* dst, int ker
const SkPMColor* sptr = src;
SkColor* dptr = dst;
for (int x = 0; x < width; ++x) {
-#if 0
- // In SSE4.1, this would be
- __m128i result = _mm_mullo_epi32(sum, scale);
-#else
- // But SSE2 has no PMULLUD, so we must do AG and RB separately.
+ // SSE2 has no PMULLUD, so we must do AG and RB separately.
__m128i tmp1 = _mm_mul_epu32(sum, scale);
__m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(sum, 4),
_mm_srli_si128(scale, 4));
__m128i result = _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0,0,2,0)),
_mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0,0,2,0)));
-#endif
+
// sumA*scale+.5 sumB*scale+.5 sumG*scale+.5 sumB*scale+.5
result = _mm_add_epi32(result, half);