diff options
author | 2014-07-07 08:05:40 -0700 | |
---|---|---|
committer | 2014-07-07 08:05:40 -0700 | |
commit | 5f7f9d04dc3a2d2c3ef9d8f1703d8e13c2d15c6e (patch) | |
tree | f3f2d571a93a9a25b826e2d9da76b1ef9039966f /src/opts/SkBlurImage_opts_SSE2.cpp | |
parent | 1f836ee096bb988adef4b9757b2629c7afeda36d (diff) |
Add SSE4 version of BlurImage optimizations.
Adds an SSE4.1 version of the existing BlurImage optimizations.
The blur_image_filter_* benchmarks show a 10-50% performance
improvement on Linux/Ubuntu Core i7.
Signed-off-by: Henrik Smiding <henrik.smiding@intel.com>
Committed: https://skia.googlesource.com/skia/+/2830632ce93c97ed7647b13348365ea92e4ea665
R=mtklein@google.com, reed@chromium.org
Author: henrik.smiding@intel.com
Review URL: https://codereview.chromium.org/366593004
Diffstat (limited to 'src/opts/SkBlurImage_opts_SSE2.cpp')
-rw-r--r-- | src/opts/SkBlurImage_opts_SSE2.cpp | 8 |
1 files changed, 2 insertions, 6 deletions
diff --git a/src/opts/SkBlurImage_opts_SSE2.cpp b/src/opts/SkBlurImage_opts_SSE2.cpp index bbc6a66462..d2f8882726 100644 --- a/src/opts/SkBlurImage_opts_SSE2.cpp +++ b/src/opts/SkBlurImage_opts_SSE2.cpp @@ -55,17 +55,13 @@ void SkBoxBlur_SSE2(const SkPMColor* src, int srcStride, SkPMColor* dst, int ker const SkPMColor* sptr = src; SkColor* dptr = dst; for (int x = 0; x < width; ++x) { -#if 0 - // In SSE4.1, this would be - __m128i result = _mm_mullo_epi32(sum, scale); -#else - // But SSE2 has no PMULLUD, so we must do AG and RB separately. + // SSE2 has no PMULLUD, so we must do AG and RB separately. __m128i tmp1 = _mm_mul_epu32(sum, scale); __m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(sum, 4), _mm_srli_si128(scale, 4)); __m128i result = _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0,0,2,0)), _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0,0,2,0))); -#endif + // sumA*scale+.5 sumB*scale+.5 sumG*scale+.5 sumB*scale+.5 result = _mm_add_epi32(result, half); |