diff options
author | 2014-04-25 02:59:04 +0000 | |
---|---|---|
committer | 2014-04-25 02:59:04 +0000 | |
commit | b004b8e22fb90c6773e9ee6d76142123f1663d46 (patch) | |
tree | 183d7e194de35413c436a5dd75c923c7fa700d30 /src | |
parent | 7bf10152b129e3b0cad76f2abd5136ccbc74a393 (diff) |
Xfermode: SSE2 implementation of exclusion_modeproc
With the SSE2 optimization, the running time of Xfermode_Exclusion drops by
about 50% on a desktop i7-3770. Here are the data:
before:
Xfermode_Exclusion 8888: cmsecs = 40.17 565: cmsecs = 55.22
after:
Xfermode_Exclusion 8888: cmsecs = 18.53 565: cmsecs = 26.55
BUG=skia:
R=mtklein@google.com
Author: qiankun.miao@intel.com
Review URL: https://codereview.chromium.org/233733005
git-svn-id: http://skia.googlecode.com/svn/trunk@14371 2bbb7eff-a529-9590-31e7-b0007b416f81
Diffstat (limited to 'src')
-rw-r--r-- | src/opts/SkXfermode_opts_SSE2.cpp | 28 |
1 files changed, 27 insertions, 1 deletions
```diff
diff --git a/src/opts/SkXfermode_opts_SSE2.cpp b/src/opts/SkXfermode_opts_SSE2.cpp
index 817f777bc4..51bb820174 100644
--- a/src/opts/SkXfermode_opts_SSE2.cpp
+++ b/src/opts/SkXfermode_opts_SSE2.cpp
@@ -266,6 +266,32 @@ static __m128i overlay_modeproc_SSE2(const __m128i& src, const __m128i& dst) {
     return SkPackARGB32_SSE2(a, r, g, b);
 }
 
+static inline __m128i exclusion_byte_SSE2(const __m128i& sc, const __m128i& dc,
+                                          const __m128i&, __m128i&) {
+    __m128i tmp1 = _mm_mullo_epi16(_mm_set1_epi32(255), sc); // 255 * sc
+    __m128i tmp2 = _mm_mullo_epi16(_mm_set1_epi32(255), dc); // 255 * dc
+    tmp1 = _mm_add_epi32(tmp1, tmp2);
+    tmp2 = _mm_mullo_epi16(sc, dc); // sc * dc
+    tmp2 = _mm_slli_epi32(tmp2, 1); // 2 * sc * dc
+
+    __m128i r = _mm_sub_epi32(tmp1, tmp2);
+    return clamp_div255round_SSE2(r);
+}
+
+static __m128i exclusion_modeproc_SSE2(const __m128i& src, const __m128i& dst) {
+    __m128i sa = SkGetPackedA32_SSE2(src);
+    __m128i da = SkGetPackedA32_SSE2(dst);
+
+    __m128i a = srcover_byte_SSE2(sa, da);
+    __m128i r = exclusion_byte_SSE2(SkGetPackedR32_SSE2(src),
+                                    SkGetPackedR32_SSE2(dst), sa, da);
+    __m128i g = exclusion_byte_SSE2(SkGetPackedG32_SSE2(src),
+                                    SkGetPackedG32_SSE2(dst), sa, da);
+    __m128i b = exclusion_byte_SSE2(SkGetPackedB32_SSE2(src),
+                                    SkGetPackedB32_SSE2(dst), sa, da);
+    return SkPackARGB32_SSE2(a, r, g, b);
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 
 typedef __m128i (*SkXfermodeProcSIMD)(const __m128i& src, const __m128i& dst);
@@ -429,7 +455,7 @@ SkXfermodeProcSIMD gSSE2XfermodeProcs[] = {
     NULL, // kHardLight_Mode
     NULL, // kSoftLight_Mode
     NULL, // kDifference_Mode
-    NULL, // kExclusion_Mode
+    exclusion_modeproc_SSE2,
     multiply_modeproc_SSE2,
 
     NULL, // kHue_Mode
```