about | summary | refs | log | tree | commit | diff | homepage
path: root/src
diff options
context:
space:
mode:
authorGravatar commit-bot@chromium.org <commit-bot@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81>2014-04-25 02:59:04 +0000
committerGravatar commit-bot@chromium.org <commit-bot@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81>2014-04-25 02:59:04 +0000
commit b004b8e22fb90c6773e9ee6d76142123f1663d46 (patch)
tree 183d7e194de35413c436a5dd75c923c7fa700d30 /src
parent 7bf10152b129e3b0cad76f2abd5136ccbc74a393 (diff)
Xfermode: SSE2 implementation of exclusion_modeproc
With the SSE2 optimization, the performance of Xfermode_Exclusion improves by about 50% on a desktop i7-3770. Here are the data. Before: Xfermode_Exclusion 8888: cmsecs = 40.17, 565: cmsecs = 55.22. After: Xfermode_Exclusion 8888: cmsecs = 18.53, 565: cmsecs = 26.55. BUG=skia: R=mtklein@google.com Author: qiankun.miao@intel.com Review URL: https://codereview.chromium.org/233733005 git-svn-id: http://skia.googlecode.com/svn/trunk@14371 2bbb7eff-a529-9590-31e7-b0007b416f81
Diffstat (limited to 'src')
-rw-r--r-- src/opts/SkXfermode_opts_SSE2.cpp 28
1 file changed, 27 insertions, 1 deletion
diff --git a/src/opts/SkXfermode_opts_SSE2.cpp b/src/opts/SkXfermode_opts_SSE2.cpp
index 817f777bc4..51bb820174 100644
--- a/src/opts/SkXfermode_opts_SSE2.cpp
+++ b/src/opts/SkXfermode_opts_SSE2.cpp
@@ -266,6 +266,32 @@ static __m128i overlay_modeproc_SSE2(const __m128i& src, const __m128i& dst) {
return SkPackARGB32_SSE2(a, r, g, b);
}
+static inline __m128i exclusion_byte_SSE2(const __m128i& sc, const __m128i& dc, // one colour channel: 255 * (sc + dc) - 2 * sc * dc, handed to clamp_div255round_SSE2
+ const __m128i&, __m128i&) { // the two unnamed parameters are never read in this body
+ __m128i tmp1 = _mm_mullo_epi16(_mm_set1_epi32(255), sc); // 255 * sc (16-bit lane multiply; assumes each 32-bit lane of sc holds a 0..255 value -- TODO confirm)
+ __m128i tmp2 = _mm_mullo_epi16(_mm_set1_epi32(255), dc); // 255 * dc (same assumption for dc)
+ tmp1 = _mm_add_epi32(tmp1, tmp2); // 255 * (sc + dc)
+ tmp2 = _mm_mullo_epi16(sc, dc); // sc * dc
+ tmp2 = _mm_slli_epi32(tmp2, 1); // 2 * sc * dc
+
+ __m128i r = _mm_sub_epi32(tmp1, tmp2); // 255 * (sc + dc) - 2 * sc * dc
+ return clamp_div255round_SSE2(r); // helper defined outside this hunk; presumably clamps and divides by 255 with rounding -- verify
+}
+
+static __m128i exclusion_modeproc_SSE2(const __m128i& src, const __m128i& dst) { // builds the result pixel vector from src and dst one channel at a time
+ __m128i sa = SkGetPackedA32_SSE2(src); // presumably the alpha channel of src -- helper defined outside this hunk
+ __m128i da = SkGetPackedA32_SSE2(dst); // presumably the alpha channel of dst
+
+ __m128i a = srcover_byte_SSE2(sa, da); // result alpha comes from srcover_byte_SSE2, not from the exclusion formula
+ __m128i r = exclusion_byte_SSE2(SkGetPackedR32_SSE2(src),
+ SkGetPackedR32_SSE2(dst), sa, da); // sa/da are passed to match the signature; exclusion_byte_SSE2 leaves them unnamed and unused
+ __m128i g = exclusion_byte_SSE2(SkGetPackedG32_SSE2(src),
+ SkGetPackedG32_SSE2(dst), sa, da); // same call for the G channel
+ __m128i b = exclusion_byte_SSE2(SkGetPackedB32_SSE2(src),
+ SkGetPackedB32_SSE2(dst), sa, da); // same call for the B channel
+ return SkPackARGB32_SSE2(a, r, g, b); // recombine the four channel vectors into the returned value
+}
+
////////////////////////////////////////////////////////////////////////////////
typedef __m128i (*SkXfermodeProcSIMD)(const __m128i& src, const __m128i& dst); // pointer to a function taking src and dst vectors by const reference and returning a __m128i
@@ -429,7 +455,7 @@ SkXfermodeProcSIMD gSSE2XfermodeProcs[] = {
NULL, // kHardLight_Mode
NULL, // kSoftLight_Mode
NULL, // kDifference_Mode
- NULL, // kExclusion_Mode
+ exclusion_modeproc_SSE2,
multiply_modeproc_SSE2,
NULL, // kHue_Mode