aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/opts/SkBlitRow_opts_SSE2.cpp
diff options
context:
space:
mode:
author: jmuizelaar <jmuizelaar@mozilla.com> 2014-10-09 11:43:02 -0700
committer: Commit bot <commit-bot@chromium.org> 2014-10-09 11:43:02 -0700
commit: 60e4ad7b29f50ebd7698d2d37580d5c8da5ce600 (patch)
tree: 1141961fe3fcaed60f76ee319cab400c96936667 /src/opts/SkBlitRow_opts_SSE2.cpp
parent: 12b1831ea49f3d88e93b7d2793d94852d03813e8 (diff)
Improve SkARGB32_A8_BlitMask_SSE2
With clang this:
- movzbl -3(%rbx), %edx
- pxor %xmm5, %xmm5
- pinsrw $0, %edx, %xmm5
- pinsrw $1, %edx, %xmm5
- movzbl -2(%rbx), %edx
- pinsrw $2, %edx, %xmm5
- pinsrw $3, %edx, %xmm5
- movzbl -1(%rbx), %edx
- pinsrw $4, %edx, %xmm5
- pinsrw $5, %edx, %xmm5
- movzbl (%rbx), %edx
- pinsrw $6, %edx, %xmm5
- pinsrw $7, %edx, %xmm5
becomes:
+ movd (%rbx), %xmm4
+ punpcklbw %xmm9, %xmm4
+ punpcklwd %xmm4, %xmm4

And clang already does better codegen than msvc 2013 on this.

BUG=skia:
Review URL: https://codereview.chromium.org/609823003
Diffstat (limited to 'src/opts/SkBlitRow_opts_SSE2.cpp')
-rw-r--r-- src/opts/SkBlitRow_opts_SSE2.cpp 9
1 files changed, 4 insertions, 5 deletions
diff --git a/src/opts/SkBlitRow_opts_SSE2.cpp b/src/opts/SkBlitRow_opts_SSE2.cpp
index 391b24c867..363cdab9f0 100644
--- a/src/opts/SkBlitRow_opts_SSE2.cpp
+++ b/src/opts/SkBlitRow_opts_SSE2.cpp
@@ -441,11 +441,10 @@ void SkARGB32_A8_BlitMask_SSE2(void* device, size_t dstRB, const void* maskPtr,
__m128i dst_pixel = _mm_load_si128(d);
//set the aphla value
- __m128i src_scale_wide = _mm_set_epi8(0, *(mask+3),\
- 0, *(mask+3),0, \
- *(mask+2),0, *(mask+2),\
- 0,*(mask+1), 0,*(mask+1),\
- 0, *mask,0,*mask);
+ __m128i src_scale_wide = _mm_cvtsi32_si128(*reinterpret_cast<const uint32_t*>(mask));
+ src_scale_wide = _mm_unpacklo_epi8(src_scale_wide,
+ _mm_setzero_si128());
+ src_scale_wide = _mm_unpacklo_epi16(src_scale_wide, src_scale_wide);
//call SkAlpha255To256()
src_scale_wide = _mm_add_epi16(src_scale_wide, c_1);