diff options
author | jmuizelaar <jmuizelaar@mozilla.com> | 2014-10-09 11:43:02 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2014-10-09 11:43:02 -0700 |
commit | 60e4ad7b29f50ebd7698d2d37580d5c8da5ce600 (patch) | |
tree | 1141961fe3fcaed60f76ee319cab400c96936667 | |
parent | 12b1831ea49f3d88e93b7d2793d94852d03813e8 (diff) |
Improve SkARGB32_A8_BlitMask_SSE2
With clang, this generated code:
- movzbl -3(%rbx), %edx
- pxor %xmm5, %xmm5
- pinsrw $0, %edx, %xmm5
- pinsrw $1, %edx, %xmm5
- movzbl -2(%rbx), %edx
- pinsrw $2, %edx, %xmm5
- pinsrw $3, %edx, %xmm5
- movzbl -1(%rbx), %edx
- pinsrw $4, %edx, %xmm5
- pinsrw $5, %edx, %xmm5
- movzbl (%rbx), %edx
- pinsrw $6, %edx, %xmm5
- pinsrw $7, %edx, %xmm5
becomes:
+ movd (%rbx), %xmm4
+ punpcklbw %xmm9, %xmm4
+ punpcklwd %xmm4, %xmm4
Note that clang already generates better code than MSVC 2013 for this.
BUG=skia:
Review URL: https://codereview.chromium.org/609823003
-rw-r--r-- | AUTHORS | 1 | ||||
-rw-r--r-- | src/opts/SkBlitRow_opts_SSE2.cpp | 9 |
2 files changed, 5 insertions, 5 deletions
@@ -15,6 +15,7 @@ ACCESS CO., LTD. <*@access-company.com>
 ARM <*@arm.com>
 Ehsan Akhgari <ehsan.akhgari@gmail.com>
 George Wright <george@mozilla.com>
+Jeff Muizelaar <jmuizelaar@mozilla.com>
 Google Inc. <*@google.com>
 Igalia <*@igalia.com>
 Intel <*@intel.com>
diff --git a/src/opts/SkBlitRow_opts_SSE2.cpp b/src/opts/SkBlitRow_opts_SSE2.cpp
index 391b24c867..363cdab9f0 100644
--- a/src/opts/SkBlitRow_opts_SSE2.cpp
+++ b/src/opts/SkBlitRow_opts_SSE2.cpp
@@ -441,11 +441,10 @@ void SkARGB32_A8_BlitMask_SSE2(void* device, size_t dstRB, const void* maskPtr,
     __m128i dst_pixel = _mm_load_si128(d);
 
     //set the aphla value
-    __m128i src_scale_wide = _mm_set_epi8(0, *(mask+3),\
-        0, *(mask+3),0, \
-        *(mask+2),0, *(mask+2),\
-        0,*(mask+1), 0,*(mask+1),\
-        0, *mask,0,*mask);
+    __m128i src_scale_wide = _mm_cvtsi32_si128(*reinterpret_cast<const uint32_t*>(mask));
+    src_scale_wide = _mm_unpacklo_epi8(src_scale_wide,
+        _mm_setzero_si128());
+    src_scale_wide = _mm_unpacklo_epi16(src_scale_wide, src_scale_wide);
 
     //call SkAlpha255To256()
     src_scale_wide = _mm_add_epi16(src_scale_wide, c_1);