aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/opts/SkBlitRow_opts_SSE2.cpp
diff options
context:
space:
mode:
authorGravatar bungeman@google.com <bungeman@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>2012-07-09 17:44:57 +0000
committerGravatar bungeman@google.com <bungeman@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>2012-07-09 17:44:57 +0000
commit8cd5ae79c6aaa20188ac6f34318c2f358d87e103 (patch)
treedc221a00aa0f8ffb7647f6d4caf28b1d735e2765 /src/opts/SkBlitRow_opts_SSE2.cpp
parent142e1fe7cf5dc82e5d4c107b06756302f0cbf96d (diff)
Fix SkBlendLCD16_SSE2 for non ARGB platforms.
Diffstat (limited to 'src/opts/SkBlitRow_opts_SSE2.cpp')
-rw-r--r--src/opts/SkBlitRow_opts_SSE2.cpp66
1 files changed, 46 insertions, 20 deletions
diff --git a/src/opts/SkBlitRow_opts_SSE2.cpp b/src/opts/SkBlitRow_opts_SSE2.cpp
index 5bb0c8c102..74e44af4c9 100644
--- a/src/opts/SkBlitRow_opts_SSE2.cpp
+++ b/src/opts/SkBlitRow_opts_SSE2.cpp
@@ -513,20 +513,48 @@ void SkARGB32_A8_BlitMask_SSE2(void* device, size_t dstRB, const void* maskPtr,
} while (--height != 0);
}
+// Shift counts (positive = left, negative = right) that move the top 5 bits of
+// each RGB16 mask component so they start at that component's SK_x32_SHIFT bit.
+// The macros only shift; callers mask. RGB16 order may differ from SkPMColor order.
+#define SK_R16x5_R32x5_SHIFT (SK_R32_SHIFT - SK_R16_SHIFT - SK_R16_BITS + 5)
+#define SK_G16x5_G32x5_SHIFT (SK_G32_SHIFT - SK_G16_SHIFT - SK_G16_BITS + 5)
+#define SK_B16x5_B32x5_SHIFT (SK_B32_SHIFT - SK_B16_SHIFT - SK_B16_BITS + 5)
+
+#if SK_R16x5_R32x5_SHIFT == 0 // red bits already in place: no shift needed
+ #define SkPackedR16x5ToUnmaskedR32x5_SSE2(x) (x)
+#elif SK_R16x5_R32x5_SHIFT > 0 // positive count: shift each 32-bit lane left
+ #define SkPackedR16x5ToUnmaskedR32x5_SSE2(x) (_mm_slli_epi32(x, SK_R16x5_R32x5_SHIFT))
+#else // negative count: shift each 32-bit lane right by its magnitude
+ #define SkPackedR16x5ToUnmaskedR32x5_SSE2(x) (_mm_srli_epi32(x, -SK_R16x5_R32x5_SHIFT))
+#endif
+
+#if SK_G16x5_G32x5_SHIFT == 0 // green bits already in place: no shift needed
+ #define SkPackedG16x5ToUnmaskedG32x5_SSE2(x) (x)
+#elif SK_G16x5_G32x5_SHIFT > 0 // positive count: shift each 32-bit lane left
+ #define SkPackedG16x5ToUnmaskedG32x5_SSE2(x) (_mm_slli_epi32(x, SK_G16x5_G32x5_SHIFT))
+#else // negative count: shift each 32-bit lane right by its magnitude
+ #define SkPackedG16x5ToUnmaskedG32x5_SSE2(x) (_mm_srli_epi32(x, -SK_G16x5_G32x5_SHIFT))
+#endif
+
+#if SK_B16x5_B32x5_SHIFT == 0 // blue bits already in place: no shift needed
+ #define SkPackedB16x5ToUnmaskedB32x5_SSE2(x) (x)
+#elif SK_B16x5_B32x5_SHIFT > 0 // positive count: shift each 32-bit lane left
+ #define SkPackedB16x5ToUnmaskedB32x5_SSE2(x) (_mm_slli_epi32(x, SK_B16x5_B32x5_SHIFT))
+#else // negative count: shift each 32-bit lane right by its magnitude
+ #define SkPackedB16x5ToUnmaskedB32x5_SSE2(x) (_mm_srli_epi32(x, -SK_B16x5_B32x5_SHIFT))
+#endif
+
static __m128i SkBlendLCD16_SSE2(__m128i &srci, __m128i &dst,
__m128i &mask, __m128i &scale) {
// Get the R,G,B of each 16bit mask pixel, we want all of them in 5 bits.
- __m128i r = _mm_and_si128(_mm_slli_epi32(mask,
- 16-SK_R16_SHIFT-(SK_R16_BITS-5)),
- _mm_set1_epi32(0x001F0000));
-
- __m128i g = _mm_and_si128(_mm_slli_epi32(mask,
- 8-SK_G16_SHIFT-(SK_G16_BITS-5)),
- _mm_set1_epi32(0x00001F00));
+ __m128i r = _mm_and_si128(SkPackedR16x5ToUnmaskedR32x5_SSE2(mask),
+ _mm_set1_epi32(0x1F << SK_R32_SHIFT));
- __m128i b = _mm_and_si128(_mm_slli_epi32(mask,
- SK_B16_BITS-5),
- _mm_set1_epi32(0x0000001F));
+ __m128i g = _mm_and_si128(SkPackedG16x5ToUnmaskedG32x5_SSE2(mask),
+ _mm_set1_epi32(0x1F << SK_G32_SHIFT));
+
+ __m128i b = _mm_and_si128(SkPackedB16x5ToUnmaskedB32x5_SSE2(mask),
+ _mm_set1_epi32(0x1F << SK_B32_SHIFT));
// Pack the 4 16bit mask pixels into 4 32bit pixels, (p0, p1, p2, p3)
mask = _mm_or_si128(_mm_or_si128(r, g), b);
@@ -564,20 +592,18 @@ static __m128i SkBlendLCD16_SSE2(__m128i &srci, __m128i &dst,
return _mm_packus_epi16(resultLo, resultHi);
}
-static __m128i SkBlendLCD16Opaque_SSE2(__m128i &srci, __m128i &dst,
+static __m128i SkBlendLCD16Opaque_SSE2(__m128i &srci, __m128i &dst,
__m128i &mask) {
// Get the R,G,B of each 16bit mask pixel, we want all of them in 5 bits.
- __m128i r = _mm_and_si128(_mm_slli_epi32(mask,
- 16-SK_R16_SHIFT-(SK_R16_BITS-5)),
- _mm_set1_epi32(0x001F0000));
+ __m128i r = _mm_and_si128(SkPackedR16x5ToUnmaskedR32x5_SSE2(mask),
+ _mm_set1_epi32(0x1F << SK_R32_SHIFT));
- __m128i g = _mm_and_si128(_mm_slli_epi32(mask,
- 8-SK_G16_SHIFT-(SK_G16_BITS-5)),
- _mm_set1_epi32(0x00001F00));
+ __m128i g = _mm_and_si128(SkPackedG16x5ToUnmaskedG32x5_SSE2(mask),
+ _mm_set1_epi32(0x1F << SK_G32_SHIFT));
+
+ __m128i b = _mm_and_si128(SkPackedB16x5ToUnmaskedB32x5_SSE2(mask),
+ _mm_set1_epi32(0x1F << SK_B32_SHIFT));
- __m128i b = _mm_and_si128(_mm_slli_epi32(mask, SK_B16_BITS-5),
- _mm_set1_epi32(0x0000001F));
-
// Pack the 4 16bit mask pixels into 4 32bit pixels, (p0, p1, p2, p3)
mask = _mm_or_si128(_mm_or_si128(r, g), b);