aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/opts/SkBlitRow_opts_SSE2.cpp
diff options
context:
space:
mode:
author: Mike Klein <mtklein@chromium.org> 2017-05-05 09:58:31 -0400
committer: Skia Commit-Bot <skia-commit-bot@chromium.org> 2017-05-06 14:57:12 +0000
commit: c6820383b2526de95296ed8436f76333e0651d75 (patch)
tree: 3b55e949fcb14cb7f4325469dc61077bc7764507 /src/opts/SkBlitRow_opts_SSE2.cpp
parent: 4c6e4103a246c27bdd1302a9c7fba64367758dcc (diff)
remove old 565 destination opts
This is not an important format, and the code is dead or close to it. The code is an occasional maintenance burden so I'd like it gone.

Change-Id: I4ad921533abf3211e6a81e6e475b848795eea060
Reviewed-on: https://skia-review.googlesource.com/15600
Reviewed-by: Mike Reed <reed@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
Diffstat (limited to 'src/opts/SkBlitRow_opts_SSE2.cpp')
-rw-r--r-- src/opts/SkBlitRow_opts_SSE2.cpp 547
1 files changed, 0 insertions, 547 deletions
diff --git a/src/opts/SkBlitRow_opts_SSE2.cpp b/src/opts/SkBlitRow_opts_SSE2.cpp
index 7ce1fc9a80..7f03907d1c 100644
--- a/src/opts/SkBlitRow_opts_SSE2.cpp
+++ b/src/opts/SkBlitRow_opts_SSE2.cpp
@@ -103,75 +103,6 @@ void S32A_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst,
}
}
-void Color32A_D565_SSE2(uint16_t dst[], SkPMColor src, int count, int x, int y) {
- SkASSERT(count > 0);
- // Blends the single color src into each of the count 16-bit pixels at dst, in place. x and y are not read here.
- uint32_t src_expand = (SkGetPackedG32(src) << 24) |
- (SkGetPackedR32(src) << 13) |
- (SkGetPackedB32(src) << 2);
- unsigned scale = SkAlpha255To256(0xFF - SkGetPackedA32(src)) >> 3;
- // src_expand and scale feed the scalar SkBlend32_RGB16() calls; the SIMD loop uses the *_wide copies built below.
- // Take the SIMD path only if at least 8 pixels remain after the pixels needed to 16-byte-align dst.
- if (count >= (int)(8 + (((16 - (size_t)dst) & 0x0F) >> 1))) {
- __m128i* dst_wide;
- const __m128i src_R_wide = _mm_set1_epi16(SkGetPackedR32(src) << 2);
- const __m128i src_G_wide = _mm_set1_epi16(SkGetPackedG32(src) << 3);
- const __m128i src_B_wide = _mm_set1_epi16(SkGetPackedB32(src) << 2);
- const __m128i scale_wide = _mm_set1_epi16(scale);
- const __m128i mask_blue = _mm_set1_epi16(SK_B16_MASK);
- const __m128i mask_green = _mm_set1_epi16(SK_G16_MASK << SK_G16_SHIFT);
-
- // Process pixels one at a time until dst sits on a 16-byte boundary (0-7 pixels).
- while (((((size_t)dst) & 0x0F) != 0) && (count > 0)) {
- *dst = SkBlend32_RGB16(src_expand, *dst, scale);
- dst += 1;
- count--;
- }
-
- dst_wide = reinterpret_cast<__m128i*>(dst);
- do {  // the entry test above guarantees count >= 8 for the first iteration
- // Load eight RGB565 pixels (aligned load; dst was aligned above)
- __m128i pixels = _mm_load_si128(dst_wide);
-
- // Isolate each channel in the low bits of its 16-bit lane
- __m128i pixel_R = _mm_srli_epi16(pixels, SK_R16_SHIFT);
- __m128i pixel_G = _mm_slli_epi16(pixels, SK_R16_BITS);
- pixel_G = _mm_srli_epi16(pixel_G, SK_R16_BITS + SK_B16_BITS);
- __m128i pixel_B = _mm_and_si128(pixels, mask_blue);
-
- // Multiply each dst channel by scale (derived from 0xFF - src alpha)
- pixel_R = _mm_mullo_epi16(pixel_R, scale_wide);
- pixel_G = _mm_mullo_epi16(pixel_G, scale_wide);
- pixel_B = _mm_mullo_epi16(pixel_B, scale_wide);
-
- // Add src_X_wide and shift R and B down by 5; G is not shifted because mask_green below keeps its bits in place (presumably SK_G16_SHIFT is 5 - confirm)
- pixel_R = _mm_add_epi16(pixel_R, src_R_wide);
- pixel_R = _mm_srli_epi16(pixel_R, 5);
- pixel_G = _mm_add_epi16(pixel_G, src_G_wide);
- pixel_B = _mm_add_epi16(pixel_B, src_B_wide);
- pixel_B = _mm_srli_epi16(pixel_B, 5);
-
- // Recombine the three channels into RGB565 and store (aligned store)
- pixel_R = _mm_slli_epi16(pixel_R, SK_R16_SHIFT);
- pixel_G = _mm_and_si128(pixel_G, mask_green);
- pixels = _mm_or_si128(pixel_R, pixel_G);
- pixels = _mm_or_si128(pixels, pixel_B);
- _mm_store_si128(dst_wide, pixels);
- count -= 8;
- dst_wide++;
- } while (count >= 8);
-
- dst = reinterpret_cast<uint16_t*>(dst_wide);  // resume scalar processing where the SIMD loop stopped
- }
-
- // Scalar loop for the pixels left over (or for all of them when the SIMD path was skipped).
- while (count > 0) {
- *dst = SkBlend32_RGB16(src_expand, *dst, scale);
- dst += 1;
- count--;
- }
-}
-
// The following (left) shifts cause the top 5 bits of the mask components to
// line up with the corresponding components in an SkPMColor.
// Note that the mask's RGB16 order may differ from the SkPMColor order.
@@ -510,481 +441,3 @@ void SkBlitLCD16OpaqueRow_SSE2(SkPMColor dst[], const uint16_t mask[],
width--;
}
}
-
-/* SSE2 version of S32_D565_Opaque(): writes count src pixels to dst as 16-bit pixels.
- * The portable version is in core/SkBlitRow_D16.cpp. alpha is asserted to be 255; x and y are unused.
- */
-void S32_D565_Opaque_SSE2(uint16_t* SK_RESTRICT dst,
- const SkPMColor* SK_RESTRICT src, int count,
- U8CPU alpha, int /*x*/, int /*y*/) {
- SkASSERT(255 == alpha);
-
- if (count <= 0) {
- return;
- }
- // With fewer than 8 pixels the SIMD block is skipped and the scalar loop at the bottom does all the work.
- if (count >= 8) {
- while (((size_t)dst & 0x0F) != 0) {  // scalar conversion until dst is 16-byte aligned
- SkPMColor c = *src++;
- SkPMColorAssert(c);
-
- *dst++ = SkPixel32ToPixel16_ToU16(c);
- count--;
- }
- // dst is now 16-byte aligned (aligned stores below); src may not be, hence the unaligned loads.
- const __m128i* s = reinterpret_cast<const __m128i*>(src);
- __m128i* d = reinterpret_cast<__m128i*>(dst);
-
- while (count >= 8) {
- // Load 8 src pixels as two unaligned 128-bit loads.
- __m128i src_pixel1 = _mm_loadu_si128(s++);
- __m128i src_pixel2 = _mm_loadu_si128(s++);
-
- __m128i d_pixel = SkPixel32ToPixel16_ToU16_SSE2(src_pixel1, src_pixel2);
- _mm_store_si128(d++, d_pixel);
- count -= 8;
- }
- src = reinterpret_cast<const SkPMColor*>(s);  // hand the advanced pointers back to the scalar tail
- dst = reinterpret_cast<uint16_t*>(d);
- }
-
- if (count > 0) {  // scalar tail: fewer than 8 pixels remain
- do {
- SkPMColor c = *src++;
- SkPMColorAssert(c);
- *dst++ = SkPixel32ToPixel16_ToU16(c);
- } while (--count != 0);
- }
-}
-
-/* SSE2 version of S32A_D565_Opaque(): composites count src pixels over the 16-bit pixels at dst.
- * The portable version is in core/SkBlitRow_D16.cpp. alpha is asserted to be 255; x and y are unused.
- */
-void S32A_D565_Opaque_SSE2(uint16_t* SK_RESTRICT dst,
- const SkPMColor* SK_RESTRICT src,
- int count, U8CPU alpha, int /*x*/, int /*y*/) {
- SkASSERT(255 == alpha);
-
- if (count <= 0) {
- return;
- }
- // With fewer than 8 pixels the SIMD block is skipped and the scalar loop at the bottom does all the work.
- if (count >= 8) {
- // Advance one pixel at a time until dst is 16-byte aligned.
- while (((size_t)dst & 0x0F) != 0) {
- SkPMColor c = *src++;  // NOTE(review): unlike the tail loop, no SkPMColorAssert(c) here
- if (c) {
- *dst = SkSrcOver32To16(c, *dst);
- }
- dst += 1;
- count--;
- }
-
- const __m128i* s = reinterpret_cast<const __m128i*>(src);
- __m128i* d = reinterpret_cast<__m128i*>(dst);
- __m128i var255 = _mm_set1_epi16(255);
- __m128i r16_mask = _mm_set1_epi16(SK_R16_MASK);
- __m128i g16_mask = _mm_set1_epi16(SK_G16_MASK);
- __m128i b16_mask = _mm_set1_epi16(SK_B16_MASK);
-
- while (count >= 8) {
- // Load 8 src pixels as two unaligned 128-bit loads.
- __m128i src_pixel1 = _mm_loadu_si128(s++);
- __m128i src_pixel2 = _mm_loadu_si128(s++);
-
- // Compare every 16-bit lane of src against zero and collect the top bit
- // of each result byte. A mask of 0xFFFF means the whole register is zero;
- // when both registers are zero, dst is left untouched for these 8 pixels.
- int src_cmp1 = _mm_movemask_epi8(_mm_cmpeq_epi16(src_pixel1,
- _mm_setzero_si128()));
- int src_cmp2 = _mm_movemask_epi8(_mm_cmpeq_epi16(src_pixel2,
- _mm_setzero_si128()));
- if (src_cmp1 == 0xFFFF && src_cmp2 == 0xFFFF) {
- d++;  // s was already advanced by the two loads above
- count -= 8;
- continue;
- }
-
- // Load 8 pixels of dst (aligned load).
- __m128i dst_pixel = _mm_load_si128(d);
-
- // Extract A from src: move the channel to the top byte of each 32-bit lane, shift it down to the low byte, then pack both registers into eight 16-bit lanes.
- __m128i sa1 = _mm_slli_epi32(src_pixel1, (24 - SK_A32_SHIFT));
- sa1 = _mm_srli_epi32(sa1, 24);
- __m128i sa2 = _mm_slli_epi32(src_pixel2, (24 - SK_A32_SHIFT));
- sa2 = _mm_srli_epi32(sa2, 24);
- __m128i sa = _mm_packs_epi32(sa1, sa2);
-
- // Extract R from src (same shift-and-pack scheme).
- __m128i sr1 = _mm_slli_epi32(src_pixel1, (24 - SK_R32_SHIFT));
- sr1 = _mm_srli_epi32(sr1, 24);
- __m128i sr2 = _mm_slli_epi32(src_pixel2, (24 - SK_R32_SHIFT));
- sr2 = _mm_srli_epi32(sr2, 24);
- __m128i sr = _mm_packs_epi32(sr1, sr2);
-
- // Extract G from src.
- __m128i sg1 = _mm_slli_epi32(src_pixel1, (24 - SK_G32_SHIFT));
- sg1 = _mm_srli_epi32(sg1, 24);
- __m128i sg2 = _mm_slli_epi32(src_pixel2, (24 - SK_G32_SHIFT));
- sg2 = _mm_srli_epi32(sg2, 24);
- __m128i sg = _mm_packs_epi32(sg1, sg2);
-
- // Extract B from src.
- __m128i sb1 = _mm_slli_epi32(src_pixel1, (24 - SK_B32_SHIFT));
- sb1 = _mm_srli_epi32(sb1, 24);
- __m128i sb2 = _mm_slli_epi32(src_pixel2, (24 - SK_B32_SHIFT));
- sb2 = _mm_srli_epi32(sb2, 24);
- __m128i sb = _mm_packs_epi32(sb1, sb2);
-
- // Extract R, G and B from dst into the low bits of each 16-bit lane.
- __m128i dr = _mm_srli_epi16(dst_pixel, SK_R16_SHIFT);
- dr = _mm_and_si128(dr, r16_mask);
- __m128i dg = _mm_srli_epi16(dst_pixel, SK_G16_SHIFT);
- dg = _mm_and_si128(dg, g16_mask);
- __m128i db = _mm_srli_epi16(dst_pixel, SK_B16_SHIFT);
- db = _mm_and_si128(db, b16_mask);
-
- __m128i isa = _mm_sub_epi16(var255, sa); // 255 - sa
-
- // Calculate R, G and B of the result: (src + SkMul16ShiftRound_SSE2(dst, isa, bits)) >> (8 - bits).
- // The original scalar algorithm is in SkSrcOver32To16().
- dr = _mm_add_epi16(sr, SkMul16ShiftRound_SSE2(dr, isa, SK_R16_BITS));
- dr = _mm_srli_epi16(dr, 8 - SK_R16_BITS);
- dg = _mm_add_epi16(sg, SkMul16ShiftRound_SSE2(dg, isa, SK_G16_BITS));
- dg = _mm_srli_epi16(dg, 8 - SK_G16_BITS);
- db = _mm_add_epi16(sb, SkMul16ShiftRound_SSE2(db, isa, SK_B16_BITS));
- db = _mm_srli_epi16(db, 8 - SK_B16_BITS);
-
- // Pack R, G and B into 16-bit colors.
- __m128i d_pixel = SkPackRGB16_SSE2(dr, dg, db);
-
- // Store the 8 16-bit colors to dst (aligned store).
- _mm_store_si128(d++, d_pixel);
- count -= 8;
- }
-
- src = reinterpret_cast<const SkPMColor*>(s);  // hand the advanced pointers back to the scalar tail
- dst = reinterpret_cast<uint16_t*>(d);
- }
-
- if (count > 0) {  // scalar tail: fewer than 8 pixels remain
- do {
- SkPMColor c = *src++;
- SkPMColorAssert(c);
- if (c) {
- *dst = SkSrcOver32To16(c, *dst);
- }
- dst += 1;
- } while (--count != 0);
- }
-}
-
-void S32_D565_Opaque_Dither_SSE2(uint16_t* SK_RESTRICT dst,
- const SkPMColor* SK_RESTRICT src,
- int count, U8CPU alpha, int x, int y) {
- SkASSERT(255 == alpha);
- // Writes count src pixels to dst as dithered 16-bit pixels; y selects the dither row and x the phase within it.
- if (count <= 0) {
- return;
- }
- // With fewer than 8 pixels the SIMD block is skipped and the scalar loop at the bottom does all the work.
- if (count >= 8) {
- while (((size_t)dst & 0x0F) != 0) {  // scalar until dst is 16-byte aligned
- DITHER_565_SCAN(y);  // NOTE(review): takes only y, yet is re-run on every iteration of this loop
- SkPMColor c = *src++;
- SkPMColorAssert(c);
-
- unsigned dither = DITHER_VALUE(x);
- *dst++ = SkDitherRGB32To565(c, dither);
- DITHER_INC_X(x);
- count--;
- }
-
- unsigned short dither_value[8];  // one dither value per 16-bit lane; indices use (x + k) & 3, so lanes 4-7 repeat lanes 0-3
- __m128i dither;
-#ifdef ENABLE_DITHER_MATRIX_4X4
- const uint8_t* dither_scan = gDitherMatrix_3Bit_4X4[(y) & 3];
- dither_value[0] = dither_value[4] = dither_scan[(x) & 3];
- dither_value[1] = dither_value[5] = dither_scan[(x + 1) & 3];
- dither_value[2] = dither_value[6] = dither_scan[(x + 2) & 3];
- dither_value[3] = dither_value[7] = dither_scan[(x + 3) & 3];
-#else
- const uint16_t dither_scan = gDitherMatrix_3Bit_16[(y) & 3];
- dither_value[0] = dither_value[4] = (dither_scan
- >> (((x) & 3) << 2)) & 0xF;
- dither_value[1] = dither_value[5] = (dither_scan
- >> (((x + 1) & 3) << 2)) & 0xF;
- dither_value[2] = dither_value[6] = (dither_scan
- >> (((x + 2) & 3) << 2)) & 0xF;
- dither_value[3] = dither_value[7] = (dither_scan
- >> (((x + 3) & 3) << 2)) & 0xF;
-#endif
- dither = _mm_loadu_si128((__m128i*) dither_value);
-
- const __m128i* s = reinterpret_cast<const __m128i*>(src);
- __m128i* d = reinterpret_cast<__m128i*>(dst);
-
- while (count >= 8) {
- // Load 8 src pixels as two unaligned 128-bit loads.
- __m128i src_pixel1 = _mm_loadu_si128(s++);
- __m128i src_pixel2 = _mm_loadu_si128(s++);
-
- // Extract R from src: channel to the top byte of each 32-bit lane, back down to the low byte, then pack to 16-bit lanes.
- __m128i sr1 = _mm_slli_epi32(src_pixel1, (24 - SK_R32_SHIFT));
- sr1 = _mm_srli_epi32(sr1, 24);
- __m128i sr2 = _mm_slli_epi32(src_pixel2, (24 - SK_R32_SHIFT));
- sr2 = _mm_srli_epi32(sr2, 24);
- __m128i sr = _mm_packs_epi32(sr1, sr2);
-
- // SkDITHER_R32To565(sr, dither): (sr + dither - (sr >> 5)) >> (SK_R32_BITS - SK_R16_BITS)
- __m128i sr_offset = _mm_srli_epi16(sr, 5);
- sr = _mm_add_epi16(sr, dither);
- sr = _mm_sub_epi16(sr, sr_offset);
- sr = _mm_srli_epi16(sr, SK_R32_BITS - SK_R16_BITS);
-
- // Extract G from src.
- __m128i sg1 = _mm_slli_epi32(src_pixel1, (24 - SK_G32_SHIFT));
- sg1 = _mm_srli_epi32(sg1, 24);
- __m128i sg2 = _mm_slli_epi32(src_pixel2, (24 - SK_G32_SHIFT));
- sg2 = _mm_srli_epi32(sg2, 24);
- __m128i sg = _mm_packs_epi32(sg1, sg2);
-
- // Green counterpart of the red step (presumably SkDITHER_G32To565 - confirm): uses dither >> 1 and sg >> 6.
- __m128i sg_offset = _mm_srli_epi16(sg, 6);
- sg = _mm_add_epi16(sg, _mm_srli_epi16(dither, 1));
- sg = _mm_sub_epi16(sg, sg_offset);
- sg = _mm_srli_epi16(sg, SK_G32_BITS - SK_G16_BITS);
-
- // Extract B from src.
- __m128i sb1 = _mm_slli_epi32(src_pixel1, (24 - SK_B32_SHIFT));
- sb1 = _mm_srli_epi32(sb1, 24);
- __m128i sb2 = _mm_slli_epi32(src_pixel2, (24 - SK_B32_SHIFT));
- sb2 = _mm_srli_epi32(sb2, 24);
- __m128i sb = _mm_packs_epi32(sb1, sb2);
-
- // Blue counterpart of the red step (presumably SkDITHER_B32To565 - confirm): same form as red.
- __m128i sb_offset = _mm_srli_epi16(sb, 5);
- sb = _mm_add_epi16(sb, dither);
- sb = _mm_sub_epi16(sb, sb_offset);
- sb = _mm_srli_epi16(sb, SK_B32_BITS - SK_B16_BITS);
-
- // Pack and store the 8 16-bit dst pixels (aligned store).
- __m128i d_pixel = SkPackRGB16_SSE2(sr, sg, sb);
- _mm_store_si128(d++, d_pixel);
-
- count -= 8;
- x += 8;  // a multiple of 4, so the precomputed dither vector stays in phase
- }
-
- src = reinterpret_cast<const SkPMColor*>(s);  // hand the advanced pointers back to the scalar tail
- dst = reinterpret_cast<uint16_t*>(d);
- }
-
- if (count > 0) {  // scalar tail: fewer than 8 pixels remain
- DITHER_565_SCAN(y);
- do {
- SkPMColor c = *src++;
- SkPMColorAssert(c);
-
- unsigned dither = DITHER_VALUE(x);
- *dst++ = SkDitherRGB32To565(c, dither);
- DITHER_INC_X(x);
- } while (--count != 0);
- }
-}
-
-/* SSE2 version of S32A_D565_Opaque_Dither(): blends count dithered src pixels into the 16-bit pixels at dst.
- * The portable version is in core/SkBlitRow_D16.cpp. alpha is asserted to be 255; x and y select the dither phase.
- */
-void S32A_D565_Opaque_Dither_SSE2(uint16_t* SK_RESTRICT dst,
- const SkPMColor* SK_RESTRICT src,
- int count, U8CPU alpha, int x, int y) {
- SkASSERT(255 == alpha);
-
- if (count <= 0) {
- return;
- }
- // With fewer than 8 pixels the SIMD block is skipped and the scalar loop at the bottom does all the work.
- if (count >= 8) {
- while (((size_t)dst & 0x0F) != 0) {  // scalar until dst is 16-byte aligned
- DITHER_565_SCAN(y);  // NOTE(review): takes only y, yet is re-run on every iteration of this loop
- SkPMColor c = *src++;
- SkPMColorAssert(c);
- if (c) {
- unsigned a = SkGetPackedA32(c);
-
- int d = SkAlphaMul(DITHER_VALUE(x), SkAlpha255To256(a));  // dither value scaled by source alpha
-
- unsigned sr = SkGetPackedR32(c);
- unsigned sg = SkGetPackedG32(c);
- unsigned sb = SkGetPackedB32(c);
- sr = SkDITHER_R32_FOR_565(sr, d);
- sg = SkDITHER_G32_FOR_565(sg, d);
- sb = SkDITHER_B32_FOR_565(sb, d);
-
- uint32_t src_expanded = (sg << 24) | (sr << 13) | (sb << 2);
- uint32_t dst_expanded = SkExpand_rgb_16(*dst);
- dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3);
- // now src and dst expanded are in g:11 r:10 x:1 b:10
- *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5);
- }
- dst += 1;
- DITHER_INC_X(x);
- count--;
- }
-
- unsigned short dither_value[8];  // one dither value per 16-bit lane; indices use (x + k) & 3, so lanes 4-7 repeat lanes 0-3
- __m128i dither, dither_cur;
-#ifdef ENABLE_DITHER_MATRIX_4X4
- const uint8_t* dither_scan = gDitherMatrix_3Bit_4X4[(y) & 3];
- dither_value[0] = dither_value[4] = dither_scan[(x) & 3];
- dither_value[1] = dither_value[5] = dither_scan[(x + 1) & 3];
- dither_value[2] = dither_value[6] = dither_scan[(x + 2) & 3];
- dither_value[3] = dither_value[7] = dither_scan[(x + 3) & 3];
-#else
- const uint16_t dither_scan = gDitherMatrix_3Bit_16[(y) & 3];
- dither_value[0] = dither_value[4] = (dither_scan
- >> (((x) & 3) << 2)) & 0xF;
- dither_value[1] = dither_value[5] = (dither_scan
- >> (((x + 1) & 3) << 2)) & 0xF;
- dither_value[2] = dither_value[6] = (dither_scan
- >> (((x + 2) & 3) << 2)) & 0xF;
- dither_value[3] = dither_value[7] = (dither_scan
- >> (((x + 3) & 3) << 2)) & 0xF;
-#endif
- dither = _mm_loadu_si128((__m128i*) dither_value);
-
- const __m128i* s = reinterpret_cast<const __m128i*>(src);
- __m128i* d = reinterpret_cast<__m128i*>(dst);
- __m128i var256 = _mm_set1_epi16(256);
- __m128i r16_mask = _mm_set1_epi16(SK_R16_MASK);
- __m128i g16_mask = _mm_set1_epi16(SK_G16_MASK);
- __m128i b16_mask = _mm_set1_epi16(SK_B16_MASK);
-
- while (count >= 8) {
- // Load 8 pixels of src (two unaligned loads) and 8 pixels of dst (one aligned load).
- __m128i src_pixel1 = _mm_loadu_si128(s++);
- __m128i src_pixel2 = _mm_loadu_si128(s++);
- __m128i dst_pixel = _mm_load_si128(d);
-
- // Extract A from src: channel to the top byte of each 32-bit lane, back down to the low byte, then pack to 16-bit lanes.
- __m128i sa1 = _mm_slli_epi32(src_pixel1, (24 - SK_A32_SHIFT));
- sa1 = _mm_srli_epi32(sa1, 24);
- __m128i sa2 = _mm_slli_epi32(src_pixel2, (24 - SK_A32_SHIFT));
- sa2 = _mm_srli_epi32(sa2, 24);
- __m128i sa = _mm_packs_epi32(sa1, sa2);
-
- // Scale the dither by source alpha: (dither * (sa + 1)) >> 8; presumably the vector form of the scalar path's SkAlphaMul(DITHER_VALUE(x), SkAlpha255To256(a)).
- dither_cur = _mm_mullo_epi16(dither,
- _mm_add_epi16(sa, _mm_set1_epi16(1)));
- dither_cur = _mm_srli_epi16(dither_cur, 8);
-
- // Extract R from src.
- __m128i sr1 = _mm_slli_epi32(src_pixel1, (24 - SK_R32_SHIFT));
- sr1 = _mm_srli_epi32(sr1, 24);
- __m128i sr2 = _mm_slli_epi32(src_pixel2, (24 - SK_R32_SHIFT));
- sr2 = _mm_srli_epi32(sr2, 24);
- __m128i sr = _mm_packs_epi32(sr1, sr2);
-
- // sr = SkDITHER_R32_FOR_565(sr, d): sr + d - (sr >> 5)
- __m128i sr_offset = _mm_srli_epi16(sr, 5);
- sr = _mm_add_epi16(sr, dither_cur);
- sr = _mm_sub_epi16(sr, sr_offset);
-
- // Expand sr (<< 2) so it lines up with dr * isa before the final >> 5.
- sr = _mm_slli_epi16(sr, 2);
-
- // Extract G from src.
- __m128i sg1 = _mm_slli_epi32(src_pixel1, (24 - SK_G32_SHIFT));
- sg1 = _mm_srli_epi32(sg1, 24);
- __m128i sg2 = _mm_slli_epi32(src_pixel2, (24 - SK_G32_SHIFT));
- sg2 = _mm_srli_epi32(sg2, 24);
- __m128i sg = _mm_packs_epi32(sg1, sg2);
-
- // sg = SkDITHER_G32_FOR_565(sg, d): sg + (d >> 1) - (sg >> 6)
- __m128i sg_offset = _mm_srli_epi16(sg, 6);
- sg = _mm_add_epi16(sg, _mm_srli_epi16(dither_cur, 1));
- sg = _mm_sub_epi16(sg, sg_offset);
-
- // Expand sg (<< 3) so it lines up with dg * isa before the final >> 5.
- sg = _mm_slli_epi16(sg, 3);
-
- // Extract B from src.
- __m128i sb1 = _mm_slli_epi32(src_pixel1, (24 - SK_B32_SHIFT));
- sb1 = _mm_srli_epi32(sb1, 24);
- __m128i sb2 = _mm_slli_epi32(src_pixel2, (24 - SK_B32_SHIFT));
- sb2 = _mm_srli_epi32(sb2, 24);
- __m128i sb = _mm_packs_epi32(sb1, sb2);
-
- // sb = SkDITHER_B32_FOR_565(sb, d): sb + d - (sb >> 5)
- __m128i sb_offset = _mm_srli_epi16(sb, 5);
- sb = _mm_add_epi16(sb, dither_cur);
- sb = _mm_sub_epi16(sb, sb_offset);
-
- // Expand sb (<< 2) so it lines up with db * isa before the final >> 5.
- sb = _mm_slli_epi16(sb, 2);
-
- // Extract R, G and B from dst into the low bits of each 16-bit lane.
- __m128i dr = _mm_srli_epi16(dst_pixel, SK_R16_SHIFT);
- dr = _mm_and_si128(dr, r16_mask);
- __m128i dg = _mm_srli_epi16(dst_pixel, SK_G16_SHIFT);
- dg = _mm_and_si128(dg, g16_mask);
- __m128i db = _mm_srli_epi16(dst_pixel, SK_B16_SHIFT);
- db = _mm_and_si128(db, b16_mask);
-
- // isa = (256 - sa) >> 3, the vector form of the scalar path's SkAlpha255To256(255 - a) >> 3
- __m128i isa = _mm_sub_epi16(var256, sa);
- isa = _mm_srli_epi16(isa, 3);
-
- dr = _mm_mullo_epi16(dr, isa);  // per channel: (dst * isa + expanded src) >> 5
- dr = _mm_add_epi16(dr, sr);
- dr = _mm_srli_epi16(dr, 5);
-
- dg = _mm_mullo_epi16(dg, isa);
- dg = _mm_add_epi16(dg, sg);
- dg = _mm_srli_epi16(dg, 5);
-
- db = _mm_mullo_epi16(db, isa);
- db = _mm_add_epi16(db, sb);
- db = _mm_srli_epi16(db, 5);
-
- // Pack and store the 8 dst pixels (aligned store).
- __m128i d_pixel = SkPackRGB16_SSE2(dr, dg, db);
- _mm_store_si128(d++, d_pixel);
-
- count -= 8;
- x += 8;  // a multiple of 4, so the precomputed dither vector stays in phase
- }
-
- src = reinterpret_cast<const SkPMColor*>(s);  // hand the advanced pointers back to the scalar tail
- dst = reinterpret_cast<uint16_t*>(d);
- }
-
- if (count > 0) {  // scalar tail: fewer than 8 pixels remain
- DITHER_565_SCAN(y);
- do {
- SkPMColor c = *src++;
- SkPMColorAssert(c);
- if (c) {
- unsigned a = SkGetPackedA32(c);
-
- int d = SkAlphaMul(DITHER_VALUE(x), SkAlpha255To256(a));  // dither value scaled by source alpha
-
- unsigned sr = SkGetPackedR32(c);
- unsigned sg = SkGetPackedG32(c);
- unsigned sb = SkGetPackedB32(c);
- sr = SkDITHER_R32_FOR_565(sr, d);
- sg = SkDITHER_G32_FOR_565(sg, d);
- sb = SkDITHER_B32_FOR_565(sb, d);
-
- uint32_t src_expanded = (sg << 24) | (sr << 13) | (sb << 2);
- uint32_t dst_expanded = SkExpand_rgb_16(*dst);
- dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3);
- // now src and dst expanded are in g:11 r:10 x:1 b:10
- *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5);
- }
- dst += 1;
- DITHER_INC_X(x);
- } while (--count != 0);
- }
-}