aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/opts/SkBlitRow_opts_arm_neon.cpp
diff options
context:
space:
mode:
authorGravatar robertphillips@google.com <robertphillips@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>2012-09-04 12:48:01 +0000
committerGravatar robertphillips@google.com <robertphillips@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>2012-09-04 12:48:01 +0000
commitb78765e63b5de5a7dfe5f9f6813f6df81cae14ae (patch)
tree292bb289b1309e4c31981aa9a1f18586b24247a8 /src/opts/SkBlitRow_opts_arm_neon.cpp
parent4f55d39a175afe70c1231eb7389790633210106f (diff)
Reverting r5364 (Update ARM and NEON optimizations for S32A_Opaque_BlitRow32)
git-svn-id: http://skia.googlecode.com/svn/trunk@5378 2bbb7eff-a529-9590-31e7-b0007b416f81
Diffstat (limited to 'src/opts/SkBlitRow_opts_arm_neon.cpp')
-rw-r--r--src/opts/SkBlitRow_opts_arm_neon.cpp186
1 files changed, 1 insertions, 185 deletions
diff --git a/src/opts/SkBlitRow_opts_arm_neon.cpp b/src/opts/SkBlitRow_opts_arm_neon.cpp
index 686c8e0476..14d59682e1 100644
--- a/src/opts/SkBlitRow_opts_arm_neon.cpp
+++ b/src/opts/SkBlitRow_opts_arm_neon.cpp
@@ -517,176 +517,6 @@ void S32A_Opaque_BlitRow32_neon(SkPMColor* SK_RESTRICT dst,
}
}
-void S32A_Opaque_BlitRow32_neon_src_alpha(SkPMColor* SK_RESTRICT dst,
- const SkPMColor* SK_RESTRICT src,
- int count, U8CPU alpha) {
- SkASSERT(255 == alpha);
-
- if (count <= 0)
- return;
-
- /* Use these to check if src is transparent or opaque */
- const unsigned int ALPHA_OPAQ = 0xFF000000;
- const unsigned int ALPHA_TRANS = 0x00FFFFFF;
-
-#define UNROLL 4
- const SkPMColor* SK_RESTRICT src_end = src + count - (UNROLL + 1);
- const SkPMColor* SK_RESTRICT src_temp = src;
-
- /* set up the NEON variables */
- uint8x8_t alpha_mask;
- static const uint8_t alpha_mask_setup[] = {3,3,3,3,7,7,7,7};
- alpha_mask = vld1_u8(alpha_mask_setup);
-
- uint8x8_t src_raw, dst_raw, dst_final;
- uint8x8_t src_raw_2, dst_raw_2, dst_final_2;
- uint8x8_t dst_cooked;
- uint16x8_t dst_wide;
- uint8x8_t alpha_narrow;
- uint16x8_t alpha_wide;
-
- /* choose the first processing type */
- if( src >= src_end)
- goto TAIL;
- if(*src <= ALPHA_TRANS)
- goto ALPHA_0;
- if(*src >= ALPHA_OPAQ)
- goto ALPHA_255;
- /* fall-thru */
-
-ALPHA_1_TO_254:
- do {
-
- /* get the source */
- src_raw = vreinterpret_u8_u32(vld1_u32(src));
- src_raw_2 = vreinterpret_u8_u32(vld1_u32(src+2));
-
- /* get and hold the dst too */
- dst_raw = vreinterpret_u8_u32(vld1_u32(dst));
- dst_raw_2 = vreinterpret_u8_u32(vld1_u32(dst+2));
-
-
- /* get the alphas spread out properly */
- alpha_narrow = vtbl1_u8(src_raw, alpha_mask);
- /* reflect SkAlpha255To256() semantics a+1 vs a+a>>7 */
- /* we collapsed (255-a)+1 ... */
- alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow);
-
- /* spread the dest */
- dst_wide = vmovl_u8(dst_raw);
-
- /* alpha mul the dest */
- dst_wide = vmulq_u16 (dst_wide, alpha_wide);
- dst_cooked = vshrn_n_u16(dst_wide, 8);
-
- /* sum -- ignoring any byte lane overflows */
- dst_final = vadd_u8(src_raw, dst_cooked);
-
- alpha_narrow = vtbl1_u8(src_raw_2, alpha_mask);
- /* reflect SkAlpha255To256() semantics a+1 vs a+a>>7 */
- /* we collapsed (255-a)+1 ... */
- alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow);
-
- /* spread the dest */
- dst_wide = vmovl_u8(dst_raw_2);
-
- /* alpha mul the dest */
- dst_wide = vmulq_u16 (dst_wide, alpha_wide);
- dst_cooked = vshrn_n_u16(dst_wide, 8);
-
- /* sum -- ignoring any byte lane overflows */
- dst_final_2 = vadd_u8(src_raw_2, dst_cooked);
-
- vst1_u32(dst, vreinterpret_u32_u8(dst_final));
- vst1_u32(dst+2, vreinterpret_u32_u8(dst_final_2));
-
- src += UNROLL;
- dst += UNROLL;
-
- /* if 2 of the next pixels aren't between 1 and 254
- it might make sense to go to the optimized loops */
- if((src[0] <= ALPHA_TRANS && src[1] <= ALPHA_TRANS) || (src[0] >= ALPHA_OPAQ && src[1] >= ALPHA_OPAQ))
- break;
-
- } while(src < src_end);
-
- if (src >= src_end)
- goto TAIL;
-
- if(src[0] >= ALPHA_OPAQ && src[1] >= ALPHA_OPAQ)
- goto ALPHA_255;
-
- /*fall-thru*/
-
-ALPHA_0:
-
- /*In this state, we know the current alpha is 0 and
- we optimize for the next alpha also being zero. */
- src_temp = src; //so we don't have to increment dst every time
- do {
- if(*(++src) > ALPHA_TRANS)
- break;
- if(*(++src) > ALPHA_TRANS)
- break;
- if(*(++src) > ALPHA_TRANS)
- break;
- if(*(++src) > ALPHA_TRANS)
- break;
- } while(src < src_end);
-
- dst += (src - src_temp);
-
- /* no longer alpha 0, so determine where to go next. */
- if( src >= src_end)
- goto TAIL;
- if(*src >= ALPHA_OPAQ)
- goto ALPHA_255;
- else
- goto ALPHA_1_TO_254;
-
-ALPHA_255:
- while((src[0] & src[1] & src[2] & src[3]) >= ALPHA_OPAQ) {
- dst[0]=src[0];
- dst[1]=src[1];
- dst[2]=src[2];
- dst[3]=src[3];
- src+=UNROLL;
- dst+=UNROLL;
- if(src >= src_end)
- goto TAIL;
- }
-
- //Handle remainder.
- if(*src >= ALPHA_OPAQ) { *dst++ = *src++;
- if(*src >= ALPHA_OPAQ) { *dst++ = *src++;
- if(*src >= ALPHA_OPAQ) { *dst++ = *src++; }
- }
- }
-
- if( src >= src_end)
- goto TAIL;
- if(*src <= ALPHA_TRANS)
- goto ALPHA_0;
- else
- goto ALPHA_1_TO_254;
-
-TAIL:
- /* do any residual iterations */
- src_end += UNROLL + 1; //goto the real end
- while(src != src_end) {
- if( *src != 0 ) {
- if( *src >= ALPHA_OPAQ ) {
- *dst = *src;
- }
- else {
- *dst = SkPMSrcOver(*src, *dst);
- }
- }
- src++;
- dst++;
- }
- return;
-}
/* Neon version of S32_Blend_BlitRow32()
* portable version is in src/core/SkBlitRow_D32.cpp
@@ -1277,20 +1107,6 @@ const SkBlitRow::Proc sk_blitrow_platform_4444_procs_arm_neon[] = {
const SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm_neon[] = {
NULL, // S32_Opaque,
S32_Blend_BlitRow32_neon, // S32_Blend,
- /*
- * We have two choices for S32A_Opaque procs. The one reads the src alpha
- * value and attempts to optimize accordingly. The optimization is
- * sensitive to the source content and is not a win in all cases. For
- * example, if there are a lot of transitions between the alpha states,
- * the performance will almost certainly be worse. However, for many
- * common cases the performance is equivalent or better than the standard
- * case where we do not inspect the src alpha.
- */
-#if SK_A32_SHIFT == 24
- // This proc assumes the alpha value occupies bits 24-32 of each SkPMColor
- S32A_Opaque_BlitRow32_neon_src_alpha, // S32A_Opaque,
-#else
- S32A_Opaque_BlitRow32_neon, // S32A_Opaque,
-#endif
+ S32A_Opaque_BlitRow32_neon, // S32A_Opaque,
S32A_Blend_BlitRow32_arm // S32A_Blend
};