diff options
author | 2012-09-04 12:48:01 +0000 | |
---|---|---|
committer | 2012-09-04 12:48:01 +0000 | |
commit | b78765e63b5de5a7dfe5f9f6813f6df81cae14ae (patch) | |
tree | 292bb289b1309e4c31981aa9a1f18586b24247a8 /src/opts/SkBlitRow_opts_arm_neon.cpp | |
parent | 4f55d39a175afe70c1231eb7389790633210106f (diff) |
Reverting r5364 (Update ARM and NEON optimizations for S32A_Opaque_BlitRow32)
git-svn-id: http://skia.googlecode.com/svn/trunk@5378 2bbb7eff-a529-9590-31e7-b0007b416f81
Diffstat (limited to 'src/opts/SkBlitRow_opts_arm_neon.cpp')
-rw-r--r-- | src/opts/SkBlitRow_opts_arm_neon.cpp | 186 |
1 file changed, 1 insertion, 185 deletions
diff --git a/src/opts/SkBlitRow_opts_arm_neon.cpp b/src/opts/SkBlitRow_opts_arm_neon.cpp index 686c8e0476..14d59682e1 100644 --- a/src/opts/SkBlitRow_opts_arm_neon.cpp +++ b/src/opts/SkBlitRow_opts_arm_neon.cpp @@ -517,176 +517,6 @@ void S32A_Opaque_BlitRow32_neon(SkPMColor* SK_RESTRICT dst, } } -void S32A_Opaque_BlitRow32_neon_src_alpha(SkPMColor* SK_RESTRICT dst, - const SkPMColor* SK_RESTRICT src, - int count, U8CPU alpha) { - SkASSERT(255 == alpha); - - if (count <= 0) - return; - - /* Use these to check if src is transparent or opaque */ - const unsigned int ALPHA_OPAQ = 0xFF000000; - const unsigned int ALPHA_TRANS = 0x00FFFFFF; - -#define UNROLL 4 - const SkPMColor* SK_RESTRICT src_end = src + count - (UNROLL + 1); - const SkPMColor* SK_RESTRICT src_temp = src; - - /* set up the NEON variables */ - uint8x8_t alpha_mask; - static const uint8_t alpha_mask_setup[] = {3,3,3,3,7,7,7,7}; - alpha_mask = vld1_u8(alpha_mask_setup); - - uint8x8_t src_raw, dst_raw, dst_final; - uint8x8_t src_raw_2, dst_raw_2, dst_final_2; - uint8x8_t dst_cooked; - uint16x8_t dst_wide; - uint8x8_t alpha_narrow; - uint16x8_t alpha_wide; - - /* choose the first processing type */ - if( src >= src_end) - goto TAIL; - if(*src <= ALPHA_TRANS) - goto ALPHA_0; - if(*src >= ALPHA_OPAQ) - goto ALPHA_255; - /* fall-thru */ - -ALPHA_1_TO_254: - do { - - /* get the source */ - src_raw = vreinterpret_u8_u32(vld1_u32(src)); - src_raw_2 = vreinterpret_u8_u32(vld1_u32(src+2)); - - /* get and hold the dst too */ - dst_raw = vreinterpret_u8_u32(vld1_u32(dst)); - dst_raw_2 = vreinterpret_u8_u32(vld1_u32(dst+2)); - - - /* get the alphas spread out properly */ - alpha_narrow = vtbl1_u8(src_raw, alpha_mask); - /* reflect SkAlpha255To256() semantics a+1 vs a+a>>7 */ - /* we collapsed (255-a)+1 ... 
*/ - alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow); - - /* spread the dest */ - dst_wide = vmovl_u8(dst_raw); - - /* alpha mul the dest */ - dst_wide = vmulq_u16 (dst_wide, alpha_wide); - dst_cooked = vshrn_n_u16(dst_wide, 8); - - /* sum -- ignoring any byte lane overflows */ - dst_final = vadd_u8(src_raw, dst_cooked); - - alpha_narrow = vtbl1_u8(src_raw_2, alpha_mask); - /* reflect SkAlpha255To256() semantics a+1 vs a+a>>7 */ - /* we collapsed (255-a)+1 ... */ - alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow); - - /* spread the dest */ - dst_wide = vmovl_u8(dst_raw_2); - - /* alpha mul the dest */ - dst_wide = vmulq_u16 (dst_wide, alpha_wide); - dst_cooked = vshrn_n_u16(dst_wide, 8); - - /* sum -- ignoring any byte lane overflows */ - dst_final_2 = vadd_u8(src_raw_2, dst_cooked); - - vst1_u32(dst, vreinterpret_u32_u8(dst_final)); - vst1_u32(dst+2, vreinterpret_u32_u8(dst_final_2)); - - src += UNROLL; - dst += UNROLL; - - /* if 2 of the next pixels aren't between 1 and 254 - it might make sense to go to the optimized loops */ - if((src[0] <= ALPHA_TRANS && src[1] <= ALPHA_TRANS) || (src[0] >= ALPHA_OPAQ && src[1] >= ALPHA_OPAQ)) - break; - - } while(src < src_end); - - if (src >= src_end) - goto TAIL; - - if(src[0] >= ALPHA_OPAQ && src[1] >= ALPHA_OPAQ) - goto ALPHA_255; - - /*fall-thru*/ - -ALPHA_0: - - /*In this state, we know the current alpha is 0 and - we optimize for the next alpha also being zero. */ - src_temp = src; //so we don't have to increment dst every time - do { - if(*(++src) > ALPHA_TRANS) - break; - if(*(++src) > ALPHA_TRANS) - break; - if(*(++src) > ALPHA_TRANS) - break; - if(*(++src) > ALPHA_TRANS) - break; - } while(src < src_end); - - dst += (src - src_temp); - - /* no longer alpha 0, so determine where to go next. 
*/ - if( src >= src_end) - goto TAIL; - if(*src >= ALPHA_OPAQ) - goto ALPHA_255; - else - goto ALPHA_1_TO_254; - -ALPHA_255: - while((src[0] & src[1] & src[2] & src[3]) >= ALPHA_OPAQ) { - dst[0]=src[0]; - dst[1]=src[1]; - dst[2]=src[2]; - dst[3]=src[3]; - src+=UNROLL; - dst+=UNROLL; - if(src >= src_end) - goto TAIL; - } - - //Handle remainder. - if(*src >= ALPHA_OPAQ) { *dst++ = *src++; - if(*src >= ALPHA_OPAQ) { *dst++ = *src++; - if(*src >= ALPHA_OPAQ) { *dst++ = *src++; } - } - } - - if( src >= src_end) - goto TAIL; - if(*src <= ALPHA_TRANS) - goto ALPHA_0; - else - goto ALPHA_1_TO_254; - -TAIL: - /* do any residual iterations */ - src_end += UNROLL + 1; //goto the real end - while(src != src_end) { - if( *src != 0 ) { - if( *src >= ALPHA_OPAQ ) { - *dst = *src; - } - else { - *dst = SkPMSrcOver(*src, *dst); - } - } - src++; - dst++; - } - return; -} /* Neon version of S32_Blend_BlitRow32() * portable version is in src/core/SkBlitRow_D32.cpp @@ -1277,20 +1107,6 @@ const SkBlitRow::Proc sk_blitrow_platform_4444_procs_arm_neon[] = { const SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm_neon[] = { NULL, // S32_Opaque, S32_Blend_BlitRow32_neon, // S32_Blend, - /* - * We have two choices for S32A_Opaque procs. The one reads the src alpha - * value and attempts to optimize accordingly. The optimization is - * sensitive to the source content and is not a win in all cases. For - * example, if there are a lot of transitions between the alpha states, - * the performance will almost certainly be worse. However, for many - * common cases the performance is equivalent or better than the standard - * case where we do not inspect the src alpha. - */ -#if SK_A32_SHIFT == 24 - // This proc assumes the alpha value occupies bits 24-32 of each SkPMColor - S32A_Opaque_BlitRow32_neon_src_alpha, // S32A_Opaque, -#else - S32A_Opaque_BlitRow32_neon, // S32A_Opaque, -#endif + S32A_Opaque_BlitRow32_neon, // S32A_Opaque, S32A_Blend_BlitRow32_arm // S32A_Blend }; |