Reverting r5364 (Update ARM and NEON optimizations for S32A_Opaque_BlitRow32)

git-svn-id: http://skia.googlecode.com/svn/trunk@5378 2bbb7eff-a529-9590-31e7-b0007b416f81
author: robertphillips@google.com <robertphillips@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81> 2012-09-04 12:48:01 +0000
committer: robertphillips@google.com <robertphillips@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81> 2012-09-04 12:48:01 +0000
commit: b78765e63b5de5a7dfe5f9f6813f6df81cae14ae (patch)
tree: 292bb289b1309e4c31981aa9a1f18586b24247a8 /src/opts/SkBlitRow_opts_arm_neon.cpp
parent: 4f55d39a175afe70c1231eb7389790633210106f (diff)
1 files changed, 1 insertions, 185 deletions
diff --git a/src/opts/SkBlitRow_opts_arm_neon.cpp b/src/opts/SkBlitRow_opts_arm_neon.cpp
index 686c8e0476..14d59682e1 100644
--- a/src/opts/SkBlitRow_opts_arm_neon.cpp
+++ b/src/opts/SkBlitRow_opts_arm_neon.cpp
@@ -517,176 +517,6 @@ void S32A_Opaque_BlitRow32_neon(SkPMColor* SK_RESTRICT dst,
     }
 }
 
-void S32A_Opaque_BlitRow32_neon_src_alpha(SkPMColor* SK_RESTRICT dst,
-                                const SkPMColor* SK_RESTRICT src,
-                                int count, U8CPU alpha) {
-    SkASSERT(255 == alpha);
-
-    if (count <= 0)
-    return;
-
-    /* Use these to check if src is transparent or opaque */
-    const unsigned int ALPHA_OPAQ  = 0xFF000000;
-    const unsigned int ALPHA_TRANS = 0x00FFFFFF;
-
-#define UNROLL  4
-    const SkPMColor* SK_RESTRICT src_end = src + count - (UNROLL + 1);
-    const SkPMColor* SK_RESTRICT src_temp = src;
-
-    /* set up the NEON variables */
-    uint8x8_t alpha_mask;
-    static const uint8_t alpha_mask_setup[] = {3,3,3,3,7,7,7,7};
-    alpha_mask = vld1_u8(alpha_mask_setup);
-
-    uint8x8_t src_raw, dst_raw, dst_final;
-    uint8x8_t src_raw_2, dst_raw_2, dst_final_2;
-    uint8x8_t dst_cooked;
-    uint16x8_t dst_wide;
-    uint8x8_t alpha_narrow;
-    uint16x8_t alpha_wide;
-
-    /* choose the first processing type */
-    if( src >= src_end)
-        goto TAIL;
-    if(*src <= ALPHA_TRANS)
-        goto ALPHA_0;
-    if(*src >= ALPHA_OPAQ)
-        goto ALPHA_255;
-    /* fall-thru */
-
-ALPHA_1_TO_254:
-    do {
-
-        /* get the source */
-        src_raw = vreinterpret_u8_u32(vld1_u32(src));
-        src_raw_2 = vreinterpret_u8_u32(vld1_u32(src+2));
-
-        /* get and hold the dst too */
-        dst_raw = vreinterpret_u8_u32(vld1_u32(dst));
-        dst_raw_2 = vreinterpret_u8_u32(vld1_u32(dst+2));
-
-
-        /* get the alphas spread out properly */
-        alpha_narrow = vtbl1_u8(src_raw, alpha_mask);
-        /* reflect SkAlpha255To256() semantics a+1 vs a+a>>7 */
-        /* we collapsed (255-a)+1 ... */
-        alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow);
-
-        /* spread the dest */
-        dst_wide = vmovl_u8(dst_raw);
-
-        /* alpha mul the dest */
-        dst_wide = vmulq_u16 (dst_wide, alpha_wide);
-        dst_cooked = vshrn_n_u16(dst_wide, 8);
-
-        /* sum -- ignoring any byte lane overflows */
-        dst_final = vadd_u8(src_raw, dst_cooked);
-
-        alpha_narrow = vtbl1_u8(src_raw_2, alpha_mask);
-        /* reflect SkAlpha255To256() semantics a+1 vs a+a>>7 */
-        /* we collapsed (255-a)+1 ... */
-        alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow);
-
-        /* spread the dest */
-        dst_wide = vmovl_u8(dst_raw_2);
-
-        /* alpha mul the dest */
-        dst_wide = vmulq_u16 (dst_wide, alpha_wide);
-        dst_cooked = vshrn_n_u16(dst_wide, 8);
-
-        /* sum -- ignoring any byte lane overflows */
-        dst_final_2 = vadd_u8(src_raw_2, dst_cooked);
-
-        vst1_u32(dst, vreinterpret_u32_u8(dst_final));
-        vst1_u32(dst+2, vreinterpret_u32_u8(dst_final_2));
-
-        src += UNROLL;
-        dst += UNROLL;
-
-        /* if 2 of the next pixels aren't between 1 and 254
-        it might make sense to go to the optimized loops */
-        if((src[0] <= ALPHA_TRANS && src[1] <= ALPHA_TRANS) || (src[0] >= ALPHA_OPAQ && src[1] >= ALPHA_OPAQ))
-            break;
-
-    } while(src < src_end);
-
-    if (src >= src_end)
-        goto TAIL;
-
-    if(src[0] >= ALPHA_OPAQ && src[1] >= ALPHA_OPAQ)
-        goto ALPHA_255;
-
-    /*fall-thru*/
-
-ALPHA_0:
-
-    /*In this state, we know the current alpha is 0 and
-     we optimize for the next alpha also being zero. */
-    src_temp = src;  //so we don't have to increment dst every time
-    do {
-        if(*(++src) > ALPHA_TRANS)
-            break;
-        if(*(++src) > ALPHA_TRANS)
-            break;
-        if(*(++src) > ALPHA_TRANS)
-            break;
-        if(*(++src) > ALPHA_TRANS)
-            break;
-    } while(src < src_end);
-
-    dst += (src - src_temp);
-
-    /* no longer alpha 0, so determine where to go next. */
-    if( src >= src_end)
-        goto TAIL;
-    if(*src >= ALPHA_OPAQ)
-        goto ALPHA_255;
-    else
-        goto ALPHA_1_TO_254;
-
-ALPHA_255:
-    while((src[0] & src[1] & src[2] & src[3]) >= ALPHA_OPAQ) {
-        dst[0]=src[0];
-        dst[1]=src[1];
-        dst[2]=src[2];
-        dst[3]=src[3];
-        src+=UNROLL;
-        dst+=UNROLL;
-        if(src >= src_end)
-            goto TAIL;
-    }
-
-    //Handle remainder.
-    if(*src >= ALPHA_OPAQ) { *dst++ = *src++;
-        if(*src >= ALPHA_OPAQ) { *dst++ = *src++;
-            if(*src >= ALPHA_OPAQ) { *dst++ = *src++; }
-        }
-    }
-
-    if( src >= src_end)
-        goto TAIL;
-    if(*src <= ALPHA_TRANS)
-        goto ALPHA_0;
-    else
-        goto ALPHA_1_TO_254;
-
-TAIL:
-    /* do any residual iterations */
-    src_end += UNROLL + 1;  //goto the real end
-    while(src != src_end) {
-        if( *src != 0 ) {
-            if( *src >= ALPHA_OPAQ ) {
-                *dst = *src;
-            }
-            else {
-                *dst = SkPMSrcOver(*src, *dst);
-            }
-        }
-        src++;
-        dst++;
-    }
-    return;
-}
 
 /* Neon version of S32_Blend_BlitRow32()
  * portable version is in src/core/SkBlitRow_D32.cpp
@@ -1277,20 +1107,6 @@ const SkBlitRow::Proc sk_blitrow_platform_4444_procs_arm_neon[] = {
 const SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm_neon[] = {
     NULL,   // S32_Opaque,
     S32_Blend_BlitRow32_neon,        // S32_Blend,
-    /*
-     * We have two choices for S32A_Opaque procs. The one reads the src alpha
-     * value and attempts to optimize accordingly.  The optimization is
-     * sensitive to the source content and is not a win in all cases. For
-     * example, if there are a lot of transitions between the alpha states,
-     * the performance will almost certainly be worse.  However, for many
-     * common cases the performance is equivalent or better than the standard
-     * case where we do not inspect the src alpha.
-     */
-#if SK_A32_SHIFT == 24
-    // This proc assumes the alpha value occupies bits 24-32 of each SkPMColor
-    S32A_Opaque_BlitRow32_neon_src_alpha,   // S32A_Opaque,
-#else
-    S32A_Opaque_BlitRow32_neon,     // S32A_Opaque,
-#endif
+    S32A_Opaque_BlitRow32_neon,        // S32A_Opaque,
     S32A_Blend_BlitRow32_arm        // S32A_Blend
 };
author	robertphillips@google.com <robertphillips@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>	2012-09-04 12:48:01 +0000
committer	robertphillips@google.com <robertphillips@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>	2012-09-04 12:48:01 +0000
commit	b78765e63b5de5a7dfe5f9f6813f6df81cae14ae (patch)
tree	292bb289b1309e4c31981aa9a1f18586b24247a8 /src/opts/SkBlitRow_opts_arm_neon.cpp
parent	4f55d39a175afe70c1231eb7389790633210106f (diff)