1 files changed, 0 insertions, 314 deletions
diff --git a/src/opts/SkBlitRow_opts_arm.cpp b/src/opts/SkBlitRow_opts_arm.cpp
index 36bed97ccb..f6e6ba2966 100644
--- a/src/opts/SkBlitRow_opts_arm.cpp
+++ b/src/opts/SkBlitRow_opts_arm.cpp
@@ -185,306 +185,6 @@ static void S32A_Opaque_BlitRow32_arm(SkPMColor* SK_RESTRICT dst,
                   : "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "ip", "memory"
                   );
 }
-
-static void __attribute__((naked)) S32A_Opaque_BlitRow32_arm_src_alpha
-                                        (SkPMColor* SK_RESTRICT dst,
-                                         const SkPMColor* SK_RESTRICT src,
-                                         int count, U8CPU alpha) {
-
-/* Optimizes for alpha == 0, alpha == 255, and 1 < alpha < 255 cases individually */
-/* Predicts that the next pixel will have the same alpha type as the current pixel */
-
-asm volatile (
-
-    "\tSTMDB  r13!, {r4-r12, r14}        \n" /* saving r4-r12, lr on the stack */
-                                             /* we should not save r0-r3 according to ABI */
-
-    "\tCMP    r2, #0                     \n" /* if (count == 0) */
-    "\tBEQ    9f                         \n" /* go to EXIT */
-
-    "\tMOV    r12, #0xff                 \n" /* load the 0xff mask in r12 */
-    "\tORR    r12, r12, r12, LSL #16     \n" /* convert it to 0xff00ff in r12 */
-
-    "\tMOV    r14, #255                  \n" /* r14 = 255 */
-                                             /* will be used later for left-side comparison */
-
-    "\tADD    r2, %[src], r2, LSL #2     \n" /* r2 points to last array element which can be used */
-    "\tSUB    r2, r2, #16                \n" /* as a base for 4-way processing algorithm */
-
-    "\tCMP    %[src], r2                 \n" /* if our current [src] array pointer is bigger than */
-    "\tBGT    8f                         \n" /* calculated marker for 4-way -> */
-                                             /* use simple one-by-one processing */
-
-    /* START OF DISPATCHING BLOCK */
-
-    "\t0:                                \n"
-
-    "\tLDM    %[src]!, {r3, r4, r5, r6}  \n" /* 4-way loading of source values to r3-r6 */
-
-    "\tLSR    r7, r3, #24                \n" /* if not all src alphas of 4-way block are equal -> */
-    "\tCMP    r7, r4, LSR #24            \n"
-    "\tCMPEQ  r7, r5, LSR #24            \n"
-    "\tCMPEQ  r7, r6, LSR #24            \n"
-    "\tBNE    1f                         \n" /* -> go to general 4-way processing routine */
-
-    "\tCMP    r14, r7                    \n" /* if all src alphas are equal to 255 */
-    "\tBEQ    3f                         \n" /* go to alpha == 255 optimized routine */
-
-    "\tCMP    r7,  #0                    \n" /* if all src alphas are equal to 0 */
-    "\tBEQ    6f                         \n" /* go to alpha == 0 optimized routine */
-
-    /* END OF DISPATCHING BLOCK */
-
-    /* START OF BLOCK OPTIMIZED FOR 0 < ALPHA < 255 */
-
-    "\t1:                                \n"
-                                             /* we do not have enough registers to make */
-                                             /* 4-way [dst] loading -> we are using 2 * 2-way */
-
-    "\tLDM    %[dst], {r7, r8}           \n" /* 1st 2-way loading of dst values to r7-r8 */
-
-    /* PROCESSING BLOCK 1 */
-    /* r3 = src, r7 = dst */
-
-    "\tLSR    r11, r3,  #24              \n" /* extracting alpha from source and storing to r11 */
-    "\tAND    r9,  r12, r7               \n" /* r9 = br masked by r12 (0xff00ff) */
-    "\tRSB    r11, r11, #256             \n" /* subtracting the alpha from 255 -> r11 = scale */
-    "\tAND    r10, r12, r7, LSR #8       \n" /* r10 = ag masked by r12 (0xff00ff) */
-    "\tMUL    r9,  r9,  r11              \n" /* br = br * scale */
-    "\tAND    r9,  r12, r9, LSR #8       \n" /* lsr br by 8 and mask it */
-    "\tMUL    r10, r10, r11              \n" /* ag = ag * scale */
-    "\tAND    r10, r10, r12, LSL #8      \n" /* mask ag with reverse mask */
-    "\tORR    r7,  r9,  r10              \n" /* br | ag */
-    "\tADD    r7,  r3,  r7               \n" /* dst = src + calc dest(r8) */
-
-    /* PROCESSING BLOCK 2 */
-    /* r4 = src, r8 = dst */
-
-    "\tLSR    r11, r4,  #24              \n" /* see PROCESSING BLOCK 1 */
-    "\tAND    r9,  r12, r8               \n"
-    "\tRSB    r11, r11, #256             \n"
-    "\tAND    r10, r12, r8, LSR #8       \n"
-    "\tMUL    r9,  r9,  r11              \n"
-    "\tAND    r9,  r12, r9, LSR #8       \n"
-    "\tMUL    r10, r10, r11              \n"
-    "\tAND    r10, r10, r12, LSL #8      \n"
-    "\tORR    r8,  r9,  r10              \n"
-    "\tADD    r8,  r4,  r8               \n"
-
-    "\tSTM    %[dst]!, {r7, r8}          \n" /* 1st 2-way storing of processed dst values */
-
-    "\tLDM    %[dst], {r9, r10}          \n" /* 2nd 2-way loading of dst values to r9-r10 */
-
-    /* PROCESSING BLOCK 3 */
-    /* r5 = src, r9 = dst */
-
-    "\tLSR    r11, r5,  #24              \n" /* see PROCESSING BLOCK 1 */
-    "\tAND    r7,  r12, r9               \n"
-    "\tRSB    r11, r11, #256             \n"
-    "\tAND    r8,  r12, r9, LSR #8       \n"
-    "\tMUL    r7,  r7,  r11              \n"
-    "\tAND    r7,  r12, r7, LSR #8       \n"
-    "\tMUL    r8,  r8,  r11              \n"
-    "\tAND    r8,  r8,  r12, LSL #8      \n"
-    "\tORR    r9,  r7,  r8               \n"
-    "\tADD    r9,  r5,  r9               \n"
-
-    /* PROCESSING BLOCK 4 */
-    /* r6 = src, r10 = dst */
-
-    "\tLSR    r11, r6,  #24              \n" /* see PROCESSING BLOCK 1 */
-    "\tAND    r7,  r12, r10              \n"
-    "\tRSB    r11, r11, #256             \n"
-    "\tAND    r8,  r12, r10, LSR #8      \n"
-    "\tMUL    r7,  r7,  r11              \n"
-    "\tAND    r7,  r12, r7, LSR #8       \n"
-    "\tMUL    r8,  r8,  r11              \n"
-    "\tAND    r8,  r8,  r12, LSL #8      \n"
-    "\tORR    r10, r7,  r8               \n"
-    "\tADD    r10, r6,  r10              \n"
-
-    "\tSTM    %[dst]!, {r9, r10}         \n" /* 2nd 2-way storing of processed dst values */
-
-    "\tCMP    %[src], r2                 \n" /* if our current [src] pointer <= calculated marker */
-    "\tBLE    0b                         \n" /* we could run 4-way processing -> go to dispatcher */
-    "\tBGT    8f                         \n" /* else -> use simple one-by-one processing */
-
-    /* END OF BLOCK OPTIMIZED FOR 0 < ALPHA < 255 */
-
-    /* START OF BLOCK OPTIMIZED FOR ALPHA == 255 */
-
-    "\t2:                                \n" /* ENTRY 1: LOADING [src] to registers */
-
-    "\tLDM    %[src]!, {r3, r4, r5, r6}  \n" /* 4-way loading of source values to r3-r6 */
-
-    "\tAND    r7, r3, r4                 \n" /* if not all alphas == 255 -> */
-    "\tAND    r8, r5, r6                 \n"
-    "\tAND    r9, r7, r8                 \n"
-    "\tCMP    r14, r9, LSR #24           \n"
-    "\tBNE    4f                         \n" /* -> go to alpha == 0 check */
-
-    "\t3:                                \n" /* ENTRY 2: [src] already loaded by DISPATCHER */
-
-    "\tSTM    %[dst]!, {r3, r4, r5, r6}  \n" /* all alphas == 255 -> 4-way copy [src] to [dst] */
-
-    "\tCMP    %[src], r2                 \n" /* if our current [src] array pointer <= marker */
-    "\tBLE    2b                         \n" /* we could run 4-way processing */
-                                             /* because now we're in ALPHA == 255 state */
-                                             /* run next cycle with priority alpha == 255 checks */
-
-    "\tBGT    8f                         \n" /* if our current [src] array pointer > marker */
-                                             /* use simple one-by-one processing */
-
-    "\t4:                                \n"
-
-    "\tORR    r7, r3, r4                 \n" /* if not all alphas == 0 -> */
-    "\tORR    r8, r5, r6                 \n"
-    "\tORR    r9, r7, r8                 \n"
-    "\tLSRS   r9, #24                    \n"
-    "\tBNE    1b                         \n" /* -> go to general processing mode */
-                                             /* (we already checked for alpha == 255) */
-
-    "\tADD    %[dst], %[dst], #16        \n" /* all src alphas == 0 -> do not change dst values */
-
-    "\tCMP    %[src], r2                 \n" /* if our current [src] array pointer <= marker */
-    "\tBLE    5f                         \n" /* we could run 4-way processing one more time */
-                                             /* because now we're in ALPHA == 0 state */
-                                             /* run next cycle with priority alpha == 0 checks */
-
-    "\tBGT    8f                         \n" /* if our current [src] array pointer > marker */
-                                             /* use simple one-by-one processing */
-
-    /* END OF BLOCK OPTIMIZED FOR ALPHA == 255 */
-
-    /* START OF BLOCK OPTIMIZED FOR ALPHA == 0 */
-
-    "\t5:                                \n" /* ENTRY 1: LOADING [src] to registers */
-
-    "\tLDM    %[src]!, {r3, r4, r5, r6}  \n" /* 4-way loading of source values to r3-r6 */
-
-    "\tORR    r7, r3, r4                 \n" /* if not all alphas == 0 -> */
-    "\tORR    r8, r5, r6                 \n"
-    "\tORR    r9, r7, r8                 \n"
-    "\tLSRS   r9, #24                    \n"
-    "\tBNE    7f                         \n" /* -> go to alpha == 255 check */
-
-    "\t6:                                \n" /* ENTRY 2: [src] already loaded by DISPATCHER */
-
-    "\tADD    %[dst], %[dst], #16        \n" /* all src alphas == 0 -> do not change dst values */
-
-    "\tCMP    %[src], r2                 \n" /* if our current [src] array pointer <= marker */
-    "\tBLE    5b                         \n" /* we could run 4-way processing one more time */
-                                             /* because now we're in ALPHA == 0 state */
-                                             /* run next cycle with priority alpha == 0 checks */
-
-    "\tBGT    8f                         \n" /* if our current [src] array pointer > marker */
-                                             /* use simple one-by-one processing */
-    "\t7:                                \n"
-
-    "\tAND    r7, r3, r4                 \n" /* if not all alphas == 255 -> */
-    "\tAND    r8, r5, r6                 \n"
-    "\tAND    r9, r7, r8                 \n"
-    "\tCMP    r14, r9, LSR #24           \n"
-    "\tBNE    1b                         \n" /* -> go to general processing mode */
-                                             /* (we already checked for alpha == 0) */
-
-    "\tSTM    %[dst]!, {r3, r4, r5, r6}  \n" /* all alphas == 255 -> 4-way copy [src] to [dst] */
-
-    "\tCMP    %[src], r2                 \n" /* if our current [src] array pointer <= marker */
-    "\tBLE    2b                         \n" /* we could run 4-way processing one more time */
-                                             /* because now we're in ALPHA == 255 state */
-                                             /* run next cycle with priority alpha == 255 checks */
-
-    "\tBGT    8f                         \n" /* if our current [src] array pointer > marker */
-                                             /* use simple one-by-one processing */
-
-    /* END OF BLOCK OPTIMIZED FOR ALPHA == 0 */
-
-    /* START OF TAIL BLOCK */
-    /* (used when array is too small to be processed with 4-way algorithm)*/
-
-    "\t8:                                \n"
-
-    "\tADD    r2, r2, #16                \n" /* now r2 points to the element just after array */
-                                             /* we've done r2 = r2 - 16 at procedure start */
-
-    "\tCMP    %[src], r2                 \n" /* if our current [src] array pointer > final marker */
-    "\tBEQ    9f                         \n" /* goto EXIT */
-
-    /* TAIL PROCESSING BLOCK 1 */
-
-    "\tLDR    r3, [%[src]], #4           \n" /* r3 = *src, src++ */
-    "\tLDR    r7, [%[dst]]               \n" /* r7 = *dst */
-
-    "\tLSR    r11, r3,  #24              \n" /* extracting alpha from source */
-    "\tAND    r9,  r12, r7               \n" /* r9 = br masked by r12 (0xff00ff) */
-    "\tRSB    r11, r11, #256             \n" /* subtracting the alpha from 255 -> r11 = scale */
-    "\tAND    r10, r12, r7, LSR #8       \n" /* r10 = ag masked by r12 (0xff00ff) */
-    "\tMUL    r9,  r9,  r11              \n" /* br = br * scale */
-    "\tAND    r9,  r12, r9, LSR #8       \n" /* lsr br by 8 and mask it */
-    "\tMUL    r10, r10, r11              \n" /* ag = ag * scale */
-    "\tAND    r10, r10, r12, LSL #8      \n" /* mask ag with reverse mask */
-    "\tORR    r7,  r9,  r10              \n" /* br | ag */
-    "\tADD    r7,  r3,  r7               \n" /* dst = src + calc dest(r8) */
-
-    "\tSTR    r7, [%[dst]], #4           \n" /* *dst = r7; dst++ */
-
-    "\tCMP    %[src], r2                 \n" /* if our current [src] array pointer > final marker */
-    "\tBEQ    9f                         \n" /* goto EXIT */
-
-    /* TAIL PROCESSING BLOCK 2 */
-
-    "\tLDR    r3, [%[src]], #4           \n" /* see TAIL PROCESSING BLOCK 1 */
-    "\tLDR    r7, [%[dst]]               \n"
-
-    "\tLSR    r11, r3,  #24              \n"
-    "\tAND    r9,  r12, r7               \n"
-    "\tRSB    r11, r11, #256             \n"
-    "\tAND    r10, r12, r7, LSR #8       \n"
-    "\tMUL    r9,  r9,  r11              \n"
-    "\tAND    r9,  r12, r9, LSR #8       \n"
-    "\tMUL    r10, r10, r11              \n"
-    "\tAND    r10, r10, r12, LSL #8      \n"
-    "\tORR    r7,  r9,  r10              \n"
-    "\tADD    r7,  r3,  r7               \n"
-
-    "\tSTR    r7, [%[dst]], #4           \n"
-
-    "\tCMP    %[src], r2                 \n"
-    "\tBEQ    9f                         \n"
-
-    /* TAIL PROCESSING BLOCK 3 */
-
-    "\tLDR    r3, [%[src]], #4           \n" /* see TAIL PROCESSING BLOCK 1 */
-    "\tLDR    r7, [%[dst]]               \n"
-
-    "\tLSR    r11, r3,  #24              \n"
-    "\tAND    r9,  r12, r7               \n"
-    "\tRSB    r11, r11, #256             \n"
-    "\tAND    r10, r12, r7, LSR #8       \n"
-    "\tMUL    r9,  r9,  r11              \n"
-    "\tAND    r9,  r12, r9, LSR #8       \n"
-    "\tMUL    r10, r10, r11              \n"
-    "\tAND    r10, r10, r12, LSL #8      \n"
-    "\tORR    r7,  r9,  r10              \n"
-    "\tADD    r7,  r3,  r7               \n"
-
-    "\tSTR    r7, [%[dst]], #4           \n"
-
-    /* END OF TAIL BLOCK */
-
-    "\t9:                                \n" /* EXIT */
-
-    "\tLDMIA  r13!, {r4-r12, r14}        \n" /* restoring r4-r12, lr from stack */
-    "\tBX     lr                         \n" /* return */
-
-    : [dst] "+r" (dst), [src] "+r" (src)
-    :
-    : "cc", "r2", "r3", "memory"
-
-    );
-
-}
 #endif // USE_ARM_CODE
 
 /*
@@ -666,21 +366,7 @@ const SkBlitRow::Proc sk_blitrow_platform_4444_procs_arm[] = {
 const SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm[] = {
     NULL,   // S32_Opaque,
     NULL,   // S32_Blend,
-    /*
-     * We have two choices for S32A_Opaque procs. The one reads the src alpha
-     * value and attempts to optimize accordingly.  The optimization is
-     * sensitive to the source content and is not a win in all cases. For
-     * example, if there are a lot of transitions between the alpha states,
-     * the performance will almost certainly be worse.  However, for many
-     * common cases the performance is equivalent or better than the standard
-     * case where we do not inspect the src alpha.
-     */
-#if SK_A32_SHIFT == 24
-    // This proc assumes the alpha value occupies bits 24-32 of each SkPMColor
-    S32A_Opaque_BlitRow32_arm_src_alpha,   // S32A_Opaque,
-#else
     S32A_Opaque_BlitRow32_arm,   // S32A_Opaque,
-#endif
     S32A_Blend_BlitRow32_arm     // S32A_Blend
 };
 #endif