aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/opts/SkBlitRow_opts_arm.cpp
diff options
context:
space:
mode:
authorGravatar djsollen@google.com <djsollen@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>2012-08-21 15:16:56 +0000
committerGravatar djsollen@google.com <djsollen@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>2012-08-21 15:16:56 +0000
commit db39996d4639adb9c6027b1dd0349b1981f90353 (patch)
tree 4f39f35a3f0af3e2a90611d7ede447a7db33b355 /src/opts/SkBlitRow_opts_arm.cpp
parent a9590fc00c4aa2fd34329e552e6396c457702a76 (diff)
Upstream ARM 565 optimizations from AOSP.
Review URL: https://codereview.appspot.com/6461101 git-svn-id: http://skia.googlecode.com/svn/trunk@5211 2bbb7eff-a529-9590-31e7-b0007b416f81
Diffstat (limited to 'src/opts/SkBlitRow_opts_arm.cpp')
-rw-r--r-- src/opts/SkBlitRow_opts_arm.cpp | 78
1 files changed, 74 insertions, 4 deletions
diff --git a/src/opts/SkBlitRow_opts_arm.cpp b/src/opts/SkBlitRow_opts_arm.cpp
index 50e165397a..99cd53eb87 100644
--- a/src/opts/SkBlitRow_opts_arm.cpp
+++ b/src/opts/SkBlitRow_opts_arm.cpp
@@ -17,6 +17,73 @@
#include "SkCachePreload_arm.h"
#if USE_ARM_CODE
+
+/**
+ * Blits one row of 32-bit SkPMColor source pixels onto a 16-bit 565
+ * destination row using hand-written ARM assembly.
+ *
+ * Each source pixel takes one of three paths, selected by its top byte
+ * (bits 31..24, used here as alpha):
+ *   - 0xff : the pixel is packed straight to 565 and stored;
+ *   - 0x00 : the destination pixel is skipped (left untouched);
+ *   - other: the destination fields are scaled by (255 - alpha), the source
+ *            bytes are added unscaled, and the sum is repacked to 565.
+ *
+ * Packing maps source byte 0 to 565 bits 15..11, byte 1 to bits 10..5 and
+ * byte 2 to bits 4..0.
+ *
+ * @param dst    destination row of 16-bit pixels
+ * @param src    source row of 32-bit pixels
+ * @param count  number of pixels to process; a non-positive count is a no-op
+ * @param alpha  global alpha; only checked via SkASSERT to be 255
+ *
+ * The two trailing int parameters (x, y) are unnamed and unused.
+ */
+static void S32A_D565_Opaque(uint16_t* SK_RESTRICT dst,
+                             const SkPMColor* SK_RESTRICT src, int count,
+                             U8CPU alpha, int /*x*/, int /*y*/) {
+    SkASSERT(255 == alpha);
+
+    // The assembly loop is bottom-tested: it always loads a pixel before it
+    // decrements and tests the counter, so an empty row must be rejected
+    // here or the loop would run past the end of both buffers.
+    if (count <= 0) {
+        return;
+    }
+
+    // Prefetch hints address the named operands (%[src] / %[dst]) rather
+    // than fixed registers, because the "+r" constraints let the compiler
+    // place the pointers in any general-purpose register.
+    asm volatile (
+        "1: \n\t"                                  // loop head: next pixel
+        "ldr r3, [%[src]], #4 \n\t"                // r3 = *src++
+        "cmp r3, #0xff000000 \n\t"                 // is the top byte 0xff?
+        "blo 2f \n\t"                              // no -> blend or skip
+        "and r4, r3, #0x0000f8 \n\t"               // top 5 bits of byte 0
+        "and r5, r3, #0x00fc00 \n\t"               // top 6 bits of byte 1
+        "and r6, r3, #0xf80000 \n\t"               // top 5 bits of byte 2
+        "pld [%[src], #32] \n\t"                   // prefetch upcoming source
+        "lsl r3, r4, #8 \n\t"                      // byte 0 -> bits 15..11
+        "orr r3, r3, r5, lsr #5 \n\t"              // byte 1 -> bits 10..5
+        "orr r3, r3, r6, lsr #19 \n\t"             // byte 2 -> bits 4..0
+        "subs %[count], %[count], #1 \n\t"         // --count, sets flags
+        "strh r3, [%[dst]], #2 \n\t"               // *dst++ = packed pixel
+        "bne 1b \n\t"                              // more pixels -> loop
+        "b 4f \n\t"                                // done
+        "2: \n\t"                                  // top byte < 0xff
+        "lsrs r7, r3, #24 \n\t"                    // r7 = alpha, Z if zero
+        "beq 3f \n\t"                              // alpha == 0 -> skip pixel
+        "ldrh r4, [%[dst]] \n\t"                   // r4 = current dst pixel
+        "rsb r7, r7, #255 \n\t"                    // r7 = 255 - alpha
+        "and r6, r4, #0x001f \n\t"                 // dst bits 4..0
+        "ubfx r5, r4, #5, #6 \n\t"                 // dst bits 10..5
+        "pld [%[dst], #16] \n\t"                   // prefetch upcoming dst
+        "lsr r4, r4, #11 \n\t"                     // dst bits 15..11
+        "smulbb r6, r6, r7 \n\t"                   // scale each dst field
+        "smulbb r5, r5, r7 \n\t"                   //   by (255 - alpha)
+        "smulbb r4, r4, r7 \n\t"
+        "ubfx r7, r3, #16, #8 \n\t"                // r7 = src byte 2
+        "ubfx ip, r3, #8, #8 \n\t"                 // ip = src byte 1
+        "and r3, r3, #0xff \n\t"                   // r3 = src byte 0
+        "add r6, r6, #16 \n\t"                     // rounding bias, 5-bit
+        "add r5, r5, #32 \n\t"                     // rounding bias, 6-bit
+        "add r4, r4, #16 \n\t"                     // rounding bias, 5-bit
+        "add r6, r6, r6, lsr #5 \n\t"              // x += x >> n; together with
+        "add r5, r5, r5, lsr #6 \n\t"              //   the shift below this
+        "add r4, r4, r4, lsr #5 \n\t"              //   rescales to 0..255
+        "add r6, r7, r6, lsr #5 \n\t"              // src byte 2 + scaled dst
+        "add r5, ip, r5, lsr #6 \n\t"              // src byte 1 + scaled dst
+        "add r4, r3, r4, lsr #5 \n\t"              // src byte 0 + scaled dst
+        "lsr r6, r6, #3 \n\t"                      // reduce to 5 bits
+        "and r5, r5, #0xfc \n\t"                   // keep top 6 bits
+        "and r4, r4, #0xf8 \n\t"                   // keep top 5 bits
+        "orr r6, r6, r5, lsl #3 \n\t"              // place bits 10..5
+        "orr r4, r6, r4, lsl #8 \n\t"              // place bits 15..11
+        "strh r4, [%[dst]], #2 \n\t"               // *dst++ = blended pixel
+        "pld [%[src], #32] \n\t"                   // prefetch upcoming source
+        "subs %[count], %[count], #1 \n\t"         // --count, sets flags
+        "bne 1b \n\t"                              // more pixels -> loop
+        "b 4f \n\t"                                // done
+        "3: \n\t"                                  // alpha == 0
+        "subs %[count], %[count], #1 \n\t"         // --count, sets flags
+        "add %[dst], %[dst], #2 \n\t"              // dst++ without storing
+        "bne 1b \n\t"                              // more pixels -> loop
+        "4: \n\t"                                  // exit
+        : [dst] "+r" (dst), [src] "+r" (src), [count] "+r" (count)
+        :
+        : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "ip"
+    );
+}
+
static void S32A_Opaque_BlitRow32_arm(SkPMColor* SK_RESTRICT dst,
const SkPMColor* SK_RESTRICT src,
int count, U8CPU alpha) {
@@ -256,10 +323,13 @@ void S32A_Blend_BlitRow32_arm(SkPMColor* SK_RESTRICT dst,
#if USE_ARM_CODE
const SkBlitRow::Proc sk_blitrow_platform_565_procs_arm[] = {
// no dither
- NULL, // S32_D565_Opaque
- NULL, // S32_D565_Blend
- NULL, // S32A_D565_Opaque
- NULL, // S32A_D565_Blend
+ // NOTE: For the functions below, we don't have a special version
+ // that assumes that each source pixel is opaque. But our S32A is
+ // still faster than the default, so use it.
+ S32A_D565_Opaque, // S32_D565_Opaque
+ NULL, // S32_D565_Blend
+ S32A_D565_Opaque, // S32A_D565_Opaque
+ NULL, // S32A_D565_Blend
// dither
NULL, // S32_D565_Opaque_Dither