diff options
author | 2012-08-21 15:16:56 +0000 | |
---|---|---|
committer | 2012-08-21 15:16:56 +0000 | |
commit | db39996d4639adb9c6027b1dd0349b1981f90353 (patch) | |
tree | 4f39f35a3f0af3e2a90611d7ede447a7db33b355 /src/opts/SkBlitRow_opts_arm.cpp | |
parent | a9590fc00c4aa2fd34329e552e6396c457702a76 (diff) |
Upstream ARM 565 optimizations from AOSP.
Review URL: https://codereview.appspot.com/6461101
git-svn-id: http://skia.googlecode.com/svn/trunk@5211 2bbb7eff-a529-9590-31e7-b0007b416f81
Diffstat (limited to 'src/opts/SkBlitRow_opts_arm.cpp')
-rw-r--r-- | src/opts/SkBlitRow_opts_arm.cpp | 78 |
1 files changed, 74 insertions, 4 deletions
diff --git a/src/opts/SkBlitRow_opts_arm.cpp b/src/opts/SkBlitRow_opts_arm.cpp index 50e165397a..99cd53eb87 100644 --- a/src/opts/SkBlitRow_opts_arm.cpp +++ b/src/opts/SkBlitRow_opts_arm.cpp @@ -17,6 +17,73 @@ #include "SkCachePreload_arm.h" #if USE_ARM_CODE + +static void S32A_D565_Opaque(uint16_t* SK_RESTRICT dst, + const SkPMColor* SK_RESTRICT src, int count, + U8CPU alpha, int /*x*/, int /*y*/) { + SkASSERT(255 == alpha); + + asm volatile ( + "1: \n\t" + "ldr r3, [%[src]], #4 \n\t" + "cmp r3, #0xff000000 \n\t" + "blo 2f \n\t" + "and r4, r3, #0x0000f8 \n\t" + "and r5, r3, #0x00fc00 \n\t" + "and r6, r3, #0xf80000 \n\t" + "pld [r1, #32] \n\t" + "lsl r3, r4, #8 \n\t" + "orr r3, r3, r5, lsr #5 \n\t" + "orr r3, r3, r6, lsr #19 \n\t" + "subs %[count], %[count], #1 \n\t" + "strh r3, [%[dst]], #2 \n\t" + "bne 1b \n\t" + "b 4f \n\t" + "2: \n\t" + "lsrs r7, r3, #24 \n\t" + "beq 3f \n\t" + "ldrh r4, [%[dst]] \n\t" + "rsb r7, r7, #255 \n\t" + "and r6, r4, #0x001f \n\t" + "ubfx r5, r4, #5, #6 \n\t" + "pld [r0, #16] \n\t" + "lsr r4, r4, #11 \n\t" + "smulbb r6, r6, r7 \n\t" + "smulbb r5, r5, r7 \n\t" + "smulbb r4, r4, r7 \n\t" + "ubfx r7, r3, #16, #8 \n\t" + "ubfx ip, r3, #8, #8 \n\t" + "and r3, r3, #0xff \n\t" + "add r6, r6, #16 \n\t" + "add r5, r5, #32 \n\t" + "add r4, r4, #16 \n\t" + "add r6, r6, r6, lsr #5 \n\t" + "add r5, r5, r5, lsr #6 \n\t" + "add r4, r4, r4, lsr #5 \n\t" + "add r6, r7, r6, lsr #5 \n\t" + "add r5, ip, r5, lsr #6 \n\t" + "add r4, r3, r4, lsr #5 \n\t" + "lsr r6, r6, #3 \n\t" + "and r5, r5, #0xfc \n\t" + "and r4, r4, #0xf8 \n\t" + "orr r6, r6, r5, lsl #3 \n\t" + "orr r4, r6, r4, lsl #8 \n\t" + "strh r4, [%[dst]], #2 \n\t" + "pld [r1, #32] \n\t" + "subs %[count], %[count], #1 \n\t" + "bne 1b \n\t" + "b 4f \n\t" + "3: \n\t" + "subs %[count], %[count], #1 \n\t" + "add %[dst], %[dst], #2 \n\t" + "bne 1b \n\t" + "4: \n\t" + : [dst] "+r" (dst), [src] "+r" (src), [count] "+r" (count) + : + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "ip" + ); +} + static void S32A_Opaque_BlitRow32_arm(SkPMColor* SK_RESTRICT dst, const SkPMColor* SK_RESTRICT src, int count, U8CPU alpha) { @@ -256,10 +323,13 @@ void S32A_Blend_BlitRow32_arm(SkPMColor* SK_RESTRICT dst, #if USE_ARM_CODE const SkBlitRow::Proc sk_blitrow_platform_565_procs_arm[] = { // no dither - NULL, // S32_D565_Opaque - NULL, // S32_D565_Blend - NULL, // S32A_D565_Opaque - NULL, // S32A_D565_Blend + // NOTE: For the functions below, we don't have a special version + // that assumes that each source pixel is opaque. But our S32A is + // still faster than the default, so use it. + S32A_D565_Opaque, // S32_D565_Opaque + NULL, // S32_D565_Blend + S32A_D565_Opaque, // S32A_D565_Opaque + NULL, // S32A_D565_Blend // dither NULL, // S32_D565_Opaque_Dither |