diff options
author | kevin.petit <kevin.petit@arm.com> | 2014-06-03 10:08:07 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2014-06-03 10:08:07 -0700 |
commit | 866b95d65dfc01af372bbed206ec067e04c1f533 (patch) | |
tree | 14883f79262b270b84a8be086a1115231543f98d /src/opts | |
parent | dcf9ab14a194be0cbea896e1dc44a2a04106a38b (diff) |
ARM Skia NEON patches - 38 - arm64 8888 blitters
Enable NEON on arm64 for most 8888 blitters
This patch enables NEON optimisation for the Color32, S32_Blend and
S32A_Opaque blitters on arm64.
Here are the performance improvements compared with the existing code (negative values indicate a regression):
Color32:
========
+-------+------------+------------+
| count | Cortex-A53 | Cortex-A57 |
+-------+------------+------------+
| 1 | -2.39% | 23.78% |
+-------+------------+------------+
| 2 | -5.46% | 8.88% |
+-------+------------+------------+
| 4 | -4.74% | 4.89% |
+-------+------------+------------+
| 8 | 67.74% | 107.12% |
+-------+------------+------------+
| 16 | 40.03% | 101.20% |
+-------+------------+------------+
| 64 | 11.09% | 98.40% |
+-------+------------+------------+
| 256 | -2.20% | 74.81% |
+-------+------------+------------+
| 1024 | -4.28% | 78.90% |
+-------+------------+------------+
S32_Blend:
==========
+-------+------------+------------+
| count | Cortex-A53 | Cortex-A57 |
+-------+------------+------------+
| 1 | 7.84% | -6.75% |
+-------+------------+------------+
| 2 | 28.95% | 39.77% |
+-------+------------+------------+
| 4 | 5.80% | 8.26% |
+-------+------------+------------+
| 8 | 1.35% | 33.80% |
+-------+------------+------------+
| 16 | -2.13% | 41.13% |
+-------+------------+------------+
| 64 | -4.91% | 42.84% |
+-------+------------+------------+
| 256 | -6.53% | 48.72% |
+-------+------------+------------+
| 1024 | -6.65% | 46.66% |
+-------+------------+------------+
S32A_Opaque:
============
+-------+------------+------------+
| count | Cortex-A53 | Cortex-A57 |
+-------+------------+------------+
| 1 | -7.51% | -19.06% |
+-------+------------+------------+
| 2 | -5.02% | -27.70% |
+-------+------------+------------+
| 4 | 15.38% | -21.66% |
+-------+------------+------------+
| 8 | -0.98% | 1.05% |
+-------+------------+------------+
| 16 | -7.35% | 3.34% |
+-------+------------+------------+
| 64 | 50.53% | 94.63% |
+-------+------------+------------+
| 256 | 71.17% | 164.10% |
+-------+------------+------------+
| 1024 | 79.58% | 197.60% |
+-------+------------+------------+
Signed-off-by: Kevin PETIT <kevin.petit@arm.com>
BUG=skia:
R=djsollen@google.com, mtklein@google.com
Author: kevin.petit@arm.com
Review URL: https://codereview.chromium.org/302283003
Diffstat (limited to 'src/opts')
-rw-r--r-- | src/opts/SkBlitRow_opts_arm_neon.cpp | 21 |
1 file changed, 17 insertions, 4 deletions
diff --git a/src/opts/SkBlitRow_opts_arm_neon.cpp b/src/opts/SkBlitRow_opts_arm_neon.cpp index f6fd0638b3..1410649431 100644 --- a/src/opts/SkBlitRow_opts_arm_neon.cpp +++ b/src/opts/SkBlitRow_opts_arm_neon.cpp @@ -17,6 +17,7 @@ #include "SkColor_opts_neon.h" #include <arm_neon.h> +#ifdef SK_CPU_ARM void S32_D565_Opaque_neon(uint16_t* SK_RESTRICT dst, const SkPMColor* SK_RESTRICT src, int count, U8CPU alpha, int /*x*/, int /*y*/) { @@ -575,6 +576,7 @@ void S32_D565_Blend_Dither_neon(uint16_t *dst, const SkPMColor *src, } while (--count != 0); } } +#endif void S32A_Opaque_BlitRow32_neon(SkPMColor* SK_RESTRICT dst, const SkPMColor* SK_RESTRICT src, @@ -919,6 +921,7 @@ void S32_Blend_BlitRow32_neon(SkPMColor* SK_RESTRICT dst, } } +#ifdef SK_CPU_ARM void S32A_Blend_BlitRow32_neon(SkPMColor* SK_RESTRICT dst, const SkPMColor* SK_RESTRICT src, int count, U8CPU alpha) { @@ -1366,6 +1369,7 @@ void S32_D565_Opaque_Dither_neon(uint16_t* SK_RESTRICT dst, } while (--count != 0); } } +#endif void Color32_arm_neon(SkPMColor* dst, const SkPMColor* src, int count, SkPMColor color) { @@ -1401,13 +1405,13 @@ void Color32_arm_neon(SkPMColor* dst, const SkPMColor* src, int count, // load src color, 8 pixels, 4 64 bit registers // (and increment src). uint32x2x4_t vsrc; -#if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6)) +#if defined(SK_CPU_ARM) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6))) asm ( "vld1.32 %h[vsrc], [%[src]]!" : [vsrc] "=w" (vsrc), [src] "+r" (src) : : ); -#else // (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6)) +#else // 64bit targets and Clang vsrc.val[0] = vld1_u32(src); vsrc.val[1] = vld1_u32(src+2); vsrc.val[2] = vld1_u32(src+4); @@ -1443,14 +1447,14 @@ void Color32_arm_neon(SkPMColor* dst, const SkPMColor* src, int count, // store back the 8 calculated pixels (2 128 bit // registers), and increment dst. 
-#if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6)) +#if defined(SK_CPU_ARM) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6))) asm ( "vst1.32 %h[vdst], [%[dst]]!" : [dst] "+r" (dst) : [vdst] "w" (vdst) : "memory" ); -#else // (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6)) +#else // 64bit targets and Clang vst1q_u32(dst, vdst.val[0]); vst1q_u32(dst+4, vdst.val[1]); dst += 8; @@ -1471,6 +1475,7 @@ void Color32_arm_neon(SkPMColor* dst, const SkPMColor* src, int count, /////////////////////////////////////////////////////////////////////////////// const SkBlitRow::Proc sk_blitrow_platform_565_procs_arm_neon[] = { +#ifdef SK_CPU_ARM // no dither S32_D565_Opaque_neon, S32_D565_Blend_neon, @@ -1482,6 +1487,10 @@ const SkBlitRow::Proc sk_blitrow_platform_565_procs_arm_neon[] = { S32_D565_Blend_Dither_neon, S32A_D565_Opaque_Dither_neon, NULL, // S32A_D565_Blend_Dither +#else + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL +#endif }; const SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm_neon[] = { @@ -1502,5 +1511,9 @@ const SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm_neon[] = { #else S32A_Opaque_BlitRow32_neon, // S32A_Opaque, #endif +#ifdef SK_CPU_ARM S32A_Blend_BlitRow32_neon // S32A_Blend +#else + NULL +#endif }; |