about | summary | refs | log | tree | commit | diff | homepage
path: root/src/opts
diff options
context:
space:
mode:
author: kevin.petit <kevin.petit@arm.com> 2014-06-03 10:08:07 -0700
committer: Commit bot <commit-bot@chromium.org> 2014-06-03 10:08:07 -0700
commit: 866b95d65dfc01af372bbed206ec067e04c1f533 (patch)
tree: 14883f79262b270b84a8be086a1115231543f98d /src/opts
parent: dcf9ab14a194be0cbea896e1dc44a2a04106a38b (diff)
ARM Skia NEON patches - 38 - arm64 8888 blitters
Enable NEON on arm64 for most 8888 blitters

This patch enables NEON optimisation for the Color32, S32_Blend,
S32A_Opaque blitters on arm64.

Here are the perf improvements vs the existing code:

Color32:
========

+-------+------------+------------+
| count | Cortex-A53 | Cortex-A57 |
+-------+------------+------------+
|     1 |     -2.39% |     23.78% |
+-------+------------+------------+
|     2 |     -5.46% |      8.88% |
+-------+------------+------------+
|     4 |     -4.74% |      4.89% |
+-------+------------+------------+
|     8 |     67.74% |    107.12% |
+-------+------------+------------+
|    16 |     40.03% |    101.20% |
+-------+------------+------------+
|    64 |     11.09% |     98.40% |
+-------+------------+------------+
|   256 |     -2.20% |     74.81% |
+-------+------------+------------+
|  1024 |     -4.28% |     78.90% |
+-------+------------+------------+

S32_Blend:
==========

+-------+------------+------------+
| count | Cortex-A53 | Cortex-A57 |
+-------+------------+------------+
|     1 |      7.84% |     -6.75% |
+-------+------------+------------+
|     2 |     28.95% |     39.77% |
+-------+------------+------------+
|     4 |      5.80% |      8.26% |
+-------+------------+------------+
|     8 |      1.35% |     33.80% |
+-------+------------+------------+
|    16 |     -2.13% |     41.13% |
+-------+------------+------------+
|    64 |     -4.91% |     42.84% |
+-------+------------+------------+
|   256 |     -6.53% |     48.72% |
+-------+------------+------------+
|  1024 |     -6.65% |     46.66% |
+-------+------------+------------+

S32A_Opaque:
============

+-------+------------+------------+
| count | Cortex-A53 | Cortex-A57 |
+-------+------------+------------+
|     1 |     -7.51% |    -19.06% |
+-------+------------+------------+
|     2 |     -5.02% |    -27.70% |
+-------+------------+------------+
|     4 |     15.38% |    -21.66% |
+-------+------------+------------+
|     8 |     -0.98% |      1.05% |
+-------+------------+------------+
|    16 |     -7.35% |      3.34% |
+-------+------------+------------+
|    64 |     50.53% |     94.63% |
+-------+------------+------------+
|   256 |     71.17% |    164.10% |
+-------+------------+------------+
|  1024 |     79.58% |    197.60% |
+-------+------------+------------+

Signed-off-by: Kevin PETIT <kevin.petit@arm.com>

BUG=skia:
R=djsollen@google.com, mtklein@google.com

Author: kevin.petit@arm.com

Review URL: https://codereview.chromium.org/302283003
Diffstat (limited to 'src/opts')
-rw-r--r-- src/opts/SkBlitRow_opts_arm_neon.cpp | 21
1 files changed, 17 insertions, 4 deletions
diff --git a/src/opts/SkBlitRow_opts_arm_neon.cpp b/src/opts/SkBlitRow_opts_arm_neon.cpp
index f6fd0638b3..1410649431 100644
--- a/src/opts/SkBlitRow_opts_arm_neon.cpp
+++ b/src/opts/SkBlitRow_opts_arm_neon.cpp
@@ -17,6 +17,7 @@
#include "SkColor_opts_neon.h"
#include <arm_neon.h>
+#ifdef SK_CPU_ARM
void S32_D565_Opaque_neon(uint16_t* SK_RESTRICT dst,
const SkPMColor* SK_RESTRICT src, int count,
U8CPU alpha, int /*x*/, int /*y*/) {
@@ -575,6 +576,7 @@ void S32_D565_Blend_Dither_neon(uint16_t *dst, const SkPMColor *src,
} while (--count != 0);
}
}
+#endif
void S32A_Opaque_BlitRow32_neon(SkPMColor* SK_RESTRICT dst,
const SkPMColor* SK_RESTRICT src,
@@ -919,6 +921,7 @@ void S32_Blend_BlitRow32_neon(SkPMColor* SK_RESTRICT dst,
}
}
+#ifdef SK_CPU_ARM
void S32A_Blend_BlitRow32_neon(SkPMColor* SK_RESTRICT dst,
const SkPMColor* SK_RESTRICT src,
int count, U8CPU alpha) {
@@ -1366,6 +1369,7 @@ void S32_D565_Opaque_Dither_neon(uint16_t* SK_RESTRICT dst,
} while (--count != 0);
}
}
+#endif
void Color32_arm_neon(SkPMColor* dst, const SkPMColor* src, int count,
SkPMColor color) {
@@ -1401,13 +1405,13 @@ void Color32_arm_neon(SkPMColor* dst, const SkPMColor* src, int count,
// load src color, 8 pixels, 4 64 bit registers
// (and increment src).
uint32x2x4_t vsrc;
-#if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6))
+#if defined(SK_CPU_ARM) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6)))
asm (
"vld1.32 %h[vsrc], [%[src]]!"
: [vsrc] "=w" (vsrc), [src] "+r" (src)
: :
);
-#else // (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6))
+#else // 64bit targets and Clang
vsrc.val[0] = vld1_u32(src);
vsrc.val[1] = vld1_u32(src+2);
vsrc.val[2] = vld1_u32(src+4);
@@ -1443,14 +1447,14 @@ void Color32_arm_neon(SkPMColor* dst, const SkPMColor* src, int count,
// store back the 8 calculated pixels (2 128 bit
// registers), and increment dst.
-#if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6))
+#if defined(SK_CPU_ARM) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6)))
asm (
"vst1.32 %h[vdst], [%[dst]]!"
: [dst] "+r" (dst)
: [vdst] "w" (vdst)
: "memory"
);
-#else // (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6))
+#else // 64bit targets and Clang
vst1q_u32(dst, vdst.val[0]);
vst1q_u32(dst+4, vdst.val[1]);
dst += 8;
@@ -1471,6 +1475,7 @@ void Color32_arm_neon(SkPMColor* dst, const SkPMColor* src, int count,
///////////////////////////////////////////////////////////////////////////////
const SkBlitRow::Proc sk_blitrow_platform_565_procs_arm_neon[] = {
+#ifdef SK_CPU_ARM
// no dither
S32_D565_Opaque_neon,
S32_D565_Blend_neon,
@@ -1482,6 +1487,10 @@ const SkBlitRow::Proc sk_blitrow_platform_565_procs_arm_neon[] = {
S32_D565_Blend_Dither_neon,
S32A_D565_Opaque_Dither_neon,
NULL, // S32A_D565_Blend_Dither
+#else
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL
+#endif
};
const SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm_neon[] = {
@@ -1502,5 +1511,9 @@ const SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm_neon[] = {
#else
S32A_Opaque_BlitRow32_neon, // S32A_Opaque,
#endif
+#ifdef SK_CPU_ARM
S32A_Blend_BlitRow32_neon // S32A_Blend
+#else
+ NULL
+#endif
};