aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/opts
diff options
context:
space:
mode:
authorGravatar commit-bot@chromium.org <commit-bot@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81>2013-12-02 22:40:56 +0000
committerGravatar commit-bot@chromium.org <commit-bot@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81>2013-12-02 22:40:56 +0000
commit46e266cdbe6da0a4553ec0004bcc7f9524829c9e (patch)
tree4a62b48fbde5093794247757cafab23e83faa559 /src/opts
parent36026de64400cbe91552c549cf9a906a0926fef3 (diff)
ARM Skia NEON patches - 29 - Xfermode: SkFourByteInterp
Xfermode: add a NEON version of SkFourByteInterp Brings a modest performance improvement on its own in ProcXfermodes when aa is neither zero nor FF. Combined with 1-pixel NEON modeprocs, it brings up to 35% speed improvement on the aa case. Signed-off-by: Kévin PETIT <kevin.petit@arm.com> BUG= R=djsollen@google.com, mtklein@google.com, reed@google.com Author: kevin.petit.arm@gmail.com Review URL: https://codereview.chromium.org/23724013 git-svn-id: http://skia.googlecode.com/svn/trunk@12448 2bbb7eff-a529-9590-31e7-b0007b416f81
Diffstat (limited to 'src/opts')
-rw-r--r--src/opts/SkColor_opts_neon.h29
-rw-r--r--src/opts/SkXfermode_opts_arm_neon.cpp4
2 files changed, 31 insertions, 2 deletions
diff --git a/src/opts/SkColor_opts_neon.h b/src/opts/SkColor_opts_neon.h
index 85752f5558..a3430b5c51 100644
--- a/src/opts/SkColor_opts_neon.h
+++ b/src/opts/SkColor_opts_neon.h
@@ -82,4 +82,33 @@ static inline uint8x8_t SkBlend32_neon8(uint8x8_t src, uint8x8_t dst, uint16x8_t
return vmovn_u16(vreinterpretq_u16_s16(dst_wide));
}
+static inline SkPMColor SkFourByteInterp256_neon(SkPMColor src, SkPMColor dst, /* NEON interpolation of two packed 32-bit pixels; per byte: dst + (((src - dst) * srcScale) >> 8) */
+ unsigned srcScale) { /* srcScale: weight applied to src; asserted below to be at most 256 */
+ SkASSERT(srcScale <= 256);
+ int16x8_t vscale = vdupq_n_s16(srcScale); /* the same scale replicated into all eight 16-bit lanes */
+ int16x8_t vsrc_wide, vdst_wide, vdiff;
+ uint8x8_t res;
+ /* Splat each 32-bit pixel across a 64-bit vector, view it as bytes, and widen every byte to a 16-bit lane. */
+ vsrc_wide = vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(src))));
+ vdst_wide = vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(dst))));
+ /* Per-lane (src - dst) * scale. With a byte difference of up to +/-255 and a scale of up to 256 the product can exceed the int16 range. NOTE(review): only the low 8 bits of the final sum are kept, which look unaffected by a 16-bit wrap - confirm. */
+ vdiff = vsrc_wide - vdst_wide;
+ vdiff *= vscale;
+ /* Signed (arithmetic) shift right by 8 brings the scaled difference back to channel range. */
+ vdiff = vshrq_n_s16(vdiff, 8);
+ /* Add the scaled difference onto the destination channels. */
+ vdst_wide += vdiff;
+ /* Narrow each 16-bit lane to 8 bits, keeping the low byte of every lane. */
+ res = vmovn_u16(vreinterpretq_u16_s16(vdst_wide));
+ /* Return lane 0 as the blended pixel; lane 1 holds the same value because both inputs were splatted with vdup_n_u32. */
+ return vget_lane_u32(vreinterpret_u32_u8(res), 0);
+}
+
+static inline SkPMColor SkFourByteInterp_neon(SkPMColor src, SkPMColor dst, /* Interpolates between dst and src using a byte-sized weight; forwards to SkFourByteInterp256_neon */
+ U8CPU srcWeight) { /* srcWeight: weight applied to src; asserted below to be at most 255 */
+ SkASSERT(srcWeight <= 255);
+ unsigned scale = SkAlpha255To256(srcWeight); /* SkAlpha255To256 is defined elsewhere; presumably maps 0..255 onto the 0..256 scale the helper asserts - verify */
+ return SkFourByteInterp256_neon(src, dst, scale);
+}
+
#endif /* #ifndef SkColor_opts_neon_DEFINED */
diff --git a/src/opts/SkXfermode_opts_arm_neon.cpp b/src/opts/SkXfermode_opts_arm_neon.cpp
index b8d8ef521b..7435dd44de 100644
--- a/src/opts/SkXfermode_opts_arm_neon.cpp
+++ b/src/opts/SkXfermode_opts_arm_neon.cpp
@@ -632,7 +632,7 @@ void SkNEONProcCoeffXfermode::xfer32(SkPMColor dst[], const SkPMColor src[],
SkPMColor dstC = dst[i];
SkPMColor C = proc(src[i], dstC);
if (a != 0xFF) {
- C = SkFourByteInterp(C, dstC, a);
+ C = SkFourByteInterp_neon(C, dstC, a);
}
dst[i] = C;
}
@@ -700,7 +700,7 @@ void SkNEONProcCoeffXfermode::xfer16(uint16_t* SK_RESTRICT dst,
SkPMColor dstC = SkPixel16ToPixel32(dst[i]);
SkPMColor C = proc(src[i], dstC);
if (0xFF != a) {
- C = SkFourByteInterp(C, dstC, a);
+ C = SkFourByteInterp_neon(C, dstC, a);
}
dst[i] = SkPixel32ToPixel16_ToU16(C);
}