aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar commit-bot@chromium.org <commit-bot@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81>2013-12-06 11:32:27 +0000
committerGravatar commit-bot@chromium.org <commit-bot@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81>2013-12-06 11:32:27 +0000
commitd611864e679a58865b111e74fe7ac919cba42163 (patch)
treee4fc8d0a73242c1be46e6fa9472eebd0ac1f4275
parent726b97a3c497cdec2fb9f7fd01da4109e2e5ac46 (diff)
ARM Skia NEON patches - 32 - Xfermode: 1-pixel NEON modeprocs
In some cases, it's easy to provide a NEON version of the 1-pixel modeprocs. Combined with https://codereview.chromium.org/23724013/ (merged) it allows up to 35% speed improvement on Xfermodes when aa is non-NULL. Signed-off-by: Kévin PETIT <kevin.petit@arm.com> BUG= R=djsollen@google.com, reed@google.com, mtklein@google.com, luisjoseromeroesclusa@hotmail.com Author: kevin.petit.arm@gmail.com Review URL: https://codereview.chromium.org/104883004 git-svn-id: http://skia.googlecode.com/svn/trunk@12525 2bbb7eff-a529-9590-31e7-b0007b416f81
-rw-r--r--src/core/SkXfermode.cpp9
-rw-r--r--src/opts/SkXfermode_opts_arm.cpp10
-rw-r--r--src/opts/SkXfermode_opts_arm_neon.cpp170
-rw-r--r--src/opts/SkXfermode_opts_arm_neon.h6
-rw-r--r--src/opts/SkXfermode_opts_none.cpp8
5 files changed, 201 insertions, 2 deletions
diff --git a/src/core/SkXfermode.cpp b/src/core/SkXfermode.cpp
index 313e2aef8c..8cb79c2dbd 100644
--- a/src/core/SkXfermode.cpp
+++ b/src/core/SkXfermode.cpp
@@ -1669,6 +1669,7 @@ void SkXfermode::Term() {
extern SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec,
SkXfermode::Mode mode);
+extern SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode);
SkXfermode* SkXfermode::Create(Mode mode) {
SkASSERT(SK_ARRAY_COUNT(gProcCoeffs) == kModeCount);
@@ -1690,7 +1691,13 @@ SkXfermode* SkXfermode::Create(Mode mode) {
SkXfermode* xfer = gCachedXfermodes[mode];
if (NULL == xfer) {
- const ProcCoeff& rec = gProcCoeffs[mode];
+ ProcCoeff rec = gProcCoeffs[mode];
+
+ SkXfermodeProc pp = SkPlatformXfermodeProcFactory(mode);
+
+ if (pp != NULL) {
+ rec.fProc = pp;
+ }
// check if we have a platform optim for that
SkProcCoeffXfermode* xfm = SkPlatformXfermodeFactory(rec, mode);
diff --git a/src/opts/SkXfermode_opts_arm.cpp b/src/opts/SkXfermode_opts_arm.cpp
index eb3b3016e3..2a796d6f6a 100644
--- a/src/opts/SkXfermode_opts_arm.cpp
+++ b/src/opts/SkXfermode_opts_arm.cpp
@@ -5,12 +5,22 @@
extern SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_neon(const ProcCoeff& rec,
SkXfermode::Mode mode);
+extern SkXfermodeProc SkPlatformXfermodeProcFactory_impl_neon(SkXfermode::Mode mode);
+
SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl(const ProcCoeff& rec,
SkXfermode::Mode mode) {
return NULL;
}
+SkXfermodeProc SkPlatformXfermodeProcFactory_impl(SkXfermode::Mode mode) {
+ return NULL;
+}
+
SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec,
SkXfermode::Mode mode) {
return SK_ARM_NEON_WRAP(SkPlatformXfermodeFactory_impl)(rec, mode);
}
+
+SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode) {
+ return SK_ARM_NEON_WRAP(SkPlatformXfermodeProcFactory_impl)(mode);
+}
diff --git a/src/opts/SkXfermode_opts_arm_neon.cpp b/src/opts/SkXfermode_opts_arm_neon.cpp
index 7435dd44de..6a79b73726 100644
--- a/src/opts/SkXfermode_opts_arm_neon.cpp
+++ b/src/opts/SkXfermode_opts_arm_neon.cpp
@@ -93,6 +93,133 @@ static inline uint8x8_t clamp_div255round_simd8_32(int32x4_t val1, int32x4_t val
}
////////////////////////////////////////////////////////////////////////////////
+// 1 pixel modeprocs
+////////////////////////////////////////////////////////////////////////////////
+
+// kSrcATop_Mode, //!< [Da, Sc * Da + (1 - Sa) * Dc]
+SkPMColor srcatop_modeproc_neon(SkPMColor src, SkPMColor dst) {
+ unsigned sa = SkGetPackedA32(src);
+ unsigned da = SkGetPackedA32(dst);
+ unsigned isa = 255 - sa;
+
+ uint8x8_t vda, visa, vsrc, vdst;
+
+ vda = vdup_n_u8(da);
+ visa = vdup_n_u8(isa);
+
+ uint16x8_t vsrc_wide, vdst_wide;
+ vsrc_wide = vmull_u8(vda, vreinterpret_u8_u32(vdup_n_u32(src)));
+ vdst_wide = vmull_u8(visa, vreinterpret_u8_u32(vdup_n_u32(dst)));
+
+ vsrc_wide += vdupq_n_u16(128);
+ vsrc_wide += vshrq_n_u16(vsrc_wide, 8);
+
+ vdst_wide += vdupq_n_u16(128);
+ vdst_wide += vshrq_n_u16(vdst_wide, 8);
+
+ vsrc = vshrn_n_u16(vsrc_wide, 8);
+ vdst = vshrn_n_u16(vdst_wide, 8);
+
+ vsrc += vdst;
+ vsrc = vset_lane_u8(da, vsrc, 3);
+
+ return vget_lane_u32(vreinterpret_u32_u8(vsrc), 0);
+}
+
+// kDstATop_Mode, //!< [Sa, Sa * Dc + Sc * (1 - Da)]
+SkPMColor dstatop_modeproc_neon(SkPMColor src, SkPMColor dst) {
+ unsigned sa = SkGetPackedA32(src);
+ unsigned da = SkGetPackedA32(dst);
+ unsigned ida = 255 - da;
+
+ uint8x8_t vsa, vida, vsrc, vdst;
+
+ vsa = vdup_n_u8(sa);
+ vida = vdup_n_u8(ida);
+
+ uint16x8_t vsrc_wide, vdst_wide;
+ vsrc_wide = vmull_u8(vida, vreinterpret_u8_u32(vdup_n_u32(src)));
+ vdst_wide = vmull_u8(vsa, vreinterpret_u8_u32(vdup_n_u32(dst)));
+
+ vsrc_wide += vdupq_n_u16(128);
+ vsrc_wide += vshrq_n_u16(vsrc_wide, 8);
+
+ vdst_wide += vdupq_n_u16(128);
+ vdst_wide += vshrq_n_u16(vdst_wide, 8);
+
+ vsrc = vshrn_n_u16(vsrc_wide, 8);
+ vdst = vshrn_n_u16(vdst_wide, 8);
+
+ vsrc += vdst;
+ vsrc = vset_lane_u8(sa, vsrc, 3);
+
+ return vget_lane_u32(vreinterpret_u32_u8(vsrc), 0);
+}
+
+// kXor_Mode [Sa + Da - 2 * Sa * Da, Sc * (1 - Da) + (1 - Sa) * Dc]
+SkPMColor xor_modeproc_neon(SkPMColor src, SkPMColor dst) {
+ unsigned sa = SkGetPackedA32(src);
+ unsigned da = SkGetPackedA32(dst);
+ unsigned ret_alpha = sa + da - (SkAlphaMulAlpha(sa, da) << 1);
+ unsigned isa = 255 - sa;
+ unsigned ida = 255 - da;
+
+ uint8x8_t vsrc, vdst, visa, vida;
+ uint16x8_t vsrc_wide, vdst_wide;
+
+ visa = vdup_n_u8(isa);
+ vida = vdup_n_u8(ida);
+ vsrc = vreinterpret_u8_u32(vdup_n_u32(src));
+ vdst = vreinterpret_u8_u32(vdup_n_u32(dst));
+
+ vsrc_wide = vmull_u8(vsrc, vida);
+ vdst_wide = vmull_u8(vdst, visa);
+
+ vsrc_wide += vdupq_n_u16(128);
+ vsrc_wide += vshrq_n_u16(vsrc_wide, 8);
+
+ vdst_wide += vdupq_n_u16(128);
+ vdst_wide += vshrq_n_u16(vdst_wide, 8);
+
+ vsrc = vshrn_n_u16(vsrc_wide, 8);
+ vdst = vshrn_n_u16(vdst_wide, 8);
+
+ vsrc += vdst;
+
+ vsrc = vset_lane_u8(ret_alpha, vsrc, 3);
+
+ return vget_lane_u32(vreinterpret_u32_u8(vsrc), 0);
+}
+
+// kPlus_Mode
+SkPMColor plus_modeproc_neon(SkPMColor src, SkPMColor dst) {
+ uint8x8_t vsrc, vdst;
+ vsrc = vreinterpret_u8_u32(vdup_n_u32(src));
+ vdst = vreinterpret_u8_u32(vdup_n_u32(dst));
+ vsrc = vqadd_u8(vsrc, vdst);
+
+ return vget_lane_u32(vreinterpret_u32_u8(vsrc), 0);
+}
+
+// kModulate_Mode
+SkPMColor modulate_modeproc_neon(SkPMColor src, SkPMColor dst) {
+ uint8x8_t vsrc, vdst, vres;
+ uint16x8_t vres_wide;
+
+ vsrc = vreinterpret_u8_u32(vdup_n_u32(src));
+ vdst = vreinterpret_u8_u32(vdup_n_u32(dst));
+
+ vres_wide = vmull_u8(vsrc, vdst);
+
+ vres_wide += vdupq_n_u16(128);
+ vres_wide += vshrq_n_u16(vres_wide, 8);
+
+ vres = vshrn_n_u16(vres_wide, 8);
+
+ return vget_lane_u32(vreinterpret_u32_u8(vres), 0);
+}
+
+////////////////////////////////////////////////////////////////////////////////
// 8 pixels modeprocs
////////////////////////////////////////////////////////////////////////////////
@@ -755,6 +882,45 @@ SK_COMPILE_ASSERT(
mode_count_arm
);
+SkXfermodeProc gNEONXfermodeProcs1[] = {
+ NULL, // kClear_Mode
+ NULL, // kSrc_Mode
+ NULL, // kDst_Mode
+ NULL, // kSrcOver_Mode
+ NULL, // kDstOver_Mode
+ NULL, // kSrcIn_Mode
+ NULL, // kDstIn_Mode
+ NULL, // kSrcOut_Mode
+ NULL, // kDstOut_Mode
+ srcatop_modeproc_neon,
+ dstatop_modeproc_neon,
+ xor_modeproc_neon,
+ plus_modeproc_neon,
+ modulate_modeproc_neon,
+ NULL, // kScreen_Mode
+
+ NULL, // kOverlay_Mode
+ NULL, // kDarken_Mode
+ NULL, // kLighten_Mode
+ NULL, // kColorDodge_Mode
+ NULL, // kColorBurn_Mode
+ NULL, // kHardLight_Mode
+ NULL, // kSoftLight_Mode
+ NULL, // kDifference_Mode
+ NULL, // kExclusion_Mode
+ NULL, // kMultiply_Mode
+
+ NULL, // kHue_Mode
+ NULL, // kSaturation_Mode
+ NULL, // kColor_Mode
+ NULL, // kLuminosity_Mode
+};
+
+SK_COMPILE_ASSERT(
+ SK_ARRAY_COUNT(gNEONXfermodeProcs1) == SkXfermode::kLastMode + 1,
+ mode1_count_arm
+);
+
SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_neon(const ProcCoeff& rec,
SkXfermode::Mode mode) {
@@ -765,3 +931,7 @@ SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_neon(const ProcCoeff& rec,
}
return NULL;
}
+
+SkXfermodeProc SkPlatformXfermodeProcFactory_impl_neon(SkXfermode::Mode mode) {
+ return gNEONXfermodeProcs1[mode];
+}
diff --git a/src/opts/SkXfermode_opts_arm_neon.h b/src/opts/SkXfermode_opts_arm_neon.h
index 4c88fc7a63..a8d438195e 100644
--- a/src/opts/SkXfermode_opts_arm_neon.h
+++ b/src/opts/SkXfermode_opts_arm_neon.h
@@ -26,4 +26,10 @@ private:
typedef SkProcCoeffXfermode INHERITED;
};
+extern SkPMColor srcatop_modeproc_neon(SkPMColor src, SkPMColor dst);
+extern SkPMColor dstatop_modeproc_neon(SkPMColor src, SkPMColor dst);
+extern SkPMColor xor_modeproc_neon(SkPMColor src, SkPMColor dst);
+extern SkPMColor plus_modeproc_neon(SkPMColor src, SkPMColor dst);
+extern SkPMColor modulate_modeproc_neon(SkPMColor src, SkPMColor dst);
+
#endif //#ifdef SkXfermode_opts_arm_neon_DEFINED
diff --git a/src/opts/SkXfermode_opts_none.cpp b/src/opts/SkXfermode_opts_none.cpp
index ca53fa0dd0..7c46fdd93c 100644
--- a/src/opts/SkXfermode_opts_none.cpp
+++ b/src/opts/SkXfermode_opts_none.cpp
@@ -1,11 +1,17 @@
#include "SkXfermode.h"
#include "SkXfermode_proccoeff.h"
-// The prototype below is for Clang
+// The prototypes below are for Clang
extern SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec,
SkXfermode::Mode mode);
+extern SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode);
+
SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec,
SkXfermode::Mode mode) {
return NULL;
}
+
+SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode) {
+ return NULL;
+}