From df187c7eb27d7616b75d91f3329deb97c4cd6de2 Mon Sep 17 00:00:00 2001 From: "commit-bot@chromium.org" Date: Wed, 9 Oct 2013 14:39:46 +0000 Subject: ARM Skia NEON patches - 28 - Xfermode: SIMD modeprocs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Xfermode: allow for SIMD modeprocs This patch introduces the ability to have SIMD Xfermode modeprocs. In the NEON implementation, SIMD modeprocs will process 8 pixels at a time. Signed-off-by: Kévin PETIT BUG= Committed: http://code.google.com/p/skia/source/detail?r=11654 R=djsollen@google.com, mtklein@google.com, reed@google.com Author: kevin.petit.arm@gmail.com Review URL: https://codereview.chromium.org/23644006 git-svn-id: http://skia.googlecode.com/svn/trunk@11669 2bbb7eff-a529-9590-31e7-b0007b416f81 --- gyp/opts.gyp | 3 + include/core/SkXfermode.h | 4 + src/core/SkXfermode.cpp | 168 ++++++++++++++++---------------------- src/core/SkXfermode_proccoeff.h | 63 ++++++++++++++ src/opts/SkXfermode_opts_arm.cpp | 158 +++++++++++++++++++++++++++++++++++ src/opts/SkXfermode_opts_none.cpp | 11 +++ 6 files changed, 308 insertions(+), 99 deletions(-) create mode 100644 src/core/SkXfermode_proccoeff.h create mode 100644 src/opts/SkXfermode_opts_arm.cpp create mode 100644 src/opts/SkXfermode_opts_none.cpp diff --git a/gyp/opts.gyp b/gyp/opts.gyp index 4f5eda88f6..04966ba67b 100644 --- a/gyp/opts.gyp +++ b/gyp/opts.gyp @@ -52,6 +52,7 @@ '../src/opts/SkBlitRow_opts_SSE2.cpp', '../src/opts/SkBlitRect_opts_SSE2.cpp', '../src/opts/SkUtils_opts_SSE2.cpp', + '../src/opts/SkXfermode_opts_none.cpp', ], }], [ 'skia_arch_type == "arm" and arm_version >= 7', { @@ -76,6 +77,7 @@ '../src/opts/SkBlitMask_opts_arm.cpp', '../src/opts/SkBlitRow_opts_arm.cpp', '../src/opts/SkBlitRow_opts_arm.h', + '../src/opts/SkXfermode_opts_arm.cpp', ], 'conditions': [ [ 'arm_neon == 1 or arm_neon_optional == 1', { @@ -99,6 +101,7 @@ '../src/opts/SkBlitMask_opts_none.cpp', '../src/opts/SkBlitRow_opts_none.cpp', '../src/opts/SkUtils_opts_none.cpp', + '../src/opts/SkXfermode_opts_none.cpp', ], }], ], diff --git a/include/core/SkXfermode.h b/include/core/SkXfermode.h index 04f3bfe5b6..ed07bd5821 100644 --- a/include/core/SkXfermode.h +++ b/include/core/SkXfermode.h @@ -275,6 +275,10 @@ protected: fProc = proc; } + SkXfermodeProc getProc() const { + return fProc; + } + private: SkXfermodeProc fProc; diff --git a/src/core/SkXfermode.cpp b/src/core/SkXfermode.cpp index 8c6eb2ca68..6f2fee6913 100644 --- a/src/core/SkXfermode.cpp +++ b/src/core/SkXfermode.cpp @@ -8,6 +8,7 @@ #include "SkXfermode.h" +#include "SkXfermode_proccoeff.h" #include "SkColorPriv.h" #include "SkFlattenableBuffers.h" #include "SkMathPriv.h" @@ -624,16 +625,7 @@ static SkPMColor luminosity_modeproc(SkPMColor src, SkPMColor dst) { return SkPackARGB32(a, r, g, b); } - -struct ProcCoeff { - SkXfermodeProc fProc; - SkXfermode::Coeff fSC; - SkXfermode::Coeff fDC; -}; - -#define CANNOT_USE_COEFF SkXfermode::Coeff(-1) - -static const ProcCoeff gProcCoeffs[] = { +const ProcCoeff gProcCoeffs[] = { { clear_modeproc, SkXfermode::kZero_Coeff, SkXfermode::kZero_Coeff }, { src_modeproc, SkXfermode::kOne_Coeff, SkXfermode::kZero_Coeff }, { dst_modeproc, SkXfermode::kZero_Coeff, SkXfermode::kOne_Coeff }, @@ -1345,83 +1337,51 @@ GrEffectRef* XferEffect::TestCreate(SkRandom* rand, /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// -class SkProcCoeffXfermode : public SkProcXfermode { -public: - SkProcCoeffXfermode(const ProcCoeff& rec, Mode mode) - : INHERITED(rec.fProc) { - fMode = mode; - // these may be valid, or may be CANNOT_USE_COEFF - fSrcCoeff = rec.fSC; - fDstCoeff = rec.fDC; +bool SkProcCoeffXfermode::asMode(Mode* mode) const { + if (mode) { + *mode = fMode; } + return true; +} - virtual bool asMode(Mode* mode) const SK_OVERRIDE { - if (mode) { - *mode = fMode; - } - return true; +bool SkProcCoeffXfermode::asCoeff(Coeff* sc, Coeff* dc) const { + if (CANNOT_USE_COEFF == fSrcCoeff) { + return false; } - virtual bool asCoeff(Coeff* sc, Coeff* dc) const SK_OVERRIDE { - if (CANNOT_USE_COEFF == fSrcCoeff) { - return false; - } - - if (sc) { - *sc = fSrcCoeff; - } - if (dc) { - *dc = fDstCoeff; - } - return true; + if (sc) { + *sc = fSrcCoeff; + } + if (dc) { + *dc = fDstCoeff; } + return true; +} #if SK_SUPPORT_GPU - virtual bool asNewEffectOrCoeff(GrContext*, - GrEffectRef** effect, - Coeff* src, - Coeff* dst, - GrTexture* background) const SK_OVERRIDE { - if (this->asCoeff(src, dst)) { - return true; - } - if (XferEffect::IsSupportedMode(fMode)) { - if (NULL != effect) { - *effect = XferEffect::Create(fMode, background); - SkASSERT(NULL != *effect); - } - return true; +bool SkProcCoeffXfermode::asNewEffectOrCoeff(GrContext*, + GrEffectRef** effect, + Coeff* src, + Coeff* dst, + GrTexture* background) const { + if (this->asCoeff(src, dst)) { + return true; + } + if (XferEffect::IsSupportedMode(fMode)) { + if (NULL != effect) { + *effect = XferEffect::Create(fMode, background); + SkASSERT(NULL != *effect); } - return false; + return true; } + return false; +} #endif - SK_DEVELOPER_TO_STRING() - SK_DECLARE_PUBLIC_FLATTENABLE_DESERIALIZATION_PROCS(SkProcCoeffXfermode) - -protected: - SkProcCoeffXfermode(SkFlattenableReadBuffer& buffer) : INHERITED(buffer) { - fMode = (SkXfermode::Mode)buffer.read32(); - - const ProcCoeff& rec = gProcCoeffs[fMode]; - // these may be valid, or may be CANNOT_USE_COEFF - fSrcCoeff = rec.fSC; - fDstCoeff = rec.fDC; - // now update our function-ptr in the super class - this->INHERITED::setProc(rec.fProc); - } - - virtual void flatten(SkFlattenableWriteBuffer& buffer) const SK_OVERRIDE { - this->INHERITED::flatten(buffer); - buffer.write32(fMode); - } - -private: - Mode fMode; - Coeff fSrcCoeff, fDstCoeff; - - typedef SkProcXfermode INHERITED; -}; +void SkProcCoeffXfermode::flatten(SkFlattenableWriteBuffer& buffer) const { + this->INHERITED::flatten(buffer); + buffer.write32(fMode); +} const char* SkXfermode::ModeName(Mode mode) { SkASSERT((unsigned) mode <= (unsigned)kLastMode); @@ -1693,6 +1653,9 @@ void SkXfermode::Term() { } } +extern SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec, + SkXfermode::Mode mode); + SkXfermode* SkXfermode::Create(Mode mode) { SkASSERT(SK_ARRAY_COUNT(gProcCoeffs) == kModeCount); SkASSERT(SK_ARRAY_COUNT(gCachedXfermodes) == kModeCount); @@ -1714,29 +1677,36 @@ SkXfermode* SkXfermode::Create(Mode mode) { SkXfermode* xfer = gCachedXfermodes[mode]; if (NULL == xfer) { const ProcCoeff& rec = gProcCoeffs[mode]; - // All modes can in theory be represented by the ProcCoeff rec, since - // it contains function ptrs. However, a few modes are both simple and - // commonly used, so we call those out for their own subclasses here. - switch (mode) { - case kClear_Mode: - xfer = SkNEW_ARGS(SkClearXfermode, (rec)); - break; - case kSrc_Mode: - xfer = SkNEW_ARGS(SkSrcXfermode, (rec)); - break; - case kSrcOver_Mode: - SkASSERT(false); // should not land here - break; - case kDstIn_Mode: - xfer = SkNEW_ARGS(SkDstInXfermode, (rec)); - break; - case kDstOut_Mode: - xfer = SkNEW_ARGS(SkDstOutXfermode, (rec)); - break; - default: - // no special-case, just rely in the rec and its function-ptrs - xfer = SkNEW_ARGS(SkProcCoeffXfermode, (rec, mode)); - break; + + // check if we have a platform optim for that + SkProcCoeffXfermode* xfm = SkPlatformXfermodeFactory(rec, mode); + if (xfm != NULL) { + xfer = xfm; + } else { + // All modes can in theory be represented by the ProcCoeff rec, since + // it contains function ptrs. However, a few modes are both simple and + // commonly used, so we call those out for their own subclasses here. + switch (mode) { + case kClear_Mode: + xfer = SkNEW_ARGS(SkClearXfermode, (rec)); + break; + case kSrc_Mode: + xfer = SkNEW_ARGS(SkSrcXfermode, (rec)); + break; + case kSrcOver_Mode: + SkASSERT(false); // should not land here + break; + case kDstIn_Mode: + xfer = SkNEW_ARGS(SkDstInXfermode, (rec)); + break; + case kDstOut_Mode: + xfer = SkNEW_ARGS(SkDstOutXfermode, (rec)); + break; + default: + // no special-case, just rely in the rec and its function-ptrs + xfer = SkNEW_ARGS(SkProcCoeffXfermode, (rec, mode)); + break; + } } gCachedXfermodes[mode] = xfer; } diff --git a/src/core/SkXfermode_proccoeff.h b/src/core/SkXfermode_proccoeff.h new file mode 100644 index 0000000000..60ebe3ff4c --- /dev/null +++ b/src/core/SkXfermode_proccoeff.h @@ -0,0 +1,63 @@ +#ifndef SkXfermode_proccoeff_DEFINED +#define SkXfermode_proccoeff_DEFINED + +#include "SkXfermode.h" +#include "SkFlattenableBuffers.h" + +struct ProcCoeff { + SkXfermodeProc fProc; + SkXfermode::Coeff fSC; + SkXfermode::Coeff fDC; +}; + +#define CANNOT_USE_COEFF SkXfermode::Coeff(-1) + +extern const ProcCoeff gProcCoeffs[]; + +class SkProcCoeffXfermode : public SkProcXfermode { +public: + SkProcCoeffXfermode(const ProcCoeff& rec, Mode mode) + : INHERITED(rec.fProc) { + fMode = mode; + // these may be valid, or may be CANNOT_USE_COEFF + fSrcCoeff = rec.fSC; + fDstCoeff = rec.fDC; + } + + virtual bool asMode(Mode* mode) const SK_OVERRIDE; + + virtual bool asCoeff(Coeff* sc, Coeff* dc) const SK_OVERRIDE; + +#if SK_SUPPORT_GPU + virtual bool asNewEffectOrCoeff(GrContext*, + GrEffectRef** effect, + Coeff* src, + Coeff* dst, + GrTexture* background) const SK_OVERRIDE; +#endif + + SK_DEVELOPER_TO_STRING() + SK_DECLARE_PUBLIC_FLATTENABLE_DESERIALIZATION_PROCS(SkProcCoeffXfermode) + +protected: + SkProcCoeffXfermode(SkFlattenableReadBuffer& buffer) : INHERITED(buffer) { + fMode = (SkXfermode::Mode)buffer.read32(); + + const ProcCoeff& rec = gProcCoeffs[fMode]; + // these may be valid, or may be CANNOT_USE_COEFF + fSrcCoeff = rec.fSC; + fDstCoeff = rec.fDC; + // now update our function-ptr in the super class + this->INHERITED::setProc(rec.fProc); + } + + virtual void flatten(SkFlattenableWriteBuffer& buffer) const SK_OVERRIDE; + +private: + Mode fMode; + Coeff fSrcCoeff, fDstCoeff; + + typedef SkProcXfermode INHERITED; +}; + +#endif // #ifndef SkXfermode_proccoeff_DEFINED diff --git a/src/opts/SkXfermode_opts_arm.cpp b/src/opts/SkXfermode_opts_arm.cpp new file mode 100644 index 0000000000..db5d5317e3 --- /dev/null +++ b/src/opts/SkXfermode_opts_arm.cpp @@ -0,0 +1,158 @@ +#include "SkXfermode.h" +#include "SkXfermode_proccoeff.h" +#include "SkColorPriv.h" +#include "SkUtilsArm.h" + +#if !SK_ARM_NEON_IS_NONE + +#include + +//////////////////////////////////////////////////////////////////////////////// + +typedef uint8x8x4_t (*SkXfermodeProcSIMD)(uint8x8x4_t src, uint8x8x4_t dst); + +class SkNEONProcCoeffXfermode : public SkProcCoeffXfermode { +public: + SkNEONProcCoeffXfermode(const ProcCoeff& rec, SkXfermode::Mode mode, + SkXfermodeProcSIMD procSIMD) + : INHERITED(rec, mode), fProcSIMD(procSIMD) {} + + virtual void xfer32(SkPMColor dst[], const SkPMColor src[], int count, + const SkAlpha aa[]) const SK_OVERRIDE; + + SK_DEVELOPER_TO_STRING() + SK_DECLARE_PUBLIC_FLATTENABLE_DESERIALIZATION_PROCS(SkNEONProcCoeffXfermode) + +private: + SkNEONProcCoeffXfermode(SkFlattenableReadBuffer& buffer) + : INHERITED(buffer) { + + fProcSIMD = NULL; + if (!buffer.isCrossProcess()) { + fProcSIMD = (SkXfermodeProcSIMD)buffer.readFunctionPtr(); + } + } + + virtual void flatten(SkFlattenableWriteBuffer& buffer) const SK_OVERRIDE; + + SkXfermodeProcSIMD fProcSIMD; + typedef SkProcCoeffXfermode INHERITED; +}; + + +void SkNEONProcCoeffXfermode::xfer32(SkPMColor dst[], const SkPMColor src[], + int count, const SkAlpha aa[]) const { + SkASSERT(dst && src && count >= 0); + + SkXfermodeProc proc = this->getProc(); + SkXfermodeProcSIMD procSIMD = fProcSIMD; + + if (NULL == aa) { + // Unrolled NEON code + while (count >= 8) { + uint8x8x4_t vsrc, vdst, vres; + + asm volatile ( + "vld4.u8 %h[vsrc], [%[src]]! \t\n" + "vld4.u8 %h[vdst], [%[dst]] \t\n" + : [vsrc] "=w" (vsrc), [vdst] "=w" (vdst) + : [src] "r" (src), [dst] "r" (dst) + : + ); + + vres = procSIMD(vsrc, vdst); + + vst4_u8((uint8_t*)dst, vres); + + count -= 8; + dst += 8; + } + // Leftovers + for (int i = 0; i < count; i++) { + dst[i] = proc(src[i], dst[i]); + } + } else { + for (int i = count - 1; i >= 0; --i) { + unsigned a = aa[i]; + if (0 != a) { + SkPMColor dstC = dst[i]; + SkPMColor C = proc(src[i], dstC); + if (a != 0xFF) { + C = SkFourByteInterp(C, dstC, a); + } + dst[i] = C; + } + } + } +} + +#ifdef SK_DEVELOPER +void SkNEONProcCoeffXfermode::toString(SkString* str) const { + this->INHERITED::toString(str); +} +#endif + +void SkNEONProcCoeffXfermode::flatten(SkFlattenableWriteBuffer& buffer) const { + this->INHERITED::flatten(buffer); + if (!buffer.isCrossProcess()) { + buffer.writeFunctionPtr((void*)fProcSIMD); + } +} + +//////////////////////////////////////////////////////////////////////////////// + +SkXfermodeProcSIMD gNEONXfermodeProcs[] = { + [SkXfermode::kClear_Mode] = NULL, + [SkXfermode::kSrc_Mode] = NULL, + [SkXfermode::kDst_Mode] = NULL, + [SkXfermode::kSrcOver_Mode] = NULL, + [SkXfermode::kDstOver_Mode] = NULL, + [SkXfermode::kSrcIn_Mode] = NULL, + [SkXfermode::kDstIn_Mode] = NULL, + [SkXfermode::kSrcOut_Mode] = NULL, + [SkXfermode::kDstOut_Mode] = NULL, + [SkXfermode::kSrcATop_Mode] = NULL, + [SkXfermode::kDstATop_Mode] = NULL, + [SkXfermode::kXor_Mode] = NULL, + [SkXfermode::kPlus_Mode] = NULL, + [SkXfermode::kModulate_Mode]= NULL, + [SkXfermode::kScreen_Mode] = NULL, + + [SkXfermode::kOverlay_Mode] = NULL, + [SkXfermode::kDarken_Mode] = NULL, + [SkXfermode::kLighten_Mode] = NULL, + [SkXfermode::kColorDodge_Mode] = NULL, + [SkXfermode::kColorBurn_Mode] = NULL, + [SkXfermode::kHardLight_Mode] = NULL, + [SkXfermode::kSoftLight_Mode] = NULL, + [SkXfermode::kDifference_Mode] = NULL, + [SkXfermode::kExclusion_Mode] = NULL, + [SkXfermode::kMultiply_Mode] = NULL, + + [SkXfermode::kHue_Mode] = NULL, + [SkXfermode::kSaturation_Mode] = NULL, + [SkXfermode::kColor_Mode] = NULL, + [SkXfermode::kLuminosity_Mode] = NULL, +}; + +SK_COMPILE_ASSERT( + SK_ARRAY_COUNT(gNEONXfermodeProcs) == SkXfermode::kLastMode + 1, + mode_count_arm +); + +#endif + +SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec, + SkXfermode::Mode mode) { +#if !SK_ARM_NEON_IS_NONE + #if SK_ARM_NEON_IS_DYNAMIC + if ((sk_cpu_arm_has_neon()) && (gNEONXfermodeProcs[mode] != NULL)) { + #elif SK_ARM_NEON_IS_ALWAYS + if (gNEONXfermodeProcs[mode] != NULL) { + #endif + return SkNEW_ARGS(SkNEONProcCoeffXfermode, + (rec, mode, gNEONXfermodeProcs[mode])); + } +#endif + return NULL; +} diff --git a/src/opts/SkXfermode_opts_none.cpp b/src/opts/SkXfermode_opts_none.cpp new file mode 100644 index 0000000000..ca53fa0dd0 --- /dev/null +++ b/src/opts/SkXfermode_opts_none.cpp @@ -0,0 +1,11 @@ +#include "SkXfermode.h" +#include "SkXfermode_proccoeff.h" + +// The prototype below is for Clang +extern SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec, + SkXfermode::Mode mode); + +SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec, + SkXfermode::Mode mode) { + return NULL; +} -- cgit v1.2.3