aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r-- gyp/opts.gyp 1
-rw-r--r-- src/core/SkBitmapProcState.cpp 365
-rw-r--r-- src/core/SkBitmapProcState_filter.h 89
-rw-r--r-- src/core/SkBitmapProcState_procs.h 343
-rw-r--r-- src/core/SkBitmapProcState_shaderproc.h 6
-rw-r--r-- src/opts/SkBitmapProcState_arm_neon.cpp 92
-rw-r--r-- src/opts/SkBitmapProcState_filter_neon.h 88
7 files changed, 564 insertions, 420 deletions
diff --git a/gyp/opts.gyp b/gyp/opts.gyp
index a9f85cc83d..2ac395c027 100644
--- a/gyp/opts.gyp
+++ b/gyp/opts.gyp
@@ -142,6 +142,7 @@
'sources': [
'../src/opts/memset16_neon.S',
'../src/opts/memset32_neon.S',
+ '../src/opts/SkBitmapProcState_arm_neon.cpp',
'../src/opts/SkBitmapProcState_matrixProcs_neon.cpp',
'../src/opts/SkBitmapProcState_matrix_clamp_neon.h',
'../src/opts/SkBitmapProcState_matrix_repeat_neon.h',
diff --git a/src/core/SkBitmapProcState.cpp b/src/core/SkBitmapProcState.cpp
index 8326a324ef..01ef5a5360 100644
--- a/src/core/SkBitmapProcState.cpp
+++ b/src/core/SkBitmapProcState.cpp
@@ -6,338 +6,29 @@
* found in the LICENSE file.
*/
#include "SkBitmapProcState.h"
-#include "SkBitmapProcState_filter.h"
#include "SkColorPriv.h"
#include "SkFilterProc.h"
#include "SkPaint.h"
#include "SkShader.h" // for tilemodes
+#include "SkUtilsArm.h"
+
+#if !SK_ARM_NEON_IS_NONE
+// These are defined in src/opts/SkBitmapProcState_arm_neon.cpp
+extern const SkBitmapProcState::SampleProc16 gSkBitmapProcStateSample16_neon[];
+extern const SkBitmapProcState::SampleProc32 gSkBitmapProcStateSample32_neon[];
+extern void S16_D16_filter_DX_neon(const SkBitmapProcState&, const uint32_t*, int, uint16_t*);
+extern void Clamp_S16_D16_filter_DX_shaderproc_neon(const SkBitmapProcState&, int, int, uint16_t*, int);
+extern void Repeat_S16_D16_filter_DX_shaderproc_neon(const SkBitmapProcState&, int, int, uint16_t*, int);
+extern void SI8_opaque_D32_filter_DX_neon(const SkBitmapProcState&, const uint32_t*, int, SkPMColor*);
+extern void SI8_opaque_D32_filter_DX_shaderproc_neon(const SkBitmapProcState&, int, int, uint32_t*, int);
+extern void Clamp_SI8_opaque_D32_filter_DX_shaderproc_neon(const SkBitmapProcState&, int, int, uint32_t*, int);
+#endif
-// returns expanded * 5bits
-static inline uint32_t Filter_565_Expanded(unsigned x, unsigned y,
- uint32_t a00, uint32_t a01,
- uint32_t a10, uint32_t a11) {
- SkASSERT((unsigned)x <= 0xF);
- SkASSERT((unsigned)y <= 0xF);
-
- a00 = SkExpand_rgb_16(a00);
- a01 = SkExpand_rgb_16(a01);
- a10 = SkExpand_rgb_16(a10);
- a11 = SkExpand_rgb_16(a11);
-
- int xy = x * y >> 3;
- return a00 * (32 - 2*y - 2*x + xy) +
- a01 * (2*x - xy) +
- a10 * (2*y - xy) +
- a11 * xy;
-}
-
-// turn an expanded 565 * 5bits into SkPMColor
-// g:11 | r:10 | x:1 | b:10
-static inline SkPMColor SkExpanded_565_To_PMColor(uint32_t c) {
- unsigned r = (c >> 13) & 0xFF;
- unsigned g = (c >> 24);
- unsigned b = (c >> 2) & 0xFF;
- return SkPackARGB32(0xFF, r, g, b);
-}
-
-// returns answer in SkPMColor format
-static inline SkPMColor Filter_4444_D32(unsigned x, unsigned y,
- uint32_t a00, uint32_t a01,
- uint32_t a10, uint32_t a11) {
- SkASSERT((unsigned)x <= 0xF);
- SkASSERT((unsigned)y <= 0xF);
-
- a00 = SkExpand_4444(a00);
- a01 = SkExpand_4444(a01);
- a10 = SkExpand_4444(a10);
- a11 = SkExpand_4444(a11);
-
- int xy = x * y >> 4;
- uint32_t result = a00 * (16 - y - x + xy) +
- a01 * (x - xy) +
- a10 * (y - xy) +
- a11 * xy;
-
- return SkCompact_8888(result);
-}
-
-static inline U8CPU Filter_8(unsigned x, unsigned y,
- U8CPU a00, U8CPU a01,
- U8CPU a10, U8CPU a11) {
- SkASSERT((unsigned)x <= 0xF);
- SkASSERT((unsigned)y <= 0xF);
-
- int xy = x * y;
- unsigned result = a00 * (256 - 16*y - 16*x + xy) +
- a01 * (16*x - xy) +
- a10 * (16*y - xy) +
- a11 * xy;
-
- return result >> 8;
-}
-
-/*****************************************************************************
- *
- * D32 functions
- *
- */
-
-// SRC == 8888
-
-#define FILTER_PROC(x, y, a, b, c, d, dst) Filter_32_opaque(x, y, a, b, c, d, dst)
-
-#define MAKENAME(suffix) S32_opaque_D32 ## suffix
-#define DSTSIZE 32
-#define SRCTYPE SkPMColor
-#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_8888_Config); \
- SkASSERT(state.fAlphaScale == 256)
-#define RETURNDST(src) src
-#define SRC_TO_FILTER(src) src
-#include "SkBitmapProcState_sample.h"
-
-#undef FILTER_PROC
-#define FILTER_PROC(x, y, a, b, c, d, dst) Filter_32_alpha(x, y, a, b, c, d, dst, alphaScale)
-
-#define MAKENAME(suffix) S32_alpha_D32 ## suffix
-#define DSTSIZE 32
-#define SRCTYPE SkPMColor
-#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_8888_Config); \
- SkASSERT(state.fAlphaScale < 256)
-#define PREAMBLE(state) unsigned alphaScale = state.fAlphaScale
-#define RETURNDST(src) SkAlphaMulQ(src, alphaScale)
-#define SRC_TO_FILTER(src) src
-#include "SkBitmapProcState_sample.h"
-
-// SRC == 565
-
-#undef FILTER_PROC
-#define FILTER_PROC(x, y, a, b, c, d, dst) \
- do { \
- uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d); \
- *(dst) = SkExpanded_565_To_PMColor(tmp); \
- } while (0)
-
-#define MAKENAME(suffix) S16_opaque_D32 ## suffix
-#define DSTSIZE 32
-#define SRCTYPE uint16_t
-#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config); \
- SkASSERT(state.fAlphaScale == 256)
-#define RETURNDST(src) SkPixel16ToPixel32(src)
-#define SRC_TO_FILTER(src) src
-#include "SkBitmapProcState_sample.h"
-
-#undef FILTER_PROC
-#define FILTER_PROC(x, y, a, b, c, d, dst) \
- do { \
- uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d); \
- *(dst) = SkAlphaMulQ(SkExpanded_565_To_PMColor(tmp), alphaScale); \
- } while (0)
-
-#define MAKENAME(suffix) S16_alpha_D32 ## suffix
-#define DSTSIZE 32
-#define SRCTYPE uint16_t
-#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config); \
- SkASSERT(state.fAlphaScale < 256)
-#define PREAMBLE(state) unsigned alphaScale = state.fAlphaScale
-#define RETURNDST(src) SkAlphaMulQ(SkPixel16ToPixel32(src), alphaScale)
-#define SRC_TO_FILTER(src) src
-#include "SkBitmapProcState_sample.h"
-
-// SRC == Index8
-
-#undef FILTER_PROC
-#define FILTER_PROC(x, y, a, b, c, d, dst) Filter_32_opaque(x, y, a, b, c, d, dst)
-
-#define MAKENAME(suffix) SI8_opaque_D32 ## suffix
-#define DSTSIZE 32
-#define SRCTYPE uint8_t
-#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kIndex8_Config); \
- SkASSERT(state.fAlphaScale == 256)
-#define PREAMBLE(state) const SkPMColor* SK_RESTRICT table = state.fBitmap->getColorTable()->lockColors()
-#define RETURNDST(src) table[src]
-#define SRC_TO_FILTER(src) table[src]
-#define POSTAMBLE(state) state.fBitmap->getColorTable()->unlockColors(false)
-#include "SkBitmapProcState_sample.h"
-
-#undef FILTER_PROC
-#define FILTER_PROC(x, y, a, b, c, d, dst) Filter_32_alpha(x, y, a, b, c, d, dst, alphaScale)
-
-#define MAKENAME(suffix) SI8_alpha_D32 ## suffix
-#define DSTSIZE 32
-#define SRCTYPE uint8_t
-#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kIndex8_Config); \
- SkASSERT(state.fAlphaScale < 256)
-#define PREAMBLE(state) unsigned alphaScale = state.fAlphaScale; \
- const SkPMColor* SK_RESTRICT table = state.fBitmap->getColorTable()->lockColors()
-#define RETURNDST(src) SkAlphaMulQ(table[src], alphaScale)
-#define SRC_TO_FILTER(src) table[src]
-#define POSTAMBLE(state) state.fBitmap->getColorTable()->unlockColors(false)
-#include "SkBitmapProcState_sample.h"
-
-// SRC == 4444
-
-#undef FILTER_PROC
-#define FILTER_PROC(x, y, a, b, c, d, dst) *(dst) = Filter_4444_D32(x, y, a, b, c, d)
-
-#define MAKENAME(suffix) S4444_opaque_D32 ## suffix
-#define DSTSIZE 32
-#define SRCTYPE SkPMColor16
-#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_4444_Config); \
- SkASSERT(state.fAlphaScale == 256)
-#define RETURNDST(src) SkPixel4444ToPixel32(src)
-#define SRC_TO_FILTER(src) src
-#include "SkBitmapProcState_sample.h"
-
-#undef FILTER_PROC
-#define FILTER_PROC(x, y, a, b, c, d, dst) \
- do { \
- uint32_t tmp = Filter_4444_D32(x, y, a, b, c, d); \
- *(dst) = SkAlphaMulQ(tmp, alphaScale); \
- } while (0)
-
-#define MAKENAME(suffix) S4444_alpha_D32 ## suffix
-#define DSTSIZE 32
-#define SRCTYPE SkPMColor16
-#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_4444_Config); \
- SkASSERT(state.fAlphaScale < 256)
-#define PREAMBLE(state) unsigned alphaScale = state.fAlphaScale
-#define RETURNDST(src) SkAlphaMulQ(SkPixel4444ToPixel32(src), alphaScale)
-#define SRC_TO_FILTER(src) src
-#include "SkBitmapProcState_sample.h"
-
-// SRC == A8
-
-#undef FILTER_PROC
-#define FILTER_PROC(x, y, a, b, c, d, dst) \
- do { \
- unsigned tmp = Filter_8(x, y, a, b, c, d); \
- *(dst) = SkAlphaMulQ(pmColor, SkAlpha255To256(tmp)); \
- } while (0)
-
-#define MAKENAME(suffix) SA8_alpha_D32 ## suffix
-#define DSTSIZE 32
-#define SRCTYPE uint8_t
-#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kA8_Config);
-#define PREAMBLE(state) const SkPMColor pmColor = state.fPaintPMColor;
-#define RETURNDST(src) SkAlphaMulQ(pmColor, SkAlpha255To256(src))
-#define SRC_TO_FILTER(src) src
-#include "SkBitmapProcState_sample.h"
-
-/*****************************************************************************
- *
- * D16 functions
- *
- */
-
-// SRC == 8888
-
-#undef FILTER_PROC
-#define FILTER_PROC(x, y, a, b, c, d, dst) \
- do { \
- SkPMColor dstColor; \
- Filter_32_opaque(x, y, a, b, c, d, &dstColor); \
- (*dst) = SkPixel32ToPixel16(dstColor); \
- } while (0)
-
-#define MAKENAME(suffix) S32_D16 ## suffix
-#define DSTSIZE 16
-#define SRCTYPE SkPMColor
-#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_8888_Config); \
- SkASSERT(state.fBitmap->isOpaque())
-#define RETURNDST(src) SkPixel32ToPixel16(src)
-#define SRC_TO_FILTER(src) src
-#include "SkBitmapProcState_sample.h"
-
-// SRC == 565
-
-#undef FILTER_PROC
-#define FILTER_PROC(x, y, a, b, c, d, dst) \
- do { \
- uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d); \
- *(dst) = SkCompact_rgb_16((tmp) >> 5); \
- } while (0)
-
-#define MAKENAME(suffix) S16_D16 ## suffix
-#define DSTSIZE 16
-#define SRCTYPE uint16_t
-#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config)
-#define RETURNDST(src) src
-#define SRC_TO_FILTER(src) src
-#include "SkBitmapProcState_sample.h"
-
-// SRC == Index8
-
-#undef FILTER_PROC
-#define FILTER_PROC(x, y, a, b, c, d, dst) \
- do { \
- uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d); \
- *(dst) = SkCompact_rgb_16((tmp) >> 5); \
- } while (0)
-
-#define MAKENAME(suffix) SI8_D16 ## suffix
-#define DSTSIZE 16
-#define SRCTYPE uint8_t
-#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kIndex8_Config); \
- SkASSERT(state.fBitmap->isOpaque())
-#define PREAMBLE(state) const uint16_t* SK_RESTRICT table = state.fBitmap->getColorTable()->lock16BitCache()
-#define RETURNDST(src) table[src]
-#define SRC_TO_FILTER(src) table[src]
-#define POSTAMBLE(state) state.fBitmap->getColorTable()->unlock16BitCache()
-#include "SkBitmapProcState_sample.h"
-
-///////////////////////////////////////////////////////////////////////////////
-
-#undef FILTER_PROC
-#define FILTER_PROC(x, y, a, b, c, d, dst) \
- do { \
- uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d); \
- *(dst) = SkCompact_rgb_16((tmp) >> 5); \
- } while (0)
-
-
-// clamp
-
-#define TILEX_PROCF(fx, max) SkClampMax((fx) >> 16, max)
-#define TILEY_PROCF(fy, max) SkClampMax((fy) >> 16, max)
-#define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
-#define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
-
-#define MAKENAME(suffix) Clamp_S16_D16 ## suffix
-#define SRCTYPE uint16_t
-#define DSTTYPE uint16_t
-#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config)
-#define SRC_TO_FILTER(src) src
-#include "SkBitmapProcState_shaderproc.h"
-
-
-#define TILEX_PROCF(fx, max) (((fx) & 0xFFFF) * ((max) + 1) >> 16)
-#define TILEY_PROCF(fy, max) (((fy) & 0xFFFF) * ((max) + 1) >> 16)
-#define TILEX_LOW_BITS(fx, max) ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
-#define TILEY_LOW_BITS(fy, max) ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
-
-#define MAKENAME(suffix) Repeat_S16_D16 ## suffix
-#define SRCTYPE uint16_t
-#define DSTTYPE uint16_t
-#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config)
-#define SRC_TO_FILTER(src) src
-#include "SkBitmapProcState_shaderproc.h"
-
-
-#define TILEX_PROCF(fx, max) SkClampMax((fx) >> 16, max)
-#define TILEY_PROCF(fy, max) SkClampMax((fy) >> 16, max)
-#define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
-#define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
-
-#undef FILTER_PROC
-#define FILTER_PROC(x, y, a, b, c, d, dst) Filter_32_opaque(x, y, a, b, c, d, dst)
-#define MAKENAME(suffix) Clamp_SI8_opaque_D32 ## suffix
-#define SRCTYPE uint8_t
-#define DSTTYPE uint32_t
-#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kIndex8_Config)
-#define PREAMBLE(state) const SkPMColor* SK_RESTRICT table = state.fBitmap->getColorTable()->lockColors()
-#define SRC_TO_FILTER(src) table[src]
-#define POSTAMBLE(state) state.fBitmap->getColorTable()->unlockColors(false)
-#include "SkBitmapProcState_shaderproc.h"
+#if !SK_ARM_NEON_IS_ALWAYS
+#define NAME_WRAP(x) x
+#include "SkBitmapProcState_filter.h"
+#include "SkBitmapProcState_procs.h"
+#endif
///////////////////////////////////////////////////////////////////////////////
@@ -448,7 +139,8 @@ bool SkBitmapProcState::chooseProcs(const SkMatrix& inv, const SkPaint& paint) {
return false;
}
- static const SampleProc32 gSample32[] = {
+#if !SK_ARM_NEON_IS_ALWAYS
+ static const SampleProc32 gSkBitmapProcStateSample32[] = {
S32_opaque_D32_nofilter_DXDY,
S32_alpha_D32_nofilter_DXDY,
S32_opaque_D32_nofilter_DX,
@@ -496,7 +188,7 @@ bool SkBitmapProcState::chooseProcs(const SkMatrix& inv, const SkPaint& paint) {
SA8_alpha_D32_filter_DX
};
- static const SampleProc16 gSample16[] = {
+ static const SampleProc16 gSkBitmapProcStateSample16[] = {
S32_D16_nofilter_DXDY,
S32_D16_nofilter_DX,
S32_D16_filter_DXDY,
@@ -517,21 +209,22 @@ bool SkBitmapProcState::chooseProcs(const SkMatrix& inv, const SkPaint& paint) {
// Don't support A8 -> 565
NULL, NULL, NULL, NULL
};
+#endif
- fSampleProc32 = gSample32[index];
+ fSampleProc32 = SK_ARM_NEON_WRAP(gSkBitmapProcStateSample32)[index];
index >>= 1; // shift away any opaque/alpha distinction
- fSampleProc16 = gSample16[index];
+ fSampleProc16 = SK_ARM_NEON_WRAP(gSkBitmapProcStateSample16)[index];
// our special-case shaderprocs
- if (S16_D16_filter_DX == fSampleProc16) {
+ if (SK_ARM_NEON_WRAP(S16_D16_filter_DX) == fSampleProc16) {
if (clamp_clamp) {
- fShaderProc16 = Clamp_S16_D16_filter_DX_shaderproc;
+ fShaderProc16 = SK_ARM_NEON_WRAP(Clamp_S16_D16_filter_DX_shaderproc);
} else if (SkShader::kRepeat_TileMode == fTileModeX &&
SkShader::kRepeat_TileMode == fTileModeY) {
- fShaderProc16 = Repeat_S16_D16_filter_DX_shaderproc;
+ fShaderProc16 = SK_ARM_NEON_WRAP(Repeat_S16_D16_filter_DX_shaderproc);
}
- } else if (SI8_opaque_D32_filter_DX == fSampleProc32 && clamp_clamp) {
- fShaderProc32 = Clamp_SI8_opaque_D32_filter_DX_shaderproc;
+ } else if (SK_ARM_NEON_WRAP(SI8_opaque_D32_filter_DX) == fSampleProc32 && clamp_clamp) {
+ fShaderProc32 = SK_ARM_NEON_WRAP(Clamp_SI8_opaque_D32_filter_DX_shaderproc);
}
// see if our platform has any accelerated overrides
diff --git a/src/core/SkBitmapProcState_filter.h b/src/core/SkBitmapProcState_filter.h
index f69e17a4a6..7fcf75439f 100644
--- a/src/core/SkBitmapProcState_filter.h
+++ b/src/core/SkBitmapProcState_filter.h
@@ -18,82 +18,10 @@
the drawing pipeline may rely on this (e.g. which blitrow proc to use).
*/
-#if defined(__ARM_HAVE_NEON) && !defined(SK_CPU_BENDIAN)
-static inline void Filter_32_opaque_neon(unsigned x, unsigned y,
+static inline void Filter_32_opaque(unsigned x, unsigned y,
SkPMColor a00, SkPMColor a01,
SkPMColor a10, SkPMColor a11,
- SkPMColor *dst) {
- asm volatile(
- "vdup.8 d0, %[y] \n\t" // duplicate y into d0
- "vmov.u8 d16, #16 \n\t" // set up constant in d16
- "vsub.u8 d1, d16, d0 \n\t" // d1 = 16-y
-
- "vdup.32 d4, %[a00] \n\t" // duplicate a00 into d4
- "vdup.32 d5, %[a10] \n\t" // duplicate a10 into d5
- "vmov.32 d4[1], %[a01] \n\t" // set top of d4 to a01
- "vmov.32 d5[1], %[a11] \n\t" // set top of d5 to a11
-
- "vmull.u8 q3, d4, d1 \n\t" // q3 = [a01|a00] * (16-y)
- "vmull.u8 q0, d5, d0 \n\t" // q0 = [a11|a10] * y
-
- "vdup.16 d5, %[x] \n\t" // duplicate x into d5
- "vmov.u16 d16, #16 \n\t" // set up constant in d16
- "vsub.u16 d3, d16, d5 \n\t" // d3 = 16-x
-
- "vmul.i16 d4, d7, d5 \n\t" // d4 = a01 * x
- "vmla.i16 d4, d1, d5 \n\t" // d4 += a11 * x
- "vmla.i16 d4, d6, d3 \n\t" // d4 += a00 * (16-x)
- "vmla.i16 d4, d0, d3 \n\t" // d4 += a10 * (16-x)
- "vshrn.i16 d0, q2, #8 \n\t" // shift down result by 8
- "vst1.32 {d0[0]}, [%[dst]] \n\t" // store result
- :
- : [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst)
- : "cc", "memory", "r4", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16"
- );
-}
-
-static inline void Filter_32_alpha_neon(unsigned x, unsigned y,
- SkPMColor a00, SkPMColor a01,
- SkPMColor a10, SkPMColor a11,
- SkPMColor *dst, uint16_t scale) {
- asm volatile(
- "vdup.8 d0, %[y] \n\t" // duplicate y into d0
- "vmov.u8 d16, #16 \n\t" // set up constant in d16
- "vsub.u8 d1, d16, d0 \n\t" // d1 = 16-y
-
- "vdup.32 d4, %[a00] \n\t" // duplicate a00 into d4
- "vdup.32 d5, %[a10] \n\t" // duplicate a10 into d5
- "vmov.32 d4[1], %[a01] \n\t" // set top of d4 to a01
- "vmov.32 d5[1], %[a11] \n\t" // set top of d5 to a11
-
- "vmull.u8 q3, d4, d1 \n\t" // q3 = [a01|a00] * (16-y)
- "vmull.u8 q0, d5, d0 \n\t" // q0 = [a11|a10] * y
-
- "vdup.16 d5, %[x] \n\t" // duplicate x into d5
- "vmov.u16 d16, #16 \n\t" // set up constant in d16
- "vsub.u16 d3, d16, d5 \n\t" // d3 = 16-x
-
- "vmul.i16 d4, d7, d5 \n\t" // d4 = a01 * x
- "vmla.i16 d4, d1, d5 \n\t" // d4 += a11 * x
- "vmla.i16 d4, d6, d3 \n\t" // d4 += a00 * (16-x)
- "vmla.i16 d4, d0, d3 \n\t" // d4 += a10 * (16-x)
- "vdup.16 d3, %[scale] \n\t" // duplicate scale into d3
- "vshr.u16 d4, d4, #8 \n\t" // shift down result by 8
- "vmul.i16 d4, d4, d3 \n\t" // multiply result by scale
- "vshrn.i16 d0, q2, #8 \n\t" // shift down result by 8
- "vst1.32 {d0[0]}, [%[dst]] \n\t" // store result
- :
- : [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst), [scale] "r" (scale)
- : "cc", "memory", "r4", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16"
- );
-}
-#define Filter_32_opaque Filter_32_opaque_neon
-#define Filter_32_alpha Filter_32_alpha_neon
-#else
-static inline void Filter_32_opaque_portable(unsigned x, unsigned y,
- SkPMColor a00, SkPMColor a01,
- SkPMColor a10, SkPMColor a11,
- SkPMColor* dstColor) {
+ SkPMColor* dstColor) {
SkASSERT((unsigned)x <= 0xF);
SkASSERT((unsigned)y <= 0xF);
@@ -118,11 +46,11 @@ static inline void Filter_32_opaque_portable(unsigned x, unsigned y,
*dstColor = ((lo >> 8) & mask) | (hi & ~mask);
}
-static inline void Filter_32_alpha_portable(unsigned x, unsigned y,
- SkPMColor a00, SkPMColor a01,
- SkPMColor a10, SkPMColor a11,
- SkPMColor* dstColor,
- unsigned alphaScale) {
+static inline void Filter_32_alpha(unsigned x, unsigned y,
+ SkPMColor a00, SkPMColor a01,
+ SkPMColor a10, SkPMColor a11,
+ SkPMColor* dstColor,
+ unsigned alphaScale) {
SkASSERT((unsigned)x <= 0xF);
SkASSERT((unsigned)y <= 0xF);
SkASSERT(alphaScale <= 256);
@@ -150,7 +78,4 @@ static inline void Filter_32_alpha_portable(unsigned x, unsigned y,
*dstColor = ((lo >> 8) & mask) | (hi & ~mask);
}
-#define Filter_32_opaque Filter_32_opaque_portable
-#define Filter_32_alpha Filter_32_alpha_portable
-#endif
diff --git a/src/core/SkBitmapProcState_procs.h b/src/core/SkBitmapProcState_procs.h
new file mode 100644
index 0000000000..1b9328edc7
--- /dev/null
+++ b/src/core/SkBitmapProcState_procs.h
@@ -0,0 +1,343 @@
+
+/*
+ * Copyright 2011 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+// Define NAME_WRAP(x) before including this header to perform name-wrapping.
+// E.g. for ARM NEON, define it as 'x ## _neon' to ensure all important
+// identifiers have a _neon suffix.
+#ifndef NAME_WRAP
+#error "Please define NAME_WRAP() before including this file"
+#endif
+
+// blends four 565 pixels by the 4-bit weights x, y; returns expanded * 5bits (the four factors sum to 32)
+static inline uint32_t Filter_565_Expanded(unsigned x, unsigned y,
+ uint32_t a00, uint32_t a01,
+ uint32_t a10, uint32_t a11) {
+ SkASSERT((unsigned)x <= 0xF);
+ SkASSERT((unsigned)y <= 0xF);
+
+ a00 = SkExpand_rgb_16(a00);
+ a01 = SkExpand_rgb_16(a01);
+ a10 = SkExpand_rgb_16(a10);
+ a11 = SkExpand_rgb_16(a11);
+
+ int xy = x * y >> 3;
+ return a00 * (32 - 2*y - 2*x + xy) +
+ a01 * (2*x - xy) +
+ a10 * (2*y - xy) +
+ a11 * xy;
+}
+
+// turns an expanded 565 * 5bits value into an SkPMColor packed with alpha 0xFF
+// input layout is g:11 | r:10 | x:1 | b:10; the top 8 bits of each field are kept
+static inline SkPMColor SkExpanded_565_To_PMColor(uint32_t c) {
+ unsigned r = (c >> 13) & 0xFF;
+ unsigned g = (c >> 24);
+ unsigned b = (c >> 2) & 0xFF;
+ return SkPackARGB32(0xFF, r, g, b);
+}
+
+// blends four 4444 pixels by the 4-bit weights x, y (the four factors sum to 16); returns answer in SkPMColor format
+static inline SkPMColor Filter_4444_D32(unsigned x, unsigned y,
+ uint32_t a00, uint32_t a01,
+ uint32_t a10, uint32_t a11) {
+ SkASSERT((unsigned)x <= 0xF);
+ SkASSERT((unsigned)y <= 0xF);
+
+ a00 = SkExpand_4444(a00);
+ a01 = SkExpand_4444(a01);
+ a10 = SkExpand_4444(a10);
+ a11 = SkExpand_4444(a11);
+
+ int xy = x * y >> 4;
+ uint32_t result = a00 * (16 - y - x + xy) +
+ a01 * (x - xy) +
+ a10 * (y - xy) +
+ a11 * xy;
+
+ return SkCompact_8888(result);
+}
+
+static inline U8CPU Filter_8(unsigned x, unsigned y,
+ U8CPU a00, U8CPU a01,
+ U8CPU a10, U8CPU a11) {
+ SkASSERT((unsigned)x <= 0xF);
+ SkASSERT((unsigned)y <= 0xF);
+
+ int xy = x * y;
+ unsigned result = a00 * (256 - 16*y - 16*x + xy) +
+ a01 * (16*x - xy) +
+ a10 * (16*y - xy) +
+ a11 * xy;
+
+ return result >> 8;
+}
+
+/*****************************************************************************
+ *
+ * D32 functions
+ *
+ */
+
+// SRC == 8888
+
+#define FILTER_PROC(x, y, a, b, c, d, dst) NAME_WRAP(Filter_32_opaque)(x, y, a, b, c, d, dst)
+
+#define MAKENAME(suffix) NAME_WRAP(S32_opaque_D32 ## suffix)
+#define DSTSIZE 32
+#define SRCTYPE SkPMColor
+#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_8888_Config); \
+ SkASSERT(state.fAlphaScale == 256)
+#define RETURNDST(src) src
+#define SRC_TO_FILTER(src) src
+#include "SkBitmapProcState_sample.h"
+
+#undef FILTER_PROC
+#define FILTER_PROC(x, y, a, b, c, d, dst) NAME_WRAP(Filter_32_alpha)(x, y, a, b, c, d, dst, alphaScale)
+
+#define MAKENAME(suffix) NAME_WRAP(S32_alpha_D32 ## suffix)
+#define DSTSIZE 32
+#define SRCTYPE SkPMColor
+#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_8888_Config); \
+ SkASSERT(state.fAlphaScale < 256)
+#define PREAMBLE(state) unsigned alphaScale = state.fAlphaScale
+#define RETURNDST(src) SkAlphaMulQ(src, alphaScale)
+#define SRC_TO_FILTER(src) src
+#include "SkBitmapProcState_sample.h"
+
+// SRC == 565
+
+#undef FILTER_PROC
+#define FILTER_PROC(x, y, a, b, c, d, dst) \
+ do { \
+ uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d); \
+ *(dst) = SkExpanded_565_To_PMColor(tmp); \
+ } while (0)
+
+#define MAKENAME(suffix) NAME_WRAP(S16_opaque_D32 ## suffix)
+#define DSTSIZE 32
+#define SRCTYPE uint16_t
+#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config); \
+ SkASSERT(state.fAlphaScale == 256)
+#define RETURNDST(src) SkPixel16ToPixel32(src)
+#define SRC_TO_FILTER(src) src
+#include "SkBitmapProcState_sample.h"
+
+#undef FILTER_PROC
+#define FILTER_PROC(x, y, a, b, c, d, dst) \
+ do { \
+ uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d); \
+ *(dst) = SkAlphaMulQ(SkExpanded_565_To_PMColor(tmp), alphaScale); \
+ } while (0)
+
+#define MAKENAME(suffix) NAME_WRAP(S16_alpha_D32 ## suffix)
+#define DSTSIZE 32
+#define SRCTYPE uint16_t
+#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config); \
+ SkASSERT(state.fAlphaScale < 256)
+#define PREAMBLE(state) unsigned alphaScale = state.fAlphaScale
+#define RETURNDST(src) SkAlphaMulQ(SkPixel16ToPixel32(src), alphaScale)
+#define SRC_TO_FILTER(src) src
+#include "SkBitmapProcState_sample.h"
+
+// SRC == Index8
+
+#undef FILTER_PROC
+#define FILTER_PROC(x, y, a, b, c, d, dst) NAME_WRAP(Filter_32_opaque)(x, y, a, b, c, d, dst)
+
+#define MAKENAME(suffix) NAME_WRAP(SI8_opaque_D32 ## suffix)
+#define DSTSIZE 32
+#define SRCTYPE uint8_t
+#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kIndex8_Config); \
+ SkASSERT(state.fAlphaScale == 256)
+#define PREAMBLE(state) const SkPMColor* SK_RESTRICT table = state.fBitmap->getColorTable()->lockColors()
+#define RETURNDST(src) table[src]
+#define SRC_TO_FILTER(src) table[src]
+#define POSTAMBLE(state) state.fBitmap->getColorTable()->unlockColors(false)
+#include "SkBitmapProcState_sample.h"
+
+#undef FILTER_PROC
+#define FILTER_PROC(x, y, a, b, c, d, dst) NAME_WRAP(Filter_32_alpha)(x, y, a, b, c, d, dst, alphaScale)
+
+#define MAKENAME(suffix) NAME_WRAP(SI8_alpha_D32 ## suffix)
+#define DSTSIZE 32
+#define SRCTYPE uint8_t
+#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kIndex8_Config); \
+ SkASSERT(state.fAlphaScale < 256)
+#define PREAMBLE(state) unsigned alphaScale = state.fAlphaScale; \
+ const SkPMColor* SK_RESTRICT table = state.fBitmap->getColorTable()->lockColors()
+#define RETURNDST(src) SkAlphaMulQ(table[src], alphaScale)
+#define SRC_TO_FILTER(src) table[src]
+#define POSTAMBLE(state) state.fBitmap->getColorTable()->unlockColors(false)
+#include "SkBitmapProcState_sample.h"
+
+// SRC == 4444
+
+#undef FILTER_PROC
+#define FILTER_PROC(x, y, a, b, c, d, dst) *(dst) = Filter_4444_D32(x, y, a, b, c, d)
+
+#define MAKENAME(suffix) NAME_WRAP(S4444_opaque_D32 ## suffix)
+#define DSTSIZE 32
+#define SRCTYPE SkPMColor16
+#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_4444_Config); \
+ SkASSERT(state.fAlphaScale == 256)
+#define RETURNDST(src) SkPixel4444ToPixel32(src)
+#define SRC_TO_FILTER(src) src
+#include "SkBitmapProcState_sample.h"
+
+#undef FILTER_PROC
+#define FILTER_PROC(x, y, a, b, c, d, dst) \
+ do { \
+ uint32_t tmp = Filter_4444_D32(x, y, a, b, c, d); \
+ *(dst) = SkAlphaMulQ(tmp, alphaScale); \
+ } while (0)
+
+#define MAKENAME(suffix) NAME_WRAP(S4444_alpha_D32 ## suffix)
+#define DSTSIZE 32
+#define SRCTYPE SkPMColor16
+#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_4444_Config); \
+ SkASSERT(state.fAlphaScale < 256)
+#define PREAMBLE(state) unsigned alphaScale = state.fAlphaScale
+#define RETURNDST(src) SkAlphaMulQ(SkPixel4444ToPixel32(src), alphaScale)
+#define SRC_TO_FILTER(src) src
+#include "SkBitmapProcState_sample.h"
+
+// SRC == A8
+
+#undef FILTER_PROC
+#define FILTER_PROC(x, y, a, b, c, d, dst) \
+ do { \
+ unsigned tmp = Filter_8(x, y, a, b, c, d); \
+ *(dst) = SkAlphaMulQ(pmColor, SkAlpha255To256(tmp)); \
+ } while (0)
+
+#define MAKENAME(suffix) NAME_WRAP(SA8_alpha_D32 ## suffix)
+#define DSTSIZE 32
+#define SRCTYPE uint8_t
+#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kA8_Config);
+#define PREAMBLE(state) const SkPMColor pmColor = state.fPaintPMColor;
+#define RETURNDST(src) SkAlphaMulQ(pmColor, SkAlpha255To256(src))
+#define SRC_TO_FILTER(src) src
+#include "SkBitmapProcState_sample.h"
+
+/*****************************************************************************
+ *
+ * D16 functions
+ *
+ */
+
+// SRC == 8888
+
+#undef FILTER_PROC
+#define FILTER_PROC(x, y, a, b, c, d, dst) \
+ do { \
+ SkPMColor dstColor; \
+ NAME_WRAP(Filter_32_opaque)(x, y, a, b, c, d, &dstColor); \
+ (*dst) = SkPixel32ToPixel16(dstColor); \
+ } while (0)
+
+#define MAKENAME(suffix) NAME_WRAP(S32_D16 ## suffix)
+#define DSTSIZE 16
+#define SRCTYPE SkPMColor
+#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_8888_Config); \
+ SkASSERT(state.fBitmap->isOpaque())
+#define RETURNDST(src) SkPixel32ToPixel16(src)
+#define SRC_TO_FILTER(src) src
+#include "SkBitmapProcState_sample.h"
+
+// SRC == 565
+
+#undef FILTER_PROC
+#define FILTER_PROC(x, y, a, b, c, d, dst) \
+ do { \
+ uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d); \
+ *(dst) = SkCompact_rgb_16((tmp) >> 5); \
+ } while (0)
+
+#define MAKENAME(suffix) NAME_WRAP(S16_D16 ## suffix)
+#define DSTSIZE 16
+#define SRCTYPE uint16_t
+#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config)
+#define RETURNDST(src) src
+#define SRC_TO_FILTER(src) src
+#include "SkBitmapProcState_sample.h"
+
+// SRC == Index8
+
+#undef FILTER_PROC
+#define FILTER_PROC(x, y, a, b, c, d, dst) \
+ do { \
+ uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d); \
+ *(dst) = SkCompact_rgb_16((tmp) >> 5); \
+ } while (0)
+
+#define MAKENAME(suffix) NAME_WRAP(SI8_D16 ## suffix)
+#define DSTSIZE 16
+#define SRCTYPE uint8_t
+#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kIndex8_Config); \
+ SkASSERT(state.fBitmap->isOpaque())
+#define PREAMBLE(state) const uint16_t* SK_RESTRICT table = state.fBitmap->getColorTable()->lock16BitCache()
+#define RETURNDST(src) table[src]
+#define SRC_TO_FILTER(src) table[src]
+#define POSTAMBLE(state) state.fBitmap->getColorTable()->unlock16BitCache()
+#include "SkBitmapProcState_sample.h"
+
+///////////////////////////////////////////////////////////////////////////////
+
+#undef FILTER_PROC
+#define FILTER_PROC(x, y, a, b, c, d, dst) \
+ do { \
+ uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d); \
+ *(dst) = SkCompact_rgb_16((tmp) >> 5); \
+ } while (0)
+
+
+// clamp
+
+#define TILEX_PROCF(fx, max) SkClampMax((fx) >> 16, max)
+#define TILEY_PROCF(fy, max) SkClampMax((fy) >> 16, max)
+#define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
+#define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
+
+#define MAKENAME(suffix) NAME_WRAP(Clamp_S16_D16 ## suffix)
+#define SRCTYPE uint16_t
+#define DSTTYPE uint16_t
+#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config)
+#define SRC_TO_FILTER(src) src
+#include "SkBitmapProcState_shaderproc.h"
+
+
+#define TILEX_PROCF(fx, max) (((fx) & 0xFFFF) * ((max) + 1) >> 16)
+#define TILEY_PROCF(fy, max) (((fy) & 0xFFFF) * ((max) + 1) >> 16)
+#define TILEX_LOW_BITS(fx, max) ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
+#define TILEY_LOW_BITS(fy, max) ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
+
+#define MAKENAME(suffix) NAME_WRAP(Repeat_S16_D16 ## suffix)
+#define SRCTYPE uint16_t
+#define DSTTYPE uint16_t
+#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config)
+#define SRC_TO_FILTER(src) src
+#include "SkBitmapProcState_shaderproc.h"
+
+
+#define TILEX_PROCF(fx, max) SkClampMax((fx) >> 16, max)
+#define TILEY_PROCF(fy, max) SkClampMax((fy) >> 16, max)
+#define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
+#define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
+
+#undef FILTER_PROC
+#define FILTER_PROC(x, y, a, b, c, d, dst) NAME_WRAP(Filter_32_opaque)(x, y, a, b, c, d, dst)
+#define MAKENAME(suffix) NAME_WRAP(Clamp_SI8_opaque_D32 ## suffix)
+#define SRCTYPE uint8_t
+#define DSTTYPE uint32_t
+#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kIndex8_Config)
+#define PREAMBLE(state) const SkPMColor* SK_RESTRICT table = state.fBitmap->getColorTable()->lockColors()
+#define SRC_TO_FILTER(src) table[src]
+#define POSTAMBLE(state) state.fBitmap->getColorTable()->unlockColors(false)
+#include "SkBitmapProcState_shaderproc.h"
+
+#undef NAME_WRAP
\ No newline at end of file
diff --git a/src/core/SkBitmapProcState_shaderproc.h b/src/core/SkBitmapProcState_shaderproc.h
index ead57f123e..33c238c3aa 100644
--- a/src/core/SkBitmapProcState_shaderproc.h
+++ b/src/core/SkBitmapProcState_shaderproc.h
@@ -10,8 +10,10 @@
#define SCALE_FILTER_NAME MAKENAME(_filter_DX_shaderproc)
-static void SCALE_FILTER_NAME(const SkBitmapProcState& s, int x, int y,
- DSTTYPE* SK_RESTRICT colors, int count) {
+// Can't be static in the general case because some of these implementations
+// will be defined and referenced in different object files.
+void SCALE_FILTER_NAME(const SkBitmapProcState& s, int x, int y,
+ DSTTYPE* SK_RESTRICT colors, int count) {
SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
SkMatrix::kScale_Mask)) == 0);
SkASSERT(s.fInvKy == 0);
diff --git a/src/opts/SkBitmapProcState_arm_neon.cpp b/src/opts/SkBitmapProcState_arm_neon.cpp
new file mode 100644
index 0000000000..d50707dce0
--- /dev/null
+++ b/src/opts/SkBitmapProcState_arm_neon.cpp
@@ -0,0 +1,92 @@
+
+/*
+ * Copyright 2012 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+#include "SkBitmapProcState.h"
+#include "SkBitmapProcState_filter.h"
+#include "SkColorPriv.h"
+#include "SkFilterProc.h"
+#include "SkPaint.h"
+#include "SkShader.h" // for tilemodes
+#include "SkUtilsArm.h"
+
+/*
+ Forward declarations that give the two tables defined in this file external
+ linkage. In C++ a namespace-scope const object has internal linkage unless
+ it has previously been declared extern, so without these declarations the
+ tables would not be visible to other object files and could be dropped from
+ the final binary.
+ */
+extern const SkBitmapProcState::SampleProc32 gSkBitmapProcStateSample32_neon[];
+extern const SkBitmapProcState::SampleProc16 gSkBitmapProcStateSample16_neon[];
+
+/*
+ NAME_WRAP appends the _neon suffix to the identifier it is given. It is
+ defined before the includes below so that names built through it come out
+ with that suffix; the table initializers later in this file refer to the
+ resulting *_neon names.
+ NOTE(review): NAME_WRAP is not #undef'ed in this file; presumably
+ SkBitmapProcState_procs.h undefines it at its end - confirm.
+ */
+#define NAME_WRAP(x) x ## _neon
+#include "SkBitmapProcState_filter_neon.h"
+#include "SkBitmapProcState_procs.h"
+
+/*
+ Table of the NEON SampleProc32 procs (40 entries).
+
+ Entries come in runs of eight, one run per source prefix in the order
+ S32, S16, SI8, S4444, SA8. Inside a run the order is nofilter_DXDY,
+ nofilter_DX, filter_DXDY, filter_DX, each listed as an opaque entry
+ followed by an alpha entry. The SA8 run repeats the alpha proc in both
+ slots of every pair.
+ NOTE(review): the code that indexes this table is not in this file; the
+ layout presumably has to stay in step with it, so do not reorder entries
+ without checking the selector.
+ */
+const SkBitmapProcState::SampleProc32 gSkBitmapProcStateSample32_neon[] = {
+ S32_opaque_D32_nofilter_DXDY_neon,
+ S32_alpha_D32_nofilter_DXDY_neon,
+ S32_opaque_D32_nofilter_DX_neon,
+ S32_alpha_D32_nofilter_DX_neon,
+ S32_opaque_D32_filter_DXDY_neon,
+ S32_alpha_D32_filter_DXDY_neon,
+ S32_opaque_D32_filter_DX_neon,
+ S32_alpha_D32_filter_DX_neon,
+
+ S16_opaque_D32_nofilter_DXDY_neon,
+ S16_alpha_D32_nofilter_DXDY_neon,
+ S16_opaque_D32_nofilter_DX_neon,
+ S16_alpha_D32_nofilter_DX_neon,
+ S16_opaque_D32_filter_DXDY_neon,
+ S16_alpha_D32_filter_DXDY_neon,
+ S16_opaque_D32_filter_DX_neon,
+ S16_alpha_D32_filter_DX_neon,
+
+ SI8_opaque_D32_nofilter_DXDY_neon,
+ SI8_alpha_D32_nofilter_DXDY_neon,
+ SI8_opaque_D32_nofilter_DX_neon,
+ SI8_alpha_D32_nofilter_DX_neon,
+ SI8_opaque_D32_filter_DXDY_neon,
+ SI8_alpha_D32_filter_DXDY_neon,
+ SI8_opaque_D32_filter_DX_neon,
+ SI8_alpha_D32_filter_DX_neon,
+
+ S4444_opaque_D32_nofilter_DXDY_neon,
+ S4444_alpha_D32_nofilter_DXDY_neon,
+ S4444_opaque_D32_nofilter_DX_neon,
+ S4444_alpha_D32_nofilter_DX_neon,
+ S4444_opaque_D32_filter_DXDY_neon,
+ S4444_alpha_D32_filter_DXDY_neon,
+ S4444_opaque_D32_filter_DX_neon,
+ S4444_alpha_D32_filter_DX_neon,
+
+ // A8 treats alpha/opaque the same (equally efficient)
+ SA8_alpha_D32_nofilter_DXDY_neon,
+ SA8_alpha_D32_nofilter_DXDY_neon,
+ SA8_alpha_D32_nofilter_DX_neon,
+ SA8_alpha_D32_nofilter_DX_neon,
+ SA8_alpha_D32_filter_DXDY_neon,
+ SA8_alpha_D32_filter_DXDY_neon,
+ SA8_alpha_D32_filter_DX_neon,
+ SA8_alpha_D32_filter_DX_neon
+};
+
+/*
+ Table of the NEON SampleProc16 procs (20 entries).
+
+ Entries come in runs of four, one run per source prefix in the order
+ S32, S16, SI8, followed by two runs of NULL for the 4444 and A8 sources,
+ which have no 565 destination procs. Inside a run the order is
+ nofilter_DXDY, nofilter_DX, filter_DXDY, filter_DX.
+ NOTE(review): the code that indexes this table is not in this file; it
+ presumably has to cope with the NULL slots and relies on this exact
+ ordering - confirm before changing the layout.
+ */
+const SkBitmapProcState::SampleProc16 gSkBitmapProcStateSample16_neon[] = {
+ S32_D16_nofilter_DXDY_neon,
+ S32_D16_nofilter_DX_neon,
+ S32_D16_filter_DXDY_neon,
+ S32_D16_filter_DX_neon,
+
+ S16_D16_nofilter_DXDY_neon,
+ S16_D16_nofilter_DX_neon,
+ S16_D16_filter_DXDY_neon,
+ S16_D16_filter_DX_neon,
+
+ SI8_D16_nofilter_DXDY_neon,
+ SI8_D16_nofilter_DX_neon,
+ SI8_D16_filter_DXDY_neon,
+ SI8_D16_filter_DX_neon,
+
+ // Don't support 4444 -> 565
+ NULL, NULL, NULL, NULL,
+ // Don't support A8 -> 565
+ NULL, NULL, NULL, NULL
+};
diff --git a/src/opts/SkBitmapProcState_filter_neon.h b/src/opts/SkBitmapProcState_filter_neon.h
new file mode 100644
index 0000000000..aadab91e29
--- /dev/null
+++ b/src/opts/SkBitmapProcState_filter_neon.h
@@ -0,0 +1,88 @@
+
+/*
+ * Copyright 2012 The Android Open Source Project
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+
+#include "SkColorPriv.h"
+
+/*
+ Filter_32_opaque
+
+ There is no hard-n-fast rule that the filtering must produce
+ exact results for the color components, but if the 4 incoming colors are
+ all opaque, then the output color must also be opaque. Subsequent parts of
+ the drawing pipeline may rely on this (e.g. which blitrow proc to use).
+
+ What the NEON code computes, independently for each of the four 8-bit
+ channels of the pixel:
+
+ *dst = (a00 * (16-x) * (16-y) + a01 * x * (16-y)
+ + a10 * (16-x) * y + a11 * x * y) >> 8
+
+ x therefore blends from the a?0 pixels towards the a?1 pixels, and y blends
+ from the a0? pixels towards the a1? pixels. The four weights add up to 256,
+ so a channel that is 255 in all four inputs stays 255 after the shift,
+ which is how the opacity requirement above is met.
+
+ x, y : blend weights. y is broadcast as a byte and x as a 16-bit lane.
+ NOTE(review): assumes both are in [0, 15] - confirm with callers.
+ a00..a11: the four source pixels.
+ dst : receives one 32-bit result pixel.
+
+ NOTE(review): the clobber list names r4 and d2, neither of which appears
+ in the asm template. The store through dst is covered by the "memory"
+ clobber rather than by an output operand.
+ */
+
+static inline void Filter_32_opaque_neon(unsigned x, unsigned y,
+ SkPMColor a00, SkPMColor a01,
+ SkPMColor a10, SkPMColor a11,
+ SkPMColor *dst) {
+ asm volatile(
+ "vdup.8 d0, %[y] \n\t" // d0 = y in every byte lane
+ "vmov.u8 d16, #16 \n\t" // d16 = 16 in every byte lane
+ "vsub.u8 d1, d16, d0 \n\t" // d1 = 16-y
+
+ "vdup.32 d4, %[a00] \n\t" // d4 = [a00|a00]
+ "vdup.32 d5, %[a10] \n\t" // d5 = [a10|a10]
+ "vmov.32 d4[1], %[a01] \n\t" // d4 = [a01|a00] (a01 in the high word)
+ "vmov.32 d5[1], %[a11] \n\t" // d5 = [a11|a10] (a11 in the high word)
+
+ "vmull.u8 q3, d4, d1 \n\t" // q3 (d7:d6) = [a01|a00] * (16-y), widened to 16 bits
+ "vmull.u8 q0, d5, d0 \n\t" // q0 (d1:d0) = [a11|a10] * y, widened to 16 bits
+
+ "vdup.16 d5, %[x] \n\t" // d5 = x in every 16-bit lane
+ "vmov.u16 d16, #16 \n\t" // d16 = 16 in every 16-bit lane
+ "vsub.u16 d3, d16, d5 \n\t" // d3 = 16-x
+
+ "vmul.i16 d4, d7, d5 \n\t" // d4 = a01*(16-y) * x
+ "vmla.i16 d4, d1, d5 \n\t" // d4 += a11*y * x
+ "vmla.i16 d4, d6, d3 \n\t" // d4 += a00*(16-y) * (16-x)
+ "vmla.i16 d4, d0, d3 \n\t" // d4 += a10*y * (16-x)
+ "vshrn.i16 d0, q2, #8 \n\t" // narrow q2 (d5:d4) >> 8 to bytes; low word of d0 comes from d4
+ "vst1.32 {d0[0]}, [%[dst]] \n\t" // store the low 32 bits of d0 to *dst
+ :
+ : [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst)
+ : "cc", "memory", "r4", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16"
+ );
+}
+
+/*
+ Filter_32_alpha
+
+ Same four-pixel blend as Filter_32_opaque_neon, followed by a per-channel
+ multiply with scale. For each of the four 8-bit channels:
+
+ blend = (a00 * (16-x) * (16-y) + a01 * x * (16-y)
+ + a10 * (16-x) * y + a11 * x * y) >> 8
+ *dst = (blend * scale) >> 8
+
+ x, y : blend weights. y is broadcast as a byte and x as a 16-bit lane.
+ NOTE(review): assumes both are in [0, 15] - confirm with callers.
+ a00..a11: the four source pixels.
+ dst : receives one 32-bit result pixel.
+ scale : multiplier applied to every channel of the blended pixel.
+ NOTE(review): presumably in [0, 256] so the 16-bit product cannot
+ overflow - confirm with callers.
+
+ NOTE(review): the clobber list names r4 and d2, neither of which appears
+ in the asm template. The store through dst is covered by the "memory"
+ clobber rather than by an output operand.
+ */
+static inline void Filter_32_alpha_neon(unsigned x, unsigned y,
+ SkPMColor a00, SkPMColor a01,
+ SkPMColor a10, SkPMColor a11,
+ SkPMColor *dst, uint16_t scale) {
+ asm volatile(
+ "vdup.8 d0, %[y] \n\t" // d0 = y in every byte lane
+ "vmov.u8 d16, #16 \n\t" // d16 = 16 in every byte lane
+ "vsub.u8 d1, d16, d0 \n\t" // d1 = 16-y
+
+ "vdup.32 d4, %[a00] \n\t" // d4 = [a00|a00]
+ "vdup.32 d5, %[a10] \n\t" // d5 = [a10|a10]
+ "vmov.32 d4[1], %[a01] \n\t" // d4 = [a01|a00] (a01 in the high word)
+ "vmov.32 d5[1], %[a11] \n\t" // d5 = [a11|a10] (a11 in the high word)
+
+ "vmull.u8 q3, d4, d1 \n\t" // q3 (d7:d6) = [a01|a00] * (16-y), widened to 16 bits
+ "vmull.u8 q0, d5, d0 \n\t" // q0 (d1:d0) = [a11|a10] * y, widened to 16 bits
+
+ "vdup.16 d5, %[x] \n\t" // d5 = x in every 16-bit lane
+ "vmov.u16 d16, #16 \n\t" // d16 = 16 in every 16-bit lane
+ "vsub.u16 d3, d16, d5 \n\t" // d3 = 16-x
+
+ "vmul.i16 d4, d7, d5 \n\t" // d4 = a01*(16-y) * x
+ "vmla.i16 d4, d1, d5 \n\t" // d4 += a11*y * x
+ "vmla.i16 d4, d6, d3 \n\t" // d4 += a00*(16-y) * (16-x)
+ "vmla.i16 d4, d0, d3 \n\t" // d4 += a10*y * (16-x)
+ "vdup.16 d3, %[scale] \n\t" // d3 = scale in every 16-bit lane
+ "vshr.u16 d4, d4, #8 \n\t" // d4 = blended channels (sum >> 8), still 16-bit lanes
+ "vmul.i16 d4, d4, d3 \n\t" // d4 = blended channels * scale
+ "vshrn.i16 d0, q2, #8 \n\t" // narrow q2 (d5:d4) >> 8 to bytes; low word of d0 comes from d4
+ "vst1.32 {d0[0]}, [%[dst]] \n\t" // store the low 32 bits of d0 to *dst
+ :
+ : [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst), [scale] "r" (scale)
+ : "cc", "memory", "r4", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16"
+ );
+}