diff options
-rw-r--r-- | src/core/SkBitmapProcState.cpp | 126 | ||||
-rw-r--r-- | src/core/SkBitmapProcState_filter.h | 157 | ||||
-rw-r--r-- | src/core/SkBitmapProcState_sample.h | 49 | ||||
-rw-r--r-- | src/core/SkBitmapProcState_shaderproc.h | 13 | ||||
-rw-r--r-- | src/opts/SkBitmapProcState_opts_arm.cpp | 516 |
5 files changed, 258 insertions, 603 deletions
diff --git a/src/core/SkBitmapProcState.cpp b/src/core/SkBitmapProcState.cpp index 3e5b927684..3c5833dd25 100644 --- a/src/core/SkBitmapProcState.cpp +++ b/src/core/SkBitmapProcState.cpp @@ -1,4 +1,5 @@ #include "SkBitmapProcState.h" +#include "SkBitmapProcState_filter.h" #include "SkColorPriv.h" #include "SkFilterProc.h" #include "SkPaint.h" @@ -12,33 +13,6 @@ #define UNPACK_SECONDARY_SHORT(packed) ((uint32_t)(packed) >> 16) #endif -static inline SkPMColor Filter_32(unsigned x, unsigned y, - SkPMColor a00, SkPMColor a01, - SkPMColor a10, SkPMColor a11) { - SkASSERT((unsigned)x <= 0xF); - SkASSERT((unsigned)y <= 0xF); - - int xy = x * y; - uint32_t mask = gMask_00FF00FF; //0xFF00FF; - - int scale = 256 - 16*y - 16*x + xy; - uint32_t lo = (a00 & mask) * scale; - uint32_t hi = ((a00 >> 8) & mask) * scale; - - scale = 16*x - xy; - lo += (a01 & mask) * scale; - hi += ((a01 >> 8) & mask) * scale; - - scale = 16*y - xy; - lo += (a10 & mask) * scale; - hi += ((a10 >> 8) & mask) * scale; - - lo += (a11 & mask) * xy; - hi += ((a11 >> 8) & mask) * xy; - - return ((lo >> 8) & mask) | (hi & ~mask); -} - // returns expanded * 5bits static inline uint32_t Filter_565_Expanded(unsigned x, unsigned y, uint32_t a00, uint32_t a01, @@ -111,7 +85,7 @@ static inline U8CPU Filter_8(unsigned x, unsigned y, // SRC == 8888 -#define FILTER_PROC(x, y, a, b, c, d) Filter_32(x, y, a, b, c, d) +#define FILTER_PROC(x, y, a, b, c, d, dst) Filter_32_opaque(x, y, a, b, c, d, dst) #define MAKENAME(suffix) S32_opaque_D32 ## suffix #define DSTSIZE 32 @@ -120,24 +94,29 @@ static inline U8CPU Filter_8(unsigned x, unsigned y, SkASSERT(state.fAlphaScale == 256) #define RETURNDST(src) src #define SRC_TO_FILTER(src) src -#define FILTER_TO_DST(c) c #include "SkBitmapProcState_sample.h" +#undef FILTER_PROC +#define FILTER_PROC(x, y, a, b, c, d, dst) Filter_32_alpha(x, y, a, b, c, d, dst, alphaScale) + #define MAKENAME(suffix) S32_alpha_D32 ## suffix #define DSTSIZE 32 #define SRCTYPE SkPMColor #define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_8888_Config); \ SkASSERT(state.fAlphaScale < 256) -#define PREAMBLE(state) unsigned scale = state.fAlphaScale -#define RETURNDST(src) SkAlphaMulQ(src, scale) +#define PREAMBLE(state) unsigned alphaScale = state.fAlphaScale +#define RETURNDST(src) SkAlphaMulQ(src, alphaScale) #define SRC_TO_FILTER(src) src -#define FILTER_TO_DST(c) SkAlphaMulQ(c, scale) #include "SkBitmapProcState_sample.h" // SRC == 565 #undef FILTER_PROC -#define FILTER_PROC(x, y, a, b, c, d) Filter_565_Expanded(x, y, a, b, c, d) +#define FILTER_PROC(x, y, a, b, c, d, dst) \ + do { \ + uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d); \ + *(dst) = SkExpanded_565_To_PMColor(tmp); \ + } while (0) #define MAKENAME(suffix) S16_opaque_D32 ## suffix #define DSTSIZE 32 @@ -146,24 +125,29 @@ static inline U8CPU Filter_8(unsigned x, unsigned y, SkASSERT(state.fAlphaScale == 256) #define RETURNDST(src) SkPixel16ToPixel32(src) #define SRC_TO_FILTER(src) src -#define FILTER_TO_DST(c) SkExpanded_565_To_PMColor(c) #include "SkBitmapProcState_sample.h" +#undef FILTER_PROC +#define FILTER_PROC(x, y, a, b, c, d, dst) \ + do { \ + uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d); \ + *(dst) = SkAlphaMulQ(SkExpanded_565_To_PMColor(tmp), alphaScale); \ + } while (0) + #define MAKENAME(suffix) S16_alpha_D32 ## suffix #define DSTSIZE 32 #define SRCTYPE uint16_t #define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config); \ SkASSERT(state.fAlphaScale < 256) -#define PREAMBLE(state) unsigned scale = state.fAlphaScale -#define RETURNDST(src) SkAlphaMulQ(SkPixel16ToPixel32(src), scale) +#define PREAMBLE(state) unsigned alphaScale = state.fAlphaScale +#define RETURNDST(src) SkAlphaMulQ(SkPixel16ToPixel32(src), alphaScale) #define SRC_TO_FILTER(src) src -#define FILTER_TO_DST(c) SkAlphaMulQ(SkExpanded_565_To_PMColor(c), scale) #include "SkBitmapProcState_sample.h" // SRC == Index8 #undef FILTER_PROC -#define FILTER_PROC(x, y, a, b, c, d) Filter_32(x, y, a, b, c, d) +#define FILTER_PROC(x, y, a, b, c, d, dst) Filter_32_opaque(x, y, a, b, c, d, dst) #define MAKENAME(suffix) SI8_opaque_D32 ## suffix #define DSTSIZE 32 @@ -173,53 +157,63 @@ static inline U8CPU Filter_8(unsigned x, unsigned y, #define PREAMBLE(state) const SkPMColor* SK_RESTRICT table = state.fBitmap->getColorTable()->lockColors() #define RETURNDST(src) table[src] #define SRC_TO_FILTER(src) table[src] -#define FILTER_TO_DST(c) c #define POSTAMBLE(state) state.fBitmap->getColorTable()->unlockColors(false) #include "SkBitmapProcState_sample.h" +#undef FILTER_PROC +#define FILTER_PROC(x, y, a, b, c, d, dst) Filter_32_alpha(x, y, a, b, c, d, dst, alphaScale) + #define MAKENAME(suffix) SI8_alpha_D32 ## suffix #define DSTSIZE 32 #define SRCTYPE uint8_t #define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kIndex8_Config); \ SkASSERT(state.fAlphaScale < 256) -#define PREAMBLE(state) unsigned scale = state.fAlphaScale; \ +#define PREAMBLE(state) unsigned alphaScale = state.fAlphaScale; \ const SkPMColor* SK_RESTRICT table = state.fBitmap->getColorTable()->lockColors() -#define RETURNDST(src) SkAlphaMulQ(table[src], scale) +#define RETURNDST(src) SkAlphaMulQ(table[src], alphaScale) #define SRC_TO_FILTER(src) table[src] -#define FILTER_TO_DST(c) SkAlphaMulQ(c, scale) #define POSTAMBLE(state) state.fBitmap->getColorTable()->unlockColors(false) #include "SkBitmapProcState_sample.h" // SRC == 4444 #undef FILTER_PROC -#define FILTER_PROC(x, y, a, b, c, d) Filter_4444_D32(x, y, a, b, c, d) +#define FILTER_PROC(x, y, a, b, c, d, dst) *(dst) = Filter_4444_D32(x, y, a, b, c, d) #define MAKENAME(suffix) S4444_opaque_D32 ## suffix #define DSTSIZE 32 #define SRCTYPE SkPMColor16 #define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_4444_Config); \ -SkASSERT(state.fAlphaScale == 256) + SkASSERT(state.fAlphaScale == 256) #define RETURNDST(src) SkPixel4444ToPixel32(src) #define SRC_TO_FILTER(src) src -#define FILTER_TO_DST(c) c #include "SkBitmapProcState_sample.h" +#undef FILTER_PROC +#define FILTER_PROC(x, y, a, b, c, d, dst) \ + do { \ + uint32_t tmp = Filter_4444_D32(x, y, a, b, c, d); \ + *(dst) = SkAlphaMulQ(tmp, alphaScale); \ + } while (0) + #define MAKENAME(suffix) S4444_alpha_D32 ## suffix #define DSTSIZE 32 #define SRCTYPE SkPMColor16 #define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_4444_Config); \ -SkASSERT(state.fAlphaScale < 256) -#define PREAMBLE(state) unsigned scale = state.fAlphaScale -#define RETURNDST(src) SkAlphaMulQ(SkPixel4444ToPixel32(src), scale) + SkASSERT(state.fAlphaScale < 256) +#define PREAMBLE(state) unsigned alphaScale = state.fAlphaScale +#define RETURNDST(src) SkAlphaMulQ(SkPixel4444ToPixel32(src), alphaScale) #define SRC_TO_FILTER(src) src -#define FILTER_TO_DST(c) SkAlphaMulQ(c, scale) #include "SkBitmapProcState_sample.h" // SRC == A8 #undef FILTER_PROC -#define FILTER_PROC(x, y, a, b, c, d) Filter_8(x, y, a, b, c, d) +#define FILTER_PROC(x, y, a, b, c, d, dst) \ + do { \ + unsigned tmp = Filter_8(x, y, a, b, c, d); \ + *(dst) = SkAlphaMulQ(pmColor, SkAlpha255To256(tmp)); \ + } while (0) #define MAKENAME(suffix) SA8_alpha_D32 ## suffix #define DSTSIZE 32 @@ -229,7 +223,6 @@ SkASSERT(state.fAlphaScale < 256) #define PREAMBLE(state) const SkPMColor pmColor = state.fPaintPMColor; #define RETURNDST(src) SkAlphaMulQ(pmColor, SkAlpha255To256(src)) #define SRC_TO_FILTER(src) src -#define FILTER_TO_DST(c) SkAlphaMulQ(pmColor, SkAlpha255To256(c)) #include "SkBitmapProcState_sample.h" /***************************************************************************** @@ -241,7 +234,12 @@ SkASSERT(state.fAlphaScale < 256) // SRC == 8888 #undef FILTER_PROC -#define FILTER_PROC(x, y, a, b, c, d) Filter_32(x, y, a, b, c, d) +#define FILTER_PROC(x, y, a, b, c, d, dst) \ + do { \ + SkPMColor dstColor; \ + Filter_32_opaque(x, y, a, b, c, d, &dstColor); \ + (*dst) = SkPixel32ToPixel16(dstColor); \ + } while (0) #define MAKENAME(suffix) S32_D16 ## suffix #define DSTSIZE 16 @@ -250,13 +248,16 @@ SkASSERT(state.fAlphaScale < 256) SkASSERT(state.fBitmap->isOpaque()) #define RETURNDST(src) SkPixel32ToPixel16(src) #define SRC_TO_FILTER(src) src -#define FILTER_TO_DST(c) SkPixel32ToPixel16(c) #include "SkBitmapProcState_sample.h" // SRC == 565 #undef FILTER_PROC -#define FILTER_PROC(x, y, a, b, c, d) Filter_565_Expanded(x, y, a, b, c, d) +#define FILTER_PROC(x, y, a, b, c, d, dst) \ + do { \ + uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d); \ + *(dst) = SkCompact_rgb_16((tmp) >> 5); \ + } while (0) #define MAKENAME(suffix) S16_D16 ## suffix #define DSTSIZE 16 @@ -264,13 +265,16 @@ SkASSERT(state.fAlphaScale < 256) #define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config) #define RETURNDST(src) src #define SRC_TO_FILTER(src) src -#define FILTER_TO_DST(c) SkCompact_rgb_16((c) >> 5) #include "SkBitmapProcState_sample.h" // SRC == Index8 #undef FILTER_PROC -#define FILTER_PROC(x, y, a, b, c, d) Filter_565_Expanded(x, y, a, b, c, d) +#define FILTER_PROC(x, y, a, b, c, d, dst) \ + do { \ + uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d); \ + *(dst) = SkCompact_rgb_16((tmp) >> 5); \ + } while (0) #define MAKENAME(suffix) SI8_D16 ## suffix #define DSTSIZE 16 @@ -280,14 +284,18 @@ SkASSERT(state.fAlphaScale < 256) #define PREAMBLE(state) const uint16_t* SK_RESTRICT table = state.fBitmap->getColorTable()->lock16BitCache() #define RETURNDST(src) table[src] #define SRC_TO_FILTER(src) table[src] -#define FILTER_TO_DST(c) SkCompact_rgb_16(c >> 5) #define POSTAMBLE(state) state.fBitmap->getColorTable()->unlock16BitCache() #include "SkBitmapProcState_sample.h" /////////////////////////////////////////////////////////////////////////////// #undef FILTER_PROC -#define FILTER_PROC(x, y, a, b, c, d) Filter_565_Expanded(x, y, a, b, c, d) +#define FILTER_PROC(x, y, a, b, c, d, dst) \ + do { \ + uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d); \ + *(dst) = SkCompact_rgb_16((tmp) >> 5); \ + } while (0) + #define TILEX_PROCF(fx, max) SkClampMax((fx) >> 16, max) #define TILEY_PROCF(fy, max) SkClampMax((fy) >> 16, max) @@ -299,7 +307,6 @@ SkASSERT(state.fAlphaScale < 256) #define DSTTYPE uint16_t #define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config) #define SRC_TO_FILTER(src) src -#define FILTER_TO_DST(c) SkCompact_rgb_16((c) >> 5) #include "SkBitmapProcState_shaderproc.h" @@ -313,7 +320,6 @@ SkASSERT(state.fAlphaScale < 256) #define DSTTYPE uint16_t #define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config) #define SRC_TO_FILTER(src) src -#define FILTER_TO_DST(c) SkCompact_rgb_16((c) >> 5) #include "SkBitmapProcState_shaderproc.h" /////////////////////////////////////////////////////////////////////////////// diff --git a/src/core/SkBitmapProcState_filter.h b/src/core/SkBitmapProcState_filter.h new file mode 100644 index 0000000000..706ff7d25d --- /dev/null +++ b/src/core/SkBitmapProcState_filter.h @@ -0,0 +1,157 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef __arm__ + #include <machine/cpu-features.h> +#endif +#include "SkColorPriv.h" + +#if defined(__ARM_HAVE_NEON) && !defined(SK_CPU_BENDIAN) +static inline void Filter_32_opaque_neon(unsigned x, unsigned y, + SkPMColor a00, SkPMColor a01, + SkPMColor a10, SkPMColor a11, + SkPMColor *dst) { + asm volatile( + "vdup.8 d0, %[y] \n\t" // duplicate y into d0 + "vmov.u8 d16, #16 \n\t" // set up constant in d16 + "vsub.u8 d1, d16, d0 \n\t" // d1 = 16-y + + "vdup.32 d4, %[a00] \n\t" // duplicate a00 into d4 + "vdup.32 d5, %[a10] \n\t" // duplicate a10 into d5 + "vmov.32 d4[1], %[a01] \n\t" // set top of d4 to a01 + "vmov.32 d5[1], %[a11] \n\t" // set top of d5 to a11 + + "vmull.u8 q3, d4, d1 \n\t" // q3 = [a01|a00] * (16-y) + "vmull.u8 q0, d5, d0 \n\t" // q0 = [a11|a10] * y + + "vdup.16 d5, %[x] \n\t" // duplicate x into d5 + "vmov.u16 d16, #16 \n\t" // set up constant in d16 + "vsub.u16 d3, d16, d5 \n\t" // d3 = 16-x + + "vmul.i16 d4, d7, d5 \n\t" // d4 = a01 * x + "vmla.i16 d4, d1, d5 \n\t" // d4 += a11 * x + "vmla.i16 d4, d6, d3 \n\t" // d4 += a00 * (16-x) + "vmla.i16 d4, d0, d3 \n\t" // d4 += a10 * (16-x) + "vshrn.i16 d0, q2, #8 \n\t" // shift down result by 8 + "vst1.32 {d0[0]}, [%[dst]] \n\t" // store result + : + : [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst) + : "cc", "memory", "r4", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16" + ); +} + +static inline void Filter_32_alpha_neon(unsigned x, unsigned y, + SkPMColor a00, SkPMColor a01, + SkPMColor a10, SkPMColor a11, + SkPMColor *dst, uint16_t scale) { + asm volatile( + "vdup.8 d0, %[y] \n\t" // duplicate y into d0 + "vmov.u8 d16, #16 \n\t" // set up constant in d16 + "vsub.u8 d1, d16, d0 \n\t" // d1 = 16-y + + "vdup.32 d4, %[a00] \n\t" // duplicate a00 into d4 + "vdup.32 d5, %[a10] \n\t" // duplicate a10 into d5 + "vmov.32 d4[1], %[a01] \n\t" // set top of d4 to a01 + "vmov.32 d5[1], %[a11] \n\t" // set top of d5 to a11 + + "vmull.u8 q3, d4, d1 \n\t" // q3 = [a01|a00] * (16-y) + "vmull.u8 q0, d5, d0 \n\t" // q0 = [a11|a10] * y + + "vdup.16 d5, %[x] \n\t" // duplicate x into d5 + "vmov.u16 d16, #16 \n\t" // set up constant in d16 + "vsub.u16 d3, d16, d5 \n\t" // d3 = 16-x + + "vmul.i16 d4, d7, d5 \n\t" // d4 = a01 * x + "vmla.i16 d4, d1, d5 \n\t" // d4 += a11 * x + "vmla.i16 d4, d6, d3 \n\t" // d4 += a00 * (16-x) + "vmla.i16 d4, d0, d3 \n\t" // d4 += a10 * (16-x) + "vdup.16 d3, %[scale] \n\t" // duplicate scale into d3 + "vshr.u16 d4, d4, #8 \n\t" // shift down result by 8 + "vmul.i16 d4, d4, d3 \n\t" // multiply result by scale + "vshrn.i16 d0, q2, #8 \n\t" // shift down result by 8 + "vst1.32 {d0[0]}, [%[dst]] \n\t" // store result + : + : [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst), [scale] "r" (scale) + : "cc", "memory", "r4", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16" + ); +} +#define Filter_32_opaque Filter_32_opaque_neon +#define Filter_32_alpha Filter_32_alpha_neon +#else +static inline void Filter_32_opaque_portable(unsigned x, unsigned y, + SkPMColor a00, SkPMColor a01, + SkPMColor a10, SkPMColor a11, + SkPMColor* dstColor) { + SkASSERT((unsigned)x <= 0xF); + SkASSERT((unsigned)y <= 0xF); + + int xy = x * y; + uint32_t mask = gMask_00FF00FF; //0xFF00FF; + + int scale = 256 - 16*y - 16*x + xy; + uint32_t lo = (a00 & mask) * scale; + uint32_t hi = ((a00 >> 8) & mask) * scale; + + scale = 16*x - xy; + lo += (a01 & mask) * scale; + hi += ((a01 >> 8) & mask) * scale; + + scale = 16*y - xy; + lo += (a10 & mask) * scale; + hi += ((a10 >> 8) & mask) * scale; + + lo += (a11 & mask) * xy; + hi += ((a11 >> 8) & mask) * xy; + + *dstColor = ((lo >> 8) & mask) | (hi & ~mask); +} + +static inline void Filter_32_alpha_portable(unsigned x, unsigned y, + SkPMColor a00, SkPMColor a01, + SkPMColor a10, SkPMColor a11, + SkPMColor* dstColor, + unsigned alphaScale) { + SkASSERT((unsigned)x <= 0xF); + SkASSERT((unsigned)y <= 0xF); + SkASSERT(alphaScale <= 256); + + int xy = x * y; + uint32_t mask = gMask_00FF00FF; //0xFF00FF; + + int scale = 256 - 16*y - 16*x + xy; + uint32_t lo = (a00 & mask) * scale; + uint32_t hi = ((a00 >> 8) & mask) * scale; + + scale = 16*x - xy; + lo += (a01 & mask) * scale; + hi += ((a01 >> 8) & mask) * scale; + + scale = 16*y - xy; + lo += (a10 & mask) * scale; + hi += ((a10 >> 8) & mask) * scale; + + lo += (a11 & mask) * xy; + hi += ((a11 >> 8) & mask) * xy; + + lo = ((lo >> 8) & mask) * alphaScale; + hi = ((hi >> 8) & mask) * alphaScale; + + *dstColor = ((lo >> 8) & mask) | (hi & ~mask); +} +#define Filter_32_opaque Filter_32_opaque_portable +#define Filter_32_alpha Filter_32_alpha_portable +#endif + diff --git a/src/core/SkBitmapProcState_sample.h b/src/core/SkBitmapProcState_sample.h index 954afca75c..4e1f1395e3 100644 --- a/src/core/SkBitmapProcState_sample.h +++ b/src/core/SkBitmapProcState_sample.h @@ -76,6 +76,7 @@ static void MAKENAME(_nofilter_DX)(const SkBitmapProcState& s, SkASSERT((unsigned)xy[0] < (unsigned)s.fBitmap->height()); srcAddr = (const SRCTYPE*)((const char*)srcAddr + xy[0] * s.fBitmap->rowBytes()); + xy += 1; SRCTYPE src; @@ -85,20 +86,20 @@ static void MAKENAME(_nofilter_DX)(const SkBitmapProcState& s, BITMAPPROC_MEMSET(colors, dstValue, count); } else { int i; - const uint16_t* SK_RESTRICT xx = (const uint16_t*)(xy + 1); for (i = (count >> 2); i > 0; --i) { - SkASSERT(*xx < (unsigned)s.fBitmap->width()); - src = srcAddr[*xx++]; *colors++ = RETURNDST(src); - - SkASSERT(*xx < (unsigned)s.fBitmap->width()); - src = srcAddr[*xx++]; *colors++ = RETURNDST(src); + uint32_t xx0 = *xy++; + uint32_t xx1 = *xy++; + SRCTYPE x0 = srcAddr[UNPACK_PRIMARY_SHORT(xx0)]; + SRCTYPE x1 = srcAddr[UNPACK_SECONDARY_SHORT(xx0)]; + SRCTYPE x2 = srcAddr[UNPACK_PRIMARY_SHORT(xx1)]; + SRCTYPE x3 = srcAddr[UNPACK_SECONDARY_SHORT(xx1)]; - SkASSERT(*xx < (unsigned)s.fBitmap->width()); - src = srcAddr[*xx++]; *colors++ = RETURNDST(src); - - SkASSERT(*xx < (unsigned)s.fBitmap->width()); - src = srcAddr[*xx++]; *colors++ = RETURNDST(src); + *colors++ = RETURNDST(x0); + *colors++ = RETURNDST(x1); + *colors++ = RETURNDST(x2); + *colors++ = RETURNDST(x3); } + const uint16_t* SK_RESTRICT xx = (const uint16_t*)(xy); for (i = (count & 3); i > 0; --i) { SkASSERT(*xx < (unsigned)s.fBitmap->width()); src = srcAddr[*xx++]; *colors++ = RETURNDST(src); @@ -144,12 +145,13 @@ static void MAKENAME(_filter_DX)(const SkBitmapProcState& s, unsigned subX = x0 & 0xF; x0 >>= 4; - uint32_t c = FILTER_PROC(subX, subY, - SRC_TO_FILTER(row0[x0]), - SRC_TO_FILTER(row0[x1]), - SRC_TO_FILTER(row1[x0]), - SRC_TO_FILTER(row1[x1])); - *colors++ = FILTER_TO_DST(c); + FILTER_PROC(subX, subY, + SRC_TO_FILTER(row0[x0]), + SRC_TO_FILTER(row0[x1]), + SRC_TO_FILTER(row1[x0]), + SRC_TO_FILTER(row1[x1]), + colors); + colors += 1; } while (--count != 0); @@ -186,12 +188,13 @@ static void MAKENAME(_filter_DXDY)(const SkBitmapProcState& s, const SRCTYPE* SK_RESTRICT row0 = (const SRCTYPE*)(srcAddr + y0 * rb); const SRCTYPE* SK_RESTRICT row1 = (const SRCTYPE*)(srcAddr + y1 * rb); - uint32_t c = FILTER_PROC(subX, subY, - SRC_TO_FILTER(row0[x0]), - SRC_TO_FILTER(row0[x1]), - SRC_TO_FILTER(row1[x0]), - SRC_TO_FILTER(row1[x1])); - *colors++ = FILTER_TO_DST(c); + FILTER_PROC(subX, subY, + SRC_TO_FILTER(row0[x0]), + SRC_TO_FILTER(row0[x1]), + SRC_TO_FILTER(row1[x0]), + SRC_TO_FILTER(row1[x1]), + colors); + colors += 1; } while (--count != 0); #ifdef POSTAMBLE diff --git a/src/core/SkBitmapProcState_shaderproc.h b/src/core/SkBitmapProcState_shaderproc.h index 070a62bf90..b4a53e4006 100644 --- a/src/core/SkBitmapProcState_shaderproc.h +++ b/src/core/SkBitmapProcState_shaderproc.h @@ -41,12 +41,13 @@ static void SCALE_FILTER_NAME(const SkBitmapProcState& s, int x, int y, unsigned x0 = TILEX_PROCF(fx, maxX); unsigned x1 = TILEX_PROCF((fx + oneX), maxX); - uint32_t c = FILTER_PROC(subX, subY, - SRC_TO_FILTER(row0[x0]), - SRC_TO_FILTER(row0[x1]), - SRC_TO_FILTER(row1[x0]), - SRC_TO_FILTER(row1[x1])); - *colors++ = FILTER_TO_DST(c); + FILTER_PROC(subX, subY, + SRC_TO_FILTER(row0[x0]), + SRC_TO_FILTER(row0[x1]), + SRC_TO_FILTER(row1[x0]), + SRC_TO_FILTER(row1[x1]), + colors); + colors += 1; fx += dx; } while (--count != 0); diff --git a/src/opts/SkBitmapProcState_opts_arm.cpp b/src/opts/SkBitmapProcState_opts_arm.cpp index 1bd4d91949..707304c964 100644 --- a/src/opts/SkBitmapProcState_opts_arm.cpp +++ b/src/opts/SkBitmapProcState_opts_arm.cpp @@ -19,264 +19,6 @@ #include "SkColorPriv.h" #include "SkUtils.h" -#if __ARM_ARCH__ >= 5 && !defined(SK_CPU_BENDIAN) -void S16_D16_nofilter_DX_arm(const SkBitmapProcState& s, - const uint32_t* SK_RESTRICT xy, - int count, uint16_t* SK_RESTRICT colors) { - SkASSERT(count > 0 && colors != NULL); - SkASSERT(s.fInvType <= (SkMatrix::kTranslate_Mask | SkMatrix::kScale_Mask)); - SkASSERT(s.fDoFilter == false); - - const uint16_t* SK_RESTRICT srcAddr = (const uint16_t*)s.fBitmap->getPixels(); - - // buffer is y32, x16, x16, x16, x16, x16 - // bump srcAddr to the proper row, since we're told Y never changes - SkASSERT((unsigned)xy[0] < (unsigned)s.fBitmap->height()); - srcAddr = (const uint16_t*)((const char*)srcAddr + - xy[0] * s.fBitmap->rowBytes()); - - uint16_t src; - - if (1 == s.fBitmap->width()) { - src = srcAddr[0]; - uint16_t dstValue = src; - sk_memset16(colors, dstValue, count); - } else { - int i; - const uint16_t* SK_RESTRICT xx = (const uint16_t*)(xy + 1); - - if((count >> 2) > 0) { - asm volatile ( - "mov r8, %[count], lsr #2 \n\t" // shift down count so we iterate in fours - "1: \n\t" - "subs r8, r8, #1 \n\t" // decrement loop counter - "ldrh r4, [%[xx]], #2 \n\t" // load xx value, update ptr - "ldrh r5, [%[xx]], #2 \n\t" // load xx value, update ptr - "ldrh r6, [%[xx]], #2 \n\t" // load xx value, update ptr - "add r4, r4, r4 \n\t" // double offset for half word addressing - "ldrh r7, [%[xx]], #2 \n\t" // load xx value, update ptr - "add r5, r5, r5 \n\t" // double offset for half word addressing - "ldrh r4, [%[srcAddr], r4] \n\t" // load value from srcAddr[*xx] - "add r6, r6, r6 \n\t" // double offset for half word addressing - "ldrh r5, [%[srcAddr], r5] \n\t" // load value from srcAddr[*xx] - "add r7, r7, r7 \n\t" // double offset for half word addressing - "ldrh r6, [%[srcAddr], r6] \n\t" // load value from srcAddr[*xx] - "ldrh r7, [%[srcAddr], r7] \n\t" // load value from srcAddr[*xx] - "strh r4, [%[colors]], #2 \n\t" // store value to colors, update ptr - "strh r5, [%[colors]], #2 \n\t" // store value to colors, update ptr - "strh r6, [%[colors]], #2 \n\t" // store value to colors, update ptr - "strh r7, [%[colors]], #2 \n\t" // store value to colors, update ptr - "bgt 1b \n\t" // branch if loop counter > 0 - : [count] "+r" (count), [xx] "+r" (xx), [srcAddr] "+r" (srcAddr), [colors] "+r" (colors) - : - : "cc", "memory", "r4", "r5", "r6", "r7", "r8" - ); - } - for (i = (count & 3); i > 0; --i) { - SkASSERT(*xx < (unsigned)s.fBitmap->width()); - src = srcAddr[*xx++]; *colors++ = src; - } - } -} -#endif //__ARM_ARCH__ >= 5 && !defined(SK_CPU_BENDIAN) - -#if defined(__ARM_HAVE_NEON) && !defined(SK_CPU_BENDIAN) -void S16_D16_filter_DX_arm(const SkBitmapProcState& s, - const uint32_t* SK_RESTRICT xy, - int count, uint16_t* SK_RESTRICT colors) -{ - SkASSERT(count > 0 && colors != NULL); - SkASSERT(s.fDoFilter); - - const char* SK_RESTRICT srcAddr = (const char*)s.fBitmap->getPixels(); - unsigned rb = s.fBitmap->rowBytes(); - unsigned subY; - const uint16_t* SK_RESTRICT row0; - const uint16_t* SK_RESTRICT row1; - unsigned int rowgap; - const uint32_t c7ffe = 0x7ffe; - - // setup row ptrs and update proc_table - { - uint32_t XY = *xy++; - unsigned y0 = XY >> 14; - row0 = (const uint16_t*)(srcAddr + (y0 >> 4) * rb); - row1 = (const uint16_t*)(srcAddr + (XY & 0x3FFF) * rb); - rowgap = (unsigned int)row1 - (unsigned int)row0; - subY = y0 & 0xF; - } - - unsigned int count4 = ((count >> 2) << 4) | subY; - count &= 3; - - asm volatile ( - "and r4, %[count4], #0xF \n\t" // mask off subY - "vmov.u16 d2[0], r4 \n\t" // move subY to Neon - "rsb r4, r4, #16 \n\t" // r4 = 16-subY - "vmov.u16 d2[1], r4 \n\t" // move 16-subY to Neon - "movs %[count4], %[count4], lsr #4 \n\t" // shift count down, lose subY - "vmov.u16 d3, #16 \n\t" // create constant - "vmov.u16 q2, #31 \n\t" // set up blue mask - "beq 2f \n\t" // if count4 == 0, exit - - "1: \n\t" - "ldmia %[xy]!, {r4, r5, r6, r7} \n\t" // load four xy values - // xy = [ x0:14 | subX:4 | x1:14 ] - // extract subX for iter 0-3 - "vmov d0, r4, r5 \n\t" // move xy to Neon, iter 0-1 - "vmov d1, r6, r7 \n\t" // move xy to Neon, iter 2-3 - - // Load 16 pixels for four filter iterations from memory. - // Because the source pixels are potentially scattered, each lane - // of each vector is loaded separately. Also, the X sub pixel - // offset is extracted. - - // iter 0 - "mov r8, r4, lsr #18 \n\t" // extract x0 - "and r4, %[c7ffe], r4, lsl #1 \n\t" // extract x1 and make byte offset - "add r8, %[row0], r8, lsl #1 \n\t" // calculate address of row0[x0] - "add r4, %[row0], r4 \n\t" // calculate address of row0[x1] - "vld1.u16 {d16[0]}, [r8], %[rowgap] \n\t" // load row0[x0] and move ptr to row1 - "vld1.u16 {d17[0]}, [r4], %[rowgap] \n\t" // load row0[x1] and move ptr to row1 - "vld1.u16 {d18[0]}, [r8] \n\t" // load row1[x0] - "vld1.u16 {d19[0]}, [r4] \n\t" // load row1[x1] - - // iter 1 - "mov r8, r5, lsr #18 \n\t" // extract x0 - "and r5, %[c7ffe], r5, lsl #1 \n\t" // extract x1 and make byte offset - "add r8, %[row0], r8, lsl #1 \n\t" // calculate address of row0[x0] - "add r5, %[row0], r5 \n\t" // calculate address of row0[x1] - "vld1.u16 {d16[1]}, [r8], %[rowgap] \n\t" // load row0[x0] and move ptr to row1 - "vld1.u16 {d17[1]}, [r5], %[rowgap] \n\t" // load row0[x1] and move ptr to row1 - "vld1.u16 {d18[1]}, [r8] \n\t" // load row1[x0] - "vld1.u16 {d19[1]}, [r5] \n\t" // load row1[x1] - - "vshrn.u32 d0, q0, #2 \n\t" // shift right subX by 2 and narrow - // iter 2 - "mov r8, r6, lsr #18 \n\t" // extract x0 - "and r6, %[c7ffe], r6, lsl #1 \n\t" // extract x1 and make byte offset - "add r8, %[row0], r8, lsl #1 \n\t" // calculate address of row0[x0] - "add r6, %[row0], r6 \n\t" // calculate address of row0[x1] - "vld1.u16 {d16[2]}, [r8], %[rowgap] \n\t" // load row0[x0] and move ptr to row1 - "vld1.u16 {d17[2]}, [r6], %[rowgap] \n\t" // load row0[x1] and move ptr to row1 - "vld1.u16 {d18[2]}, [r8] \n\t" // load row1[x0] - "vld1.u16 {d19[2]}, [r6] \n\t" // load row1[x1] - - "vshr.u16 d0, d0, #12 \n\t" // shift right subX to bottom 4 bits - // iter 3 - "mov r8, r7, lsr #18 \n\t" // extract x0 - "and r7, %[c7ffe], r7, lsl #1 \n\t" // extract x1 and make byte offset - "add r8, %[row0], r8, lsl #1 \n\t" // calculate address of row0[x0] - "add r7, %[row0], r7 \n\t" // calculate address of row0[x1] - "vld1.u16 {d16[3]}, [r8], %[rowgap] \n\t" // load row0[x0] and move ptr to row1 - "vld1.u16 {d17[3]}, [r7], %[rowgap] \n\t" // load row0[x1] and move ptr to row1 - "vld1.u16 {d18[3]}, [r8] \n\t" // load row1[x0] - "vld1.u16 {d19[3]}, [r7] \n\t" // load row1[x1] - - // Registers d16-d19 now contain pixels a00-a11 for 4 iterations: - // d16 = [ a00_3 | a00_2 | a00_1 | a00_0 ] - // d17 = [ a01_3 | a01_2 | a01_1 | a01_0 ] - // d18 = [ a10_3 | a10_2 | a10_1 | a10_0 ] - // d19 = [ a11_3 | a11_2 | a11_1 | a11_0 ] - // - // Extract RGB channels from each 565 pixel. - - "vshl.i16 q11, q8, #5 \n\t" // shift greens to top of each lane - "vand q12, q8, q2 \n\t" // mask blues - "vshr.u16 q10, q8, #11 \n\t" // shift reds to bottom of each lane - "vshr.u16 q11, q11, #10 \n\t" // shift greens to bottom of each lane - "vshl.i16 q14, q9, #5 \n\t" // shift greens to top of each lane - "vand q15, q9, q2 \n\t" // mask blues - "vshr.u16 q13, q9, #11 \n\t" // shift reds to bottom of each lane - "vshr.u16 q14, q14, #10 \n\t" // shift greens to bottom of each lane - - // There are now six Q regs, containing - // q10 = [ a01r3 | a01r2 | a01r1 | a01r0 | a00r3 | a00r2 | a00r1 | a00r0 ] - // q11 = [ a01g3 | a01g2 | a01g1 | a01g0 | a00g3 | a00g2 | a00g1 | a00g0 ] - // q12 = [ a01b3 | a01b2 | a01b1 | a01b0 | a00b3 | a00b2 | a00b1 | a00b0 ] - // q13 = [ a11r3 | a11r2 | a11r1 | a11r0 | a01r3 | a01r2 | a01r1 | a01r0 ] - // q14 = [ a11g3 | a11g2 | a11g1 | a11g0 | a01g3 | a01g2 | a01g1 | a01g0 ] - // q15 = [ a11b3 | a11b2 | a11b1 | a11b0 | a01b3 | a01b2 | a01b1 | a01b0 ] - // where aXXyZ: XX = pixel position, y = colour channel, Z = iteration - // d0 = subX, d1 = 16-subX - // d2[0] = subY, d2[1] = 16-subY - // d3 = 16, q2(d4d5) = 31 - - // The filter: - // - // | | - // ---- a00 ---- a01 ----> * (16-y) - // | | - // -----a10 ---- a11 ----> * y - // | | - // V V - // * (16-x) * x - // - // result = (a00.(16-y).(16-x) + a01.(16-y).x + a10.(16-x).y + a11.x.y) >> 8 - // - - "vsub.u16 d1, d3, d0 \n\t" // calculate 16-subX - // multiply top pixel pair by (16-y) - "vmul.i16 q10, q10, d2[1] \n\t" // top reds multiplied by (16-y) - "vmul.i16 q11, q11, d2[1] \n\t" // top greens multiplied by (16-y) - "vmul.i16 q12, q12, d2[1] \n\t" // top blues multiplied by (16-y) - // multiply bottom pixel pair by y - "vmul.i16 q13, q13, d2[0] \n\t" // bottom reds multiplied by y - "vmul.i16 q14, q14, d2[0] \n\t" // bottom greens multiplied by y - "vmul.i16 q15, q15, d2[0] \n\t" // bottom blues multiplied by y - // mul/acc left pixels by (16-x) - "vmul.i16 d16, d20, d1 \n\t" // resultr = a00r * (16-x) - "vmul.i16 d17, d22, d1 \n\t" // resultg = a00g * (16-x) - "vmul.i16 d18, d24, d1 \n\t" // resultb = a00b * (16-x) - "vmla.i16 d16, d26, d1 \n\t" // resultr += a00r * (16-x) - "vmla.i16 d17, d28, d1 \n\t" // resultg += a00g * (16-x) - "vmla.i16 d18, d30, d1 \n\t" // resultb += a00b * (16-x) - // mul/acc right pixels by x - "vmla.i16 d16, d21, d0 \n\t" // resultr += a01r * x - "vmla.i16 d17, d23, d0 \n\t" // resultg += a01g * x - "vmla.i16 d18, d25, d0 \n\t" // resultb += a01b * x - "vmla.i16 d16, d27, d0 \n\t" // resultr += a11r * x - "vmla.i16 d17, d29, d0 \n\t" // resultg += a11g * x - "vmla.i16 d18, d31, d0 \n\t" // resultb += a11b * x - "subs %[count4], %[count4], #1 \n\t" // decrement counter - // shift results down 8 bits - "vshr.u16 q8, q8, #8 \n\t" // resultr >>= 8, resultg >>=8 - "vshr.u16 d18, d18, #8 \n\t" // resultb >>= 8 - // put rgb into 565 - "vsli.i16 d18, d17, #5 \n\t" // shift greens into blues - "vsli.i16 d18, d16, #11 \n\t" // shift reds into greens and blues - "vst1.i16 {d18}, [%[colors]]! \n\t" // store result - "bgt 1b \n\t" // if counter > 0, loop - "2: \n\t" // exit - : [xy] "+r" (xy), [count4] "+r" (count4), [colors] "+r" (colors) - : [row0] "r" (row0), [rowgap] "r" (rowgap), [c7ffe] "r" (c7ffe) - : "cc", "memory", "r4", "r5", "r6", "r7", "r8", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" - ); - - while (count != 0) { - uint32_t XX = *xy++; // x0:14 | subX:4 | x1:14 - unsigned x0 = XX >> 14; - unsigned x1 = XX & 0x3FFF; - unsigned subX = x0 & 0xF; - x0 >>= 4; - - uint32_t a00 = SkExpand_rgb_16(row0[x0]); - uint32_t a01 = SkExpand_rgb_16(row0[x1]); - uint32_t a10 = SkExpand_rgb_16(row1[x0]); - uint32_t a11 = SkExpand_rgb_16(row1[x1]); - - int xy = subX * subY >> 3; - uint32_t c = a00 * (32 - 2*subY - 2*subX + xy) + - a01 * (2*subX - xy) + - a10 * (2*subY - xy) + - a11 * xy; - - *colors++ = SkCompact_rgb_16(c>>5); - count--; - } -} -#endif //defined(__ARM_HAVE_NEON) && !defined(SK_CPU_BENDIAN) - #if __ARM_ARCH__ >= 6 && !defined(SK_CPU_BENDIAN) void SI8_D16_nofilter_DX_arm(const SkBitmapProcState& s, const uint32_t* SK_RESTRICT xy, @@ -438,229 +180,6 @@ void SI8_opaque_D32_nofilter_DX_arm(const SkBitmapProcState& s, } #endif //__ARM_ARCH__ >= 6 && !defined(SK_CPU_BENDIAN) -#if defined(__ARM_HAVE_NEON) && !defined(SK_CPU_BENDIAN) -static inline void Filter_32_direct(unsigned x, unsigned y, - SkPMColor a00, SkPMColor a01, - SkPMColor a10, SkPMColor a11, - SkPMColor *dst) { - asm volatile( - "vdup.8 d0, %[y] \n\t" // duplicate y into d0 - "vmov.u8 d16, #16 \n\t" // set up constant in d16 - "vsub.u8 d1, d16, d0 \n\t" // d1 = 16-y - - "vdup.32 d4, %[a00] \n\t" // duplicate a00 into d4 - "vdup.32 d5, %[a10] \n\t" // duplicate a10 into d5 - "vmov.32 d4[1], %[a01] \n\t" // set top of d4 to a01 - "vmov.32 d5[1], %[a11] \n\t" // set top of d5 to a11 - - "vmull.u8 q3, d4, d1 \n\t" // q3 = [a01|a00] * (16-y) - "vmull.u8 q0, d5, d0 \n\t" // q0 = [a11|a10] * y - - "vdup.16 d5, %[x] \n\t" // duplicate x into d5 - "vmov.u16 d16, #16 \n\t" // set up constant in d16 - "vsub.u16 d3, d16, d5 \n\t" // d3 = 16-x - - "vmul.i16 d4, d7, d5 \n\t" // d4 = a01 * x - "vmla.i16 d4, d1, d5 \n\t" // d4 += a11 * x - "vmla.i16 d4, d6, d3 \n\t" // d4 += a00 * (16-x) - "vmla.i16 d4, d0, d3 \n\t" // d4 += a10 * (16-x) - "vshrn.i16 d0, q2, #8 \n\t" // shift down result by 8 - "vst1.32 {d0[0]}, [%[dst]] \n\t" // store result - : - : [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst) - : "cc", "memory", "r4", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16" - ); -} - -static inline void Filter_32_direct_alpha(unsigned x, unsigned y, - SkPMColor a00, SkPMColor a01, - SkPMColor a10, SkPMColor a11, - SkPMColor *dst, uint16_t scale) { - asm volatile( - "vdup.8 d0, %[y] \n\t" // duplicate y into d0 - "vmov.u8 d16, #16 \n\t" // set up constant in d16 - "vsub.u8 d1, d16, d0 \n\t" // d1 = 16-y - - "vdup.32 d4, %[a00] \n\t" // duplicate a00 into d4 - "vdup.32 d5, %[a10] \n\t" // duplicate a10 into d5 - "vmov.32 d4[1], %[a01] \n\t" // set top of d4 to a01 - "vmov.32 d5[1], %[a11] \n\t" // set top of d5 to a11 - - "vmull.u8 q3, d4, d1 \n\t" // q3 = [a01|a00] * (16-y) - "vmull.u8 q0, d5, d0 \n\t" // q0 = [a11|a10] * y - - "vdup.16 d5, %[x] \n\t" // duplicate x into d5 - "vmov.u16 d16, #16 \n\t" // set up constant in d16 - "vsub.u16 d3, d16, d5 \n\t" // d3 = 16-x - - "vmul.i16 d4, d7, d5 \n\t" // d4 = a01 * x - "vmla.i16 d4, d1, d5 \n\t" // d4 += a11 * x - "vmla.i16 d4, d6, d3 \n\t" // d4 += a00 * (16-x) - "vmla.i16 d4, d0, d3 \n\t" // d4 += a10 * (16-x) - "vdup.16 d3, %[scale] \n\t" // duplicate scale into d3 - "vshr.u16 d4, d4, #8 \n\t" // shift down result by 8 - "vmul.i16 d4, d4, d3 \n\t" // multiply result by scale - "vshrn.i16 d0, q2, #8 \n\t" // shift down result by 8 - "vst1.32 {d0[0]}, [%[dst]] \n\t" // store result - : - : [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst), [scale] "r" (scale) - : "cc", "memory", "r4", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16" - ); -} - -void SI8_opaque_D32_filter_DX_arm(const SkBitmapProcState& s, - const uint32_t* SK_RESTRICT xy, - int count, SkPMColor* SK_RESTRICT colors) { - SkASSERT(count > 0 && colors != NULL); - SkASSERT(s.fDoFilter); - - const SkPMColor* SK_RESTRICT table = s.fBitmap->getColorTable()->lockColors(); - const char* SK_RESTRICT srcAddr = (const char*)s.fBitmap->getPixels(); - unsigned rb = s.fBitmap->rowBytes(); - unsigned subY; - const uint8_t* SK_RESTRICT row0; - const uint8_t* SK_RESTRICT row1; - - // setup row ptrs and update proc_table - { - uint32_t XY = *xy++; - unsigned y0 = XY >> 14; - row0 = (const uint8_t*)(srcAddr + (y0 >> 4) * rb); - row1 = (const uint8_t*)(srcAddr + (XY & 0x3FFF) * rb); - subY = y0 & 0xF; - } - - do { - uint32_t XX = *xy++; // x0:14 | 4 | x1:14 - unsigned x0 = XX >> 14; - unsigned x1 = XX & 0x3FFF; - unsigned subX = x0 & 0xF; - x0 >>= 4; - - Filter_32_direct(subX, subY, table[row0[x0]], - table[row0[x1]], - table[row1[x0]], - table[row1[x1]], colors); - colors++; - } while (--count != 0); - - s.fBitmap->getColorTable()->unlockColors(false); -} - -void SI8_opaque_D32_filter_DXDY_arm(const SkBitmapProcState& s, - const uint32_t* SK_RESTRICT xy, - int count, SkPMColor* SK_RESTRICT colors) { - SkASSERT(count > 0 && colors != NULL); - SkASSERT(s.fDoFilter); - - const SkPMColor* SK_RESTRICT table = s.fBitmap->getColorTable()->lockColors(); - const char* SK_RESTRICT srcAddr = (const char*)s.fBitmap->getPixels(); - int rb = s.fBitmap->rowBytes(); - - do { - uint32_t data = *xy++; - unsigned y0 = data >> 14; - unsigned y1 = data & 0x3FFF; - unsigned subY = y0 & 0xF; - y0 >>= 4; - - data = *xy++; - unsigned x0 = data >> 14; - unsigned x1 = data & 0x3FFF; - unsigned subX = x0 & 0xF; - x0 >>= 4; - - const uint8_t* SK_RESTRICT row0 = (const uint8_t*)(srcAddr + y0 * rb); - const uint8_t* SK_RESTRICT row1 = (const uint8_t*)(srcAddr + y1 * rb); - - Filter_32_direct(subX, subY, table[row0[x0]], - table[row0[x1]], - table[row1[x0]], - table[row1[x1]], colors); - colors++; - } while (--count != 0); - - s.fBitmap->getColorTable()->unlockColors(false); -} - -void SI8_alpha_D32_filter_DX_arm(const SkBitmapProcState& s, - const uint32_t* SK_RESTRICT xy, - int count, SkPMColor* SK_RESTRICT colors) { - SkASSERT(count > 0 && colors != NULL); - SkASSERT(s.fDoFilter); - - unsigned scale = s.fAlphaScale; - const SkPMColor* SK_RESTRICT table = s.fBitmap->getColorTable()->lockColors(); - const char* SK_RESTRICT srcAddr = (const char*)s.fBitmap->getPixels(); - unsigned rb = s.fBitmap->rowBytes(); - unsigned subY; - const uint8_t* SK_RESTRICT row0; - const uint8_t* SK_RESTRICT row1; - - // setup row ptrs and update proc_table - { - uint32_t XY = *xy++; - unsigned y0 = XY >> 14; - row0 = (const uint8_t*)(srcAddr + (y0 >> 4) * rb); - row1 = (const uint8_t*)(srcAddr + (XY & 0x3FFF) * rb); - subY = y0 & 0xF; - } - - do { - uint32_t XX = *xy++; // x0:14 | 4 | x1:14 - unsigned x0 = XX >> 14; - unsigned x1 = XX & 0x3FFF; - unsigned subX = x0 & 0xF; - x0 >>= 4; - - Filter_32_direct_alpha(subX, subY, table[row0[x0]], - table[row0[x1]], - table[row1[x0]], - table[row1[x1]], colors, scale); - colors++; - } while (--count != 0); - - s.fBitmap->getColorTable()->unlockColors(false); -} - -void SI8_alpha_D32_filter_DXDY_arm(const SkBitmapProcState& s, - const uint32_t* SK_RESTRICT xy, - int count, SkPMColor* SK_RESTRICT colors) { - SkASSERT(count > 0 && colors != NULL); - SkASSERT(s.fDoFilter); - - unsigned scale = s.fAlphaScale; - const SkPMColor* SK_RESTRICT table = s.fBitmap->getColorTable()->lockColors(); - const char* SK_RESTRICT srcAddr = (const char*)s.fBitmap->getPixels(); - int rb = s.fBitmap->rowBytes(); - - do { - uint32_t data = *xy++; - unsigned y0 = data >> 14; - unsigned y1 = data & 0x3FFF; - unsigned subY = y0 & 0xF; - y0 >>= 4; - - data = *xy++; - unsigned x0 = data >> 14; - unsigned x1 = data & 0x3FFF; - unsigned subX = x0 & 0xF; - x0 >>= 4; - - const uint8_t* SK_RESTRICT row0 = (const uint8_t*)(srcAddr + y0 * rb); - const uint8_t* SK_RESTRICT row1 = (const uint8_t*)(srcAddr + y1 * rb); - - Filter_32_direct_alpha(subX, subY, table[row0[x0]], - table[row0[x1]], - table[row1[x0]], - table[row1[x1]], colors, scale); - colors++; - } while (--count != 0); - - s.fBitmap->getColorTable()->unlockColors(false); -} -#endif //defined(__ARM_HAVE_NEON) && !defined(SK_CPU_BENDIAN) - /////////////////////////////////////////////////////////////////////////////// /* If we replace a sampleproc, then we null-out the associated shaderproc, @@ -676,20 +195,6 @@ void SkBitmapProcState::platformProcs() { } switch (fBitmap->config()) { - case SkBitmap::kRGB_565_Config: -#if defined(__ARM_HAVE_NEON) && !defined(SK_CPU_BENDIAN) - if (justDx && doFilter) { - fSampleProc16 = S16_D16_filter_DX_arm; - fShaderProc16 = NULL; - } -#endif -#if __ARM_ARCH__ >= 5 && !defined(SK_CPU_BENDIAN) - if (justDx && !doFilter) { - fSampleProc16 = S16_D16_nofilter_DX_arm; - fShaderProc16 = NULL; - } -#endif - break; // k565 case SkBitmap::kIndex8_Config: #if __ARM_ARCH__ >= 6 && !defined(SK_CPU_BENDIAN) if (justDx && !doFilter) { @@ -698,30 +203,13 @@ void SkBitmapProcState::platformProcs() { fShaderProc16 = NULL; #endif if (isOpaque) { + // this one is only very slighty faster than the C version fSampleProc32 = SI8_opaque_D32_nofilter_DX_arm; fShaderProc32 = NULL; } } #endif -#if defined(__ARM_HAVE_NEON) && !defined(SK_CPU_BENDIAN) - if (doFilter) { - if (isOpaque) { - if (justDx) { - fSampleProc32 = SI8_opaque_D32_filter_DX_arm; - } else { - fSampleProc32 = SI8_opaque_D32_filter_DXDY_arm; - } - } else { // !isOpaque - if (justDx) { - fSampleProc32 = SI8_alpha_D32_filter_DX_arm; - } else { - fSampleProc32 = SI8_alpha_D32_filter_DXDY_arm; - } - } - fShaderProc32 = NULL; - } -#endif - break; // kIndex8 + break; default: break; } |