aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--bench/BitmapBench.cpp206
-rw-r--r--src/opts/SkBlitRow_opts_arm_neon.cpp188
2 files changed, 346 insertions, 48 deletions
diff --git a/bench/BitmapBench.cpp b/bench/BitmapBench.cpp
index 0efdde3840..bd2ba685f2 100644
--- a/bench/BitmapBench.cpp
+++ b/bench/BitmapBench.cpp
@@ -21,25 +21,6 @@ static const char* gConfigName[] = {
"ERROR", "a1", "a8", "index8", "565", "4444", "8888"
};
-static void drawIntoBitmap(const SkBitmap& bm) {
- const int w = bm.width();
- const int h = bm.height();
-
- SkCanvas canvas(bm);
- SkPaint p;
- p.setAntiAlias(true);
- p.setColor(SK_ColorRED);
- canvas.drawCircle(SkIntToScalar(w)/2, SkIntToScalar(h)/2,
- SkIntToScalar(SkMin32(w, h))*3/8, p);
-
- SkRect r;
- r.set(0, 0, SkIntToScalar(w), SkIntToScalar(h));
- p.setStyle(SkPaint::kStroke_Style);
- p.setStrokeWidth(SkIntToScalar(4));
- p.setColor(SK_ColorBLUE);
- canvas.drawRect(r, p);
-}
-
// Computes x * 255 / 5 with truncating integer division.
// For x in 0..5 this yields 0, 51, 102, 153, 204, 255.
static int conv6ToByte(int x) {
    const int scaled = x * 0xFF;  // multiply before dividing so no precision is lost early
    return scaled / 5;
}
@@ -102,38 +83,23 @@ class BitmapBench : public SkBenchmark {
bool fIsOpaque;
bool fForceUpdate; //bitmap marked as dirty before each draw. forces bitmap to be updated on device cache
int fTileX, fTileY; // -1 means don't use shader
+ bool fIsVolatile;
+ SkBitmap::Config fConfig;
SkString fName;
enum { N = SkBENCHLOOP(300) };
+ enum { W = 128 };
+ enum { H = 128 };
public:
BitmapBench(void* param, bool isOpaque, SkBitmap::Config c,
bool forceUpdate = false, bool bitmapVolatile = false,
int tx = -1, int ty = -1)
- : INHERITED(param), fIsOpaque(isOpaque), fForceUpdate(forceUpdate), fTileX(tx), fTileY(ty) {
- const int w = 128;
- const int h = 128;
- SkBitmap bm;
-
- if (SkBitmap::kIndex8_Config == c) {
- bm.setConfig(SkBitmap::kARGB_8888_Config, w, h);
- } else {
- bm.setConfig(c, w, h);
- }
- bm.allocPixels();
- bm.eraseColor(isOpaque ? SK_ColorBLACK : 0);
-
- drawIntoBitmap(bm);
-
- if (SkBitmap::kIndex8_Config == c) {
- convertToIndex666(bm, &fBitmap);
- } else {
- fBitmap = bm;
- }
-
- if (fBitmap.getColorTable()) {
- fBitmap.getColorTable()->setIsOpaque(isOpaque);
- }
- fBitmap.setIsOpaque(isOpaque);
- fBitmap.setIsVolatile(bitmapVolatile);
+ : INHERITED(param)
+ , fIsOpaque(isOpaque)
+ , fForceUpdate(forceUpdate)
+ , fTileX(tx)
+ , fTileY(ty)
+ , fIsVolatile(bitmapVolatile)
+ , fConfig(c) { /* Only records the requested settings; fBitmap itself is
+ * allocated and filled later, in onPreDraw().
+ * tx/ty of -1 mean "don't use a shader" (see fTileX/fTileY). */
}
protected:
@@ -145,16 +111,43 @@ protected:
fName.appendf("_%s", gTileName[fTileY]);
}
}
- fName.appendf("_%s%s", gConfigName[fBitmap.config()],
+ fName.appendf("_%s%s", gConfigName[fConfig],
fIsOpaque ? "" : "_A");
if (fForceUpdate)
fName.append("_update");
- if (fBitmap.isVolatile())
+ if (fIsVolatile)
fName.append("_volatile");
return fName.c_str();
}
+ virtual void onPreDraw() { /* Builds fBitmap from the settings stored by the
+ * constructor: allocate a W x H bitmap, clear it, let onDrawIntoBitmap()
+ * paint the content, then apply the opaque/volatile flags. */
+ SkBitmap bm;
+
+ if (SkBitmap::kIndex8_Config == fConfig) {
+ bm.setConfig(SkBitmap::kARGB_8888_Config, W, H); // index8 content is drawn as 8888 first and converted below
+ } else {
+ bm.setConfig(fConfig, W, H);
+ }
+
+ bm.allocPixels();
+ bm.eraseColor(fIsOpaque ? SK_ColorBLACK : 0); // opaque benches start black, others start fully transparent
+
+ onDrawIntoBitmap(bm); // virtual: subclasses may supply different content
+
+ if (SkBitmap::kIndex8_Config == fConfig) {
+ convertToIndex666(bm, &fBitmap); // produce the index8 bitmap from the 8888 drawing
+ } else {
+ fBitmap = bm;
+ }
+
+ if (fBitmap.getColorTable()) { // only bitmaps that carry a color table take this branch
+ fBitmap.getColorTable()->setIsOpaque(fIsOpaque);
+ }
+ fBitmap.setIsOpaque(fIsOpaque);
+ fBitmap.setIsVolatile(fIsVolatile);
+ }
+
virtual void onDraw(SkCanvas* canvas) {
SkIPoint dim = this->getSize();
SkRandom rand;
@@ -177,6 +170,25 @@ protected:
}
}
+ virtual void onDrawIntoBitmap(const SkBitmap& bm) { /* Default source content:
+ * an anti-aliased red circle centered in the bitmap, framed by a blue
+ * stroked rectangle along the bitmap bounds. Subclasses may override. */
+ const int w = bm.width();
+ const int h = bm.height();
+
+ SkCanvas canvas(bm);
+ SkPaint p;
+ p.setAntiAlias(true);
+ p.setColor(SK_ColorRED);
+ canvas.drawCircle(SkIntToScalar(w)/2, SkIntToScalar(h)/2,
+ SkIntToScalar(SkMin32(w, h))*3/8, p); // radius is 3/8 of the smaller dimension
+
+ SkRect r;
+ r.set(0, 0, SkIntToScalar(w), SkIntToScalar(h));
+ p.setStyle(SkPaint::kStroke_Style);
+ p.setStrokeWidth(SkIntToScalar(4));
+ p.setColor(SK_ColorBLUE);
+ canvas.drawRect(r, p); // same paint, now stroking in blue with width 4
+ }
+
private:
typedef SkBenchmark INHERITED;
};
@@ -241,6 +253,95 @@ private:
typedef BitmapBench INHERITED;
};
+/** Verify optimizations that test source alpha values.
+ *
+ * A BitmapBench whose source bitmap has a controlled alpha pattern, chosen
+ * by the SourceAlpha argument: entirely opaque, entirely transparent, or one
+ * of two vertical stripe patterns. The base class is always constructed with
+ * isOpaque == false, and the selected pattern is appended to the benchmark
+ * name.
+ */
+
+class SourceAlphaBitmapBench : public BitmapBench {
+public:
+ enum SourceAlpha { kOpaque_SourceAlpha, kTransparent_SourceAlpha,
+ kTwoStripes_SourceAlpha, kThreeStripes_SourceAlpha}; // which alpha pattern onDrawIntoBitmap() paints
+private:
+ SkString fFullName; // base-class name plus the pattern suffix; backs the pointer returned by onGetName()
+ SourceAlpha fSourceAlpha;
+public:
+ SourceAlphaBitmapBench(void* param, SourceAlpha alpha, SkBitmap::Config c,
+ bool forceUpdate = false, bool bitmapVolatile = false,
+ int tx = -1, int ty = -1)
+ : INHERITED(param, false, c, forceUpdate, bitmapVolatile, tx, ty) // isOpaque is always passed as false
+ , fSourceAlpha(alpha) {
+ }
+
+protected:
+ virtual const char* onGetName() { // base name followed by a suffix naming the alpha pattern
+ fFullName.set(INHERITED::onGetName());
+
+ if (fSourceAlpha == kOpaque_SourceAlpha) {
+ fFullName.append("_source_opaque");
+ } else if (fSourceAlpha == kTransparent_SourceAlpha) {
+ fFullName.append("_source_transparent");
+ } else if (fSourceAlpha == kTwoStripes_SourceAlpha) {
+ fFullName.append("_source_stripes_two");
+ } else if (fSourceAlpha == kThreeStripes_SourceAlpha) {
+ fFullName.append("_source_stripes_three");
+ }
+
+ return fFullName.c_str();
+ }
+
+ virtual void onDrawIntoBitmap(const SkBitmap& bm) SK_OVERRIDE { // replaces the base-class drawing with the selected alpha pattern
+ const int w = bm.width();
+ const int h = bm.height();
+
+ if (kOpaque_SourceAlpha == fSourceAlpha) {
+ bm.eraseColor(SK_ColorBLACK); // every pixel opaque
+ } else if (kTransparent_SourceAlpha == fSourceAlpha) {
+ bm.eraseColor(0); // every pixel transparent
+ } else if (kTwoStripes_SourceAlpha == fSourceAlpha) {
+ bm.eraseColor(0);
+
+ SkCanvas canvas(bm);
+ SkPaint p;
+ p.setAntiAlias(false);
+ p.setStyle(SkPaint::kFill_Style);
+ p.setColor(SK_ColorRED);
+
+ // Draw red vertical stripes on transparent background:
+ // one-pixel-wide columns at every even x, odd columns stay transparent.
+ SkRect r;
+ for (int x = 0; x < w; x+=2)
+ {
+ r.set(SkIntToScalar(x), 0, SkIntToScalar(x+1), SkIntToScalar(h));
+ canvas.drawRect(r, p);
+ }
+
+ } else if (kThreeStripes_SourceAlpha == fSourceAlpha) {
+ bm.eraseColor(0);
+
+ SkCanvas canvas(bm);
+ SkPaint p;
+ p.setAntiAlias(false);
+ p.setStyle(SkPaint::kFill_Style);
+
+ // Draw vertical stripes on transparent background with a pattern
+ // where the first pixel is fully transparent, the next is semi-transparent
+ // and the third is fully opaque. The pattern repeats every three columns.
+ SkRect r;
+ for (int x = 0; x < w; x++)
+ {
+ if (x % 3 == 0) {
+ continue; // Keep transparent
+ } else if (x % 3 == 1) {
+ p.setColor(SkColorSetARGB(127, 127, 127, 127)); // Semi-transparent
+ } else if (x % 3 == 2) {
+ p.setColor(SK_ColorRED); // Opaque
+ }
+ r.set(SkIntToScalar(x), 0, SkIntToScalar(x+1), SkIntToScalar(h));
+ canvas.drawRect(r, p);
+ }
+ }
+ }
+
+private:
+ typedef BitmapBench INHERITED;
+};
static SkBenchmark* Fact0(void* p) { return new BitmapBench(p, false, SkBitmap::kARGB_8888_Config); }
static SkBenchmark* Fact1(void* p) { return new BitmapBench(p, true, SkBitmap::kARGB_8888_Config); }
static SkBenchmark* Fact2(void* p) { return new BitmapBench(p, true, SkBitmap::kRGB_565_Config); }
@@ -263,6 +364,12 @@ static SkBenchmark* Fact14(void* p) { return new FilterBitmapBench(p, true, SkBi
static SkBenchmark* Fact15(void* p) { return new FilterBitmapBench(p, true, SkBitmap::kARGB_8888_Config, true, true, -1, -1, true, true, true); }
static SkBenchmark* Fact16(void* p) { return new FilterBitmapBench(p, true, SkBitmap::kARGB_8888_Config, true, false, -1, -1, true, true, true); }
+// source alpha tests (8888 bitmaps: opaque, transparent, two-stripe and three-stripe patterns) -> S32A_Opaque_BlitRow32_{arm,neon}
+static SkBenchmark* Fact17(void* p) { return new SourceAlphaBitmapBench(p, SourceAlphaBitmapBench::kOpaque_SourceAlpha, SkBitmap::kARGB_8888_Config); }
+static SkBenchmark* Fact18(void* p) { return new SourceAlphaBitmapBench(p, SourceAlphaBitmapBench::kTransparent_SourceAlpha, SkBitmap::kARGB_8888_Config); }
+static SkBenchmark* Fact19(void* p) { return new SourceAlphaBitmapBench(p, SourceAlphaBitmapBench::kTwoStripes_SourceAlpha, SkBitmap::kARGB_8888_Config); }
+static SkBenchmark* Fact20(void* p) { return new SourceAlphaBitmapBench(p, SourceAlphaBitmapBench::kThreeStripes_SourceAlpha, SkBitmap::kARGB_8888_Config); }
+
static BenchRegistry gReg0(Fact0);
static BenchRegistry gReg1(Fact1);
static BenchRegistry gReg2(Fact2);
@@ -282,3 +389,8 @@ static BenchRegistry gReg13(Fact13);
static BenchRegistry gReg14(Fact14);
static BenchRegistry gReg15(Fact15);
static BenchRegistry gReg16(Fact16);
+
+static BenchRegistry gReg17(Fact17);
+static BenchRegistry gReg18(Fact18);
+static BenchRegistry gReg19(Fact19);
+static BenchRegistry gReg20(Fact20);
diff --git a/src/opts/SkBlitRow_opts_arm_neon.cpp b/src/opts/SkBlitRow_opts_arm_neon.cpp
index 22785be610..00086c3789 100644
--- a/src/opts/SkBlitRow_opts_arm_neon.cpp
+++ b/src/opts/SkBlitRow_opts_arm_neon.cpp
@@ -517,6 +517,178 @@ void S32A_Opaque_BlitRow32_neon(SkPMColor* SK_RESTRICT dst,
}
}
+void S32A_Opaque_BlitRow32_neon_src_alpha(SkPMColor* SK_RESTRICT dst,
+ const SkPMColor* SK_RESTRICT src,
+ int count, U8CPU alpha) { /* Blends `count` src pixels over dst, picking a
+ * code path from the source alpha of the pixels being read:
+ *   ALPHA_0        - skips runs of transparent pixels, leaving dst untouched
+ *   ALPHA_255      - copies runs of opaque pixels straight to dst
+ *   ALPHA_1_TO_254 - NEON blend, UNROLL (4) pixels per iteration
+ *   TAIL           - scalar loop for the pixels left at the end of the row
+ * The global alpha must be 255 (asserted below). */
+ SkASSERT(255 == alpha);
+
+ if (count <= 0)
+ return;
+
+ /* Use these to check if src is transparent or opaque:
+ * a pixel compares >= ALPHA_OPAQ only when its top byte is 0xFF, and
+ * <= ALPHA_TRANS only when its top byte is 0. This relies on alpha living
+ * in the top byte of the pixel (the proc table selects this function only
+ * when SK_A32_SHIFT == 24). */
+ const unsigned int ALPHA_OPAQ = 0xFF000000;
+ const unsigned int ALPHA_TRANS = 0x00FFFFFF;
+
+#define UNROLL 4
+ const SkPMColor* SK_RESTRICT src_end = src + count - (UNROLL + 1); // fast paths stop UNROLL + 1 pixels before the real end (presumably so their look-ahead reads stay inside the row); TAIL restores the real end
+ const SkPMColor* SK_RESTRICT src_temp = src;
+
+ /* set up the NEON variables:
+ * alpha_mask holds table-lookup indices 3 and 7, i.e. the fourth byte of
+ * each of the two pixels held in one 8-byte vector. */
+ uint8x8_t alpha_mask;
+ static const uint8_t alpha_mask_setup[] = {3,3,3,3,7,7,7,7};
+ alpha_mask = vld1_u8(alpha_mask_setup);
+
+ uint8x8_t src_raw, dst_raw, dst_final;
+ uint8x8_t src_raw_2, dst_raw_2, dst_final_2;
+ uint8x8_t dst_cooked;
+ uint16x8_t dst_wide;
+ uint8x8_t alpha_narrow;
+ uint16x8_t alpha_wide;
+
+ /* choose the first processing type */
+ if( src >= src_end)
+ goto TAIL;
+ if(*src <= ALPHA_TRANS)
+ goto ALPHA_0;
+ if(*src >= ALPHA_OPAQ)
+ goto ALPHA_255;
+ /* fall-thru */
+
+ALPHA_1_TO_254:
+ do {
+
+ /* get the source: two vectors of two pixels each (4 pixels total) */
+ src_raw = vreinterpret_u8_u32(vld1_u32(src));
+ src_raw_2 = vreinterpret_u8_u32(vld1_u32(src+2));
+
+ /* get and hold the dst too */
+ dst_raw = vreinterpret_u8_u32(vld1_u32(dst));
+ dst_raw_2 = vreinterpret_u8_u32(vld1_u32(dst+2));
+
+
+ /* get the alphas spread out properly */
+ alpha_narrow = vtbl1_u8(src_raw, alpha_mask);
+ /* reflect SkAlpha255To256() semantics a+1 vs a+a>>7 */
+ /* we collapsed (255-a)+1 ... */
+ alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow);
+
+ /* spread the dest */
+ dst_wide = vmovl_u8(dst_raw);
+
+ /* alpha mul the dest: per byte, (dst * (256 - srcA)) >> 8 */
+ dst_wide = vmulq_u16 (dst_wide, alpha_wide);
+ dst_cooked = vshrn_n_u16(dst_wide, 8);
+
+ /* sum -- ignoring any byte lane overflows */
+ dst_final = vadd_u8(src_raw, dst_cooked);
+
+ /* same computation for the second pair of pixels */
+ alpha_narrow = vtbl1_u8(src_raw_2, alpha_mask);
+ /* reflect SkAlpha255To256() semantics a+1 vs a+a>>7 */
+ /* we collapsed (255-a)+1 ... */
+ alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow);
+
+ /* spread the dest */
+ dst_wide = vmovl_u8(dst_raw_2);
+
+ /* alpha mul the dest */
+ dst_wide = vmulq_u16 (dst_wide, alpha_wide);
+ dst_cooked = vshrn_n_u16(dst_wide, 8);
+
+ /* sum -- ignoring any byte lane overflows */
+ dst_final_2 = vadd_u8(src_raw_2, dst_cooked);
+
+ vst1_u32(dst, vreinterpret_u32_u8(dst_final));
+ vst1_u32(dst+2, vreinterpret_u32_u8(dst_final_2));
+
+ src += UNROLL;
+ dst += UNROLL;
+
+ /* if 2 of the next pixels aren't between 1 and 254
+ it might make sense to go to the optimized loops */
+ if((src[0] <= ALPHA_TRANS && src[1] <= ALPHA_TRANS) || (src[0] >= ALPHA_OPAQ && src[1] >= ALPHA_OPAQ))
+ break;
+
+ } while(src < src_end);
+
+ if (src >= src_end)
+ goto TAIL;
+
+ if(src[0] >= ALPHA_OPAQ && src[1] >= ALPHA_OPAQ)
+ goto ALPHA_255;
+
+ /*fall-thru: not at the end and not two opaque pixels, so the loop broke
+ on two transparent pixels */
+
+ALPHA_0:
+
+ /*In this state, we know the current alpha is 0 and
+ we optimize for the next alpha also being zero.
+ Transparent pixels leave dst unchanged, so only the pointers advance. */
+ src_temp = src; //so we don't have to increment dst every time
+ do {
+ if(*(++src) > ALPHA_TRANS)
+ break;
+ if(*(++src) > ALPHA_TRANS)
+ break;
+ if(*(++src) > ALPHA_TRANS)
+ break;
+ if(*(++src) > ALPHA_TRANS)
+ break;
+ } while(src < src_end);
+
+ dst += (src - src_temp); // advance dst by the number of pixels skipped
+
+ /* no longer alpha 0, so determine where to go next. */
+ if( src >= src_end)
+ goto TAIL;
+ if(*src >= ALPHA_OPAQ)
+ goto ALPHA_255;
+ else
+ goto ALPHA_1_TO_254;
+
+ALPHA_255:
+ while((src[0] & src[1] & src[2] & src[3]) >= ALPHA_OPAQ) { // the AND keeps a 0xFF top byte only if all four pixels are opaque
+ dst[0]=src[0];
+ dst[1]=src[1];
+ dst[2]=src[2];
+ dst[3]=src[3];
+ src+=UNROLL;
+ dst+=UNROLL;
+ if(src >= src_end)
+ goto TAIL;
+ }
+
+ //Handle remainder: copy up to three leading opaque pixels one at a time.
+ if(*src >= ALPHA_OPAQ) { *dst++ = *src++;
+ if(*src >= ALPHA_OPAQ) { *dst++ = *src++;
+ if(*src >= ALPHA_OPAQ) { *dst++ = *src++; }
+ }
+ }
+
+ if( src >= src_end)
+ goto TAIL;
+ if(*src <= ALPHA_TRANS)
+ goto ALPHA_0;
+ else
+ goto ALPHA_1_TO_254;
+
+TAIL:
+ /* do any residual iterations, one pixel at a time: a zero pixel is
+ skipped, an opaque pixel is copied, anything else is blended with
+ SkPMSrcOver() */
+ src_end += UNROLL + 1; //goto the real end
+ while(src != src_end) {
+ if( *src != 0 ) {
+ if( *src >= ALPHA_OPAQ ) {
+ *dst = *src;
+ }
+ else {
+ *dst = SkPMSrcOver(*src, *dst);
+ }
+ }
+ src++;
+ dst++;
+ }
+
+#undef UNROLL
+ return;
+}
/* Neon version of S32_Blend_BlitRow32()
* portable version is in src/core/SkBlitRow_D32.cpp
@@ -1107,6 +1279,20 @@ const SkBlitRow::Proc sk_blitrow_platform_4444_procs_arm_neon[] = {
const SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm_neon[] = {
NULL, // S32_Opaque,
S32_Blend_BlitRow32_neon, // S32_Blend,
- S32A_Opaque_BlitRow32_neon, // S32A_Opaque,
+ /*
+ * We have two choices for S32A_Opaque procs. One reads the src alpha
+ * value and attempts to optimize accordingly. The optimization is
+ * sensitive to the source content and is not a win in all cases. For
+ * example, if there are a lot of transitions between the alpha states,
+ * the performance will almost certainly be worse. However, for many
+ * common cases the performance is equivalent or better than the standard
+ * case where we do not inspect the src alpha.
+ */
+#if SK_A32_SHIFT == 24
+ // This proc assumes the alpha value occupies bits 24-31 of each SkPMColor
+ S32A_Opaque_BlitRow32_neon_src_alpha, // S32A_Opaque,
+#else
+ S32A_Opaque_BlitRow32_neon, // S32A_Opaque,
+#endif
S32A_Blend_BlitRow32_arm // S32A_Blend
};