From c2532dd0b89e03ed158229872cb1ee06ae7f10fe Mon Sep 17 00:00:00 2001 From: "djsollen@google.com" Date: Tue, 9 Apr 2013 18:06:06 +0000 Subject: Partial reapply of r5364 minus the non-neon code path. See https://codereview.appspot.com/6465075 for a more detailed description of the contents of this CL. Review URL: https://codereview.chromium.org/13060004 git-svn-id: http://skia.googlecode.com/svn/trunk@8579 2bbb7eff-a529-9590-31e7-b0007b416f81 --- bench/BitmapBench.cpp | 206 +++++++++++++++++++++++++++-------- src/opts/SkBlitRow_opts_arm_neon.cpp | 188 +++++++++++++++++++++++++++++++- 2 files changed, 346 insertions(+), 48 deletions(-) diff --git a/bench/BitmapBench.cpp b/bench/BitmapBench.cpp index 0efdde3840..bd2ba685f2 100644 --- a/bench/BitmapBench.cpp +++ b/bench/BitmapBench.cpp @@ -21,25 +21,6 @@ static const char* gConfigName[] = { "ERROR", "a1", "a8", "index8", "565", "4444", "8888" }; -static void drawIntoBitmap(const SkBitmap& bm) { - const int w = bm.width(); - const int h = bm.height(); - - SkCanvas canvas(bm); - SkPaint p; - p.setAntiAlias(true); - p.setColor(SK_ColorRED); - canvas.drawCircle(SkIntToScalar(w)/2, SkIntToScalar(h)/2, - SkIntToScalar(SkMin32(w, h))*3/8, p); - - SkRect r; - r.set(0, 0, SkIntToScalar(w), SkIntToScalar(h)); - p.setStyle(SkPaint::kStroke_Style); - p.setStrokeWidth(SkIntToScalar(4)); - p.setColor(SK_ColorBLUE); - canvas.drawRect(r, p); -} - static int conv6ToByte(int x) { return x * 0xFF / 5; } @@ -102,38 +83,23 @@ class BitmapBench : public SkBenchmark { bool fIsOpaque; bool fForceUpdate; //bitmap marked as dirty before each draw. 
forces bitmap to be updated on device cache int fTileX, fTileY; // -1 means don't use shader + bool fIsVolatile; + SkBitmap::Config fConfig; SkString fName; enum { N = SkBENCHLOOP(300) }; + enum { W = 128 }; + enum { H = 128 }; public: BitmapBench(void* param, bool isOpaque, SkBitmap::Config c, bool forceUpdate = false, bool bitmapVolatile = false, int tx = -1, int ty = -1) - : INHERITED(param), fIsOpaque(isOpaque), fForceUpdate(forceUpdate), fTileX(tx), fTileY(ty) { - const int w = 128; - const int h = 128; - SkBitmap bm; - - if (SkBitmap::kIndex8_Config == c) { - bm.setConfig(SkBitmap::kARGB_8888_Config, w, h); - } else { - bm.setConfig(c, w, h); - } - bm.allocPixels(); - bm.eraseColor(isOpaque ? SK_ColorBLACK : 0); - - drawIntoBitmap(bm); - - if (SkBitmap::kIndex8_Config == c) { - convertToIndex666(bm, &fBitmap); - } else { - fBitmap = bm; - } - - if (fBitmap.getColorTable()) { - fBitmap.getColorTable()->setIsOpaque(isOpaque); - } - fBitmap.setIsOpaque(isOpaque); - fBitmap.setIsVolatile(bitmapVolatile); + : INHERITED(param) + , fIsOpaque(isOpaque) + , fForceUpdate(forceUpdate) + , fTileX(tx) + , fTileY(ty) + , fIsVolatile(bitmapVolatile) + , fConfig(c) { } protected: @@ -145,16 +111,43 @@ protected: fName.appendf("_%s", gTileName[fTileY]); } } - fName.appendf("_%s%s", gConfigName[fBitmap.config()], + fName.appendf("_%s%s", gConfigName[fConfig], fIsOpaque ? "" : "_A"); if (fForceUpdate) fName.append("_update"); - if (fBitmap.isVolatile()) + if (fIsVolatile) fName.append("_volatile"); return fName.c_str(); } + virtual void onPreDraw() { + SkBitmap bm; + + if (SkBitmap::kIndex8_Config == fConfig) { + bm.setConfig(SkBitmap::kARGB_8888_Config, W, H); + } else { + bm.setConfig(fConfig, W, H); + } + + bm.allocPixels(); + bm.eraseColor(fIsOpaque ? 
SK_ColorBLACK : 0); + + onDrawIntoBitmap(bm); + + if (SkBitmap::kIndex8_Config == fConfig) { + convertToIndex666(bm, &fBitmap); + } else { + fBitmap = bm; + } + + if (fBitmap.getColorTable()) { + fBitmap.getColorTable()->setIsOpaque(fIsOpaque); + } + fBitmap.setIsOpaque(fIsOpaque); + fBitmap.setIsVolatile(fIsVolatile); + } + virtual void onDraw(SkCanvas* canvas) { SkIPoint dim = this->getSize(); SkRandom rand; @@ -177,6 +170,25 @@ protected: } } + virtual void onDrawIntoBitmap(const SkBitmap& bm) { + const int w = bm.width(); + const int h = bm.height(); + + SkCanvas canvas(bm); + SkPaint p; + p.setAntiAlias(true); + p.setColor(SK_ColorRED); + canvas.drawCircle(SkIntToScalar(w)/2, SkIntToScalar(h)/2, + SkIntToScalar(SkMin32(w, h))*3/8, p); + + SkRect r; + r.set(0, 0, SkIntToScalar(w), SkIntToScalar(h)); + p.setStyle(SkPaint::kStroke_Style); + p.setStrokeWidth(SkIntToScalar(4)); + p.setColor(SK_ColorBLUE); + canvas.drawRect(r, p); + } + private: typedef SkBenchmark INHERITED; }; @@ -241,6 +253,95 @@ private: typedef BitmapBench INHERITED; }; +/** Verify optimizations that test source alpha values. 
*/ + +class SourceAlphaBitmapBench : public BitmapBench { +public: + enum SourceAlpha { kOpaque_SourceAlpha, kTransparent_SourceAlpha, + kTwoStripes_SourceAlpha, kThreeStripes_SourceAlpha}; +private: + SkString fFullName; + SourceAlpha fSourceAlpha; +public: + SourceAlphaBitmapBench(void* param, SourceAlpha alpha, SkBitmap::Config c, + bool forceUpdate = false, bool bitmapVolatile = false, + int tx = -1, int ty = -1) + : INHERITED(param, false, c, forceUpdate, bitmapVolatile, tx, ty) + , fSourceAlpha(alpha) { + } + +protected: + virtual const char* onGetName() { + fFullName.set(INHERITED::onGetName()); + + if (fSourceAlpha == kOpaque_SourceAlpha) { + fFullName.append("_source_opaque"); + } else if (fSourceAlpha == kTransparent_SourceAlpha) { + fFullName.append("_source_transparent"); + } else if (fSourceAlpha == kTwoStripes_SourceAlpha) { + fFullName.append("_source_stripes_two"); + } else if (fSourceAlpha == kThreeStripes_SourceAlpha) { + fFullName.append("_source_stripes_three"); + } + + return fFullName.c_str(); + } + + virtual void onDrawIntoBitmap(const SkBitmap& bm) SK_OVERRIDE { + const int w = bm.width(); + const int h = bm.height(); + + if (kOpaque_SourceAlpha == fSourceAlpha) { + bm.eraseColor(SK_ColorBLACK); + } else if (kTransparent_SourceAlpha == fSourceAlpha) { + bm.eraseColor(0); + } else if (kTwoStripes_SourceAlpha == fSourceAlpha) { + bm.eraseColor(0); + + SkCanvas canvas(bm); + SkPaint p; + p.setAntiAlias(false); + p.setStyle(SkPaint::kFill_Style); + p.setColor(SK_ColorRED); + + // Draw red vertical stripes on transparent background + SkRect r; + for (int x = 0; x < w; x+=2) + { + r.set(SkIntToScalar(x), 0, SkIntToScalar(x+1), SkIntToScalar(h)); + canvas.drawRect(r, p); + } + + } else if (kThreeStripes_SourceAlpha == fSourceAlpha) { + bm.eraseColor(0); + + SkCanvas canvas(bm); + SkPaint p; + p.setAntiAlias(false); + p.setStyle(SkPaint::kFill_Style); + + // Draw vertical stripes on transparent background with a pattern + // where the first pixel is 
fully transparent, the next is semi-transparent + // and the third is fully opaque. + SkRect r; + for (int x = 0; x < w; x++) + { + if (x % 3 == 0) { + continue; // Keep transparent + } else if (x % 3 == 1) { + p.setColor(SkColorSetARGB(127, 127, 127, 127)); // Semi-transparent + } else if (x % 3 == 2) { + p.setColor(SK_ColorRED); // Opaque + } + r.set(SkIntToScalar(x), 0, SkIntToScalar(x+1), SkIntToScalar(h)); + canvas.drawRect(r, p); + } + } + } + +private: + typedef BitmapBench INHERITED; +}; static SkBenchmark* Fact0(void* p) { return new BitmapBench(p, false, SkBitmap::kARGB_8888_Config); } static SkBenchmark* Fact1(void* p) { return new BitmapBench(p, true, SkBitmap::kARGB_8888_Config); } static SkBenchmark* Fact2(void* p) { return new BitmapBench(p, true, SkBitmap::kRGB_565_Config); } @@ -263,6 +364,12 @@ static SkBenchmark* Fact14(void* p) { return new FilterBitmapBench(p, true, SkBi static SkBenchmark* Fact15(void* p) { return new FilterBitmapBench(p, true, SkBitmap::kARGB_8888_Config, true, true, -1, -1, true, true, true); } static SkBenchmark* Fact16(void* p) { return new FilterBitmapBench(p, true, SkBitmap::kARGB_8888_Config, true, false, -1, -1, true, true, true); } +// source alpha tests -> S32A_Opaque_BlitRow32_{arm,neon} +static SkBenchmark* Fact17(void* p) { return new SourceAlphaBitmapBench(p, SourceAlphaBitmapBench::kOpaque_SourceAlpha, SkBitmap::kARGB_8888_Config); } +static SkBenchmark* Fact18(void* p) { return new SourceAlphaBitmapBench(p, SourceAlphaBitmapBench::kTransparent_SourceAlpha, SkBitmap::kARGB_8888_Config); } +static SkBenchmark* Fact19(void* p) { return new SourceAlphaBitmapBench(p, SourceAlphaBitmapBench::kTwoStripes_SourceAlpha, SkBitmap::kARGB_8888_Config); } +static SkBenchmark* Fact20(void* p) { return new SourceAlphaBitmapBench(p, SourceAlphaBitmapBench::kThreeStripes_SourceAlpha, SkBitmap::kARGB_8888_Config); } + static BenchRegistry gReg0(Fact0); static BenchRegistry gReg1(Fact1); static BenchRegistry gReg2(Fact2); @@ 
-282,3 +389,8 @@ static BenchRegistry gReg13(Fact13); static BenchRegistry gReg14(Fact14); static BenchRegistry gReg15(Fact15); static BenchRegistry gReg16(Fact16); + +static BenchRegistry gReg17(Fact17); +static BenchRegistry gReg18(Fact18); +static BenchRegistry gReg19(Fact19); +static BenchRegistry gReg20(Fact20); diff --git a/src/opts/SkBlitRow_opts_arm_neon.cpp b/src/opts/SkBlitRow_opts_arm_neon.cpp index 22785be610..00086c3789 100644 --- a/src/opts/SkBlitRow_opts_arm_neon.cpp +++ b/src/opts/SkBlitRow_opts_arm_neon.cpp @@ -517,6 +517,178 @@ void S32A_Opaque_BlitRow32_neon(SkPMColor* SK_RESTRICT dst, } } +void S32A_Opaque_BlitRow32_neon_src_alpha(SkPMColor* SK_RESTRICT dst, + const SkPMColor* SK_RESTRICT src, + int count, U8CPU alpha) { + SkASSERT(255 == alpha); + + if (count <= 0) + return; + + /* Use these to check if src is transparent or opaque */ + const unsigned int ALPHA_OPAQ = 0xFF000000; + const unsigned int ALPHA_TRANS = 0x00FFFFFF; + +#define UNROLL 4 + const SkPMColor* SK_RESTRICT src_end = src + count - (UNROLL + 1); + const SkPMColor* SK_RESTRICT src_temp = src; + + /* set up the NEON variables */ + uint8x8_t alpha_mask; + static const uint8_t alpha_mask_setup[] = {3,3,3,3,7,7,7,7}; + alpha_mask = vld1_u8(alpha_mask_setup); + + uint8x8_t src_raw, dst_raw, dst_final; + uint8x8_t src_raw_2, dst_raw_2, dst_final_2; + uint8x8_t dst_cooked; + uint16x8_t dst_wide; + uint8x8_t alpha_narrow; + uint16x8_t alpha_wide; + + /* choose the first processing type */ + if( src >= src_end) + goto TAIL; + if(*src <= ALPHA_TRANS) + goto ALPHA_0; + if(*src >= ALPHA_OPAQ) + goto ALPHA_255; + /* fall-thru */ + +ALPHA_1_TO_254: + do { + + /* get the source */ + src_raw = vreinterpret_u8_u32(vld1_u32(src)); + src_raw_2 = vreinterpret_u8_u32(vld1_u32(src+2)); + + /* get and hold the dst too */ + dst_raw = vreinterpret_u8_u32(vld1_u32(dst)); + dst_raw_2 = vreinterpret_u8_u32(vld1_u32(dst+2)); + + + /* get the alphas spread out properly */ + alpha_narrow = vtbl1_u8(src_raw, 
alpha_mask); + /* reflect SkAlpha255To256() semantics a+1 vs a+a>>7 */ + /* we collapsed (255-a)+1 ... */ + alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow); + + /* spread the dest */ + dst_wide = vmovl_u8(dst_raw); + + /* alpha mul the dest */ + dst_wide = vmulq_u16 (dst_wide, alpha_wide); + dst_cooked = vshrn_n_u16(dst_wide, 8); + + /* sum -- ignoring any byte lane overflows */ + dst_final = vadd_u8(src_raw, dst_cooked); + + alpha_narrow = vtbl1_u8(src_raw_2, alpha_mask); + /* reflect SkAlpha255To256() semantics a+1 vs a+a>>7 */ + /* we collapsed (255-a)+1 ... */ + alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow); + + /* spread the dest */ + dst_wide = vmovl_u8(dst_raw_2); + + /* alpha mul the dest */ + dst_wide = vmulq_u16 (dst_wide, alpha_wide); + dst_cooked = vshrn_n_u16(dst_wide, 8); + + /* sum -- ignoring any byte lane overflows */ + dst_final_2 = vadd_u8(src_raw_2, dst_cooked); + + vst1_u32(dst, vreinterpret_u32_u8(dst_final)); + vst1_u32(dst+2, vreinterpret_u32_u8(dst_final_2)); + + src += UNROLL; + dst += UNROLL; + + /* if 2 of the next pixels aren't between 1 and 254 + it might make sense to go to the optimized loops */ + if((src[0] <= ALPHA_TRANS && src[1] <= ALPHA_TRANS) || (src[0] >= ALPHA_OPAQ && src[1] >= ALPHA_OPAQ)) + break; + + } while(src < src_end); + + if (src >= src_end) + goto TAIL; + + if(src[0] >= ALPHA_OPAQ && src[1] >= ALPHA_OPAQ) + goto ALPHA_255; + + /*fall-thru*/ + +ALPHA_0: + + /*In this state, we know the current alpha is 0 and + we optimize for the next alpha also being zero. */ + src_temp = src; //so we don't have to increment dst every time + do { + if(*(++src) > ALPHA_TRANS) + break; + if(*(++src) > ALPHA_TRANS) + break; + if(*(++src) > ALPHA_TRANS) + break; + if(*(++src) > ALPHA_TRANS) + break; + } while(src < src_end); + + dst += (src - src_temp); + + /* no longer alpha 0, so determine where to go next. 
*/ + if( src >= src_end) + goto TAIL; + if(*src >= ALPHA_OPAQ) + goto ALPHA_255; + else + goto ALPHA_1_TO_254; + +ALPHA_255: + while((src[0] & src[1] & src[2] & src[3]) >= ALPHA_OPAQ) { + dst[0]=src[0]; + dst[1]=src[1]; + dst[2]=src[2]; + dst[3]=src[3]; + src+=UNROLL; + dst+=UNROLL; + if(src >= src_end) + goto TAIL; + } + + //Handle remainder. + if(*src >= ALPHA_OPAQ) { *dst++ = *src++; + if(*src >= ALPHA_OPAQ) { *dst++ = *src++; + if(*src >= ALPHA_OPAQ) { *dst++ = *src++; } + } + } + + if( src >= src_end) + goto TAIL; + if(*src <= ALPHA_TRANS) + goto ALPHA_0; + else + goto ALPHA_1_TO_254; + +TAIL: + /* do any residual iterations */ + src_end += UNROLL + 1; //goto the real end + while(src != src_end) { + if( *src != 0 ) { + if( *src >= ALPHA_OPAQ ) { + *dst = *src; + } + else { + *dst = SkPMSrcOver(*src, *dst); + } + } + src++; + dst++; + } + +#undef UNROLL + return; +} /* Neon version of S32_Blend_BlitRow32() * portable version is in src/core/SkBlitRow_D32.cpp @@ -1107,6 +1279,20 @@ const SkBlitRow::Proc sk_blitrow_platform_4444_procs_arm_neon[] = { const SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm_neon[] = { NULL, // S32_Opaque, S32_Blend_BlitRow32_neon, // S32_Blend, - S32A_Opaque_BlitRow32_neon, // S32A_Opaque, + /* + * We have two choices for S32A_Opaque procs. One of them reads the src alpha + * value and attempts to optimize accordingly. The optimization is + * sensitive to the source content and is not a win in all cases. For + * example, if there are a lot of transitions between the alpha states, + * the performance will almost certainly be worse. However, for many + * common cases the performance is equivalent or better than the standard + * case where we do not inspect the src alpha. 
+ */ +#if SK_A32_SHIFT == 24 + // This proc assumes the alpha value occupies bits 24-31 of each SkPMColor + S32A_Opaque_BlitRow32_neon_src_alpha, // S32A_Opaque, +#else + S32A_Opaque_BlitRow32_neon, // S32A_Opaque, +#endif S32A_Blend_BlitRow32_arm // S32A_Blend }; -- cgit v1.2.3