From 6ff4acedb58ab5ef83b8e598637976d592572630 Mon Sep 17 00:00:00 2001 From: "qiankun.miao" Date: Tue, 25 Nov 2014 07:12:27 -0800 Subject: Optimize highQualityFilter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit portable version: before: 10M 1 806µs 807µs 810µs 821µs 1% █▂▁▁▃▁▁▁█▁ 8888 bitmap_BGRA_8888_A_scale_rotate_bicubic after: 10M 1 566µs 568µs 569µs 579µs 1% ▄▂▂█▂▁▁▁▃▁ 8888 bitmap_BGRA_8888_A_scale_rotate_bicubic SSE version: before: 10M 1 485µs 486µs 487µs 494µs 1% ▇▂▁▁▁▁█▂▁▁ 8888 bitmap_BGRA_8888_A_scale_rotate_bicubic after: 10M 1 419µs 420µs 421µs 430µs 1% ▅▃▂▁▁█▂▁▁▁ 8888 bitmap_BGRA_8888_A_scale_rotate_bicubic BUG=skia: Review URL: https://codereview.chromium.org/759603002 --- src/opts/SkBitmapFilter_opts_SSE2.cpp | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'src/opts/SkBitmapFilter_opts_SSE2.cpp') diff --git a/src/opts/SkBitmapFilter_opts_SSE2.cpp b/src/opts/SkBitmapFilter_opts_SSE2.cpp index 04f14863d7..2996f535d9 100644 --- a/src/opts/SkBitmapFilter_opts_SSE2.cpp +++ b/src/opts/SkBitmapFilter_opts_SSE2.cpp @@ -49,6 +49,7 @@ void highQualityFilter_SSE2(const SkBitmapProcState& s, int x, int y, const int maxX = s.fBitmap->width(); const int maxY = s.fBitmap->height(); SkAutoTMalloc xWeights(maxX); + const SkBitmapFilter* filter = s.getBitmapFilter(); while (count-- > 0) { SkPoint srcPt; @@ -59,34 +60,37 @@ void highQualityFilter_SSE2(const SkBitmapProcState& s, int x, int y, __m128 weight = _mm_setzero_ps(); __m128 accum = _mm_setzero_ps(); - int y0 = SkClampMax(SkScalarCeilToInt(srcPt.fY-s.getBitmapFilter()->width()), maxY); - int y1 = SkClampMax(SkScalarFloorToInt(srcPt.fY+s.getBitmapFilter()->width()+1), maxY); - int x0 = SkClampMax(SkScalarCeilToInt(srcPt.fX-s.getBitmapFilter()->width()), maxX); - int x1 = SkClampMax(SkScalarFloorToInt(srcPt.fX+s.getBitmapFilter()->width())+1, maxX); + int y0 = SkClampMax(SkScalarCeilToInt(srcPt.fY - filter->width()), maxY); + int y1 = SkClampMax(SkScalarFloorToInt(srcPt.fY + filter->width() + 1), maxY); + int x0 = SkClampMax(SkScalarCeilToInt(srcPt.fX - filter->width()), maxX); + int x1 = SkClampMax(SkScalarFloorToInt(srcPt.fX + filter->width()) + 1, maxX); for (int srcX = x0; srcX < x1 ; srcX++) { // Looking these up once instead of each loop is a ~15% speedup. - xWeights[srcX - x0] = s.getBitmapFilter()->lookupScalar((srcPt.fX - srcX)); + xWeights[srcX - x0] = filter->lookupScalar((srcPt.fX - srcX)); } for (int srcY = y0; srcY < y1; srcY++) { - SkScalar yWeight = s.getBitmapFilter()->lookupScalar((srcPt.fY - srcY)); + SkScalar yWeight = filter->lookupScalar((srcPt.fY - srcY)); for (int srcX = x0; srcX < x1 ; srcX++) { SkScalar xWeight = xWeights[srcX - x0]; SkScalar combined_weight = SkScalarMul(xWeight, yWeight); + __m128 weightVector = _mm_set1_ps(combined_weight); + weight = _mm_add_ps( weight, weightVector ); SkPMColor color = *s.fBitmap->getAddr32(srcX, srcY); + if (!color) { + continue; + } __m128i c = _mm_cvtsi32_si128(color); c = _mm_unpacklo_epi8(c, _mm_setzero_si128()); c = _mm_unpacklo_epi16(c, _mm_setzero_si128()); __m128 cfloat = _mm_cvtepi32_ps(c); - __m128 weightVector = _mm_set1_ps(combined_weight); accum = _mm_add_ps(accum, _mm_mul_ps(cfloat, weightVector)); - weight = _mm_add_ps( weight, weightVector ); } } -- cgit v1.2.3