diff options
author | 2014-11-25 07:12:27 -0800 | |
---|---|---|
committer | 2014-11-25 07:12:27 -0800 | |
commit | 6ff4acedb58ab5ef83b8e598637976d592572630 (patch) | |
tree | 69c3f9f147887905ad8a24d8eacd414ce2bc8f71 /src/opts | |
parent | 2253aa93930cdc5d0615098ce5473065427bcff6 (diff) |
Optimize highQualityFilter
portable version:
before:
10M 1 806µs 807µs 810µs 821µs 1% █▂▁▁▃▁▁▁█▁ 8888 bitmap_BGRA_8888_A_scale_rotate_bicubic
after:
10M 1 566µs 568µs 569µs 579µs 1% ▄▂▂█▂▁▁▁▃▁ 8888 bitmap_BGRA_8888_A_scale_rotate_bicubic
SSE version:
before:
10M 1 485µs 486µs 487µs 494µs 1% ▇▂▁▁▁▁█▂▁▁ 8888 bitmap_BGRA_8888_A_scale_rotate_bicubic
after:
10M 1 419µs 420µs 421µs 430µs 1% ▅▃▂▁▁█▂▁▁▁ 8888 bitmap_BGRA_8888_A_scale_rotate_bicubic
BUG=skia:
Review URL: https://codereview.chromium.org/759603002
Diffstat (limited to 'src/opts')
-rw-r--r-- | src/opts/SkBitmapFilter_opts_SSE2.cpp | 20 |
1 file changed, 12 insertions, 8 deletions
diff --git a/src/opts/SkBitmapFilter_opts_SSE2.cpp b/src/opts/SkBitmapFilter_opts_SSE2.cpp index 04f14863d7..2996f535d9 100644 --- a/src/opts/SkBitmapFilter_opts_SSE2.cpp +++ b/src/opts/SkBitmapFilter_opts_SSE2.cpp @@ -49,6 +49,7 @@ void highQualityFilter_SSE2(const SkBitmapProcState& s, int x, int y, const int maxX = s.fBitmap->width(); const int maxY = s.fBitmap->height(); SkAutoTMalloc<SkScalar> xWeights(maxX); + const SkBitmapFilter* filter = s.getBitmapFilter(); while (count-- > 0) { SkPoint srcPt; @@ -59,34 +60,37 @@ void highQualityFilter_SSE2(const SkBitmapProcState& s, int x, int y, __m128 weight = _mm_setzero_ps(); __m128 accum = _mm_setzero_ps(); - int y0 = SkClampMax(SkScalarCeilToInt(srcPt.fY-s.getBitmapFilter()->width()), maxY); - int y1 = SkClampMax(SkScalarFloorToInt(srcPt.fY+s.getBitmapFilter()->width()+1), maxY); - int x0 = SkClampMax(SkScalarCeilToInt(srcPt.fX-s.getBitmapFilter()->width()), maxX); - int x1 = SkClampMax(SkScalarFloorToInt(srcPt.fX+s.getBitmapFilter()->width())+1, maxX); + int y0 = SkClampMax(SkScalarCeilToInt(srcPt.fY - filter->width()), maxY); + int y1 = SkClampMax(SkScalarFloorToInt(srcPt.fY + filter->width() + 1), maxY); + int x0 = SkClampMax(SkScalarCeilToInt(srcPt.fX - filter->width()), maxX); + int x1 = SkClampMax(SkScalarFloorToInt(srcPt.fX + filter->width()) + 1, maxX); for (int srcX = x0; srcX < x1 ; srcX++) { // Looking these up once instead of each loop is a ~15% speedup. 
- xWeights[srcX - x0] = s.getBitmapFilter()->lookupScalar((srcPt.fX - srcX)); + xWeights[srcX - x0] = filter->lookupScalar((srcPt.fX - srcX)); } for (int srcY = y0; srcY < y1; srcY++) { - SkScalar yWeight = s.getBitmapFilter()->lookupScalar((srcPt.fY - srcY)); + SkScalar yWeight = filter->lookupScalar((srcPt.fY - srcY)); for (int srcX = x0; srcX < x1 ; srcX++) { SkScalar xWeight = xWeights[srcX - x0]; SkScalar combined_weight = SkScalarMul(xWeight, yWeight); + __m128 weightVector = _mm_set1_ps(combined_weight); + weight = _mm_add_ps( weight, weightVector ); SkPMColor color = *s.fBitmap->getAddr32(srcX, srcY); + if (!color) { + continue; + } __m128i c = _mm_cvtsi32_si128(color); c = _mm_unpacklo_epi8(c, _mm_setzero_si128()); c = _mm_unpacklo_epi16(c, _mm_setzero_si128()); __m128 cfloat = _mm_cvtepi32_ps(c); - __m128 weightVector = _mm_set1_ps(combined_weight); accum = _mm_add_ps(accum, _mm_mul_ps(cfloat, weightVector)); - weight = _mm_add_ps( weight, weightVector ); } } |