diff options
author | mtklein <mtklein@google.com> | 2014-09-03 08:57:02 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2014-09-03 08:57:02 -0700 |
commit | c09b2c49a30fed981283f97476e885b40e53f094 (patch) | |
tree | 8281f9082c191cdb27faeb15e27cc33507530724 /src/opts | |
parent | 3f5417590a1ed34de2fb89994493db0b1153b4cb (diff) |
Revert of Enable highQualityFilter_SSE2 (patchset #1 id:1 of https://codereview.chromium.org/525283002/)
Reason for revert:
Color order looks wrong on Macs:
Before: http://chromium-skia-gm.commondatastorage.googleapis.com/gm/bitmap-64bitMD5/filterbitmap_image_mandrill_16.png/12823183142873462143.png
After: http://chromium-skia-gm.commondatastorage.googleapis.com/gm/bitmap-64bitMD5/filterbitmap_image_mandrill_16.png/13683040204546320578.png
Original issue's description:
> Enable highQualityFilter_SSE2
>
> With SSE2, bitmap_BGRA_8888_A_scale_rotate_bicubic gains about 40%
> performance improvement on desktop i7-3770.
>
> BUG=skia:
>
> Committed: https://skia.googlesource.com/skia/+/b381fa10d8079c58928058bb8a6db32b39f05e51
R=humper@google.com, qiankun.miao@intel.com
TBR=humper@google.com, qiankun.miao@intel.com
NOTREECHECKS=true
NOTRY=true
BUG=skia:
Author: mtklein@google.com
Review URL: https://codereview.chromium.org/539523002
Diffstat (limited to 'src/opts')
-rw-r--r-- | src/opts/SkBitmapFilter_opts_SSE2.cpp | 56 |
-rw-r--r-- | src/opts/opts_check_x86.cpp | 2 |
2 files changed, 30 insertions, 28 deletions
diff --git a/src/opts/SkBitmapFilter_opts_SSE2.cpp b/src/opts/SkBitmapFilter_opts_SSE2.cpp index 5bf5326c9e..b831e298d0 100644 --- a/src/opts/SkBitmapFilter_opts_SSE2.cpp +++ b/src/opts/SkBitmapFilter_opts_SSE2.cpp @@ -46,45 +46,45 @@ static inline void print128f(__m128 value) { void highQualityFilter_SSE2(const SkBitmapProcState& s, int x, int y, SkPMColor* SK_RESTRICT colors, int count) { - const int maxX = s.fBitmap->width(); - const int maxY = s.fBitmap->height(); - SkAutoTMalloc<SkScalar> xWeights(maxX); + const int maxX = s.fBitmap->width() - 1; + const int maxY = s.fBitmap->height() - 1; while (count-- > 0) { SkPoint srcPt; - s.fInvProc(s.fInvMatrix, x + 0.5f, y + 0.5f, &srcPt); + s.fInvProc(s.fInvMatrix, SkIntToScalar(x), + SkIntToScalar(y), &srcPt); srcPt.fX -= SK_ScalarHalf; srcPt.fY -= SK_ScalarHalf; + int sx = SkScalarFloorToInt(srcPt.fX); + int sy = SkScalarFloorToInt(srcPt.fY); + __m128 weight = _mm_setzero_ps(); __m128 accum = _mm_setzero_ps(); - int y0 = SkClampMax(SkScalarCeilToInt(srcPt.fY-s.getBitmapFilter()->width()), maxY); - int y1 = SkClampMax(SkScalarFloorToInt(srcPt.fY+s.getBitmapFilter()->width()+1), maxY); - int x0 = SkClampMax(SkScalarCeilToInt(srcPt.fX-s.getBitmapFilter()->width()), maxX); - int x1 = SkClampMax(SkScalarFloorToInt(srcPt.fX+s.getBitmapFilter()->width())+1, maxX); - - for (int srcX = x0; srcX < x1 ; srcX++) { - // Looking these up once instead of each loop is a ~15% speedup. 
- xWeights[srcX - x0] = s.getBitmapFilter()->lookupScalar((srcPt.fX - srcX)); - } + int y0 = SkTMax(0, int(ceil(sy-s.getBitmapFilter()->width() + 0.5f))); + int y1 = SkTMin(maxY, int(floor(sy+s.getBitmapFilter()->width() + 0.5f))); + int x0 = SkTMax(0, int(ceil(sx-s.getBitmapFilter()->width() + 0.5f))); + int x1 = SkTMin(maxX, int(floor(sx+s.getBitmapFilter()->width() + 0.5f))); - for (int srcY = y0; srcY < y1; srcY++) { - SkScalar yWeight = s.getBitmapFilter()->lookupScalar((srcPt.fY - srcY)); + for (int src_y = y0; src_y <= y1; src_y++) { + float yweight = SkScalarToFloat(s.getBitmapFilter()->lookupScalar(srcPt.fY - src_y)); - for (int srcX = x0; srcX < x1 ; srcX++) { - SkScalar xWeight = xWeights[srcX - x0]; + for (int src_x = x0; src_x <= x1 ; src_x++) { + float xweight = SkScalarToFloat(s.getBitmapFilter()->lookupScalar(srcPt.fX - src_x)); - SkScalar combined_weight = SkScalarMul(xWeight, yWeight); + float combined_weight = xweight * yweight; - SkPMColor color = *s.fBitmap->getAddr32(srcX, srcY); + SkPMColor color = *s.fBitmap->getAddr32(src_x, src_y); - __m128i c = _mm_cvtsi32_si128(color); + __m128i c = _mm_cvtsi32_si128( color ); c = _mm_unpacklo_epi8(c, _mm_setzero_si128()); c = _mm_unpacklo_epi16(c, _mm_setzero_si128()); - __m128 cfloat = _mm_cvtepi32_ps(c); + + __m128 cfloat = _mm_cvtepi32_ps( c ); __m128 weightVector = _mm_set1_ps(combined_weight); + accum = _mm_add_ps(accum, _mm_mul_ps(cfloat, weightVector)); weight = _mm_add_ps( weight, weightVector ); } @@ -92,13 +92,15 @@ void highQualityFilter_SSE2(const SkBitmapProcState& s, int x, int y, accum = _mm_div_ps(accum, weight); accum = _mm_add_ps(accum, _mm_set1_ps(0.5f)); - __m128i accumInt = _mm_cvttps_epi32(accum); - int* localResult = (int*)(&accumInt); - int a = SkClampMax(localResult[3], 255); - int r = SkClampMax(localResult[2], a); - int g = SkClampMax(localResult[1], a); - int b = SkClampMax(localResult[0], a); + __m128i accumInt = _mm_cvtps_epi32( accum ); + + int localResult[4]; + 
_mm_storeu_si128((__m128i *) (localResult), accumInt); + int a = SkClampMax(localResult[0], 255); + int r = SkClampMax(localResult[1], a); + int g = SkClampMax(localResult[2], a); + int b = SkClampMax(localResult[3], a); *colors++ = SkPackARGB32(a, r, g, b); diff --git a/src/opts/opts_check_x86.cpp b/src/opts/opts_check_x86.cpp index 55eb843312..5bab17aa00 100644 --- a/src/opts/opts_check_x86.cpp +++ b/src/opts/opts_check_x86.cpp @@ -123,7 +123,7 @@ static inline bool supports_simd(int minLevel) { //////////////////////////////////////////////////////////////////////////////// -SK_CONF_DECLARE( bool, c_hqfilter_sse, "bitmap.filter.highQualitySSE", true, "Use SSE optimized version of high quality image filters"); +SK_CONF_DECLARE( bool, c_hqfilter_sse, "bitmap.filter.highQualitySSE", false, "Use SSE optimized version of high quality image filters"); void SkBitmapScaler::PlatformConvolutionProcs(SkConvolutionProcs* procs) { if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { |