aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
author: mtklein <mtklein@google.com> 2014-09-03 08:57:02 -0700
committer: Commit bot <commit-bot@chromium.org> 2014-09-03 08:57:02 -0700
commit: c09b2c49a30fed981283f97476e885b40e53f094 (patch)
tree: 8281f9082c191cdb27faeb15e27cc33507530724 /src
parent: 3f5417590a1ed34de2fb89994493db0b1153b4cb (diff)
Revert of Enable highQualityFilter_SSE2 (patchset #1 id:1 of https://codereview.chromium.org/525283002/)
Reason for revert:
Color order looks wrong on Macs:
Before: http://chromium-skia-gm.commondatastorage.googleapis.com/gm/bitmap-64bitMD5/filterbitmap_image_mandrill_16.png/12823183142873462143.png
After: http://chromium-skia-gm.commondatastorage.googleapis.com/gm/bitmap-64bitMD5/filterbitmap_image_mandrill_16.png/13683040204546320578.png

Original issue's description:
> Enable highQualityFilter_SSE2
>
> With SSE2, bitmap_BGRA_8888_A_scale_rotate_bicubic gains about 40%
> performance improvement on desktop i7-3770.
>
> BUG=skia:
>
> Committed: https://skia.googlesource.com/skia/+/b381fa10d8079c58928058bb8a6db32b39f05e51

R=humper@google.com, qiankun.miao@intel.com
TBR=humper@google.com, qiankun.miao@intel.com
NOTREECHECKS=true
NOTRY=true
BUG=skia:

Author: mtklein@google.com

Review URL: https://codereview.chromium.org/539523002
Diffstat (limited to 'src')
-rw-r--r-- src/opts/SkBitmapFilter_opts_SSE2.cpp 56
-rw-r--r-- src/opts/opts_check_x86.cpp 2
2 files changed, 30 insertions, 28 deletions
diff --git a/src/opts/SkBitmapFilter_opts_SSE2.cpp b/src/opts/SkBitmapFilter_opts_SSE2.cpp
index 5bf5326c9e..b831e298d0 100644
--- a/src/opts/SkBitmapFilter_opts_SSE2.cpp
+++ b/src/opts/SkBitmapFilter_opts_SSE2.cpp
@@ -46,45 +46,45 @@ static inline void print128f(__m128 value) {
void highQualityFilter_SSE2(const SkBitmapProcState& s, int x, int y,
SkPMColor* SK_RESTRICT colors, int count) {
- const int maxX = s.fBitmap->width();
- const int maxY = s.fBitmap->height();
- SkAutoTMalloc<SkScalar> xWeights(maxX);
+ const int maxX = s.fBitmap->width() - 1;
+ const int maxY = s.fBitmap->height() - 1;
while (count-- > 0) {
SkPoint srcPt;
- s.fInvProc(s.fInvMatrix, x + 0.5f, y + 0.5f, &srcPt);
+ s.fInvProc(s.fInvMatrix, SkIntToScalar(x),
+ SkIntToScalar(y), &srcPt);
srcPt.fX -= SK_ScalarHalf;
srcPt.fY -= SK_ScalarHalf;
+ int sx = SkScalarFloorToInt(srcPt.fX);
+ int sy = SkScalarFloorToInt(srcPt.fY);
+
__m128 weight = _mm_setzero_ps();
__m128 accum = _mm_setzero_ps();
- int y0 = SkClampMax(SkScalarCeilToInt(srcPt.fY-s.getBitmapFilter()->width()), maxY);
- int y1 = SkClampMax(SkScalarFloorToInt(srcPt.fY+s.getBitmapFilter()->width()+1), maxY);
- int x0 = SkClampMax(SkScalarCeilToInt(srcPt.fX-s.getBitmapFilter()->width()), maxX);
- int x1 = SkClampMax(SkScalarFloorToInt(srcPt.fX+s.getBitmapFilter()->width())+1, maxX);
-
- for (int srcX = x0; srcX < x1 ; srcX++) {
- // Looking these up once instead of each loop is a ~15% speedup.
- xWeights[srcX - x0] = s.getBitmapFilter()->lookupScalar((srcPt.fX - srcX));
- }
+ int y0 = SkTMax(0, int(ceil(sy-s.getBitmapFilter()->width() + 0.5f)));
+ int y1 = SkTMin(maxY, int(floor(sy+s.getBitmapFilter()->width() + 0.5f)));
+ int x0 = SkTMax(0, int(ceil(sx-s.getBitmapFilter()->width() + 0.5f)));
+ int x1 = SkTMin(maxX, int(floor(sx+s.getBitmapFilter()->width() + 0.5f)));
- for (int srcY = y0; srcY < y1; srcY++) {
- SkScalar yWeight = s.getBitmapFilter()->lookupScalar((srcPt.fY - srcY));
+ for (int src_y = y0; src_y <= y1; src_y++) {
+ float yweight = SkScalarToFloat(s.getBitmapFilter()->lookupScalar(srcPt.fY - src_y));
- for (int srcX = x0; srcX < x1 ; srcX++) {
- SkScalar xWeight = xWeights[srcX - x0];
+ for (int src_x = x0; src_x <= x1 ; src_x++) {
+ float xweight = SkScalarToFloat(s.getBitmapFilter()->lookupScalar(srcPt.fX - src_x));
- SkScalar combined_weight = SkScalarMul(xWeight, yWeight);
+ float combined_weight = xweight * yweight;
- SkPMColor color = *s.fBitmap->getAddr32(srcX, srcY);
+ SkPMColor color = *s.fBitmap->getAddr32(src_x, src_y);
- __m128i c = _mm_cvtsi32_si128(color);
+ __m128i c = _mm_cvtsi32_si128( color );
c = _mm_unpacklo_epi8(c, _mm_setzero_si128());
c = _mm_unpacklo_epi16(c, _mm_setzero_si128());
- __m128 cfloat = _mm_cvtepi32_ps(c);
+
+ __m128 cfloat = _mm_cvtepi32_ps( c );
__m128 weightVector = _mm_set1_ps(combined_weight);
+
accum = _mm_add_ps(accum, _mm_mul_ps(cfloat, weightVector));
weight = _mm_add_ps( weight, weightVector );
}
@@ -92,13 +92,15 @@ void highQualityFilter_SSE2(const SkBitmapProcState& s, int x, int y,
accum = _mm_div_ps(accum, weight);
accum = _mm_add_ps(accum, _mm_set1_ps(0.5f));
- __m128i accumInt = _mm_cvttps_epi32(accum);
- int* localResult = (int*)(&accumInt);
- int a = SkClampMax(localResult[3], 255);
- int r = SkClampMax(localResult[2], a);
- int g = SkClampMax(localResult[1], a);
- int b = SkClampMax(localResult[0], a);
+ __m128i accumInt = _mm_cvtps_epi32( accum );
+
+ int localResult[4];
+ _mm_storeu_si128((__m128i *) (localResult), accumInt);
+ int a = SkClampMax(localResult[0], 255);
+ int r = SkClampMax(localResult[1], a);
+ int g = SkClampMax(localResult[2], a);
+ int b = SkClampMax(localResult[3], a);
*colors++ = SkPackARGB32(a, r, g, b);
diff --git a/src/opts/opts_check_x86.cpp b/src/opts/opts_check_x86.cpp
index 55eb843312..5bab17aa00 100644
--- a/src/opts/opts_check_x86.cpp
+++ b/src/opts/opts_check_x86.cpp
@@ -123,7 +123,7 @@ static inline bool supports_simd(int minLevel) {
////////////////////////////////////////////////////////////////////////////////
-SK_CONF_DECLARE( bool, c_hqfilter_sse, "bitmap.filter.highQualitySSE", true, "Use SSE optimized version of high quality image filters");
+SK_CONF_DECLARE( bool, c_hqfilter_sse, "bitmap.filter.highQualitySSE", false, "Use SSE optimized version of high quality image filters");
void SkBitmapScaler::PlatformConvolutionProcs(SkConvolutionProcs* procs) {
if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {