aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
authorGravatar mtklein <mtklein@chromium.org>2015-08-18 09:43:28 -0700
committerGravatar Commit bot <commit-bot@chromium.org>2015-08-18 09:43:28 -0700
commit2d141ba2df8f7506848aa9369f502944e837cd09 (patch)
tree0b669c1b2694b024e1bf93bec11d86dcb256fe87 /src
parentf05271581fc6204c2b7ccf146af5d02eec27e670 (diff)
Patches on top of Radu's latest.
patch from issue 1273033005 at patchset 120001 (http://crrev.com/1273033005#ps120001) BUG=skia: Review URL: https://codereview.chromium.org/1288323004
Diffstat (limited to 'src')
-rw-r--r--src/core/SkOpts.cpp2
-rw-r--r--src/core/SkOpts.h9
-rw-r--r--src/core/SkPMFloat.h1
-rw-r--r--src/effects/SkColorCubeFilter.cpp34
-rw-r--r--src/opts/SkColorCubeFilter_opts.h85
-rw-r--r--src/opts/SkOpts_neon.cpp3
-rw-r--r--src/opts/SkOpts_ssse3.cpp2
-rw-r--r--src/opts/SkPMFloat_neon.h17
-rw-r--r--src/opts/SkPMFloat_none.h10
-rw-r--r--src/opts/SkPMFloat_sse.h25
10 files changed, 157 insertions, 31 deletions
diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp
index 890d9c7532..d5308e708e 100644
--- a/src/core/SkOpts.cpp
+++ b/src/core/SkOpts.cpp
@@ -11,6 +11,7 @@
#define SK_OPTS_NS portable
#include "SkBlitMask_opts.h"
#include "SkBlurImageFilter_opts.h"
+#include "SkColorCubeFilter_opts.h"
#include "SkFloatingPoint_opts.h"
#include "SkMorphologyImageFilter_opts.h"
#include "SkTextureCompressor_opts.h"
@@ -38,6 +39,7 @@ namespace SkOpts {
decltype(memset16) memset16 = portable::memset16;
decltype(memset32) memset32 = portable::memset32;
decltype(create_xfermode) create_xfermode = SkCreate4pxXfermode;
+ decltype(color_cube_filter_span) color_cube_filter_span = portable::color_cube_filter_span;
decltype(box_blur_xx) box_blur_xx = portable::box_blur_xx;
decltype(box_blur_xy) box_blur_xy = portable::box_blur_xy;
diff --git a/src/core/SkOpts.h b/src/core/SkOpts.h
index e0ef7dc424..04aa9b1753 100644
--- a/src/core/SkOpts.h
+++ b/src/core/SkOpts.h
@@ -44,6 +44,15 @@ namespace SkOpts {
extern bool (*fill_block_dimensions)(SkTextureCompressor::Format, int* x, int* y);
extern void (*blit_mask_d32_a8)(SkPMColor*, size_t, const SkAlpha*, size_t, SkColor, int, int);
+
+ // This function is an optimized version of SkColorCubeFilter::filterSpan
+ extern void (*color_cube_filter_span)(const SkPMColor[],
+ int,
+ SkPMColor[],
+ const int * [2],
+ const SkScalar * [2],
+ int,
+ const SkColor*);
}
#endif//SkOpts_DEFINED
diff --git a/src/core/SkPMFloat.h b/src/core/SkPMFloat.h
index f97f25c9c6..1fc80f5004 100644
--- a/src/core/SkPMFloat.h
+++ b/src/core/SkPMFloat.h
@@ -26,6 +26,7 @@ class SkPMFloat : public Sk4f {
public:
static SkPMFloat FromPMColor(SkPMColor c) { return SkPMFloat(c); }
static SkPMFloat FromARGB(float a, float r, float g, float b) { return SkPMFloat(a,r,g,b); }
+ static SkPMFloat FromBGRx(SkColor c); // Ignores c's alpha, instead forcing it to 1.
Sk4f alphas() const; // argb -> aaaa, generally faster than the equivalent Sk4f(this->a()).
diff --git a/src/effects/SkColorCubeFilter.cpp b/src/effects/SkColorCubeFilter.cpp
index 3b7c75fa7f..f37b695cba 100644
--- a/src/effects/SkColorCubeFilter.cpp
+++ b/src/effects/SkColorCubeFilter.cpp
@@ -8,6 +8,7 @@
#include "SkColorCubeFilter.h"
#include "SkColorPriv.h"
#include "SkOnce.h"
+#include "SkOpts.h"
#include "SkReadBuffer.h"
#include "SkUnPreMultiply.h"
#include "SkWriteBuffer.h"
@@ -128,36 +129,9 @@ void SkColorCubeFilter::filterSpan(const SkPMColor src[], int count, SkPMColor d
const SkScalar* colorToScalar;
fCache.getProcessingLuts(&colorToIndex, &colorToFactors, &colorToScalar);
- const int dim = fCache.cubeDimension();
- SkColor* colorCube = (SkColor*)fCubeData->data();
- for (int i = 0; i < count; ++i) {
- SkColor inputColor = SkUnPreMultiply::PMColorToColor(src[i]);
- uint8_t r = SkColorGetR(inputColor);
- uint8_t g = SkColorGetG(inputColor);
- uint8_t b = SkColorGetB(inputColor);
- uint8_t a = SkColorGetA(inputColor);
- SkScalar rOut(0), gOut(0), bOut(0);
- for (int x = 0; x < 2; ++x) {
- for (int y = 0; y < 2; ++y) {
- for (int z = 0; z < 2; ++z) {
- SkColor lutColor = colorCube[colorToIndex[x][r] +
- (colorToIndex[y][g] +
- colorToIndex[z][b] * dim) * dim];
- SkScalar factor = colorToFactors[x][r] *
- colorToFactors[y][g] *
- colorToFactors[z][b];
- rOut += colorToScalar[SkColorGetR(lutColor)] * factor;
- gOut += colorToScalar[SkColorGetG(lutColor)] * factor;
- bOut += colorToScalar[SkColorGetB(lutColor)] * factor;
- }
- }
- }
- const SkScalar aOut = SkIntToScalar(a);
- dst[i] = SkPackARGB32(a,
- SkScalarRoundToInt(rOut * aOut),
- SkScalarRoundToInt(gOut * aOut),
- SkScalarRoundToInt(bOut * aOut));
- }
+ SkOpts::color_cube_filter_span(src, count, dst, colorToIndex,
+ colorToFactors, fCache.cubeDimension(),
+ (SkColor*)fCubeData->data());
}
SkFlattenable* SkColorCubeFilter::CreateProc(SkReadBuffer& buffer) {
diff --git a/src/opts/SkColorCubeFilter_opts.h b/src/opts/SkColorCubeFilter_opts.h
new file mode 100644
index 0000000000..325d7aa649
--- /dev/null
+++ b/src/opts/SkColorCubeFilter_opts.h
@@ -0,0 +1,85 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SkColorCubeFilter_opts_DEFINED
+#define SkColorCubeFilter_opts_DEFINED
+
+#include "SkColor.h"
+#include "SkPMFloat.h"
+#include "SkUnPreMultiply.h"
+
+namespace SK_OPTS_NS {
+
+// Runs a span of premultiplied pixels through a 3D color lookup table.
+//
+//   src, count      input pixels (SkPMColor) and how many of them to process
+//   dst             output pixels; dst[i] is written only after src[i] has been read
+//   colorToIndex    two tables indexed by an 8-bit channel value, giving the two cube
+//                   coordinates to sample for that value
+//   colorToFactors  two tables indexed by an 8-bit channel value, giving the weight
+//                   applied to the matching colorToIndex entry
+//   dim             edge length of the cube; entries are addressed as r + (g + b*dim)*dim
+//   colorCube       the cube entries, stored as SkColor
+//
+// For every pixel the eight cube entries selected by colorToIndex are blended, each
+// weighted by the product of its three per-channel factors.  The alpha of the source
+// pixel is carried over to the destination unchanged.
+void color_cube_filter_span(const SkPMColor src[],
+                            int count,
+                            SkPMColor dst[],
+                            const int* colorToIndex[2],
+                            const SkScalar* colorToFactors[2],
+                            int dim,
+                            const SkColor* colorCube) {
+    for (int i = 0; i < count; ++i) {
+        const SkPMColor input = src[i];
+        const uint8_t a = static_cast<uint8_t>((input >> SK_A32_SHIFT) & 0xFF);
+
+        // The lookup tables are indexed by unpremultiplied channel values.  With a == 255
+        // the packed channels can be used directly, so the unpremultiply step is skipped.
+        uint8_t r, g, b;
+        if (a != 255) {
+            const SkColor source = SkUnPreMultiply::PMColorToColor(input);
+            r = SkColorGetR(source);
+            g = SkColorGetG(source);
+            b = SkColorGetB(source);
+        } else {
+            r = SkGetPackedR32(input);
+            g = SkGetPackedG32(input);
+            b = SkGetPackedB32(input);
+        }
+
+        // The green/blue weights and cube offsets do not depend on the red coordinate,
+        // so they are computed once per pixel rather than once per sampled entry.
+        const SkScalar g0 = colorToFactors[0][g],
+                       g1 = colorToFactors[1][g],
+                       b0 = colorToFactors[0][b],
+                       b1 = colorToFactors[1][b];
+
+        const Sk4f g0b0(g0*b0),
+                   g0b1(g0*b1),
+                   g1b0(g1*b0),
+                   g1b1(g1*b1);
+
+        const int i00 = (colorToIndex[0][g] + colorToIndex[0][b] * dim) * dim;
+        const int i01 = (colorToIndex[0][g] + colorToIndex[1][b] * dim) * dim;
+        const int i10 = (colorToIndex[1][g] + colorToIndex[0][b] * dim) * dim;
+        const int i11 = (colorToIndex[1][g] + colorToIndex[1][b] * dim) * dim;
+
+        SkPMFloat color(0);
+
+        for (int x = 0; x < 2; ++x) {
+            const int ix = colorToIndex[x][r];
+
+            const SkColor lutColor00 = colorCube[ix + i00];
+            const SkColor lutColor01 = colorCube[ix + i01];
+            const SkColor lutColor10 = colorCube[ix + i10];
+            const SkColor lutColor11 = colorCube[ix + i11];
+
+            Sk4f sum = SkPMFloat::FromBGRx(lutColor00) * g0b0;
+            sum = sum + SkPMFloat::FromBGRx(lutColor01) * g0b1;
+            sum = sum + SkPMFloat::FromBGRx(lutColor10) * g1b0;
+            sum = sum + SkPMFloat::FromBGRx(lutColor11) * g1b1;
+
+            color = color + sum * Sk4f((float)colorToFactors[x][r]);
+        }
+
+        // Scale the blended color by the source alpha to premultiply it again.
+        if (a != 255) {
+            color = color * Sk4f(((float)a) / 255);
+        }
+
+        // Splice the exact source alpha into the rounded result with shift/mask.  Unlike
+        // storing through a byte pointer at offset SK_A32_SHIFT / 8, this does not depend
+        // on the byte order of the target.
+        const SkPMColor alphaMask = static_cast<SkPMColor>(0xFF) << SK_A32_SHIFT;
+        const SkPMColor rounded = color.round();
+        dst[i] = (rounded & ~alphaMask) | (static_cast<SkPMColor>(a) << SK_A32_SHIFT);
+    }
+}
+
+} // namespace SK_OPTS_NS
+
+#endif // SkColorCubeFilter_opts_DEFINED
diff --git a/src/opts/SkOpts_neon.cpp b/src/opts/SkOpts_neon.cpp
index 789a977238..aa184d8ef4 100644
--- a/src/opts/SkOpts_neon.cpp
+++ b/src/opts/SkOpts_neon.cpp
@@ -10,6 +10,7 @@
#define SK_OPTS_NS neon
#include "SkBlitMask_opts.h"
#include "SkBlurImageFilter_opts.h"
+#include "SkColorCubeFilter_opts.h"
#include "SkFloatingPoint_opts.h"
#include "SkMorphologyImageFilter_opts.h"
#include "SkTextureCompressor_opts.h"
@@ -36,5 +37,7 @@ namespace SkOpts {
fill_block_dimensions = neon::fill_block_dimensions;
blit_mask_d32_a8 = neon::blit_mask_d32_a8;
+
+ color_cube_filter_span = neon::color_cube_filter_span;
}
}
diff --git a/src/opts/SkOpts_ssse3.cpp b/src/opts/SkOpts_ssse3.cpp
index 7e056a0787..c65f0cbb7f 100644
--- a/src/opts/SkOpts_ssse3.cpp
+++ b/src/opts/SkOpts_ssse3.cpp
@@ -8,11 +8,13 @@
#include "SkOpts.h"
#define SK_OPTS_NS ssse3
#include "SkBlitMask_opts.h"
+#include "SkColorCubeFilter_opts.h"
#include "SkXfermode_opts.h"
namespace SkOpts {
// Repoints SkOpts function pointers at the implementations built in this file:
// create_xfermode is set to SkCreate4pxXfermode, and blit_mask_d32_a8 and
// color_cube_filter_span are set to the versions in the ssse3 namespace.
// NOTE(review): presumably called only after SSSE3 support has been detected -- confirm
// against the caller.
void Init_ssse3() {
create_xfermode = SkCreate4pxXfermode;
blit_mask_d32_a8 = ssse3::blit_mask_d32_a8;
+ color_cube_filter_span = ssse3::color_cube_filter_span;
}
}
diff --git a/src/opts/SkPMFloat_neon.h b/src/opts/SkPMFloat_neon.h
index 8bee5b551a..ecb151f1fd 100644
--- a/src/opts/SkPMFloat_neon.h
+++ b/src/opts/SkPMFloat_neon.h
@@ -7,6 +7,8 @@
namespace { // See SkPMFloat.h
+static_assert(SK_A32_SHIFT == 24, "This file assumes little-endian.");
+
inline SkPMFloat::SkPMFloat(SkPMColor c) {
SkPMColorAssert(c);
uint8x8_t fix8 = (uint8x8_t)vdup_n_u32(c);
@@ -28,8 +30,21 @@ inline SkPMColor SkPMFloat::round() const {
}
inline Sk4f SkPMFloat::alphas() const {
- static_assert(SK_A32_SHIFT == 24, "Assuming little-endian.");
return vdupq_lane_f32(vget_high_f32(fVec), 1); // Duplicate high lane of high half i.e. lane 3.
}
+// Builds an SkPMFloat from an SkColor: each color byte becomes a float lane scaled by
+// 1/255, and the alpha lane is forced to 1 regardless of c's own alpha.
+inline SkPMFloat SkPMFloat::FromBGRx(SkColor c) {
+    uint8x8_t bytes = (uint8x8_t)vdup_n_u32(c);
+#if defined(SK_PMCOLOR_IS_RGBA)
+    // Table lookup that exchanges bytes 0 and 2 of each 32-bit group, i.e. swaps R and B.
+    const uint8x8_t swapRB = vcreate_u8(0x0300010203000102ULL);
+    bytes = vtbl1_u8(bytes, swapRB);
+#endif
+    // Widen 8 -> 16 -> 32 bits, keeping the low four lanes.
+    const uint16x8_t wide16 = vmovl_u8(bytes);
+    uint32x4_t lanes = vmovl_u16(vget_low_u16(wide16));
+    lanes = vsetq_lane_u32(0xFF, lanes, 3);  // Lane 3 is alpha: 0xFF becomes 1.0 after scaling.
+
+    const float32x4_t scaled = vmulq_f32(vcvtq_f32_u32(lanes), vdupq_n_f32(1.0f/255));
+    SkPMFloat pmf = Sk4f(scaled);
+    SkASSERT(pmf.isValid());
+    return pmf;
+}
+
} // namespace
diff --git a/src/opts/SkPMFloat_none.h b/src/opts/SkPMFloat_none.h
index 518ad159ff..42446e6ac3 100644
--- a/src/opts/SkPMFloat_none.h
+++ b/src/opts/SkPMFloat_none.h
@@ -34,4 +34,14 @@ inline Sk4f SkPMFloat::alphas() const {
return Sk4f(this->a());
}
+// Builds an SkPMFloat from an SkColor: each color channel is scaled by 1/255 and the
+// alpha is forced to 1 regardless of c's own alpha.
+//
+// c is an SkColor, so its channels are read with the SkColorGet* accessors.  The
+// SkGetPacked*32 accessors use the SkPMColor shifts and would return red and blue
+// swapped on builds where SkPMColor is laid out as RGBA.
+inline SkPMFloat SkPMFloat::FromBGRx(SkColor c) {
+    const float inv255 = 1.0f / 255;
+    SkPMFloat pmf = SkPMFloat::FromARGB(1.0f,
+                                        SkColorGetR(c) * inv255,
+                                        SkColorGetG(c) * inv255,
+                                        SkColorGetB(c) * inv255);
+    SkASSERT(pmf.isValid());
+    return pmf;
+}
+
} // namespace
diff --git a/src/opts/SkPMFloat_sse.h b/src/opts/SkPMFloat_sse.h
index 85503886a6..6cfee1da6f 100644
--- a/src/opts/SkPMFloat_sse.h
+++ b/src/opts/SkPMFloat_sse.h
@@ -38,4 +38,29 @@ inline Sk4f SkPMFloat::alphas() const {
return _mm_shuffle_ps(fVec, fVec, 0xff); // Read as 11 11 11 11, copying lane 3 to all lanes.
}
+// Builds an SkPMFloat from an SkColor: each color byte becomes a float lane scaled by
+// 1/255, and the alpha lane is forced to 1 regardless of c's own alpha.
+inline SkPMFloat SkPMFloat::FromBGRx(SkColor c) {
+    const __m128i packed = _mm_cvtsi32_si128((int)c);
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
+    // One byte shuffle both widens each color byte to a 32-bit lane and picks the lane order.
+    const char _ = ~0;  // Zero these bytes.
+  #if defined(SK_PMCOLOR_IS_BGRA)
+    const __m128i spread = _mm_setr_epi8(0,_,_,_, 1,_,_,_, 2,_,_,_, _,_,_,_);
+  #else
+    const __m128i spread = _mm_setr_epi8(2,_,_,_, 1,_,_,_, 0,_,_,_, _,_,_,_);
+  #endif
+    __m128i lanes = _mm_shuffle_epi8(packed, spread);
+#else
+    // Without SSSE3: widen 8 -> 16 -> 32 bits by interleaving with zeros.
+    const __m128i wide16 = _mm_unpacklo_epi8(packed, _mm_setzero_si128());
+    __m128i lanes = _mm_unpacklo_epi16(wide16, _mm_setzero_si128());
+  #if defined(SK_PMCOLOR_IS_RGBA)
+    lanes = _mm_shuffle_epi32(lanes, 0xC6);  // C6 == 11 00 01 10, i.e. swap lanes 0 and 2.
+  #endif
+#endif
+    const __m128i opaque = _mm_set_epi32(0xFF,0,0,0);
+    lanes = _mm_or_si128(lanes, opaque);  // Force alpha to 1.
+
+    const __m128 scaled = _mm_mul_ps(_mm_cvtepi32_ps(lanes), _mm_set1_ps(1.0f/255));
+    SkPMFloat pmf = Sk4f(scaled);
+    SkASSERT(pmf.isValid());
+    return pmf;
+}
+
} // namespace