aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/opts
diff options
context:
space:
mode:
authorGravatar mtklein <mtklein@chromium.org>2015-08-18 09:43:28 -0700
committerGravatar Commit bot <commit-bot@chromium.org>2015-08-18 09:43:28 -0700
commit2d141ba2df8f7506848aa9369f502944e837cd09 (patch)
tree0b669c1b2694b024e1bf93bec11d86dcb256fe87 /src/opts
parentf05271581fc6204c2b7ccf146af5d02eec27e670 (diff)
Patches on top of Radu's latest.
patch from issue 1273033005 at patchset 120001 (http://crrev.com/1273033005#ps120001) BUG=skia: Review URL: https://codereview.chromium.org/1288323004
Diffstat (limited to 'src/opts')
-rw-r--r--src/opts/SkColorCubeFilter_opts.h85
-rw-r--r--src/opts/SkOpts_neon.cpp3
-rw-r--r--src/opts/SkOpts_ssse3.cpp2
-rw-r--r--src/opts/SkPMFloat_neon.h17
-rw-r--r--src/opts/SkPMFloat_none.h10
-rw-r--r--src/opts/SkPMFloat_sse.h25
6 files changed, 141 insertions, 1 deletions
diff --git a/src/opts/SkColorCubeFilter_opts.h b/src/opts/SkColorCubeFilter_opts.h
new file mode 100644
index 0000000000..325d7aa649
--- /dev/null
+++ b/src/opts/SkColorCubeFilter_opts.h
@@ -0,0 +1,85 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SkColorCubeFilter_opts_DEFINED
+#define SkColorCubeFilter_opts_DEFINED
+
+#include "SkColor.h"
+#include "SkPMFloat.h"
+#include "SkUnPreMultiply.h"
+
+namespace SK_OPTS_NS {
+
+// Runs `count` premultiplied pixels from src through the color cube and writes the
+// premultiplied results to dst.
+//
+// A pixel that is not opaque is first unpremultiplied to get 8-bit r, g and b.  Each
+// channel value v picks two cube coordinates, colorToIndex[0][v] and colorToIndex[1][v],
+// with weights colorToFactors[0][v] and colorToFactors[1][v].  The eight colorCube entries
+// addressed this way (red varies fastest, then green, then blue, `dim` entries per axis)
+// are summed, each weighted by the product of its three per-channel factors.
+// NOTE(review): presumably the two factors for a value sum to 1, which would make this a
+// trilinear interpolation -- confirm against the code that builds the tables.
+//
+// The blended color is scaled by the source alpha when the pixel is not opaque, and the
+// source alpha itself is carried over to dst unchanged.
+void color_cube_filter_span(const SkPMColor src[],
+                            int count,
+                            SkPMColor dst[],
+                            const int* colorToIndex[2],
+                            const SkScalar* colorToFactors[2],
+                            int dim,
+                            const SkColor* colorCube) {
+    // Bits of a packed SkPMColor that hold alpha.
+    const SkPMColor alphaMask = static_cast<SkPMColor>(0xFF) << SK_A32_SHIFT;
+
+    for (int i = 0; i < count; ++i) {
+        const SkPMColor input = src[i];
+        const uint8_t a = static_cast<uint8_t>(input >> SK_A32_SHIFT);
+
+        uint8_t r, g, b;
+        if (a != 255) {
+            const SkColor source = SkUnPreMultiply::PMColorToColor(input);
+            r = SkColorGetR(source);
+            g = SkColorGetG(source);
+            b = SkColorGetB(source);
+        } else {
+            // Opaque pixels are already unpremultiplied; just unpack them.
+            r = SkGetPackedR32(input);
+            g = SkGetPackedG32(input);
+            b = SkGetPackedB32(input);
+        }
+
+        const SkScalar g0 = colorToFactors[0][g],
+                       g1 = colorToFactors[1][g],
+                       b0 = colorToFactors[0][b],
+                       b1 = colorToFactors[1][b];
+
+        // Combined green*blue weight for each of the four (green, blue) corner pairs.
+        const Sk4f g0b0(g0*b0),
+                   g0b1(g0*b1),
+                   g1b0(g1*b0),
+                   g1b1(g1*b1);
+
+        // Offsets of those four corner pairs; the red coordinate is added below.
+        const int i00 = (colorToIndex[0][g] + colorToIndex[0][b] * dim) * dim;
+        const int i01 = (colorToIndex[0][g] + colorToIndex[1][b] * dim) * dim;
+        const int i10 = (colorToIndex[1][g] + colorToIndex[0][b] * dim) * dim;
+        const int i11 = (colorToIndex[1][g] + colorToIndex[1][b] * dim) * dim;
+
+        SkPMFloat color(0);
+
+        // One pass per red coordinate: blend its four (green, blue) corners, then weight
+        // the result by the matching red factor.
+        for (int x = 0; x < 2; ++x) {
+            const int ix = colorToIndex[x][r];
+
+            const SkColor lutColor00 = colorCube[ix + i00];
+            const SkColor lutColor01 = colorCube[ix + i01];
+            const SkColor lutColor10 = colorCube[ix + i10];
+            const SkColor lutColor11 = colorCube[ix + i11];
+
+            Sk4f sum = SkPMFloat::FromBGRx(lutColor00) * g0b0;
+            sum = sum + SkPMFloat::FromBGRx(lutColor01) * g0b1;
+            sum = sum + SkPMFloat::FromBGRx(lutColor10) * g1b0;
+            sum = sum + SkPMFloat::FromBGRx(lutColor11) * g1b1;
+
+            color = color + sum * Sk4f(static_cast<float>(colorToFactors[x][r]));
+        }
+
+        if (a != 255) {
+            // Premultiply the filtered color by the source alpha again.
+            color = color * Sk4f(static_cast<float>(a) / 255);
+        }
+
+        // Replace the rounded alpha with the exact source alpha.  Masking the packed value
+        // (rather than poking byte SK_A32_SHIFT / 8 through a uint8_t*) does not depend on
+        // the host byte order.
+        dst[i] = (color.round() & ~alphaMask) | (static_cast<SkPMColor>(a) << SK_A32_SHIFT);
+    }
+}
+
+} // namespace SK_OPTS_NS
+
+#endif // SkColorCubeFilter_opts_DEFINED
diff --git a/src/opts/SkOpts_neon.cpp b/src/opts/SkOpts_neon.cpp
index 789a977238..aa184d8ef4 100644
--- a/src/opts/SkOpts_neon.cpp
+++ b/src/opts/SkOpts_neon.cpp
@@ -10,6 +10,7 @@
#define SK_OPTS_NS neon
#include "SkBlitMask_opts.h"
#include "SkBlurImageFilter_opts.h"
+#include "SkColorCubeFilter_opts.h"
#include "SkFloatingPoint_opts.h"
#include "SkMorphologyImageFilter_opts.h"
#include "SkTextureCompressor_opts.h"
@@ -36,5 +37,7 @@ namespace SkOpts {
fill_block_dimensions = neon::fill_block_dimensions;
blit_mask_d32_a8 = neon::blit_mask_d32_a8;
+
+ color_cube_filter_span = neon::color_cube_filter_span;
}
}
diff --git a/src/opts/SkOpts_ssse3.cpp b/src/opts/SkOpts_ssse3.cpp
index 7e056a0787..c65f0cbb7f 100644
--- a/src/opts/SkOpts_ssse3.cpp
+++ b/src/opts/SkOpts_ssse3.cpp
@@ -8,11 +8,13 @@
#include "SkOpts.h"
#define SK_OPTS_NS ssse3
#include "SkBlitMask_opts.h"
+#include "SkColorCubeFilter_opts.h"
#include "SkXfermode_opts.h"
namespace SkOpts {
void Init_ssse3() {
create_xfermode = SkCreate4pxXfermode;
blit_mask_d32_a8 = ssse3::blit_mask_d32_a8;
+ color_cube_filter_span = ssse3::color_cube_filter_span;
}
}
diff --git a/src/opts/SkPMFloat_neon.h b/src/opts/SkPMFloat_neon.h
index 8bee5b551a..ecb151f1fd 100644
--- a/src/opts/SkPMFloat_neon.h
+++ b/src/opts/SkPMFloat_neon.h
@@ -7,6 +7,8 @@
namespace { // See SkPMFloat.h
+static_assert(SK_A32_SHIFT == 24, "This file assumes little-endian.");
+
inline SkPMFloat::SkPMFloat(SkPMColor c) {
SkPMColorAssert(c);
uint8x8_t fix8 = (uint8x8_t)vdup_n_u32(c);
@@ -28,8 +30,21 @@ inline SkPMColor SkPMFloat::round() const {
}
inline Sk4f SkPMFloat::alphas() const {
- static_assert(SK_A32_SHIFT == 24, "Assuming little-endian.");
return vdupq_lane_f32(vget_high_f32(fVec), 1); // Duplicate high lane of high half i.e. lane 3.
}
+// Builds an SkPMFloat from the three color bytes of an SkColor, each scaled by 1/255,
+// with alpha forced to 1 regardless of the alpha byte in c.
+inline SkPMFloat SkPMFloat::FromBGRx(SkColor c) {
+    uint8x8_t bytes = (uint8x8_t)vdup_n_u32(c);
+#if defined(SK_PMCOLOR_IS_RGBA)
+    // 03 00 01 02, 2x, i.e. swap R&B.
+    const uint8x8_t swapRB = vcreate_u8(0x0300010203000102ULL);
+    bytes = vtbl1_u8(bytes, swapRB);
+#endif
+    // Widen the low four bytes so each one fills a 32-bit lane.
+    const uint16x8_t wide16 = vmovl_u8(bytes);
+    uint32x4_t lanes = vmovl_u16(vget_low_u16(wide16));
+    lanes = vsetq_lane_u32(0xFF, lanes, 3);  // Force alpha to 1.
+
+    const float32x4_t inv255 = vdupq_n_f32(1.0f/255);
+    SkPMFloat pmf = Sk4f(vmulq_f32(vcvtq_f32_u32(lanes), inv255));
+    SkASSERT(pmf.isValid());
+    return pmf;
+}
+
} // namespace
diff --git a/src/opts/SkPMFloat_none.h b/src/opts/SkPMFloat_none.h
index 518ad159ff..42446e6ac3 100644
--- a/src/opts/SkPMFloat_none.h
+++ b/src/opts/SkPMFloat_none.h
@@ -34,4 +34,14 @@ inline Sk4f SkPMFloat::alphas() const {
return Sk4f(this->a());
}
+// Builds an SkPMFloat from the red, green and blue of an SkColor, each scaled by 1/255,
+// with alpha forced to 1 regardless of the alpha stored in c.
+inline SkPMFloat SkPMFloat::FromBGRx(SkColor c) {
+    const float inv255 = 1.0f / 255;
+    // c is an SkColor, not an SkPMColor, so its channels must be read with SkColorGet*.
+    // SkGetPacked*32 follows the SkPMColor layout and would hand back red and blue
+    // swapped when SK_PMCOLOR_IS_RGBA.
+    SkPMFloat pmf = SkPMFloat::FromARGB(1.0f,
+                                        SkColorGetR(c) * inv255,
+                                        SkColorGetG(c) * inv255,
+                                        SkColorGetB(c) * inv255);
+    SkASSERT(pmf.isValid());
+    return pmf;
+}
+
} // namespace
diff --git a/src/opts/SkPMFloat_sse.h b/src/opts/SkPMFloat_sse.h
index 85503886a6..6cfee1da6f 100644
--- a/src/opts/SkPMFloat_sse.h
+++ b/src/opts/SkPMFloat_sse.h
@@ -38,4 +38,29 @@ inline Sk4f SkPMFloat::alphas() const {
return _mm_shuffle_ps(fVec, fVec, 0xff); // Read as 11 11 11 11, copying lane 3 to all lanes.
}
+// Builds an SkPMFloat from the three color bytes of an SkColor, each scaled by 1/255,
+// with alpha forced to 1 regardless of the alpha byte in c.
+inline SkPMFloat SkPMFloat::FromBGRx(SkColor c) {
+    const __m128i packed = _mm_cvtsi32_si128((int)c);
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
+    const char _ = ~0;  // Zero these bytes.
+    // One shuffle both spreads the color bytes into 32-bit lanes and orders them.
+  #if defined(SK_PMCOLOR_IS_BGRA)
+    const __m128i spread = _mm_setr_epi8(0,_,_,_, 1,_,_,_, 2,_,_,_, _,_,_,_);
+  #else
+    const __m128i spread = _mm_setr_epi8(2,_,_,_, 1,_,_,_, 0,_,_,_, _,_,_,_);
+  #endif
+    __m128i lanes = _mm_shuffle_epi8(packed, spread);
+#else
+    // Without pshufb: widen 8 -> 16 -> 32 bits, then reorder whole lanes if needed.
+    const __m128i zero = _mm_setzero_si128();
+    const __m128i wide16 = _mm_unpacklo_epi8(packed, zero);
+    __m128i lanes = _mm_unpacklo_epi16(wide16, zero);
+  #if defined(SK_PMCOLOR_IS_RGBA)
+    lanes = _mm_shuffle_epi32(lanes, 0xC6);  // C6 == 11 00 01 10, i.e swap lanes 0 and 2.
+  #endif
+#endif
+    lanes = _mm_or_si128(lanes, _mm_set_epi32(0xFF,0,0,0));  // Force alpha to 1.
+
+    const __m128 inv255 = _mm_set1_ps(1.0f/255);
+    SkPMFloat pmf = Sk4f(_mm_mul_ps(_mm_cvtepi32_ps(lanes), inv255));
+    SkASSERT(pmf.isValid());
+    return pmf;
+}
+
} // namespace