diff options
author | mtklein <mtklein@chromium.org> | 2015-08-18 09:43:28 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2015-08-18 09:43:28 -0700 |
commit | 2d141ba2df8f7506848aa9369f502944e837cd09 (patch) | |
tree | 0b669c1b2694b024e1bf93bec11d86dcb256fe87 /src/opts | |
parent | f05271581fc6204c2b7ccf146af5d02eec27e670 (diff) |
Patches on top of Radu's latest.
patch from issue 1273033005 at patchset 120001 (http://crrev.com/1273033005#ps120001)
BUG=skia:
Review URL: https://codereview.chromium.org/1288323004
Diffstat (limited to 'src/opts')
-rw-r--r-- | src/opts/SkColorCubeFilter_opts.h | 85 | ||||
-rw-r--r-- | src/opts/SkOpts_neon.cpp | 3 | ||||
-rw-r--r-- | src/opts/SkOpts_ssse3.cpp | 2 | ||||
-rw-r--r-- | src/opts/SkPMFloat_neon.h | 17 | ||||
-rw-r--r-- | src/opts/SkPMFloat_none.h | 10 | ||||
-rw-r--r-- | src/opts/SkPMFloat_sse.h | 25 |
6 files changed, 141 insertions, 1 deletions
diff --git a/src/opts/SkColorCubeFilter_opts.h b/src/opts/SkColorCubeFilter_opts.h new file mode 100644 index 0000000000..325d7aa649 --- /dev/null +++ b/src/opts/SkColorCubeFilter_opts.h @@ -0,0 +1,85 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SkColorCubeFilter_opts_DEFINED +#define SkColorCubeFilter_opts_DEFINED + +#include "SkColor.h" +#include "SkPMFloat.h" +#include "SkUnPreMultiply.h" + +namespace SK_OPTS_NS { + +void color_cube_filter_span(const SkPMColor src[], + int count, + SkPMColor dst[], + const int* colorToIndex[2], + const SkScalar* colorToFactors[2], + int dim, + const SkColor* colorCube) { + uint8_t* ptr_dst = reinterpret_cast<uint8_t*>(dst); + uint8_t r, g, b, a; + + for (int i = 0; i < count; ++i) { + const SkPMColor input = src[i]; + a = input >> SK_A32_SHIFT; + + if (a != 255) { + const SkColor source = SkUnPreMultiply::PMColorToColor(input); + r = SkColorGetR(source); + g = SkColorGetG(source); + b = SkColorGetB(source); + } else { + r = SkGetPackedR32(input); + g = SkGetPackedG32(input); + b = SkGetPackedB32(input); + } + + const SkScalar g0 = colorToFactors[0][g], + g1 = colorToFactors[1][g], + b0 = colorToFactors[0][b], + b1 = colorToFactors[1][b]; + + const Sk4f g0b0(g0*b0), + g0b1(g0*b1), + g1b0(g1*b0), + g1b1(g1*b1); + + const int i00 = (colorToIndex[0][g] + colorToIndex[0][b] * dim) * dim; + const int i01 = (colorToIndex[0][g] + colorToIndex[1][b] * dim) * dim; + const int i10 = (colorToIndex[1][g] + colorToIndex[0][b] * dim) * dim; + const int i11 = (colorToIndex[1][g] + colorToIndex[1][b] * dim) * dim; + + SkPMFloat color(0); + + for (int x = 0; x < 2; ++x) { + const int ix = colorToIndex[x][r]; + + const SkColor lutColor00 = colorCube[ix + i00]; + const SkColor lutColor01 = colorCube[ix + i01]; + const SkColor lutColor10 = colorCube[ix + i10]; + const SkColor lutColor11 = colorCube[ix + i11]; + + Sk4f sum = SkPMFloat::FromBGRx(lutColor00) * g0b0; + sum = sum + SkPMFloat::FromBGRx(lutColor01) * g0b1; + sum = sum + SkPMFloat::FromBGRx(lutColor10) * g1b0; + sum = sum + SkPMFloat::FromBGRx(lutColor11) * g1b1; + + color = color + sum * Sk4f((float)colorToFactors[x][r]); + } + + if (a != 255) { + color = color * Sk4f(((float)a) / 255); + } + + dst[i] = color.round(); + + ptr_dst[SK_A32_SHIFT / 8] = a; + ptr_dst += 4; + } +} + +} // namespace SK_OPTS NS + +#endif // SkColorCubeFilter_opts_DEFINED diff --git a/src/opts/SkOpts_neon.cpp b/src/opts/SkOpts_neon.cpp index 789a977238..aa184d8ef4 100644 --- a/src/opts/SkOpts_neon.cpp +++ b/src/opts/SkOpts_neon.cpp @@ -10,6 +10,7 @@ #define SK_OPTS_NS neon #include "SkBlitMask_opts.h" #include "SkBlurImageFilter_opts.h" +#include "SkColorCubeFilter_opts.h" #include "SkFloatingPoint_opts.h" #include "SkMorphologyImageFilter_opts.h" #include "SkTextureCompressor_opts.h" @@ -36,5 +37,7 @@ namespace SkOpts { fill_block_dimensions = neon::fill_block_dimensions; blit_mask_d32_a8 = neon::blit_mask_d32_a8; + + color_cube_filter_span = neon::color_cube_filter_span; } } diff --git a/src/opts/SkOpts_ssse3.cpp b/src/opts/SkOpts_ssse3.cpp index 7e056a0787..c65f0cbb7f 100644 --- a/src/opts/SkOpts_ssse3.cpp +++ b/src/opts/SkOpts_ssse3.cpp @@ -8,11 +8,13 @@ #include "SkOpts.h" #define SK_OPTS_NS ssse3 #include "SkBlitMask_opts.h" +#include "SkColorCubeFilter_opts.h" #include "SkXfermode_opts.h" namespace SkOpts { void Init_ssse3() { create_xfermode = SkCreate4pxXfermode; blit_mask_d32_a8 = ssse3::blit_mask_d32_a8; + color_cube_filter_span = ssse3::color_cube_filter_span; } } diff --git a/src/opts/SkPMFloat_neon.h b/src/opts/SkPMFloat_neon.h index 8bee5b551a..ecb151f1fd 100644 --- a/src/opts/SkPMFloat_neon.h +++ b/src/opts/SkPMFloat_neon.h @@ -7,6 +7,8 @@ namespace { // See SkPMFloat.h +static_assert(SK_A32_SHIFT == 24, "This file assumes little-endian."); + inline SkPMFloat::SkPMFloat(SkPMColor c) { SkPMColorAssert(c); uint8x8_t fix8 = (uint8x8_t)vdup_n_u32(c); @@ -28,8 +30,21 @@ inline SkPMColor SkPMFloat::round() const { } inline Sk4f SkPMFloat::alphas() const { - static_assert(SK_A32_SHIFT == 24, "Assuming little-endian."); return vdupq_lane_f32(vget_high_f32(fVec), 1); // Duplicate high lane of high half i.e. lane 3. } +inline SkPMFloat SkPMFloat::FromBGRx(SkColor c) { + uint8x8_t fix8 = (uint8x8_t)vdup_n_u32(c); +#if defined(SK_PMCOLOR_IS_RGBA) + fix8 = vtbl1_u8(fix8, vcreate_u8(0x0300010203000102ULL)); // 03 00 01 02, 2x, i.e. swap R&B. +#endif + uint16x8_t fix8_16 = vmovl_u8(fix8); + uint32x4_t fix8_32 = vmovl_u16(vget_low_u16(fix8_16)); + fix8_32 = vsetq_lane_u32(0xFF, fix8_32, 3); // Force alpha to 1. + + SkPMFloat pmf = Sk4f(vmulq_f32(vcvtq_f32_u32(fix8_32), vdupq_n_f32(1.0f/255))); + SkASSERT(pmf.isValid()); + return pmf; +} + } // namespace diff --git a/src/opts/SkPMFloat_none.h b/src/opts/SkPMFloat_none.h index 518ad159ff..42446e6ac3 100644 --- a/src/opts/SkPMFloat_none.h +++ b/src/opts/SkPMFloat_none.h @@ -34,4 +34,14 @@ inline Sk4f SkPMFloat::alphas() const { return Sk4f(this->a()); } +inline SkPMFloat SkPMFloat::FromBGRx(SkColor c) { + float inv255 = 1.0f / 255; + SkPMFloat pmf = SkPMFloat::FromARGB(1.0f, + SkGetPackedR32(c) * inv255, + SkGetPackedG32(c) * inv255, + SkGetPackedB32(c) * inv255); + SkASSERT(pmf.isValid()); + return pmf; +} + } // namespace diff --git a/src/opts/SkPMFloat_sse.h b/src/opts/SkPMFloat_sse.h index 85503886a6..6cfee1da6f 100644 --- a/src/opts/SkPMFloat_sse.h +++ b/src/opts/SkPMFloat_sse.h @@ -38,4 +38,29 @@ inline Sk4f SkPMFloat::alphas() const { return _mm_shuffle_ps(fVec, fVec, 0xff); // Read as 11 11 11 11, copying lane 3 to all lanes. } +inline SkPMFloat SkPMFloat::FromBGRx(SkColor c) { + __m128i fix8 = _mm_cvtsi32_si128((int)c); +#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 + const char _ = ~0; // Zero these bytes. + __m128i fix8_32 = _mm_shuffle_epi8(fix8, + #if defined(SK_PMCOLOR_IS_BGRA) + _mm_setr_epi8(0,_,_,_, 1,_,_,_, 2,_,_,_, _,_,_,_) + #else + _mm_setr_epi8(2,_,_,_, 1,_,_,_, 0,_,_,_, _,_,_,_) + #endif + ); +#else + __m128i fix8_16 = _mm_unpacklo_epi8 (fix8 , _mm_setzero_si128()), + fix8_32 = _mm_unpacklo_epi16(fix8_16, _mm_setzero_si128()); + #if defined(SK_PMCOLOR_IS_RGBA) + fix8_32 = _mm_shuffle_epi32(fix8_32, 0xC6); // C6 == 11 00 01 10, i.e swap lanes 0 and 2. + #endif +#endif + fix8_32 = _mm_or_si128(fix8_32, _mm_set_epi32(0xFF,0,0,0)); // Force alpha to 1. + + SkPMFloat pmf = Sk4f(_mm_mul_ps(_mm_cvtepi32_ps(fix8_32), _mm_set1_ps(1.0f/255))); + SkASSERT(pmf.isValid()); + return pmf; +} + } // namespace |