diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/core/SkColorSpaceXform.cpp | 138 | ||||
-rw-r--r-- | src/core/SkColorSpaceXformOpts.h | 379 | ||||
-rw-r--r-- | src/core/SkOpts.cpp | 9 | ||||
-rw-r--r-- | src/core/SkOpts.h | 25 | ||||
-rw-r--r-- | src/opts/SkColorXform_opts.h | 252 | ||||
-rw-r--r-- | src/opts/SkNx_neon.h | 4 | ||||
-rw-r--r-- | src/opts/SkNx_sse.h | 4 | ||||
-rw-r--r-- | src/opts/SkOpts_sse41.cpp | 9 |
8 files changed, 462 insertions, 358 deletions
diff --git a/src/core/SkColorSpaceXform.cpp b/src/core/SkColorSpaceXform.cpp index 4a7f175082..57b4fa6ddd 100644 --- a/src/core/SkColorSpaceXform.cpp +++ b/src/core/SkColorSpaceXform.cpp @@ -8,7 +8,7 @@ #include "SkColorPriv.h" #include "SkColorSpace_Base.h" #include "SkColorSpaceXform.h" -#include "SkOpts.h" +#include "SkColorSpaceXformOpts.h" #include "SkSRGB.h" static constexpr float sk_linear_from_2dot2[256] = { @@ -78,56 +78,6 @@ static constexpr float sk_linear_from_2dot2[256] = { 0.974300202388861000f, 0.982826255053791000f, 0.991392843592940000f, 1.000000000000000000f, }; -static void build_table_linear_from_gamma(float* outTable, float exponent) { - for (float x = 0.0f; x <= 1.0f; x += (1.0f/255.0f)) { - *outTable++ = powf(x, exponent); - } -} - -// Interpolating lookup in a variably sized table. -static float interp_lut(float input, const float* table, int tableSize) { - float index = input * (tableSize - 1); - float diff = index - sk_float_floor2int(index); - return table[(int) sk_float_floor2int(index)] * (1.0f - diff) + - table[(int) sk_float_ceil2int(index)] * diff; -} - -// outTable is always 256 entries, inTable may be larger or smaller. -static void build_table_linear_from_gamma(float* outTable, const float* inTable, - int inTableSize) { - if (256 == inTableSize) { - memcpy(outTable, inTable, sizeof(float) * 256); - return; - } - - for (float x = 0.0f; x <= 1.0f; x += (1.0f/255.0f)) { - *outTable++ = interp_lut(x, inTable, inTableSize); - } -} - -static void build_table_linear_from_gamma(float* outTable, float g, float a, float b, float c, - float d, float e, float f) { - // Y = (aX + b)^g + c for X >= d - // Y = eX + f otherwise - for (float x = 0.0f; x <= 1.0f; x += (1.0f/255.0f)) { - if (x >= d) { - *outTable++ = powf(a * x + b, g) + c; - } else { - *outTable++ = e * x + f; - } - } -} - -static inline bool compute_gamut_xform(SkMatrix44* srcToDst, const SkMatrix44& srcToXYZ, - const SkMatrix44& dstToXYZ) { - if (!dstToXYZ.invert(srcToDst)) { - return false; - } - - srcToDst->postConcat(srcToXYZ); - return true; -} - /////////////////////////////////////////////////////////////////////////////////////////////////// static constexpr uint8_t linear_to_srgb[1024] = { @@ -190,7 +140,7 @@ static constexpr uint8_t linear_to_srgb[1024] = { 253, 253, 254, 254, 254, 254, 254, 254, 254, 254, 254, 255, 255, 255, 255, 255 }; -static constexpr uint8_t linear_to_2dot2[1024] = { +static constexpr uint8_t linear_to_2dot2_table[1024] = { 0, 11, 15, 18, 21, 23, 25, 26, 28, 30, 31, 32, 34, 35, 36, 37, 39, 40, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 50, 51, 52, 53, 54, 54, 55, 56, 56, 57, 58, 58, 59, 60, 60, 61, 62, 62, 63, 63, 64, 65, 65, 66, 66, @@ -250,6 +200,50 @@ static constexpr uint8_t linear_to_2dot2[1024] = { 253, 253, 254, 254, 254, 254, 254, 254, 254, 254, 254, 255, 255, 255, 255, 255, }; +/////////////////////////////////////////////////////////////////////////////////////////////////// + +static void build_table_linear_from_gamma(float* outTable, float exponent) { + for (float x = 0.0f; x <= 1.0f; x += (1.0f/255.0f)) { + *outTable++ = powf(x, exponent); + } +} + +// Interpolating lookup in a variably sized table. +static float interp_lut(float input, const float* table, int tableSize) { + float index = input * (tableSize - 1); + float diff = index - sk_float_floor2int(index); + return table[(int) sk_float_floor2int(index)] * (1.0f - diff) + + table[(int) sk_float_ceil2int(index)] * diff; +} + +// outTable is always 256 entries, inTable may be larger or smaller. +static void build_table_linear_from_gamma(float* outTable, const float* inTable, + int inTableSize) { + if (256 == inTableSize) { + memcpy(outTable, inTable, sizeof(float) * 256); + return; + } + + for (float x = 0.0f; x <= 1.0f; x += (1.0f/255.0f)) { + *outTable++ = interp_lut(x, inTable, inTableSize); + } +} + +static void build_table_linear_from_gamma(float* outTable, float g, float a, float b, float c, + float d, float e, float f) { + // Y = (aX + b)^g + c for X >= d + // Y = eX + f otherwise + for (float x = 0.0f; x <= 1.0f; x += (1.0f/255.0f)) { + if (x >= d) { + *outTable++ = powf(a * x + b, g) + c; + } else { + *outTable++ = e * x + f; + } + } +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + // Expand range from 0-1 to 0-255, then convert. static uint8_t clamp_normalized_float_to_byte(float v) { // The ordering of the logic is a little strange here in order @@ -373,7 +367,7 @@ static const GammaFns<float> kToLinear { static const GammaFns<uint8_t> kFromLinear { linear_to_srgb, - linear_to_2dot2, + linear_to_2dot2_table, &build_table_linear_to_gamma, &build_table_linear_to_gamma, &build_table_linear_to_gamma, @@ -449,6 +443,18 @@ static void build_gamma_tables(const T* outGammaTables[3], T* gammaTableStorage, /////////////////////////////////////////////////////////////////////////////////////////////////// +static inline bool compute_gamut_xform(SkMatrix44* srcToDst, const SkMatrix44& srcToXYZ, + const SkMatrix44& dstToXYZ) { + if (!dstToXYZ.invert(srcToDst)) { + return false; + } + + srcToDst->postConcat(srcToXYZ); + return true; +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + std::unique_ptr<SkColorSpaceXform> SkColorSpaceXform::New(const sk_sp<SkColorSpace>& srcSpace, const sk_sp<SkColorSpace>& dstSpace) { if (!srcSpace || !dstSpace) { @@ -615,8 +621,8 @@ static void handle_color_lut(uint32_t* dst, const uint32_t* src, int len, /////////////////////////////////////////////////////////////////////////////////////////////////// -template <SkColorSpace::GammaNamed Dst> -SkColorSpaceXform_Base<Dst>::SkColorSpaceXform_Base(const sk_sp<SkColorSpace>& srcSpace, +template <SkColorSpace::GammaNamed kDst> +SkColorSpaceXform_Base<kDst>::SkColorSpaceXform_Base(const sk_sp<SkColorSpace>& srcSpace, const SkMatrix44& srcToDst, const sk_sp<SkColorSpace>& dstSpace) : fColorLUT(sk_ref_sp((SkColorLookUpTable*) as_CSB(srcSpace)->colorLUT())) @@ -636,7 +642,8 @@ void SkColorSpaceXform_Base<SkColorSpace::kSRGB_GammaNamed> src = dst; } - SkOpts::color_xform_RGB1_to_srgb(dst, src, len, fSrcGammaTables, fSrcToDst); + color_xform_RGBA<SkColorSpace::kSRGB_GammaNamed, false, false> + (dst, src, len, fSrcGammaTables, fSrcToDst, fDstGammaTables); } template <> @@ -648,7 +655,8 @@ void SkColorSpaceXform_Base<SkColorSpace::k2Dot2Curve_GammaNamed> src = dst; } - SkOpts::color_xform_RGB1_to_2dot2(dst, src, len, fSrcGammaTables, fSrcToDst); + color_xform_RGBA<SkColorSpace::k2Dot2Curve_GammaNamed, false, false> + (dst, src, len, fSrcGammaTables, fSrcToDst, fDstGammaTables); } template <> @@ -660,7 +668,8 @@ void SkColorSpaceXform_Base<SkColorSpace::kNonStandard_GammaNamed> src = dst; } - SkOpts::color_xform_RGB1_to_table(dst, src, len, fSrcGammaTables, fSrcToDst, fDstGammaTables); + color_xform_RGBA<SkColorSpace::kNonStandard_GammaNamed, false, false> + (dst, src, len, fSrcGammaTables, fSrcToDst, fDstGammaTables); } template <> @@ -672,7 +681,8 @@ void SkColorSpaceXform_Base<SkColorSpace::kSRGB_GammaNamed> src = dst; } - SkOpts::color_xform_RGB1_to_srgb_swaprb(dst, src, len, fSrcGammaTables, fSrcToDst); + color_xform_RGBA<SkColorSpace::kSRGB_GammaNamed, false, true> + (dst, src, len, fSrcGammaTables, fSrcToDst, fDstGammaTables); } template <> @@ -684,7 +694,8 @@ void SkColorSpaceXform_Base<SkColorSpace::k2Dot2Curve_GammaNamed> src = dst; } - SkOpts::color_xform_RGB1_to_2dot2_swaprb(dst, src, len, fSrcGammaTables, fSrcToDst); + color_xform_RGBA<SkColorSpace::k2Dot2Curve_GammaNamed, false, true> + (dst, src, len, fSrcGammaTables, fSrcToDst, fDstGammaTables); } template <> @@ -696,8 +707,8 @@ void SkColorSpaceXform_Base<SkColorSpace::kNonStandard_GammaNamed> src = dst; } - SkOpts::color_xform_RGB1_to_table_swaprb(dst, src, len, fSrcGammaTables, fSrcToDst, - fDstGammaTables); + color_xform_RGBA<SkColorSpace::kNonStandard_GammaNamed, false, true> + (dst, src, len, fSrcGammaTables, fSrcToDst, fDstGammaTables); } template <SkColorSpace::GammaNamed T> @@ -717,5 +728,6 @@ void SkColorSpaceXform_Base<T> src = (const RGBA32*) storage.get(); } - SkOpts::color_xform_RGB1_to_linear(dst, src, len, fSrcGammaTables, fSrcToDst); + color_xform_RGBA<SkColorSpace::kLinear_GammaNamed, false, false> + (dst, src, len, fSrcGammaTables, fSrcToDst, fDstGammaTables); } diff --git a/src/core/SkColorSpaceXformOpts.h b/src/core/SkColorSpaceXformOpts.h new file mode 100644 index 0000000000..cf0e48cbb3 --- /dev/null +++ b/src/core/SkColorSpaceXformOpts.h @@ -0,0 +1,379 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkColorSpaceXformOpts_DEFINED +#define SkColorSpaceXformOpts_DEFINED + +#include "SkNx.h" +#include "SkColorPriv.h" +#include "SkHalf.h" +#include "SkSRGB.h" +#include "SkTemplates.h" + +static inline void load_matrix(const float matrix[16], + Sk4f& rXgXbX, Sk4f& rYgYbY, Sk4f& rZgZbZ, Sk4f& rTgTbT) { + rXgXbX = Sk4f::Load(matrix + 0); + rYgYbY = Sk4f::Load(matrix + 4); + rZgZbZ = Sk4f::Load(matrix + 8); + rTgTbT = Sk4f::Load(matrix + 12); +} + +static inline void load_rgb_from_tables(const uint32_t* src, + Sk4f& r, Sk4f& g, Sk4f& b, Sk4f&, + const float* const srcTables[3]) { + r = { srcTables[0][(src[0] >> 0) & 0xFF], + srcTables[0][(src[1] >> 0) & 0xFF], + srcTables[0][(src[2] >> 0) & 0xFF], + srcTables[0][(src[3] >> 0) & 0xFF], }; + g = { srcTables[1][(src[0] >> 8) & 0xFF], + srcTables[1][(src[1] >> 8) & 0xFF], + srcTables[1][(src[2] >> 8) & 0xFF], + srcTables[1][(src[3] >> 8) & 0xFF], }; + b = { srcTables[2][(src[0] >> 16) & 0xFF], + srcTables[2][(src[1] >> 16) & 0xFF], + srcTables[2][(src[2] >> 16) & 0xFF], + srcTables[2][(src[3] >> 16) & 0xFF], }; +} + +static inline void load_rgba_from_tables(const uint32_t* src, + Sk4f& r, Sk4f& g, Sk4f& b, Sk4f& a, + const float* const srcTables[3]) { + r = { srcTables[0][(src[0] >> 0) & 0xFF], + srcTables[0][(src[1] >> 0) & 0xFF], + srcTables[0][(src[2] >> 0) & 0xFF], + srcTables[0][(src[3] >> 0) & 0xFF], }; + g = { srcTables[1][(src[0] >> 8) & 0xFF], + srcTables[1][(src[1] >> 8) & 0xFF], + srcTables[1][(src[2] >> 8) & 0xFF], + srcTables[1][(src[3] >> 8) & 0xFF], }; + b = { srcTables[2][(src[0] >> 16) & 0xFF], + srcTables[2][(src[1] >> 16) & 0xFF], + srcTables[2][(src[2] >> 16) & 0xFF], + srcTables[2][(src[3] >> 16) & 0xFF], }; + a = (1.0f / 255.0f) * SkNx_cast<float>(Sk4u::Load(src) >> 24); +} + +static inline void load_rgb_from_tables_1(const uint32_t* src, + Sk4f& r, Sk4f& g, Sk4f& b, Sk4f&, + const float* const srcTables[3]) { + // Splat r,g,b across a register each. + r = Sk4f(srcTables[0][(*src >> 0) & 0xFF]); + g = Sk4f(srcTables[1][(*src >> 8) & 0xFF]); + b = Sk4f(srcTables[2][(*src >> 16) & 0xFF]); +} + +static inline void load_rgba_from_tables_1(const uint32_t* src, + Sk4f& r, Sk4f& g, Sk4f& b, Sk4f& a, + const float* const srcTables[3]) { + // Splat r,g,b across a register each. + r = Sk4f(srcTables[0][(*src >> 0) & 0xFF]); + g = Sk4f(srcTables[1][(*src >> 8) & 0xFF]); + b = Sk4f(srcTables[2][(*src >> 16) & 0xFF]); + a = (1.0f / 255.0f) * Sk4f(*src >> 24); +} + +static inline void transform_gamut(const Sk4f& r, const Sk4f& g, const Sk4f& b, const Sk4f& a, + const Sk4f& rXgXbX, const Sk4f& rYgYbY, const Sk4f& rZgZbZ, + Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f& da) { + dr = rXgXbX[0]*r + rYgYbY[0]*g + rZgZbZ[0]*b; + dg = rXgXbX[1]*r + rYgYbY[1]*g + rZgZbZ[1]*b; + db = rXgXbX[2]*r + rYgYbY[2]*g + rZgZbZ[2]*b; + da = a; +} + +static inline void transform_gamut_1(const Sk4f& r, const Sk4f& g, const Sk4f& b, + const Sk4f& rXgXbX, const Sk4f& rYgYbY, const Sk4f& rZgZbZ, + Sk4f& rgba) { + rgba = rXgXbX*r + rYgYbY*g + rZgZbZ*b; +} + +static inline void translate_gamut(const Sk4f& rTgTbT, Sk4f& dr, Sk4f& dg, Sk4f& db) { + dr = dr + rTgTbT[0]; + dg = dg + rTgTbT[1]; + db = db + rTgTbT[2]; +} + +static inline void translate_gamut_1(const Sk4f& rTgTbT, Sk4f& rgba) { + rgba = rgba + rTgTbT; +} + +static inline void premultiply(Sk4f& dr, Sk4f& dg, Sk4f& db, const Sk4f& da) { + dr = da * dr; + dg = da * dg; + db = da * db; +} + +static inline void premultiply_1(const Sk4f& a, Sk4f& rgba) { + rgba = a * rgba; +} + +static inline void store_srgb(void* dst, const uint32_t* src, + Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f&, + const uint8_t* const[3], bool kSwapRB) { + int kRShift = 0; + int kGShift = 8; + int kBShift = 16; + int kAShift = 24; + if (kSwapRB) { + kBShift = 0; + kRShift = 16; + } + + dr = sk_linear_to_srgb_needs_trunc(dr); + dg = sk_linear_to_srgb_needs_trunc(dg); + db = sk_linear_to_srgb_needs_trunc(db); + + dr = sk_clamp_0_255(dr); + dg = sk_clamp_0_255(dg); + db = sk_clamp_0_255(db); + + Sk4i da = SkNx_cast<int32_t>(Sk4u::Load(src) >> 24); + + Sk4i rgba = (SkNx_cast<int>(dr) << kRShift) + | (SkNx_cast<int>(dg) << kGShift) + | (SkNx_cast<int>(db) << kBShift) + | (da << kAShift); + rgba.store(dst); +} + +static inline void store_srgb_1(void* dst, const uint32_t* src, + Sk4f& rgba, const Sk4f&, + const uint8_t* const[3], bool kSwapRB) { + rgba = sk_clamp_0_255(sk_linear_to_srgb_needs_trunc(rgba)); + + uint32_t tmp; + SkNx_cast<uint8_t>(SkNx_cast<int32_t>(rgba)).store(&tmp); + tmp = (*src & 0xFF000000) | (tmp & 0x00FFFFFF); + if (kSwapRB) { + tmp = SkSwizzle_RB(tmp); + } + + *(uint32_t*)dst = tmp; +} + +static inline Sk4f linear_to_2dot2(const Sk4f& x) { + // x^(29/64) is a very good approximation of the true value, x^(1/2.2). + auto x2 = x.rsqrt(), // x^(-1/2) + x32 = x2.rsqrt().rsqrt().rsqrt().rsqrt(), // x^(-1/32) + x64 = x32.rsqrt(); // x^(+1/64) + + // 29 = 32 - 2 - 1 + return 255.0f * x2.invert() * x32 * x64.invert(); +} + +static inline void store_2dot2(void* dst, const uint32_t* src, + Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f&, + const uint8_t* const[3], bool kSwapRB) { + int kRShift = 0; + int kGShift = 8; + int kBShift = 16; + int kAShift = 24; + if (kSwapRB) { + kBShift = 0; + kRShift = 16; + } + + dr = linear_to_2dot2(dr); + dg = linear_to_2dot2(dg); + db = linear_to_2dot2(db); + + dr = sk_clamp_0_255(dr); + dg = sk_clamp_0_255(dg); + db = sk_clamp_0_255(db); + + Sk4i da = SkNx_cast<int32_t>(Sk4u::Load(src) >> 24); + + Sk4i rgba = (Sk4f_round(dr) << kRShift) + | (Sk4f_round(dg) << kGShift) + | (Sk4f_round(db) << kBShift) + | (da << kAShift); + rgba.store(dst); +} + +static inline void store_2dot2_1(void* dst, const uint32_t* src, + Sk4f& rgba, const Sk4f&, + const uint8_t* const[3], bool kSwapRB) { + rgba = sk_clamp_0_255(linear_to_2dot2(rgba)); + + uint32_t tmp; + SkNx_cast<uint8_t>(Sk4f_round(rgba)).store(&tmp); + tmp = (*src & 0xFF000000) | (tmp & 0x00FFFFFF); + if (kSwapRB) { + tmp = SkSwizzle_RB(tmp); + } + + *(uint32_t*)dst = tmp; +} + +static inline void store_f16(void* dst, const uint32_t* src, + Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f& da, + const uint8_t* const[3], bool kSwapRB) { + Sk4h_store4(dst, SkFloatToHalf_finite(dr), + SkFloatToHalf_finite(dg), + SkFloatToHalf_finite(db), + SkFloatToHalf_finite(da)); + dst = SkTAddOffset<void>(dst, 4 * sizeof(uint64_t)); +} + +static inline void store_f16_1(void* dst, const uint32_t* src, + Sk4f& rgba, const Sk4f& a, + const uint8_t* const[3], bool kSwapRB) { + rgba = Sk4f(rgba[0], rgba[1], rgba[2], a[3]); + SkFloatToHalf_finite(rgba).store((uint64_t*) dst); +} + +static inline void store_generic(void* dst, const uint32_t* src, + Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f&, + const uint8_t* const dstTables[3], bool kSwapRB) { + int kRShift = 0; + int kGShift = 8; + int kBShift = 16; + int kAShift = 24; + if (kSwapRB) { + kBShift = 0; + kRShift = 16; + } + + dr = Sk4f::Min(Sk4f::Max(1023.0f * dr, 0.0f), 1023.0f); + dg = Sk4f::Min(Sk4f::Max(1023.0f * dg, 0.0f), 1023.0f); + db = Sk4f::Min(Sk4f::Max(1023.0f * db, 0.0f), 1023.0f); + + Sk4i ir = Sk4f_round(dr); + Sk4i ig = Sk4f_round(dg); + Sk4i ib = Sk4f_round(db); + + Sk4i da = SkNx_cast<int32_t>(Sk4u::Load(src) >> 24); + + uint32_t* dst32 = (uint32_t*) dst; + dst32[0] = dstTables[0][ir[0]] << kRShift + | dstTables[1][ig[0]] << kGShift + | dstTables[2][ib[0]] << kBShift + | da[0] << kAShift; + dst32[1] = dstTables[0][ir[1]] << kRShift + | dstTables[1][ig[1]] << kGShift + | dstTables[2][ib[1]] << kBShift + | da[1] << kAShift; + dst32[2] = dstTables[0][ir[2]] << kRShift + | dstTables[1][ig[2]] << kGShift + | dstTables[2][ib[2]] << kBShift + | da[2] << kAShift; + dst32[3] = dstTables[0][ir[3]] << kRShift + | dstTables[1][ig[3]] << kGShift + | dstTables[2][ib[3]] << kBShift + | da[3] << kAShift; +} + +static inline void store_generic_1(void* dst, const uint32_t* src, + Sk4f& rgba, const Sk4f&, + const uint8_t* const dstTables[3], bool kSwapRB) { + rgba = Sk4f::Min(Sk4f::Max(1023.0f * rgba, 0.0f), 1023.0f); + + Sk4i indices = Sk4f_round(rgba); + + *((uint32_t*) dst) = dstTables[0][indices[0]] << 0 + | dstTables[1][indices[1]] << 8 + | dstTables[2][indices[2]] << 16 + | (*src & 0xFF000000); +} + +template <SkColorSpace::GammaNamed kDstGamma, bool kPremul, bool kSwapRB> +static void color_xform_RGBA(void* dst, const uint32_t* src, int len, + const float* const srcTables[3], const float matrix[16], + const uint8_t* const dstTables[3]) { + decltype(store_srgb )* store; + decltype(store_srgb_1 )* store_1; + decltype(load_rgb_from_tables )* load; + decltype(load_rgb_from_tables_1)* load_1; + size_t sizeOfDstPixel; + switch (kDstGamma) { + case SkColorSpace::kSRGB_GammaNamed: + load = kPremul ? load_rgba_from_tables : load_rgb_from_tables; + load_1 = kPremul ? load_rgba_from_tables_1 : load_rgb_from_tables_1; + store = store_srgb; + store_1 = store_srgb_1; + sizeOfDstPixel = 4; + break; + case SkColorSpace::k2Dot2Curve_GammaNamed: + load = kPremul ? load_rgba_from_tables : load_rgb_from_tables; + load_1 = kPremul ? load_rgba_from_tables_1 : load_rgb_from_tables_1; + store = store_2dot2; + store_1 = store_2dot2_1; + sizeOfDstPixel = 4; + break; + case SkColorSpace::kLinear_GammaNamed: + load = load_rgba_from_tables; + load_1 = load_rgba_from_tables_1; + store = store_f16; + store_1 = store_f16_1; + sizeOfDstPixel = 8; + break; + case SkColorSpace::kNonStandard_GammaNamed: + load = kPremul ? load_rgba_from_tables : load_rgb_from_tables; + load_1 = kPremul ? load_rgba_from_tables_1 : load_rgb_from_tables_1; + store = store_generic; + store_1 = store_generic_1; + sizeOfDstPixel = 4; + break; + } + + Sk4f rXgXbX, rYgYbY, rZgZbZ, rTgTbT; + load_matrix(matrix, rXgXbX, rYgYbY, rZgZbZ, rTgTbT); + + if (len >= 4) { + // Naively this would be a loop of load-transform-store, but we found it faster to + // move the N+1th load ahead of the Nth store. We don't bother doing this for N<4. + Sk4f r, g, b, a; + load(src, r, g, b, a, srcTables); + src += 4; + len -= 4; + + Sk4f dr, dg, db, da; + while (len >= 4) { + transform_gamut(r, g, b, a, rXgXbX, rYgYbY, rZgZbZ, dr, dg, db, da); + translate_gamut(rTgTbT, dr, dg, db); + + if (kPremul) { + premultiply(dr, dg, db, da); + } + + load(src, r, g, b, a, srcTables); + src += 4; + len -= 4; + + store(dst, src - 4, dr, dg, db, da, dstTables, kSwapRB); + dst = SkTAddOffset<void>(dst, 4 * sizeOfDstPixel); + } + + transform_gamut(r, g, b, a, rXgXbX, rYgYbY, rZgZbZ, dr, dg, db, da); + translate_gamut(rTgTbT, dr, dg, db); + + if (kPremul) { + premultiply(dr, dg, db, da); + } + + store(dst, src - 4, dr, dg, db, da, dstTables, kSwapRB); + dst = SkTAddOffset<void>(dst, 4 * sizeOfDstPixel); + } + + while (len > 0) { + Sk4f r, g, b, a; + load_1(src, r, g, b, a, srcTables); + + Sk4f rgba; + transform_gamut_1(r, g, b, rXgXbX, rYgYbY, rZgZbZ, rgba); + + translate_gamut_1(rTgTbT, rgba); + + store_1(dst, src, rgba, a, dstTables, kSwapRB); + + src += 1; + len -= 1; + dst = SkTAddOffset<void>(dst, sizeOfDstPixel); + } +} + +#endif // SkColorSpaceXformOpts_DEFINED diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp index 9ba7bc7178..5263fe46e0 100644 --- a/src/core/SkOpts.cpp +++ b/src/core/SkOpts.cpp @@ -27,7 +27,6 @@ #include "SkBlitRow_opts.h" #include "SkBlurImageFilter_opts.h" #include "SkColorCubeFilter_opts.h" -#include "SkColorXform_opts.h" #include "SkMorphologyImageFilter_opts.h" #include "SkSwizzler_opts.h" #include "SkTextureCompressor_opts.h" @@ -71,14 +70,6 @@ namespace SkOpts { DEFINE_DEFAULT(inverted_CMYK_to_BGR1); DEFINE_DEFAULT(srcover_srgb_srgb); - - DEFINE_DEFAULT(color_xform_RGB1_to_2dot2); - DEFINE_DEFAULT(color_xform_RGB1_to_srgb); - DEFINE_DEFAULT(color_xform_RGB1_to_table); - DEFINE_DEFAULT(color_xform_RGB1_to_linear); - DEFINE_DEFAULT(color_xform_RGB1_to_2dot2_swaprb); - DEFINE_DEFAULT(color_xform_RGB1_to_srgb_swaprb); - DEFINE_DEFAULT(color_xform_RGB1_to_table_swaprb); #undef DEFINE_DEFAULT // Each Init_foo() is defined in src/opts/SkOpts_foo.cpp. diff --git a/src/core/SkOpts.h b/src/core/SkOpts.h index 87489ee94e..7c6cfb0dfb 100644 --- a/src/core/SkOpts.h +++ b/src/core/SkOpts.h @@ -65,31 +65,6 @@ namespace SkOpts { // Blend ndst src pixels over dst, where both src and dst point to sRGB pixels (RGBA or BGRA). // If nsrc < ndst, we loop over src to create a pattern. extern void (*srcover_srgb_srgb)(uint32_t* dst, const uint32_t* src, int ndst, int nsrc); - - // Color xform RGB1 pixels. - extern void (*color_xform_RGB1_to_2dot2) (uint32_t* dst, const uint32_t* src, int len, - const float* const srcTables[3], - const float srcToDstMatrix[16]); - extern void (*color_xform_RGB1_to_srgb)(uint32_t* dst, const uint32_t* src, int len, - const float* const srcTables[3], - const float srcToDstMatrix[16]); - extern void (*color_xform_RGB1_to_table)(uint32_t* dst, const uint32_t* src, int len, - const float* const srcTables[3], - const float srcToDstMatrix[16], - const uint8_t* const dstTables[3]); - extern void (*color_xform_RGB1_to_linear)(uint64_t* dst, const uint32_t* src, int len, - const float* const srcTables[3], - const float srcToDstMatrix[16]); - extern void (*color_xform_RGB1_to_2dot2_swaprb) (uint32_t* dst, const uint32_t* src, int len, - const float* const srcTables[3], - const float srcToDstMatrix[16]); - extern void (*color_xform_RGB1_to_srgb_swaprb)(uint32_t* dst, const uint32_t* src, int len, - const float* const srcTables[3], - const float srcToDstMatrix[16]); - extern void (*color_xform_RGB1_to_table_swaprb)(uint32_t* dst, const uint32_t* src, int len, - const float* const srcTables[3], - const float srcToDstMatrix[16], - const uint8_t* const dstTables[3]); } #endif//SkOpts_DEFINED diff --git a/src/opts/SkColorXform_opts.h b/src/opts/SkColorXform_opts.h deleted file mode 100644 index b3da55c1fd..0000000000 --- a/src/opts/SkColorXform_opts.h +++ /dev/null @@ -1,252 +0,0 @@ -/* - * Copyright 2016 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#ifndef SkColorXform_opts_DEFINED -#define SkColorXform_opts_DEFINED - -#include "SkNx.h" -#include "SkColorPriv.h" -#include "SkHalf.h" -#include "SkSRGB.h" -#include "SkTemplates.h" - -namespace SK_OPTS_NS { - -// Strange that we need a wrapper on SkNx_cast to use as a function ptr. -static Sk4i Sk4f_trunc(const Sk4f& x) { - return SkNx_cast<int>(x); -} - -static Sk4f linear_to_2dot2(const Sk4f& x) { - // x^(29/64) is a very good approximation of the true value, x^(1/2.2). - auto x2 = x.rsqrt(), // x^(-1/2) - x32 = x2.rsqrt().rsqrt().rsqrt().rsqrt(), // x^(-1/32) - x64 = x32.rsqrt(); // x^(+1/64) - - // 29 = 32 - 2 - 1 - return 255.0f * x2.invert() * x32 * x64.invert(); -} - -enum DstGamma { - // 8888 - kSRGB_DstGamma, - k2Dot2_DstGamma, - kTable_DstGamma, - - // F16 - kLinear_DstGamma, -}; - -template <DstGamma kDstGamma, bool kSwapRB> -static void color_xform_RGB1(void* dst, const uint32_t* src, int len, - const float* const srcTables[3], const float matrix[16], - const uint8_t* const dstTables[3]) { - int kRShift = 0; - int kGShift = 8; - int kBShift = 16; - int kAShift = 24; - if (kSwapRB) { - kBShift = 0; - kRShift = 16; - } - - Sk4f rXgXbX = Sk4f::Load(matrix + 0), - rYgYbY = Sk4f::Load(matrix + 4), - rZgZbZ = Sk4f::Load(matrix + 8), - rTgTbT = Sk4f::Load(matrix + 12); - - if (len >= 4) { - Sk4f reds, greens, blues; - auto load_next_4 = [&reds, &greens, &blues, &src, &len, &srcTables] { - reds = Sk4f{srcTables[0][(src[0] >> 0) & 0xFF], - srcTables[0][(src[1] >> 0) & 0xFF], - srcTables[0][(src[2] >> 0) & 0xFF], - srcTables[0][(src[3] >> 0) & 0xFF]}; - greens = Sk4f{srcTables[1][(src[0] >> 8) & 0xFF], - srcTables[1][(src[1] >> 8) & 0xFF], - srcTables[1][(src[2] >> 8) & 0xFF], - srcTables[1][(src[3] >> 8) & 0xFF]}; - blues = Sk4f{srcTables[2][(src[0] >> 16) & 0xFF], - srcTables[2][(src[1] >> 16) & 0xFF], - srcTables[2][(src[2] >> 16) & 0xFF], - srcTables[2][(src[3] >> 16) & 0xFF]}; - src += 4; - len -= 4; - }; - - Sk4f dstReds, dstGreens, dstBlues; - auto transform_4 = [&reds, &greens, &blues, &dstReds, &dstGreens, &dstBlues, &rXgXbX, - &rYgYbY, &rZgZbZ, &rTgTbT] { - dstReds = rXgXbX[0]*reds + rYgYbY[0]*greens + rZgZbZ[0]*blues + rTgTbT[0]; - dstGreens = rXgXbX[1]*reds + rYgYbY[1]*greens + rZgZbZ[1]*blues + rTgTbT[1]; - dstBlues = rXgXbX[2]*reds + rYgYbY[2]*greens + rZgZbZ[2]*blues + rTgTbT[2]; - }; - - auto store_4 = [&dstReds, &dstGreens, &dstBlues, &dst, &dstTables, kRShift, kGShift, - kBShift, kAShift] { - if (kSRGB_DstGamma == kDstGamma || k2Dot2_DstGamma == kDstGamma) { - Sk4f (*linear_to_curve)(const Sk4f&) = (kSRGB_DstGamma == kDstGamma) ? - sk_linear_to_srgb_needs_trunc : linear_to_2dot2; - Sk4i (*float_to_int)(const Sk4f&) = (kSRGB_DstGamma == kDstGamma) ? - Sk4f_trunc : Sk4f_round; - - dstReds = linear_to_curve(dstReds); - dstGreens = linear_to_curve(dstGreens); - dstBlues = linear_to_curve(dstBlues); - - dstReds = sk_clamp_0_255(dstReds); - dstGreens = sk_clamp_0_255(dstGreens); - dstBlues = sk_clamp_0_255(dstBlues); - - auto rgba = (float_to_int(dstReds) << kRShift) - | (float_to_int(dstGreens) << kGShift) - | (float_to_int(dstBlues) << kBShift) - | (Sk4i{0xFF} << kAShift); - rgba.store((uint32_t*) dst); - - dst = SkTAddOffset<void>(dst, 4 * sizeof(uint32_t)); - } else if (kTable_DstGamma == kDstGamma) { - Sk4f scaledReds = Sk4f::Min(Sk4f::Max(1023.0f * dstReds, 0.0f), 1023.0f); - Sk4f scaledGreens = Sk4f::Min(Sk4f::Max(1023.0f * dstGreens, 0.0f), 1023.0f); - Sk4f scaledBlues = Sk4f::Min(Sk4f::Max(1023.0f * dstBlues, 0.0f), 1023.0f); - - Sk4i indicesReds = Sk4f_round(scaledReds); - Sk4i indicesGreens = Sk4f_round(scaledGreens); - Sk4i indicesBlues = Sk4f_round(scaledBlues); - - uint32_t* dst32 = (uint32_t*) dst; - dst32[0] = dstTables[0][indicesReds [0]] << kRShift - | dstTables[1][indicesGreens[0]] << kGShift - | dstTables[2][indicesBlues [0]] << kBShift - | 0xFF << kAShift; - dst32[1] = dstTables[0][indicesReds [1]] << kRShift - | dstTables[1][indicesGreens[1]] << kGShift - | dstTables[2][indicesBlues [1]] << kBShift - | 0xFF << kAShift; - dst32[2] = dstTables[0][indicesReds [2]] << kRShift - | dstTables[1][indicesGreens[2]] << kGShift - | dstTables[2][indicesBlues [2]] << kBShift - | 0xFF << kAShift; - dst32[3] = dstTables[0][indicesReds [3]] << kRShift - | dstTables[1][indicesGreens[3]] << kGShift - | dstTables[2][indicesBlues [3]] << kBShift - | 0xFF << kAShift; - - dst = SkTAddOffset<void>(dst, 4 * sizeof(uint32_t)); - } else { - Sk4h_store4(dst, SkFloatToHalf_finite(dstReds), - SkFloatToHalf_finite(dstGreens), - SkFloatToHalf_finite(dstBlues), - SK_Half1); - dst = SkTAddOffset<void>(dst, 4 * sizeof(uint64_t)); - } - }; - - load_next_4(); - - while (len >= 4) { - transform_4(); - load_next_4(); - store_4(); - } - - transform_4(); - store_4(); - } - - while (len > 0) { - // Splat r,g,b across a register each. - auto r = Sk4f{srcTables[0][(*src >> 0) & 0xFF]}, - g = Sk4f{srcTables[1][(*src >> 8) & 0xFF]}, - b = Sk4f{srcTables[2][(*src >> 16) & 0xFF]}; - - auto dstPixel = rXgXbX*r + rYgYbY*g + rZgZbZ*b + rTgTbT; - - if (kSRGB_DstGamma == kDstGamma || k2Dot2_DstGamma == kDstGamma) { - Sk4f (*linear_to_curve)(const Sk4f&) = (kSRGB_DstGamma == kDstGamma) ? - sk_linear_to_srgb_needs_trunc : linear_to_2dot2; - Sk4i (*float_to_int)(const Sk4f&) = (kSRGB_DstGamma == kDstGamma) ? - Sk4f_trunc : Sk4f_round; - - dstPixel = sk_clamp_0_255(linear_to_curve(dstPixel)); - - uint32_t rgba; - SkNx_cast<uint8_t>(float_to_int(dstPixel)).store(&rgba); - rgba |= 0xFF000000; - if (kSwapRB) { - *((uint32_t*) dst) = SkSwizzle_RB(rgba); - } else { - *((uint32_t*) dst) = rgba; - } - dst = SkTAddOffset<void>(dst, sizeof(uint32_t)); - } else if (kTable_DstGamma == kDstGamma) { - Sk4f scaledPixel = Sk4f::Min(Sk4f::Max(1023.0f * dstPixel, 0.0f), 1023.0f); - - Sk4i indices = Sk4f_round(scaledPixel); - - *((uint32_t*) dst) = dstTables[0][indices[0]] << kRShift - | dstTables[1][indices[1]] << kGShift - | dstTables[2][indices[2]] << kBShift - | 0xFF << kAShift; - - dst = SkTAddOffset<void>(dst, sizeof(uint32_t)); - } else { - uint64_t rgba; - SkFloatToHalf_finite(dstPixel).store(&rgba); - rgba |= static_cast<uint64_t>(SK_Half1) << 48; - *((uint64_t*) dst) = rgba; - dst = SkTAddOffset<void>(dst, sizeof(uint64_t)); - } - - src += 1; - len -= 1; - } -} - -static void color_xform_RGB1_to_2dot2(uint32_t* dst, const uint32_t* src, int len, - const float* const srcTables[3], const float matrix[16]) { - color_xform_RGB1<k2Dot2_DstGamma, false>(dst, src, len, srcTables, matrix, nullptr); -} - -static void color_xform_RGB1_to_srgb(uint32_t* dst, const uint32_t* src, int len, - const float* const srcTables[3], const float matrix[16]) { - color_xform_RGB1<kSRGB_DstGamma, false>(dst, src, len, srcTables, matrix, nullptr); -} - -static void color_xform_RGB1_to_table(uint32_t* dst, const uint32_t* src, int len, - const float* const srcTables[3], const float matrix[16], - const uint8_t* const dstTables[3]) { - color_xform_RGB1<kTable_DstGamma, false>(dst, src, len, srcTables, matrix, dstTables); -} - -static void color_xform_RGB1_to_linear(uint64_t* dst, const uint32_t* src, int len, - const float* const srcTables[3], const float matrix[16]) { - color_xform_RGB1<kLinear_DstGamma, false>(dst, src, len, srcTables, matrix, nullptr); -} - -static void color_xform_RGB1_to_2dot2_swaprb(uint32_t* dst, const uint32_t* src, int len, - const float* const srcTables[3], - const float matrix[16]) { - color_xform_RGB1<k2Dot2_DstGamma, true>(dst, src, len, srcTables, matrix, nullptr); -} - -static void color_xform_RGB1_to_srgb_swaprb(uint32_t* dst, const uint32_t* src, int len, - const float* const srcTables[3], - const float matrix[16]) { - color_xform_RGB1<kSRGB_DstGamma, true>(dst, src, len, srcTables, matrix, nullptr); -} - -static void color_xform_RGB1_to_table_swaprb(uint32_t* dst, const uint32_t* src, int len, - const float* const srcTables[3], - const float matrix[16], - const uint8_t* const dstTables[3]) { - color_xform_RGB1<kTable_DstGamma, true>(dst, src, len, srcTables, matrix, dstTables); -} - -} // namespace SK_OPTS_NS - -#endif // SkColorXform_opts_DEFINED diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h index e2574aeef0..a511243219 100644 --- a/src/opts/SkNx_neon.h +++ b/src/opts/SkNx_neon.h @@ -507,6 +507,10 @@ template<> inline Sk4h SkNx_cast<uint16_t, int32_t>(const Sk4i& src) { return vmovn_u32(vreinterpretq_u32_s32(src.fVec)); } +template<> /*static*/ inline Sk4i SkNx_cast<int32_t, uint32_t>(const Sk4u& src) { + return vreinterpretq_s32_u32(src.fVec); +} + static inline Sk4i Sk4f_round(const Sk4f& x) { return vcvtq_s32_f32((x + 0.5f).fVec); } diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h index 003b89f667..3e66637df3 100644 --- a/src/opts/SkNx_sse.h +++ b/src/opts/SkNx_sse.h @@ -447,6 +447,10 @@ template<> /*static*/ inline Sk4b SkNx_cast<uint8_t, int32_t>(const Sk4i& src) { return _mm_packus_epi16(_mm_packus_epi16(src.fVec, src.fVec), src.fVec); } +template<> /*static*/ inline Sk4i SkNx_cast<int32_t, uint32_t>(const Sk4u& src) { + return src.fVec; +} + static inline Sk4i Sk4f_round(const Sk4f& x) { return _mm_cvtps_epi32(x.fVec); } diff --git a/src/opts/SkOpts_sse41.cpp b/src/opts/SkOpts_sse41.cpp index e70cedeb31..17ce0668ff 100644 --- a/src/opts/SkOpts_sse41.cpp +++ b/src/opts/SkOpts_sse41.cpp @@ -11,7 +11,6 @@ #include "SkBlurImageFilter_opts.h" #include "SkBlitRow_opts.h" #include "SkBlend_opts.h" -#include "SkColorXform_opts.h" namespace SkOpts { void Init_sse41() { @@ -20,13 +19,5 @@ namespace SkOpts { box_blur_yx = sse41::box_blur_yx; srcover_srgb_srgb = sse41::srcover_srgb_srgb; blit_row_s32a_opaque = sse41::blit_row_s32a_opaque; - - color_xform_RGB1_to_2dot2 = sse41::color_xform_RGB1_to_2dot2; - color_xform_RGB1_to_srgb = sse41::color_xform_RGB1_to_srgb; - color_xform_RGB1_to_table = sse41::color_xform_RGB1_to_table; - color_xform_RGB1_to_linear = sse41::color_xform_RGB1_to_linear; - color_xform_RGB1_to_2dot2_swaprb = sse41::color_xform_RGB1_to_2dot2_swaprb; - color_xform_RGB1_to_srgb_swaprb = sse41::color_xform_RGB1_to_srgb_swaprb; - color_xform_RGB1_to_table_swaprb = sse41::color_xform_RGB1_to_table_swaprb; } } |