aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
authorGravatar msarett <msarett@google.com>2016-08-02 11:30:30 -0700
committerGravatar Commit bot <commit-bot@chromium.org>2016-08-02 11:30:30 -0700
commit15ee3deee8aca2bf6e658449f25ee34a8153e6ee (patch)
tree05633345fc092e02530c3a8ea80cd79bf1f5abe0 /src
parentb605f89398767bc39516e1c13a1d9a23bc8e2162 (diff)
Refactor of SkColorSpaceXformOpts
(1) Performance is better or stays the same. (2) Code is split into functions (RasterPipeline-ish design). IMO, it's not really more or less readable. But I think it's now much easier to add capabilities, apply optimizations, or do more refactors. Or to actually use RasterPipeline. I held back from trying any of these to try to keep this CL sane. BUG=skia: GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2194303002 CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot Review-Url: https://codereview.chromium.org/2194303002
Diffstat (limited to 'src')
-rw-r--r--src/core/SkColorSpaceXform.cpp138
-rw-r--r--src/core/SkColorSpaceXformOpts.h379
-rw-r--r--src/core/SkOpts.cpp9
-rw-r--r--src/core/SkOpts.h25
-rw-r--r--src/opts/SkColorXform_opts.h252
-rw-r--r--src/opts/SkNx_neon.h4
-rw-r--r--src/opts/SkNx_sse.h4
-rw-r--r--src/opts/SkOpts_sse41.cpp9
8 files changed, 462 insertions, 358 deletions
diff --git a/src/core/SkColorSpaceXform.cpp b/src/core/SkColorSpaceXform.cpp
index 4a7f175082..57b4fa6ddd 100644
--- a/src/core/SkColorSpaceXform.cpp
+++ b/src/core/SkColorSpaceXform.cpp
@@ -8,7 +8,7 @@
#include "SkColorPriv.h"
#include "SkColorSpace_Base.h"
#include "SkColorSpaceXform.h"
-#include "SkOpts.h"
+#include "SkColorSpaceXformOpts.h"
#include "SkSRGB.h"
static constexpr float sk_linear_from_2dot2[256] = {
@@ -78,56 +78,6 @@ static constexpr float sk_linear_from_2dot2[256] = {
0.974300202388861000f, 0.982826255053791000f, 0.991392843592940000f, 1.000000000000000000f,
};
-static void build_table_linear_from_gamma(float* outTable, float exponent) {
- for (float x = 0.0f; x <= 1.0f; x += (1.0f/255.0f)) {
- *outTable++ = powf(x, exponent);
- }
-}
-
-// Interpolating lookup in a variably sized table.
-static float interp_lut(float input, const float* table, int tableSize) {
- float index = input * (tableSize - 1);
- float diff = index - sk_float_floor2int(index);
- return table[(int) sk_float_floor2int(index)] * (1.0f - diff) +
- table[(int) sk_float_ceil2int(index)] * diff;
-}
-
-// outTable is always 256 entries, inTable may be larger or smaller.
-static void build_table_linear_from_gamma(float* outTable, const float* inTable,
- int inTableSize) {
- if (256 == inTableSize) {
- memcpy(outTable, inTable, sizeof(float) * 256);
- return;
- }
-
- for (float x = 0.0f; x <= 1.0f; x += (1.0f/255.0f)) {
- *outTable++ = interp_lut(x, inTable, inTableSize);
- }
-}
-
-static void build_table_linear_from_gamma(float* outTable, float g, float a, float b, float c,
- float d, float e, float f) {
- // Y = (aX + b)^g + c for X >= d
- // Y = eX + f otherwise
- for (float x = 0.0f; x <= 1.0f; x += (1.0f/255.0f)) {
- if (x >= d) {
- *outTable++ = powf(a * x + b, g) + c;
- } else {
- *outTable++ = e * x + f;
- }
- }
-}
-
-static inline bool compute_gamut_xform(SkMatrix44* srcToDst, const SkMatrix44& srcToXYZ,
- const SkMatrix44& dstToXYZ) {
- if (!dstToXYZ.invert(srcToDst)) {
- return false;
- }
-
- srcToDst->postConcat(srcToXYZ);
- return true;
-}
-
///////////////////////////////////////////////////////////////////////////////////////////////////
static constexpr uint8_t linear_to_srgb[1024] = {
@@ -190,7 +140,7 @@ static constexpr uint8_t linear_to_srgb[1024] = {
253, 253, 254, 254, 254, 254, 254, 254, 254, 254, 254, 255, 255, 255, 255, 255
};
-static constexpr uint8_t linear_to_2dot2[1024] = {
+static constexpr uint8_t linear_to_2dot2_table[1024] = {
0, 11, 15, 18, 21, 23, 25, 26, 28, 30, 31, 32, 34, 35, 36, 37, 39, 40,
41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 50, 51, 52, 53, 54, 54, 55,
56, 56, 57, 58, 58, 59, 60, 60, 61, 62, 62, 63, 63, 64, 65, 65, 66, 66,
@@ -250,6 +200,50 @@ static constexpr uint8_t linear_to_2dot2[1024] = {
253, 253, 254, 254, 254, 254, 254, 254, 254, 254, 254, 255, 255, 255, 255, 255,
};
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Fills the 256-entry outTable with powf(i / 255, exponent) for i = 0..255.
+//
+// The loop is driven by an integer counter.  Accumulating 1/255 in a float
+// picks up rounding error on every step, so the running value can pass 1.0
+// before the last step and leave outTable[255] unwritten.
+static void build_table_linear_from_gamma(float* outTable, float exponent) {
+    for (int i = 0; i < 256; i++) {
+        outTable[i] = powf(i / 255.0f, exponent);
+    }
+}
+
+// Interpolating lookup in a variably sized table.
+// |input| is scaled so that 1.0 lands on the last entry; the result blends the
+// two entries on either side of that scaled position.
+static float interp_lut(float input, const float* table, int tableSize) {
+    float position = input * (tableSize - 1);
+    int below = (int) sk_float_floor2int(position);
+    int above = (int) sk_float_ceil2int(position);
+    float weight = position - below;
+    return table[below] * (1.0f - weight) +
+           table[above] * weight;
+}
+
+// Resamples inTable into outTable.
+// outTable is always 256 entries, inTable may be larger or smaller.
+static void build_table_linear_from_gamma(float* outTable, const float* inTable,
+                                          int inTableSize) {
+    if (256 == inTableSize) {
+        // Same size on both sides: copy the entries unchanged.
+        memcpy(outTable, inTable, sizeof(float) * 256);
+        return;
+    }
+
+    // Integer counter rather than a float accumulator: summing 1/255 in a
+    // float can pass 1.0 before the last step and skip outTable[255].
+    // 255 / 255.0f is exactly 1.0, so the last lookup hits the final entry.
+    for (int i = 0; i < 256; i++) {
+        outTable[i] = interp_lut(i / 255.0f, inTable, inTableSize);
+    }
+}
+
+// Fills the 256-entry outTable from a piecewise curve sampled at i / 255:
+//   Y = (aX + b)^g + c    for X >= d
+//   Y = eX + f            otherwise
+//
+// An integer counter drives the loop.  Accumulating 1/255 in a float can
+// pass 1.0 before the last step and leave outTable[255] unwritten.
+static void build_table_linear_from_gamma(float* outTable, float g, float a, float b, float c,
+                                          float d, float e, float f) {
+    for (int i = 0; i < 256; i++) {
+        const float x = i / 255.0f;
+        if (x >= d) {
+            outTable[i] = powf(a * x + b, g) + c;
+        } else {
+            outTable[i] = e * x + f;
+        }
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
// Expand range from 0-1 to 0-255, then convert.
static uint8_t clamp_normalized_float_to_byte(float v) {
// The ordering of the logic is a little strange here in order
@@ -373,7 +367,7 @@ static const GammaFns<float> kToLinear {
static const GammaFns<uint8_t> kFromLinear {
linear_to_srgb,
- linear_to_2dot2,
+ linear_to_2dot2_table,
&build_table_linear_to_gamma,
&build_table_linear_to_gamma,
&build_table_linear_to_gamma,
@@ -449,6 +443,18 @@ static void build_gamma_tables(const T* outGammaTables[3], T* gammaTableStorage,
///////////////////////////////////////////////////////////////////////////////////////////////////
+// Writes the inverse of dstToXYZ into srcToDst and then post-concatenates
+// srcToXYZ onto it.  Returns false, without concatenating, when dstToXYZ
+// cannot be inverted.
+static inline bool compute_gamut_xform(SkMatrix44* srcToDst, const SkMatrix44& srcToXYZ,
+                                       const SkMatrix44& dstToXYZ) {
+    const bool inverted = dstToXYZ.invert(srcToDst);
+    if (inverted) {
+        srcToDst->postConcat(srcToXYZ);
+    }
+    return inverted;
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
std::unique_ptr<SkColorSpaceXform> SkColorSpaceXform::New(const sk_sp<SkColorSpace>& srcSpace,
const sk_sp<SkColorSpace>& dstSpace) {
if (!srcSpace || !dstSpace) {
@@ -615,8 +621,8 @@ static void handle_color_lut(uint32_t* dst, const uint32_t* src, int len,
///////////////////////////////////////////////////////////////////////////////////////////////////
-template <SkColorSpace::GammaNamed Dst>
-SkColorSpaceXform_Base<Dst>::SkColorSpaceXform_Base(const sk_sp<SkColorSpace>& srcSpace,
+template <SkColorSpace::GammaNamed kDst>
+SkColorSpaceXform_Base<kDst>::SkColorSpaceXform_Base(const sk_sp<SkColorSpace>& srcSpace,
const SkMatrix44& srcToDst,
const sk_sp<SkColorSpace>& dstSpace)
: fColorLUT(sk_ref_sp((SkColorLookUpTable*) as_CSB(srcSpace)->colorLUT()))
@@ -636,7 +642,8 @@ void SkColorSpaceXform_Base<SkColorSpace::kSRGB_GammaNamed>
src = dst;
}
- SkOpts::color_xform_RGB1_to_srgb(dst, src, len, fSrcGammaTables, fSrcToDst);
+ color_xform_RGBA<SkColorSpace::kSRGB_GammaNamed, false, false>
+ (dst, src, len, fSrcGammaTables, fSrcToDst, fDstGammaTables);
}
template <>
@@ -648,7 +655,8 @@ void SkColorSpaceXform_Base<SkColorSpace::k2Dot2Curve_GammaNamed>
src = dst;
}
- SkOpts::color_xform_RGB1_to_2dot2(dst, src, len, fSrcGammaTables, fSrcToDst);
+ color_xform_RGBA<SkColorSpace::k2Dot2Curve_GammaNamed, false, false>
+ (dst, src, len, fSrcGammaTables, fSrcToDst, fDstGammaTables);
}
template <>
@@ -660,7 +668,8 @@ void SkColorSpaceXform_Base<SkColorSpace::kNonStandard_GammaNamed>
src = dst;
}
- SkOpts::color_xform_RGB1_to_table(dst, src, len, fSrcGammaTables, fSrcToDst, fDstGammaTables);
+ color_xform_RGBA<SkColorSpace::kNonStandard_GammaNamed, false, false>
+ (dst, src, len, fSrcGammaTables, fSrcToDst, fDstGammaTables);
}
template <>
@@ -672,7 +681,8 @@ void SkColorSpaceXform_Base<SkColorSpace::kSRGB_GammaNamed>
src = dst;
}
- SkOpts::color_xform_RGB1_to_srgb_swaprb(dst, src, len, fSrcGammaTables, fSrcToDst);
+ color_xform_RGBA<SkColorSpace::kSRGB_GammaNamed, false, true>
+ (dst, src, len, fSrcGammaTables, fSrcToDst, fDstGammaTables);
}
template <>
@@ -684,7 +694,8 @@ void SkColorSpaceXform_Base<SkColorSpace::k2Dot2Curve_GammaNamed>
src = dst;
}
- SkOpts::color_xform_RGB1_to_2dot2_swaprb(dst, src, len, fSrcGammaTables, fSrcToDst);
+ color_xform_RGBA<SkColorSpace::k2Dot2Curve_GammaNamed, false, true>
+ (dst, src, len, fSrcGammaTables, fSrcToDst, fDstGammaTables);
}
template <>
@@ -696,8 +707,8 @@ void SkColorSpaceXform_Base<SkColorSpace::kNonStandard_GammaNamed>
src = dst;
}
- SkOpts::color_xform_RGB1_to_table_swaprb(dst, src, len, fSrcGammaTables, fSrcToDst,
- fDstGammaTables);
+ color_xform_RGBA<SkColorSpace::kNonStandard_GammaNamed, false, true>
+ (dst, src, len, fSrcGammaTables, fSrcToDst, fDstGammaTables);
}
template <SkColorSpace::GammaNamed T>
@@ -717,5 +728,6 @@ void SkColorSpaceXform_Base<T>
src = (const RGBA32*) storage.get();
}
- SkOpts::color_xform_RGB1_to_linear(dst, src, len, fSrcGammaTables, fSrcToDst);
+ color_xform_RGBA<SkColorSpace::kLinear_GammaNamed, false, false>
+ (dst, src, len, fSrcGammaTables, fSrcToDst, fDstGammaTables);
}
diff --git a/src/core/SkColorSpaceXformOpts.h b/src/core/SkColorSpaceXformOpts.h
new file mode 100644
index 0000000000..cf0e48cbb3
--- /dev/null
+++ b/src/core/SkColorSpaceXformOpts.h
@@ -0,0 +1,379 @@
+/*
+ * Copyright 2016 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkColorSpaceXformOpts_DEFINED
+#define SkColorSpaceXformOpts_DEFINED
+
+#include "SkNx.h"
+#include "SkColorPriv.h"
+#include "SkHalf.h"
+#include "SkSRGB.h"
+#include "SkTemplates.h"
+
+// Splits a 16-float matrix into four vectors of four consecutive floats each.
+static inline void load_matrix(const float matrix[16],
+                               Sk4f& rXgXbX, Sk4f& rYgYbY, Sk4f& rZgZbZ, Sk4f& rTgTbT) {
+    const float* const first  = matrix;
+    const float* const second = first  + 4;
+    const float* const third  = second + 4;
+    const float* const fourth = third  + 4;
+
+    rXgXbX = Sk4f::Load(first);
+    rYgYbY = Sk4f::Load(second);
+    rZgZbZ = Sk4f::Load(third);
+    rTgTbT = Sk4f::Load(fourth);
+}
+
+// Reads four packed 32-bit pixels and looks each colour byte up in its table:
+// bits 0-7 through srcTables[0], bits 8-15 through srcTables[1], bits 16-23
+// through srcTables[2].  The alpha output (unnamed parameter) is not written.
+static inline void load_rgb_from_tables(const uint32_t* src,
+                                        Sk4f& r, Sk4f& g, Sk4f& b, Sk4f&,
+                                        const float* const srcTables[3]) {
+    const uint32_t p0 = src[0];
+    const uint32_t p1 = src[1];
+    const uint32_t p2 = src[2];
+    const uint32_t p3 = src[3];
+
+    const float* const rTable = srcTables[0];
+    const float* const gTable = srcTables[1];
+    const float* const bTable = srcTables[2];
+
+    r = Sk4f(rTable[(p0 >>  0) & 0xFF], rTable[(p1 >>  0) & 0xFF],
+             rTable[(p2 >>  0) & 0xFF], rTable[(p3 >>  0) & 0xFF]);
+    g = Sk4f(gTable[(p0 >>  8) & 0xFF], gTable[(p1 >>  8) & 0xFF],
+             gTable[(p2 >>  8) & 0xFF], gTable[(p3 >>  8) & 0xFF]);
+    b = Sk4f(bTable[(p0 >> 16) & 0xFF], bTable[(p1 >> 16) & 0xFF],
+             bTable[(p2 >> 16) & 0xFF], bTable[(p3 >> 16) & 0xFF]);
+}
+
+// Same as load_rgb_from_tables, and additionally writes |a| with the top byte
+// of each of the four pixels scaled by 1/255.
+static inline void load_rgba_from_tables(const uint32_t* src,
+                                         Sk4f& r, Sk4f& g, Sk4f& b, Sk4f& a,
+                                         const float* const srcTables[3]) {
+    // The colour lanes are identical to the alpha-less loader, which leaves
+    // its alpha argument untouched.
+    load_rgb_from_tables(src, r, g, b, a, srcTables);
+
+    const Sk4u alphaBytes = Sk4u::Load(src) >> 24;
+    a = (1.0f / 255.0f) * SkNx_cast<float>(alphaBytes);
+}
+
+// Single-pixel loader: looks up the three colour bytes of *src and splats
+// each result across a whole register.  The alpha output is not written.
+static inline void load_rgb_from_tables_1(const uint32_t* src,
+                                          Sk4f& r, Sk4f& g, Sk4f& b, Sk4f&,
+                                          const float* const srcTables[3]) {
+    const uint32_t pixel = *src;
+
+    const float red   = srcTables[0][(pixel >>  0) & 0xFF];
+    const float green = srcTables[1][(pixel >>  8) & 0xFF];
+    const float blue  = srcTables[2][(pixel >> 16) & 0xFF];
+
+    r = Sk4f(red);
+    g = Sk4f(green);
+    b = Sk4f(blue);
+}
+
+// Single-pixel loader that also produces alpha: the top byte of *src scaled
+// by 1/255 and splatted across |a|.
+static inline void load_rgba_from_tables_1(const uint32_t* src,
+                                           Sk4f& r, Sk4f& g, Sk4f& b, Sk4f& a,
+                                           const float* const srcTables[3]) {
+    // Colour lanes come from the alpha-less single-pixel loader, which does
+    // not touch its alpha argument.
+    load_rgb_from_tables_1(src, r, g, b, a, srcTables);
+
+    a = (1.0f / 255.0f) * Sk4f(*src >> 24);
+}
+
+// Applies the 3x3 part of the matrix to four pixels held as planar r/g/b
+// registers.  Alpha is passed through unchanged.
+static inline void transform_gamut(const Sk4f& r, const Sk4f& g, const Sk4f& b, const Sk4f& a,
+                                   const Sk4f& rXgXbX, const Sk4f& rYgYbY, const Sk4f& rZgZbZ,
+                                   Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f& da) {
+    // Lane k of each matrix vector feeds output channel k.
+    const float rX = rXgXbX[0], rY = rYgYbY[0], rZ = rZgZbZ[0];
+    const float gX = rXgXbX[1], gY = rYgYbY[1], gZ = rZgZbZ[1];
+    const float bX = rXgXbX[2], bY = rYgYbY[2], bZ = rZgZbZ[2];
+
+    dr = rX*r + rY*g + rZ*b;
+    dg = gX*r + gY*g + gZ*b;
+    db = bX*r + bY*g + bZ*b;
+    da = a;
+}
+
+// Single-pixel matrix multiply.  r, g and b are expected to be splatted, so
+// one multiply-add chain per matrix vector yields every output lane at once.
+static inline void transform_gamut_1(const Sk4f& r, const Sk4f& g, const Sk4f& b,
+                                     const Sk4f& rXgXbX, const Sk4f& rYgYbY, const Sk4f& rZgZbZ,
+                                     Sk4f& rgba) {
+    Sk4f sum = rXgXbX*r + rYgYbY*g + rZgZbZ*b;
+    rgba = sum;
+}
+
+// Adds lanes 0, 1 and 2 of rTgTbT to dr, dg and db respectively (planar
+// layout, four pixels per register).
+static inline void translate_gamut(const Sk4f& rTgTbT, Sk4f& dr, Sk4f& dg, Sk4f& db) {
+    const float rT = rTgTbT[0];
+    const float gT = rTgTbT[1];
+    const float bT = rTgTbT[2];
+
+    dr = dr + rT;
+    dg = dg + gT;
+    db = db + bT;
+}
+
+// Single-pixel translate: adds rTgTbT to rgba lane by lane.
+static inline void translate_gamut_1(const Sk4f& rTgTbT, Sk4f& rgba) {
+    Sk4f shifted = rgba + rTgTbT;
+    rgba = shifted;
+}
+
+// Scales each colour register by the matching alpha lanes (four pixels, planar).
+static inline void premultiply(Sk4f& dr, Sk4f& dg, Sk4f& db, const Sk4f& da) {
+    Sk4f red   = da * dr;
+    Sk4f green = da * dg;
+    Sk4f blue  = da * db;
+
+    dr = red;
+    dg = green;
+    db = blue;
+}
+
+// Single-pixel premultiply: every lane of rgba is multiplied by the matching
+// lane of a.
+static inline void premultiply_1(const Sk4f& a, Sk4f& rgba) {
+    Sk4f scaled = a * rgba;
+    rgba = scaled;
+}
+
+// Converts four planar pixels with sk_linear_to_srgb_needs_trunc, clamps to
+// 0-255, truncates to integers and writes four packed 32-bit pixels to dst.
+// The alpha byte is copied from the matching src pixels; the float alpha
+// argument and the table argument are unused.  kSwapRB exchanges the byte
+// positions of red and blue.
+static inline void store_srgb(void* dst, const uint32_t* src,
+                              Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f&,
+                              const uint8_t* const[3], bool kSwapRB) {
+    int redShift   = kSwapRB ? 16 : 0;
+    int greenShift = 8;
+    int blueShift  = kSwapRB ? 0 : 16;
+    int alphaShift = 24;
+
+    dr = sk_clamp_0_255(sk_linear_to_srgb_needs_trunc(dr));
+    dg = sk_clamp_0_255(sk_linear_to_srgb_needs_trunc(dg));
+    db = sk_clamp_0_255(sk_linear_to_srgb_needs_trunc(db));
+
+    Sk4i alpha = SkNx_cast<int32_t>(Sk4u::Load(src) >> 24);
+
+    Sk4i red   = SkNx_cast<int>(dr) << redShift;
+    Sk4i green = SkNx_cast<int>(dg) << greenShift;
+    Sk4i blue  = SkNx_cast<int>(db) << blueShift;
+
+    Sk4i packed = red | green | blue | (alpha << alphaShift);
+    packed.store(dst);
+}
+
+// Single-pixel sRGB store.  The low three bytes come from the converted,
+// clamped and truncated lanes of rgba; the top byte is copied from *src.
+// kSwapRB runs the packed pixel through SkSwizzle_RB before it is written.
+static inline void store_srgb_1(void* dst, const uint32_t* src,
+                                Sk4f& rgba, const Sk4f&,
+                                const uint8_t* const[3], bool kSwapRB) {
+    rgba = sk_linear_to_srgb_needs_trunc(rgba);
+    rgba = sk_clamp_0_255(rgba);
+
+    uint32_t colorBytes;
+    SkNx_cast<uint8_t>(SkNx_cast<int32_t>(rgba)).store(&colorBytes);
+
+    const uint32_t pixel = (*src & 0xFF000000) | (colorBytes & 0x00FFFFFF);
+    *(uint32_t*)dst = kSwapRB ? SkSwizzle_RB(pixel) : pixel;
+}
+
+// Approximates 255 * x^(1/2.2) as 255 * x^(29/64), built from rsqrt() and
+// invert() only.
+static inline Sk4f linear_to_2dot2(const Sk4f& x) {
+    // Each rsqrt() halves the exponent and flips its sign.
+    Sk4f negHalf = x.rsqrt();                                   // x^(-1/2)
+    Sk4f neg32nd = negHalf.rsqrt().rsqrt().rsqrt().rsqrt();     // x^(-1/32)
+    Sk4f pos64th = neg32nd.rsqrt();                             // x^(+1/64)
+
+    // 29/64 = 32/64 - 2/64 - 1/64
+    return 255.0f * negHalf.invert() * neg32nd * pos64th.invert();
+}
+
+// Converts four planar pixels with linear_to_2dot2, clamps to 0-255, rounds
+// to integers and writes four packed 32-bit pixels to dst.  The alpha byte is
+// copied from the matching src pixels; the float alpha argument and the table
+// argument are unused.  kSwapRB exchanges the byte positions of red and blue.
+static inline void store_2dot2(void* dst, const uint32_t* src,
+                               Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f&,
+                               const uint8_t* const[3], bool kSwapRB) {
+    int redShift   = kSwapRB ? 16 : 0;
+    int greenShift = 8;
+    int blueShift  = kSwapRB ? 0 : 16;
+    int alphaShift = 24;
+
+    dr = sk_clamp_0_255(linear_to_2dot2(dr));
+    dg = sk_clamp_0_255(linear_to_2dot2(dg));
+    db = sk_clamp_0_255(linear_to_2dot2(db));
+
+    Sk4i alpha = SkNx_cast<int32_t>(Sk4u::Load(src) >> 24);
+
+    Sk4i red   = Sk4f_round(dr) << redShift;
+    Sk4i green = Sk4f_round(dg) << greenShift;
+    Sk4i blue  = Sk4f_round(db) << blueShift;
+
+    Sk4i packed = red | green | blue | (alpha << alphaShift);
+    packed.store(dst);
+}
+
+// Single-pixel 2.2-gamma store.  The low three bytes come from the converted,
+// clamped and rounded lanes of rgba; the top byte is copied from *src.
+// kSwapRB runs the packed pixel through SkSwizzle_RB before it is written.
+static inline void store_2dot2_1(void* dst, const uint32_t* src,
+                                 Sk4f& rgba, const Sk4f&,
+                                 const uint8_t* const[3], bool kSwapRB) {
+    rgba = linear_to_2dot2(rgba);
+    rgba = sk_clamp_0_255(rgba);
+
+    uint32_t colorBytes;
+    SkNx_cast<uint8_t>(Sk4f_round(rgba)).store(&colorBytes);
+
+    const uint32_t pixel = (*src & 0xFF000000) | (colorBytes & 0x00FFFFFF);
+    *(uint32_t*)dst = kSwapRB ? SkSwizzle_RB(pixel) : pixel;
+}
+
+// Converts four planar pixels (including alpha) to half floats and writes
+// them to dst with Sk4h_store4.
+//
+// src, the table argument and kSwapRB are accepted only so the signature
+// matches the other store functions; none of them is read here.
+//
+// dst is passed by value, so this function cannot advance the caller's
+// pointer.  The caller (color_xform_RGBA) steps dst itself after each store.
+static inline void store_f16(void* dst, const uint32_t* src,
+                             Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f& da,
+                             const uint8_t* const[3], bool kSwapRB) {
+    Sk4h_store4(dst, SkFloatToHalf_finite(dr),
+                     SkFloatToHalf_finite(dg),
+                     SkFloatToHalf_finite(db),
+                     SkFloatToHalf_finite(da));
+}
+
+// Single-pixel half-float store: lanes 0-2 of rgba plus lane 3 of a are
+// converted with SkFloatToHalf_finite and written to dst as one 64-bit pixel.
+// src, the table argument and kSwapRB are not read.
+static inline void store_f16_1(void* dst, const uint32_t* src,
+                               Sk4f& rgba, const Sk4f& a,
+                               const uint8_t* const[3], bool kSwapRB) {
+    const float red   = rgba[0];
+    const float green = rgba[1];
+    const float blue  = rgba[2];
+    const float alpha = a[3];
+
+    rgba = Sk4f(red, green, blue, alpha);
+    SkFloatToHalf_finite(rgba).store((uint64_t*) dst);
+}
+
+// Table-driven store for four planar pixels.  Each colour value is scaled to
+// 0-1023, clamped, rounded, and used as an index into the matching dstTables
+// entry.  The alpha byte is copied from the matching src pixel; the float
+// alpha argument is unused.  kSwapRB exchanges the byte positions of red and
+// blue.
+//
+// Packing is done in uint32_t.  The table bytes would otherwise promote to
+// signed int, and shifting the alpha byte left by 24 would move a set bit
+// into the sign position.
+static inline void store_generic(void* dst, const uint32_t* src,
+                                 Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f&,
+                                 const uint8_t* const dstTables[3], bool kSwapRB) {
+    const int kRShift = kSwapRB ? 16 : 0;
+    const int kGShift = 8;
+    const int kBShift = kSwapRB ? 0 : 16;
+
+    dr = Sk4f::Min(Sk4f::Max(1023.0f * dr, 0.0f), 1023.0f);
+    dg = Sk4f::Min(Sk4f::Max(1023.0f * dg, 0.0f), 1023.0f);
+    db = Sk4f::Min(Sk4f::Max(1023.0f * db, 0.0f), 1023.0f);
+
+    Sk4i ir = Sk4f_round(dr);
+    Sk4i ig = Sk4f_round(dg);
+    Sk4i ib = Sk4f_round(db);
+
+    uint32_t* dst32 = (uint32_t*) dst;
+    for (int i = 0; i < 4; i++) {
+        // Read alpha before writing so that dst may alias src.
+        const uint32_t alpha = src[i] & 0xFF000000;
+        dst32[i] = ((uint32_t) dstTables[0][ir[i]] << kRShift)
+                 | ((uint32_t) dstTables[1][ig[i]] << kGShift)
+                 | ((uint32_t) dstTables[2][ib[i]] << kBShift)
+                 | alpha;
+    }
+}
+
+// Single-pixel table-driven store.  Lanes 0-2 of rgba are scaled to 0-1023,
+// clamped, rounded, and used as indices into dstTables[0..2]; the top byte is
+// copied from *src.
+//
+// kSwapRB exchanges the byte positions of red and blue, matching the
+// four-pixel store_generic.  With hard-coded shifts of 0/8/16, the trailing
+// len % 4 pixels of a swapped transform would come out unswapped.
+static inline void store_generic_1(void* dst, const uint32_t* src,
+                                   Sk4f& rgba, const Sk4f&,
+                                   const uint8_t* const dstTables[3], bool kSwapRB) {
+    const int kRShift = kSwapRB ? 16 : 0;
+    const int kGShift = 8;
+    const int kBShift = kSwapRB ? 0 : 16;
+
+    rgba = Sk4f::Min(Sk4f::Max(1023.0f * rgba, 0.0f), 1023.0f);
+
+    Sk4i indices = Sk4f_round(rgba);
+
+    *((uint32_t*) dst) = ((uint32_t) dstTables[0][indices[0]] << kRShift)
+                       | ((uint32_t) dstTables[1][indices[1]] << kGShift)
+                       | ((uint32_t) dstTables[2][indices[2]] << kBShift)
+                       | (*src & 0xFF000000);
+}
+
+// Transforms |len| packed 32-bit source pixels into dst.
+//
+// Each pixel is loaded through srcTables, multiplied by the 3x3 part of
+// |matrix|, translated by the fourth matrix vector, optionally premultiplied,
+// and written by the store function selected for kDstGamma:
+//   kSRGB / k2Dot2Curve / kNonStandard -> 4 bytes per dst pixel
+//   kLinear                            -> 8 bytes per dst pixel (half floats)
+// dstTables is forwarded to the store functions; kSwapRB is forwarded too.
+//
+// Pixels are processed four at a time while at least four remain, then one at
+// a time.
+template <SkColorSpace::GammaNamed kDstGamma, bool kPremul, bool kSwapRB>
+static void color_xform_RGBA(void* dst, const uint32_t* src, int len,
+                             const float* const srcTables[3], const float matrix[16],
+                             const uint8_t* const dstTables[3]) {
+    decltype(store_srgb            )* store;
+    decltype(store_srgb_1          )* store_1;
+    decltype(load_rgb_from_tables  )* load;
+    decltype(load_rgb_from_tables_1)* load_1;
+    size_t sizeOfDstPixel;
+    switch (kDstGamma) {
+        case SkColorSpace::kSRGB_GammaNamed:
+            load    = kPremul ? load_rgba_from_tables   : load_rgb_from_tables;
+            load_1  = kPremul ? load_rgba_from_tables_1 : load_rgb_from_tables_1;
+            store   = store_srgb;
+            store_1 = store_srgb_1;
+            sizeOfDstPixel = 4;
+            break;
+        case SkColorSpace::k2Dot2Curve_GammaNamed:
+            load    = kPremul ? load_rgba_from_tables   : load_rgb_from_tables;
+            load_1  = kPremul ? load_rgba_from_tables_1 : load_rgb_from_tables_1;
+            store   = store_2dot2;
+            store_1 = store_2dot2_1;
+            sizeOfDstPixel = 4;
+            break;
+        case SkColorSpace::kLinear_GammaNamed:
+            // The half-float stores write alpha from the float register, so
+            // alpha is always loaded here.
+            load    = load_rgba_from_tables;
+            load_1  = load_rgba_from_tables_1;
+            store   = store_f16;
+            store_1 = store_f16_1;
+            sizeOfDstPixel = 8;
+            break;
+        case SkColorSpace::kNonStandard_GammaNamed:
+            load    = kPremul ? load_rgba_from_tables   : load_rgb_from_tables;
+            load_1  = kPremul ? load_rgba_from_tables_1 : load_rgb_from_tables_1;
+            store   = store_generic;
+            store_1 = store_generic_1;
+            sizeOfDstPixel = 4;
+            break;
+    }
+
+    Sk4f rXgXbX, rYgYbY, rZgZbZ, rTgTbT;
+    load_matrix(matrix, rXgXbX, rYgYbY, rZgZbZ, rTgTbT);
+
+    if (len >= 4) {
+        // Naively this would be a loop of load-transform-store, but we found it faster to
+        // move the N+1th load ahead of the Nth store.  We don't bother doing this for N<4.
+        Sk4f r, g, b, a;
+        load(src, r, g, b, a, srcTables);
+        src += 4;
+        len -= 4;
+
+        Sk4f dr, dg, db, da;
+        while (len >= 4) {
+            transform_gamut(r, g, b, a, rXgXbX, rYgYbY, rZgZbZ, dr, dg, db, da);
+            translate_gamut(rTgTbT, dr, dg, db);
+
+            if (kPremul) {
+                premultiply(dr, dg, db, da);
+            }
+
+            // dr/dg/db/da belong to the group just behind src.  Record that
+            // position before src moves on: the 8888 stores re-read alpha
+            // from the source pixels, and must read it from this group rather
+            // than from the one loaded next.
+            const uint32_t* storeSrc = src - 4;
+
+            load(src, r, g, b, a, srcTables);
+            src += 4;
+            len -= 4;
+
+            store(dst, storeSrc, dr, dg, db, da, dstTables, kSwapRB);
+            dst = SkTAddOffset<void>(dst, 4 * sizeOfDstPixel);
+        }
+
+        // Drain the last loaded group; here src - 4 is its source position.
+        transform_gamut(r, g, b, a, rXgXbX, rYgYbY, rZgZbZ, dr, dg, db, da);
+        translate_gamut(rTgTbT, dr, dg, db);
+
+        if (kPremul) {
+            premultiply(dr, dg, db, da);
+        }
+
+        store(dst, src - 4, dr, dg, db, da, dstTables, kSwapRB);
+        dst = SkTAddOffset<void>(dst, 4 * sizeOfDstPixel);
+    }
+
+    // Remaining 0-3 pixels, one at a time.
+    while (len > 0) {
+        Sk4f r, g, b, a;
+        load_1(src, r, g, b, a, srcTables);
+
+        Sk4f rgba;
+        transform_gamut_1(r, g, b, rXgXbX, rYgYbY, rZgZbZ, rgba);
+
+        translate_gamut_1(rTgTbT, rgba);
+
+        // Keep the tail consistent with the four-wide path.  The stores take
+        // the final alpha from src or from |a|, so scaling every lane of rgba
+        // is harmless.
+        if (kPremul) {
+            premultiply_1(a, rgba);
+        }
+
+        store_1(dst, src, rgba, a, dstTables, kSwapRB);
+
+        src += 1;
+        len -= 1;
+        dst = SkTAddOffset<void>(dst, sizeOfDstPixel);
+    }
+}
+
+#endif // SkColorSpaceXformOpts_DEFINED
diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp
index 9ba7bc7178..5263fe46e0 100644
--- a/src/core/SkOpts.cpp
+++ b/src/core/SkOpts.cpp
@@ -27,7 +27,6 @@
#include "SkBlitRow_opts.h"
#include "SkBlurImageFilter_opts.h"
#include "SkColorCubeFilter_opts.h"
-#include "SkColorXform_opts.h"
#include "SkMorphologyImageFilter_opts.h"
#include "SkSwizzler_opts.h"
#include "SkTextureCompressor_opts.h"
@@ -71,14 +70,6 @@ namespace SkOpts {
DEFINE_DEFAULT(inverted_CMYK_to_BGR1);
DEFINE_DEFAULT(srcover_srgb_srgb);
-
- DEFINE_DEFAULT(color_xform_RGB1_to_2dot2);
- DEFINE_DEFAULT(color_xform_RGB1_to_srgb);
- DEFINE_DEFAULT(color_xform_RGB1_to_table);
- DEFINE_DEFAULT(color_xform_RGB1_to_linear);
- DEFINE_DEFAULT(color_xform_RGB1_to_2dot2_swaprb);
- DEFINE_DEFAULT(color_xform_RGB1_to_srgb_swaprb);
- DEFINE_DEFAULT(color_xform_RGB1_to_table_swaprb);
#undef DEFINE_DEFAULT
// Each Init_foo() is defined in src/opts/SkOpts_foo.cpp.
diff --git a/src/core/SkOpts.h b/src/core/SkOpts.h
index 87489ee94e..7c6cfb0dfb 100644
--- a/src/core/SkOpts.h
+++ b/src/core/SkOpts.h
@@ -65,31 +65,6 @@ namespace SkOpts {
// Blend ndst src pixels over dst, where both src and dst point to sRGB pixels (RGBA or BGRA).
// If nsrc < ndst, we loop over src to create a pattern.
extern void (*srcover_srgb_srgb)(uint32_t* dst, const uint32_t* src, int ndst, int nsrc);
-
- // Color xform RGB1 pixels.
- extern void (*color_xform_RGB1_to_2dot2) (uint32_t* dst, const uint32_t* src, int len,
- const float* const srcTables[3],
- const float srcToDstMatrix[16]);
- extern void (*color_xform_RGB1_to_srgb)(uint32_t* dst, const uint32_t* src, int len,
- const float* const srcTables[3],
- const float srcToDstMatrix[16]);
- extern void (*color_xform_RGB1_to_table)(uint32_t* dst, const uint32_t* src, int len,
- const float* const srcTables[3],
- const float srcToDstMatrix[16],
- const uint8_t* const dstTables[3]);
- extern void (*color_xform_RGB1_to_linear)(uint64_t* dst, const uint32_t* src, int len,
- const float* const srcTables[3],
- const float srcToDstMatrix[16]);
- extern void (*color_xform_RGB1_to_2dot2_swaprb) (uint32_t* dst, const uint32_t* src, int len,
- const float* const srcTables[3],
- const float srcToDstMatrix[16]);
- extern void (*color_xform_RGB1_to_srgb_swaprb)(uint32_t* dst, const uint32_t* src, int len,
- const float* const srcTables[3],
- const float srcToDstMatrix[16]);
- extern void (*color_xform_RGB1_to_table_swaprb)(uint32_t* dst, const uint32_t* src, int len,
- const float* const srcTables[3],
- const float srcToDstMatrix[16],
- const uint8_t* const dstTables[3]);
}
#endif//SkOpts_DEFINED
diff --git a/src/opts/SkColorXform_opts.h b/src/opts/SkColorXform_opts.h
deleted file mode 100644
index b3da55c1fd..0000000000
--- a/src/opts/SkColorXform_opts.h
+++ /dev/null
@@ -1,252 +0,0 @@
-/*
- * Copyright 2016 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#ifndef SkColorXform_opts_DEFINED
-#define SkColorXform_opts_DEFINED
-
-#include "SkNx.h"
-#include "SkColorPriv.h"
-#include "SkHalf.h"
-#include "SkSRGB.h"
-#include "SkTemplates.h"
-
-namespace SK_OPTS_NS {
-
-// Strange that we need a wrapper on SkNx_cast to use as a function ptr.
-static Sk4i Sk4f_trunc(const Sk4f& x) {
- return SkNx_cast<int>(x);
-}
-
-static Sk4f linear_to_2dot2(const Sk4f& x) {
- // x^(29/64) is a very good approximation of the true value, x^(1/2.2).
- auto x2 = x.rsqrt(), // x^(-1/2)
- x32 = x2.rsqrt().rsqrt().rsqrt().rsqrt(), // x^(-1/32)
- x64 = x32.rsqrt(); // x^(+1/64)
-
- // 29 = 32 - 2 - 1
- return 255.0f * x2.invert() * x32 * x64.invert();
-}
-
-enum DstGamma {
- // 8888
- kSRGB_DstGamma,
- k2Dot2_DstGamma,
- kTable_DstGamma,
-
- // F16
- kLinear_DstGamma,
-};
-
-template <DstGamma kDstGamma, bool kSwapRB>
-static void color_xform_RGB1(void* dst, const uint32_t* src, int len,
- const float* const srcTables[3], const float matrix[16],
- const uint8_t* const dstTables[3]) {
- int kRShift = 0;
- int kGShift = 8;
- int kBShift = 16;
- int kAShift = 24;
- if (kSwapRB) {
- kBShift = 0;
- kRShift = 16;
- }
-
- Sk4f rXgXbX = Sk4f::Load(matrix + 0),
- rYgYbY = Sk4f::Load(matrix + 4),
- rZgZbZ = Sk4f::Load(matrix + 8),
- rTgTbT = Sk4f::Load(matrix + 12);
-
- if (len >= 4) {
- Sk4f reds, greens, blues;
- auto load_next_4 = [&reds, &greens, &blues, &src, &len, &srcTables] {
- reds = Sk4f{srcTables[0][(src[0] >> 0) & 0xFF],
- srcTables[0][(src[1] >> 0) & 0xFF],
- srcTables[0][(src[2] >> 0) & 0xFF],
- srcTables[0][(src[3] >> 0) & 0xFF]};
- greens = Sk4f{srcTables[1][(src[0] >> 8) & 0xFF],
- srcTables[1][(src[1] >> 8) & 0xFF],
- srcTables[1][(src[2] >> 8) & 0xFF],
- srcTables[1][(src[3] >> 8) & 0xFF]};
- blues = Sk4f{srcTables[2][(src[0] >> 16) & 0xFF],
- srcTables[2][(src[1] >> 16) & 0xFF],
- srcTables[2][(src[2] >> 16) & 0xFF],
- srcTables[2][(src[3] >> 16) & 0xFF]};
- src += 4;
- len -= 4;
- };
-
- Sk4f dstReds, dstGreens, dstBlues;
- auto transform_4 = [&reds, &greens, &blues, &dstReds, &dstGreens, &dstBlues, &rXgXbX,
- &rYgYbY, &rZgZbZ, &rTgTbT] {
- dstReds = rXgXbX[0]*reds + rYgYbY[0]*greens + rZgZbZ[0]*blues + rTgTbT[0];
- dstGreens = rXgXbX[1]*reds + rYgYbY[1]*greens + rZgZbZ[1]*blues + rTgTbT[1];
- dstBlues = rXgXbX[2]*reds + rYgYbY[2]*greens + rZgZbZ[2]*blues + rTgTbT[2];
- };
-
- auto store_4 = [&dstReds, &dstGreens, &dstBlues, &dst, &dstTables, kRShift, kGShift,
- kBShift, kAShift] {
- if (kSRGB_DstGamma == kDstGamma || k2Dot2_DstGamma == kDstGamma) {
- Sk4f (*linear_to_curve)(const Sk4f&) = (kSRGB_DstGamma == kDstGamma) ?
- sk_linear_to_srgb_needs_trunc : linear_to_2dot2;
- Sk4i (*float_to_int)(const Sk4f&) = (kSRGB_DstGamma == kDstGamma) ?
- Sk4f_trunc : Sk4f_round;
-
- dstReds = linear_to_curve(dstReds);
- dstGreens = linear_to_curve(dstGreens);
- dstBlues = linear_to_curve(dstBlues);
-
- dstReds = sk_clamp_0_255(dstReds);
- dstGreens = sk_clamp_0_255(dstGreens);
- dstBlues = sk_clamp_0_255(dstBlues);
-
- auto rgba = (float_to_int(dstReds) << kRShift)
- | (float_to_int(dstGreens) << kGShift)
- | (float_to_int(dstBlues) << kBShift)
- | (Sk4i{0xFF} << kAShift);
- rgba.store((uint32_t*) dst);
-
- dst = SkTAddOffset<void>(dst, 4 * sizeof(uint32_t));
- } else if (kTable_DstGamma == kDstGamma) {
- Sk4f scaledReds = Sk4f::Min(Sk4f::Max(1023.0f * dstReds, 0.0f), 1023.0f);
- Sk4f scaledGreens = Sk4f::Min(Sk4f::Max(1023.0f * dstGreens, 0.0f), 1023.0f);
- Sk4f scaledBlues = Sk4f::Min(Sk4f::Max(1023.0f * dstBlues, 0.0f), 1023.0f);
-
- Sk4i indicesReds = Sk4f_round(scaledReds);
- Sk4i indicesGreens = Sk4f_round(scaledGreens);
- Sk4i indicesBlues = Sk4f_round(scaledBlues);
-
- uint32_t* dst32 = (uint32_t*) dst;
- dst32[0] = dstTables[0][indicesReds [0]] << kRShift
- | dstTables[1][indicesGreens[0]] << kGShift
- | dstTables[2][indicesBlues [0]] << kBShift
- | 0xFF << kAShift;
- dst32[1] = dstTables[0][indicesReds [1]] << kRShift
- | dstTables[1][indicesGreens[1]] << kGShift
- | dstTables[2][indicesBlues [1]] << kBShift
- | 0xFF << kAShift;
- dst32[2] = dstTables[0][indicesReds [2]] << kRShift
- | dstTables[1][indicesGreens[2]] << kGShift
- | dstTables[2][indicesBlues [2]] << kBShift
- | 0xFF << kAShift;
- dst32[3] = dstTables[0][indicesReds [3]] << kRShift
- | dstTables[1][indicesGreens[3]] << kGShift
- | dstTables[2][indicesBlues [3]] << kBShift
- | 0xFF << kAShift;
-
- dst = SkTAddOffset<void>(dst, 4 * sizeof(uint32_t));
- } else {
- Sk4h_store4(dst, SkFloatToHalf_finite(dstReds),
- SkFloatToHalf_finite(dstGreens),
- SkFloatToHalf_finite(dstBlues),
- SK_Half1);
- dst = SkTAddOffset<void>(dst, 4 * sizeof(uint64_t));
- }
- };
-
- load_next_4();
-
- while (len >= 4) {
- transform_4();
- load_next_4();
- store_4();
- }
-
- transform_4();
- store_4();
- }
-
- while (len > 0) {
- // Splat r,g,b across a register each.
- auto r = Sk4f{srcTables[0][(*src >> 0) & 0xFF]},
- g = Sk4f{srcTables[1][(*src >> 8) & 0xFF]},
- b = Sk4f{srcTables[2][(*src >> 16) & 0xFF]};
-
- auto dstPixel = rXgXbX*r + rYgYbY*g + rZgZbZ*b + rTgTbT;
-
- if (kSRGB_DstGamma == kDstGamma || k2Dot2_DstGamma == kDstGamma) {
- Sk4f (*linear_to_curve)(const Sk4f&) = (kSRGB_DstGamma == kDstGamma) ?
- sk_linear_to_srgb_needs_trunc : linear_to_2dot2;
- Sk4i (*float_to_int)(const Sk4f&) = (kSRGB_DstGamma == kDstGamma) ?
- Sk4f_trunc : Sk4f_round;
-
- dstPixel = sk_clamp_0_255(linear_to_curve(dstPixel));
-
- uint32_t rgba;
- SkNx_cast<uint8_t>(float_to_int(dstPixel)).store(&rgba);
- rgba |= 0xFF000000;
- if (kSwapRB) {
- *((uint32_t*) dst) = SkSwizzle_RB(rgba);
- } else {
- *((uint32_t*) dst) = rgba;
- }
- dst = SkTAddOffset<void>(dst, sizeof(uint32_t));
- } else if (kTable_DstGamma == kDstGamma) {
- Sk4f scaledPixel = Sk4f::Min(Sk4f::Max(1023.0f * dstPixel, 0.0f), 1023.0f);
-
- Sk4i indices = Sk4f_round(scaledPixel);
-
- *((uint32_t*) dst) = dstTables[0][indices[0]] << kRShift
- | dstTables[1][indices[1]] << kGShift
- | dstTables[2][indices[2]] << kBShift
- | 0xFF << kAShift;
-
- dst = SkTAddOffset<void>(dst, sizeof(uint32_t));
- } else {
- uint64_t rgba;
- SkFloatToHalf_finite(dstPixel).store(&rgba);
- rgba |= static_cast<uint64_t>(SK_Half1) << 48;
- *((uint64_t*) dst) = rgba;
- dst = SkTAddOffset<void>(dst, sizeof(uint64_t));
- }
-
- src += 1;
- len -= 1;
- }
-}
-
-static void color_xform_RGB1_to_2dot2(uint32_t* dst, const uint32_t* src, int len,
- const float* const srcTables[3], const float matrix[16]) {
- color_xform_RGB1<k2Dot2_DstGamma, false>(dst, src, len, srcTables, matrix, nullptr);
-}
-
-static void color_xform_RGB1_to_srgb(uint32_t* dst, const uint32_t* src, int len,
- const float* const srcTables[3], const float matrix[16]) {
- color_xform_RGB1<kSRGB_DstGamma, false>(dst, src, len, srcTables, matrix, nullptr);
-}
-
-static void color_xform_RGB1_to_table(uint32_t* dst, const uint32_t* src, int len,
- const float* const srcTables[3], const float matrix[16],
- const uint8_t* const dstTables[3]) {
- color_xform_RGB1<kTable_DstGamma, false>(dst, src, len, srcTables, matrix, dstTables);
-}
-
-static void color_xform_RGB1_to_linear(uint64_t* dst, const uint32_t* src, int len,
- const float* const srcTables[3], const float matrix[16]) {
- color_xform_RGB1<kLinear_DstGamma, false>(dst, src, len, srcTables, matrix, nullptr);
-}
-
-static void color_xform_RGB1_to_2dot2_swaprb(uint32_t* dst, const uint32_t* src, int len,
- const float* const srcTables[3],
- const float matrix[16]) {
- color_xform_RGB1<k2Dot2_DstGamma, true>(dst, src, len, srcTables, matrix, nullptr);
-}
-
-static void color_xform_RGB1_to_srgb_swaprb(uint32_t* dst, const uint32_t* src, int len,
- const float* const srcTables[3],
- const float matrix[16]) {
- color_xform_RGB1<kSRGB_DstGamma, true>(dst, src, len, srcTables, matrix, nullptr);
-}
-
-static void color_xform_RGB1_to_table_swaprb(uint32_t* dst, const uint32_t* src, int len,
- const float* const srcTables[3],
- const float matrix[16],
- const uint8_t* const dstTables[3]) {
- color_xform_RGB1<kTable_DstGamma, true>(dst, src, len, srcTables, matrix, dstTables);
-}
-
-} // namespace SK_OPTS_NS
-
-#endif // SkColorXform_opts_DEFINED
diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h
index e2574aeef0..a511243219 100644
--- a/src/opts/SkNx_neon.h
+++ b/src/opts/SkNx_neon.h
@@ -507,6 +507,10 @@ template<> inline Sk4h SkNx_cast<uint16_t, int32_t>(const Sk4i& src) {
return vmovn_u32(vreinterpretq_u32_s32(src.fVec));
}
+// uint32 -> int32 lane cast (NEON): reinterprets the register with
+// vreinterpretq_s32_u32.
+template<> /*static*/ inline Sk4i SkNx_cast<int32_t, uint32_t>(const Sk4u& src) {
+    return vreinterpretq_s32_u32(src.fVec);
+}
+
static inline Sk4i Sk4f_round(const Sk4f& x) {
return vcvtq_s32_f32((x + 0.5f).fVec);
}
diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h
index 003b89f667..3e66637df3 100644
--- a/src/opts/SkNx_sse.h
+++ b/src/opts/SkNx_sse.h
@@ -447,6 +447,10 @@ template<> /*static*/ inline Sk4b SkNx_cast<uint8_t, int32_t>(const Sk4i& src) {
return _mm_packus_epi16(_mm_packus_epi16(src.fVec, src.fVec), src.fVec);
}
+// uint32 -> int32 lane cast (SSE): returns the source register as is.
+template<> /*static*/ inline Sk4i SkNx_cast<int32_t, uint32_t>(const Sk4u& src) {
+    return src.fVec;
+}
+
static inline Sk4i Sk4f_round(const Sk4f& x) {
return _mm_cvtps_epi32(x.fVec);
}
diff --git a/src/opts/SkOpts_sse41.cpp b/src/opts/SkOpts_sse41.cpp
index e70cedeb31..17ce0668ff 100644
--- a/src/opts/SkOpts_sse41.cpp
+++ b/src/opts/SkOpts_sse41.cpp
@@ -11,7 +11,6 @@
#include "SkBlurImageFilter_opts.h"
#include "SkBlitRow_opts.h"
#include "SkBlend_opts.h"
-#include "SkColorXform_opts.h"
namespace SkOpts {
void Init_sse41() {
@@ -20,13 +19,5 @@ namespace SkOpts {
box_blur_yx = sse41::box_blur_yx;
srcover_srgb_srgb = sse41::srcover_srgb_srgb;
blit_row_s32a_opaque = sse41::blit_row_s32a_opaque;
-
- color_xform_RGB1_to_2dot2 = sse41::color_xform_RGB1_to_2dot2;
- color_xform_RGB1_to_srgb = sse41::color_xform_RGB1_to_srgb;
- color_xform_RGB1_to_table = sse41::color_xform_RGB1_to_table;
- color_xform_RGB1_to_linear = sse41::color_xform_RGB1_to_linear;
- color_xform_RGB1_to_2dot2_swaprb = sse41::color_xform_RGB1_to_2dot2_swaprb;
- color_xform_RGB1_to_srgb_swaprb = sse41::color_xform_RGB1_to_srgb_swaprb;
- color_xform_RGB1_to_table_swaprb = sse41::color_xform_RGB1_to_table_swaprb;
}
}