aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/opts
diff options
context:
space:
mode:
Diffstat (limited to 'src/opts')
-rw-r--r--src/opts/SkColorXform_opts.h106
-rw-r--r--src/opts/SkOpts_sse41.cpp1
2 files changed, 71 insertions, 36 deletions
diff --git a/src/opts/SkColorXform_opts.h b/src/opts/SkColorXform_opts.h
index 72ef35381e..e7a2b45946 100644
--- a/src/opts/SkColorXform_opts.h
+++ b/src/opts/SkColorXform_opts.h
@@ -10,7 +10,9 @@
#include "SkNx.h"
#include "SkColorPriv.h"
+#include "SkHalf.h"
#include "SkSRGB.h"
+#include "SkTemplates.h"
namespace SK_OPTS_NS {
@@ -31,13 +33,17 @@ static Sk4f clamp_0_to_255(const Sk4f& x) {
}
enum DstGamma {
+ // 8888
kSRGB_DstGamma,
k2Dot2_DstGamma,
kTable_DstGamma,
+
+ // F16
+ kLinear_DstGamma,
};
template <DstGamma kDstGamma>
-static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len,
+static void color_xform_RGB1(void* dst, const uint32_t* src, int len,
const float* const srcTables[3], const float matrix[16],
const uint8_t* const dstTables[3]) {
Sk4f rXgXbX = Sk4f::Load(matrix + 0),
@@ -84,12 +90,14 @@ static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len,
dstGreens = clamp_0_to_255(dstGreens);
dstBlues = clamp_0_to_255(dstBlues);
- auto rgba = (Sk4f_round(dstReds) )
- | (Sk4f_round(dstGreens) << 8)
- | (Sk4f_round(dstBlues) << 16)
- | (Sk4i{ 0xFF << 24});
- rgba.store(dst);
- } else {
+ auto rgba = (Sk4f_round(dstReds) << SK_R32_SHIFT)
+ | (Sk4f_round(dstGreens) << SK_G32_SHIFT)
+ | (Sk4f_round(dstBlues) << SK_B32_SHIFT)
+ | (Sk4i{ 0xFF << SK_A32_SHIFT});
+ rgba.store((uint32_t*) dst);
+
+ dst = SkTAddOffset<void>(dst, 4 * sizeof(uint32_t));
+ } else if (kTable_DstGamma == kDstGamma) {
Sk4f scaledReds = Sk4f::Min(Sk4f::Max(1023.0f * dstReds, 0.0f), 1023.0f);
Sk4f scaledGreens = Sk4f::Min(Sk4f::Max(1023.0f * dstGreens, 0.0f), 1023.0f);
Sk4f scaledBlues = Sk4f::Min(Sk4f::Max(1023.0f * dstBlues, 0.0f), 1023.0f);
@@ -98,25 +106,38 @@ static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len,
Sk4i indicesGreens = Sk4f_round(scaledGreens);
Sk4i indicesBlues = Sk4f_round(scaledBlues);
- dst[0] = dstTables[0][indicesReds [0]]
- | dstTables[1][indicesGreens[0]] << 8
- | dstTables[2][indicesBlues [0]] << 16
- | 0xFF << 24;
- dst[1] = dstTables[0][indicesReds [1]]
- | dstTables[1][indicesGreens[1]] << 8
- | dstTables[2][indicesBlues [1]] << 16
- | 0xFF << 24;
- dst[2] = dstTables[0][indicesReds [2]]
- | dstTables[1][indicesGreens[2]] << 8
- | dstTables[2][indicesBlues [2]] << 16
- | 0xFF << 24;
- dst[3] = dstTables[0][indicesReds [3]]
- | dstTables[1][indicesGreens[3]] << 8
- | dstTables[2][indicesBlues [3]] << 16
- | 0xFF << 24;
+ uint32_t* dst32 = (uint32_t*) dst;
+ dst32[0] = dstTables[0][indicesReds [0]] << SK_R32_SHIFT
+ | dstTables[1][indicesGreens[0]] << SK_G32_SHIFT
+ | dstTables[2][indicesBlues [0]] << SK_B32_SHIFT
+ | 0xFF << SK_A32_SHIFT;
+ dst32[1] = dstTables[0][indicesReds [1]] << SK_R32_SHIFT
+ | dstTables[1][indicesGreens[1]] << SK_G32_SHIFT
+ | dstTables[2][indicesBlues [1]] << SK_B32_SHIFT
+ | 0xFF << SK_A32_SHIFT;
+ dst32[2] = dstTables[0][indicesReds [2]] << SK_R32_SHIFT
+ | dstTables[1][indicesGreens[2]] << SK_G32_SHIFT
+ | dstTables[2][indicesBlues [2]] << SK_B32_SHIFT
+ | 0xFF << SK_A32_SHIFT;
+ dst32[3] = dstTables[0][indicesReds [3]] << SK_R32_SHIFT
+ | dstTables[1][indicesGreens[3]] << SK_G32_SHIFT
+ | dstTables[2][indicesBlues [3]] << SK_B32_SHIFT
+ | 0xFF << SK_A32_SHIFT;
+
+ dst = SkTAddOffset<void>(dst, 4 * sizeof(uint32_t));
+ } else {
+ // FIXME (msarett):
+ // Can we do better here? Should we store half floats as planar?
+ // Should we write Intel/Arm specific code? Should we add a transpose
+ // function to SkNx? Should we rewrite the algorithm to be interleaved?
+ uint64_t* dst64 = (uint64_t*) dst;
+ dst64[0] = SkFloatToHalf_finite(Sk4f(dstReds[0], dstGreens[0], dstBlues[0], 1.0f));
+ dst64[1] = SkFloatToHalf_finite(Sk4f(dstReds[1], dstGreens[1], dstBlues[1], 1.0f));
+ dst64[2] = SkFloatToHalf_finite(Sk4f(dstReds[2], dstGreens[2], dstBlues[2], 1.0f));
+ dst64[3] = SkFloatToHalf_finite(Sk4f(dstReds[3], dstGreens[3], dstBlues[3], 1.0f));
+
+ dst = SkTAddOffset<void>(dst, 4 * sizeof(uint64_t));
}
-
- dst += 4;
};
load_next_4();
@@ -137,7 +158,6 @@ static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len,
g = Sk4f{srcTables[1][(*src >> 8) & 0xFF]},
b = Sk4f{srcTables[2][(*src >> 16) & 0xFF]};
- // Apply transformation matrix to dst gamut.
auto dstPixel = rXgXbX*r + rYgYbY*g + rZgZbZ*b;
if (kSRGB_DstGamma == kDstGamma || k2Dot2_DstGamma == kDstGamma) {
@@ -151,40 +171,54 @@ static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len,
uint32_t rgba;
SkNx_cast<uint8_t>(Sk4f_round(dstPixel)).store(&rgba);
rgba |= 0xFF000000;
- *dst = rgba;
- } else {
+ *((uint32_t*) dst) = SkSwizzle_RGBA_to_PMColor(rgba);
+ dst = SkTAddOffset<void>(dst, sizeof(uint32_t));
+ } else if (kTable_DstGamma == kDstGamma) {
Sk4f scaledPixel = Sk4f::Min(Sk4f::Max(1023.0f * dstPixel, 0.0f), 1023.0f);
Sk4i indices = Sk4f_round(scaledPixel);
- *dst = dstTables[0][indices[0]]
- | dstTables[1][indices[1]] << 8
- | dstTables[2][indices[2]] << 16
- | 0xFF << 24;
+ *((uint32_t*) dst) = dstTables[0][indices[0]] << SK_R32_SHIFT
+ | dstTables[1][indices[1]] << SK_G32_SHIFT
+ | dstTables[2][indices[2]] << SK_B32_SHIFT
+ | 0xFF << SK_A32_SHIFT;
+
+ dst = SkTAddOffset<void>(dst, sizeof(uint32_t));
+ } else {
+ uint64_t rgba = SkFloatToHalf_finite(dstPixel);
+
+ // Set alpha to 1.0
+ rgba |= 0x3C00000000000000;
+ *((uint64_t*) dst) = rgba;
+ dst = SkTAddOffset<void>(dst, sizeof(uint64_t));
}
- dst += 1;
src += 1;
len -= 1;
}
}
static void color_xform_RGB1_to_2dot2(uint32_t* dst, const uint32_t* src, int len,
- const float* const srcTables[3], const float matrix[16]) {
+ const float* const srcTables[3], const float matrix[12]) {
color_xform_RGB1<k2Dot2_DstGamma>(dst, src, len, srcTables, matrix, nullptr);
}
static void color_xform_RGB1_to_srgb(uint32_t* dst, const uint32_t* src, int len,
- const float* const srcTables[3], const float matrix[16]) {
+ const float* const srcTables[3], const float matrix[12]) {
color_xform_RGB1<kSRGB_DstGamma>(dst, src, len, srcTables, matrix, nullptr);
}
static void color_xform_RGB1_to_table(uint32_t* dst, const uint32_t* src, int len,
- const float* const srcTables[3], const float matrix[16],
+ const float* const srcTables[3], const float matrix[12],
const uint8_t* const dstTables[3]) {
color_xform_RGB1<kTable_DstGamma>(dst, src, len, srcTables, matrix, dstTables);
}
+static void color_xform_RGB1_to_linear(uint64_t* dst, const uint32_t* src, int len,
+ const float* const srcTables[3], const float matrix[12]) {
+ color_xform_RGB1<kLinear_DstGamma>(dst, src, len, srcTables, matrix, nullptr);
+}
+
} // namespace SK_OPTS_NS
#endif // SkColorXform_opts_DEFINED
diff --git a/src/opts/SkOpts_sse41.cpp b/src/opts/SkOpts_sse41.cpp
index 56ac386333..e1e024d948 100644
--- a/src/opts/SkOpts_sse41.cpp
+++ b/src/opts/SkOpts_sse41.cpp
@@ -24,5 +24,6 @@ namespace SkOpts {
color_xform_RGB1_to_2dot2 = sse41::color_xform_RGB1_to_2dot2;
color_xform_RGB1_to_srgb = sse41::color_xform_RGB1_to_srgb;
color_xform_RGB1_to_table = sse41::color_xform_RGB1_to_table;
+ color_xform_RGB1_to_linear = sse41::color_xform_RGB1_to_linear;
}
}