diff options
author | Mike Klein <mtklein@google.com> | 2017-06-15 19:50:33 +0000 |
---|---|---|
committer | Skia Commit-Bot <skia-commit-bot@chromium.org> | 2017-06-15 19:50:39 +0000 |
commit | 1f8796dbde0dd2cd146e051e88def2c1c23f5d9c (patch) | |
tree | 70bb256a2f1a7d51fa3d31b2ec22d8acc8f84c5d /src/core/SkColorSpaceXform.cpp | |
parent | 66366c697853e906d961ae691e2bc5209cdcfa62 (diff) |
Revert "explore always using SkRasterPipeline for color space transforms"
This reverts commit 58564425e5acb5911cc3719f9c1e39190cc829b8.
Reason for revert: iOS (the original change caused a problem on iOS; no further detail is recorded in this message)
Original change's description:
> explore always using SkRasterPipeline for color space transforms
>
> On my trashcan [likely the cylindrical 2013 Mac Pro], bench times change: mostly slowdowns, some speedups.
> On the other hand, this cuts about 270K of code out of Skia.
>
> Change-Id: Ib1069792508ced361c11ea809b0b8187f5e28a5c
> Reviewed-on: https://skia-review.googlesource.com/19744
> Reviewed-by: Matt Sarett <msarett@google.com>
> Commit-Queue: Mike Klein <mtklein@chromium.org>
TBR=mtklein@chromium.org,msarett@google.com
Change-Id: I26469e174f0281597ac5720ec790686ef3fbb4e0
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://skia-review.googlesource.com/20036
Reviewed-by: Mike Klein <mtklein@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
Diffstat (limited to 'src/core/SkColorSpaceXform.cpp')
-rw-r--r-- | src/core/SkColorSpaceXform.cpp | 719 |
1 file changed, 698 insertions, 21 deletions
diff --git a/src/core/SkColorSpaceXform.cpp b/src/core/SkColorSpaceXform.cpp index 78d8812869..8887bc279f 100644 --- a/src/core/SkColorSpaceXform.cpp +++ b/src/core/SkColorSpaceXform.cpp @@ -349,7 +349,570 @@ std::unique_ptr<SkColorSpaceXform> SkColorSpaceXform_Base::New(SkColorSpace* src /////////////////////////////////////////////////////////////////////////////////////////////////// -static inline int num_tables(SkColorSpace_XYZ* space) { +#define AI SK_ALWAYS_INLINE + +static AI void load_matrix(const float matrix[13], + Sk4f& rXgXbX, Sk4f& rYgYbY, Sk4f& rZgZbZ, Sk4f& rTgTbT) { + rXgXbX = Sk4f::Load(matrix + 0); + rYgYbY = Sk4f::Load(matrix + 3); + rZgZbZ = Sk4f::Load(matrix + 6); + rTgTbT = Sk4f::Load(matrix + 9); +} + +enum Order { + kRGBA_Order, + kBGRA_Order, +}; + +static AI void set_rb_shifts(Order kOrder, int* kRShift, int* kBShift) { + if (kRGBA_Order == kOrder) { + *kRShift = 0; + *kBShift = 16; + } else { + *kRShift = 16; + *kBShift = 0; + } +} + +template <Order kOrder> +static AI void load_rgb_from_tables(const uint32_t* src, + Sk4f& r, Sk4f& g, Sk4f& b, Sk4f& a, + const float* const srcTables[3]) { + int kRShift, kGShift = 8, kBShift; + set_rb_shifts(kOrder, &kRShift, &kBShift); + r = { srcTables[0][(src[0] >> kRShift) & 0xFF], + srcTables[0][(src[1] >> kRShift) & 0xFF], + srcTables[0][(src[2] >> kRShift) & 0xFF], + srcTables[0][(src[3] >> kRShift) & 0xFF], }; + g = { srcTables[1][(src[0] >> kGShift) & 0xFF], + srcTables[1][(src[1] >> kGShift) & 0xFF], + srcTables[1][(src[2] >> kGShift) & 0xFF], + srcTables[1][(src[3] >> kGShift) & 0xFF], }; + b = { srcTables[2][(src[0] >> kBShift) & 0xFF], + srcTables[2][(src[1] >> kBShift) & 0xFF], + srcTables[2][(src[2] >> kBShift) & 0xFF], + srcTables[2][(src[3] >> kBShift) & 0xFF], }; + a = 0.0f; // Don't let the compiler complain that |a| is uninitialized. 
+} + +template <Order kOrder> +static AI void load_rgba_from_tables(const uint32_t* src, + Sk4f& r, Sk4f& g, Sk4f& b, Sk4f& a, + const float* const srcTables[3]) { + int kRShift, kGShift = 8, kBShift; + set_rb_shifts(kOrder, &kRShift, &kBShift); + r = { srcTables[0][(src[0] >> kRShift) & 0xFF], + srcTables[0][(src[1] >> kRShift) & 0xFF], + srcTables[0][(src[2] >> kRShift) & 0xFF], + srcTables[0][(src[3] >> kRShift) & 0xFF], }; + g = { srcTables[1][(src[0] >> kGShift) & 0xFF], + srcTables[1][(src[1] >> kGShift) & 0xFF], + srcTables[1][(src[2] >> kGShift) & 0xFF], + srcTables[1][(src[3] >> kGShift) & 0xFF], }; + b = { srcTables[2][(src[0] >> kBShift) & 0xFF], + srcTables[2][(src[1] >> kBShift) & 0xFF], + srcTables[2][(src[2] >> kBShift) & 0xFF], + srcTables[2][(src[3] >> kBShift) & 0xFF], }; + a = (1.0f / 255.0f) * SkNx_cast<float>(Sk4u::Load(src) >> 24); +} + +template <Order kOrder> +static AI void load_rgb_linear(const uint32_t* src, Sk4f& r, Sk4f& g, Sk4f& b, Sk4f& a, + const float* const[3]) { + int kRShift, kGShift = 8, kBShift; + set_rb_shifts(kOrder, &kRShift, &kBShift); + r = (1.0f / 255.0f) * SkNx_cast<float>((Sk4u::Load(src) >> kRShift) & 0xFF); + g = (1.0f / 255.0f) * SkNx_cast<float>((Sk4u::Load(src) >> kGShift) & 0xFF); + b = (1.0f / 255.0f) * SkNx_cast<float>((Sk4u::Load(src) >> kBShift) & 0xFF); + a = 0.0f; // Don't let the compiler complain that |a| is uninitialized. 
+} + +template <Order kOrder> +static AI void load_rgba_linear(const uint32_t* src, Sk4f& r, Sk4f& g, Sk4f& b, Sk4f& a, + const float* const[3]) { + int kRShift, kGShift = 8, kBShift; + set_rb_shifts(kOrder, &kRShift, &kBShift); + r = (1.0f / 255.0f) * SkNx_cast<float>((Sk4u::Load(src) >> kRShift) & 0xFF); + g = (1.0f / 255.0f) * SkNx_cast<float>((Sk4u::Load(src) >> kGShift) & 0xFF); + b = (1.0f / 255.0f) * SkNx_cast<float>((Sk4u::Load(src) >> kBShift) & 0xFF); + a = (1.0f / 255.0f) * SkNx_cast<float>((Sk4u::Load(src) >> 24)); +} + +template <Order kOrder> +static AI void load_rgb_from_tables_1(const uint32_t* src, + Sk4f& r, Sk4f& g, Sk4f& b, Sk4f& a, + const float* const srcTables[3]) { + int kRShift, kGShift = 8, kBShift; + set_rb_shifts(kOrder, &kRShift, &kBShift); + r = Sk4f(srcTables[0][(*src >> kRShift) & 0xFF]); + g = Sk4f(srcTables[1][(*src >> kGShift) & 0xFF]); + b = Sk4f(srcTables[2][(*src >> kBShift) & 0xFF]); + a = 0.0f; // Don't let MSAN complain that |a| is uninitialized. +} + +template <Order kOrder> +static AI void load_rgba_from_tables_1(const uint32_t* src, + Sk4f& r, Sk4f& g, Sk4f& b, Sk4f& a, + const float* const srcTables[3]) { + int kRShift, kGShift = 8, kBShift; + set_rb_shifts(kOrder, &kRShift, &kBShift); + r = Sk4f(srcTables[0][(*src >> kRShift) & 0xFF]); + g = Sk4f(srcTables[1][(*src >> kGShift) & 0xFF]); + b = Sk4f(srcTables[2][(*src >> kBShift) & 0xFF]); + a = (1.0f / 255.0f) * Sk4f(*src >> 24); +} + +template <Order kOrder> +static AI void load_rgb_linear_1(const uint32_t* src, + Sk4f& r, Sk4f& g, Sk4f& b, Sk4f& a, + const float* const srcTables[3]) { + int kRShift, kGShift = 8, kBShift; + set_rb_shifts(kOrder, &kRShift, &kBShift); + r = Sk4f((1.0f / 255.0f) * ((*src >> kRShift) & 0xFF)); + g = Sk4f((1.0f / 255.0f) * ((*src >> kGShift) & 0xFF)); + b = Sk4f((1.0f / 255.0f) * ((*src >> kBShift) & 0xFF)); + a = 0.0f; // Don't let MSAN complain that |a| is uninitialized. 
+} + +template <Order kOrder> +static AI void load_rgba_linear_1(const uint32_t* src, + Sk4f& r, Sk4f& g, Sk4f& b, Sk4f& a, + const float* const srcTables[3]) { + int kRShift, kGShift = 8, kBShift; + set_rb_shifts(kOrder, &kRShift, &kBShift); + r = Sk4f((1.0f / 255.0f) * ((*src >> kRShift) & 0xFF)); + g = Sk4f((1.0f / 255.0f) * ((*src >> kGShift) & 0xFF)); + b = Sk4f((1.0f / 255.0f) * ((*src >> kBShift) & 0xFF)); + a = Sk4f((1.0f / 255.0f) * ((*src >> 24))); +} + +static AI void transform_gamut(const Sk4f& r, const Sk4f& g, const Sk4f& b, const Sk4f& a, + const Sk4f& rXgXbX, const Sk4f& rYgYbY, const Sk4f& rZgZbZ, + Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f& da) { + dr = rXgXbX[0]*r + rYgYbY[0]*g + rZgZbZ[0]*b; + dg = rXgXbX[1]*r + rYgYbY[1]*g + rZgZbZ[1]*b; + db = rXgXbX[2]*r + rYgYbY[2]*g + rZgZbZ[2]*b; + da = a; +} + +static AI void transform_gamut_1(const Sk4f& r, const Sk4f& g, const Sk4f& b, + const Sk4f& rXgXbX, const Sk4f& rYgYbY, const Sk4f& rZgZbZ, + Sk4f& rgba) { + rgba = rXgXbX*r + rYgYbY*g + rZgZbZ*b; +} + +static AI void translate_gamut(const Sk4f& rTgTbT, Sk4f& dr, Sk4f& dg, Sk4f& db) { + dr = dr + rTgTbT[0]; + dg = dg + rTgTbT[1]; + db = db + rTgTbT[2]; +} + +static AI void translate_gamut_1(const Sk4f& rTgTbT, Sk4f& rgba) { + rgba = rgba + rTgTbT; +} + +template <Order kOrder> +static AI void store_srgb(void* dst, const uint32_t* src, Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f&, + const uint8_t* const[3]) { + int kRShift, kGShift = 8, kBShift; + set_rb_shifts(kOrder, &kRShift, &kBShift); + dr = sk_linear_to_srgb_needs_trunc(dr); + dg = sk_linear_to_srgb_needs_trunc(dg); + db = sk_linear_to_srgb_needs_trunc(db); + + dr = sk_clamp_0_255(dr); + dg = sk_clamp_0_255(dg); + db = sk_clamp_0_255(db); + + Sk4i da = Sk4i::Load(src) & 0xFF000000; + + Sk4i rgba = (SkNx_cast<int>(dr) << kRShift) + | (SkNx_cast<int>(dg) << kGShift) + | (SkNx_cast<int>(db) << kBShift) + | (da ); + rgba.store(dst); +} + +template <Order kOrder> +static AI void store_srgb_1(void* dst, const 
uint32_t* src, + Sk4f& rgba, const Sk4f&, + const uint8_t* const[3]) { + rgba = sk_clamp_0_255(sk_linear_to_srgb_needs_trunc(rgba)); + + uint32_t tmp; + SkNx_cast<uint8_t>(SkNx_cast<int32_t>(rgba)).store(&tmp); + tmp = (*src & 0xFF000000) | (tmp & 0x00FFFFFF); + if (kBGRA_Order == kOrder) { + tmp = SkSwizzle_RB(tmp); + } + + *(uint32_t*)dst = tmp; +} + +static AI Sk4f linear_to_2dot2(const Sk4f& x) { + // x^(29/64) is a very good approximation of the true value, x^(1/2.2). + auto x2 = x.rsqrt(), // x^(-1/2) + x32 = x2.rsqrt().rsqrt().rsqrt().rsqrt(), // x^(-1/32) + x64 = x32.rsqrt(); // x^(+1/64) + + // 29 = 32 - 2 - 1 + return 255.0f * x2.invert() * x32 * x64.invert(); +} + +template <Order kOrder> +static AI void store_2dot2(void* dst, const uint32_t* src, Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f&, + const uint8_t* const[3]) { + int kRShift, kGShift = 8, kBShift; + set_rb_shifts(kOrder, &kRShift, &kBShift); + dr = linear_to_2dot2(dr); + dg = linear_to_2dot2(dg); + db = linear_to_2dot2(db); + + dr = sk_clamp_0_255(dr); + dg = sk_clamp_0_255(dg); + db = sk_clamp_0_255(db); + + Sk4i da = Sk4i::Load(src) & 0xFF000000; + + Sk4i rgba = (Sk4f_round(dr) << kRShift) + | (Sk4f_round(dg) << kGShift) + | (Sk4f_round(db) << kBShift) + | (da ); + rgba.store(dst); +} + +template <Order kOrder> +static AI void store_2dot2_1(void* dst, const uint32_t* src, + Sk4f& rgba, const Sk4f&, + const uint8_t* const[3]) { + rgba = sk_clamp_0_255(linear_to_2dot2(rgba)); + + uint32_t tmp; + SkNx_cast<uint8_t>(Sk4f_round(rgba)).store(&tmp); + tmp = (*src & 0xFF000000) | (tmp & 0x00FFFFFF); + if (kBGRA_Order == kOrder) { + tmp = SkSwizzle_RB(tmp); + } + + *(uint32_t*)dst = tmp; +} + +template <Order kOrder> +static AI void store_linear(void* dst, const uint32_t* src, Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f&, + const uint8_t* const[3]) { + int kRShift, kGShift = 8, kBShift; + set_rb_shifts(kOrder, &kRShift, &kBShift); + dr = sk_clamp_0_255(255.0f * dr); + dg = sk_clamp_0_255(255.0f * dg); + db = 
sk_clamp_0_255(255.0f * db); + + Sk4i da = Sk4i::Load(src) & 0xFF000000; + + Sk4i rgba = (Sk4f_round(dr) << kRShift) + | (Sk4f_round(dg) << kGShift) + | (Sk4f_round(db) << kBShift) + | (da ); + rgba.store(dst); +} + +template <Order kOrder> +static AI void store_linear_1(void* dst, const uint32_t* src, + Sk4f& rgba, const Sk4f&, + const uint8_t* const[3]) { + rgba = sk_clamp_0_255(255.0f * rgba); + + uint32_t tmp; + SkNx_cast<uint8_t>(Sk4f_round(rgba)).store(&tmp); + tmp = (*src & 0xFF000000) | (tmp & 0x00FFFFFF); + if (kBGRA_Order == kOrder) { + tmp = SkSwizzle_RB(tmp); + } + + *(uint32_t*)dst = tmp; +} + +template <Order kOrder> +static AI void store_f16(void* dst, const uint32_t* src, Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f& da, + const uint8_t* const[3]) { + Sk4h::Store4(dst, SkFloatToHalf_finite_ftz(dr), + SkFloatToHalf_finite_ftz(dg), + SkFloatToHalf_finite_ftz(db), + SkFloatToHalf_finite_ftz(da)); +} + +template <Order kOrder> +static AI void store_f16_1(void* dst, const uint32_t* src, + Sk4f& rgba, const Sk4f& a, + const uint8_t* const[3]) { + rgba = Sk4f(rgba[0], rgba[1], rgba[2], a[3]); + SkFloatToHalf_finite_ftz(rgba).store((uint64_t*) dst); +} + +template <Order kOrder> +static AI void store_f16_opaque(void* dst, const uint32_t* src, Sk4f& dr, Sk4f& dg, Sk4f& db, + Sk4f&, const uint8_t* const[3]) { + Sk4h::Store4(dst, SkFloatToHalf_finite_ftz(dr), + SkFloatToHalf_finite_ftz(dg), + SkFloatToHalf_finite_ftz(db), + SK_Half1); +} + +template <Order kOrder> +static AI void store_f16_1_opaque(void* dst, const uint32_t* src, + Sk4f& rgba, const Sk4f&, + const uint8_t* const[3]) { + uint64_t tmp; + SkFloatToHalf_finite_ftz(rgba).store(&tmp); + tmp &= 0x0000FFFFFFFFFFFF; + tmp |= static_cast<uint64_t>(SK_Half1) << 48; + *((uint64_t*) dst) = tmp; +} + +template <Order kOrder> +static AI void store_generic(void* dst, const uint32_t* src, Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f&, + const uint8_t* const dstTables[3]) { + int kRShift, kGShift = 8, kBShift; + 
set_rb_shifts(kOrder, &kRShift, &kBShift); + dr = Sk4f::Min(Sk4f::Max(1023.0f * dr, 0.0f), 1023.0f); + dg = Sk4f::Min(Sk4f::Max(1023.0f * dg, 0.0f), 1023.0f); + db = Sk4f::Min(Sk4f::Max(1023.0f * db, 0.0f), 1023.0f); + + Sk4i ir = Sk4f_round(dr); + Sk4i ig = Sk4f_round(dg); + Sk4i ib = Sk4f_round(db); + + Sk4i da = Sk4i::Load(src) & 0xFF000000; + + uint32_t* dst32 = (uint32_t*) dst; + dst32[0] = dstTables[0][ir[0]] << kRShift + | dstTables[1][ig[0]] << kGShift + | dstTables[2][ib[0]] << kBShift + | da[0]; + dst32[1] = dstTables[0][ir[1]] << kRShift + | dstTables[1][ig[1]] << kGShift + | dstTables[2][ib[1]] << kBShift + | da[1]; + dst32[2] = dstTables[0][ir[2]] << kRShift + | dstTables[1][ig[2]] << kGShift + | dstTables[2][ib[2]] << kBShift + | da[2]; + dst32[3] = dstTables[0][ir[3]] << kRShift + | dstTables[1][ig[3]] << kGShift + | dstTables[2][ib[3]] << kBShift + | da[3]; +} + +template <Order kOrder> +static AI void store_generic_1(void* dst, const uint32_t* src, + Sk4f& rgba, const Sk4f&, + const uint8_t* const dstTables[3]) { + int kRShift, kGShift = 8, kBShift; + set_rb_shifts(kOrder, &kRShift, &kBShift); + rgba = Sk4f::Min(Sk4f::Max(1023.0f * rgba, 0.0f), 1023.0f); + + Sk4i indices = Sk4f_round(rgba); + + *((uint32_t*) dst) = dstTables[0][indices[0]] << kRShift + | dstTables[1][indices[1]] << kGShift + | dstTables[2][indices[2]] << kBShift + | (*src & 0xFF000000); +} + +typedef decltype(load_rgb_from_tables<kRGBA_Order> )* LoadFn; +typedef decltype(load_rgb_from_tables_1<kRGBA_Order>)* Load1Fn; +typedef decltype(store_generic<kRGBA_Order> )* StoreFn; +typedef decltype(store_generic_1<kRGBA_Order> )* Store1Fn; + +enum SrcFormat { + kRGBA_8888_Linear_SrcFormat, + kRGBA_8888_Table_SrcFormat, + kBGRA_8888_Linear_SrcFormat, + kBGRA_8888_Table_SrcFormat, +}; + +enum DstFormat { + kRGBA_8888_Linear_DstFormat, + kRGBA_8888_SRGB_DstFormat, + kRGBA_8888_2Dot2_DstFormat, + kRGBA_8888_Table_DstFormat, + kBGRA_8888_Linear_DstFormat, + kBGRA_8888_SRGB_DstFormat, + 
kBGRA_8888_2Dot2_DstFormat, + kBGRA_8888_Table_DstFormat, + kF16_Linear_DstFormat, +}; + +template <SrcFormat kSrc, + DstFormat kDst, + SkAlphaType kAlphaType, + ColorSpaceMatch kCSM> +static void color_xform_RGBA(void* dst, const void* vsrc, int len, + const float* const srcTables[3], const float matrix[13], + const uint8_t* const dstTables[3]) { + LoadFn load; + Load1Fn load_1; + const bool kLoadAlpha = kF16_Linear_DstFormat == kDst && kOpaque_SkAlphaType != kAlphaType; + switch (kSrc) { + case kRGBA_8888_Linear_SrcFormat: + if (kLoadAlpha) { + load = load_rgba_linear<kRGBA_Order>; + load_1 = load_rgba_linear_1<kRGBA_Order>; + } else { + load = load_rgb_linear<kRGBA_Order>; + load_1 = load_rgb_linear_1<kRGBA_Order>; + } + break; + case kRGBA_8888_Table_SrcFormat: + if (kLoadAlpha) { + load = load_rgba_from_tables<kRGBA_Order>; + load_1 = load_rgba_from_tables_1<kRGBA_Order>; + } else { + load = load_rgb_from_tables<kRGBA_Order>; + load_1 = load_rgb_from_tables_1<kRGBA_Order>; + } + break; + case kBGRA_8888_Linear_SrcFormat: + if (kLoadAlpha) { + load = load_rgba_linear<kBGRA_Order>; + load_1 = load_rgba_linear_1<kBGRA_Order>; + } else { + load = load_rgb_linear<kBGRA_Order>; + load_1 = load_rgb_linear_1<kBGRA_Order>; + } + break; + case kBGRA_8888_Table_SrcFormat: + if (kLoadAlpha) { + load = load_rgba_from_tables<kBGRA_Order>; + load_1 = load_rgba_from_tables_1<kBGRA_Order>; + } else { + load = load_rgb_from_tables<kBGRA_Order>; + load_1 = load_rgb_from_tables_1<kBGRA_Order>; + } + break; + } + + StoreFn store; + Store1Fn store_1; + size_t sizeOfDstPixel; + switch (kDst) { + case kRGBA_8888_Linear_DstFormat: + store = store_linear<kRGBA_Order>; + store_1 = store_linear_1<kRGBA_Order>; + sizeOfDstPixel = 4; + break; + case kRGBA_8888_SRGB_DstFormat: + store = store_srgb<kRGBA_Order>; + store_1 = store_srgb_1<kRGBA_Order>; + sizeOfDstPixel = 4; + break; + case kRGBA_8888_2Dot2_DstFormat: + store = store_2dot2<kRGBA_Order>; + store_1 = store_2dot2_1<kRGBA_Order>; + 
sizeOfDstPixel = 4; + break; + case kRGBA_8888_Table_DstFormat: + store = store_generic<kRGBA_Order>; + store_1 = store_generic_1<kRGBA_Order>; + sizeOfDstPixel = 4; + break; + case kBGRA_8888_Linear_DstFormat: + store = store_linear<kBGRA_Order>; + store_1 = store_linear_1<kBGRA_Order>; + sizeOfDstPixel = 4; + break; + case kBGRA_8888_SRGB_DstFormat: + store = store_srgb<kBGRA_Order>; + store_1 = store_srgb_1<kBGRA_Order>; + sizeOfDstPixel = 4; + break; + case kBGRA_8888_2Dot2_DstFormat: + store = store_2dot2<kBGRA_Order>; + store_1 = store_2dot2_1<kBGRA_Order>; + sizeOfDstPixel = 4; + break; + case kBGRA_8888_Table_DstFormat: + store = store_generic<kBGRA_Order>; + store_1 = store_generic_1<kBGRA_Order>; + sizeOfDstPixel = 4; + break; + case kF16_Linear_DstFormat: + store = (kOpaque_SkAlphaType == kAlphaType) ? store_f16_opaque<kRGBA_Order> : + store_f16<kRGBA_Order>; + store_1 = (kOpaque_SkAlphaType == kAlphaType) ? store_f16_1_opaque<kRGBA_Order> : + store_f16_1<kRGBA_Order>; + sizeOfDstPixel = 8; + break; + } + + const uint32_t* src = (const uint32_t*) vsrc; + Sk4f rXgXbX, rYgYbY, rZgZbZ, rTgTbT; + load_matrix(matrix, rXgXbX, rYgYbY, rZgZbZ, rTgTbT); + + if (len >= 4) { + // Naively this would be a loop of load-transform-store, but we found it faster to + // move the N+1th load ahead of the Nth store. We don't bother doing this for N<4. 
+ Sk4f r, g, b, a; + load(src, r, g, b, a, srcTables); + src += 4; + len -= 4; + + Sk4f dr, dg, db, da; + while (len >= 4) { + if (kNone_ColorSpaceMatch == kCSM) { + transform_gamut(r, g, b, a, rXgXbX, rYgYbY, rZgZbZ, dr, dg, db, da); + translate_gamut(rTgTbT, dr, dg, db); + } else { + dr = r; + dg = g; + db = b; + da = a; + } + + load(src, r, g, b, a, srcTables); + + store(dst, src - 4, dr, dg, db, da, dstTables); + dst = SkTAddOffset<void>(dst, 4 * sizeOfDstPixel); + src += 4; + len -= 4; + } + + if (kNone_ColorSpaceMatch == kCSM) { + transform_gamut(r, g, b, a, rXgXbX, rYgYbY, rZgZbZ, dr, dg, db, da); + translate_gamut(rTgTbT, dr, dg, db); + } else { + dr = r; + dg = g; + db = b; + da = a; + } + + store(dst, src - 4, dr, dg, db, da, dstTables); + dst = SkTAddOffset<void>(dst, 4 * sizeOfDstPixel); + } + + while (len > 0) { + Sk4f r, g, b, a; + load_1(src, r, g, b, a, srcTables); + + Sk4f rgba; + if (kNone_ColorSpaceMatch == kCSM) { + transform_gamut_1(r, g, b, rXgXbX, rYgYbY, rZgZbZ, rgba); + translate_gamut_1(rTgTbT, rgba); + } else { + rgba = Sk4f(r[0], g[0], b[0], a[0]); + } + + store_1(dst, src, rgba, a, dstTables); + + src += 1; + len -= 1; + dst = SkTAddOffset<void>(dst, sizeOfDstPixel); + } +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +static AI int num_tables(SkColorSpace_XYZ* space) { switch (space->gammaNamed()) { case kSRGB_SkGammaNamed: case k2Dot2Curve_SkGammaNamed: @@ -427,31 +990,145 @@ SkColorSpaceXform_XYZ<kCSM> /////////////////////////////////////////////////////////////////////////////////////////////////// -template <ColorSpaceMatch kCSM> -bool SkColorSpaceXform_XYZ<kCSM>::onApply(ColorFormat dstColorFormat, void* dst, - ColorFormat srcColorFormat, const void* src, - int len, SkAlphaType alphaType) const { - if (kFull_ColorSpaceMatch == kCSM && kPremul_SkAlphaType != alphaType) { - if ((kRGBA_8888_ColorFormat == dstColorFormat && - kRGBA_8888_ColorFormat == srcColorFormat) || - 
(kBGRA_8888_ColorFormat == dstColorFormat && - kBGRA_8888_ColorFormat == srcColorFormat)) - { - memcpy(dst, src, len * sizeof(uint32_t)); +template <SrcFormat kSrc, DstFormat kDst, ColorSpaceMatch kCSM> +static AI bool apply_set_alpha(void* dst, const void* src, int len, SkAlphaType alphaType, + const float* const srcTables[3], const float matrix[13], + const uint8_t* const dstTables[3]) { + switch (alphaType) { + case kOpaque_SkAlphaType: + color_xform_RGBA<kSrc, kDst, kOpaque_SkAlphaType, kCSM> + (dst, src, len, srcTables, matrix, dstTables); return true; - } - - if ((kRGBA_8888_ColorFormat == dstColorFormat && - kBGRA_8888_ColorFormat == srcColorFormat) || - (kBGRA_8888_ColorFormat == dstColorFormat && - kRGBA_8888_ColorFormat == srcColorFormat)) - { - SkOpts::RGBA_to_BGRA((uint32_t*)dst, src, len); + case kUnpremul_SkAlphaType: + color_xform_RGBA<kSrc, kDst, kUnpremul_SkAlphaType, kCSM> + (dst, src, len, srcTables, matrix, dstTables); return true; + default: + return false; + } +} + +template <DstFormat kDst, ColorSpaceMatch kCSM> +static AI bool apply_set_src(void* dst, const void* src, int len, SkAlphaType alphaType, + const float* const srcTables[3], const float matrix[13], + const uint8_t* const dstTables[3], + SkColorSpaceXform::ColorFormat srcColorFormat, + SrcGamma srcGamma) { + switch (srcColorFormat) { + case SkColorSpaceXform::kRGBA_8888_ColorFormat: + switch (srcGamma) { + case kLinear_SrcGamma: + return apply_set_alpha<kRGBA_8888_Linear_SrcFormat, kDst, kCSM> + (dst, src, len, alphaType, nullptr, matrix, dstTables); + default: + return apply_set_alpha<kRGBA_8888_Table_SrcFormat, kDst, kCSM> + (dst, src, len, alphaType, srcTables, matrix, dstTables); + } + case SkColorSpaceXform::kBGRA_8888_ColorFormat: + switch (srcGamma) { + case kLinear_SrcGamma: + return apply_set_alpha<kBGRA_8888_Linear_SrcFormat, kDst, kCSM> + (dst, src, len, alphaType, nullptr, matrix, dstTables); + default: + return apply_set_alpha<kBGRA_8888_Table_SrcFormat, kDst, kCSM> + 
(dst, src, len, alphaType, srcTables, matrix, dstTables); + } + default: + return false; + } +} + +#undef AI + +template <ColorSpaceMatch kCSM> +bool SkColorSpaceXform_XYZ<kCSM> +::onApply(ColorFormat dstColorFormat, void* dst, ColorFormat srcColorFormat, const void* src, + int len, SkAlphaType alphaType) const +{ + if (kFull_ColorSpaceMatch == kCSM) { + if (kPremul_SkAlphaType != alphaType) { + if ((kRGBA_8888_ColorFormat == dstColorFormat && + kRGBA_8888_ColorFormat == srcColorFormat) || + (kBGRA_8888_ColorFormat == dstColorFormat && + kBGRA_8888_ColorFormat == srcColorFormat)) + { + memcpy(dst, src, len * sizeof(uint32_t)); + return true; + } + if ((kRGBA_8888_ColorFormat == dstColorFormat && + kBGRA_8888_ColorFormat == srcColorFormat) || + (kBGRA_8888_ColorFormat == dstColorFormat && + kRGBA_8888_ColorFormat == srcColorFormat)) + { + SkOpts::RGBA_to_BGRA((uint32_t*) dst, src, len); + return true; + } } } - return this->applyPipeline(dstColorFormat, dst, srcColorFormat, src, len, alphaType); + if (kRGBA_F32_ColorFormat == dstColorFormat || + kBGR_565_ColorFormat == dstColorFormat || + kRGBA_F32_ColorFormat == srcColorFormat || + kRGBA_F16_ColorFormat == srcColorFormat || + kRGBA_U16_BE_ColorFormat == srcColorFormat || + kRGB_U16_BE_ColorFormat == srcColorFormat || + kPremul_SkAlphaType == alphaType) + { + return this->applyPipeline(dstColorFormat, dst, srcColorFormat, src, len, alphaType); + } + + switch (dstColorFormat) { + case kRGBA_8888_ColorFormat: + switch (fDstGamma) { + case kLinear_DstGamma: + return apply_set_src<kRGBA_8888_Linear_DstFormat, kCSM> + (dst, src, len, alphaType, fSrcGammaTables, fSrcToDst, nullptr, + srcColorFormat, fSrcGamma); + case kSRGB_DstGamma: + return apply_set_src<kRGBA_8888_SRGB_DstFormat, kCSM> + (dst, src, len, alphaType, fSrcGammaTables, fSrcToDst, nullptr, + srcColorFormat, fSrcGamma); + case k2Dot2_DstGamma: + return apply_set_src<kRGBA_8888_2Dot2_DstFormat, kCSM> + (dst, src, len, alphaType, fSrcGammaTables, fSrcToDst, 
nullptr, + srcColorFormat, fSrcGamma); + case kTable_DstGamma: + return apply_set_src<kRGBA_8888_Table_DstFormat, kCSM> + (dst, src, len, alphaType, fSrcGammaTables, fSrcToDst, fDstGammaTables, + srcColorFormat, fSrcGamma); + } + case kBGRA_8888_ColorFormat: + switch (fDstGamma) { + case kLinear_DstGamma: + return apply_set_src<kBGRA_8888_Linear_DstFormat, kCSM> + (dst, src, len, alphaType, fSrcGammaTables, fSrcToDst, nullptr, + srcColorFormat, fSrcGamma); + case kSRGB_DstGamma: + return apply_set_src<kBGRA_8888_SRGB_DstFormat, kCSM> + (dst, src, len, alphaType, fSrcGammaTables, fSrcToDst, nullptr, + srcColorFormat, fSrcGamma); + case k2Dot2_DstGamma: + return apply_set_src<kBGRA_8888_2Dot2_DstFormat, kCSM> + (dst, src, len, alphaType, fSrcGammaTables, fSrcToDst, nullptr, + srcColorFormat, fSrcGamma); + case kTable_DstGamma: + return apply_set_src<kBGRA_8888_Table_DstFormat, kCSM> + (dst, src, len, alphaType, fSrcGammaTables, fSrcToDst, fDstGammaTables, + srcColorFormat, fSrcGamma); + } + case kRGBA_F16_ColorFormat: + switch (fDstGamma) { + case kLinear_DstGamma: + return apply_set_src<kF16_Linear_DstFormat, kCSM> + (dst, src, len, alphaType, fSrcGammaTables, fSrcToDst, nullptr, + srcColorFormat, fSrcGamma); + default: + return false; + } + default: + SkASSERT(false); + return false; + } } bool SkColorSpaceXform::apply(ColorFormat dstColorFormat, void* dst, ColorFormat srcColorFormat, |