diff options
author | Matt Sarett <msarett@google.com> | 2016-10-14 13:04:55 -0400 |
---|---|---|
committer | Skia Commit-Bot <skia-commit-bot@chromium.org> | 2016-10-14 17:31:07 +0000 |
commit | d478a99f5c29f0e8fa64b50831f53232f5577f2d (patch) | |
tree | 9aec8dcdf52d081188c6cbcc9f47f6c7011b1d4a /src | |
parent | cd72afa8f19260ab231348083bf0ed51722240d7 (diff) |
Refactor color_xform_RGBA() to improve performance
Now that we've identified this as the "fast path", it has
become (somewhat) stable, and I'm ready to ship it, I feel
comfortable playing games with clang to get the behavior
we want.
toSRGB Performance on HP z620:
Before: 370us or 470us (depending on the mood of my computer)
After: 345us (or better, always)
My guess is that clang optimizers do a better job of interleaving
instructions effectively when we put everything into one function.
Regardless of being silly about performance, I also actually
prefer how the code reads this way.
BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=3440
Change-Id: Id7a40fb9966341ab184a5331b3e4da19b3af3344
Reviewed-on: https://skia-review.googlesource.com/3440
Commit-Queue: Matt Sarett <msarett@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
Reviewed-by: Mike Klein <mtklein@chromium.org>
Diffstat (limited to 'src')
-rw-r--r-- | src/core/SkColorSpaceXform.cpp | 165 |
1 files changed, 77 insertions, 88 deletions
diff --git a/src/core/SkColorSpaceXform.cpp b/src/core/SkColorSpaceXform.cpp index b71ee4ab7a..50cd9c11e5 100644 --- a/src/core/SkColorSpaceXform.cpp +++ b/src/core/SkColorSpaceXform.cpp @@ -1011,90 +1011,6 @@ typedef decltype(load_rgb_from_tables_1<kRGBA_Order>)* Load1Fn; typedef decltype(store_generic<kRGBA_Order> )* StoreFn; typedef decltype(store_generic_1<kRGBA_Order> )* Store1Fn; -template <SkAlphaType kAlphaType, - ColorSpaceMatch kCSM> -static inline void do_color_xform(void* dst, const void* vsrc, int len, - const float* const srcTables[3], const float matrix[16], - const uint8_t* const dstTables[3], LoadFn load, Load1Fn load_1, - StoreFn store, Store1Fn store_1, size_t sizeOfDstPixel) { - const uint32_t* src = (const uint32_t*) vsrc; - Sk4f rXgXbX, rYgYbY, rZgZbZ, rTgTbT; - load_matrix(matrix, rXgXbX, rYgYbY, rZgZbZ, rTgTbT); - - if (len >= 4) { - // Naively this would be a loop of load-transform-store, but we found it faster to - // move the N+1th load ahead of the Nth store. We don't bother doing this for N<4. 
- Sk4f r, g, b, a; - load(src, r, g, b, a, srcTables); - src += 4; - len -= 4; - - Sk4f dr, dg, db, da; - while (len >= 4) { - if (kNone_ColorSpaceMatch == kCSM) { - transform_gamut(r, g, b, a, rXgXbX, rYgYbY, rZgZbZ, dr, dg, db, da); - translate_gamut(rTgTbT, dr, dg, db); - } else { - dr = r; - dg = g; - db = b; - da = a; - } - - if (kPremul_SkAlphaType == kAlphaType) { - premultiply(dr, dg, db, da); - } - - load(src, r, g, b, a, srcTables); - - store(dst, src - 4, dr, dg, db, da, dstTables); - dst = SkTAddOffset<void>(dst, 4 * sizeOfDstPixel); - src += 4; - len -= 4; - } - - if (kNone_ColorSpaceMatch == kCSM) { - transform_gamut(r, g, b, a, rXgXbX, rYgYbY, rZgZbZ, dr, dg, db, da); - translate_gamut(rTgTbT, dr, dg, db); - } else { - dr = r; - dg = g; - db = b; - da = a; - } - - if (kPremul_SkAlphaType == kAlphaType) { - premultiply(dr, dg, db, da); - } - - store(dst, src - 4, dr, dg, db, da, dstTables); - dst = SkTAddOffset<void>(dst, 4 * sizeOfDstPixel); - } - - while (len > 0) { - Sk4f r, g, b, a; - load_1(src, r, g, b, a, srcTables); - - Sk4f rgba; - if (kNone_ColorSpaceMatch == kCSM) { - transform_gamut_1(r, g, b, rXgXbX, rYgYbY, rZgZbZ, rgba); - translate_gamut_1(rTgTbT, rgba); - } else { - rgba = Sk4f(r[0], g[0], b[0], a[0]); - } - - if (kPremul_SkAlphaType == kAlphaType) { - premultiply_1(a, rgba); - } - - store_1(dst, src, rgba, a, dstTables); - - src += 1; - len -= 1; - dst = SkTAddOffset<void>(dst, sizeOfDstPixel); - } -} - enum SrcFormat { kRGBA_8888_Linear_SrcFormat, kRGBA_8888_Table_SrcFormat, @@ -1119,7 +1035,7 @@ template <SrcFormat kSrc, DstFormat kDst, SkAlphaType kAlphaType, ColorSpaceMatch kCSM> -static void color_xform_RGBA(void* dst, const void* src, int len, +static void color_xform_RGBA(void* dst, const void* vsrc, int len, const float* const srcTables[3], const float matrix[16], const uint8_t* const dstTables[3]) { LoadFn load; @@ -1224,9 +1140,82 @@ static void color_xform_RGBA(void* dst, const void* src, int len, break; } - 
do_color_xform<kAlphaType, kCSM> - (dst, src, len, srcTables, matrix, dstTables, load, load_1, store, store_1, - sizeOfDstPixel); + const uint32_t* src = (const uint32_t*) vsrc; + Sk4f rXgXbX, rYgYbY, rZgZbZ, rTgTbT; + load_matrix(matrix, rXgXbX, rYgYbY, rZgZbZ, rTgTbT); + + if (len >= 4) { + // Naively this would be a loop of load-transform-store, but we found it faster to + // move the N+1th load ahead of the Nth store. We don't bother doing this for N<4. + Sk4f r, g, b, a; + load(src, r, g, b, a, srcTables); + src += 4; + len -= 4; + + Sk4f dr, dg, db, da; + while (len >= 4) { + if (kNone_ColorSpaceMatch == kCSM) { + transform_gamut(r, g, b, a, rXgXbX, rYgYbY, rZgZbZ, dr, dg, db, da); + translate_gamut(rTgTbT, dr, dg, db); + } else { + dr = r; + dg = g; + db = b; + da = a; + } + + if (kPremul_SkAlphaType == kAlphaType) { + premultiply(dr, dg, db, da); + } + + load(src, r, g, b, a, srcTables); + + store(dst, src - 4, dr, dg, db, da, dstTables); + dst = SkTAddOffset<void>(dst, 4 * sizeOfDstPixel); + src += 4; + len -= 4; + } + + if (kNone_ColorSpaceMatch == kCSM) { + transform_gamut(r, g, b, a, rXgXbX, rYgYbY, rZgZbZ, dr, dg, db, da); + translate_gamut(rTgTbT, dr, dg, db); + } else { + dr = r; + dg = g; + db = b; + da = a; + } + + if (kPremul_SkAlphaType == kAlphaType) { + premultiply(dr, dg, db, da); + } + + store(dst, src - 4, dr, dg, db, da, dstTables); + dst = SkTAddOffset<void>(dst, 4 * sizeOfDstPixel); + } + + while (len > 0) { + Sk4f r, g, b, a; + load_1(src, r, g, b, a, srcTables); + + Sk4f rgba; + if (kNone_ColorSpaceMatch == kCSM) { + transform_gamut_1(r, g, b, rXgXbX, rYgYbY, rZgZbZ, rgba); + translate_gamut_1(rTgTbT, rgba); + } else { + rgba = Sk4f(r[0], g[0], b[0], a[0]); + } + + if (kPremul_SkAlphaType == kAlphaType) { + premultiply_1(a, rgba); + } + + store_1(dst, src, rgba, a, dstTables); + + src += 1; + len -= 1; + dst = SkTAddOffset<void>(dst, sizeOfDstPixel); + } } 
/////////////////////////////////////////////////////////////////////////////////////////////////// |