aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
authorGravatar Matt Sarett <msarett@google.com>2016-10-14 13:04:55 -0400
committerGravatar Skia Commit-Bot <skia-commit-bot@chromium.org>2016-10-14 17:31:07 +0000
commitd478a99f5c29f0e8fa64b50831f53232f5577f2d (patch)
tree9aec8dcdf52d081188c6cbcc9f47f6c7011b1d4a /src
parentcd72afa8f19260ab231348083bf0ed51722240d7 (diff)
Refactor color_xform_RGBA() to improve performance
Now that we've identified this as the "fast path", it has become (somewhat) stable, and I'm ready to ship it, I feel comfortable playing games with clang to get the behavior we want. toSRGB Performance on HP z620: Before: 370us or 470us (depending on the mood of my computer) After: 345us (or better, always) My guess is that clang optimizers do a better job of interleaving instructions effectively when we put everything into one function. Regardless of being silly about performance, I also actually prefer how the code reads this way. BUG=skia: GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=3440 Change-Id: Id7a40fb9966341ab184a5331b3e4da19b3af3344 Reviewed-on: https://skia-review.googlesource.com/3440 Commit-Queue: Matt Sarett <msarett@google.com> Commit-Queue: Mike Klein <mtklein@chromium.org> Reviewed-by: Mike Klein <mtklein@chromium.org>
Diffstat (limited to 'src')
-rw-r--r--src/core/SkColorSpaceXform.cpp165
1 files changed, 77 insertions, 88 deletions
diff --git a/src/core/SkColorSpaceXform.cpp b/src/core/SkColorSpaceXform.cpp
index b71ee4ab7a..50cd9c11e5 100644
--- a/src/core/SkColorSpaceXform.cpp
+++ b/src/core/SkColorSpaceXform.cpp
@@ -1011,90 +1011,6 @@ typedef decltype(load_rgb_from_tables_1<kRGBA_Order>)* Load1Fn;
typedef decltype(store_generic<kRGBA_Order> )* StoreFn;
typedef decltype(store_generic_1<kRGBA_Order> )* Store1Fn;
-template <SkAlphaType kAlphaType,
- ColorSpaceMatch kCSM>
-static inline void do_color_xform(void* dst, const void* vsrc, int len,
- const float* const srcTables[3], const float matrix[16],
- const uint8_t* const dstTables[3], LoadFn load, Load1Fn load_1,
- StoreFn store, Store1Fn store_1, size_t sizeOfDstPixel) {
- const uint32_t* src = (const uint32_t*) vsrc;
- Sk4f rXgXbX, rYgYbY, rZgZbZ, rTgTbT;
- load_matrix(matrix, rXgXbX, rYgYbY, rZgZbZ, rTgTbT);
-
- if (len >= 4) {
- // Naively this would be a loop of load-transform-store, but we found it faster to
- // move the N+1th load ahead of the Nth store. We don't bother doing this for N<4.
- Sk4f r, g, b, a;
- load(src, r, g, b, a, srcTables);
- src += 4;
- len -= 4;
-
- Sk4f dr, dg, db, da;
- while (len >= 4) {
- if (kNone_ColorSpaceMatch == kCSM) {
- transform_gamut(r, g, b, a, rXgXbX, rYgYbY, rZgZbZ, dr, dg, db, da);
- translate_gamut(rTgTbT, dr, dg, db);
- } else {
- dr = r;
- dg = g;
- db = b;
- da = a;
- }
-
- if (kPremul_SkAlphaType == kAlphaType) {
- premultiply(dr, dg, db, da);
- }
-
- load(src, r, g, b, a, srcTables);
-
- store(dst, src - 4, dr, dg, db, da, dstTables);
- dst = SkTAddOffset<void>(dst, 4 * sizeOfDstPixel);
- src += 4;
- len -= 4;
- }
-
- if (kNone_ColorSpaceMatch == kCSM) {
- transform_gamut(r, g, b, a, rXgXbX, rYgYbY, rZgZbZ, dr, dg, db, da);
- translate_gamut(rTgTbT, dr, dg, db);
- } else {
- dr = r;
- dg = g;
- db = b;
- da = a;
- }
-
- if (kPremul_SkAlphaType == kAlphaType) {
- premultiply(dr, dg, db, da);
- }
-
- store(dst, src - 4, dr, dg, db, da, dstTables);
- dst = SkTAddOffset<void>(dst, 4 * sizeOfDstPixel);
- }
-
- while (len > 0) {
- Sk4f r, g, b, a;
- load_1(src, r, g, b, a, srcTables);
-
- Sk4f rgba;
- if (kNone_ColorSpaceMatch == kCSM) {
- transform_gamut_1(r, g, b, rXgXbX, rYgYbY, rZgZbZ, rgba);
- translate_gamut_1(rTgTbT, rgba);
- } else {
- rgba = Sk4f(r[0], g[0], b[0], a[0]);
- }
-
- if (kPremul_SkAlphaType == kAlphaType) {
- premultiply_1(a, rgba);
- }
-
- store_1(dst, src, rgba, a, dstTables);
-
- src += 1;
- len -= 1;
- dst = SkTAddOffset<void>(dst, sizeOfDstPixel);
- }
-}
-
enum SrcFormat {
kRGBA_8888_Linear_SrcFormat,
kRGBA_8888_Table_SrcFormat,
@@ -1119,7 +1035,7 @@ template <SrcFormat kSrc,
DstFormat kDst,
SkAlphaType kAlphaType,
ColorSpaceMatch kCSM>
-static void color_xform_RGBA(void* dst, const void* src, int len,
+static void color_xform_RGBA(void* dst, const void* vsrc, int len,
const float* const srcTables[3], const float matrix[16],
const uint8_t* const dstTables[3]) {
LoadFn load;
@@ -1224,9 +1140,82 @@ static void color_xform_RGBA(void* dst, const void* src, int len,
break;
}
- do_color_xform<kAlphaType, kCSM>
- (dst, src, len, srcTables, matrix, dstTables, load, load_1, store, store_1,
- sizeOfDstPixel);
+ const uint32_t* src = (const uint32_t*) vsrc;
+ Sk4f rXgXbX, rYgYbY, rZgZbZ, rTgTbT;
+ load_matrix(matrix, rXgXbX, rYgYbY, rZgZbZ, rTgTbT);
+
+ if (len >= 4) {
+ // Naively this would be a loop of load-transform-store, but we found it faster to
+ // move the N+1th load ahead of the Nth store. We don't bother doing this for N<4.
+ Sk4f r, g, b, a;
+ load(src, r, g, b, a, srcTables);
+ src += 4;
+ len -= 4;
+
+ Sk4f dr, dg, db, da;
+ while (len >= 4) {
+ if (kNone_ColorSpaceMatch == kCSM) {
+ transform_gamut(r, g, b, a, rXgXbX, rYgYbY, rZgZbZ, dr, dg, db, da);
+ translate_gamut(rTgTbT, dr, dg, db);
+ } else {
+ dr = r;
+ dg = g;
+ db = b;
+ da = a;
+ }
+
+ if (kPremul_SkAlphaType == kAlphaType) {
+ premultiply(dr, dg, db, da);
+ }
+
+ load(src, r, g, b, a, srcTables);
+
+ store(dst, src - 4, dr, dg, db, da, dstTables);
+ dst = SkTAddOffset<void>(dst, 4 * sizeOfDstPixel);
+ src += 4;
+ len -= 4;
+ }
+
+ if (kNone_ColorSpaceMatch == kCSM) {
+ transform_gamut(r, g, b, a, rXgXbX, rYgYbY, rZgZbZ, dr, dg, db, da);
+ translate_gamut(rTgTbT, dr, dg, db);
+ } else {
+ dr = r;
+ dg = g;
+ db = b;
+ da = a;
+ }
+
+ if (kPremul_SkAlphaType == kAlphaType) {
+ premultiply(dr, dg, db, da);
+ }
+
+ store(dst, src - 4, dr, dg, db, da, dstTables);
+ dst = SkTAddOffset<void>(dst, 4 * sizeOfDstPixel);
+ }
+
+ while (len > 0) {
+ Sk4f r, g, b, a;
+ load_1(src, r, g, b, a, srcTables);
+
+ Sk4f rgba;
+ if (kNone_ColorSpaceMatch == kCSM) {
+ transform_gamut_1(r, g, b, rXgXbX, rYgYbY, rZgZbZ, rgba);
+ translate_gamut_1(rTgTbT, rgba);
+ } else {
+ rgba = Sk4f(r[0], g[0], b[0], a[0]);
+ }
+
+ if (kPremul_SkAlphaType == kAlphaType) {
+ premultiply_1(a, rgba);
+ }
+
+ store_1(dst, src, rgba, a, dstTables);
+
+ src += 1;
+ len -= 1;
+ dst = SkTAddOffset<void>(dst, sizeOfDstPixel);
+ }
}
///////////////////////////////////////////////////////////////////////////////////////////////////