aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: Mike Klein <mtklein@chromium.org> 2017-05-24 19:00:47 -0400
committer: Skia Commit-Bot <skia-commit-bot@chromium.org> 2017-05-25 13:25:47 +0000
commit: c998f733e3a3da0674fe32acfcec34b4650e4c2a (patch)
tree: 6d655a3b0300572f3b7598b27e43960442cbba48
parent: d1a6dd6ad96b5eb291976d3d0c949496af470762 (diff)
make sure to_srgb maps 1 to 1
CQ_INCLUDE_TRYBOTS=skia.primary:Test-Ubuntu-Clang-GCE-CPU-AVX2-x86_64-Release-SK_CPU_LIMIT_SSE2,Test-Ubuntu-Clang-GCE-CPU-AVX2-x86_64-Release-SK_CPU_LIMIT_SSE41,Test-Android-Clang-Nexus10-CPU-Exynos5250-arm-Release-Android,Test-Android-Clang-PixelC-CPU-TegraX1-arm64-Release-Android,Test-Android-Clang-Ci20-CPU-IngenicJZ4780-mipsel-Release-Android
BUG=skia:6678,skia:6683
Change-Id: I217084fa0a11ad661a8751f0c3b1cade5cc52473
Reviewed-on: https://skia-review.googlesource.com/17902
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
-rw-r--r-- src/jumper/SkJumper_generated.S 93
-rw-r--r-- src/jumper/SkJumper_generated_win.S 77
-rw-r--r-- src/jumper/SkJumper_stages.cpp 15
-rw-r--r-- tests/SRGBTest.cpp 24
4 files changed, 147 insertions, 62 deletions
diff --git a/src/jumper/SkJumper_generated.S b/src/jumper/SkJumper_generated.S
index 3208b0edf5..4d755b69e0 100644
--- a/src/jumper/SkJumper_generated.S
+++ b/src/jumper/SkJumper_generated.S
@@ -1434,20 +1434,20 @@ _sk_to_srgb_aarch64:
.long 0x6e31dc51 // fmul v17.4s, v2.4s, v17.4s
.long 0x6ea2e662 // fcmgt v2.4s, v19.4s, v2.4s
.long 0x4e040d13 // dup v19.4s, w8
- .long 0x52a7f228 // mov w8, #0x3f910000
- .long 0x728281a8 // movk w8, #0x140d
+ .long 0x52a7f208 // mov w8, #0x3f900000
+ .long 0x72947ae8 // movk w8, #0xa3d7
.long 0x6e36de10 // fmul v16.4s, v16.4s, v22.4s
.long 0x4e040d16 // dup v22.4s, w8
.long 0x6e37de94 // fmul v20.4s, v20.4s, v23.4s
.long 0x4eb31e77 // mov v23.16b, v19.16b
.long 0x6e38deb5 // fmul v21.4s, v21.4s, v24.4s
.long 0x4eb31e78 // mov v24.16b, v19.16b
- .long 0x52a7c288 // mov w8, #0x3e140000
+ .long 0x52a7c208 // mov w8, #0x3e100000
.long 0x4e30ce57 // fmla v23.4s, v18.4s, v16.4s
.long 0x4e34ce58 // fmla v24.4s, v18.4s, v20.4s
.long 0x4e35ce53 // fmla v19.4s, v18.4s, v21.4s
.long 0x4eb61ed2 // mov v18.16b, v22.16b
- .long 0x7293d1a8 // movk w8, #0x9e8d
+ .long 0x7298c988 // movk w8, #0xc64c
.long 0x4e30cef2 // fmla v18.4s, v23.4s, v16.4s
.long 0x4eb61ed7 // mov v23.16b, v22.16b
.long 0x4e35ce76 // fmla v22.4s, v19.4s, v21.4s
@@ -5848,14 +5848,14 @@ _sk_to_srgb_vfp4:
.long 0xf22221b2 // vorr d2, d18, d18
.long 0xe12fff13 // bx r3
.long 0xe320f000 // nop {0}
- .long 0x3e149e8d // .word 0x3e149e8d
- .long 0x3e149e8d // .word 0x3e149e8d
+ .long 0x3e10c64c // .word 0x3e10c64c
+ .long 0x3e10c64c // .word 0x3e10c64c
.long 0xbb20d739 // .word 0xbb20d739
.long 0xbb20d739 // .word 0xbb20d739
.long 0x3c629fba // .word 0x3c629fba
.long 0x3c629fba // .word 0x3c629fba
- .long 0x3f91140d // .word 0x3f91140d
- .long 0x3f91140d // .word 0x3f91140d
+ .long 0x3f90a3d7 // .word 0x3f90a3d7
+ .long 0x3f90a3d7 // .word 0x3f90a3d7
.long 0x414eb852 // .word 0x414eb852
.long 0x414eb852 // .word 0x414eb852
.long 0x3b98b1a8 // .word 0x3b98b1a8
@@ -13601,8 +13601,9 @@ BALIGN4
.byte 61,82,184,78,65 // cmp $0x414eb852,%eax
.byte 186,159,98,60,57 // mov $0x393c629f,%edx
.byte 215 // xlat %ds:(%rbx)
- .byte 32,187,13,20,145,63 // and %bh,0x3f91140d(%rbx)
- .byte 141,158,20,62,168,177 // lea -0x4e57c1ec(%rsi),%ebx
+ .byte 32,187,109,165,144,63 // and %bh,0x3f90a56d(%rbx)
+ .byte 252 // cld
+ .byte 191,16,62,168,177 // mov $0xb1a83e10,%edi
.byte 152 // cwtl
.byte 59,0 // cmp (%rax),%eax
.byte 0,128,63,0,0,192 // add %al,-0x3fffffc1(%rax)
@@ -20335,10 +20336,12 @@ BALIGN4
.byte 61,82,184,78,65 // cmp $0x414eb852,%eax
.byte 57,215 // cmp %edx,%edi
.byte 32,187,186,159,98,60 // and %bh,0x3c629fba(%rbx)
- .byte 13,20,145,63,141 // or $0x8d3f9114,%eax
- .byte 158 // sahf
- .byte 20,62 // adc $0x3e,%al
- .byte 168,177 // test $0xb1,%al
+ .byte 109 // insl (%dx),%es:(%rdi)
+ .byte 165 // movsl %ds:(%rsi),%es:(%rdi)
+ .byte 144 // nop
+ .byte 63 // (bad)
+ .byte 252 // cld
+ .byte 191,16,62,168,177 // mov $0xb1a83e10,%edi
.byte 152 // cwtl
.byte 59,0 // cmp (%rax),%eax
.byte 0,128,63,0,0,192 // add %al,-0x3fffffc1(%rax)
@@ -25952,18 +25955,29 @@ BALIGN16
.byte 60,186 // cmp $0xba,%al
.byte 159 // lahf
.byte 98 // (bad)
- .byte 60,13 // cmp $0xd,%al
- .byte 20,145 // adc $0x91,%al
+ .byte 60,109 // cmp $0x6d,%al
+ .byte 165 // movsl %ds:(%rsi),%es:(%rdi)
+ .byte 144 // nop
.byte 63 // (bad)
- .byte 13,20,145,63,13 // or $0xd3f9114,%eax
- .byte 20,145 // adc $0x91,%al
+ .byte 109 // insl (%dx),%es:(%rdi)
+ .byte 165 // movsl %ds:(%rsi),%es:(%rdi)
+ .byte 144 // nop
+ .byte 63 // (bad)
+ .byte 109 // insl (%dx),%es:(%rdi)
+ .byte 165 // movsl %ds:(%rsi),%es:(%rdi)
+ .byte 144 // nop
.byte 63 // (bad)
- .byte 13,20,145,63,141 // or $0x8d3f9114,%eax
- .byte 158 // sahf
- .byte 20,62 // adc $0x3e,%al
- .byte 141,158,20,62,141,158 // lea -0x6172c1ec(%rsi),%ebx
- .byte 20,62 // adc $0x3e,%al
- .byte 141,158,20,62,168,177 // lea -0x4e57c1ec(%rsi),%ebx
+ .byte 109 // insl (%dx),%es:(%rdi)
+ .byte 165 // movsl %ds:(%rsi),%es:(%rdi)
+ .byte 144 // nop
+ .byte 63 // (bad)
+ .byte 252 // cld
+ .byte 191,16,62,252,191 // mov $0xbffc3e10,%edi
+ .byte 16,62 // adc %bh,(%rsi)
+ .byte 252 // cld
+ .byte 191,16,62,252,191 // mov $0xbffc3e10,%edi
+ .byte 16,62 // adc %bh,(%rsi)
+ .byte 168,177 // test $0xb1,%al
.byte 152 // cwtl
.byte 59,168,177,152,59,168 // cmp -0x57c4674f(%rax),%ebp
.byte 177,152 // mov $0x98,%cl
@@ -33082,18 +33096,29 @@ BALIGN16
.byte 60,186 // cmp $0xba,%al
.byte 159 // lahf
.byte 98 // (bad)
- .byte 60,13 // cmp $0xd,%al
- .byte 20,145 // adc $0x91,%al
+ .byte 60,109 // cmp $0x6d,%al
+ .byte 165 // movsl %ds:(%rsi),%es:(%rdi)
+ .byte 144 // nop
.byte 63 // (bad)
- .byte 13,20,145,63,13 // or $0xd3f9114,%eax
- .byte 20,145 // adc $0x91,%al
+ .byte 109 // insl (%dx),%es:(%rdi)
+ .byte 165 // movsl %ds:(%rsi),%es:(%rdi)
+ .byte 144 // nop
+ .byte 63 // (bad)
+ .byte 109 // insl (%dx),%es:(%rdi)
+ .byte 165 // movsl %ds:(%rsi),%es:(%rdi)
+ .byte 144 // nop
.byte 63 // (bad)
- .byte 13,20,145,63,141 // or $0x8d3f9114,%eax
- .byte 158 // sahf
- .byte 20,62 // adc $0x3e,%al
- .byte 141,158,20,62,141,158 // lea -0x6172c1ec(%rsi),%ebx
- .byte 20,62 // adc $0x3e,%al
- .byte 141,158,20,62,168,177 // lea -0x4e57c1ec(%rsi),%ebx
+ .byte 109 // insl (%dx),%es:(%rdi)
+ .byte 165 // movsl %ds:(%rsi),%es:(%rdi)
+ .byte 144 // nop
+ .byte 63 // (bad)
+ .byte 252 // cld
+ .byte 191,16,62,252,191 // mov $0xbffc3e10,%edi
+ .byte 16,62 // adc %bh,(%rsi)
+ .byte 252 // cld
+ .byte 191,16,62,252,191 // mov $0xbffc3e10,%edi
+ .byte 16,62 // adc %bh,(%rsi)
+ .byte 168,177 // test $0xb1,%al
.byte 152 // cwtl
.byte 59,168,177,152,59,168 // cmp -0x57c4674f(%rax),%ebp
.byte 177,152 // mov $0x98,%cl
diff --git a/src/jumper/SkJumper_generated_win.S b/src/jumper/SkJumper_generated_win.S
index 7dc87ba51e..0c80a5c983 100644
--- a/src/jumper/SkJumper_generated_win.S
+++ b/src/jumper/SkJumper_generated_win.S
@@ -4529,8 +4529,9 @@ ALIGN 4
DB 61,82,184,78,65 ; cmp $0x414eb852,%eax
DB 186,159,98,60,57 ; mov $0x393c629f,%edx
DB 215 ; xlat %ds:(%rbx)
- DB 32,187,13,20,145,63 ; and %bh,0x3f91140d(%rbx)
- DB 141,158,20,62,168,177 ; lea -0x4e57c1ec(%rsi),%ebx
+ DB 32,187,109,165,144,63 ; and %bh,0x3f90a56d(%rbx)
+ DB 252 ; cld
+ DB 191,16,62,168,177 ; mov $0xb1a83e10,%edi
DB 152 ; cwtl
DB 59,0 ; cmp (%rax),%eax
DB 0,128,63,0,0,192 ; add %al,-0x3fffffc1(%rax)
@@ -11042,10 +11043,12 @@ ALIGN 4
DB 61,82,184,78,65 ; cmp $0x414eb852,%eax
DB 57,215 ; cmp %edx,%edi
DB 32,187,186,159,98,60 ; and %bh,0x3c629fba(%rbx)
- DB 13,20,145,63,141 ; or $0x8d3f9114,%eax
- DB 158 ; sahf
- DB 20,62 ; adc $0x3e,%al
- DB 168,177 ; test $0xb1,%al
+ DB 109 ; insl (%dx),%es:(%rdi)
+ DB 165 ; movsl %ds:(%rsi),%es:(%rdi)
+ DB 144 ; nop
+ DB 63 ; (bad)
+ DB 252 ; cld
+ DB 191,16,62,168,177 ; mov $0xb1a83e10,%edi
DB 152 ; cwtl
DB 59,0 ; cmp (%rax),%eax
DB 0,128,63,0,0,192 ; add %al,-0x3fffffc1(%rax)
@@ -16438,18 +16441,29 @@ ALIGN 16
DB 60,186 ; cmp $0xba,%al
DB 159 ; lahf
DB 98 ; (bad)
- DB 60,13 ; cmp $0xd,%al
- DB 20,145 ; adc $0x91,%al
+ DB 60,109 ; cmp $0x6d,%al
+ DB 165 ; movsl %ds:(%rsi),%es:(%rdi)
+ DB 144 ; nop
DB 63 ; (bad)
- DB 13,20,145,63,13 ; or $0xd3f9114,%eax
- DB 20,145 ; adc $0x91,%al
+ DB 109 ; insl (%dx),%es:(%rdi)
+ DB 165 ; movsl %ds:(%rsi),%es:(%rdi)
+ DB 144 ; nop
+ DB 63 ; (bad)
+ DB 109 ; insl (%dx),%es:(%rdi)
+ DB 165 ; movsl %ds:(%rsi),%es:(%rdi)
+ DB 144 ; nop
DB 63 ; (bad)
- DB 13,20,145,63,141 ; or $0x8d3f9114,%eax
- DB 158 ; sahf
- DB 20,62 ; adc $0x3e,%al
- DB 141,158,20,62,141,158 ; lea -0x6172c1ec(%rsi),%ebx
- DB 20,62 ; adc $0x3e,%al
- DB 141,158,20,62,168,177 ; lea -0x4e57c1ec(%rsi),%ebx
+ DB 109 ; insl (%dx),%es:(%rdi)
+ DB 165 ; movsl %ds:(%rsi),%es:(%rdi)
+ DB 144 ; nop
+ DB 63 ; (bad)
+ DB 252 ; cld
+ DB 191,16,62,252,191 ; mov $0xbffc3e10,%edi
+ DB 16,62 ; adc %bh,(%rsi)
+ DB 252 ; cld
+ DB 191,16,62,252,191 ; mov $0xbffc3e10,%edi
+ DB 16,62 ; adc %bh,(%rsi)
+ DB 168,177 ; test $0xb1,%al
DB 152 ; cwtl
DB 59,168,177,152,59,168 ; cmp -0x57c4674f(%rax),%ebp
DB 177,152 ; mov $0x98,%cl
@@ -23345,18 +23359,29 @@ ALIGN 16
DB 60,186 ; cmp $0xba,%al
DB 159 ; lahf
DB 98 ; (bad)
- DB 60,13 ; cmp $0xd,%al
- DB 20,145 ; adc $0x91,%al
+ DB 60,109 ; cmp $0x6d,%al
+ DB 165 ; movsl %ds:(%rsi),%es:(%rdi)
+ DB 144 ; nop
DB 63 ; (bad)
- DB 13,20,145,63,13 ; or $0xd3f9114,%eax
- DB 20,145 ; adc $0x91,%al
+ DB 109 ; insl (%dx),%es:(%rdi)
+ DB 165 ; movsl %ds:(%rsi),%es:(%rdi)
+ DB 144 ; nop
+ DB 63 ; (bad)
+ DB 109 ; insl (%dx),%es:(%rdi)
+ DB 165 ; movsl %ds:(%rsi),%es:(%rdi)
+ DB 144 ; nop
DB 63 ; (bad)
- DB 13,20,145,63,141 ; or $0x8d3f9114,%eax
- DB 158 ; sahf
- DB 20,62 ; adc $0x3e,%al
- DB 141,158,20,62,141,158 ; lea -0x6172c1ec(%rsi),%ebx
- DB 20,62 ; adc $0x3e,%al
- DB 141,158,20,62,168,177 ; lea -0x4e57c1ec(%rsi),%ebx
+ DB 109 ; insl (%dx),%es:(%rdi)
+ DB 165 ; movsl %ds:(%rsi),%es:(%rdi)
+ DB 144 ; nop
+ DB 63 ; (bad)
+ DB 252 ; cld
+ DB 191,16,62,252,191 ; mov $0xbffc3e10,%edi
+ DB 16,62 ; adc %bh,(%rsi)
+ DB 252 ; cld
+ DB 191,16,62,252,191 ; mov $0xbffc3e10,%edi
+ DB 16,62 ; adc %bh,(%rsi)
+ DB 168,177 ; test $0xb1,%al
DB 152 ; cwtl
DB 59,168,177,152,59,168 ; cmp -0x57c4674f(%rax),%ebp
DB 177,152 ; mov $0x98,%cl
diff --git a/src/jumper/SkJumper_stages.cpp b/src/jumper/SkJumper_stages.cpp
index 39e2eb83b0..d1bbe9f442 100644
--- a/src/jumper/SkJumper_stages.cpp
+++ b/src/jumper/SkJumper_stages.cpp
@@ -621,10 +621,21 @@ STAGE(from_srgb) {
}
STAGE(to_srgb) {
auto fn = [&](F l) {
+ // We tweak c and d for each instruction set to make sure fn(1) is exactly 1.
+ #if defined(JUMPER) && defined(__SSE2__)
+ const float c = 1.130048394203f,
+ d = 0.141357362270f;
+ #elif defined(JUMPER) && (defined(__aarch64__) || defined(__arm__))
+ const float c = 1.129999995232f,
+ d = 0.141381442547f;
+ #else
+ const float c = 1.129999995232f,
+ d = 0.141377761960f;
+ #endif
F t = rsqrt(l);
auto lo = l * 12.92f;
- auto hi = mad(t, mad(t, -0.0024542345f, 0.013832027f), 1.1334244f)
- * rcp(0.14513608f + t);
+ auto hi = mad(t, mad(t, -0.0024542345f, 0.013832027f), c)
+ * rcp(d + t);
return if_then_else(l < 0.00465985f, lo, hi);
};
r = fn(r);
diff --git a/tests/SRGBTest.cpp b/tests/SRGBTest.cpp
index 78855a34c8..b1e4570f50 100644
--- a/tests/SRGBTest.cpp
+++ b/tests/SRGBTest.cpp
@@ -5,6 +5,7 @@
* found in the LICENSE file.
*/
+#include "SkPM4f.h"
#include "SkRasterPipeline.h"
#include "SkSRGB.h"
#include "SkTypes.h"
@@ -61,3 +62,26 @@ DEF_TEST(sk_pipeline_srgb_roundtrip, r) {
}
}
}
+
+DEF_TEST(sk_pipeline_srgb_edge_cases, r) {  // Checks that to_srgb maps 0.0f -> 0.0f and 1.0f -> 1.0f exactly.
+ // We need to run at least 4 pixels to make sure we hit all specializations.
+ SkPM4f colors[4] = { {{0,1,1,1}}, {{0,0,0,0}}, {{0,0,0,0}}, {{0,0,0,0}} };  // colors[0] carries the values under test: a 0 and 1s.
+ auto& color = colors[0];
+ void* dst = &color;  // store_f32 is handed &dst below, i.e. a pointer to this pointer.
+
+ SkRasterPipeline_<256> p;
+ p.append(SkRasterPipeline::constant_color, &color);
+ p.append(SkRasterPipeline::to_srgb);
+ p.append(SkRasterPipeline::store_f32, &dst);
+ p.run(0,4);  // NOTE(review): presumably 4 pixels starting at x=0, written back through dst into colors[] -- confirm.
+
+ if (color.r() != 0.0f) {  // Exact comparison on purpose: the edge case must map exactly, not approximately.
+ ERRORF(r, "expected to_srgb() to map 0.0f to 0.0f, got %f", color.r());
+ }
+ if (color.g() != 1.0f) {  // Exact comparison on purpose, as above.
+ float f = color.g();
+ uint32_t x;
+ memcpy(&x, &f, 4);  // Copy the float's raw bits so the failure message can print them as hex.
+ ERRORF(r, "expected to_srgb() to map 1.0f to 1.0f, got %f (%08x)", color.g(), x);
+ }
+}