diff options
author | Mike Klein <mtklein@chromium.org> | 2017-06-14 15:10:24 -0700 |
---|---|---|
committer | Skia Commit-Bot <skia-commit-bot@chromium.org> | 2017-06-15 04:48:14 +0000 |
commit | 277f7f240f127772f7aa158baefc503dca05cf08 (patch) | |
tree | 1f04c9ccb2b137d312fc6f80dd4b615ee45ae081 | |
parent | a0485d94529905e76320b7aa941a0d94b5578ac2 (diff) |
delete lowp plus
I have figured out how to implement lowp clamp_1/clamp_a, and
implementing clamp_1 would make lowp plus active.
But... the way we have factored blend modes requires us to be able to
lerp between the dst and possibly-out-of-range src values. This is not
possible in lowp. If we try to multiply with values in [0x8001,0xffff],
we'll just get garbage. We'll clamp them back in range, but sadly
clamped garbage is still garbage.
So the simplest thing to do is keep plus blends in floats. This CL
doesn't even change that... we'd use floats before and after it. It
just removes the lowp plus stage code that is both dead and buggy.
As far as I can tell, no other drawing is currently gated by lowp
missing clamp_1 or clamp_a.
Change-Id: I55b73c840614f1bff9cd610dff90ca5e2b5c73e5
Reviewed-on: https://skia-review.googlesource.com/19909
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
-rw-r--r-- | src/jumper/SkJumper.cpp | 1 |
-rw-r--r-- | src/jumper/SkJumper_generated.S | 65 |
-rw-r--r-- | src/jumper/SkJumper_generated_win.S | 63 |
-rw-r--r-- | src/jumper/SkJumper_stages_lowp.cpp | 1 |
4 files changed, 54 insertions, 76 deletions
diff --git a/src/jumper/SkJumper.cpp b/src/jumper/SkJumper.cpp index f10207a058..459ca6233c 100644 --- a/src/jumper/SkJumper.cpp +++ b/src/jumper/SkJumper.cpp @@ -101,7 +101,6 @@ using StartPipelineFn = void(size_t,size_t,size_t,void**,K*); M(dstover) \ M(modulate) \ M(multiply) \ - M(plus_) \ M(screen) \ M(xor_) diff --git a/src/jumper/SkJumper_generated.S b/src/jumper/SkJumper_generated.S index 34d01ce9af..06f75005aa 100644 --- a/src/jumper/SkJumper_generated.S +++ b/src/jumper/SkJumper_generated.S @@ -37124,7 +37124,7 @@ FUNCTION(_sk_constant_color_ssse3_lowp) _sk_constant_color_ssse3_lowp: .byte 72,173 // lods %ds:(%rsi),%rax .byte 15,16,24 // movups (%rax),%xmm3 - .byte 15,88,29,224,16,0,0 // addps 0x10e0(%rip),%xmm3 # 11a0 <_sk_xor__ssse3_lowp+0xa3> + .byte 15,88,29,208,16,0,0 // addps 0x10d0(%rip),%xmm3 # 1190 <_sk_xor__ssse3_lowp+0xa7> .byte 242,15,112,195,0 // pshuflw $0x0,%xmm3,%xmm0 .byte 102,15,112,192,80 // pshufd $0x50,%xmm0,%xmm0 .byte 242,15,112,203,170 // pshuflw $0xaa,%xmm3,%xmm1 @@ -37141,7 +37141,7 @@ HIDDEN _sk_set_rgb_ssse3_lowp FUNCTION(_sk_set_rgb_ssse3_lowp) _sk_set_rgb_ssse3_lowp: .byte 72,173 // lods %ds:(%rsi),%rax - .byte 243,15,16,21,10,18,0,0 // movss 0x120a(%rip),%xmm2 # 1300 <_sk_xor__ssse3_lowp+0x203> + .byte 243,15,16,21,250,17,0,0 // movss 0x11fa(%rip),%xmm2 # 12f0 <_sk_xor__ssse3_lowp+0x207> .byte 243,15,16,0 // movss (%rax),%xmm0 .byte 243,15,88,194 // addss %xmm2,%xmm0 .byte 102,65,15,126,193 // movd %xmm0,%r9d @@ -37185,7 +37185,7 @@ _sk_load_8888_ssse3_lowp: .byte 117,113 // jne 1e7 <_sk_load_8888_ssse3_lowp+0x7b> .byte 69,15,16,76,147,16 // movups 0x10(%r11,%rdx,4),%xmm9 .byte 69,15,16,4,147 // movups (%r11,%rdx,4),%xmm8 - .byte 102,15,111,5,39,16,0,0 // movdqa 0x1027(%rip),%xmm0 # 11b0 <_sk_xor__ssse3_lowp+0xb3> + .byte 102,15,111,5,23,16,0,0 // movdqa 0x1017(%rip),%xmm0 # 11a0 <_sk_xor__ssse3_lowp+0xb7> .byte 102,68,15,56,0,192 // pshufb %xmm0,%xmm8 .byte 102,68,15,56,0,200 // pshufb %xmm0,%xmm9 .byte 102,65,15,111,208 // 
movdqa %xmm8,%xmm2 @@ -37199,7 +37199,7 @@ _sk_load_8888_ssse3_lowp: .byte 102,15,239,210 // pxor %xmm2,%xmm2 .byte 102,65,15,96,208 // punpcklbw %xmm8,%xmm2 .byte 102,65,15,104,216 // punpckhbw %xmm8,%xmm3 - .byte 102,68,15,111,5,241,15,0,0 // movdqa 0xff1(%rip),%xmm8 # 11c0 <_sk_xor__ssse3_lowp+0xc3> + .byte 102,68,15,111,5,225,15,0,0 // movdqa 0xfe1(%rip),%xmm8 # 11b0 <_sk_xor__ssse3_lowp+0xc7> .byte 102,65,15,228,192 // pmulhuw %xmm8,%xmm0 .byte 102,65,15,228,200 // pmulhuw %xmm8,%xmm1 .byte 102,65,15,228,208 // pmulhuw %xmm8,%xmm2 @@ -37342,7 +37342,7 @@ _sk_load_a8_ssse3_lowp: .byte 243,65,15,126,28,19 // movq (%r11,%rdx,1),%xmm3 .byte 102,15,96,216 // punpcklbw %xmm0,%xmm3 .byte 102,15,113,243,8 // psllw $0x8,%xmm3 - .byte 102,15,228,29,3,14,0,0 // pmulhuw 0xe03(%rip),%xmm3 # 11d0 <_sk_xor__ssse3_lowp+0xd3> + .byte 102,15,228,29,243,13,0,0 // pmulhuw 0xdf3(%rip),%xmm3 # 11c0 <_sk_xor__ssse3_lowp+0xd7> .byte 72,173 // lods %ds:(%rsi),%rax .byte 102,15,239,192 // pxor %xmm0,%xmm0 .byte 15,87,201 // xorps %xmm1,%xmm1 @@ -37444,7 +37444,7 @@ _sk_store_a8_ssse3_lowp: .byte 65,136,4,19 // mov %al,(%r11,%rdx,1) .byte 233,95,255,255,255 // jmpq 494 <_sk_store_a8_ssse3_lowp+0x20> .byte 15,31,0 // nopl (%rax) - .byte 233,255,255,255,217 // jmpq ffffffffda00053c <_sk_xor__ssse3_lowp+0xffffffffd9fff43f> + .byte 233,255,255,255,217 // jmpq ffffffffda00053c <_sk_xor__ssse3_lowp+0xffffffffd9fff453> .byte 255 // (bad) .byte 255 // (bad) .byte 255,201 // dec %ecx @@ -37470,9 +37470,9 @@ _sk_load_g8_ssse3_lowp: .byte 243,65,15,126,4,19 // movq (%r11,%rdx,1),%xmm0 .byte 102,15,96,192 // punpcklbw %xmm0,%xmm0 .byte 102,15,113,240,8 // psllw $0x8,%xmm0 - .byte 102,15,228,5,107,12,0,0 // pmulhuw 0xc6b(%rip),%xmm0 # 11e0 <_sk_xor__ssse3_lowp+0xe3> + .byte 102,15,228,5,91,12,0,0 // pmulhuw 0xc5b(%rip),%xmm0 # 11d0 <_sk_xor__ssse3_lowp+0xe7> .byte 72,173 // lods %ds:(%rsi),%rax - .byte 15,40,29,114,12,0,0 // movaps 0xc72(%rip),%xmm3 # 11f0 <_sk_xor__ssse3_lowp+0xf3> + .byte 
15,40,29,98,12,0,0 // movaps 0xc62(%rip),%xmm3 # 11e0 <_sk_xor__ssse3_lowp+0xf7> .byte 102,15,111,200 // movdqa %xmm0,%xmm1 .byte 102,15,111,208 // movdqa %xmm0,%xmm2 .byte 255,224 // jmpq *%rax @@ -37535,7 +37535,7 @@ _sk_srcover_rgba_8888_ssse3_lowp: .byte 69,15,16,76,147,16 // movups 0x10(%r11,%rdx,4),%xmm9 .byte 69,15,16,4,147 // movups (%r11,%rdx,4),%xmm8 .byte 77,133,192 // test %r8,%r8 - .byte 102,15,111,37,184,11,0,0 // movdqa 0xbb8(%rip),%xmm4 # 1200 <_sk_xor__ssse3_lowp+0x103> + .byte 102,15,111,37,168,11,0,0 // movdqa 0xba8(%rip),%xmm4 # 11f0 <_sk_xor__ssse3_lowp+0x107> .byte 102,68,15,56,0,196 // pshufb %xmm4,%xmm8 .byte 102,68,15,56,0,204 // pshufb %xmm4,%xmm9 .byte 102,65,15,111,240 // movdqa %xmm8,%xmm6 @@ -37549,12 +37549,12 @@ _sk_srcover_rgba_8888_ssse3_lowp: .byte 102,15,239,246 // pxor %xmm6,%xmm6 .byte 102,65,15,96,240 // punpcklbw %xmm8,%xmm6 .byte 102,65,15,104,248 // punpckhbw %xmm8,%xmm7 - .byte 102,68,15,111,5,130,11,0,0 // movdqa 0xb82(%rip),%xmm8 # 1210 <_sk_xor__ssse3_lowp+0x113> + .byte 102,68,15,111,5,114,11,0,0 // movdqa 0xb72(%rip),%xmm8 # 1200 <_sk_xor__ssse3_lowp+0x117> .byte 102,65,15,228,224 // pmulhuw %xmm8,%xmm4 .byte 102,65,15,228,232 // pmulhuw %xmm8,%xmm5 .byte 102,65,15,228,240 // pmulhuw %xmm8,%xmm6 .byte 102,65,15,228,248 // pmulhuw %xmm8,%xmm7 - .byte 102,68,15,111,29,117,11,0,0 // movdqa 0xb75(%rip),%xmm11 # 1220 <_sk_xor__ssse3_lowp+0x123> + .byte 102,68,15,111,29,101,11,0,0 // movdqa 0xb65(%rip),%xmm11 # 1210 <_sk_xor__ssse3_lowp+0x127> .byte 102,68,15,249,219 // psubw %xmm3,%xmm11 .byte 102,68,15,111,196 // movdqa %xmm4,%xmm8 .byte 102,69,15,56,11,195 // pmulhrsw %xmm11,%xmm8 @@ -37692,7 +37692,7 @@ FUNCTION(_sk_scale_1_float_ssse3_lowp) _sk_scale_1_float_ssse3_lowp: .byte 72,173 // lods %ds:(%rsi),%rax .byte 243,68,15,16,0 // movss (%rax),%xmm8 - .byte 243,68,15,88,5,40,10,0,0 // addss 0xa28(%rip),%xmm8 # 1304 <_sk_xor__ssse3_lowp+0x207> + .byte 243,68,15,88,5,24,10,0,0 // addss 0xa18(%rip),%xmm8 # 12f4 
<_sk_xor__ssse3_lowp+0x20b> .byte 102,68,15,126,192 // movd %xmm8,%eax .byte 102,68,15,110,192 // movd %eax,%xmm8 .byte 242,69,15,112,192,0 // pshuflw $0x0,%xmm8,%xmm8 @@ -37719,7 +37719,7 @@ _sk_scale_u8_ssse3_lowp: .byte 243,69,15,126,4,19 // movq (%r11,%rdx,1),%xmm8 .byte 102,68,15,96,192 // punpcklbw %xmm0,%xmm8 .byte 102,65,15,113,240,8 // psllw $0x8,%xmm8 - .byte 102,68,15,228,5,234,8,0,0 // pmulhuw 0x8ea(%rip),%xmm8 # 1230 <_sk_xor__ssse3_lowp+0x133> + .byte 102,68,15,228,5,218,8,0,0 // pmulhuw 0x8da(%rip),%xmm8 # 1220 <_sk_xor__ssse3_lowp+0x137> .byte 102,65,15,56,11,192 // pmulhrsw %xmm8,%xmm0 .byte 102,15,56,29,192 // pabsw %xmm0,%xmm0 .byte 102,65,15,56,11,200 // pmulhrsw %xmm8,%xmm1 @@ -37781,14 +37781,14 @@ FUNCTION(_sk_lerp_1_float_ssse3_lowp) _sk_lerp_1_float_ssse3_lowp: .byte 72,173 // lods %ds:(%rsi),%rax .byte 243,68,15,16,0 // movss (%rax),%xmm8 - .byte 243,68,15,88,5,224,8,0,0 // addss 0x8e0(%rip),%xmm8 # 1308 <_sk_xor__ssse3_lowp+0x20b> + .byte 243,68,15,88,5,208,8,0,0 // addss 0x8d0(%rip),%xmm8 # 12f8 <_sk_xor__ssse3_lowp+0x20f> .byte 102,68,15,126,192 // movd %xmm8,%eax .byte 102,68,15,110,192 // movd %eax,%xmm8 .byte 242,69,15,112,192,0 // pshuflw $0x0,%xmm8,%xmm8 .byte 102,69,15,112,192,80 // pshufd $0x50,%xmm8,%xmm8 .byte 102,65,15,56,11,192 // pmulhrsw %xmm8,%xmm0 .byte 102,68,15,56,29,200 // pabsw %xmm0,%xmm9 - .byte 102,68,15,111,21,237,7,0,0 // movdqa 0x7ed(%rip),%xmm10 # 1240 <_sk_xor__ssse3_lowp+0x143> + .byte 102,68,15,111,21,221,7,0,0 // movdqa 0x7dd(%rip),%xmm10 # 1230 <_sk_xor__ssse3_lowp+0x147> .byte 102,69,15,249,208 // psubw %xmm8,%xmm10 .byte 102,15,111,196 // movdqa %xmm4,%xmm0 .byte 102,65,15,56,11,194 // pmulhrsw %xmm10,%xmm0 @@ -37825,10 +37825,10 @@ _sk_lerp_u8_ssse3_lowp: .byte 243,69,15,126,4,19 // movq (%r11,%rdx,1),%xmm8 .byte 102,68,15,96,192 // punpcklbw %xmm0,%xmm8 .byte 102,65,15,113,240,8 // psllw $0x8,%xmm8 - .byte 102,68,15,228,5,91,7,0,0 // pmulhuw 0x75b(%rip),%xmm8 # 1250 <_sk_xor__ssse3_lowp+0x153> + .byte 
102,68,15,228,5,75,7,0,0 // pmulhuw 0x74b(%rip),%xmm8 # 1240 <_sk_xor__ssse3_lowp+0x157> .byte 102,65,15,56,11,192 // pmulhrsw %xmm8,%xmm0 .byte 102,68,15,56,29,200 // pabsw %xmm0,%xmm9 - .byte 102,68,15,111,21,86,7,0,0 // movdqa 0x756(%rip),%xmm10 # 1260 <_sk_xor__ssse3_lowp+0x163> + .byte 102,68,15,111,21,70,7,0,0 // movdqa 0x746(%rip),%xmm10 # 1250 <_sk_xor__ssse3_lowp+0x167> .byte 102,69,15,249,208 // psubw %xmm8,%xmm10 .byte 102,15,111,196 // movdqa %xmm4,%xmm0 .byte 102,65,15,56,11,194 // pmulhrsw %xmm10,%xmm0 @@ -37971,7 +37971,7 @@ FUNCTION(_sk_srcatop_ssse3_lowp) _sk_srcatop_ssse3_lowp: .byte 102,15,56,11,199 // pmulhrsw %xmm7,%xmm0 .byte 102,68,15,56,29,192 // pabsw %xmm0,%xmm8 - .byte 102,68,15,111,13,193,5,0,0 // movdqa 0x5c1(%rip),%xmm9 # 1270 <_sk_xor__ssse3_lowp+0x173> + .byte 102,68,15,111,13,177,5,0,0 // movdqa 0x5b1(%rip),%xmm9 # 1260 <_sk_xor__ssse3_lowp+0x177> .byte 102,68,15,249,203 // psubw %xmm3,%xmm9 .byte 102,15,111,196 // movdqa %xmm4,%xmm0 .byte 102,65,15,56,11,193 // pmulhrsw %xmm9,%xmm0 @@ -38004,7 +38004,7 @@ _sk_dstatop_ssse3_lowp: .byte 102,68,15,111,196 // movdqa %xmm4,%xmm8 .byte 102,68,15,56,11,195 // pmulhrsw %xmm3,%xmm8 .byte 102,69,15,56,29,192 // pabsw %xmm8,%xmm8 - .byte 102,68,15,111,13,64,5,0,0 // movdqa 0x540(%rip),%xmm9 # 1280 <_sk_xor__ssse3_lowp+0x183> + .byte 102,68,15,111,13,48,5,0,0 // movdqa 0x530(%rip),%xmm9 # 1270 <_sk_xor__ssse3_lowp+0x187> .byte 102,68,15,249,207 // psubw %xmm7,%xmm9 .byte 102,65,15,56,11,193 // pmulhrsw %xmm9,%xmm0 .byte 102,15,56,29,192 // pabsw %xmm0,%xmm0 @@ -38067,7 +38067,7 @@ HIDDEN _sk_srcout_ssse3_lowp .globl _sk_srcout_ssse3_lowp FUNCTION(_sk_srcout_ssse3_lowp) _sk_srcout_ssse3_lowp: - .byte 102,68,15,111,5,102,4,0,0 // movdqa 0x466(%rip),%xmm8 # 1290 <_sk_xor__ssse3_lowp+0x193> + .byte 102,68,15,111,5,86,4,0,0 // movdqa 0x456(%rip),%xmm8 # 1280 <_sk_xor__ssse3_lowp+0x197> .byte 102,68,15,249,199 // psubw %xmm7,%xmm8 .byte 102,65,15,56,11,192 // pmulhrsw %xmm8,%xmm0 .byte 
102,15,56,29,192 // pabsw %xmm0,%xmm0 @@ -38084,7 +38084,7 @@ HIDDEN _sk_dstout_ssse3_lowp .globl _sk_dstout_ssse3_lowp FUNCTION(_sk_dstout_ssse3_lowp) _sk_dstout_ssse3_lowp: - .byte 102,68,15,111,5,55,4,0,0 // movdqa 0x437(%rip),%xmm8 # 12a0 <_sk_xor__ssse3_lowp+0x1a3> + .byte 102,68,15,111,5,39,4,0,0 // movdqa 0x427(%rip),%xmm8 # 1290 <_sk_xor__ssse3_lowp+0x1a7> .byte 102,68,15,249,195 // psubw %xmm3,%xmm8 .byte 102,15,111,196 // movdqa %xmm4,%xmm0 .byte 102,65,15,56,11,192 // pmulhrsw %xmm8,%xmm0 @@ -38104,7 +38104,7 @@ HIDDEN _sk_srcover_ssse3_lowp .globl _sk_srcover_ssse3_lowp FUNCTION(_sk_srcover_ssse3_lowp) _sk_srcover_ssse3_lowp: - .byte 102,68,15,111,5,252,3,0,0 // movdqa 0x3fc(%rip),%xmm8 # 12b0 <_sk_xor__ssse3_lowp+0x1b3> + .byte 102,68,15,111,5,236,3,0,0 // movdqa 0x3ec(%rip),%xmm8 # 12a0 <_sk_xor__ssse3_lowp+0x1b7> .byte 102,68,15,249,195 // psubw %xmm3,%xmm8 .byte 102,68,15,111,204 // movdqa %xmm4,%xmm9 .byte 102,69,15,56,11,200 // pmulhrsw %xmm8,%xmm9 @@ -38128,7 +38128,7 @@ HIDDEN _sk_dstover_ssse3_lowp .globl _sk_dstover_ssse3_lowp FUNCTION(_sk_dstover_ssse3_lowp) _sk_dstover_ssse3_lowp: - .byte 102,68,15,111,5,167,3,0,0 // movdqa 0x3a7(%rip),%xmm8 # 12c0 <_sk_xor__ssse3_lowp+0x1c3> + .byte 102,68,15,111,5,151,3,0,0 // movdqa 0x397(%rip),%xmm8 # 12b0 <_sk_xor__ssse3_lowp+0x1c7> .byte 102,68,15,249,199 // psubw %xmm7,%xmm8 .byte 102,65,15,56,11,192 // pmulhrsw %xmm8,%xmm0 .byte 102,15,56,29,192 // pabsw %xmm0,%xmm0 @@ -38164,7 +38164,7 @@ HIDDEN _sk_multiply_ssse3_lowp .globl _sk_multiply_ssse3_lowp FUNCTION(_sk_multiply_ssse3_lowp) _sk_multiply_ssse3_lowp: - .byte 102,68,15,111,5,60,3,0,0 // movdqa 0x33c(%rip),%xmm8 # 12d0 <_sk_xor__ssse3_lowp+0x1d3> + .byte 102,68,15,111,5,44,3,0,0 // movdqa 0x32c(%rip),%xmm8 # 12c0 <_sk_xor__ssse3_lowp+0x1d7> .byte 102,69,15,111,200 // movdqa %xmm8,%xmm9 .byte 102,68,15,249,207 // psubw %xmm7,%xmm9 .byte 102,68,15,111,208 // movdqa %xmm0,%xmm10 @@ -38209,22 +38209,11 @@ _sk_multiply_ssse3_lowp: .byte 72,173 // 
lods %ds:(%rsi),%rax .byte 255,224 // jmpq *%rax -HIDDEN _sk_plus__ssse3_lowp -.globl _sk_plus__ssse3_lowp -FUNCTION(_sk_plus__ssse3_lowp) -_sk_plus__ssse3_lowp: - .byte 102,15,253,196 // paddw %xmm4,%xmm0 - .byte 102,15,253,205 // paddw %xmm5,%xmm1 - .byte 102,15,253,214 // paddw %xmm6,%xmm2 - .byte 102,15,253,223 // paddw %xmm7,%xmm3 - .byte 72,173 // lods %ds:(%rsi),%rax - .byte 255,224 // jmpq *%rax - HIDDEN _sk_screen_ssse3_lowp .globl _sk_screen_ssse3_lowp FUNCTION(_sk_screen_ssse3_lowp) _sk_screen_ssse3_lowp: - .byte 102,68,15,111,5,78,2,0,0 // movdqa 0x24e(%rip),%xmm8 # 12e0 <_sk_xor__ssse3_lowp+0x1e3> + .byte 102,68,15,111,5,82,2,0,0 // movdqa 0x252(%rip),%xmm8 # 12d0 <_sk_xor__ssse3_lowp+0x1e7> .byte 102,69,15,111,200 // movdqa %xmm8,%xmm9 .byte 102,68,15,249,200 // psubw %xmm0,%xmm9 .byte 102,68,15,56,11,204 // pmulhrsw %xmm4,%xmm9 @@ -38251,7 +38240,7 @@ HIDDEN _sk_xor__ssse3_lowp .globl _sk_xor__ssse3_lowp FUNCTION(_sk_xor__ssse3_lowp) _sk_xor__ssse3_lowp: - .byte 102,68,15,111,5,234,1,0,0 // movdqa 0x1ea(%rip),%xmm8 # 12f0 <_sk_xor__ssse3_lowp+0x1f3> + .byte 102,68,15,111,5,238,1,0,0 // movdqa 0x1ee(%rip),%xmm8 # 12e0 <_sk_xor__ssse3_lowp+0x1f7> .byte 102,69,15,111,200 // movdqa %xmm8,%xmm9 .byte 102,68,15,249,207 // psubw %xmm7,%xmm9 .byte 102,65,15,56,11,193 // pmulhrsw %xmm9,%xmm0 @@ -38300,7 +38289,7 @@ BALIGN16 .byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax) .byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax) .byte 0,128,0,4,8,12 // add %al,0xc080400(%rax) - .byte 1,5,9,13,2,6 // add %eax,0x6020d09(%rip) # 6021f13 <_sk_xor__ssse3_lowp+0x6020e16> + .byte 1,5,9,13,2,6 // add %eax,0x6020d09(%rip) # 6021f03 <_sk_xor__ssse3_lowp+0x6020e1a> .byte 10,14 // or (%rsi),%cl .byte 3,7 // add (%rdi),%eax .byte 11,15 // or (%rdi),%ecx diff --git a/src/jumper/SkJumper_generated_win.S b/src/jumper/SkJumper_generated_win.S index d7cdd3bc0a..c061ec2405 100644 --- a/src/jumper/SkJumper_generated_win.S +++ b/src/jumper/SkJumper_generated_win.S @@ -26542,7 
+26542,7 @@ PUBLIC _sk_constant_color_ssse3_lowp _sk_constant_color_ssse3_lowp LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 15,16,24 ; movups (%rax),%xmm3 - DB 15,88,29,239,16,0,0 ; addps 0x10ef(%rip),%xmm3 # 1250 <_sk_xor__ssse3_lowp+0xa3> + DB 15,88,29,223,16,0,0 ; addps 0x10df(%rip),%xmm3 # 1240 <_sk_xor__ssse3_lowp+0xa7> DB 242,15,112,195,0 ; pshuflw $0x0,%xmm3,%xmm0 DB 102,15,112,192,80 ; pshufd $0x50,%xmm0,%xmm0 DB 242,15,112,203,170 ; pshuflw $0xaa,%xmm3,%xmm1 @@ -26557,7 +26557,7 @@ _sk_constant_color_ssse3_lowp LABEL PROC PUBLIC _sk_set_rgb_ssse3_lowp _sk_set_rgb_ssse3_lowp LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax - DB 243,15,16,21,25,18,0,0 ; movss 0x1219(%rip),%xmm2 # 13b0 <_sk_xor__ssse3_lowp+0x203> + DB 243,15,16,21,9,18,0,0 ; movss 0x1209(%rip),%xmm2 # 13a0 <_sk_xor__ssse3_lowp+0x207> DB 243,15,16,0 ; movss (%rax),%xmm0 DB 243,15,88,194 ; addss %xmm2,%xmm0 DB 102,65,15,126,193 ; movd %xmm0,%r9d @@ -26597,7 +26597,7 @@ _sk_load_8888_ssse3_lowp LABEL PROC DB 117,113 ; jne 288 <_sk_load_8888_ssse3_lowp+0x7b> DB 69,15,16,76,147,16 ; movups 0x10(%r11,%rdx,4),%xmm9 DB 69,15,16,4,147 ; movups (%r11,%rdx,4),%xmm8 - DB 102,15,111,5,54,16,0,0 ; movdqa 0x1036(%rip),%xmm0 # 1260 <_sk_xor__ssse3_lowp+0xb3> + DB 102,15,111,5,38,16,0,0 ; movdqa 0x1026(%rip),%xmm0 # 1250 <_sk_xor__ssse3_lowp+0xb7> DB 102,68,15,56,0,192 ; pshufb %xmm0,%xmm8 DB 102,68,15,56,0,200 ; pshufb %xmm0,%xmm9 DB 102,65,15,111,208 ; movdqa %xmm8,%xmm2 @@ -26611,7 +26611,7 @@ _sk_load_8888_ssse3_lowp LABEL PROC DB 102,15,239,210 ; pxor %xmm2,%xmm2 DB 102,65,15,96,208 ; punpcklbw %xmm8,%xmm2 DB 102,65,15,104,216 ; punpckhbw %xmm8,%xmm3 - DB 102,68,15,111,5,0,16,0,0 ; movdqa 0x1000(%rip),%xmm8 # 1270 <_sk_xor__ssse3_lowp+0xc3> + DB 102,68,15,111,5,240,15,0,0 ; movdqa 0xff0(%rip),%xmm8 # 1260 <_sk_xor__ssse3_lowp+0xc7> DB 102,65,15,228,192 ; pmulhuw %xmm8,%xmm0 DB 102,65,15,228,200 ; pmulhuw %xmm8,%xmm1 DB 102,65,15,228,208 ; pmulhuw %xmm8,%xmm2 @@ -26751,7 +26751,7 @@ _sk_load_a8_ssse3_lowp 
LABEL PROC DB 243,65,15,126,28,19 ; movq (%r11,%rdx,1),%xmm3 DB 102,15,96,216 ; punpcklbw %xmm0,%xmm3 DB 102,15,113,243,8 ; psllw $0x8,%xmm3 - DB 102,15,228,29,18,14,0,0 ; pmulhuw 0xe12(%rip),%xmm3 # 1280 <_sk_xor__ssse3_lowp+0xd3> + DB 102,15,228,29,2,14,0,0 ; pmulhuw 0xe02(%rip),%xmm3 # 1270 <_sk_xor__ssse3_lowp+0xd7> DB 72,173 ; lods %ds:(%rsi),%rax DB 102,15,239,192 ; pxor %xmm0,%xmm0 DB 15,87,201 ; xorps %xmm1,%xmm1 @@ -26854,7 +26854,7 @@ _sk_store_a8_ssse3_lowp LABEL PROC DB 72,131,196,120 ; add $0x78,%rsp DB 233,89,255,255,255 ; jmpq 538 <_sk_store_a8_ssse3_lowp+0x20> DB 144 ; nop - DB 233,255,255,255,217 ; jmpq ffffffffda0005e4 <_sk_xor__ssse3_lowp+0xffffffffd9fff437> + DB 233,255,255,255,217 ; jmpq ffffffffda0005e4 <_sk_xor__ssse3_lowp+0xffffffffd9fff44b> DB 255 ; (bad) DB 255 ; (bad) DB 255,201 ; dec %ecx @@ -26879,9 +26879,9 @@ _sk_load_g8_ssse3_lowp LABEL PROC DB 243,65,15,126,4,19 ; movq (%r11,%rdx,1),%xmm0 DB 102,15,96,192 ; punpcklbw %xmm0,%xmm0 DB 102,15,113,240,8 ; psllw $0x8,%xmm0 - DB 102,15,228,5,114,12,0,0 ; pmulhuw 0xc72(%rip),%xmm0 # 1290 <_sk_xor__ssse3_lowp+0xe3> + DB 102,15,228,5,98,12,0,0 ; pmulhuw 0xc62(%rip),%xmm0 # 1280 <_sk_xor__ssse3_lowp+0xe7> DB 72,173 ; lods %ds:(%rsi),%rax - DB 15,40,29,121,12,0,0 ; movaps 0xc79(%rip),%xmm3 # 12a0 <_sk_xor__ssse3_lowp+0xf3> + DB 15,40,29,105,12,0,0 ; movaps 0xc69(%rip),%xmm3 # 1290 <_sk_xor__ssse3_lowp+0xf7> DB 102,15,111,200 ; movdqa %xmm0,%xmm1 DB 102,15,111,208 ; movdqa %xmm0,%xmm2 DB 65,89 ; pop %r9 @@ -26943,7 +26943,7 @@ _sk_srcover_rgba_8888_ssse3_lowp LABEL PROC DB 69,15,16,76,147,16 ; movups 0x10(%r11,%rdx,4),%xmm9 DB 69,15,16,4,147 ; movups (%r11,%rdx,4),%xmm8 DB 77,133,192 ; test %r8,%r8 - DB 102,15,111,37,192,11,0,0 ; movdqa 0xbc0(%rip),%xmm4 # 12b0 <_sk_xor__ssse3_lowp+0x103> + DB 102,15,111,37,176,11,0,0 ; movdqa 0xbb0(%rip),%xmm4 # 12a0 <_sk_xor__ssse3_lowp+0x107> DB 102,68,15,56,0,196 ; pshufb %xmm4,%xmm8 DB 102,68,15,56,0,204 ; pshufb %xmm4,%xmm9 DB 102,65,15,111,240 ; movdqa 
%xmm8,%xmm6 @@ -26957,12 +26957,12 @@ _sk_srcover_rgba_8888_ssse3_lowp LABEL PROC DB 102,15,239,246 ; pxor %xmm6,%xmm6 DB 102,65,15,96,240 ; punpcklbw %xmm8,%xmm6 DB 102,65,15,104,248 ; punpckhbw %xmm8,%xmm7 - DB 102,68,15,111,5,138,11,0,0 ; movdqa 0xb8a(%rip),%xmm8 # 12c0 <_sk_xor__ssse3_lowp+0x113> + DB 102,68,15,111,5,122,11,0,0 ; movdqa 0xb7a(%rip),%xmm8 # 12b0 <_sk_xor__ssse3_lowp+0x117> DB 102,65,15,228,224 ; pmulhuw %xmm8,%xmm4 DB 102,65,15,228,232 ; pmulhuw %xmm8,%xmm5 DB 102,65,15,228,240 ; pmulhuw %xmm8,%xmm6 DB 102,65,15,228,248 ; pmulhuw %xmm8,%xmm7 - DB 102,68,15,111,29,125,11,0,0 ; movdqa 0xb7d(%rip),%xmm11 # 12d0 <_sk_xor__ssse3_lowp+0x123> + DB 102,68,15,111,29,109,11,0,0 ; movdqa 0xb6d(%rip),%xmm11 # 12c0 <_sk_xor__ssse3_lowp+0x127> DB 102,68,15,249,219 ; psubw %xmm3,%xmm11 DB 102,68,15,111,196 ; movdqa %xmm4,%xmm8 DB 102,69,15,56,11,195 ; pmulhrsw %xmm11,%xmm8 @@ -27098,7 +27098,7 @@ PUBLIC _sk_scale_1_float_ssse3_lowp _sk_scale_1_float_ssse3_lowp LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 243,68,15,16,0 ; movss (%rax),%xmm8 - DB 243,68,15,88,5,48,10,0,0 ; addss 0xa30(%rip),%xmm8 # 13b4 <_sk_xor__ssse3_lowp+0x207> + DB 243,68,15,88,5,32,10,0,0 ; addss 0xa20(%rip),%xmm8 # 13a4 <_sk_xor__ssse3_lowp+0x20b> DB 102,68,15,126,192 ; movd %xmm8,%eax DB 102,68,15,110,192 ; movd %eax,%xmm8 DB 242,69,15,112,192,0 ; pshuflw $0x0,%xmm8,%xmm8 @@ -27124,7 +27124,7 @@ _sk_scale_u8_ssse3_lowp LABEL PROC DB 243,69,15,126,4,19 ; movq (%r11,%rdx,1),%xmm8 DB 102,68,15,96,192 ; punpcklbw %xmm0,%xmm8 DB 102,65,15,113,240,8 ; psllw $0x8,%xmm8 - DB 102,68,15,228,5,241,8,0,0 ; pmulhuw 0x8f1(%rip),%xmm8 # 12e0 <_sk_xor__ssse3_lowp+0x133> + DB 102,68,15,228,5,225,8,0,0 ; pmulhuw 0x8e1(%rip),%xmm8 # 12d0 <_sk_xor__ssse3_lowp+0x137> DB 102,65,15,56,11,192 ; pmulhrsw %xmm8,%xmm0 DB 102,15,56,29,192 ; pabsw %xmm0,%xmm0 DB 102,65,15,56,11,200 ; pmulhrsw %xmm8,%xmm1 @@ -27190,14 +27190,14 @@ PUBLIC _sk_lerp_1_float_ssse3_lowp _sk_lerp_1_float_ssse3_lowp LABEL PROC DB 72,173 ; 
lods %ds:(%rsi),%rax DB 243,68,15,16,0 ; movss (%rax),%xmm8 - DB 243,68,15,88,5,228,8,0,0 ; addss 0x8e4(%rip),%xmm8 # 13b8 <_sk_xor__ssse3_lowp+0x20b> + DB 243,68,15,88,5,212,8,0,0 ; addss 0x8d4(%rip),%xmm8 # 13a8 <_sk_xor__ssse3_lowp+0x20f> DB 102,68,15,126,192 ; movd %xmm8,%eax DB 102,68,15,110,192 ; movd %eax,%xmm8 DB 242,69,15,112,192,0 ; pshuflw $0x0,%xmm8,%xmm8 DB 102,69,15,112,192,80 ; pshufd $0x50,%xmm8,%xmm8 DB 102,65,15,56,11,192 ; pmulhrsw %xmm8,%xmm0 DB 102,68,15,56,29,200 ; pabsw %xmm0,%xmm9 - DB 102,68,15,111,21,241,7,0,0 ; movdqa 0x7f1(%rip),%xmm10 # 12f0 <_sk_xor__ssse3_lowp+0x143> + DB 102,68,15,111,21,225,7,0,0 ; movdqa 0x7e1(%rip),%xmm10 # 12e0 <_sk_xor__ssse3_lowp+0x147> DB 102,69,15,249,208 ; psubw %xmm8,%xmm10 DB 102,15,111,196 ; movdqa %xmm4,%xmm0 DB 102,65,15,56,11,194 ; pmulhrsw %xmm10,%xmm0 @@ -27233,10 +27233,10 @@ _sk_lerp_u8_ssse3_lowp LABEL PROC DB 243,69,15,126,4,19 ; movq (%r11,%rdx,1),%xmm8 DB 102,68,15,96,192 ; punpcklbw %xmm0,%xmm8 DB 102,65,15,113,240,8 ; psllw $0x8,%xmm8 - DB 102,68,15,228,5,94,7,0,0 ; pmulhuw 0x75e(%rip),%xmm8 # 1300 <_sk_xor__ssse3_lowp+0x153> + DB 102,68,15,228,5,78,7,0,0 ; pmulhuw 0x74e(%rip),%xmm8 # 12f0 <_sk_xor__ssse3_lowp+0x157> DB 102,65,15,56,11,192 ; pmulhrsw %xmm8,%xmm0 DB 102,68,15,56,29,200 ; pabsw %xmm0,%xmm9 - DB 102,68,15,111,21,89,7,0,0 ; movdqa 0x759(%rip),%xmm10 # 1310 <_sk_xor__ssse3_lowp+0x163> + DB 102,68,15,111,21,73,7,0,0 ; movdqa 0x749(%rip),%xmm10 # 1300 <_sk_xor__ssse3_lowp+0x167> DB 102,69,15,249,208 ; psubw %xmm8,%xmm10 DB 102,15,111,196 ; movdqa %xmm4,%xmm0 DB 102,65,15,56,11,194 ; pmulhrsw %xmm10,%xmm0 @@ -27368,7 +27368,7 @@ PUBLIC _sk_srcatop_ssse3_lowp _sk_srcatop_ssse3_lowp LABEL PROC DB 102,15,56,11,199 ; pmulhrsw %xmm7,%xmm0 DB 102,68,15,56,29,192 ; pabsw %xmm0,%xmm8 - DB 102,68,15,111,13,193,5,0,0 ; movdqa 0x5c1(%rip),%xmm9 # 1320 <_sk_xor__ssse3_lowp+0x173> + DB 102,68,15,111,13,177,5,0,0 ; movdqa 0x5b1(%rip),%xmm9 # 1310 <_sk_xor__ssse3_lowp+0x177> DB 102,68,15,249,203 ; 
psubw %xmm3,%xmm9 DB 102,15,111,196 ; movdqa %xmm4,%xmm0 DB 102,65,15,56,11,193 ; pmulhrsw %xmm9,%xmm0 @@ -27399,7 +27399,7 @@ _sk_dstatop_ssse3_lowp LABEL PROC DB 102,68,15,111,196 ; movdqa %xmm4,%xmm8 DB 102,68,15,56,11,195 ; pmulhrsw %xmm3,%xmm8 DB 102,69,15,56,29,192 ; pabsw %xmm8,%xmm8 - DB 102,68,15,111,13,64,5,0,0 ; movdqa 0x540(%rip),%xmm9 # 1330 <_sk_xor__ssse3_lowp+0x183> + DB 102,68,15,111,13,48,5,0,0 ; movdqa 0x530(%rip),%xmm9 # 1320 <_sk_xor__ssse3_lowp+0x187> DB 102,68,15,249,207 ; psubw %xmm7,%xmm9 DB 102,65,15,56,11,193 ; pmulhrsw %xmm9,%xmm0 DB 102,15,56,29,192 ; pabsw %xmm0,%xmm0 @@ -27456,7 +27456,7 @@ _sk_dstin_ssse3_lowp LABEL PROC PUBLIC _sk_srcout_ssse3_lowp _sk_srcout_ssse3_lowp LABEL PROC - DB 102,68,15,111,5,102,4,0,0 ; movdqa 0x466(%rip),%xmm8 # 1340 <_sk_xor__ssse3_lowp+0x193> + DB 102,68,15,111,5,86,4,0,0 ; movdqa 0x456(%rip),%xmm8 # 1330 <_sk_xor__ssse3_lowp+0x197> DB 102,68,15,249,199 ; psubw %xmm7,%xmm8 DB 102,65,15,56,11,192 ; pmulhrsw %xmm8,%xmm0 DB 102,15,56,29,192 ; pabsw %xmm0,%xmm0 @@ -27471,7 +27471,7 @@ _sk_srcout_ssse3_lowp LABEL PROC PUBLIC _sk_dstout_ssse3_lowp _sk_dstout_ssse3_lowp LABEL PROC - DB 102,68,15,111,5,55,4,0,0 ; movdqa 0x437(%rip),%xmm8 # 1350 <_sk_xor__ssse3_lowp+0x1a3> + DB 102,68,15,111,5,39,4,0,0 ; movdqa 0x427(%rip),%xmm8 # 1340 <_sk_xor__ssse3_lowp+0x1a7> DB 102,68,15,249,195 ; psubw %xmm3,%xmm8 DB 102,15,111,196 ; movdqa %xmm4,%xmm0 DB 102,65,15,56,11,192 ; pmulhrsw %xmm8,%xmm0 @@ -27489,7 +27489,7 @@ _sk_dstout_ssse3_lowp LABEL PROC PUBLIC _sk_srcover_ssse3_lowp _sk_srcover_ssse3_lowp LABEL PROC - DB 102,68,15,111,5,252,3,0,0 ; movdqa 0x3fc(%rip),%xmm8 # 1360 <_sk_xor__ssse3_lowp+0x1b3> + DB 102,68,15,111,5,236,3,0,0 ; movdqa 0x3ec(%rip),%xmm8 # 1350 <_sk_xor__ssse3_lowp+0x1b7> DB 102,68,15,249,195 ; psubw %xmm3,%xmm8 DB 102,68,15,111,204 ; movdqa %xmm4,%xmm9 DB 102,69,15,56,11,200 ; pmulhrsw %xmm8,%xmm9 @@ -27511,7 +27511,7 @@ _sk_srcover_ssse3_lowp LABEL PROC PUBLIC _sk_dstover_ssse3_lowp 
_sk_dstover_ssse3_lowp LABEL PROC - DB 102,68,15,111,5,167,3,0,0 ; movdqa 0x3a7(%rip),%xmm8 # 1370 <_sk_xor__ssse3_lowp+0x1c3> + DB 102,68,15,111,5,151,3,0,0 ; movdqa 0x397(%rip),%xmm8 # 1360 <_sk_xor__ssse3_lowp+0x1c7> DB 102,68,15,249,199 ; psubw %xmm7,%xmm8 DB 102,65,15,56,11,192 ; pmulhrsw %xmm8,%xmm0 DB 102,15,56,29,192 ; pabsw %xmm0,%xmm0 @@ -27543,7 +27543,7 @@ _sk_modulate_ssse3_lowp LABEL PROC PUBLIC _sk_multiply_ssse3_lowp _sk_multiply_ssse3_lowp LABEL PROC - DB 102,68,15,111,5,60,3,0,0 ; movdqa 0x33c(%rip),%xmm8 # 1380 <_sk_xor__ssse3_lowp+0x1d3> + DB 102,68,15,111,5,44,3,0,0 ; movdqa 0x32c(%rip),%xmm8 # 1370 <_sk_xor__ssse3_lowp+0x1d7> DB 102,69,15,111,200 ; movdqa %xmm8,%xmm9 DB 102,68,15,249,207 ; psubw %xmm7,%xmm9 DB 102,68,15,111,208 ; movdqa %xmm0,%xmm10 @@ -27588,18 +27588,9 @@ _sk_multiply_ssse3_lowp LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 255,224 ; jmpq *%rax -PUBLIC _sk_plus__ssse3_lowp -_sk_plus__ssse3_lowp LABEL PROC - DB 102,15,253,196 ; paddw %xmm4,%xmm0 - DB 102,15,253,205 ; paddw %xmm5,%xmm1 - DB 102,15,253,214 ; paddw %xmm6,%xmm2 - DB 102,15,253,223 ; paddw %xmm7,%xmm3 - DB 72,173 ; lods %ds:(%rsi),%rax - DB 255,224 ; jmpq *%rax - PUBLIC _sk_screen_ssse3_lowp _sk_screen_ssse3_lowp LABEL PROC - DB 102,68,15,111,5,78,2,0,0 ; movdqa 0x24e(%rip),%xmm8 # 1390 <_sk_xor__ssse3_lowp+0x1e3> + DB 102,68,15,111,5,82,2,0,0 ; movdqa 0x252(%rip),%xmm8 # 1380 <_sk_xor__ssse3_lowp+0x1e7> DB 102,69,15,111,200 ; movdqa %xmm8,%xmm9 DB 102,68,15,249,200 ; psubw %xmm0,%xmm9 DB 102,68,15,56,11,204 ; pmulhrsw %xmm4,%xmm9 @@ -27624,7 +27615,7 @@ _sk_screen_ssse3_lowp LABEL PROC PUBLIC _sk_xor__ssse3_lowp _sk_xor__ssse3_lowp LABEL PROC - DB 102,68,15,111,5,234,1,0,0 ; movdqa 0x1ea(%rip),%xmm8 # 13a0 <_sk_xor__ssse3_lowp+0x1f3> + DB 102,68,15,111,5,238,1,0,0 ; movdqa 0x1ee(%rip),%xmm8 # 1390 <_sk_xor__ssse3_lowp+0x1f7> DB 102,69,15,111,200 ; movdqa %xmm8,%xmm9 DB 102,68,15,249,207 ; psubw %xmm7,%xmm9 DB 102,65,15,56,11,193 ; pmulhrsw %xmm9,%xmm0 @@ 
-27673,7 +27664,7 @@ ALIGN 16 DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax) DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax) DB 0,128,0,4,8,12 ; add %al,0xc080400(%rax) - DB 1,5,9,13,2,6 ; add %eax,0x6020d09(%rip) # 6021fc3 <_sk_xor__ssse3_lowp+0x6020e16> + DB 1,5,9,13,2,6 ; add %eax,0x6020d09(%rip) # 6021fb3 <_sk_xor__ssse3_lowp+0x6020e1a> DB 10,14 ; or (%rsi),%cl DB 3,7 ; add (%rdi),%eax DB 11,15 ; or (%rdi),%ecx diff --git a/src/jumper/SkJumper_stages_lowp.cpp b/src/jumper/SkJumper_stages_lowp.cpp index 73b6def4ac..286133e38f 100644 --- a/src/jumper/SkJumper_stages_lowp.cpp +++ b/src/jumper/SkJumper_stages_lowp.cpp @@ -336,7 +336,6 @@ BLEND_MODE(dstover) { return mad(s, inv(da), d); } BLEND_MODE(modulate) { return s*d; } BLEND_MODE(multiply) { return s*inv(da) + d*inv(sa) + s*d; } -BLEND_MODE(plus_) { return s + d; } BLEND_MODE(screen) { return s + inv(s)*d; } BLEND_MODE(xor_) { return s*inv(da) + d*inv(sa); } |