aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
authorGravatar Mike Klein <mtklein@chromium.org>2017-06-06 11:26:33 -0400
committerGravatar Skia Commit-Bot <skia-commit-bot@chromium.org>2017-06-06 16:24:43 +0000
commitce4b6c100f6e700b50933f75e3b4429357001028 (patch)
tree047b1d60ba06f0dd9bf257315e0178aaa333e636 /src
parentc665fddeab30dde8f43862d2e8381f4c73e80079 (diff)
less naive lowp constant_color
This is as good as we can get without switching away from float inputs. All diffs due to rounding (from the +256.0f). Change-Id: I0d314f111d313577ce9078660178be17e865f11e Reviewed-on: https://skia-review.googlesource.com/18845 Commit-Queue: Mike Klein <mtklein@chromium.org> Reviewed-by: Mike Reed <reed@google.com>
Diffstat (limited to 'src')
-rw-r--r--src/jumper/SkJumper_generated.S192
-rw-r--r--src/jumper/SkJumper_generated_win.S193
-rw-r--r--src/jumper/SkJumper_stages_lowp.cpp12
3 files changed, 189 insertions, 208 deletions
diff --git a/src/jumper/SkJumper_generated.S b/src/jumper/SkJumper_generated.S
index 7328536fa0..00ca2e080f 100644
--- a/src/jumper/SkJumper_generated.S
+++ b/src/jumper/SkJumper_generated.S
@@ -36805,30 +36805,16 @@ HIDDEN _sk_constant_color_ssse3_lowp
FUNCTION(_sk_constant_color_ssse3_lowp)
_sk_constant_color_ssse3_lowp:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 243,15,16,29,26,17,0,0 // movss 0x111a(%rip),%xmm3 # 11d8 <_sk_xor__ssse3_lowp+0x9b>
- .byte 243,15,16,0 // movss (%rax),%xmm0
- .byte 243,15,89,195 // mulss %xmm3,%xmm0
- .byte 243,68,15,44,200 // cvttss2si %xmm0,%r9d
- .byte 102,65,15,110,193 // movd %r9d,%xmm0
- .byte 242,15,112,192,0 // pshuflw $0x0,%xmm0,%xmm0
+ .byte 15,16,24 // movups (%rax),%xmm3
+ .byte 15,88,29,224,16,0,0 // addps 0x10e0(%rip),%xmm3 # 11a0 <_sk_xor__ssse3_lowp+0xa3>
+ .byte 242,15,112,195,0 // pshuflw $0x0,%xmm3,%xmm0
.byte 102,15,112,192,80 // pshufd $0x50,%xmm0,%xmm0
- .byte 243,15,16,72,4 // movss 0x4(%rax),%xmm1
- .byte 243,15,89,203 // mulss %xmm3,%xmm1
- .byte 243,68,15,44,201 // cvttss2si %xmm1,%r9d
- .byte 102,65,15,110,201 // movd %r9d,%xmm1
- .byte 242,15,112,201,0 // pshuflw $0x0,%xmm1,%xmm1
+ .byte 242,15,112,203,170 // pshuflw $0xaa,%xmm3,%xmm1
.byte 102,15,112,201,80 // pshufd $0x50,%xmm1,%xmm1
- .byte 243,15,16,80,8 // movss 0x8(%rax),%xmm2
- .byte 243,15,89,211 // mulss %xmm3,%xmm2
- .byte 243,68,15,44,202 // cvttss2si %xmm2,%r9d
- .byte 102,65,15,110,209 // movd %r9d,%xmm2
- .byte 242,15,112,210,0 // pshuflw $0x0,%xmm2,%xmm2
- .byte 102,15,112,210,80 // pshufd $0x50,%xmm2,%xmm2
- .byte 243,15,89,88,12 // mulss 0xc(%rax),%xmm3
- .byte 243,15,44,195 // cvttss2si %xmm3,%eax
- .byte 102,15,110,216 // movd %eax,%xmm3
- .byte 242,15,112,219,0 // pshuflw $0x0,%xmm3,%xmm3
- .byte 102,15,112,219,80 // pshufd $0x50,%xmm3,%xmm3
+ .byte 243,15,112,211,0 // pshufhw $0x0,%xmm3,%xmm2
+ .byte 102,15,112,210,250 // pshufd $0xfa,%xmm2,%xmm2
+ .byte 243,15,112,219,170 // pshufhw $0xaa,%xmm3,%xmm3
+ .byte 102,15,112,219,250 // pshufd $0xfa,%xmm3,%xmm3
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -36837,7 +36823,7 @@ HIDDEN _sk_set_rgb_ssse3_lowp
FUNCTION(_sk_set_rgb_ssse3_lowp)
_sk_set_rgb_ssse3_lowp:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 243,15,16,21,163,16,0,0 // movss 0x10a3(%rip),%xmm2 # 11dc <_sk_xor__ssse3_lowp+0x9f>
+ .byte 243,15,16,21,10,18,0,0 // movss 0x120a(%rip),%xmm2 # 1300 <_sk_xor__ssse3_lowp+0x203>
.byte 243,15,16,0 // movss (%rax),%xmm0
.byte 243,15,89,194 // mulss %xmm2,%xmm0
.byte 243,68,15,44,200 // cvttss2si %xmm0,%r9d
@@ -36878,10 +36864,10 @@ _sk_load_8888_ssse3_lowp:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,24 // mov (%rax),%r11
.byte 77,133,192 // test %r8,%r8
- .byte 117,113 // jne 22a <_sk_load_8888_ssse3_lowp+0x7b>
+ .byte 117,113 // jne 1e7 <_sk_load_8888_ssse3_lowp+0x7b>
.byte 69,15,16,76,147,16 // movups 0x10(%r11,%rdx,4),%xmm9
.byte 69,15,16,4,147 // movups (%r11,%rdx,4),%xmm8
- .byte 102,15,111,5,36,16,0,0 // movdqa 0x1024(%rip),%xmm0 # 11f0 <_sk_xor__ssse3_lowp+0xb3>
+ .byte 102,15,111,5,39,16,0,0 // movdqa 0x1027(%rip),%xmm0 # 11b0 <_sk_xor__ssse3_lowp+0xb3>
.byte 102,68,15,56,0,192 // pshufb %xmm0,%xmm8
.byte 102,68,15,56,0,200 // pshufb %xmm0,%xmm9
.byte 102,65,15,111,208 // movdqa %xmm8,%xmm2
@@ -36895,7 +36881,7 @@ _sk_load_8888_ssse3_lowp:
.byte 102,15,239,210 // pxor %xmm2,%xmm2
.byte 102,65,15,96,208 // punpcklbw %xmm8,%xmm2
.byte 102,65,15,104,216 // punpckhbw %xmm8,%xmm3
- .byte 102,68,15,111,5,238,15,0,0 // movdqa 0xfee(%rip),%xmm8 # 1200 <_sk_xor__ssse3_lowp+0xc3>
+ .byte 102,68,15,111,5,241,15,0,0 // movdqa 0xff1(%rip),%xmm8 # 11c0 <_sk_xor__ssse3_lowp+0xc3>
.byte 102,65,15,228,192 // pmulhuw %xmm8,%xmm0
.byte 102,65,15,228,200 // pmulhuw %xmm8,%xmm1
.byte 102,65,15,228,208 // pmulhuw %xmm8,%xmm2
@@ -36908,9 +36894,9 @@ _sk_load_8888_ssse3_lowp:
.byte 69,15,87,192 // xorps %xmm8,%xmm8
.byte 65,254,201 // dec %r9b
.byte 65,128,249,6 // cmp $0x6,%r9b
- .byte 119,129 // ja 1c4 <_sk_load_8888_ssse3_lowp+0x15>
+ .byte 119,129 // ja 181 <_sk_load_8888_ssse3_lowp+0x15>
.byte 69,15,182,201 // movzbl %r9b,%r9d
- .byte 76,141,21,130,0,0,0 // lea 0x82(%rip),%r10 # 2d0 <_sk_load_8888_ssse3_lowp+0x121>
+ .byte 76,141,21,133,0,0,0 // lea 0x85(%rip),%r10 # 290 <_sk_load_8888_ssse3_lowp+0x124>
.byte 75,99,4,138 // movslq (%r10,%r9,4),%rax
.byte 76,1,208 // add %r10,%rax
.byte 255,224 // jmpq *%rax
@@ -36935,20 +36921,21 @@ _sk_load_8888_ssse3_lowp:
.byte 68,15,40,192 // movaps %xmm0,%xmm8
.byte 243,65,15,16,4,147 // movss (%r11,%rdx,4),%xmm0
.byte 243,68,15,16,192 // movss %xmm0,%xmm8
- .byte 233,244,254,255,255 // jmpq 1c4 <_sk_load_8888_ssse3_lowp+0x15>
- .byte 240,255 // lock (bad)
+ .byte 233,244,254,255,255 // jmpq 181 <_sk_load_8888_ssse3_lowp+0x15>
+ .byte 15,31,0 // nopl (%rax)
+ .byte 237 // in (%dx),%eax
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 219,255 // (bad)
.byte 255 // (bad)
- .byte 255,202 // dec %edx
+ .byte 216,255 // fdivr %st(7),%st
.byte 255 // (bad)
+ .byte 255,199 // inc %edi
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 185,255,255,255,173 // mov $0xadffffff,%ecx
+ .byte 255,182,255,255,255,170 // pushq -0x55000001(%rsi)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,152,255,255,255,135 // lcall *-0x78000001(%rax)
+ .byte 255,149,255,255,255,132 // callq *-0x7b000001(%rbp)
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // .byte 0xff
@@ -36976,7 +36963,7 @@ _sk_store_8888_ssse3_lowp:
.byte 102,69,15,97,194 // punpcklwd %xmm10,%xmm8
.byte 102,69,15,105,202 // punpckhwd %xmm10,%xmm9
.byte 77,133,192 // test %r8,%r8
- .byte 117,17 // jne 35b <_sk_store_8888_ssse3_lowp+0x6f>
+ .byte 117,17 // jne 31b <_sk_store_8888_ssse3_lowp+0x6f>
.byte 243,69,15,127,76,147,16 // movdqu %xmm9,0x10(%r11,%rdx,4)
.byte 243,69,15,127,4,147 // movdqu %xmm8,(%r11,%rdx,4)
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -36985,9 +36972,9 @@ _sk_store_8888_ssse3_lowp:
.byte 65,128,225,7 // and $0x7,%r9b
.byte 65,254,201 // dec %r9b
.byte 65,128,249,6 // cmp $0x6,%r9b
- .byte 119,236 // ja 357 <_sk_store_8888_ssse3_lowp+0x6b>
+ .byte 119,236 // ja 317 <_sk_store_8888_ssse3_lowp+0x6b>
.byte 69,15,182,201 // movzbl %r9b,%r9d
- .byte 76,141,21,90,0,0,0 // lea 0x5a(%rip),%r10 # 3d0 <_sk_store_8888_ssse3_lowp+0xe4>
+ .byte 76,141,21,90,0,0,0 // lea 0x5a(%rip),%r10 # 390 <_sk_store_8888_ssse3_lowp+0xe4>
.byte 75,99,4,138 // movslq (%r10,%r9,4),%rax
.byte 76,1,208 // add %r10,%rax
.byte 255,224 // jmpq *%rax
@@ -37003,7 +36990,7 @@ _sk_store_8888_ssse3_lowp:
.byte 102,69,15,112,200,229 // pshufd $0xe5,%xmm8,%xmm9
.byte 102,69,15,126,76,147,4 // movd %xmm9,0x4(%r11,%rdx,4)
.byte 102,69,15,126,4,147 // movd %xmm8,(%r11,%rdx,4)
- .byte 235,136 // jmp 357 <_sk_store_8888_ssse3_lowp+0x6b>
+ .byte 235,136 // jmp 317 <_sk_store_8888_ssse3_lowp+0x6b>
.byte 144 // nop
.byte 247,255 // idiv %edi
.byte 255 // (bad)
@@ -37033,11 +37020,11 @@ _sk_load_a8_ssse3_lowp:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,24 // mov (%rax),%r11
.byte 77,133,192 // test %r8,%r8
- .byte 117,37 // jne 41b <_sk_load_a8_ssse3_lowp+0x2f>
+ .byte 117,37 // jne 3db <_sk_load_a8_ssse3_lowp+0x2f>
.byte 243,65,15,126,28,19 // movq (%r11,%rdx,1),%xmm3
.byte 102,15,96,216 // punpcklbw %xmm0,%xmm3
.byte 102,15,113,243,8 // psllw $0x8,%xmm3
- .byte 102,15,228,29,3,14,0,0 // pmulhuw 0xe03(%rip),%xmm3 # 1210 <_sk_xor__ssse3_lowp+0xd3>
+ .byte 102,15,228,29,3,14,0,0 // pmulhuw 0xe03(%rip),%xmm3 # 11d0 <_sk_xor__ssse3_lowp+0xd3>
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 102,15,239,192 // pxor %xmm0,%xmm0
.byte 15,87,201 // xorps %xmm1,%xmm1
@@ -37048,9 +37035,9 @@ _sk_load_a8_ssse3_lowp:
.byte 102,15,239,219 // pxor %xmm3,%xmm3
.byte 65,254,201 // dec %r9b
.byte 65,128,249,6 // cmp $0x6,%r9b
- .byte 119,209 // ja 400 <_sk_load_a8_ssse3_lowp+0x14>
+ .byte 119,209 // ja 3c0 <_sk_load_a8_ssse3_lowp+0x14>
.byte 69,15,182,201 // movzbl %r9b,%r9d
- .byte 76,141,21,94,0,0,0 // lea 0x5e(%rip),%r10 # 498 <_sk_load_a8_ssse3_lowp+0xac>
+ .byte 76,141,21,94,0,0,0 // lea 0x5e(%rip),%r10 # 458 <_sk_load_a8_ssse3_lowp+0xac>
.byte 75,99,4,138 // movslq (%r10,%r9,4),%rax
.byte 76,1,208 // add %r10,%rax
.byte 255,224 // jmpq *%rax
@@ -37069,7 +37056,7 @@ _sk_load_a8_ssse3_lowp:
.byte 102,15,196,216,1 // pinsrw $0x1,%eax,%xmm3
.byte 65,15,182,4,19 // movzbl (%r11,%rdx,1),%eax
.byte 102,15,196,216,0 // pinsrw $0x0,%eax,%xmm3
- .byte 233,104,255,255,255 // jmpq 400 <_sk_load_a8_ssse3_lowp+0x14>
+ .byte 233,104,255,255,255 // jmpq 3c0 <_sk_load_a8_ssse3_lowp+0x14>
.byte 241 // icebp
.byte 255 // (bad)
.byte 255 // (bad)
@@ -37101,7 +37088,7 @@ _sk_store_a8_ssse3_lowp:
.byte 102,65,15,113,208,7 // psrlw $0x7,%xmm8
.byte 102,69,15,103,192 // packuswb %xmm8,%xmm8
.byte 77,133,192 // test %r8,%r8
- .byte 117,10 // jne 4d8 <_sk_store_a8_ssse3_lowp+0x24>
+ .byte 117,10 // jne 498 <_sk_store_a8_ssse3_lowp+0x24>
.byte 242,69,15,17,4,19 // movsd %xmm8,(%r11,%rdx,1)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -37109,10 +37096,10 @@ _sk_store_a8_ssse3_lowp:
.byte 65,128,225,7 // and $0x7,%r9b
.byte 65,254,201 // dec %r9b
.byte 65,128,249,6 // cmp $0x6,%r9b
- .byte 119,236 // ja 4d4 <_sk_store_a8_ssse3_lowp+0x20>
+ .byte 119,236 // ja 494 <_sk_store_a8_ssse3_lowp+0x20>
.byte 102,68,15,96,192 // punpcklbw %xmm0,%xmm8
.byte 69,15,182,201 // movzbl %r9b,%r9d
- .byte 76,141,21,128,0,0,0 // lea 0x80(%rip),%r10 # 578 <_sk_store_a8_ssse3_lowp+0xc4>
+ .byte 76,141,21,128,0,0,0 // lea 0x80(%rip),%r10 # 538 <_sk_store_a8_ssse3_lowp+0xc4>
.byte 75,99,4,138 // movslq (%r10,%r9,4),%rax
.byte 76,1,208 // add %r10,%rax
.byte 255,224 // jmpq *%rax
@@ -37137,9 +37124,9 @@ _sk_store_a8_ssse3_lowp:
.byte 102,68,15,127,68,36,136 // movdqa %xmm8,-0x78(%rsp)
.byte 138,68,36,136 // mov -0x78(%rsp),%al
.byte 65,136,4,19 // mov %al,(%r11,%rdx,1)
- .byte 233,95,255,255,255 // jmpq 4d4 <_sk_store_a8_ssse3_lowp+0x20>
+ .byte 233,95,255,255,255 // jmpq 494 <_sk_store_a8_ssse3_lowp+0x20>
.byte 15,31,0 // nopl (%rax)
- .byte 233,255,255,255,217 // jmpq ffffffffda00057c <_sk_xor__ssse3_lowp+0xffffffffd9fff43f>
+ .byte 233,255,255,255,217 // jmpq ffffffffda00053c <_sk_xor__ssse3_lowp+0xffffffffd9fff43f>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,201 // dec %ecx
@@ -37161,13 +37148,13 @@ _sk_load_g8_ssse3_lowp:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,24 // mov (%rax),%r11
.byte 77,133,192 // test %r8,%r8
- .byte 117,42 // jne 5c8 <_sk_load_g8_ssse3_lowp+0x34>
+ .byte 117,42 // jne 588 <_sk_load_g8_ssse3_lowp+0x34>
.byte 243,65,15,126,4,19 // movq (%r11,%rdx,1),%xmm0
.byte 102,15,96,192 // punpcklbw %xmm0,%xmm0
.byte 102,15,113,240,8 // psllw $0x8,%xmm0
- .byte 102,15,228,5,107,12,0,0 // pmulhuw 0xc6b(%rip),%xmm0 # 1220 <_sk_xor__ssse3_lowp+0xe3>
+ .byte 102,15,228,5,107,12,0,0 // pmulhuw 0xc6b(%rip),%xmm0 # 11e0 <_sk_xor__ssse3_lowp+0xe3>
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 15,40,29,114,12,0,0 // movaps 0xc72(%rip),%xmm3 # 1230 <_sk_xor__ssse3_lowp+0xf3>
+ .byte 15,40,29,114,12,0,0 // movaps 0xc72(%rip),%xmm3 # 11f0 <_sk_xor__ssse3_lowp+0xf3>
.byte 102,15,111,200 // movdqa %xmm0,%xmm1
.byte 102,15,111,208 // movdqa %xmm0,%xmm2
.byte 255,224 // jmpq *%rax
@@ -37176,9 +37163,9 @@ _sk_load_g8_ssse3_lowp:
.byte 102,15,239,192 // pxor %xmm0,%xmm0
.byte 65,254,201 // dec %r9b
.byte 65,128,249,6 // cmp $0x6,%r9b
- .byte 119,204 // ja 5a8 <_sk_load_g8_ssse3_lowp+0x14>
+ .byte 119,204 // ja 568 <_sk_load_g8_ssse3_lowp+0x14>
.byte 69,15,182,201 // movzbl %r9b,%r9d
- .byte 76,141,21,97,0,0,0 // lea 0x61(%rip),%r10 # 648 <_sk_load_g8_ssse3_lowp+0xb4>
+ .byte 76,141,21,97,0,0,0 // lea 0x61(%rip),%r10 # 608 <_sk_load_g8_ssse3_lowp+0xb4>
.byte 75,99,4,138 // movslq (%r10,%r9,4),%rax
.byte 76,1,208 // add %r10,%rax
.byte 255,224 // jmpq *%rax
@@ -37197,7 +37184,7 @@ _sk_load_g8_ssse3_lowp:
.byte 102,15,196,192,1 // pinsrw $0x1,%eax,%xmm0
.byte 65,15,182,4,19 // movzbl (%r11,%rdx,1),%eax
.byte 102,15,196,192,0 // pinsrw $0x0,%eax,%xmm0
- .byte 233,99,255,255,255 // jmpq 5a8 <_sk_load_g8_ssse3_lowp+0x14>
+ .byte 233,99,255,255,255 // jmpq 568 <_sk_load_g8_ssse3_lowp+0x14>
.byte 15,31,0 // nopl (%rax)
.byte 238 // out %al,(%dx)
.byte 255 // (bad)
@@ -37226,11 +37213,11 @@ _sk_srcover_rgba_8888_ssse3_lowp:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,24 // mov (%rax),%r11
.byte 77,133,192 // test %r8,%r8
- .byte 15,133,66,1,0,0 // jne 7b4 <_sk_srcover_rgba_8888_ssse3_lowp+0x150>
+ .byte 15,133,66,1,0,0 // jne 774 <_sk_srcover_rgba_8888_ssse3_lowp+0x150>
.byte 69,15,16,76,147,16 // movups 0x10(%r11,%rdx,4),%xmm9
.byte 69,15,16,4,147 // movups (%r11,%rdx,4),%xmm8
.byte 77,133,192 // test %r8,%r8
- .byte 102,15,111,37,184,11,0,0 // movdqa 0xbb8(%rip),%xmm4 # 1240 <_sk_xor__ssse3_lowp+0x103>
+ .byte 102,15,111,37,184,11,0,0 // movdqa 0xbb8(%rip),%xmm4 # 1200 <_sk_xor__ssse3_lowp+0x103>
.byte 102,68,15,56,0,196 // pshufb %xmm4,%xmm8
.byte 102,68,15,56,0,204 // pshufb %xmm4,%xmm9
.byte 102,65,15,111,240 // movdqa %xmm8,%xmm6
@@ -37244,12 +37231,12 @@ _sk_srcover_rgba_8888_ssse3_lowp:
.byte 102,15,239,246 // pxor %xmm6,%xmm6
.byte 102,65,15,96,240 // punpcklbw %xmm8,%xmm6
.byte 102,65,15,104,248 // punpckhbw %xmm8,%xmm7
- .byte 102,68,15,111,5,130,11,0,0 // movdqa 0xb82(%rip),%xmm8 # 1250 <_sk_xor__ssse3_lowp+0x113>
+ .byte 102,68,15,111,5,130,11,0,0 // movdqa 0xb82(%rip),%xmm8 # 1210 <_sk_xor__ssse3_lowp+0x113>
.byte 102,65,15,228,224 // pmulhuw %xmm8,%xmm4
.byte 102,65,15,228,232 // pmulhuw %xmm8,%xmm5
.byte 102,65,15,228,240 // pmulhuw %xmm8,%xmm6
.byte 102,65,15,228,248 // pmulhuw %xmm8,%xmm7
- .byte 102,68,15,111,29,117,11,0,0 // movdqa 0xb75(%rip),%xmm11 # 1260 <_sk_xor__ssse3_lowp+0x123>
+ .byte 102,68,15,111,29,117,11,0,0 // movdqa 0xb75(%rip),%xmm11 # 1220 <_sk_xor__ssse3_lowp+0x123>
.byte 102,68,15,249,219 // psubw %xmm3,%xmm11
.byte 102,68,15,111,196 // movdqa %xmm4,%xmm8
.byte 102,69,15,56,11,195 // pmulhrsw %xmm11,%xmm8
@@ -37282,7 +37269,7 @@ _sk_srcover_rgba_8888_ssse3_lowp:
.byte 102,15,111,193 // movdqa %xmm1,%xmm0
.byte 102,15,97,194 // punpcklwd %xmm2,%xmm0
.byte 102,15,105,202 // punpckhwd %xmm2,%xmm1
- .byte 15,133,207,0,0,0 // jne 85e <_sk_srcover_rgba_8888_ssse3_lowp+0x1fa>
+ .byte 15,133,207,0,0,0 // jne 81e <_sk_srcover_rgba_8888_ssse3_lowp+0x1fa>
.byte 243,65,15,127,76,147,16 // movdqu %xmm1,0x10(%r11,%rdx,4)
.byte 243,65,15,127,4,147 // movdqu %xmm0,(%r11,%rdx,4)
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -37297,9 +37284,9 @@ _sk_srcover_rgba_8888_ssse3_lowp:
.byte 69,15,87,192 // xorps %xmm8,%xmm8
.byte 65,254,201 // dec %r9b
.byte 65,128,249,6 // cmp $0x6,%r9b
- .byte 15,135,172,254,255,255 // ja 67d <_sk_srcover_rgba_8888_ssse3_lowp+0x19>
+ .byte 15,135,172,254,255,255 // ja 63d <_sk_srcover_rgba_8888_ssse3_lowp+0x19>
.byte 69,15,182,201 // movzbl %r9b,%r9d
- .byte 76,141,21,248,0,0,0 // lea 0xf8(%rip),%r10 # 8d4 <_sk_srcover_rgba_8888_ssse3_lowp+0x270>
+ .byte 76,141,21,248,0,0,0 // lea 0xf8(%rip),%r10 # 894 <_sk_srcover_rgba_8888_ssse3_lowp+0x270>
.byte 75,99,4,138 // movslq (%r10,%r9,4),%rax
.byte 76,1,208 // add %r10,%rax
.byte 255,224 // jmpq *%rax
@@ -37324,14 +37311,14 @@ _sk_srcover_rgba_8888_ssse3_lowp:
.byte 68,15,40,196 // movaps %xmm4,%xmm8
.byte 243,65,15,16,36,147 // movss (%r11,%rdx,4),%xmm4
.byte 243,68,15,16,196 // movss %xmm4,%xmm8
- .byte 233,31,254,255,255 // jmpq 67d <_sk_srcover_rgba_8888_ssse3_lowp+0x19>
+ .byte 233,31,254,255,255 // jmpq 63d <_sk_srcover_rgba_8888_ssse3_lowp+0x19>
.byte 69,137,193 // mov %r8d,%r9d
.byte 65,128,225,7 // and $0x7,%r9b
.byte 65,254,201 // dec %r9b
.byte 65,128,249,6 // cmp $0x6,%r9b
- .byte 15,135,42,255,255,255 // ja 79c <_sk_srcover_rgba_8888_ssse3_lowp+0x138>
+ .byte 15,135,42,255,255,255 // ja 75c <_sk_srcover_rgba_8888_ssse3_lowp+0x138>
.byte 65,15,182,193 // movzbl %r9b,%eax
- .byte 76,141,13,115,0,0,0 // lea 0x73(%rip),%r9 # 8f0 <_sk_srcover_rgba_8888_ssse3_lowp+0x28c>
+ .byte 76,141,13,115,0,0,0 // lea 0x73(%rip),%r9 # 8b0 <_sk_srcover_rgba_8888_ssse3_lowp+0x28c>
.byte 73,99,4,129 // movslq (%r9,%rax,4),%rax
.byte 76,1,200 // add %r9,%rax
.byte 255,224 // jmpq *%rax
@@ -37347,8 +37334,8 @@ _sk_srcover_rgba_8888_ssse3_lowp:
.byte 102,15,112,200,229 // pshufd $0xe5,%xmm0,%xmm1
.byte 102,65,15,126,76,147,4 // movd %xmm1,0x4(%r11,%rdx,4)
.byte 102,65,15,126,4,147 // movd %xmm0,(%r11,%rdx,4)
- .byte 233,200,254,255,255 // jmpq 79c <_sk_srcover_rgba_8888_ssse3_lowp+0x138>
- .byte 122,255 // jp 8d5 <_sk_srcover_rgba_8888_ssse3_lowp+0x271>
+ .byte 233,200,254,255,255 // jmpq 75c <_sk_srcover_rgba_8888_ssse3_lowp+0x138>
+ .byte 122,255 // jp 895 <_sk_srcover_rgba_8888_ssse3_lowp+0x271>
.byte 255 // (bad)
.byte 255,101,255 // jmpq *-0x1(%rbp)
.byte 255 // (bad)
@@ -37387,7 +37374,7 @@ FUNCTION(_sk_scale_1_float_ssse3_lowp)
_sk_scale_1_float_ssse3_lowp:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 243,68,15,16,0 // movss (%rax),%xmm8
- .byte 243,68,15,89,5,196,8,0,0 // mulss 0x8c4(%rip),%xmm8 # 11e0 <_sk_xor__ssse3_lowp+0xa3>
+ .byte 243,68,15,89,5,40,10,0,0 // mulss 0xa28(%rip),%xmm8 # 1304 <_sk_xor__ssse3_lowp+0x207>
.byte 243,65,15,44,192 // cvttss2si %xmm8,%eax
.byte 102,68,15,110,192 // movd %eax,%xmm8
.byte 242,69,15,112,192,0 // pshuflw $0x0,%xmm8,%xmm8
@@ -37410,11 +37397,11 @@ _sk_scale_u8_ssse3_lowp:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,24 // mov (%rax),%r11
.byte 77,133,192 // test %r8,%r8
- .byte 117,74 // jne 9b6 <_sk_scale_u8_ssse3_lowp+0x54>
+ .byte 117,74 // jne 976 <_sk_scale_u8_ssse3_lowp+0x54>
.byte 243,69,15,126,4,19 // movq (%r11,%rdx,1),%xmm8
.byte 102,68,15,96,192 // punpcklbw %xmm0,%xmm8
.byte 102,65,15,113,240,8 // psllw $0x8,%xmm8
- .byte 102,68,15,228,5,234,8,0,0 // pmulhuw 0x8ea(%rip),%xmm8 # 1270 <_sk_xor__ssse3_lowp+0x133>
+ .byte 102,68,15,228,5,234,8,0,0 // pmulhuw 0x8ea(%rip),%xmm8 # 1230 <_sk_xor__ssse3_lowp+0x133>
.byte 102,65,15,56,11,192 // pmulhrsw %xmm8,%xmm0
.byte 102,15,56,29,192 // pabsw %xmm0,%xmm0
.byte 102,65,15,56,11,200 // pmulhrsw %xmm8,%xmm1
@@ -37430,9 +37417,9 @@ _sk_scale_u8_ssse3_lowp:
.byte 102,69,15,239,192 // pxor %xmm8,%xmm8
.byte 65,254,201 // dec %r9b
.byte 65,128,249,6 // cmp $0x6,%r9b
- .byte 119,172 // ja 977 <_sk_scale_u8_ssse3_lowp+0x15>
+ .byte 119,172 // ja 937 <_sk_scale_u8_ssse3_lowp+0x15>
.byte 69,15,182,201 // movzbl %r9b,%r9d
- .byte 76,141,21,102,0,0,0 // lea 0x66(%rip),%r10 # a3c <_sk_scale_u8_ssse3_lowp+0xda>
+ .byte 76,141,21,102,0,0,0 // lea 0x66(%rip),%r10 # 9fc <_sk_scale_u8_ssse3_lowp+0xda>
.byte 75,99,4,138 // movslq (%r10,%r9,4),%rax
.byte 76,1,208 // add %r10,%rax
.byte 255,224 // jmpq *%rax
@@ -37451,7 +37438,7 @@ _sk_scale_u8_ssse3_lowp:
.byte 102,68,15,196,192,1 // pinsrw $0x1,%eax,%xmm8
.byte 65,15,182,4,19 // movzbl (%r11,%rdx,1),%eax
.byte 102,68,15,196,192,0 // pinsrw $0x0,%eax,%xmm8
- .byte 233,59,255,255,255 // jmpq 977 <_sk_scale_u8_ssse3_lowp+0x15>
+ .byte 233,59,255,255,255 // jmpq 937 <_sk_scale_u8_ssse3_lowp+0x15>
.byte 240,255 // lock (bad)
.byte 255 // (bad)
.byte 255,228 // jmpq *%rsp
@@ -37476,14 +37463,14 @@ FUNCTION(_sk_lerp_1_float_ssse3_lowp)
_sk_lerp_1_float_ssse3_lowp:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 243,68,15,16,0 // movss (%rax),%xmm8
- .byte 243,68,15,89,5,124,7,0,0 // mulss 0x77c(%rip),%xmm8 # 11e4 <_sk_xor__ssse3_lowp+0xa7>
+ .byte 243,68,15,89,5,224,8,0,0 // mulss 0x8e0(%rip),%xmm8 # 1308 <_sk_xor__ssse3_lowp+0x20b>
.byte 243,65,15,44,192 // cvttss2si %xmm8,%eax
.byte 102,68,15,110,192 // movd %eax,%xmm8
.byte 242,69,15,112,192,0 // pshuflw $0x0,%xmm8,%xmm8
.byte 102,69,15,112,192,80 // pshufd $0x50,%xmm8,%xmm8
.byte 102,65,15,56,11,192 // pmulhrsw %xmm8,%xmm0
.byte 102,68,15,56,29,200 // pabsw %xmm0,%xmm9
- .byte 102,68,15,111,21,237,7,0,0 // movdqa 0x7ed(%rip),%xmm10 # 1280 <_sk_xor__ssse3_lowp+0x143>
+ .byte 102,68,15,111,21,237,7,0,0 // movdqa 0x7ed(%rip),%xmm10 # 1240 <_sk_xor__ssse3_lowp+0x143>
.byte 102,69,15,249,208 // psubw %xmm8,%xmm10
.byte 102,15,111,196 // movdqa %xmm4,%xmm0
.byte 102,65,15,56,11,194 // pmulhrsw %xmm10,%xmm0
@@ -37516,14 +37503,14 @@ _sk_lerp_u8_ssse3_lowp:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,24 // mov (%rax),%r11
.byte 77,133,192 // test %r8,%r8
- .byte 15,133,169,0,0,0 // jne bc4 <_sk_lerp_u8_ssse3_lowp+0xb7>
+ .byte 15,133,169,0,0,0 // jne b84 <_sk_lerp_u8_ssse3_lowp+0xb7>
.byte 243,69,15,126,4,19 // movq (%r11,%rdx,1),%xmm8
.byte 102,68,15,96,192 // punpcklbw %xmm0,%xmm8
.byte 102,65,15,113,240,8 // psllw $0x8,%xmm8
- .byte 102,68,15,228,5,91,7,0,0 // pmulhuw 0x75b(%rip),%xmm8 # 1290 <_sk_xor__ssse3_lowp+0x153>
+ .byte 102,68,15,228,5,91,7,0,0 // pmulhuw 0x75b(%rip),%xmm8 # 1250 <_sk_xor__ssse3_lowp+0x153>
.byte 102,65,15,56,11,192 // pmulhrsw %xmm8,%xmm0
.byte 102,68,15,56,29,200 // pabsw %xmm0,%xmm9
- .byte 102,68,15,111,21,86,7,0,0 // movdqa 0x756(%rip),%xmm10 # 12a0 <_sk_xor__ssse3_lowp+0x163>
+ .byte 102,68,15,111,21,86,7,0,0 // movdqa 0x756(%rip),%xmm10 # 1260 <_sk_xor__ssse3_lowp+0x163>
.byte 102,69,15,249,208 // psubw %xmm8,%xmm10
.byte 102,15,111,196 // movdqa %xmm4,%xmm0
.byte 102,65,15,56,11,194 // pmulhrsw %xmm10,%xmm0
@@ -37553,9 +37540,9 @@ _sk_lerp_u8_ssse3_lowp:
.byte 102,69,15,239,192 // pxor %xmm8,%xmm8
.byte 65,254,201 // dec %r9b
.byte 65,128,249,6 // cmp $0x6,%r9b
- .byte 15,135,73,255,255,255 // ja b26 <_sk_lerp_u8_ssse3_lowp+0x19>
+ .byte 15,135,73,255,255,255 // ja ae6 <_sk_lerp_u8_ssse3_lowp+0x19>
.byte 69,15,182,201 // movzbl %r9b,%r9d
- .byte 76,141,21,104,0,0,0 // lea 0x68(%rip),%r10 # c50 <_sk_lerp_u8_ssse3_lowp+0x143>
+ .byte 76,141,21,104,0,0,0 // lea 0x68(%rip),%r10 # c10 <_sk_lerp_u8_ssse3_lowp+0x143>
.byte 75,99,4,138 // movslq (%r10,%r9,4),%rax
.byte 76,1,208 // add %r10,%rax
.byte 255,224 // jmpq *%rax
@@ -37574,7 +37561,7 @@ _sk_lerp_u8_ssse3_lowp:
.byte 102,68,15,196,192,1 // pinsrw $0x1,%eax,%xmm8
.byte 65,15,182,4,19 // movzbl (%r11,%rdx,1),%eax
.byte 102,68,15,196,192,0 // pinsrw $0x0,%eax,%xmm8
- .byte 233,216,254,255,255 // jmpq b26 <_sk_lerp_u8_ssse3_lowp+0x19>
+ .byte 233,216,254,255,255 // jmpq ae6 <_sk_lerp_u8_ssse3_lowp+0x19>
.byte 102,144 // xchg %ax,%ax
.byte 238 // out %al,(%dx)
.byte 255 // (bad)
@@ -37666,7 +37653,7 @@ FUNCTION(_sk_srcatop_ssse3_lowp)
_sk_srcatop_ssse3_lowp:
.byte 102,15,56,11,199 // pmulhrsw %xmm7,%xmm0
.byte 102,68,15,56,29,192 // pabsw %xmm0,%xmm8
- .byte 102,68,15,111,13,193,5,0,0 // movdqa 0x5c1(%rip),%xmm9 # 12b0 <_sk_xor__ssse3_lowp+0x173>
+ .byte 102,68,15,111,13,193,5,0,0 // movdqa 0x5c1(%rip),%xmm9 # 1270 <_sk_xor__ssse3_lowp+0x173>
.byte 102,68,15,249,203 // psubw %xmm3,%xmm9
.byte 102,15,111,196 // movdqa %xmm4,%xmm0
.byte 102,65,15,56,11,193 // pmulhrsw %xmm9,%xmm0
@@ -37699,7 +37686,7 @@ _sk_dstatop_ssse3_lowp:
.byte 102,68,15,111,196 // movdqa %xmm4,%xmm8
.byte 102,68,15,56,11,195 // pmulhrsw %xmm3,%xmm8
.byte 102,69,15,56,29,192 // pabsw %xmm8,%xmm8
- .byte 102,68,15,111,13,64,5,0,0 // movdqa 0x540(%rip),%xmm9 # 12c0 <_sk_xor__ssse3_lowp+0x183>
+ .byte 102,68,15,111,13,64,5,0,0 // movdqa 0x540(%rip),%xmm9 # 1280 <_sk_xor__ssse3_lowp+0x183>
.byte 102,68,15,249,207 // psubw %xmm7,%xmm9
.byte 102,65,15,56,11,193 // pmulhrsw %xmm9,%xmm0
.byte 102,15,56,29,192 // pabsw %xmm0,%xmm0
@@ -37762,7 +37749,7 @@ HIDDEN _sk_srcout_ssse3_lowp
.globl _sk_srcout_ssse3_lowp
FUNCTION(_sk_srcout_ssse3_lowp)
_sk_srcout_ssse3_lowp:
- .byte 102,68,15,111,5,102,4,0,0 // movdqa 0x466(%rip),%xmm8 # 12d0 <_sk_xor__ssse3_lowp+0x193>
+ .byte 102,68,15,111,5,102,4,0,0 // movdqa 0x466(%rip),%xmm8 # 1290 <_sk_xor__ssse3_lowp+0x193>
.byte 102,68,15,249,199 // psubw %xmm7,%xmm8
.byte 102,65,15,56,11,192 // pmulhrsw %xmm8,%xmm0
.byte 102,15,56,29,192 // pabsw %xmm0,%xmm0
@@ -37779,7 +37766,7 @@ HIDDEN _sk_dstout_ssse3_lowp
.globl _sk_dstout_ssse3_lowp
FUNCTION(_sk_dstout_ssse3_lowp)
_sk_dstout_ssse3_lowp:
- .byte 102,68,15,111,5,55,4,0,0 // movdqa 0x437(%rip),%xmm8 # 12e0 <_sk_xor__ssse3_lowp+0x1a3>
+ .byte 102,68,15,111,5,55,4,0,0 // movdqa 0x437(%rip),%xmm8 # 12a0 <_sk_xor__ssse3_lowp+0x1a3>
.byte 102,68,15,249,195 // psubw %xmm3,%xmm8
.byte 102,15,111,196 // movdqa %xmm4,%xmm0
.byte 102,65,15,56,11,192 // pmulhrsw %xmm8,%xmm0
@@ -37799,7 +37786,7 @@ HIDDEN _sk_srcover_ssse3_lowp
.globl _sk_srcover_ssse3_lowp
FUNCTION(_sk_srcover_ssse3_lowp)
_sk_srcover_ssse3_lowp:
- .byte 102,68,15,111,5,252,3,0,0 // movdqa 0x3fc(%rip),%xmm8 # 12f0 <_sk_xor__ssse3_lowp+0x1b3>
+ .byte 102,68,15,111,5,252,3,0,0 // movdqa 0x3fc(%rip),%xmm8 # 12b0 <_sk_xor__ssse3_lowp+0x1b3>
.byte 102,68,15,249,195 // psubw %xmm3,%xmm8
.byte 102,68,15,111,204 // movdqa %xmm4,%xmm9
.byte 102,69,15,56,11,200 // pmulhrsw %xmm8,%xmm9
@@ -37823,7 +37810,7 @@ HIDDEN _sk_dstover_ssse3_lowp
.globl _sk_dstover_ssse3_lowp
FUNCTION(_sk_dstover_ssse3_lowp)
_sk_dstover_ssse3_lowp:
- .byte 102,68,15,111,5,167,3,0,0 // movdqa 0x3a7(%rip),%xmm8 # 1300 <_sk_xor__ssse3_lowp+0x1c3>
+ .byte 102,68,15,111,5,167,3,0,0 // movdqa 0x3a7(%rip),%xmm8 # 12c0 <_sk_xor__ssse3_lowp+0x1c3>
.byte 102,68,15,249,199 // psubw %xmm7,%xmm8
.byte 102,65,15,56,11,192 // pmulhrsw %xmm8,%xmm0
.byte 102,15,56,29,192 // pabsw %xmm0,%xmm0
@@ -37859,7 +37846,7 @@ HIDDEN _sk_multiply_ssse3_lowp
.globl _sk_multiply_ssse3_lowp
FUNCTION(_sk_multiply_ssse3_lowp)
_sk_multiply_ssse3_lowp:
- .byte 102,68,15,111,5,60,3,0,0 // movdqa 0x33c(%rip),%xmm8 # 1310 <_sk_xor__ssse3_lowp+0x1d3>
+ .byte 102,68,15,111,5,60,3,0,0 // movdqa 0x33c(%rip),%xmm8 # 12d0 <_sk_xor__ssse3_lowp+0x1d3>
.byte 102,69,15,111,200 // movdqa %xmm8,%xmm9
.byte 102,68,15,249,207 // psubw %xmm7,%xmm9
.byte 102,68,15,111,208 // movdqa %xmm0,%xmm10
@@ -37919,7 +37906,7 @@ HIDDEN _sk_screen_ssse3_lowp
.globl _sk_screen_ssse3_lowp
FUNCTION(_sk_screen_ssse3_lowp)
_sk_screen_ssse3_lowp:
- .byte 102,68,15,111,5,78,2,0,0 // movdqa 0x24e(%rip),%xmm8 # 1320 <_sk_xor__ssse3_lowp+0x1e3>
+ .byte 102,68,15,111,5,78,2,0,0 // movdqa 0x24e(%rip),%xmm8 # 12e0 <_sk_xor__ssse3_lowp+0x1e3>
.byte 102,69,15,111,200 // movdqa %xmm8,%xmm9
.byte 102,68,15,249,200 // psubw %xmm0,%xmm9
.byte 102,68,15,56,11,204 // pmulhrsw %xmm4,%xmm9
@@ -37946,7 +37933,7 @@ HIDDEN _sk_xor__ssse3_lowp
.globl _sk_xor__ssse3_lowp
FUNCTION(_sk_xor__ssse3_lowp)
_sk_xor__ssse3_lowp:
- .byte 102,68,15,111,5,234,1,0,0 // movdqa 0x1ea(%rip),%xmm8 # 1330 <_sk_xor__ssse3_lowp+0x1f3>
+ .byte 102,68,15,111,5,234,1,0,0 // movdqa 0x1ea(%rip),%xmm8 # 12f0 <_sk_xor__ssse3_lowp+0x1f3>
.byte 102,69,15,111,200 // movdqa %xmm8,%xmm9
.byte 102,68,15,249,207 // psubw %xmm7,%xmm9
.byte 102,65,15,56,11,193 // pmulhrsw %xmm9,%xmm0
@@ -37976,18 +37963,13 @@ _sk_xor__ssse3_lowp:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
-BALIGN4
- .byte 0,0 // add %al,(%rax)
- .byte 0,71,0 // add %al,0x0(%rdi)
- .byte 0,0 // add %al,(%rax)
- .byte 71,0,0 // rex.RXB add %r8b,(%r8)
- .byte 0,71,0 // add %al,0x0(%rdi)
- .byte 0,0 // add %al,(%rax)
- .byte 71 // rex.RXB
-
BALIGN16
- .byte 0,4,8 // add %al,(%rax,%rcx,1)
- .byte 12,1 // or $0x1,%al
+ .byte 0,0 // add %al,(%rax)
+ .byte 128,67,0,0 // addb $0x0,0x0(%rbx)
+ .byte 128,67,0,0 // addb $0x0,0x0(%rbx)
+ .byte 128,67,0,0 // addb $0x0,0x0(%rbx)
+ .byte 128,67,0,4 // addb $0x4,0x0(%rbx)
+ .byte 8,12,1 // or %cl,(%rcx,%rax,1)
.byte 5,9,13,2,6 // add $0x6020d09,%eax
.byte 10,14 // or (%rsi),%cl
.byte 3,7 // add (%rdi),%eax
@@ -38000,7 +37982,7 @@ BALIGN16
.byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax)
.byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax)
.byte 0,128,0,4,8,12 // add %al,0xc080400(%rax)
- .byte 1,5,9,13,2,6 // add %eax,0x6020d09(%rip) # 6021f53 <_sk_xor__ssse3_lowp+0x6020e16>
+ .byte 1,5,9,13,2,6 // add %eax,0x6020d09(%rip) # 6021f13 <_sk_xor__ssse3_lowp+0x6020e16>
.byte 10,14 // or (%rsi),%cl
.byte 3,7 // add (%rdi),%eax
.byte 11,15 // or (%rdi),%ecx
@@ -38040,4 +38022,12 @@ BALIGN16
.byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax)
.byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax)
.byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax)
+
+BALIGN4
+ .byte 0,0 // add %al,(%rax)
+ .byte 0,71,0 // add %al,0x0(%rdi)
+ .byte 0,0 // add %al,(%rax)
+ .byte 71,0,0 // rex.RXB add %r8b,(%r8)
+ .byte 0 // .byte 0x0
+ .byte 71 // rex.RXB
#endif
diff --git a/src/jumper/SkJumper_generated_win.S b/src/jumper/SkJumper_generated_win.S
index 5c0624f477..3d0ee6f78f 100644
--- a/src/jumper/SkJumper_generated_win.S
+++ b/src/jumper/SkJumper_generated_win.S
@@ -26226,37 +26226,23 @@ _sk_just_return_ssse3_lowp LABEL PROC
PUBLIC _sk_constant_color_ssse3_lowp
_sk_constant_color_ssse3_lowp LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 243,15,16,29,45,17,0,0 ; movss 0x112d(%rip),%xmm3 # 128c <_sk_xor__ssse3_lowp+0x9b>
- DB 243,15,16,0 ; movss (%rax),%xmm0
- DB 243,15,89,195 ; mulss %xmm3,%xmm0
- DB 243,68,15,44,200 ; cvttss2si %xmm0,%r9d
- DB 102,65,15,110,193 ; movd %r9d,%xmm0
- DB 242,15,112,192,0 ; pshuflw $0x0,%xmm0,%xmm0
+ DB 15,16,24 ; movups (%rax),%xmm3
+ DB 15,88,29,239,16,0,0 ; addps 0x10ef(%rip),%xmm3 # 1250 <_sk_xor__ssse3_lowp+0xa3>
+ DB 242,15,112,195,0 ; pshuflw $0x0,%xmm3,%xmm0
DB 102,15,112,192,80 ; pshufd $0x50,%xmm0,%xmm0
- DB 243,15,16,72,4 ; movss 0x4(%rax),%xmm1
- DB 243,15,89,203 ; mulss %xmm3,%xmm1
- DB 243,68,15,44,201 ; cvttss2si %xmm1,%r9d
- DB 102,65,15,110,201 ; movd %r9d,%xmm1
- DB 242,15,112,201,0 ; pshuflw $0x0,%xmm1,%xmm1
+ DB 242,15,112,203,170 ; pshuflw $0xaa,%xmm3,%xmm1
DB 102,15,112,201,80 ; pshufd $0x50,%xmm1,%xmm1
- DB 243,15,16,80,8 ; movss 0x8(%rax),%xmm2
- DB 243,15,89,211 ; mulss %xmm3,%xmm2
- DB 243,68,15,44,202 ; cvttss2si %xmm2,%r9d
- DB 102,65,15,110,209 ; movd %r9d,%xmm2
- DB 242,15,112,210,0 ; pshuflw $0x0,%xmm2,%xmm2
- DB 102,15,112,210,80 ; pshufd $0x50,%xmm2,%xmm2
- DB 243,15,89,88,12 ; mulss 0xc(%rax),%xmm3
- DB 243,15,44,195 ; cvttss2si %xmm3,%eax
- DB 102,15,110,216 ; movd %eax,%xmm3
- DB 242,15,112,219,0 ; pshuflw $0x0,%xmm3,%xmm3
- DB 102,15,112,219,80 ; pshufd $0x50,%xmm3,%xmm3
+ DB 243,15,112,211,0 ; pshufhw $0x0,%xmm3,%xmm2
+ DB 102,15,112,210,250 ; pshufd $0xfa,%xmm2,%xmm2
+ DB 243,15,112,219,170 ; pshufhw $0xaa,%xmm3,%xmm3
+ DB 102,15,112,219,250 ; pshufd $0xfa,%xmm3,%xmm3
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
PUBLIC _sk_set_rgb_ssse3_lowp
_sk_set_rgb_ssse3_lowp LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 243,15,16,21,182,16,0,0 ; movss 0x10b6(%rip),%xmm2 # 1290 <_sk_xor__ssse3_lowp+0x9f>
+ DB 243,15,16,21,25,18,0,0 ; movss 0x1219(%rip),%xmm2 # 13b0 <_sk_xor__ssse3_lowp+0x203>
DB 243,15,16,0 ; movss (%rax),%xmm0
DB 243,15,89,194 ; mulss %xmm2,%xmm0
DB 243,68,15,44,200 ; cvttss2si %xmm0,%r9d
@@ -26293,10 +26279,10 @@ _sk_load_8888_ssse3_lowp LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,24 ; mov (%rax),%r11
DB 77,133,192 ; test %r8,%r8
- DB 117,113 ; jne 2cb <_sk_load_8888_ssse3_lowp+0x7b>
+ DB 117,113 ; jne 288 <_sk_load_8888_ssse3_lowp+0x7b>
DB 69,15,16,76,147,16 ; movups 0x10(%r11,%rdx,4),%xmm9
DB 69,15,16,4,147 ; movups (%r11,%rdx,4),%xmm8
- DB 102,15,111,5,51,16,0,0 ; movdqa 0x1033(%rip),%xmm0 # 12a0 <_sk_xor__ssse3_lowp+0xaf>
+ DB 102,15,111,5,54,16,0,0 ; movdqa 0x1036(%rip),%xmm0 # 1260 <_sk_xor__ssse3_lowp+0xb3>
DB 102,68,15,56,0,192 ; pshufb %xmm0,%xmm8
DB 102,68,15,56,0,200 ; pshufb %xmm0,%xmm9
DB 102,65,15,111,208 ; movdqa %xmm8,%xmm2
@@ -26310,7 +26296,7 @@ _sk_load_8888_ssse3_lowp LABEL PROC
DB 102,15,239,210 ; pxor %xmm2,%xmm2
DB 102,65,15,96,208 ; punpcklbw %xmm8,%xmm2
DB 102,65,15,104,216 ; punpckhbw %xmm8,%xmm3
- DB 102,68,15,111,5,253,15,0,0 ; movdqa 0xffd(%rip),%xmm8 # 12b0 <_sk_xor__ssse3_lowp+0xbf>
+ DB 102,68,15,111,5,0,16,0,0 ; movdqa 0x1000(%rip),%xmm8 # 1270 <_sk_xor__ssse3_lowp+0xc3>
DB 102,65,15,228,192 ; pmulhuw %xmm8,%xmm0
DB 102,65,15,228,200 ; pmulhuw %xmm8,%xmm1
DB 102,65,15,228,208 ; pmulhuw %xmm8,%xmm2
@@ -26323,9 +26309,9 @@ _sk_load_8888_ssse3_lowp LABEL PROC
DB 69,15,87,192 ; xorps %xmm8,%xmm8
DB 65,254,201 ; dec %r9b
DB 65,128,249,6 ; cmp $0x6,%r9b
- DB 119,129 ; ja 265 <_sk_load_8888_ssse3_lowp+0x15>
+ DB 119,129 ; ja 222 <_sk_load_8888_ssse3_lowp+0x15>
DB 69,15,182,201 ; movzbl %r9b,%r9d
- DB 76,141,21,133,0,0,0 ; lea 0x85(%rip),%r10 # 374 <_sk_load_8888_ssse3_lowp+0x124>
+ DB 76,141,21,132,0,0,0 ; lea 0x84(%rip),%r10 # 330 <_sk_load_8888_ssse3_lowp+0x123>
DB 75,99,4,138 ; movslq (%r10,%r9,4),%rax
DB 76,1,208 ; add %r10,%rax
DB 255,224 ; jmpq *%rax
@@ -26350,21 +26336,21 @@ _sk_load_8888_ssse3_lowp LABEL PROC
DB 68,15,40,192 ; movaps %xmm0,%xmm8
DB 243,65,15,16,4,147 ; movss (%r11,%rdx,4),%xmm0
DB 243,68,15,16,192 ; movss %xmm0,%xmm8
- DB 233,244,254,255,255 ; jmpq 265 <_sk_load_8888_ssse3_lowp+0x15>
- DB 15,31,0 ; nopl (%rax)
- DB 237 ; in (%dx),%eax
+ DB 233,244,254,255,255 ; jmpq 222 <_sk_load_8888_ssse3_lowp+0x15>
+ DB 102,144 ; xchg %ax,%ax
+ DB 238 ; out %al,(%dx)
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 216,255 ; fdivr %st(7),%st
+ DB 217,255 ; fcos
DB 255 ; (bad)
- DB 255,199 ; inc %edi
+ DB 255,200 ; dec %eax
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,182,255,255,255,170 ; pushq -0x55000001(%rsi)
+ DB 255,183,255,255,255,171 ; pushq -0x54000001(%rdi)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,149,255,255,255,132 ; callq *-0x7b000001(%rbp)
+ DB 255,150,255,255,255,133 ; callq *-0x7a000001(%rsi)
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; .byte 0xff
@@ -26390,7 +26376,7 @@ _sk_store_8888_ssse3_lowp LABEL PROC
DB 102,69,15,97,194 ; punpcklwd %xmm10,%xmm8
DB 102,69,15,105,202 ; punpckhwd %xmm10,%xmm9
DB 77,133,192 ; test %r8,%r8
- DB 117,17 ; jne 3ff <_sk_store_8888_ssse3_lowp+0x6f>
+ DB 117,17 ; jne 3bb <_sk_store_8888_ssse3_lowp+0x6f>
DB 243,69,15,127,76,147,16 ; movdqu %xmm9,0x10(%r11,%rdx,4)
DB 243,69,15,127,4,147 ; movdqu %xmm8,(%r11,%rdx,4)
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -26399,9 +26385,9 @@ _sk_store_8888_ssse3_lowp LABEL PROC
DB 65,128,225,7 ; and $0x7,%r9b
DB 65,254,201 ; dec %r9b
DB 65,128,249,6 ; cmp $0x6,%r9b
- DB 119,236 ; ja 3fb <_sk_store_8888_ssse3_lowp+0x6b>
+ DB 119,236 ; ja 3b7 <_sk_store_8888_ssse3_lowp+0x6b>
DB 69,15,182,201 ; movzbl %r9b,%r9d
- DB 76,141,21,90,0,0,0 ; lea 0x5a(%rip),%r10 # 474 <_sk_store_8888_ssse3_lowp+0xe4>
+ DB 76,141,21,90,0,0,0 ; lea 0x5a(%rip),%r10 # 430 <_sk_store_8888_ssse3_lowp+0xe4>
DB 75,99,4,138 ; movslq (%r10,%r9,4),%rax
DB 76,1,208 ; add %r10,%rax
DB 255,224 ; jmpq *%rax
@@ -26417,7 +26403,7 @@ _sk_store_8888_ssse3_lowp LABEL PROC
DB 102,69,15,112,200,229 ; pshufd $0xe5,%xmm8,%xmm9
DB 102,69,15,126,76,147,4 ; movd %xmm9,0x4(%r11,%rdx,4)
DB 102,69,15,126,4,147 ; movd %xmm8,(%r11,%rdx,4)
- DB 235,136 ; jmp 3fb <_sk_store_8888_ssse3_lowp+0x6b>
+ DB 235,136 ; jmp 3b7 <_sk_store_8888_ssse3_lowp+0x6b>
DB 144 ; nop
DB 247,255 ; idiv %edi
DB 255 ; (bad)
@@ -26446,11 +26432,11 @@ _sk_load_a8_ssse3_lowp LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,24 ; mov (%rax),%r11
DB 77,133,192 ; test %r8,%r8
- DB 117,39 ; jne 4c2 <_sk_load_a8_ssse3_lowp+0x32>
+ DB 117,39 ; jne 47e <_sk_load_a8_ssse3_lowp+0x32>
DB 243,65,15,126,28,19 ; movq (%r11,%rdx,1),%xmm3
DB 102,15,96,216 ; punpcklbw %xmm0,%xmm3
DB 102,15,113,243,8 ; psllw $0x8,%xmm3
- DB 102,15,228,29,14,14,0,0 ; pmulhuw 0xe0e(%rip),%xmm3 # 12c0 <_sk_xor__ssse3_lowp+0xcf>
+ DB 102,15,228,29,18,14,0,0 ; pmulhuw 0xe12(%rip),%xmm3 # 1280 <_sk_xor__ssse3_lowp+0xd3>
DB 72,173 ; lods %ds:(%rsi),%rax
DB 102,15,239,192 ; pxor %xmm0,%xmm0
DB 15,87,201 ; xorps %xmm1,%xmm1
@@ -26462,9 +26448,9 @@ _sk_load_a8_ssse3_lowp LABEL PROC
DB 102,15,239,219 ; pxor %xmm3,%xmm3
DB 65,254,201 ; dec %r9b
DB 65,128,249,6 ; cmp $0x6,%r9b
- DB 119,207 ; ja 4a5 <_sk_load_a8_ssse3_lowp+0x15>
+ DB 119,207 ; ja 461 <_sk_load_a8_ssse3_lowp+0x15>
DB 69,15,182,201 ; movzbl %r9b,%r9d
- DB 76,141,21,95,0,0,0 ; lea 0x5f(%rip),%r10 # 540 <_sk_load_a8_ssse3_lowp+0xb0>
+ DB 76,141,21,95,0,0,0 ; lea 0x5f(%rip),%r10 # 4fc <_sk_load_a8_ssse3_lowp+0xb0>
DB 75,99,4,138 ; movslq (%r10,%r9,4),%rax
DB 76,1,208 ; add %r10,%rax
DB 255,224 ; jmpq *%rax
@@ -26483,7 +26469,7 @@ _sk_load_a8_ssse3_lowp LABEL PROC
DB 102,15,196,216,1 ; pinsrw $0x1,%eax,%xmm3
DB 65,15,182,4,19 ; movzbl (%r11,%rdx,1),%eax
DB 102,15,196,216,0 ; pinsrw $0x0,%eax,%xmm3
- DB 233,102,255,255,255 ; jmpq 4a5 <_sk_load_a8_ssse3_lowp+0x15>
+ DB 233,102,255,255,255 ; jmpq 461 <_sk_load_a8_ssse3_lowp+0x15>
DB 144 ; nop
DB 240,255 ; lock (bad)
DB 255 ; (bad)
@@ -26513,7 +26499,7 @@ _sk_store_a8_ssse3_lowp LABEL PROC
DB 102,65,15,113,208,7 ; psrlw $0x7,%xmm8
DB 102,69,15,103,192 ; packuswb %xmm8,%xmm8
DB 77,133,192 ; test %r8,%r8
- DB 117,10 ; jne 580 <_sk_store_a8_ssse3_lowp+0x24>
+ DB 117,10 ; jne 53c <_sk_store_a8_ssse3_lowp+0x24>
DB 242,69,15,17,4,19 ; movsd %xmm8,(%r11,%rdx,1)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -26521,11 +26507,11 @@ _sk_store_a8_ssse3_lowp LABEL PROC
DB 65,128,225,7 ; and $0x7,%r9b
DB 65,254,201 ; dec %r9b
DB 65,128,249,6 ; cmp $0x6,%r9b
- DB 119,236 ; ja 57c <_sk_store_a8_ssse3_lowp+0x20>
+ DB 119,236 ; ja 538 <_sk_store_a8_ssse3_lowp+0x20>
DB 72,131,236,120 ; sub $0x78,%rsp
DB 102,68,15,96,192 ; punpcklbw %xmm0,%xmm8
DB 69,15,182,201 ; movzbl %r9b,%r9d
- DB 76,141,21,128,0,0,0 ; lea 0x80(%rip),%r10 # 624 <_sk_store_a8_ssse3_lowp+0xc8>
+ DB 76,141,21,128,0,0,0 ; lea 0x80(%rip),%r10 # 5e0 <_sk_store_a8_ssse3_lowp+0xc8>
DB 75,99,4,138 ; movslq (%r10,%r9,4),%rax
DB 76,1,208 ; add %r10,%rax
DB 255,224 ; jmpq *%rax
@@ -26551,9 +26537,9 @@ _sk_store_a8_ssse3_lowp LABEL PROC
DB 138,4,36 ; mov (%rsp),%al
DB 65,136,4,19 ; mov %al,(%r11,%rdx,1)
DB 72,131,196,120 ; add $0x78,%rsp
- DB 233,89,255,255,255 ; jmpq 57c <_sk_store_a8_ssse3_lowp+0x20>
+ DB 233,89,255,255,255 ; jmpq 538 <_sk_store_a8_ssse3_lowp+0x20>
DB 144 ; nop
- DB 233,255,255,255,217 ; jmpq ffffffffda000628 <_sk_xor__ssse3_lowp+0xffffffffd9fff437>
+ DB 233,255,255,255,217 ; jmpq ffffffffda0005e4 <_sk_xor__ssse3_lowp+0xffffffffd9fff437>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,201 ; dec %ecx
@@ -26574,13 +26560,13 @@ _sk_load_g8_ssse3_lowp LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,24 ; mov (%rax),%r11
DB 77,133,192 ; test %r8,%r8
- DB 117,44 ; jne 677 <_sk_load_g8_ssse3_lowp+0x37>
+ DB 117,44 ; jne 633 <_sk_load_g8_ssse3_lowp+0x37>
DB 243,65,15,126,4,19 ; movq (%r11,%rdx,1),%xmm0
DB 102,15,96,192 ; punpcklbw %xmm0,%xmm0
DB 102,15,113,240,8 ; psllw $0x8,%xmm0
- DB 102,15,228,5,110,12,0,0 ; pmulhuw 0xc6e(%rip),%xmm0 # 12d0 <_sk_xor__ssse3_lowp+0xdf>
+ DB 102,15,228,5,114,12,0,0 ; pmulhuw 0xc72(%rip),%xmm0 # 1290 <_sk_xor__ssse3_lowp+0xe3>
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 15,40,29,117,12,0,0 ; movaps 0xc75(%rip),%xmm3 # 12e0 <_sk_xor__ssse3_lowp+0xef>
+ DB 15,40,29,121,12,0,0 ; movaps 0xc79(%rip),%xmm3 # 12a0 <_sk_xor__ssse3_lowp+0xf3>
DB 102,15,111,200 ; movdqa %xmm0,%xmm1
DB 102,15,111,208 ; movdqa %xmm0,%xmm2
DB 65,89 ; pop %r9
@@ -26590,9 +26576,9 @@ _sk_load_g8_ssse3_lowp LABEL PROC
DB 102,15,239,192 ; pxor %xmm0,%xmm0
DB 65,254,201 ; dec %r9b
DB 65,128,249,6 ; cmp $0x6,%r9b
- DB 119,202 ; ja 655 <_sk_load_g8_ssse3_lowp+0x15>
+ DB 119,202 ; ja 611 <_sk_load_g8_ssse3_lowp+0x15>
DB 69,15,182,201 ; movzbl %r9b,%r9d
- DB 76,141,21,94,0,0,0 ; lea 0x5e(%rip),%r10 # 6f4 <_sk_load_g8_ssse3_lowp+0xb4>
+ DB 76,141,21,94,0,0,0 ; lea 0x5e(%rip),%r10 # 6b0 <_sk_load_g8_ssse3_lowp+0xb4>
DB 75,99,4,138 ; movslq (%r10,%r9,4),%rax
DB 76,1,208 ; add %r10,%rax
DB 255,224 ; jmpq *%rax
@@ -26611,7 +26597,7 @@ _sk_load_g8_ssse3_lowp LABEL PROC
DB 102,15,196,192,1 ; pinsrw $0x1,%eax,%xmm0
DB 65,15,182,4,19 ; movzbl (%r11,%rdx,1),%eax
DB 102,15,196,192,0 ; pinsrw $0x0,%eax,%xmm0
- DB 233,97,255,255,255 ; jmpq 655 <_sk_load_g8_ssse3_lowp+0x15>
+ DB 233,97,255,255,255 ; jmpq 611 <_sk_load_g8_ssse3_lowp+0x15>
DB 241 ; icebp
DB 255 ; (bad)
DB 255 ; (bad)
@@ -26638,11 +26624,11 @@ _sk_srcover_rgba_8888_ssse3_lowp LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,24 ; mov (%rax),%r11
DB 77,133,192 ; test %r8,%r8
- DB 15,133,66,1,0,0 ; jne 860 <_sk_srcover_rgba_8888_ssse3_lowp+0x150>
+ DB 15,133,66,1,0,0 ; jne 81c <_sk_srcover_rgba_8888_ssse3_lowp+0x150>
DB 69,15,16,76,147,16 ; movups 0x10(%r11,%rdx,4),%xmm9
DB 69,15,16,4,147 ; movups (%r11,%rdx,4),%xmm8
DB 77,133,192 ; test %r8,%r8
- DB 102,15,111,37,188,11,0,0 ; movdqa 0xbbc(%rip),%xmm4 # 12f0 <_sk_xor__ssse3_lowp+0xff>
+ DB 102,15,111,37,192,11,0,0 ; movdqa 0xbc0(%rip),%xmm4 # 12b0 <_sk_xor__ssse3_lowp+0x103>
DB 102,68,15,56,0,196 ; pshufb %xmm4,%xmm8
DB 102,68,15,56,0,204 ; pshufb %xmm4,%xmm9
DB 102,65,15,111,240 ; movdqa %xmm8,%xmm6
@@ -26656,12 +26642,12 @@ _sk_srcover_rgba_8888_ssse3_lowp LABEL PROC
DB 102,15,239,246 ; pxor %xmm6,%xmm6
DB 102,65,15,96,240 ; punpcklbw %xmm8,%xmm6
DB 102,65,15,104,248 ; punpckhbw %xmm8,%xmm7
- DB 102,68,15,111,5,134,11,0,0 ; movdqa 0xb86(%rip),%xmm8 # 1300 <_sk_xor__ssse3_lowp+0x10f>
+ DB 102,68,15,111,5,138,11,0,0 ; movdqa 0xb8a(%rip),%xmm8 # 12c0 <_sk_xor__ssse3_lowp+0x113>
DB 102,65,15,228,224 ; pmulhuw %xmm8,%xmm4
DB 102,65,15,228,232 ; pmulhuw %xmm8,%xmm5
DB 102,65,15,228,240 ; pmulhuw %xmm8,%xmm6
DB 102,65,15,228,248 ; pmulhuw %xmm8,%xmm7
- DB 102,68,15,111,29,121,11,0,0 ; movdqa 0xb79(%rip),%xmm11 # 1310 <_sk_xor__ssse3_lowp+0x11f>
+ DB 102,68,15,111,29,125,11,0,0 ; movdqa 0xb7d(%rip),%xmm11 # 12d0 <_sk_xor__ssse3_lowp+0x123>
DB 102,68,15,249,219 ; psubw %xmm3,%xmm11
DB 102,68,15,111,196 ; movdqa %xmm4,%xmm8
DB 102,69,15,56,11,195 ; pmulhrsw %xmm11,%xmm8
@@ -26694,7 +26680,7 @@ _sk_srcover_rgba_8888_ssse3_lowp LABEL PROC
DB 102,15,111,193 ; movdqa %xmm1,%xmm0
DB 102,15,97,194 ; punpcklwd %xmm2,%xmm0
DB 102,15,105,202 ; punpckhwd %xmm2,%xmm1
- DB 15,133,207,0,0,0 ; jne 90a <_sk_srcover_rgba_8888_ssse3_lowp+0x1fa>
+ DB 15,133,207,0,0,0 ; jne 8c6 <_sk_srcover_rgba_8888_ssse3_lowp+0x1fa>
DB 243,65,15,127,76,147,16 ; movdqu %xmm1,0x10(%r11,%rdx,4)
DB 243,65,15,127,4,147 ; movdqu %xmm0,(%r11,%rdx,4)
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -26709,9 +26695,9 @@ _sk_srcover_rgba_8888_ssse3_lowp LABEL PROC
DB 69,15,87,192 ; xorps %xmm8,%xmm8
DB 65,254,201 ; dec %r9b
DB 65,128,249,6 ; cmp $0x6,%r9b
- DB 15,135,172,254,255,255 ; ja 729 <_sk_srcover_rgba_8888_ssse3_lowp+0x19>
+ DB 15,135,172,254,255,255 ; ja 6e5 <_sk_srcover_rgba_8888_ssse3_lowp+0x19>
DB 69,15,182,201 ; movzbl %r9b,%r9d
- DB 76,141,21,248,0,0,0 ; lea 0xf8(%rip),%r10 # 980 <_sk_srcover_rgba_8888_ssse3_lowp+0x270>
+ DB 76,141,21,248,0,0,0 ; lea 0xf8(%rip),%r10 # 93c <_sk_srcover_rgba_8888_ssse3_lowp+0x270>
DB 75,99,4,138 ; movslq (%r10,%r9,4),%rax
DB 76,1,208 ; add %r10,%rax
DB 255,224 ; jmpq *%rax
@@ -26736,14 +26722,14 @@ _sk_srcover_rgba_8888_ssse3_lowp LABEL PROC
DB 68,15,40,196 ; movaps %xmm4,%xmm8
DB 243,65,15,16,36,147 ; movss (%r11,%rdx,4),%xmm4
DB 243,68,15,16,196 ; movss %xmm4,%xmm8
- DB 233,31,254,255,255 ; jmpq 729 <_sk_srcover_rgba_8888_ssse3_lowp+0x19>
+ DB 233,31,254,255,255 ; jmpq 6e5 <_sk_srcover_rgba_8888_ssse3_lowp+0x19>
DB 69,137,193 ; mov %r8d,%r9d
DB 65,128,225,7 ; and $0x7,%r9b
DB 65,254,201 ; dec %r9b
DB 65,128,249,6 ; cmp $0x6,%r9b
- DB 15,135,42,255,255,255 ; ja 848 <_sk_srcover_rgba_8888_ssse3_lowp+0x138>
+ DB 15,135,42,255,255,255 ; ja 804 <_sk_srcover_rgba_8888_ssse3_lowp+0x138>
DB 65,15,182,193 ; movzbl %r9b,%eax
- DB 76,141,13,115,0,0,0 ; lea 0x73(%rip),%r9 # 99c <_sk_srcover_rgba_8888_ssse3_lowp+0x28c>
+ DB 76,141,13,115,0,0,0 ; lea 0x73(%rip),%r9 # 958 <_sk_srcover_rgba_8888_ssse3_lowp+0x28c>
DB 73,99,4,129 ; movslq (%r9,%rax,4),%rax
DB 76,1,200 ; add %r9,%rax
DB 255,224 ; jmpq *%rax
@@ -26759,8 +26745,8 @@ _sk_srcover_rgba_8888_ssse3_lowp LABEL PROC
DB 102,15,112,200,229 ; pshufd $0xe5,%xmm0,%xmm1
DB 102,65,15,126,76,147,4 ; movd %xmm1,0x4(%r11,%rdx,4)
DB 102,65,15,126,4,147 ; movd %xmm0,(%r11,%rdx,4)
- DB 233,200,254,255,255 ; jmpq 848 <_sk_srcover_rgba_8888_ssse3_lowp+0x138>
- DB 122,255 ; jp 981 <_sk_srcover_rgba_8888_ssse3_lowp+0x271>
+ DB 233,200,254,255,255 ; jmpq 804 <_sk_srcover_rgba_8888_ssse3_lowp+0x138>
+ DB 122,255 ; jp 93d <_sk_srcover_rgba_8888_ssse3_lowp+0x271>
DB 255 ; (bad)
DB 255,101,255 ; jmpq *-0x1(%rbp)
DB 255 ; (bad)
@@ -26797,7 +26783,7 @@ PUBLIC _sk_scale_1_float_ssse3_lowp
_sk_scale_1_float_ssse3_lowp LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 243,68,15,16,0 ; movss (%rax),%xmm8
- DB 243,68,15,89,5,204,8,0,0 ; mulss 0x8cc(%rip),%xmm8 # 1294 <_sk_xor__ssse3_lowp+0xa3>
+ DB 243,68,15,89,5,48,10,0,0 ; mulss 0xa30(%rip),%xmm8 # 13b4 <_sk_xor__ssse3_lowp+0x207>
DB 243,65,15,44,192 ; cvttss2si %xmm8,%eax
DB 102,68,15,110,192 ; movd %eax,%xmm8
DB 242,69,15,112,192,0 ; pshuflw $0x0,%xmm8,%xmm8
@@ -26819,11 +26805,11 @@ _sk_scale_u8_ssse3_lowp LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,24 ; mov (%rax),%r11
DB 77,133,192 ; test %r8,%r8
- DB 117,76 ; jne a65 <_sk_scale_u8_ssse3_lowp+0x57>
+ DB 117,76 ; jne a21 <_sk_scale_u8_ssse3_lowp+0x57>
DB 243,69,15,126,4,19 ; movq (%r11,%rdx,1),%xmm8
DB 102,68,15,96,192 ; punpcklbw %xmm0,%xmm8
DB 102,65,15,113,240,8 ; psllw $0x8,%xmm8
- DB 102,68,15,228,5,237,8,0,0 ; pmulhuw 0x8ed(%rip),%xmm8 # 1320 <_sk_xor__ssse3_lowp+0x12f>
+ DB 102,68,15,228,5,241,8,0,0 ; pmulhuw 0x8f1(%rip),%xmm8 # 12e0 <_sk_xor__ssse3_lowp+0x133>
DB 102,65,15,56,11,192 ; pmulhrsw %xmm8,%xmm0
DB 102,15,56,29,192 ; pabsw %xmm0,%xmm0
DB 102,65,15,56,11,200 ; pmulhrsw %xmm8,%xmm1
@@ -26840,9 +26826,9 @@ _sk_scale_u8_ssse3_lowp LABEL PROC
DB 102,69,15,239,192 ; pxor %xmm8,%xmm8
DB 65,254,201 ; dec %r9b
DB 65,128,249,6 ; cmp $0x6,%r9b
- DB 119,170 ; ja a24 <_sk_scale_u8_ssse3_lowp+0x16>
+ DB 119,170 ; ja 9e0 <_sk_scale_u8_ssse3_lowp+0x16>
DB 69,15,182,201 ; movzbl %r9b,%r9d
- DB 76,141,21,103,0,0,0 ; lea 0x67(%rip),%r10 # aec <_sk_scale_u8_ssse3_lowp+0xde>
+ DB 76,141,21,103,0,0,0 ; lea 0x67(%rip),%r10 # aa8 <_sk_scale_u8_ssse3_lowp+0xde>
DB 75,99,4,138 ; movslq (%r10,%r9,4),%rax
DB 76,1,208 ; add %r10,%rax
DB 255,224 ; jmpq *%rax
@@ -26861,7 +26847,7 @@ _sk_scale_u8_ssse3_lowp LABEL PROC
DB 102,68,15,196,192,1 ; pinsrw $0x1,%eax,%xmm8
DB 65,15,182,4,19 ; movzbl (%r11,%rdx,1),%eax
DB 102,68,15,196,192,0 ; pinsrw $0x0,%eax,%xmm8
- DB 233,57,255,255,255 ; jmpq a24 <_sk_scale_u8_ssse3_lowp+0x16>
+ DB 233,57,255,255,255 ; jmpq 9e0 <_sk_scale_u8_ssse3_lowp+0x16>
DB 144 ; nop
DB 239 ; out %eax,(%dx)
DB 255 ; (bad)
@@ -26889,14 +26875,14 @@ PUBLIC _sk_lerp_1_float_ssse3_lowp
_sk_lerp_1_float_ssse3_lowp LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 243,68,15,16,0 ; movss (%rax),%xmm8
- DB 243,68,15,89,5,128,7,0,0 ; mulss 0x780(%rip),%xmm8 # 1298 <_sk_xor__ssse3_lowp+0xa7>
+ DB 243,68,15,89,5,228,8,0,0 ; mulss 0x8e4(%rip),%xmm8 # 13b8 <_sk_xor__ssse3_lowp+0x20b>
DB 243,65,15,44,192 ; cvttss2si %xmm8,%eax
DB 102,68,15,110,192 ; movd %eax,%xmm8
DB 242,69,15,112,192,0 ; pshuflw $0x0,%xmm8,%xmm8
DB 102,69,15,112,192,80 ; pshufd $0x50,%xmm8,%xmm8
DB 102,65,15,56,11,192 ; pmulhrsw %xmm8,%xmm0
DB 102,68,15,56,29,200 ; pabsw %xmm0,%xmm9
- DB 102,68,15,111,21,237,7,0,0 ; movdqa 0x7ed(%rip),%xmm10 # 1330 <_sk_xor__ssse3_lowp+0x13f>
+ DB 102,68,15,111,21,241,7,0,0 ; movdqa 0x7f1(%rip),%xmm10 # 12f0 <_sk_xor__ssse3_lowp+0x143>
DB 102,69,15,249,208 ; psubw %xmm8,%xmm10
DB 102,15,111,196 ; movdqa %xmm4,%xmm0
DB 102,65,15,56,11,194 ; pmulhrsw %xmm10,%xmm0
@@ -26928,14 +26914,14 @@ _sk_lerp_u8_ssse3_lowp LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,24 ; mov (%rax),%r11
DB 77,133,192 ; test %r8,%r8
- DB 15,133,171,0,0,0 ; jne c77 <_sk_lerp_u8_ssse3_lowp+0xba>
+ DB 15,133,171,0,0,0 ; jne c33 <_sk_lerp_u8_ssse3_lowp+0xba>
DB 243,69,15,126,4,19 ; movq (%r11,%rdx,1),%xmm8
DB 102,68,15,96,192 ; punpcklbw %xmm0,%xmm8
DB 102,65,15,113,240,8 ; psllw $0x8,%xmm8
- DB 102,68,15,228,5,90,7,0,0 ; pmulhuw 0x75a(%rip),%xmm8 # 1340 <_sk_xor__ssse3_lowp+0x14f>
+ DB 102,68,15,228,5,94,7,0,0 ; pmulhuw 0x75e(%rip),%xmm8 # 1300 <_sk_xor__ssse3_lowp+0x153>
DB 102,65,15,56,11,192 ; pmulhrsw %xmm8,%xmm0
DB 102,68,15,56,29,200 ; pabsw %xmm0,%xmm9
- DB 102,68,15,111,21,85,7,0,0 ; movdqa 0x755(%rip),%xmm10 # 1350 <_sk_xor__ssse3_lowp+0x15f>
+ DB 102,68,15,111,21,89,7,0,0 ; movdqa 0x759(%rip),%xmm10 # 1310 <_sk_xor__ssse3_lowp+0x163>
DB 102,69,15,249,208 ; psubw %xmm8,%xmm10
DB 102,15,111,196 ; movdqa %xmm4,%xmm0
DB 102,65,15,56,11,194 ; pmulhrsw %xmm10,%xmm0
@@ -26966,9 +26952,9 @@ _sk_lerp_u8_ssse3_lowp LABEL PROC
DB 102,69,15,239,192 ; pxor %xmm8,%xmm8
DB 65,254,201 ; dec %r9b
DB 65,128,249,6 ; cmp $0x6,%r9b
- DB 15,135,71,255,255,255 ; ja bd7 <_sk_lerp_u8_ssse3_lowp+0x1a>
+ DB 15,135,71,255,255,255 ; ja b93 <_sk_lerp_u8_ssse3_lowp+0x1a>
DB 69,15,182,201 ; movzbl %r9b,%r9d
- DB 76,141,21,105,0,0,0 ; lea 0x69(%rip),%r10 # d04 <_sk_lerp_u8_ssse3_lowp+0x147>
+ DB 76,141,21,105,0,0,0 ; lea 0x69(%rip),%r10 # cc0 <_sk_lerp_u8_ssse3_lowp+0x147>
DB 75,99,4,138 ; movslq (%r10,%r9,4),%rax
DB 76,1,208 ; add %r10,%rax
DB 255,224 ; jmpq *%rax
@@ -26987,7 +26973,7 @@ _sk_lerp_u8_ssse3_lowp LABEL PROC
DB 102,68,15,196,192,1 ; pinsrw $0x1,%eax,%xmm8
DB 65,15,182,4,19 ; movzbl (%r11,%rdx,1),%eax
DB 102,68,15,196,192,0 ; pinsrw $0x0,%eax,%xmm8
- DB 233,214,254,255,255 ; jmpq bd7 <_sk_lerp_u8_ssse3_lowp+0x1a>
+ DB 233,214,254,255,255 ; jmpq b93 <_sk_lerp_u8_ssse3_lowp+0x1a>
DB 15,31,0 ; nopl (%rax)
DB 237 ; in (%dx),%eax
DB 255 ; (bad)
@@ -27067,7 +27053,7 @@ PUBLIC _sk_srcatop_ssse3_lowp
_sk_srcatop_ssse3_lowp LABEL PROC
DB 102,15,56,11,199 ; pmulhrsw %xmm7,%xmm0
DB 102,68,15,56,29,192 ; pabsw %xmm0,%xmm8
- DB 102,68,15,111,13,189,5,0,0 ; movdqa 0x5bd(%rip),%xmm9 # 1360 <_sk_xor__ssse3_lowp+0x16f>
+ DB 102,68,15,111,13,193,5,0,0 ; movdqa 0x5c1(%rip),%xmm9 # 1320 <_sk_xor__ssse3_lowp+0x173>
DB 102,68,15,249,203 ; psubw %xmm3,%xmm9
DB 102,15,111,196 ; movdqa %xmm4,%xmm0
DB 102,65,15,56,11,193 ; pmulhrsw %xmm9,%xmm0
@@ -27098,7 +27084,7 @@ _sk_dstatop_ssse3_lowp LABEL PROC
DB 102,68,15,111,196 ; movdqa %xmm4,%xmm8
DB 102,68,15,56,11,195 ; pmulhrsw %xmm3,%xmm8
DB 102,69,15,56,29,192 ; pabsw %xmm8,%xmm8
- DB 102,68,15,111,13,60,5,0,0 ; movdqa 0x53c(%rip),%xmm9 # 1370 <_sk_xor__ssse3_lowp+0x17f>
+ DB 102,68,15,111,13,64,5,0,0 ; movdqa 0x540(%rip),%xmm9 # 1330 <_sk_xor__ssse3_lowp+0x183>
DB 102,68,15,249,207 ; psubw %xmm7,%xmm9
DB 102,65,15,56,11,193 ; pmulhrsw %xmm9,%xmm0
DB 102,15,56,29,192 ; pabsw %xmm0,%xmm0
@@ -27155,7 +27141,7 @@ _sk_dstin_ssse3_lowp LABEL PROC
PUBLIC _sk_srcout_ssse3_lowp
_sk_srcout_ssse3_lowp LABEL PROC
- DB 102,68,15,111,5,98,4,0,0 ; movdqa 0x462(%rip),%xmm8 # 1380 <_sk_xor__ssse3_lowp+0x18f>
+ DB 102,68,15,111,5,102,4,0,0 ; movdqa 0x466(%rip),%xmm8 # 1340 <_sk_xor__ssse3_lowp+0x193>
DB 102,68,15,249,199 ; psubw %xmm7,%xmm8
DB 102,65,15,56,11,192 ; pmulhrsw %xmm8,%xmm0
DB 102,15,56,29,192 ; pabsw %xmm0,%xmm0
@@ -27170,7 +27156,7 @@ _sk_srcout_ssse3_lowp LABEL PROC
PUBLIC _sk_dstout_ssse3_lowp
_sk_dstout_ssse3_lowp LABEL PROC
- DB 102,68,15,111,5,51,4,0,0 ; movdqa 0x433(%rip),%xmm8 # 1390 <_sk_xor__ssse3_lowp+0x19f>
+ DB 102,68,15,111,5,55,4,0,0 ; movdqa 0x437(%rip),%xmm8 # 1350 <_sk_xor__ssse3_lowp+0x1a3>
DB 102,68,15,249,195 ; psubw %xmm3,%xmm8
DB 102,15,111,196 ; movdqa %xmm4,%xmm0
DB 102,65,15,56,11,192 ; pmulhrsw %xmm8,%xmm0
@@ -27188,7 +27174,7 @@ _sk_dstout_ssse3_lowp LABEL PROC
PUBLIC _sk_srcover_ssse3_lowp
_sk_srcover_ssse3_lowp LABEL PROC
- DB 102,68,15,111,5,248,3,0,0 ; movdqa 0x3f8(%rip),%xmm8 # 13a0 <_sk_xor__ssse3_lowp+0x1af>
+ DB 102,68,15,111,5,252,3,0,0 ; movdqa 0x3fc(%rip),%xmm8 # 1360 <_sk_xor__ssse3_lowp+0x1b3>
DB 102,68,15,249,195 ; psubw %xmm3,%xmm8
DB 102,68,15,111,204 ; movdqa %xmm4,%xmm9
DB 102,69,15,56,11,200 ; pmulhrsw %xmm8,%xmm9
@@ -27210,7 +27196,7 @@ _sk_srcover_ssse3_lowp LABEL PROC
PUBLIC _sk_dstover_ssse3_lowp
_sk_dstover_ssse3_lowp LABEL PROC
- DB 102,68,15,111,5,163,3,0,0 ; movdqa 0x3a3(%rip),%xmm8 # 13b0 <_sk_xor__ssse3_lowp+0x1bf>
+ DB 102,68,15,111,5,167,3,0,0 ; movdqa 0x3a7(%rip),%xmm8 # 1370 <_sk_xor__ssse3_lowp+0x1c3>
DB 102,68,15,249,199 ; psubw %xmm7,%xmm8
DB 102,65,15,56,11,192 ; pmulhrsw %xmm8,%xmm0
DB 102,15,56,29,192 ; pabsw %xmm0,%xmm0
@@ -27242,7 +27228,7 @@ _sk_modulate_ssse3_lowp LABEL PROC
PUBLIC _sk_multiply_ssse3_lowp
_sk_multiply_ssse3_lowp LABEL PROC
- DB 102,68,15,111,5,56,3,0,0 ; movdqa 0x338(%rip),%xmm8 # 13c0 <_sk_xor__ssse3_lowp+0x1cf>
+ DB 102,68,15,111,5,60,3,0,0 ; movdqa 0x33c(%rip),%xmm8 # 1380 <_sk_xor__ssse3_lowp+0x1d3>
DB 102,69,15,111,200 ; movdqa %xmm8,%xmm9
DB 102,68,15,249,207 ; psubw %xmm7,%xmm9
DB 102,68,15,111,208 ; movdqa %xmm0,%xmm10
@@ -27298,7 +27284,7 @@ _sk_plus__ssse3_lowp LABEL PROC
PUBLIC _sk_screen_ssse3_lowp
_sk_screen_ssse3_lowp LABEL PROC
- DB 102,68,15,111,5,74,2,0,0 ; movdqa 0x24a(%rip),%xmm8 # 13d0 <_sk_xor__ssse3_lowp+0x1df>
+ DB 102,68,15,111,5,78,2,0,0 ; movdqa 0x24e(%rip),%xmm8 # 1390 <_sk_xor__ssse3_lowp+0x1e3>
DB 102,69,15,111,200 ; movdqa %xmm8,%xmm9
DB 102,68,15,249,200 ; psubw %xmm0,%xmm9
DB 102,68,15,56,11,204 ; pmulhrsw %xmm4,%xmm9
@@ -27323,7 +27309,7 @@ _sk_screen_ssse3_lowp LABEL PROC
PUBLIC _sk_xor__ssse3_lowp
_sk_xor__ssse3_lowp LABEL PROC
- DB 102,68,15,111,5,230,1,0,0 ; movdqa 0x1e6(%rip),%xmm8 # 13e0 <_sk_xor__ssse3_lowp+0x1ef>
+ DB 102,68,15,111,5,234,1,0,0 ; movdqa 0x1ea(%rip),%xmm8 # 13a0 <_sk_xor__ssse3_lowp+0x1f3>
DB 102,69,15,111,200 ; movdqa %xmm8,%xmm9
DB 102,68,15,249,207 ; psubw %xmm7,%xmm9
DB 102,65,15,56,11,193 ; pmulhrsw %xmm9,%xmm0
@@ -27353,18 +27339,13 @@ _sk_xor__ssse3_lowp LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
-ALIGN 4
- DB 0,0 ; add %al,(%rax)
- DB 0,71,0 ; add %al,0x0(%rdi)
- DB 0,0 ; add %al,(%rax)
- DB 71,0,0 ; rex.RXB add %r8b,(%r8)
- DB 0,71,0 ; add %al,0x0(%rdi)
- DB 0,0 ; add %al,(%rax)
- DB 71 ; rex.RXB
-
ALIGN 16
- DB 0,4,8 ; add %al,(%rax,%rcx,1)
- DB 12,1 ; or $0x1,%al
+ DB 0,0 ; add %al,(%rax)
+ DB 128,67,0,0 ; addb $0x0,0x0(%rbx)
+ DB 128,67,0,0 ; addb $0x0,0x0(%rbx)
+ DB 128,67,0,0 ; addb $0x0,0x0(%rbx)
+ DB 128,67,0,4 ; addb $0x4,0x0(%rbx)
+ DB 8,12,1 ; or %cl,(%rcx,%rax,1)
DB 5,9,13,2,6 ; add $0x6020d09,%eax
DB 10,14 ; or (%rsi),%cl
DB 3,7 ; add (%rdi),%eax
@@ -27377,7 +27358,7 @@ ALIGN 16
DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax)
DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax)
DB 0,128,0,4,8,12 ; add %al,0xc080400(%rax)
- DB 1,5,9,13,2,6 ; add %eax,0x6020d09(%rip) # 6022003 <_sk_xor__ssse3_lowp+0x6020e12>
+ DB 1,5,9,13,2,6 ; add %eax,0x6020d09(%rip) # 6021fc3 <_sk_xor__ssse3_lowp+0x6020e16>
DB 10,14 ; or (%rsi),%cl
DB 3,7 ; add (%rdi),%eax
DB 11,15 ; or (%rdi),%ecx
@@ -27417,5 +27398,13 @@ ALIGN 16
DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax)
DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax)
DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax)
+
+ALIGN 4
+ DB 0,0 ; add %al,(%rax)
+ DB 0,71,0 ; add %al,0x0(%rdi)
+ DB 0,0 ; add %al,(%rax)
+ DB 71,0,0 ; rex.RXB add %r8b,(%r8)
+ DB 0 ; .byte 0x0
+ DB 71 ; rex.RXB
ENDIF
END
diff --git a/src/jumper/SkJumper_stages_lowp.cpp b/src/jumper/SkJumper_stages_lowp.cpp
index b8d3cdae47..3cb39e5af0 100644
--- a/src/jumper/SkJumper_stages_lowp.cpp
+++ b/src/jumper/SkJumper_stages_lowp.cpp
@@ -179,11 +179,13 @@ SI U8 to_byte(F v) {
// Stages!
STAGE(constant_color) {
- auto rgba = (const float*)ctx;
- r = rgba[0];
- g = rgba[1];
- b = rgba[2];
- a = rgba[3];
+ // We're converting to fixed point, which lets us play some IEEE representation tricks,
+ // replacing a naive *32768 and float->int conversion with a simple float add.
+ __m128i bits = _mm_loadu_ps((const float*)ctx) + _mm_set1_ps(256.0f);
+ r = _mm_shuffle_epi8(bits, _mm_set1_epi16(0x0100));
+ g = _mm_shuffle_epi8(bits, _mm_set1_epi16(0x0504));
+ b = _mm_shuffle_epi8(bits, _mm_set1_epi16(0x0908));
+ a = _mm_shuffle_epi8(bits, _mm_set1_epi16(0x0d0c));
}
STAGE(set_rgb) {