aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: Mike Klein <mtklein@chromium.org> 2017-05-22 10:28:27 -0400
committer: Mike Klein <mtklein@chromium.org> 2017-05-22 17:08:19 +0000
commit: 139e463dc6f965fdaed854efcb20c6cafbb6dbdc (patch)
tree: 4a30cac42be605bdbba0d5da49685c8809cb7f74
parent: 0a76b413eac46ec218b367c5456709059557f5db (diff)
We can mask load and store with just AVX.
Previously we were using AVX2 instructions to generate the masks, and AVX2 instructions for the mask loads and stores themselves. AVX came with float mask loads and stores, which will work perfectly fine.

I don't really get what the point of the 32-bit int loads and stores in AVX2 is, beyond maybe syntax sugar?

Change-Id: I81fa55fb09daea4f5546f8c9ebbc886015edce51
Reviewed-on: https://skia-review.googlesource.com/17452
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Ravi Mistry <rmistry@google.com>
-rw-r--r-- src/jumper/SkJumper_generated.S 1249
-rw-r--r-- src/jumper/SkJumper_generated_win.S 1241
-rw-r--r-- src/jumper/SkJumper_stages.cpp 12
3 files changed, 1208 insertions, 1294 deletions
diff --git a/src/jumper/SkJumper_generated.S b/src/jumper/SkJumper_generated.S
index 143f6b5e1d..37d3fd2fa4 100644
--- a/src/jumper/SkJumper_generated.S
+++ b/src/jumper/SkJumper_generated.S
@@ -10432,8 +10432,8 @@ _sk_load_tables_hsw:
.byte 76,3,8 // add (%rax),%r9
.byte 77,133,192 // test %r8,%r8
.byte 117,105 // jne 1a4a <_sk_load_tables_hsw+0x7e>
- .byte 196,193,126,111,25 // vmovdqu (%r9),%ymm3
- .byte 197,229,219,13,18,50,0,0 // vpand 0x3212(%rip),%ymm3,%ymm1 # 4c00 <_sk_callback_hsw+0x513>
+ .byte 196,193,124,16,25 // vmovups (%r9),%ymm3
+ .byte 197,228,84,13,18,50,0,0 // vandps 0x3212(%rip),%ymm3,%ymm1 # 4c00 <_sk_callback_hsw+0x513>
.byte 196,65,61,118,192 // vpcmpeqd %ymm8,%ymm8,%ymm8
.byte 72,139,72,8 // mov 0x8(%rax),%rcx
.byte 76,139,72,16 // mov 0x10(%rax),%r9
@@ -10459,7 +10459,7 @@ _sk_load_tables_hsw:
.byte 73,211,234 // shr %cl,%r10
.byte 196,193,249,110,194 // vmovq %r10,%xmm0
.byte 196,226,125,33,192 // vpmovsxbd %xmm0,%ymm0
- .byte 196,194,125,140,25 // vpmaskmovd (%r9),%ymm0,%ymm3
+ .byte 196,194,125,44,25 // vmaskmovps (%r9),%ymm0,%ymm3
.byte 233,115,255,255,255 // jmpq 19e6 <_sk_load_tables_hsw+0x1a>
HIDDEN _sk_load_tables_u16_be_hsw
@@ -11984,8 +11984,8 @@ _sk_load_8888_hsw:
.byte 76,3,8 // add (%rax),%r9
.byte 77,133,192 // test %r8,%r8
.byte 117,88 // jne 336d <_sk_load_8888_hsw+0x6d>
- .byte 196,193,126,111,25 // vmovdqu (%r9),%ymm3
- .byte 197,229,219,5,158,25,0,0 // vpand 0x199e(%rip),%ymm3,%ymm0 # 4cc0 <_sk_callback_hsw+0x5d3>
+ .byte 196,193,124,16,25 // vmovups (%r9),%ymm3
+ .byte 197,228,84,5,158,25,0,0 // vandps 0x199e(%rip),%ymm3,%ymm0 # 4cc0 <_sk_callback_hsw+0x5d3>
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
.byte 196,98,125,24,5,217,23,0,0 // vbroadcastss 0x17d9(%rip),%ymm8 # 4b08 <_sk_callback_hsw+0x41b>
.byte 196,193,124,89,192 // vmulps %ymm8,%ymm0,%ymm0
@@ -12008,7 +12008,7 @@ _sk_load_8888_hsw:
.byte 72,211,232 // shr %cl,%rax
.byte 196,225,249,110,192 // vmovq %rax,%xmm0
.byte 196,226,125,33,192 // vpmovsxbd %xmm0,%ymm0
- .byte 196,194,125,140,25 // vpmaskmovd (%r9),%ymm0,%ymm3
+ .byte 196,194,125,44,25 // vmaskmovps (%r9),%ymm0,%ymm3
.byte 235,135 // jmp 331a <_sk_load_8888_hsw+0x1a>
HIDDEN _sk_gather_8888_hsw
@@ -12065,7 +12065,7 @@ _sk_store_8888_hsw:
.byte 196,65,53,235,192 // vpor %ymm8,%ymm9,%ymm8
.byte 77,133,192 // test %r8,%r8
.byte 117,12 // jne 347c <_sk_store_8888_hsw+0x73>
- .byte 196,65,126,127,1 // vmovdqu %ymm8,(%r9)
+ .byte 196,65,124,17,1 // vmovups %ymm8,(%r9)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,137,193 // mov %r8,%rcx
.byte 255,224 // jmpq *%rax
@@ -12076,7 +12076,7 @@ _sk_store_8888_hsw:
.byte 72,211,232 // shr %cl,%rax
.byte 196,97,249,110,200 // vmovq %rax,%xmm9
.byte 196,66,125,33,201 // vpmovsxbd %xmm9,%ymm9
- .byte 196,66,53,142,1 // vpmaskmovd %ymm8,%ymm9,(%r9)
+ .byte 196,66,53,46,1 // vmaskmovps %ymm8,%ymm9,(%r9)
.byte 235,211 // jmp 3475 <_sk_store_8888_hsw+0x6c>
HIDDEN _sk_load_f16_hsw
@@ -13984,14 +13984,14 @@ _sk_seed_shader_avx:
.byte 197,249,112,192,0 // vpshufd $0x0,%xmm0,%xmm0
.byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,226,125,24,13,95,99,0,0 // vbroadcastss 0x635f(%rip),%ymm1 # 6428 <_sk_callback_avx+0x125>
+ .byte 196,226,125,24,13,67,98,0,0 // vbroadcastss 0x6243(%rip),%ymm1 # 630c <_sk_callback_avx+0x125>
.byte 197,252,88,193 // vaddps %ymm1,%ymm0,%ymm0
.byte 197,252,88,2 // vaddps (%rdx),%ymm0,%ymm0
.byte 196,226,125,24,16 // vbroadcastss (%rax),%ymm2
.byte 197,252,91,210 // vcvtdq2ps %ymm2,%ymm2
.byte 197,236,88,201 // vaddps %ymm1,%ymm2,%ymm1
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,21,67,99,0,0 // vbroadcastss 0x6343(%rip),%ymm2 # 642c <_sk_callback_avx+0x129>
+ .byte 196,226,125,24,21,39,98,0,0 // vbroadcastss 0x6227(%rip),%ymm2 # 6310 <_sk_callback_avx+0x129>
.byte 197,228,87,219 // vxorps %ymm3,%ymm3,%ymm3
.byte 197,220,87,228 // vxorps %ymm4,%ymm4,%ymm4
.byte 197,212,87,237 // vxorps %ymm5,%ymm5,%ymm5
@@ -14014,7 +14014,7 @@ _sk_dither_avx:
.byte 76,139,0 // mov (%rax),%r8
.byte 196,66,125,24,8 // vbroadcastss (%r8),%ymm9
.byte 196,65,60,87,209 // vxorps %ymm9,%ymm8,%ymm10
- .byte 196,98,125,24,29,244,98,0,0 // vbroadcastss 0x62f4(%rip),%ymm11 # 6430 <_sk_callback_avx+0x12d>
+ .byte 196,98,125,24,29,216,97,0,0 // vbroadcastss 0x61d8(%rip),%ymm11 # 6314 <_sk_callback_avx+0x12d>
.byte 196,65,44,84,203 // vandps %ymm11,%ymm10,%ymm9
.byte 196,193,25,114,241,5 // vpslld $0x5,%xmm9,%xmm12
.byte 196,67,125,25,201,1 // vextractf128 $0x1,%ymm9,%xmm9
@@ -14025,8 +14025,8 @@ _sk_dither_avx:
.byte 196,67,125,25,219,1 // vextractf128 $0x1,%ymm11,%xmm11
.byte 196,193,33,114,243,4 // vpslld $0x4,%xmm11,%xmm11
.byte 196,67,29,24,219,1 // vinsertf128 $0x1,%xmm11,%ymm12,%ymm11
- .byte 196,98,125,24,37,181,98,0,0 // vbroadcastss 0x62b5(%rip),%ymm12 # 6434 <_sk_callback_avx+0x131>
- .byte 196,98,125,24,45,176,98,0,0 // vbroadcastss 0x62b0(%rip),%ymm13 # 6438 <_sk_callback_avx+0x135>
+ .byte 196,98,125,24,37,153,97,0,0 // vbroadcastss 0x6199(%rip),%ymm12 # 6318 <_sk_callback_avx+0x131>
+ .byte 196,98,125,24,45,148,97,0,0 // vbroadcastss 0x6194(%rip),%ymm13 # 631c <_sk_callback_avx+0x135>
.byte 196,65,44,84,245 // vandps %ymm13,%ymm10,%ymm14
.byte 196,193,1,114,246,2 // vpslld $0x2,%xmm14,%xmm15
.byte 196,67,125,25,246,1 // vextractf128 $0x1,%ymm14,%xmm14
@@ -14053,9 +14053,9 @@ _sk_dither_avx:
.byte 196,65,12,86,202 // vorps %ymm10,%ymm14,%ymm9
.byte 196,65,60,86,193 // vorps %ymm9,%ymm8,%ymm8
.byte 196,65,124,91,192 // vcvtdq2ps %ymm8,%ymm8
- .byte 196,98,125,24,13,27,98,0,0 // vbroadcastss 0x621b(%rip),%ymm9 # 643c <_sk_callback_avx+0x139>
+ .byte 196,98,125,24,13,255,96,0,0 // vbroadcastss 0x60ff(%rip),%ymm9 # 6320 <_sk_callback_avx+0x139>
.byte 196,65,60,89,193 // vmulps %ymm9,%ymm8,%ymm8
- .byte 196,98,125,24,13,17,98,0,0 // vbroadcastss 0x6211(%rip),%ymm9 # 6440 <_sk_callback_avx+0x13d>
+ .byte 196,98,125,24,13,245,96,0,0 // vbroadcastss 0x60f5(%rip),%ymm9 # 6324 <_sk_callback_avx+0x13d>
.byte 196,65,60,88,193 // vaddps %ymm9,%ymm8,%ymm8
.byte 196,98,125,24,72,8 // vbroadcastss 0x8(%rax),%ymm9
.byte 196,65,52,89,192 // vmulps %ymm8,%ymm9,%ymm8
@@ -14124,7 +14124,7 @@ HIDDEN _sk_srcatop_avx
FUNCTION(_sk_srcatop_avx)
_sk_srcatop_avx:
.byte 197,252,89,199 // vmulps %ymm7,%ymm0,%ymm0
- .byte 196,98,125,24,5,104,97,0,0 // vbroadcastss 0x6168(%rip),%ymm8 # 6444 <_sk_callback_avx+0x141>
+ .byte 196,98,125,24,5,76,96,0,0 // vbroadcastss 0x604c(%rip),%ymm8 # 6328 <_sk_callback_avx+0x141>
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
.byte 197,60,89,204 // vmulps %ymm4,%ymm8,%ymm9
.byte 197,180,88,192 // vaddps %ymm0,%ymm9,%ymm0
@@ -14145,7 +14145,7 @@ HIDDEN _sk_dstatop_avx
FUNCTION(_sk_dstatop_avx)
_sk_dstatop_avx:
.byte 197,100,89,196 // vmulps %ymm4,%ymm3,%ymm8
- .byte 196,98,125,24,13,42,97,0,0 // vbroadcastss 0x612a(%rip),%ymm9 # 6448 <_sk_callback_avx+0x145>
+ .byte 196,98,125,24,13,14,96,0,0 // vbroadcastss 0x600e(%rip),%ymm9 # 632c <_sk_callback_avx+0x145>
.byte 197,52,92,207 // vsubps %ymm7,%ymm9,%ymm9
.byte 197,180,89,192 // vmulps %ymm0,%ymm9,%ymm0
.byte 197,188,88,192 // vaddps %ymm0,%ymm8,%ymm0
@@ -14187,7 +14187,7 @@ HIDDEN _sk_srcout_avx
.globl _sk_srcout_avx
FUNCTION(_sk_srcout_avx)
_sk_srcout_avx:
- .byte 196,98,125,24,5,201,96,0,0 // vbroadcastss 0x60c9(%rip),%ymm8 # 644c <_sk_callback_avx+0x149>
+ .byte 196,98,125,24,5,173,95,0,0 // vbroadcastss 0x5fad(%rip),%ymm8 # 6330 <_sk_callback_avx+0x149>
.byte 197,60,92,199 // vsubps %ymm7,%ymm8,%ymm8
.byte 197,188,89,192 // vmulps %ymm0,%ymm8,%ymm0
.byte 197,188,89,201 // vmulps %ymm1,%ymm8,%ymm1
@@ -14200,7 +14200,7 @@ HIDDEN _sk_dstout_avx
.globl _sk_dstout_avx
FUNCTION(_sk_dstout_avx)
_sk_dstout_avx:
- .byte 196,226,125,24,5,172,96,0,0 // vbroadcastss 0x60ac(%rip),%ymm0 # 6450 <_sk_callback_avx+0x14d>
+ .byte 196,226,125,24,5,144,95,0,0 // vbroadcastss 0x5f90(%rip),%ymm0 # 6334 <_sk_callback_avx+0x14d>
.byte 197,252,92,219 // vsubps %ymm3,%ymm0,%ymm3
.byte 197,228,89,196 // vmulps %ymm4,%ymm3,%ymm0
.byte 197,228,89,205 // vmulps %ymm5,%ymm3,%ymm1
@@ -14213,7 +14213,7 @@ HIDDEN _sk_srcover_avx
.globl _sk_srcover_avx
FUNCTION(_sk_srcover_avx)
_sk_srcover_avx:
- .byte 196,98,125,24,5,143,96,0,0 // vbroadcastss 0x608f(%rip),%ymm8 # 6454 <_sk_callback_avx+0x151>
+ .byte 196,98,125,24,5,115,95,0,0 // vbroadcastss 0x5f73(%rip),%ymm8 # 6338 <_sk_callback_avx+0x151>
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
.byte 197,60,89,204 // vmulps %ymm4,%ymm8,%ymm9
.byte 197,180,88,192 // vaddps %ymm0,%ymm9,%ymm0
@@ -14230,7 +14230,7 @@ HIDDEN _sk_dstover_avx
.globl _sk_dstover_avx
FUNCTION(_sk_dstover_avx)
_sk_dstover_avx:
- .byte 196,98,125,24,5,98,96,0,0 // vbroadcastss 0x6062(%rip),%ymm8 # 6458 <_sk_callback_avx+0x155>
+ .byte 196,98,125,24,5,70,95,0,0 // vbroadcastss 0x5f46(%rip),%ymm8 # 633c <_sk_callback_avx+0x155>
.byte 197,60,92,199 // vsubps %ymm7,%ymm8,%ymm8
.byte 197,188,89,192 // vmulps %ymm0,%ymm8,%ymm0
.byte 197,252,88,196 // vaddps %ymm4,%ymm0,%ymm0
@@ -14258,7 +14258,7 @@ HIDDEN _sk_multiply_avx
.globl _sk_multiply_avx
FUNCTION(_sk_multiply_avx)
_sk_multiply_avx:
- .byte 196,98,125,24,5,33,96,0,0 // vbroadcastss 0x6021(%rip),%ymm8 # 645c <_sk_callback_avx+0x159>
+ .byte 196,98,125,24,5,5,95,0,0 // vbroadcastss 0x5f05(%rip),%ymm8 # 6340 <_sk_callback_avx+0x159>
.byte 197,60,92,207 // vsubps %ymm7,%ymm8,%ymm9
.byte 197,52,89,208 // vmulps %ymm0,%ymm9,%ymm10
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
@@ -14318,7 +14318,7 @@ HIDDEN _sk_xor__avx
.globl _sk_xor__avx
FUNCTION(_sk_xor__avx)
_sk_xor__avx:
- .byte 196,98,125,24,5,112,95,0,0 // vbroadcastss 0x5f70(%rip),%ymm8 # 6460 <_sk_callback_avx+0x15d>
+ .byte 196,98,125,24,5,84,94,0,0 // vbroadcastss 0x5e54(%rip),%ymm8 # 6344 <_sk_callback_avx+0x15d>
.byte 197,60,92,207 // vsubps %ymm7,%ymm8,%ymm9
.byte 197,180,89,192 // vmulps %ymm0,%ymm9,%ymm0
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
@@ -14355,7 +14355,7 @@ _sk_darken_avx:
.byte 197,100,89,206 // vmulps %ymm6,%ymm3,%ymm9
.byte 196,193,108,95,209 // vmaxps %ymm9,%ymm2,%ymm2
.byte 197,188,92,210 // vsubps %ymm2,%ymm8,%ymm2
- .byte 196,98,125,24,5,240,94,0,0 // vbroadcastss 0x5ef0(%rip),%ymm8 # 6464 <_sk_callback_avx+0x161>
+ .byte 196,98,125,24,5,212,93,0,0 // vbroadcastss 0x5dd4(%rip),%ymm8 # 6348 <_sk_callback_avx+0x161>
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
.byte 197,60,89,199 // vmulps %ymm7,%ymm8,%ymm8
.byte 197,188,88,219 // vaddps %ymm3,%ymm8,%ymm3
@@ -14381,7 +14381,7 @@ _sk_lighten_avx:
.byte 197,100,89,206 // vmulps %ymm6,%ymm3,%ymm9
.byte 196,193,108,93,209 // vminps %ymm9,%ymm2,%ymm2
.byte 197,188,92,210 // vsubps %ymm2,%ymm8,%ymm2
- .byte 196,98,125,24,5,156,94,0,0 // vbroadcastss 0x5e9c(%rip),%ymm8 # 6468 <_sk_callback_avx+0x165>
+ .byte 196,98,125,24,5,128,93,0,0 // vbroadcastss 0x5d80(%rip),%ymm8 # 634c <_sk_callback_avx+0x165>
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
.byte 197,60,89,199 // vmulps %ymm7,%ymm8,%ymm8
.byte 197,188,88,219 // vaddps %ymm3,%ymm8,%ymm3
@@ -14410,7 +14410,7 @@ _sk_difference_avx:
.byte 196,193,108,93,209 // vminps %ymm9,%ymm2,%ymm2
.byte 197,236,88,210 // vaddps %ymm2,%ymm2,%ymm2
.byte 197,188,92,210 // vsubps %ymm2,%ymm8,%ymm2
- .byte 196,98,125,24,5,60,94,0,0 // vbroadcastss 0x5e3c(%rip),%ymm8 # 646c <_sk_callback_avx+0x169>
+ .byte 196,98,125,24,5,32,93,0,0 // vbroadcastss 0x5d20(%rip),%ymm8 # 6350 <_sk_callback_avx+0x169>
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
.byte 197,60,89,199 // vmulps %ymm7,%ymm8,%ymm8
.byte 197,188,88,219 // vaddps %ymm3,%ymm8,%ymm3
@@ -14433,7 +14433,7 @@ _sk_exclusion_avx:
.byte 197,236,89,214 // vmulps %ymm6,%ymm2,%ymm2
.byte 197,236,88,210 // vaddps %ymm2,%ymm2,%ymm2
.byte 197,188,92,210 // vsubps %ymm2,%ymm8,%ymm2
- .byte 196,98,125,24,5,247,93,0,0 // vbroadcastss 0x5df7(%rip),%ymm8 # 6470 <_sk_callback_avx+0x16d>
+ .byte 196,98,125,24,5,219,92,0,0 // vbroadcastss 0x5cdb(%rip),%ymm8 # 6354 <_sk_callback_avx+0x16d>
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
.byte 197,60,89,199 // vmulps %ymm7,%ymm8,%ymm8
.byte 197,188,88,219 // vaddps %ymm3,%ymm8,%ymm3
@@ -14444,7 +14444,7 @@ HIDDEN _sk_colorburn_avx
.globl _sk_colorburn_avx
FUNCTION(_sk_colorburn_avx)
_sk_colorburn_avx:
- .byte 196,98,125,24,5,226,93,0,0 // vbroadcastss 0x5de2(%rip),%ymm8 # 6474 <_sk_callback_avx+0x171>
+ .byte 196,98,125,24,5,198,92,0,0 // vbroadcastss 0x5cc6(%rip),%ymm8 # 6358 <_sk_callback_avx+0x171>
.byte 197,60,92,207 // vsubps %ymm7,%ymm8,%ymm9
.byte 197,52,89,216 // vmulps %ymm0,%ymm9,%ymm11
.byte 196,65,44,87,210 // vxorps %ymm10,%ymm10,%ymm10
@@ -14506,7 +14506,7 @@ HIDDEN _sk_colordodge_avx
FUNCTION(_sk_colordodge_avx)
_sk_colordodge_avx:
.byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
- .byte 196,98,125,24,13,222,92,0,0 // vbroadcastss 0x5cde(%rip),%ymm9 # 6478 <_sk_callback_avx+0x175>
+ .byte 196,98,125,24,13,194,91,0,0 // vbroadcastss 0x5bc2(%rip),%ymm9 # 635c <_sk_callback_avx+0x175>
.byte 197,52,92,215 // vsubps %ymm7,%ymm9,%ymm10
.byte 197,44,89,216 // vmulps %ymm0,%ymm10,%ymm11
.byte 197,52,92,203 // vsubps %ymm3,%ymm9,%ymm9
@@ -14563,7 +14563,7 @@ HIDDEN _sk_hardlight_avx
.globl _sk_hardlight_avx
FUNCTION(_sk_hardlight_avx)
_sk_hardlight_avx:
- .byte 196,98,125,24,5,240,91,0,0 // vbroadcastss 0x5bf0(%rip),%ymm8 # 647c <_sk_callback_avx+0x179>
+ .byte 196,98,125,24,5,212,90,0,0 // vbroadcastss 0x5ad4(%rip),%ymm8 # 6360 <_sk_callback_avx+0x179>
.byte 197,60,92,215 // vsubps %ymm7,%ymm8,%ymm10
.byte 197,44,89,200 // vmulps %ymm0,%ymm10,%ymm9
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
@@ -14618,7 +14618,7 @@ HIDDEN _sk_overlay_avx
.globl _sk_overlay_avx
FUNCTION(_sk_overlay_avx)
_sk_overlay_avx:
- .byte 196,98,125,24,5,25,91,0,0 // vbroadcastss 0x5b19(%rip),%ymm8 # 6480 <_sk_callback_avx+0x17d>
+ .byte 196,98,125,24,5,253,89,0,0 // vbroadcastss 0x59fd(%rip),%ymm8 # 6364 <_sk_callback_avx+0x17d>
.byte 197,60,92,215 // vsubps %ymm7,%ymm8,%ymm10
.byte 197,44,89,200 // vmulps %ymm0,%ymm10,%ymm9
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
@@ -14684,10 +14684,10 @@ _sk_softlight_avx:
.byte 196,65,60,88,192 // vaddps %ymm8,%ymm8,%ymm8
.byte 196,65,60,89,216 // vmulps %ymm8,%ymm8,%ymm11
.byte 196,65,60,88,195 // vaddps %ymm11,%ymm8,%ymm8
- .byte 196,98,125,24,29,16,90,0,0 // vbroadcastss 0x5a10(%rip),%ymm11 # 6488 <_sk_callback_avx+0x185>
+ .byte 196,98,125,24,29,244,88,0,0 // vbroadcastss 0x58f4(%rip),%ymm11 # 636c <_sk_callback_avx+0x185>
.byte 196,65,28,88,235 // vaddps %ymm11,%ymm12,%ymm13
.byte 196,65,20,89,192 // vmulps %ymm8,%ymm13,%ymm8
- .byte 196,98,125,24,45,1,90,0,0 // vbroadcastss 0x5a01(%rip),%ymm13 # 648c <_sk_callback_avx+0x189>
+ .byte 196,98,125,24,45,229,88,0,0 // vbroadcastss 0x58e5(%rip),%ymm13 # 6370 <_sk_callback_avx+0x189>
.byte 196,65,28,89,245 // vmulps %ymm13,%ymm12,%ymm14
.byte 196,65,12,88,192 // vaddps %ymm8,%ymm14,%ymm8
.byte 196,65,124,82,244 // vrsqrtps %ymm12,%ymm14
@@ -14698,7 +14698,7 @@ _sk_softlight_avx:
.byte 197,4,194,255,2 // vcmpleps %ymm7,%ymm15,%ymm15
.byte 196,67,13,74,240,240 // vblendvps %ymm15,%ymm8,%ymm14,%ymm14
.byte 197,116,88,249 // vaddps %ymm1,%ymm1,%ymm15
- .byte 196,98,125,24,5,191,89,0,0 // vbroadcastss 0x59bf(%rip),%ymm8 # 6484 <_sk_callback_avx+0x181>
+ .byte 196,98,125,24,5,163,88,0,0 // vbroadcastss 0x58a3(%rip),%ymm8 # 6368 <_sk_callback_avx+0x181>
.byte 196,65,60,92,228 // vsubps %ymm12,%ymm8,%ymm12
.byte 197,132,92,195 // vsubps %ymm3,%ymm15,%ymm0
.byte 196,65,124,89,228 // vmulps %ymm12,%ymm0,%ymm12
@@ -14825,12 +14825,12 @@ _sk_hue_avx:
.byte 196,65,28,89,219 // vmulps %ymm11,%ymm12,%ymm11
.byte 196,65,36,94,222 // vdivps %ymm14,%ymm11,%ymm11
.byte 196,67,37,74,224,240 // vblendvps %ymm15,%ymm8,%ymm11,%ymm12
- .byte 196,98,125,24,53,142,87,0,0 // vbroadcastss 0x578e(%rip),%ymm14 # 6490 <_sk_callback_avx+0x18d>
+ .byte 196,98,125,24,53,114,86,0,0 // vbroadcastss 0x5672(%rip),%ymm14 # 6374 <_sk_callback_avx+0x18d>
.byte 196,65,92,89,222 // vmulps %ymm14,%ymm4,%ymm11
- .byte 196,98,125,24,61,132,87,0,0 // vbroadcastss 0x5784(%rip),%ymm15 # 6494 <_sk_callback_avx+0x191>
+ .byte 196,98,125,24,61,104,86,0,0 // vbroadcastss 0x5668(%rip),%ymm15 # 6378 <_sk_callback_avx+0x191>
.byte 196,65,84,89,239 // vmulps %ymm15,%ymm5,%ymm13
.byte 196,65,36,88,221 // vaddps %ymm13,%ymm11,%ymm11
- .byte 196,226,125,24,5,117,87,0,0 // vbroadcastss 0x5775(%rip),%ymm0 # 6498 <_sk_callback_avx+0x195>
+ .byte 196,226,125,24,5,89,86,0,0 // vbroadcastss 0x5659(%rip),%ymm0 # 637c <_sk_callback_avx+0x195>
.byte 197,76,89,232 // vmulps %ymm0,%ymm6,%ymm13
.byte 196,65,36,88,221 // vaddps %ymm13,%ymm11,%ymm11
.byte 196,65,52,89,238 // vmulps %ymm14,%ymm9,%ymm13
@@ -14891,7 +14891,7 @@ _sk_hue_avx:
.byte 196,65,36,95,208 // vmaxps %ymm8,%ymm11,%ymm10
.byte 196,195,109,74,209,240 // vblendvps %ymm15,%ymm9,%ymm2,%ymm2
.byte 196,193,108,95,208 // vmaxps %ymm8,%ymm2,%ymm2
- .byte 196,98,125,24,5,78,86,0,0 // vbroadcastss 0x564e(%rip),%ymm8 # 649c <_sk_callback_avx+0x199>
+ .byte 196,98,125,24,5,50,85,0,0 // vbroadcastss 0x5532(%rip),%ymm8 # 6380 <_sk_callback_avx+0x199>
.byte 197,60,92,207 // vsubps %ymm7,%ymm8,%ymm9
.byte 197,180,89,201 // vmulps %ymm1,%ymm9,%ymm1
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
@@ -14948,12 +14948,12 @@ _sk_saturation_avx:
.byte 196,65,28,89,219 // vmulps %ymm11,%ymm12,%ymm11
.byte 196,65,36,94,222 // vdivps %ymm14,%ymm11,%ymm11
.byte 196,67,37,74,224,240 // vblendvps %ymm15,%ymm8,%ymm11,%ymm12
- .byte 196,98,125,24,53,92,85,0,0 // vbroadcastss 0x555c(%rip),%ymm14 # 64a0 <_sk_callback_avx+0x19d>
+ .byte 196,98,125,24,53,64,84,0,0 // vbroadcastss 0x5440(%rip),%ymm14 # 6384 <_sk_callback_avx+0x19d>
.byte 196,65,92,89,222 // vmulps %ymm14,%ymm4,%ymm11
- .byte 196,98,125,24,61,82,85,0,0 // vbroadcastss 0x5552(%rip),%ymm15 # 64a4 <_sk_callback_avx+0x1a1>
+ .byte 196,98,125,24,61,54,84,0,0 // vbroadcastss 0x5436(%rip),%ymm15 # 6388 <_sk_callback_avx+0x1a1>
.byte 196,65,84,89,239 // vmulps %ymm15,%ymm5,%ymm13
.byte 196,65,36,88,221 // vaddps %ymm13,%ymm11,%ymm11
- .byte 196,226,125,24,5,67,85,0,0 // vbroadcastss 0x5543(%rip),%ymm0 # 64a8 <_sk_callback_avx+0x1a5>
+ .byte 196,226,125,24,5,39,84,0,0 // vbroadcastss 0x5427(%rip),%ymm0 # 638c <_sk_callback_avx+0x1a5>
.byte 197,76,89,232 // vmulps %ymm0,%ymm6,%ymm13
.byte 196,65,36,88,221 // vaddps %ymm13,%ymm11,%ymm11
.byte 196,65,52,89,238 // vmulps %ymm14,%ymm9,%ymm13
@@ -15014,7 +15014,7 @@ _sk_saturation_avx:
.byte 196,65,36,95,208 // vmaxps %ymm8,%ymm11,%ymm10
.byte 196,195,109,74,209,240 // vblendvps %ymm15,%ymm9,%ymm2,%ymm2
.byte 196,193,108,95,208 // vmaxps %ymm8,%ymm2,%ymm2
- .byte 196,98,125,24,5,28,84,0,0 // vbroadcastss 0x541c(%rip),%ymm8 # 64ac <_sk_callback_avx+0x1a9>
+ .byte 196,98,125,24,5,0,83,0,0 // vbroadcastss 0x5300(%rip),%ymm8 # 6390 <_sk_callback_avx+0x1a9>
.byte 197,60,92,207 // vsubps %ymm7,%ymm8,%ymm9
.byte 197,180,89,201 // vmulps %ymm1,%ymm9,%ymm1
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
@@ -15043,12 +15043,12 @@ _sk_color_avx:
.byte 197,252,17,68,36,168 // vmovups %ymm0,-0x58(%rsp)
.byte 197,124,89,199 // vmulps %ymm7,%ymm0,%ymm8
.byte 197,116,89,207 // vmulps %ymm7,%ymm1,%ymm9
- .byte 196,98,125,24,45,178,83,0,0 // vbroadcastss 0x53b2(%rip),%ymm13 # 64b0 <_sk_callback_avx+0x1ad>
+ .byte 196,98,125,24,45,150,82,0,0 // vbroadcastss 0x5296(%rip),%ymm13 # 6394 <_sk_callback_avx+0x1ad>
.byte 196,65,92,89,213 // vmulps %ymm13,%ymm4,%ymm10
- .byte 196,98,125,24,53,168,83,0,0 // vbroadcastss 0x53a8(%rip),%ymm14 # 64b4 <_sk_callback_avx+0x1b1>
+ .byte 196,98,125,24,53,140,82,0,0 // vbroadcastss 0x528c(%rip),%ymm14 # 6398 <_sk_callback_avx+0x1b1>
.byte 196,65,84,89,222 // vmulps %ymm14,%ymm5,%ymm11
.byte 196,65,44,88,211 // vaddps %ymm11,%ymm10,%ymm10
- .byte 196,98,125,24,61,153,83,0,0 // vbroadcastss 0x5399(%rip),%ymm15 # 64b8 <_sk_callback_avx+0x1b5>
+ .byte 196,98,125,24,61,125,82,0,0 // vbroadcastss 0x527d(%rip),%ymm15 # 639c <_sk_callback_avx+0x1b5>
.byte 196,65,76,89,223 // vmulps %ymm15,%ymm6,%ymm11
.byte 196,193,44,88,195 // vaddps %ymm11,%ymm10,%ymm0
.byte 196,65,60,89,221 // vmulps %ymm13,%ymm8,%ymm11
@@ -15111,7 +15111,7 @@ _sk_color_avx:
.byte 196,65,44,95,207 // vmaxps %ymm15,%ymm10,%ymm9
.byte 196,195,37,74,192,0 // vblendvps %ymm0,%ymm8,%ymm11,%ymm0
.byte 196,65,124,95,199 // vmaxps %ymm15,%ymm0,%ymm8
- .byte 196,226,125,24,5,96,82,0,0 // vbroadcastss 0x5260(%rip),%ymm0 # 64bc <_sk_callback_avx+0x1b9>
+ .byte 196,226,125,24,5,68,81,0,0 // vbroadcastss 0x5144(%rip),%ymm0 # 63a0 <_sk_callback_avx+0x1b9>
.byte 197,124,92,215 // vsubps %ymm7,%ymm0,%ymm10
.byte 197,172,89,84,36,168 // vmulps -0x58(%rsp),%ymm10,%ymm2
.byte 197,124,92,219 // vsubps %ymm3,%ymm0,%ymm11
@@ -15141,12 +15141,12 @@ _sk_luminosity_avx:
.byte 197,252,40,208 // vmovaps %ymm0,%ymm2
.byte 197,100,89,196 // vmulps %ymm4,%ymm3,%ymm8
.byte 197,100,89,205 // vmulps %ymm5,%ymm3,%ymm9
- .byte 196,98,125,24,45,242,81,0,0 // vbroadcastss 0x51f2(%rip),%ymm13 # 64c0 <_sk_callback_avx+0x1bd>
+ .byte 196,98,125,24,45,214,80,0,0 // vbroadcastss 0x50d6(%rip),%ymm13 # 63a4 <_sk_callback_avx+0x1bd>
.byte 196,65,108,89,213 // vmulps %ymm13,%ymm2,%ymm10
- .byte 196,98,125,24,53,232,81,0,0 // vbroadcastss 0x51e8(%rip),%ymm14 # 64c4 <_sk_callback_avx+0x1c1>
+ .byte 196,98,125,24,53,204,80,0,0 // vbroadcastss 0x50cc(%rip),%ymm14 # 63a8 <_sk_callback_avx+0x1c1>
.byte 196,65,116,89,222 // vmulps %ymm14,%ymm1,%ymm11
.byte 196,65,44,88,211 // vaddps %ymm11,%ymm10,%ymm10
- .byte 196,98,125,24,61,217,81,0,0 // vbroadcastss 0x51d9(%rip),%ymm15 # 64c8 <_sk_callback_avx+0x1c5>
+ .byte 196,98,125,24,61,189,80,0,0 // vbroadcastss 0x50bd(%rip),%ymm15 # 63ac <_sk_callback_avx+0x1c5>
.byte 196,65,28,89,223 // vmulps %ymm15,%ymm12,%ymm11
.byte 196,193,44,88,195 // vaddps %ymm11,%ymm10,%ymm0
.byte 196,65,60,89,221 // vmulps %ymm13,%ymm8,%ymm11
@@ -15209,7 +15209,7 @@ _sk_luminosity_avx:
.byte 196,65,44,95,207 // vmaxps %ymm15,%ymm10,%ymm9
.byte 196,195,37,74,192,0 // vblendvps %ymm0,%ymm8,%ymm11,%ymm0
.byte 196,65,124,95,199 // vmaxps %ymm15,%ymm0,%ymm8
- .byte 196,226,125,24,5,160,80,0,0 // vbroadcastss 0x50a0(%rip),%ymm0 # 64cc <_sk_callback_avx+0x1c9>
+ .byte 196,226,125,24,5,132,79,0,0 // vbroadcastss 0x4f84(%rip),%ymm0 # 63b0 <_sk_callback_avx+0x1c9>
.byte 197,124,92,215 // vsubps %ymm7,%ymm0,%ymm10
.byte 197,172,89,210 // vmulps %ymm2,%ymm10,%ymm2
.byte 197,124,92,219 // vsubps %ymm3,%ymm0,%ymm11
@@ -15245,7 +15245,7 @@ HIDDEN _sk_clamp_1_avx
.globl _sk_clamp_1_avx
FUNCTION(_sk_clamp_1_avx)
_sk_clamp_1_avx:
- .byte 196,98,125,24,5,51,80,0,0 // vbroadcastss 0x5033(%rip),%ymm8 # 64d0 <_sk_callback_avx+0x1cd>
+ .byte 196,98,125,24,5,23,79,0,0 // vbroadcastss 0x4f17(%rip),%ymm8 # 63b4 <_sk_callback_avx+0x1cd>
.byte 196,193,124,93,192 // vminps %ymm8,%ymm0,%ymm0
.byte 196,193,116,93,200 // vminps %ymm8,%ymm1,%ymm1
.byte 196,193,108,93,208 // vminps %ymm8,%ymm2,%ymm2
@@ -15257,7 +15257,7 @@ HIDDEN _sk_clamp_a_avx
.globl _sk_clamp_a_avx
FUNCTION(_sk_clamp_a_avx)
_sk_clamp_a_avx:
- .byte 196,98,125,24,5,22,80,0,0 // vbroadcastss 0x5016(%rip),%ymm8 # 64d4 <_sk_callback_avx+0x1d1>
+ .byte 196,98,125,24,5,250,78,0,0 // vbroadcastss 0x4efa(%rip),%ymm8 # 63b8 <_sk_callback_avx+0x1d1>
.byte 196,193,100,93,216 // vminps %ymm8,%ymm3,%ymm3
.byte 197,252,93,195 // vminps %ymm3,%ymm0,%ymm0
.byte 197,244,93,203 // vminps %ymm3,%ymm1,%ymm1
@@ -15343,7 +15343,7 @@ FUNCTION(_sk_unpremul_avx)
_sk_unpremul_avx:
.byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
.byte 196,65,100,194,200,0 // vcmpeqps %ymm8,%ymm3,%ymm9
- .byte 196,98,125,24,21,94,79,0,0 // vbroadcastss 0x4f5e(%rip),%ymm10 # 64d8 <_sk_callback_avx+0x1d5>
+ .byte 196,98,125,24,21,66,78,0,0 // vbroadcastss 0x4e42(%rip),%ymm10 # 63bc <_sk_callback_avx+0x1d5>
.byte 197,44,94,211 // vdivps %ymm3,%ymm10,%ymm10
.byte 196,67,45,74,192,144 // vblendvps %ymm9,%ymm8,%ymm10,%ymm8
.byte 197,188,89,192 // vmulps %ymm0,%ymm8,%ymm0
@@ -15356,17 +15356,17 @@ HIDDEN _sk_from_srgb_avx
.globl _sk_from_srgb_avx
FUNCTION(_sk_from_srgb_avx)
_sk_from_srgb_avx:
- .byte 196,98,125,24,5,63,79,0,0 // vbroadcastss 0x4f3f(%rip),%ymm8 # 64dc <_sk_callback_avx+0x1d9>
+ .byte 196,98,125,24,5,35,78,0,0 // vbroadcastss 0x4e23(%rip),%ymm8 # 63c0 <_sk_callback_avx+0x1d9>
.byte 196,65,124,89,200 // vmulps %ymm8,%ymm0,%ymm9
.byte 197,124,89,208 // vmulps %ymm0,%ymm0,%ymm10
- .byte 196,98,125,24,29,49,79,0,0 // vbroadcastss 0x4f31(%rip),%ymm11 # 64e0 <_sk_callback_avx+0x1dd>
+ .byte 196,98,125,24,29,21,78,0,0 // vbroadcastss 0x4e15(%rip),%ymm11 # 63c4 <_sk_callback_avx+0x1dd>
.byte 196,65,124,89,227 // vmulps %ymm11,%ymm0,%ymm12
- .byte 196,98,125,24,45,39,79,0,0 // vbroadcastss 0x4f27(%rip),%ymm13 # 64e4 <_sk_callback_avx+0x1e1>
+ .byte 196,98,125,24,45,11,78,0,0 // vbroadcastss 0x4e0b(%rip),%ymm13 # 63c8 <_sk_callback_avx+0x1e1>
.byte 196,65,28,88,229 // vaddps %ymm13,%ymm12,%ymm12
.byte 196,65,44,89,212 // vmulps %ymm12,%ymm10,%ymm10
- .byte 196,98,125,24,37,24,79,0,0 // vbroadcastss 0x4f18(%rip),%ymm12 # 64e8 <_sk_callback_avx+0x1e5>
+ .byte 196,98,125,24,37,252,77,0,0 // vbroadcastss 0x4dfc(%rip),%ymm12 # 63cc <_sk_callback_avx+0x1e5>
.byte 196,65,44,88,212 // vaddps %ymm12,%ymm10,%ymm10
- .byte 196,98,125,24,53,14,79,0,0 // vbroadcastss 0x4f0e(%rip),%ymm14 # 64ec <_sk_callback_avx+0x1e9>
+ .byte 196,98,125,24,53,242,77,0,0 // vbroadcastss 0x4df2(%rip),%ymm14 # 63d0 <_sk_callback_avx+0x1e9>
.byte 196,193,124,194,198,1 // vcmpltps %ymm14,%ymm0,%ymm0
.byte 196,195,45,74,193,0 // vblendvps %ymm0,%ymm9,%ymm10,%ymm0
.byte 196,65,116,89,200 // vmulps %ymm8,%ymm1,%ymm9
@@ -15393,20 +15393,20 @@ HIDDEN _sk_to_srgb_avx
FUNCTION(_sk_to_srgb_avx)
_sk_to_srgb_avx:
.byte 197,124,82,200 // vrsqrtps %ymm0,%ymm9
- .byte 196,98,125,24,5,163,78,0,0 // vbroadcastss 0x4ea3(%rip),%ymm8 # 64f0 <_sk_callback_avx+0x1ed>
+ .byte 196,98,125,24,5,135,77,0,0 // vbroadcastss 0x4d87(%rip),%ymm8 # 63d4 <_sk_callback_avx+0x1ed>
.byte 196,65,124,89,208 // vmulps %ymm8,%ymm0,%ymm10
- .byte 196,98,125,24,29,153,78,0,0 // vbroadcastss 0x4e99(%rip),%ymm11 # 64f4 <_sk_callback_avx+0x1f1>
+ .byte 196,98,125,24,29,125,77,0,0 // vbroadcastss 0x4d7d(%rip),%ymm11 # 63d8 <_sk_callback_avx+0x1f1>
.byte 196,65,52,89,227 // vmulps %ymm11,%ymm9,%ymm12
- .byte 196,98,125,24,45,143,78,0,0 // vbroadcastss 0x4e8f(%rip),%ymm13 # 64f8 <_sk_callback_avx+0x1f5>
+ .byte 196,98,125,24,45,115,77,0,0 // vbroadcastss 0x4d73(%rip),%ymm13 # 63dc <_sk_callback_avx+0x1f5>
.byte 196,65,28,88,229 // vaddps %ymm13,%ymm12,%ymm12
.byte 196,65,52,89,228 // vmulps %ymm12,%ymm9,%ymm12
- .byte 196,98,125,24,53,128,78,0,0 // vbroadcastss 0x4e80(%rip),%ymm14 # 64fc <_sk_callback_avx+0x1f9>
+ .byte 196,98,125,24,53,100,77,0,0 // vbroadcastss 0x4d64(%rip),%ymm14 # 63e0 <_sk_callback_avx+0x1f9>
.byte 196,65,28,88,230 // vaddps %ymm14,%ymm12,%ymm12
- .byte 196,98,125,24,61,118,78,0,0 // vbroadcastss 0x4e76(%rip),%ymm15 # 6500 <_sk_callback_avx+0x1fd>
+ .byte 196,98,125,24,61,90,77,0,0 // vbroadcastss 0x4d5a(%rip),%ymm15 # 63e4 <_sk_callback_avx+0x1fd>
.byte 196,65,52,88,207 // vaddps %ymm15,%ymm9,%ymm9
.byte 196,65,124,83,201 // vrcpps %ymm9,%ymm9
.byte 196,65,52,89,204 // vmulps %ymm12,%ymm9,%ymm9
- .byte 196,98,125,24,37,98,78,0,0 // vbroadcastss 0x4e62(%rip),%ymm12 # 6504 <_sk_callback_avx+0x201>
+ .byte 196,98,125,24,37,70,77,0,0 // vbroadcastss 0x4d46(%rip),%ymm12 # 63e8 <_sk_callback_avx+0x201>
.byte 196,193,124,194,196,1 // vcmpltps %ymm12,%ymm0,%ymm0
.byte 196,195,53,74,194,0 // vblendvps %ymm0,%ymm10,%ymm9,%ymm0
.byte 197,124,82,201 // vrsqrtps %ymm1,%ymm9
@@ -15443,7 +15443,7 @@ _sk_rgb_to_hsl_avx:
.byte 197,124,93,201 // vminps %ymm1,%ymm0,%ymm9
.byte 197,52,93,202 // vminps %ymm2,%ymm9,%ymm9
.byte 196,65,60,92,209 // vsubps %ymm9,%ymm8,%ymm10
- .byte 196,98,125,24,29,200,77,0,0 // vbroadcastss 0x4dc8(%rip),%ymm11 # 6508 <_sk_callback_avx+0x205>
+ .byte 196,98,125,24,29,172,76,0,0 // vbroadcastss 0x4cac(%rip),%ymm11 # 63ec <_sk_callback_avx+0x205>
.byte 196,65,36,94,218 // vdivps %ymm10,%ymm11,%ymm11
.byte 197,116,92,226 // vsubps %ymm2,%ymm1,%ymm12
.byte 196,65,28,89,227 // vmulps %ymm11,%ymm12,%ymm12
@@ -15453,19 +15453,19 @@ _sk_rgb_to_hsl_avx:
.byte 196,193,108,89,211 // vmulps %ymm11,%ymm2,%ymm2
.byte 197,252,92,201 // vsubps %ymm1,%ymm0,%ymm1
.byte 196,193,116,89,203 // vmulps %ymm11,%ymm1,%ymm1
- .byte 196,98,125,24,29,161,77,0,0 // vbroadcastss 0x4da1(%rip),%ymm11 # 6514 <_sk_callback_avx+0x211>
+ .byte 196,98,125,24,29,133,76,0,0 // vbroadcastss 0x4c85(%rip),%ymm11 # 63f8 <_sk_callback_avx+0x211>
.byte 196,193,116,88,203 // vaddps %ymm11,%ymm1,%ymm1
- .byte 196,98,125,24,29,143,77,0,0 // vbroadcastss 0x4d8f(%rip),%ymm11 # 6510 <_sk_callback_avx+0x20d>
+ .byte 196,98,125,24,29,115,76,0,0 // vbroadcastss 0x4c73(%rip),%ymm11 # 63f4 <_sk_callback_avx+0x20d>
.byte 196,193,108,88,211 // vaddps %ymm11,%ymm2,%ymm2
.byte 196,227,117,74,202,224 // vblendvps %ymm14,%ymm2,%ymm1,%ymm1
- .byte 196,226,125,24,21,119,77,0,0 // vbroadcastss 0x4d77(%rip),%ymm2 # 650c <_sk_callback_avx+0x209>
+ .byte 196,226,125,24,21,91,76,0,0 // vbroadcastss 0x4c5b(%rip),%ymm2 # 63f0 <_sk_callback_avx+0x209>
.byte 196,65,12,87,246 // vxorps %ymm14,%ymm14,%ymm14
.byte 196,227,13,74,210,208 // vblendvps %ymm13,%ymm2,%ymm14,%ymm2
.byte 197,188,194,192,0 // vcmpeqps %ymm0,%ymm8,%ymm0
.byte 196,193,108,88,212 // vaddps %ymm12,%ymm2,%ymm2
.byte 196,227,117,74,194,0 // vblendvps %ymm0,%ymm2,%ymm1,%ymm0
.byte 196,193,60,88,201 // vaddps %ymm9,%ymm8,%ymm1
- .byte 196,98,125,24,37,94,77,0,0 // vbroadcastss 0x4d5e(%rip),%ymm12 # 651c <_sk_callback_avx+0x219>
+ .byte 196,98,125,24,37,66,76,0,0 // vbroadcastss 0x4c42(%rip),%ymm12 # 6400 <_sk_callback_avx+0x219>
.byte 196,193,116,89,212 // vmulps %ymm12,%ymm1,%ymm2
.byte 197,28,194,226,1 // vcmpltps %ymm2,%ymm12,%ymm12
.byte 196,65,36,92,216 // vsubps %ymm8,%ymm11,%ymm11
@@ -15475,7 +15475,7 @@ _sk_rgb_to_hsl_avx:
.byte 197,172,94,201 // vdivps %ymm1,%ymm10,%ymm1
.byte 196,195,125,74,198,128 // vblendvps %ymm8,%ymm14,%ymm0,%ymm0
.byte 196,195,117,74,206,128 // vblendvps %ymm8,%ymm14,%ymm1,%ymm1
- .byte 196,98,125,24,5,33,77,0,0 // vbroadcastss 0x4d21(%rip),%ymm8 # 6518 <_sk_callback_avx+0x215>
+ .byte 196,98,125,24,5,5,76,0,0 // vbroadcastss 0x4c05(%rip),%ymm8 # 63fc <_sk_callback_avx+0x215>
.byte 196,193,124,89,192 // vmulps %ymm8,%ymm0,%ymm0
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -15492,7 +15492,7 @@ _sk_hsl_to_rgb_avx:
.byte 197,252,17,92,36,128 // vmovups %ymm3,-0x80(%rsp)
.byte 197,252,40,225 // vmovaps %ymm1,%ymm4
.byte 197,252,40,216 // vmovaps %ymm0,%ymm3
- .byte 196,98,125,24,5,238,76,0,0 // vbroadcastss 0x4cee(%rip),%ymm8 # 6520 <_sk_callback_avx+0x21d>
+ .byte 196,98,125,24,5,210,75,0,0 // vbroadcastss 0x4bd2(%rip),%ymm8 # 6404 <_sk_callback_avx+0x21d>
.byte 197,60,194,202,2 // vcmpleps %ymm2,%ymm8,%ymm9
.byte 197,92,89,210 // vmulps %ymm2,%ymm4,%ymm10
.byte 196,65,92,92,218 // vsubps %ymm10,%ymm4,%ymm11
@@ -15500,23 +15500,23 @@ _sk_hsl_to_rgb_avx:
.byte 197,52,88,210 // vaddps %ymm2,%ymm9,%ymm10
.byte 197,108,88,202 // vaddps %ymm2,%ymm2,%ymm9
.byte 196,65,52,92,202 // vsubps %ymm10,%ymm9,%ymm9
- .byte 196,98,125,24,29,200,76,0,0 // vbroadcastss 0x4cc8(%rip),%ymm11 # 6524 <_sk_callback_avx+0x221>
+ .byte 196,98,125,24,29,172,75,0,0 // vbroadcastss 0x4bac(%rip),%ymm11 # 6408 <_sk_callback_avx+0x221>
.byte 196,65,100,88,219 // vaddps %ymm11,%ymm3,%ymm11
.byte 196,67,125,8,227,1 // vroundps $0x1,%ymm11,%ymm12
.byte 196,65,36,92,252 // vsubps %ymm12,%ymm11,%ymm15
.byte 196,65,44,92,217 // vsubps %ymm9,%ymm10,%ymm11
- .byte 196,98,125,24,37,178,76,0,0 // vbroadcastss 0x4cb2(%rip),%ymm12 # 652c <_sk_callback_avx+0x229>
+ .byte 196,98,125,24,37,150,75,0,0 // vbroadcastss 0x4b96(%rip),%ymm12 # 6410 <_sk_callback_avx+0x229>
.byte 196,193,4,89,196 // vmulps %ymm12,%ymm15,%ymm0
- .byte 196,98,125,24,45,168,76,0,0 // vbroadcastss 0x4ca8(%rip),%ymm13 # 6530 <_sk_callback_avx+0x22d>
+ .byte 196,98,125,24,45,140,75,0,0 // vbroadcastss 0x4b8c(%rip),%ymm13 # 6414 <_sk_callback_avx+0x22d>
.byte 197,20,92,240 // vsubps %ymm0,%ymm13,%ymm14
.byte 196,65,36,89,246 // vmulps %ymm14,%ymm11,%ymm14
.byte 196,65,52,88,246 // vaddps %ymm14,%ymm9,%ymm14
- .byte 196,226,125,24,13,137,76,0,0 // vbroadcastss 0x4c89(%rip),%ymm1 # 6528 <_sk_callback_avx+0x225>
+ .byte 196,226,125,24,13,109,75,0,0 // vbroadcastss 0x4b6d(%rip),%ymm1 # 640c <_sk_callback_avx+0x225>
.byte 196,193,116,194,255,2 // vcmpleps %ymm15,%ymm1,%ymm7
.byte 196,195,13,74,249,112 // vblendvps %ymm7,%ymm9,%ymm14,%ymm7
.byte 196,65,60,194,247,2 // vcmpleps %ymm15,%ymm8,%ymm14
.byte 196,227,45,74,255,224 // vblendvps %ymm14,%ymm7,%ymm10,%ymm7
- .byte 196,98,125,24,53,116,76,0,0 // vbroadcastss 0x4c74(%rip),%ymm14 # 6534 <_sk_callback_avx+0x231>
+ .byte 196,98,125,24,53,88,75,0,0 // vbroadcastss 0x4b58(%rip),%ymm14 # 6418 <_sk_callback_avx+0x231>
.byte 196,65,12,194,255,2 // vcmpleps %ymm15,%ymm14,%ymm15
.byte 196,193,124,89,195 // vmulps %ymm11,%ymm0,%ymm0
.byte 197,180,88,192 // vaddps %ymm0,%ymm9,%ymm0
@@ -15535,7 +15535,7 @@ _sk_hsl_to_rgb_avx:
.byte 197,164,89,247 // vmulps %ymm7,%ymm11,%ymm6
.byte 197,180,88,246 // vaddps %ymm6,%ymm9,%ymm6
.byte 196,227,77,74,237,0 // vblendvps %ymm0,%ymm5,%ymm6,%ymm5
- .byte 196,226,125,24,5,22,76,0,0 // vbroadcastss 0x4c16(%rip),%ymm0 # 6538 <_sk_callback_avx+0x235>
+ .byte 196,226,125,24,5,250,74,0,0 // vbroadcastss 0x4afa(%rip),%ymm0 # 641c <_sk_callback_avx+0x235>
.byte 197,228,88,192 // vaddps %ymm0,%ymm3,%ymm0
.byte 196,227,125,8,216,1 // vroundps $0x1,%ymm0,%ymm3
.byte 197,252,92,195 // vsubps %ymm3,%ymm0,%ymm0
@@ -15594,7 +15594,7 @@ _sk_scale_u8_avx:
.byte 196,66,121,49,192 // vpmovzxbd %xmm8,%xmm8
.byte 196,67,53,24,192,1 // vinsertf128 $0x1,%xmm8,%ymm9,%ymm8
.byte 196,65,124,91,192 // vcvtdq2ps %ymm8,%ymm8
- .byte 196,98,125,24,13,63,75,0,0 // vbroadcastss 0x4b3f(%rip),%ymm9 # 653c <_sk_callback_avx+0x239>
+ .byte 196,98,125,24,13,35,74,0,0 // vbroadcastss 0x4a23(%rip),%ymm9 # 6420 <_sk_callback_avx+0x239>
.byte 196,65,60,89,193 // vmulps %ymm9,%ymm8,%ymm8
.byte 197,188,89,192 // vmulps %ymm0,%ymm8,%ymm0
.byte 197,188,89,201 // vmulps %ymm1,%ymm8,%ymm1
@@ -15653,7 +15653,7 @@ _sk_lerp_u8_avx:
.byte 196,66,121,49,192 // vpmovzxbd %xmm8,%xmm8
.byte 196,67,53,24,192,1 // vinsertf128 $0x1,%xmm8,%ymm9,%ymm8
.byte 196,65,124,91,192 // vcvtdq2ps %ymm8,%ymm8
- .byte 196,98,125,24,13,139,74,0,0 // vbroadcastss 0x4a8b(%rip),%ymm9 # 6540 <_sk_callback_avx+0x23d>
+ .byte 196,98,125,24,13,111,73,0,0 // vbroadcastss 0x496f(%rip),%ymm9 # 6424 <_sk_callback_avx+0x23d>
.byte 196,65,60,89,193 // vmulps %ymm9,%ymm8,%ymm8
.byte 197,252,92,196 // vsubps %ymm4,%ymm0,%ymm0
.byte 196,193,124,89,192 // vmulps %ymm8,%ymm0,%ymm0
@@ -15696,20 +15696,20 @@ _sk_lerp_565_avx:
.byte 196,65,57,105,201 // vpunpckhwd %xmm9,%xmm8,%xmm9
.byte 196,66,121,51,192 // vpmovzxwd %xmm8,%xmm8
.byte 196,67,61,24,193,1 // vinsertf128 $0x1,%xmm9,%ymm8,%ymm8
- .byte 196,98,125,24,13,245,73,0,0 // vbroadcastss 0x49f5(%rip),%ymm9 # 6544 <_sk_callback_avx+0x241>
+ .byte 196,98,125,24,13,217,72,0,0 // vbroadcastss 0x48d9(%rip),%ymm9 # 6428 <_sk_callback_avx+0x241>
.byte 196,65,60,84,201 // vandps %ymm9,%ymm8,%ymm9
.byte 196,65,124,91,201 // vcvtdq2ps %ymm9,%ymm9
- .byte 196,98,125,24,21,230,73,0,0 // vbroadcastss 0x49e6(%rip),%ymm10 # 6548 <_sk_callback_avx+0x245>
+ .byte 196,98,125,24,21,202,72,0,0 // vbroadcastss 0x48ca(%rip),%ymm10 # 642c <_sk_callback_avx+0x245>
.byte 196,65,52,89,202 // vmulps %ymm10,%ymm9,%ymm9
- .byte 196,98,125,24,21,220,73,0,0 // vbroadcastss 0x49dc(%rip),%ymm10 # 654c <_sk_callback_avx+0x249>
+ .byte 196,98,125,24,21,192,72,0,0 // vbroadcastss 0x48c0(%rip),%ymm10 # 6430 <_sk_callback_avx+0x249>
.byte 196,65,60,84,210 // vandps %ymm10,%ymm8,%ymm10
.byte 196,65,124,91,210 // vcvtdq2ps %ymm10,%ymm10
- .byte 196,98,125,24,29,205,73,0,0 // vbroadcastss 0x49cd(%rip),%ymm11 # 6550 <_sk_callback_avx+0x24d>
+ .byte 196,98,125,24,29,177,72,0,0 // vbroadcastss 0x48b1(%rip),%ymm11 # 6434 <_sk_callback_avx+0x24d>
.byte 196,65,44,89,211 // vmulps %ymm11,%ymm10,%ymm10
- .byte 196,98,125,24,29,195,73,0,0 // vbroadcastss 0x49c3(%rip),%ymm11 # 6554 <_sk_callback_avx+0x251>
+ .byte 196,98,125,24,29,167,72,0,0 // vbroadcastss 0x48a7(%rip),%ymm11 # 6438 <_sk_callback_avx+0x251>
.byte 196,65,60,84,195 // vandps %ymm11,%ymm8,%ymm8
.byte 196,65,124,91,192 // vcvtdq2ps %ymm8,%ymm8
- .byte 196,98,125,24,29,180,73,0,0 // vbroadcastss 0x49b4(%rip),%ymm11 # 6558 <_sk_callback_avx+0x255>
+ .byte 196,98,125,24,29,152,72,0,0 // vbroadcastss 0x4898(%rip),%ymm11 # 643c <_sk_callback_avx+0x255>
.byte 196,65,60,89,195 // vmulps %ymm11,%ymm8,%ymm8
.byte 197,252,92,196 // vsubps %ymm4,%ymm0,%ymm0
.byte 196,193,124,89,193 // vmulps %ymm9,%ymm0,%ymm0
@@ -15756,7 +15756,7 @@ _sk_lerp_565_avx:
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 233,255,255,255,225 // jmpq ffffffffe2001c74 <_sk_callback_avx+0xffffffffe1ffb971>
+ .byte 233,255,255,255,225 // jmpq ffffffffe2001c74 <_sk_callback_avx+0xffffffffe1ffba8d>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
@@ -15778,19 +15778,21 @@ HIDDEN _sk_load_tables_avx
.globl _sk_load_tables_avx
FUNCTION(_sk_load_tables_avx)
_sk_load_tables_avx:
+ .byte 73,137,200 // mov %rcx,%r8
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 76,139,0 // mov (%rax),%r8
- .byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,26,2,0,0 // jne 1eb0 <_sk_load_tables_avx+0x228>
- .byte 196,65,124,16,4,184 // vmovups (%r8,%rdi,4),%ymm8
+ .byte 76,141,12,189,0,0,0,0 // lea 0x0(,%rdi,4),%r9
+ .byte 76,3,8 // add (%rax),%r9
+ .byte 77,133,192 // test %r8,%r8
+ .byte 15,133,26,2,0,0 // jne 1ebb <_sk_load_tables_avx+0x233>
+ .byte 196,65,124,16,17 // vmovups (%r9),%ymm10
.byte 85 // push %rbp
.byte 65,87 // push %r15
.byte 65,86 // push %r14
.byte 65,85 // push %r13
.byte 65,84 // push %r12
.byte 83 // push %rbx
- .byte 197,124,40,13,146,75,0,0 // vmovaps 0x4b92(%rip),%ymm9 # 6840 <_sk_callback_avx+0x53d>
- .byte 196,193,60,84,193 // vandps %ymm9,%ymm8,%ymm0
+ .byte 197,124,40,13,232,74,0,0 // vmovaps 0x4ae8(%rip),%ymm9 # 67a0 <_sk_callback_avx+0x5b9>
+ .byte 196,193,44,84,193 // vandps %ymm9,%ymm10,%ymm0
.byte 196,193,249,126,193 // vmovq %xmm0,%r9
.byte 69,137,203 // mov %r9d,%r11d
.byte 196,195,249,22,194,1 // vpextrq $0x1,%xmm0,%r10
@@ -15798,26 +15800,26 @@ _sk_load_tables_avx:
.byte 73,193,234,32 // shr $0x20,%r10
.byte 73,193,233,32 // shr $0x20,%r9
.byte 196,227,125,25,192,1 // vextractf128 $0x1,%ymm0,%xmm0
- .byte 196,193,249,126,196 // vmovq %xmm0,%r12
- .byte 69,137,231 // mov %r12d,%r15d
- .byte 196,227,249,22,195,1 // vpextrq $0x1,%xmm0,%rbx
- .byte 65,137,221 // mov %ebx,%r13d
+ .byte 196,225,249,126,195 // vmovq %xmm0,%rbx
+ .byte 65,137,223 // mov %ebx,%r15d
+ .byte 196,227,249,22,193,1 // vpextrq $0x1,%xmm0,%rcx
+ .byte 65,137,205 // mov %ecx,%r13d
+ .byte 72,193,233,32 // shr $0x20,%rcx
.byte 72,193,235,32 // shr $0x20,%rbx
- .byte 73,193,236,32 // shr $0x20,%r12
.byte 72,139,104,8 // mov 0x8(%rax),%rbp
- .byte 76,139,64,16 // mov 0x10(%rax),%r8
+ .byte 76,139,96,16 // mov 0x10(%rax),%r12
.byte 196,161,122,16,68,189,0 // vmovss 0x0(%rbp,%r15,4),%xmm0
- .byte 196,163,121,33,68,165,0,16 // vinsertps $0x10,0x0(%rbp,%r12,4),%xmm0,%xmm0
+ .byte 196,227,121,33,68,157,0,16 // vinsertps $0x10,0x0(%rbp,%rbx,4),%xmm0,%xmm0
.byte 196,163,121,33,68,173,0,32 // vinsertps $0x20,0x0(%rbp,%r13,4),%xmm0,%xmm0
- .byte 196,227,121,33,68,157,0,48 // vinsertps $0x30,0x0(%rbp,%rbx,4),%xmm0,%xmm0
+ .byte 196,227,121,33,68,141,0,48 // vinsertps $0x30,0x0(%rbp,%rcx,4),%xmm0,%xmm0
.byte 196,161,122,16,76,157,0 // vmovss 0x0(%rbp,%r11,4),%xmm1
.byte 196,163,113,33,76,141,0,16 // vinsertps $0x10,0x0(%rbp,%r9,4),%xmm1,%xmm1
.byte 196,163,113,33,76,181,0,32 // vinsertps $0x20,0x0(%rbp,%r14,4),%xmm1,%xmm1
.byte 196,163,113,33,76,149,0,48 // vinsertps $0x30,0x0(%rbp,%r10,4),%xmm1,%xmm1
.byte 196,227,117,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
- .byte 196,193,113,114,208,8 // vpsrld $0x8,%xmm8,%xmm1
- .byte 196,67,125,25,194,1 // vextractf128 $0x1,%ymm8,%xmm10
- .byte 196,193,105,114,210,8 // vpsrld $0x8,%xmm10,%xmm2
+ .byte 196,193,113,114,210,8 // vpsrld $0x8,%xmm10,%xmm1
+ .byte 196,67,125,25,208,1 // vextractf128 $0x1,%ymm10,%xmm8
+ .byte 196,193,105,114,208,8 // vpsrld $0x8,%xmm8,%xmm2
.byte 196,227,117,24,202,1 // vinsertf128 $0x1,%xmm2,%ymm1,%ymm1
.byte 196,193,116,84,201 // vandps %ymm9,%ymm1,%ymm1
.byte 196,193,249,126,201 // vmovq %xmm1,%r9
@@ -15827,36 +15829,36 @@ _sk_load_tables_avx:
.byte 73,193,234,32 // shr $0x20,%r10
.byte 73,193,233,32 // shr $0x20,%r9
.byte 196,227,125,25,201,1 // vextractf128 $0x1,%ymm1,%xmm1
- .byte 196,225,249,126,205 // vmovq %xmm1,%rbp
- .byte 65,137,239 // mov %ebp,%r15d
- .byte 196,227,249,22,203,1 // vpextrq $0x1,%xmm1,%rbx
- .byte 65,137,220 // mov %ebx,%r12d
- .byte 72,193,235,32 // shr $0x20,%rbx
+ .byte 196,225,249,126,203 // vmovq %xmm1,%rbx
+ .byte 65,137,223 // mov %ebx,%r15d
+ .byte 196,227,249,22,205,1 // vpextrq $0x1,%xmm1,%rbp
+ .byte 137,233 // mov %ebp,%ecx
.byte 72,193,237,32 // shr $0x20,%rbp
- .byte 196,129,122,16,12,184 // vmovss (%r8,%r15,4),%xmm1
- .byte 196,195,113,33,12,168,16 // vinsertps $0x10,(%r8,%rbp,4),%xmm1,%xmm1
- .byte 196,129,122,16,20,160 // vmovss (%r8,%r12,4),%xmm2
+ .byte 72,193,235,32 // shr $0x20,%rbx
+ .byte 196,129,122,16,12,188 // vmovss (%r12,%r15,4),%xmm1
+ .byte 196,195,113,33,12,156,16 // vinsertps $0x10,(%r12,%rbx,4),%xmm1,%xmm1
+ .byte 196,193,122,16,20,140 // vmovss (%r12,%rcx,4),%xmm2
.byte 196,227,113,33,202,32 // vinsertps $0x20,%xmm2,%xmm1,%xmm1
- .byte 196,193,122,16,20,152 // vmovss (%r8,%rbx,4),%xmm2
+ .byte 196,193,122,16,20,172 // vmovss (%r12,%rbp,4),%xmm2
.byte 196,227,113,33,202,48 // vinsertps $0x30,%xmm2,%xmm1,%xmm1
- .byte 196,129,122,16,20,152 // vmovss (%r8,%r11,4),%xmm2
- .byte 196,131,105,33,20,136,16 // vinsertps $0x10,(%r8,%r9,4),%xmm2,%xmm2
- .byte 196,129,122,16,28,176 // vmovss (%r8,%r14,4),%xmm3
+ .byte 196,129,122,16,20,156 // vmovss (%r12,%r11,4),%xmm2
+ .byte 196,131,105,33,20,140,16 // vinsertps $0x10,(%r12,%r9,4),%xmm2,%xmm2
+ .byte 196,129,122,16,28,180 // vmovss (%r12,%r14,4),%xmm3
.byte 196,227,105,33,211,32 // vinsertps $0x20,%xmm3,%xmm2,%xmm2
- .byte 196,129,122,16,28,144 // vmovss (%r8,%r10,4),%xmm3
+ .byte 196,129,122,16,28,148 // vmovss (%r12,%r10,4),%xmm3
.byte 196,227,105,33,211,48 // vinsertps $0x30,%xmm3,%xmm2,%xmm2
.byte 196,227,109,24,201,1 // vinsertf128 $0x1,%xmm1,%ymm2,%ymm1
.byte 72,139,64,24 // mov 0x18(%rax),%rax
- .byte 196,193,105,114,208,16 // vpsrld $0x10,%xmm8,%xmm2
- .byte 196,193,97,114,210,16 // vpsrld $0x10,%xmm10,%xmm3
+ .byte 196,193,105,114,210,16 // vpsrld $0x10,%xmm10,%xmm2
+ .byte 196,193,97,114,208,16 // vpsrld $0x10,%xmm8,%xmm3
.byte 196,227,109,24,211,1 // vinsertf128 $0x1,%xmm3,%ymm2,%ymm2
.byte 196,193,108,84,209 // vandps %ymm9,%ymm2,%ymm2
- .byte 196,193,249,126,208 // vmovq %xmm2,%r8
- .byte 69,137,194 // mov %r8d,%r10d
- .byte 196,195,249,22,209,1 // vpextrq $0x1,%xmm2,%r9
- .byte 69,137,203 // mov %r9d,%r11d
+ .byte 196,193,249,126,209 // vmovq %xmm2,%r9
+ .byte 69,137,202 // mov %r9d,%r10d
+ .byte 196,227,249,22,209,1 // vpextrq $0x1,%xmm2,%rcx
+ .byte 65,137,203 // mov %ecx,%r11d
+ .byte 72,193,233,32 // shr $0x20,%rcx
.byte 73,193,233,32 // shr $0x20,%r9
- .byte 73,193,232,32 // shr $0x20,%r8
.byte 196,227,125,25,210,1 // vextractf128 $0x1,%ymm2,%xmm2
.byte 196,225,249,126,213 // vmovq %xmm2,%rbp
.byte 65,137,238 // mov %ebp,%r14d
@@ -15871,19 +15873,20 @@ _sk_load_tables_avx:
.byte 197,250,16,28,152 // vmovss (%rax,%rbx,4),%xmm3
.byte 196,99,105,33,203,48 // vinsertps $0x30,%xmm3,%xmm2,%xmm9
.byte 196,161,122,16,28,144 // vmovss (%rax,%r10,4),%xmm3
- .byte 196,163,97,33,28,128,16 // vinsertps $0x10,(%rax,%r8,4),%xmm3,%xmm3
+ .byte 196,163,97,33,28,136,16 // vinsertps $0x10,(%rax,%r9,4),%xmm3,%xmm3
.byte 196,161,122,16,20,152 // vmovss (%rax,%r11,4),%xmm2
.byte 196,227,97,33,210,32 // vinsertps $0x20,%xmm2,%xmm3,%xmm2
- .byte 196,161,122,16,28,136 // vmovss (%rax,%r9,4),%xmm3
+ .byte 197,250,16,28,136 // vmovss (%rax,%rcx,4),%xmm3
.byte 196,227,105,33,211,48 // vinsertps $0x30,%xmm3,%xmm2,%xmm2
.byte 196,195,109,24,209,1 // vinsertf128 $0x1,%xmm9,%ymm2,%ymm2
- .byte 196,193,57,114,208,24 // vpsrld $0x18,%xmm8,%xmm8
- .byte 196,193,97,114,210,24 // vpsrld $0x18,%xmm10,%xmm3
- .byte 196,227,61,24,219,1 // vinsertf128 $0x1,%xmm3,%ymm8,%ymm3
+ .byte 196,193,49,114,210,24 // vpsrld $0x18,%xmm10,%xmm9
+ .byte 196,193,97,114,208,24 // vpsrld $0x18,%xmm8,%xmm3
+ .byte 196,227,53,24,219,1 // vinsertf128 $0x1,%xmm3,%ymm9,%ymm3
.byte 197,252,91,219 // vcvtdq2ps %ymm3,%ymm3
- .byte 196,98,125,24,5,191,70,0,0 // vbroadcastss 0x46bf(%rip),%ymm8 # 655c <_sk_callback_avx+0x259>
+ .byte 196,98,125,24,5,155,69,0,0 // vbroadcastss 0x459b(%rip),%ymm8 # 6440 <_sk_callback_avx+0x259>
.byte 196,193,100,89,216 // vmulps %ymm8,%ymm3,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 76,137,193 // mov %r8,%rcx
.byte 91 // pop %rbx
.byte 65,92 // pop %r12
.byte 65,93 // pop %r13
@@ -15891,57 +15894,20 @@ _sk_load_tables_avx:
.byte 65,95 // pop %r15
.byte 93 // pop %rbp
.byte 255,224 // jmpq *%rax
- .byte 65,137,201 // mov %ecx,%r9d
- .byte 65,128,225,7 // and $0x7,%r9b
- .byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
- .byte 65,254,201 // dec %r9b
- .byte 65,128,249,6 // cmp $0x6,%r9b
- .byte 15,135,211,253,255,255 // ja 1c9c <_sk_load_tables_avx+0x14>
- .byte 69,15,182,201 // movzbl %r9b,%r9d
- .byte 76,141,21,140,0,0,0 // lea 0x8c(%rip),%r10 # 1f60 <_sk_load_tables_avx+0x2d8>
- .byte 79,99,12,138 // movslq (%r10,%r9,4),%r9
- .byte 77,1,209 // add %r10,%r9
- .byte 65,255,225 // jmpq *%r9
- .byte 196,193,121,110,68,184,24 // vmovd 0x18(%r8,%rdi,4),%xmm0
- .byte 197,249,112,192,68 // vpshufd $0x44,%xmm0,%xmm0
- .byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
- .byte 197,244,87,201 // vxorps %ymm1,%ymm1,%ymm1
- .byte 196,99,117,12,192,64 // vblendps $0x40,%ymm0,%ymm1,%ymm8
- .byte 196,99,125,25,192,1 // vextractf128 $0x1,%ymm8,%xmm0
- .byte 196,195,121,34,68,184,20,1 // vpinsrd $0x1,0x14(%r8,%rdi,4),%xmm0,%xmm0
- .byte 196,99,61,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm8,%ymm8
- .byte 196,99,125,25,192,1 // vextractf128 $0x1,%ymm8,%xmm0
- .byte 196,195,121,34,68,184,16,0 // vpinsrd $0x0,0x10(%r8,%rdi,4),%xmm0,%xmm0
- .byte 196,99,61,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm8,%ymm8
- .byte 196,195,57,34,68,184,12,3 // vpinsrd $0x3,0xc(%r8,%rdi,4),%xmm8,%xmm0
- .byte 196,99,61,12,192,15 // vblendps $0xf,%ymm0,%ymm8,%ymm8
- .byte 196,195,57,34,68,184,8,2 // vpinsrd $0x2,0x8(%r8,%rdi,4),%xmm8,%xmm0
- .byte 196,99,61,12,192,15 // vblendps $0xf,%ymm0,%ymm8,%ymm8
- .byte 196,195,57,34,68,184,4,1 // vpinsrd $0x1,0x4(%r8,%rdi,4),%xmm8,%xmm0
- .byte 196,99,61,12,192,15 // vblendps $0xf,%ymm0,%ymm8,%ymm8
- .byte 196,195,57,34,4,184,0 // vpinsrd $0x0,(%r8,%rdi,4),%xmm8,%xmm0
- .byte 196,99,61,12,192,15 // vblendps $0xf,%ymm0,%ymm8,%ymm8
- .byte 233,62,253,255,255 // jmpq 1c9c <_sk_load_tables_avx+0x14>
- .byte 102,144 // xchg %ax,%ax
- .byte 236 // in (%dx),%al
- .byte 255 // (bad)
- .byte 255 // (bad)
- .byte 255 // (bad)
- .byte 222,255 // fdivrp %st,%st(7)
- .byte 255 // (bad)
- .byte 255,208 // callq *%rax
- .byte 255 // (bad)
- .byte 255 // (bad)
- .byte 255,194 // inc %edx
- .byte 255 // (bad)
- .byte 255 // (bad)
- .byte 255,174,255,255,255,154 // ljmp *-0x65000001(%rsi)
- .byte 255 // (bad)
- .byte 255 // (bad)
- .byte 255 // (bad)
- .byte 126,255 // jle 1f79 <_sk_load_tables_avx+0x2f1>
- .byte 255 // (bad)
- .byte 255 // .byte 0xff
+ .byte 185,8,0,0,0 // mov $0x8,%ecx
+ .byte 68,41,193 // sub %r8d,%ecx
+ .byte 192,225,3 // shl $0x3,%cl
+ .byte 73,199,194,255,255,255,255 // mov $0xffffffffffffffff,%r10
+ .byte 73,211,234 // shr %cl,%r10
+ .byte 196,193,249,110,194 // vmovq %r10,%xmm0
+ .byte 196,226,121,48,192 // vpmovzxbw %xmm0,%xmm0
+ .byte 196,226,121,0,13,61,72,0,0 // vpshufb 0x483d(%rip),%xmm0,%xmm1 # 6720 <_sk_callback_avx+0x539>
+ .byte 196,226,121,33,201 // vpmovsxbd %xmm1,%xmm1
+ .byte 196,226,121,0,5,63,72,0,0 // vpshufb 0x483f(%rip),%xmm0,%xmm0 # 6730 <_sk_callback_avx+0x549>
+ .byte 196,226,121,33,192 // vpmovsxbd %xmm0,%xmm0
+ .byte 196,227,117,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
+ .byte 196,66,125,44,17 // vmaskmovps (%r9),%ymm0,%ymm10
+ .byte 233,160,253,255,255 // jmpq 1ca6 <_sk_load_tables_avx+0x1e>
HIDDEN _sk_load_tables_u16_be_avx
.globl _sk_load_tables_u16_be_avx
@@ -15951,7 +15917,7 @@ _sk_load_tables_u16_be_avx:
.byte 76,139,0 // mov (%rax),%r8
.byte 76,141,12,189,0,0,0,0 // lea 0x0(,%rdi,4),%r9
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,113,2,0,0 // jne 2203 <_sk_load_tables_u16_be_avx+0x287>
+ .byte 15,133,113,2,0,0 // jne 218d <_sk_load_tables_u16_be_avx+0x287>
.byte 196,1,121,16,4,72 // vmovupd (%r8,%r9,2),%xmm8
.byte 196,129,121,16,84,72,16 // vmovupd 0x10(%r8,%r9,2),%xmm2
.byte 196,129,121,16,92,72,32 // vmovupd 0x20(%r8,%r9,2),%xmm3
@@ -15973,7 +15939,7 @@ _sk_load_tables_u16_be_avx:
.byte 197,177,108,208 // vpunpcklqdq %xmm0,%xmm9,%xmm2
.byte 197,177,109,200 // vpunpckhqdq %xmm0,%xmm9,%xmm1
.byte 196,65,57,108,212 // vpunpcklqdq %xmm12,%xmm8,%xmm10
- .byte 197,121,111,29,210,72,0,0 // vmovdqa 0x48d2(%rip),%xmm11 # 68c0 <_sk_callback_avx+0x5bd>
+ .byte 197,121,111,29,200,71,0,0 // vmovdqa 0x47c8(%rip),%xmm11 # 6740 <_sk_callback_avx+0x559>
.byte 196,193,105,219,195 // vpand %xmm11,%xmm2,%xmm0
.byte 196,65,49,239,201 // vpxor %xmm9,%xmm9,%xmm9
.byte 196,193,121,105,209 // vpunpckhwd %xmm9,%xmm0,%xmm2
@@ -16072,7 +16038,7 @@ _sk_load_tables_u16_be_avx:
.byte 196,226,121,51,219 // vpmovzxwd %xmm3,%xmm3
.byte 196,195,101,24,216,1 // vinsertf128 $0x1,%xmm8,%ymm3,%ymm3
.byte 197,252,91,219 // vcvtdq2ps %ymm3,%ymm3
- .byte 196,98,125,24,5,112,67,0,0 // vbroadcastss 0x4370(%rip),%ymm8 # 6560 <_sk_callback_avx+0x25d>
+ .byte 196,98,125,24,5,202,66,0,0 // vbroadcastss 0x42ca(%rip),%ymm8 # 6444 <_sk_callback_avx+0x25d>
.byte 196,193,100,89,216 // vmulps %ymm8,%ymm3,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 91 // pop %rbx
@@ -16085,29 +16051,29 @@ _sk_load_tables_u16_be_avx:
.byte 196,1,123,16,4,72 // vmovsd (%r8,%r9,2),%xmm8
.byte 196,65,49,239,201 // vpxor %xmm9,%xmm9,%xmm9
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,85 // je 2269 <_sk_load_tables_u16_be_avx+0x2ed>
+ .byte 116,85 // je 21f3 <_sk_load_tables_u16_be_avx+0x2ed>
.byte 196,1,57,22,68,72,8 // vmovhpd 0x8(%r8,%r9,2),%xmm8,%xmm8
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,72 // jb 2269 <_sk_load_tables_u16_be_avx+0x2ed>
+ .byte 114,72 // jb 21f3 <_sk_load_tables_u16_be_avx+0x2ed>
.byte 196,129,123,16,84,72,16 // vmovsd 0x10(%r8,%r9,2),%xmm2
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 116,72 // je 2276 <_sk_load_tables_u16_be_avx+0x2fa>
+ .byte 116,72 // je 2200 <_sk_load_tables_u16_be_avx+0x2fa>
.byte 196,129,105,22,84,72,24 // vmovhpd 0x18(%r8,%r9,2),%xmm2,%xmm2
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,59 // jb 2276 <_sk_load_tables_u16_be_avx+0x2fa>
+ .byte 114,59 // jb 2200 <_sk_load_tables_u16_be_avx+0x2fa>
.byte 196,129,123,16,92,72,32 // vmovsd 0x20(%r8,%r9,2),%xmm3
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 15,132,97,253,255,255 // je 1fad <_sk_load_tables_u16_be_avx+0x31>
+ .byte 15,132,97,253,255,255 // je 1f37 <_sk_load_tables_u16_be_avx+0x31>
.byte 196,129,97,22,92,72,40 // vmovhpd 0x28(%r8,%r9,2),%xmm3,%xmm3
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 15,130,80,253,255,255 // jb 1fad <_sk_load_tables_u16_be_avx+0x31>
+ .byte 15,130,80,253,255,255 // jb 1f37 <_sk_load_tables_u16_be_avx+0x31>
.byte 196,1,122,126,76,72,48 // vmovq 0x30(%r8,%r9,2),%xmm9
- .byte 233,68,253,255,255 // jmpq 1fad <_sk_load_tables_u16_be_avx+0x31>
+ .byte 233,68,253,255,255 // jmpq 1f37 <_sk_load_tables_u16_be_avx+0x31>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
.byte 197,233,87,210 // vxorpd %xmm2,%xmm2,%xmm2
- .byte 233,55,253,255,255 // jmpq 1fad <_sk_load_tables_u16_be_avx+0x31>
+ .byte 233,55,253,255,255 // jmpq 1f37 <_sk_load_tables_u16_be_avx+0x31>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
- .byte 233,46,253,255,255 // jmpq 1fad <_sk_load_tables_u16_be_avx+0x31>
+ .byte 233,46,253,255,255 // jmpq 1f37 <_sk_load_tables_u16_be_avx+0x31>
HIDDEN _sk_load_tables_rgb_u16_be_avx
.globl _sk_load_tables_rgb_u16_be_avx
@@ -16117,7 +16083,7 @@ _sk_load_tables_rgb_u16_be_avx:
.byte 76,139,0 // mov (%rax),%r8
.byte 76,141,12,127 // lea (%rdi,%rdi,2),%r9
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,93,2,0,0 // jne 24ee <_sk_load_tables_rgb_u16_be_avx+0x26f>
+ .byte 15,133,93,2,0,0 // jne 2478 <_sk_load_tables_rgb_u16_be_avx+0x26f>
.byte 196,129,122,111,4,72 // vmovdqu (%r8,%r9,2),%xmm0
.byte 196,129,122,111,84,72,12 // vmovdqu 0xc(%r8,%r9,2),%xmm2
.byte 196,129,122,111,76,72,24 // vmovdqu 0x18(%r8,%r9,2),%xmm1
@@ -16144,7 +16110,7 @@ _sk_load_tables_rgb_u16_be_avx:
.byte 197,185,108,202 // vpunpcklqdq %xmm2,%xmm8,%xmm1
.byte 197,185,109,210 // vpunpckhqdq %xmm2,%xmm8,%xmm2
.byte 197,121,108,195 // vpunpcklqdq %xmm3,%xmm0,%xmm8
- .byte 197,121,111,13,203,69,0,0 // vmovdqa 0x45cb(%rip),%xmm9 # 68d0 <_sk_callback_avx+0x5cd>
+ .byte 197,121,111,13,193,68,0,0 // vmovdqa 0x44c1(%rip),%xmm9 # 6750 <_sk_callback_avx+0x569>
.byte 196,193,113,219,193 // vpand %xmm9,%xmm1,%xmm0
.byte 196,65,41,239,210 // vpxor %xmm10,%xmm10,%xmm10
.byte 196,193,121,105,202 // vpunpckhwd %xmm10,%xmm0,%xmm1
@@ -16236,7 +16202,7 @@ _sk_load_tables_rgb_u16_be_avx:
.byte 196,227,105,33,211,48 // vinsertps $0x30,%xmm3,%xmm2,%xmm2
.byte 196,195,109,24,208,1 // vinsertf128 $0x1,%xmm8,%ymm2,%ymm2
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,29,130,64,0,0 // vbroadcastss 0x4082(%rip),%ymm3 # 6564 <_sk_callback_avx+0x261>
+ .byte 196,226,125,24,29,220,63,0,0 // vbroadcastss 0x3fdc(%rip),%ymm3 # 6448 <_sk_callback_avx+0x261>
.byte 91 // pop %rbx
.byte 65,92 // pop %r12
.byte 65,93 // pop %r13
@@ -16247,36 +16213,36 @@ _sk_load_tables_rgb_u16_be_avx:
.byte 196,129,121,110,4,72 // vmovd (%r8,%r9,2),%xmm0
.byte 196,129,121,196,68,72,4,2 // vpinsrw $0x2,0x4(%r8,%r9,2),%xmm0,%xmm0
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 117,5 // jne 2507 <_sk_load_tables_rgb_u16_be_avx+0x288>
- .byte 233,190,253,255,255 // jmpq 22c5 <_sk_load_tables_rgb_u16_be_avx+0x46>
+ .byte 117,5 // jne 2491 <_sk_load_tables_rgb_u16_be_avx+0x288>
+ .byte 233,190,253,255,255 // jmpq 224f <_sk_load_tables_rgb_u16_be_avx+0x46>
.byte 196,129,121,110,76,72,6 // vmovd 0x6(%r8,%r9,2),%xmm1
.byte 196,1,113,196,68,72,10,2 // vpinsrw $0x2,0xa(%r8,%r9,2),%xmm1,%xmm8
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,26 // jb 2536 <_sk_load_tables_rgb_u16_be_avx+0x2b7>
+ .byte 114,26 // jb 24c0 <_sk_load_tables_rgb_u16_be_avx+0x2b7>
.byte 196,129,121,110,76,72,12 // vmovd 0xc(%r8,%r9,2),%xmm1
.byte 196,129,113,196,84,72,16,2 // vpinsrw $0x2,0x10(%r8,%r9,2),%xmm1,%xmm2
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 117,10 // jne 253b <_sk_load_tables_rgb_u16_be_avx+0x2bc>
- .byte 233,143,253,255,255 // jmpq 22c5 <_sk_load_tables_rgb_u16_be_avx+0x46>
- .byte 233,138,253,255,255 // jmpq 22c5 <_sk_load_tables_rgb_u16_be_avx+0x46>
+ .byte 117,10 // jne 24c5 <_sk_load_tables_rgb_u16_be_avx+0x2bc>
+ .byte 233,143,253,255,255 // jmpq 224f <_sk_load_tables_rgb_u16_be_avx+0x46>
+ .byte 233,138,253,255,255 // jmpq 224f <_sk_load_tables_rgb_u16_be_avx+0x46>
.byte 196,129,121,110,76,72,18 // vmovd 0x12(%r8,%r9,2),%xmm1
.byte 196,1,113,196,76,72,22,2 // vpinsrw $0x2,0x16(%r8,%r9,2),%xmm1,%xmm9
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,26 // jb 256a <_sk_load_tables_rgb_u16_be_avx+0x2eb>
+ .byte 114,26 // jb 24f4 <_sk_load_tables_rgb_u16_be_avx+0x2eb>
.byte 196,129,121,110,76,72,24 // vmovd 0x18(%r8,%r9,2),%xmm1
.byte 196,129,113,196,76,72,28,2 // vpinsrw $0x2,0x1c(%r8,%r9,2),%xmm1,%xmm1
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 117,10 // jne 256f <_sk_load_tables_rgb_u16_be_avx+0x2f0>
- .byte 233,91,253,255,255 // jmpq 22c5 <_sk_load_tables_rgb_u16_be_avx+0x46>
- .byte 233,86,253,255,255 // jmpq 22c5 <_sk_load_tables_rgb_u16_be_avx+0x46>
+ .byte 117,10 // jne 24f9 <_sk_load_tables_rgb_u16_be_avx+0x2f0>
+ .byte 233,91,253,255,255 // jmpq 224f <_sk_load_tables_rgb_u16_be_avx+0x46>
+ .byte 233,86,253,255,255 // jmpq 224f <_sk_load_tables_rgb_u16_be_avx+0x46>
.byte 196,129,121,110,92,72,30 // vmovd 0x1e(%r8,%r9,2),%xmm3
.byte 196,1,97,196,92,72,34,2 // vpinsrw $0x2,0x22(%r8,%r9,2),%xmm3,%xmm11
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 114,20 // jb 2598 <_sk_load_tables_rgb_u16_be_avx+0x319>
+ .byte 114,20 // jb 2522 <_sk_load_tables_rgb_u16_be_avx+0x319>
.byte 196,129,121,110,92,72,36 // vmovd 0x24(%r8,%r9,2),%xmm3
.byte 196,129,97,196,92,72,40,2 // vpinsrw $0x2,0x28(%r8,%r9,2),%xmm3,%xmm3
- .byte 233,45,253,255,255 // jmpq 22c5 <_sk_load_tables_rgb_u16_be_avx+0x46>
- .byte 233,40,253,255,255 // jmpq 22c5 <_sk_load_tables_rgb_u16_be_avx+0x46>
+ .byte 233,45,253,255,255 // jmpq 224f <_sk_load_tables_rgb_u16_be_avx+0x46>
+ .byte 233,40,253,255,255 // jmpq 224f <_sk_load_tables_rgb_u16_be_avx+0x46>
HIDDEN _sk_byte_tables_avx
.globl _sk_byte_tables_avx
@@ -16289,7 +16255,7 @@ _sk_byte_tables_avx:
.byte 65,84 // push %r12
.byte 83 // push %rbx
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,98,125,24,5,182,63,0,0 // vbroadcastss 0x3fb6(%rip),%ymm8 # 6568 <_sk_callback_avx+0x265>
+ .byte 196,98,125,24,5,16,63,0,0 // vbroadcastss 0x3f10(%rip),%ymm8 # 644c <_sk_callback_avx+0x265>
.byte 196,193,124,89,192 // vmulps %ymm8,%ymm0,%ymm0
.byte 197,253,91,192 // vcvtps2dq %ymm0,%ymm0
.byte 196,195,249,22,192,1 // vpextrq $0x1,%xmm0,%r8
@@ -16326,7 +16292,7 @@ _sk_byte_tables_avx:
.byte 196,226,121,49,192 // vpmovzxbd %xmm0,%xmm0
.byte 196,227,53,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm9,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,98,125,24,13,4,63,0,0 // vbroadcastss 0x3f04(%rip),%ymm9 # 656c <_sk_callback_avx+0x269>
+ .byte 196,98,125,24,13,94,62,0,0 // vbroadcastss 0x3e5e(%rip),%ymm9 # 6450 <_sk_callback_avx+0x269>
.byte 196,193,124,89,193 // vmulps %ymm9,%ymm0,%ymm0
.byte 196,193,116,89,200 // vmulps %ymm8,%ymm1,%ymm1
.byte 197,253,91,201 // vcvtps2dq %ymm1,%ymm1
@@ -16488,7 +16454,7 @@ _sk_byte_tables_rgb_avx:
.byte 196,226,121,49,192 // vpmovzxbd %xmm0,%xmm0
.byte 196,227,53,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm9,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,98,125,24,13,42,60,0,0 // vbroadcastss 0x3c2a(%rip),%ymm9 # 6570 <_sk_callback_avx+0x26d>
+ .byte 196,98,125,24,13,132,59,0,0 // vbroadcastss 0x3b84(%rip),%ymm9 # 6454 <_sk_callback_avx+0x26d>
.byte 196,193,124,89,193 // vmulps %ymm9,%ymm0,%ymm0
.byte 197,188,89,201 // vmulps %ymm1,%ymm8,%ymm1
.byte 197,253,91,201 // vcvtps2dq %ymm1,%ymm1
@@ -16785,36 +16751,36 @@ _sk_parametric_r_avx:
.byte 196,193,124,88,195 // vaddps %ymm11,%ymm0,%ymm0
.byte 196,98,125,24,16 // vbroadcastss (%rax),%ymm10
.byte 197,124,91,216 // vcvtdq2ps %ymm0,%ymm11
- .byte 196,98,125,24,37,136,55,0,0 // vbroadcastss 0x3788(%rip),%ymm12 # 6574 <_sk_callback_avx+0x271>
+ .byte 196,98,125,24,37,226,54,0,0 // vbroadcastss 0x36e2(%rip),%ymm12 # 6458 <_sk_callback_avx+0x271>
.byte 196,65,36,89,220 // vmulps %ymm12,%ymm11,%ymm11
- .byte 196,98,125,24,37,126,55,0,0 // vbroadcastss 0x377e(%rip),%ymm12 # 6578 <_sk_callback_avx+0x275>
+ .byte 196,98,125,24,37,216,54,0,0 // vbroadcastss 0x36d8(%rip),%ymm12 # 645c <_sk_callback_avx+0x275>
.byte 196,193,124,84,196 // vandps %ymm12,%ymm0,%ymm0
- .byte 196,98,125,24,37,116,55,0,0 // vbroadcastss 0x3774(%rip),%ymm12 # 657c <_sk_callback_avx+0x279>
+ .byte 196,98,125,24,37,206,54,0,0 // vbroadcastss 0x36ce(%rip),%ymm12 # 6460 <_sk_callback_avx+0x279>
.byte 196,193,124,86,196 // vorps %ymm12,%ymm0,%ymm0
- .byte 196,98,125,24,37,106,55,0,0 // vbroadcastss 0x376a(%rip),%ymm12 # 6580 <_sk_callback_avx+0x27d>
+ .byte 196,98,125,24,37,196,54,0,0 // vbroadcastss 0x36c4(%rip),%ymm12 # 6464 <_sk_callback_avx+0x27d>
.byte 196,65,36,88,220 // vaddps %ymm12,%ymm11,%ymm11
- .byte 196,98,125,24,37,96,55,0,0 // vbroadcastss 0x3760(%rip),%ymm12 # 6584 <_sk_callback_avx+0x281>
+ .byte 196,98,125,24,37,186,54,0,0 // vbroadcastss 0x36ba(%rip),%ymm12 # 6468 <_sk_callback_avx+0x281>
.byte 196,65,124,89,228 // vmulps %ymm12,%ymm0,%ymm12
.byte 196,65,36,92,220 // vsubps %ymm12,%ymm11,%ymm11
- .byte 196,98,125,24,37,81,55,0,0 // vbroadcastss 0x3751(%rip),%ymm12 # 6588 <_sk_callback_avx+0x285>
+ .byte 196,98,125,24,37,171,54,0,0 // vbroadcastss 0x36ab(%rip),%ymm12 # 646c <_sk_callback_avx+0x285>
.byte 196,193,124,88,196 // vaddps %ymm12,%ymm0,%ymm0
- .byte 196,98,125,24,37,71,55,0,0 // vbroadcastss 0x3747(%rip),%ymm12 # 658c <_sk_callback_avx+0x289>
+ .byte 196,98,125,24,37,161,54,0,0 // vbroadcastss 0x36a1(%rip),%ymm12 # 6470 <_sk_callback_avx+0x289>
.byte 197,156,94,192 // vdivps %ymm0,%ymm12,%ymm0
.byte 197,164,92,192 // vsubps %ymm0,%ymm11,%ymm0
.byte 197,172,89,192 // vmulps %ymm0,%ymm10,%ymm0
.byte 196,99,125,8,208,1 // vroundps $0x1,%ymm0,%ymm10
.byte 196,65,124,92,210 // vsubps %ymm10,%ymm0,%ymm10
- .byte 196,98,125,24,29,43,55,0,0 // vbroadcastss 0x372b(%rip),%ymm11 # 6590 <_sk_callback_avx+0x28d>
+ .byte 196,98,125,24,29,133,54,0,0 // vbroadcastss 0x3685(%rip),%ymm11 # 6474 <_sk_callback_avx+0x28d>
.byte 196,193,124,88,195 // vaddps %ymm11,%ymm0,%ymm0
- .byte 196,98,125,24,29,33,55,0,0 // vbroadcastss 0x3721(%rip),%ymm11 # 6594 <_sk_callback_avx+0x291>
+ .byte 196,98,125,24,29,123,54,0,0 // vbroadcastss 0x367b(%rip),%ymm11 # 6478 <_sk_callback_avx+0x291>
.byte 196,65,44,89,219 // vmulps %ymm11,%ymm10,%ymm11
.byte 196,193,124,92,195 // vsubps %ymm11,%ymm0,%ymm0
- .byte 196,98,125,24,29,18,55,0,0 // vbroadcastss 0x3712(%rip),%ymm11 # 6598 <_sk_callback_avx+0x295>
+ .byte 196,98,125,24,29,108,54,0,0 // vbroadcastss 0x366c(%rip),%ymm11 # 647c <_sk_callback_avx+0x295>
.byte 196,65,36,92,210 // vsubps %ymm10,%ymm11,%ymm10
- .byte 196,98,125,24,29,8,55,0,0 // vbroadcastss 0x3708(%rip),%ymm11 # 659c <_sk_callback_avx+0x299>
+ .byte 196,98,125,24,29,98,54,0,0 // vbroadcastss 0x3662(%rip),%ymm11 # 6480 <_sk_callback_avx+0x299>
.byte 196,65,36,94,210 // vdivps %ymm10,%ymm11,%ymm10
.byte 196,193,124,88,194 // vaddps %ymm10,%ymm0,%ymm0
- .byte 196,98,125,24,21,249,54,0,0 // vbroadcastss 0x36f9(%rip),%ymm10 # 65a0 <_sk_callback_avx+0x29d>
+ .byte 196,98,125,24,21,83,54,0,0 // vbroadcastss 0x3653(%rip),%ymm10 # 6484 <_sk_callback_avx+0x29d>
.byte 196,193,124,89,194 // vmulps %ymm10,%ymm0,%ymm0
.byte 197,253,91,192 // vcvtps2dq %ymm0,%ymm0
.byte 196,98,125,24,80,20 // vbroadcastss 0x14(%rax),%ymm10
@@ -16822,7 +16788,7 @@ _sk_parametric_r_avx:
.byte 196,195,125,74,193,128 // vblendvps %ymm8,%ymm9,%ymm0,%ymm0
.byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
.byte 196,193,124,95,192 // vmaxps %ymm8,%ymm0,%ymm0
- .byte 196,98,125,24,5,208,54,0,0 // vbroadcastss 0x36d0(%rip),%ymm8 # 65a4 <_sk_callback_avx+0x2a1>
+ .byte 196,98,125,24,5,42,54,0,0 // vbroadcastss 0x362a(%rip),%ymm8 # 6488 <_sk_callback_avx+0x2a1>
.byte 196,193,124,93,192 // vminps %ymm8,%ymm0,%ymm0
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -16844,36 +16810,36 @@ _sk_parametric_g_avx:
.byte 196,193,116,88,203 // vaddps %ymm11,%ymm1,%ymm1
.byte 196,98,125,24,16 // vbroadcastss (%rax),%ymm10
.byte 197,124,91,217 // vcvtdq2ps %ymm1,%ymm11
- .byte 196,98,125,24,37,129,54,0,0 // vbroadcastss 0x3681(%rip),%ymm12 # 65a8 <_sk_callback_avx+0x2a5>
+ .byte 196,98,125,24,37,219,53,0,0 // vbroadcastss 0x35db(%rip),%ymm12 # 648c <_sk_callback_avx+0x2a5>
.byte 196,65,36,89,220 // vmulps %ymm12,%ymm11,%ymm11
- .byte 196,98,125,24,37,119,54,0,0 // vbroadcastss 0x3677(%rip),%ymm12 # 65ac <_sk_callback_avx+0x2a9>
+ .byte 196,98,125,24,37,209,53,0,0 // vbroadcastss 0x35d1(%rip),%ymm12 # 6490 <_sk_callback_avx+0x2a9>
.byte 196,193,116,84,204 // vandps %ymm12,%ymm1,%ymm1
- .byte 196,98,125,24,37,109,54,0,0 // vbroadcastss 0x366d(%rip),%ymm12 # 65b0 <_sk_callback_avx+0x2ad>
+ .byte 196,98,125,24,37,199,53,0,0 // vbroadcastss 0x35c7(%rip),%ymm12 # 6494 <_sk_callback_avx+0x2ad>
.byte 196,193,116,86,204 // vorps %ymm12,%ymm1,%ymm1
- .byte 196,98,125,24,37,99,54,0,0 // vbroadcastss 0x3663(%rip),%ymm12 # 65b4 <_sk_callback_avx+0x2b1>
+ .byte 196,98,125,24,37,189,53,0,0 // vbroadcastss 0x35bd(%rip),%ymm12 # 6498 <_sk_callback_avx+0x2b1>
.byte 196,65,36,88,220 // vaddps %ymm12,%ymm11,%ymm11
- .byte 196,98,125,24,37,89,54,0,0 // vbroadcastss 0x3659(%rip),%ymm12 # 65b8 <_sk_callback_avx+0x2b5>
+ .byte 196,98,125,24,37,179,53,0,0 // vbroadcastss 0x35b3(%rip),%ymm12 # 649c <_sk_callback_avx+0x2b5>
.byte 196,65,116,89,228 // vmulps %ymm12,%ymm1,%ymm12
.byte 196,65,36,92,220 // vsubps %ymm12,%ymm11,%ymm11
- .byte 196,98,125,24,37,74,54,0,0 // vbroadcastss 0x364a(%rip),%ymm12 # 65bc <_sk_callback_avx+0x2b9>
+ .byte 196,98,125,24,37,164,53,0,0 // vbroadcastss 0x35a4(%rip),%ymm12 # 64a0 <_sk_callback_avx+0x2b9>
.byte 196,193,116,88,204 // vaddps %ymm12,%ymm1,%ymm1
- .byte 196,98,125,24,37,64,54,0,0 // vbroadcastss 0x3640(%rip),%ymm12 # 65c0 <_sk_callback_avx+0x2bd>
+ .byte 196,98,125,24,37,154,53,0,0 // vbroadcastss 0x359a(%rip),%ymm12 # 64a4 <_sk_callback_avx+0x2bd>
.byte 197,156,94,201 // vdivps %ymm1,%ymm12,%ymm1
.byte 197,164,92,201 // vsubps %ymm1,%ymm11,%ymm1
.byte 197,172,89,201 // vmulps %ymm1,%ymm10,%ymm1
.byte 196,99,125,8,209,1 // vroundps $0x1,%ymm1,%ymm10
.byte 196,65,116,92,210 // vsubps %ymm10,%ymm1,%ymm10
- .byte 196,98,125,24,29,36,54,0,0 // vbroadcastss 0x3624(%rip),%ymm11 # 65c4 <_sk_callback_avx+0x2c1>
+ .byte 196,98,125,24,29,126,53,0,0 // vbroadcastss 0x357e(%rip),%ymm11 # 64a8 <_sk_callback_avx+0x2c1>
.byte 196,193,116,88,203 // vaddps %ymm11,%ymm1,%ymm1
- .byte 196,98,125,24,29,26,54,0,0 // vbroadcastss 0x361a(%rip),%ymm11 # 65c8 <_sk_callback_avx+0x2c5>
+ .byte 196,98,125,24,29,116,53,0,0 // vbroadcastss 0x3574(%rip),%ymm11 # 64ac <_sk_callback_avx+0x2c5>
.byte 196,65,44,89,219 // vmulps %ymm11,%ymm10,%ymm11
.byte 196,193,116,92,203 // vsubps %ymm11,%ymm1,%ymm1
- .byte 196,98,125,24,29,11,54,0,0 // vbroadcastss 0x360b(%rip),%ymm11 # 65cc <_sk_callback_avx+0x2c9>
+ .byte 196,98,125,24,29,101,53,0,0 // vbroadcastss 0x3565(%rip),%ymm11 # 64b0 <_sk_callback_avx+0x2c9>
.byte 196,65,36,92,210 // vsubps %ymm10,%ymm11,%ymm10
- .byte 196,98,125,24,29,1,54,0,0 // vbroadcastss 0x3601(%rip),%ymm11 # 65d0 <_sk_callback_avx+0x2cd>
+ .byte 196,98,125,24,29,91,53,0,0 // vbroadcastss 0x355b(%rip),%ymm11 # 64b4 <_sk_callback_avx+0x2cd>
.byte 196,65,36,94,210 // vdivps %ymm10,%ymm11,%ymm10
.byte 196,193,116,88,202 // vaddps %ymm10,%ymm1,%ymm1
- .byte 196,98,125,24,21,242,53,0,0 // vbroadcastss 0x35f2(%rip),%ymm10 # 65d4 <_sk_callback_avx+0x2d1>
+ .byte 196,98,125,24,21,76,53,0,0 // vbroadcastss 0x354c(%rip),%ymm10 # 64b8 <_sk_callback_avx+0x2d1>
.byte 196,193,116,89,202 // vmulps %ymm10,%ymm1,%ymm1
.byte 197,253,91,201 // vcvtps2dq %ymm1,%ymm1
.byte 196,98,125,24,80,20 // vbroadcastss 0x14(%rax),%ymm10
@@ -16881,7 +16847,7 @@ _sk_parametric_g_avx:
.byte 196,195,117,74,201,128 // vblendvps %ymm8,%ymm9,%ymm1,%ymm1
.byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
.byte 196,193,116,95,200 // vmaxps %ymm8,%ymm1,%ymm1
- .byte 196,98,125,24,5,201,53,0,0 // vbroadcastss 0x35c9(%rip),%ymm8 # 65d8 <_sk_callback_avx+0x2d5>
+ .byte 196,98,125,24,5,35,53,0,0 // vbroadcastss 0x3523(%rip),%ymm8 # 64bc <_sk_callback_avx+0x2d5>
.byte 196,193,116,93,200 // vminps %ymm8,%ymm1,%ymm1
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -16903,36 +16869,36 @@ _sk_parametric_b_avx:
.byte 196,193,108,88,211 // vaddps %ymm11,%ymm2,%ymm2
.byte 196,98,125,24,16 // vbroadcastss (%rax),%ymm10
.byte 197,124,91,218 // vcvtdq2ps %ymm2,%ymm11
- .byte 196,98,125,24,37,122,53,0,0 // vbroadcastss 0x357a(%rip),%ymm12 # 65dc <_sk_callback_avx+0x2d9>
+ .byte 196,98,125,24,37,212,52,0,0 // vbroadcastss 0x34d4(%rip),%ymm12 # 64c0 <_sk_callback_avx+0x2d9>
.byte 196,65,36,89,220 // vmulps %ymm12,%ymm11,%ymm11
- .byte 196,98,125,24,37,112,53,0,0 // vbroadcastss 0x3570(%rip),%ymm12 # 65e0 <_sk_callback_avx+0x2dd>
+ .byte 196,98,125,24,37,202,52,0,0 // vbroadcastss 0x34ca(%rip),%ymm12 # 64c4 <_sk_callback_avx+0x2dd>
.byte 196,193,108,84,212 // vandps %ymm12,%ymm2,%ymm2
- .byte 196,98,125,24,37,102,53,0,0 // vbroadcastss 0x3566(%rip),%ymm12 # 65e4 <_sk_callback_avx+0x2e1>
+ .byte 196,98,125,24,37,192,52,0,0 // vbroadcastss 0x34c0(%rip),%ymm12 # 64c8 <_sk_callback_avx+0x2e1>
.byte 196,193,108,86,212 // vorps %ymm12,%ymm2,%ymm2
- .byte 196,98,125,24,37,92,53,0,0 // vbroadcastss 0x355c(%rip),%ymm12 # 65e8 <_sk_callback_avx+0x2e5>
+ .byte 196,98,125,24,37,182,52,0,0 // vbroadcastss 0x34b6(%rip),%ymm12 # 64cc <_sk_callback_avx+0x2e5>
.byte 196,65,36,88,220 // vaddps %ymm12,%ymm11,%ymm11
- .byte 196,98,125,24,37,82,53,0,0 // vbroadcastss 0x3552(%rip),%ymm12 # 65ec <_sk_callback_avx+0x2e9>
+ .byte 196,98,125,24,37,172,52,0,0 // vbroadcastss 0x34ac(%rip),%ymm12 # 64d0 <_sk_callback_avx+0x2e9>
.byte 196,65,108,89,228 // vmulps %ymm12,%ymm2,%ymm12
.byte 196,65,36,92,220 // vsubps %ymm12,%ymm11,%ymm11
- .byte 196,98,125,24,37,67,53,0,0 // vbroadcastss 0x3543(%rip),%ymm12 # 65f0 <_sk_callback_avx+0x2ed>
+ .byte 196,98,125,24,37,157,52,0,0 // vbroadcastss 0x349d(%rip),%ymm12 # 64d4 <_sk_callback_avx+0x2ed>
.byte 196,193,108,88,212 // vaddps %ymm12,%ymm2,%ymm2
- .byte 196,98,125,24,37,57,53,0,0 // vbroadcastss 0x3539(%rip),%ymm12 # 65f4 <_sk_callback_avx+0x2f1>
+ .byte 196,98,125,24,37,147,52,0,0 // vbroadcastss 0x3493(%rip),%ymm12 # 64d8 <_sk_callback_avx+0x2f1>
.byte 197,156,94,210 // vdivps %ymm2,%ymm12,%ymm2
.byte 197,164,92,210 // vsubps %ymm2,%ymm11,%ymm2
.byte 197,172,89,210 // vmulps %ymm2,%ymm10,%ymm2
.byte 196,99,125,8,210,1 // vroundps $0x1,%ymm2,%ymm10
.byte 196,65,108,92,210 // vsubps %ymm10,%ymm2,%ymm10
- .byte 196,98,125,24,29,29,53,0,0 // vbroadcastss 0x351d(%rip),%ymm11 # 65f8 <_sk_callback_avx+0x2f5>
+ .byte 196,98,125,24,29,119,52,0,0 // vbroadcastss 0x3477(%rip),%ymm11 # 64dc <_sk_callback_avx+0x2f5>
.byte 196,193,108,88,211 // vaddps %ymm11,%ymm2,%ymm2
- .byte 196,98,125,24,29,19,53,0,0 // vbroadcastss 0x3513(%rip),%ymm11 # 65fc <_sk_callback_avx+0x2f9>
+ .byte 196,98,125,24,29,109,52,0,0 // vbroadcastss 0x346d(%rip),%ymm11 # 64e0 <_sk_callback_avx+0x2f9>
.byte 196,65,44,89,219 // vmulps %ymm11,%ymm10,%ymm11
.byte 196,193,108,92,211 // vsubps %ymm11,%ymm2,%ymm2
- .byte 196,98,125,24,29,4,53,0,0 // vbroadcastss 0x3504(%rip),%ymm11 # 6600 <_sk_callback_avx+0x2fd>
+ .byte 196,98,125,24,29,94,52,0,0 // vbroadcastss 0x345e(%rip),%ymm11 # 64e4 <_sk_callback_avx+0x2fd>
.byte 196,65,36,92,210 // vsubps %ymm10,%ymm11,%ymm10
- .byte 196,98,125,24,29,250,52,0,0 // vbroadcastss 0x34fa(%rip),%ymm11 # 6604 <_sk_callback_avx+0x301>
+ .byte 196,98,125,24,29,84,52,0,0 // vbroadcastss 0x3454(%rip),%ymm11 # 64e8 <_sk_callback_avx+0x301>
.byte 196,65,36,94,210 // vdivps %ymm10,%ymm11,%ymm10
.byte 196,193,108,88,210 // vaddps %ymm10,%ymm2,%ymm2
- .byte 196,98,125,24,21,235,52,0,0 // vbroadcastss 0x34eb(%rip),%ymm10 # 6608 <_sk_callback_avx+0x305>
+ .byte 196,98,125,24,21,69,52,0,0 // vbroadcastss 0x3445(%rip),%ymm10 # 64ec <_sk_callback_avx+0x305>
.byte 196,193,108,89,210 // vmulps %ymm10,%ymm2,%ymm2
.byte 197,253,91,210 // vcvtps2dq %ymm2,%ymm2
.byte 196,98,125,24,80,20 // vbroadcastss 0x14(%rax),%ymm10
@@ -16940,7 +16906,7 @@ _sk_parametric_b_avx:
.byte 196,195,109,74,209,128 // vblendvps %ymm8,%ymm9,%ymm2,%ymm2
.byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
.byte 196,193,108,95,208 // vmaxps %ymm8,%ymm2,%ymm2
- .byte 196,98,125,24,5,194,52,0,0 // vbroadcastss 0x34c2(%rip),%ymm8 # 660c <_sk_callback_avx+0x309>
+ .byte 196,98,125,24,5,28,52,0,0 // vbroadcastss 0x341c(%rip),%ymm8 # 64f0 <_sk_callback_avx+0x309>
.byte 196,193,108,93,208 // vminps %ymm8,%ymm2,%ymm2
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -16962,36 +16928,36 @@ _sk_parametric_a_avx:
.byte 196,193,100,88,219 // vaddps %ymm11,%ymm3,%ymm3
.byte 196,98,125,24,16 // vbroadcastss (%rax),%ymm10
.byte 197,124,91,219 // vcvtdq2ps %ymm3,%ymm11
- .byte 196,98,125,24,37,115,52,0,0 // vbroadcastss 0x3473(%rip),%ymm12 # 6610 <_sk_callback_avx+0x30d>
+ .byte 196,98,125,24,37,205,51,0,0 // vbroadcastss 0x33cd(%rip),%ymm12 # 64f4 <_sk_callback_avx+0x30d>
.byte 196,65,36,89,220 // vmulps %ymm12,%ymm11,%ymm11
- .byte 196,98,125,24,37,105,52,0,0 // vbroadcastss 0x3469(%rip),%ymm12 # 6614 <_sk_callback_avx+0x311>
+ .byte 196,98,125,24,37,195,51,0,0 // vbroadcastss 0x33c3(%rip),%ymm12 # 64f8 <_sk_callback_avx+0x311>
.byte 196,193,100,84,220 // vandps %ymm12,%ymm3,%ymm3
- .byte 196,98,125,24,37,95,52,0,0 // vbroadcastss 0x345f(%rip),%ymm12 # 6618 <_sk_callback_avx+0x315>
+ .byte 196,98,125,24,37,185,51,0,0 // vbroadcastss 0x33b9(%rip),%ymm12 # 64fc <_sk_callback_avx+0x315>
.byte 196,193,100,86,220 // vorps %ymm12,%ymm3,%ymm3
- .byte 196,98,125,24,37,85,52,0,0 // vbroadcastss 0x3455(%rip),%ymm12 # 661c <_sk_callback_avx+0x319>
+ .byte 196,98,125,24,37,175,51,0,0 // vbroadcastss 0x33af(%rip),%ymm12 # 6500 <_sk_callback_avx+0x319>
.byte 196,65,36,88,220 // vaddps %ymm12,%ymm11,%ymm11
- .byte 196,98,125,24,37,75,52,0,0 // vbroadcastss 0x344b(%rip),%ymm12 # 6620 <_sk_callback_avx+0x31d>
+ .byte 196,98,125,24,37,165,51,0,0 // vbroadcastss 0x33a5(%rip),%ymm12 # 6504 <_sk_callback_avx+0x31d>
.byte 196,65,100,89,228 // vmulps %ymm12,%ymm3,%ymm12
.byte 196,65,36,92,220 // vsubps %ymm12,%ymm11,%ymm11
- .byte 196,98,125,24,37,60,52,0,0 // vbroadcastss 0x343c(%rip),%ymm12 # 6624 <_sk_callback_avx+0x321>
+ .byte 196,98,125,24,37,150,51,0,0 // vbroadcastss 0x3396(%rip),%ymm12 # 6508 <_sk_callback_avx+0x321>
.byte 196,193,100,88,220 // vaddps %ymm12,%ymm3,%ymm3
- .byte 196,98,125,24,37,50,52,0,0 // vbroadcastss 0x3432(%rip),%ymm12 # 6628 <_sk_callback_avx+0x325>
+ .byte 196,98,125,24,37,140,51,0,0 // vbroadcastss 0x338c(%rip),%ymm12 # 650c <_sk_callback_avx+0x325>
.byte 197,156,94,219 // vdivps %ymm3,%ymm12,%ymm3
.byte 197,164,92,219 // vsubps %ymm3,%ymm11,%ymm3
.byte 197,172,89,219 // vmulps %ymm3,%ymm10,%ymm3
.byte 196,99,125,8,211,1 // vroundps $0x1,%ymm3,%ymm10
.byte 196,65,100,92,210 // vsubps %ymm10,%ymm3,%ymm10
- .byte 196,98,125,24,29,22,52,0,0 // vbroadcastss 0x3416(%rip),%ymm11 # 662c <_sk_callback_avx+0x329>
+ .byte 196,98,125,24,29,112,51,0,0 // vbroadcastss 0x3370(%rip),%ymm11 # 6510 <_sk_callback_avx+0x329>
.byte 196,193,100,88,219 // vaddps %ymm11,%ymm3,%ymm3
- .byte 196,98,125,24,29,12,52,0,0 // vbroadcastss 0x340c(%rip),%ymm11 # 6630 <_sk_callback_avx+0x32d>
+ .byte 196,98,125,24,29,102,51,0,0 // vbroadcastss 0x3366(%rip),%ymm11 # 6514 <_sk_callback_avx+0x32d>
.byte 196,65,44,89,219 // vmulps %ymm11,%ymm10,%ymm11
.byte 196,193,100,92,219 // vsubps %ymm11,%ymm3,%ymm3
- .byte 196,98,125,24,29,253,51,0,0 // vbroadcastss 0x33fd(%rip),%ymm11 # 6634 <_sk_callback_avx+0x331>
+ .byte 196,98,125,24,29,87,51,0,0 // vbroadcastss 0x3357(%rip),%ymm11 # 6518 <_sk_callback_avx+0x331>
.byte 196,65,36,92,210 // vsubps %ymm10,%ymm11,%ymm10
- .byte 196,98,125,24,29,243,51,0,0 // vbroadcastss 0x33f3(%rip),%ymm11 # 6638 <_sk_callback_avx+0x335>
+ .byte 196,98,125,24,29,77,51,0,0 // vbroadcastss 0x334d(%rip),%ymm11 # 651c <_sk_callback_avx+0x335>
.byte 196,65,36,94,210 // vdivps %ymm10,%ymm11,%ymm10
.byte 196,193,100,88,218 // vaddps %ymm10,%ymm3,%ymm3
- .byte 196,98,125,24,21,228,51,0,0 // vbroadcastss 0x33e4(%rip),%ymm10 # 663c <_sk_callback_avx+0x339>
+ .byte 196,98,125,24,21,62,51,0,0 // vbroadcastss 0x333e(%rip),%ymm10 # 6520 <_sk_callback_avx+0x339>
.byte 196,193,100,89,218 // vmulps %ymm10,%ymm3,%ymm3
.byte 197,253,91,219 // vcvtps2dq %ymm3,%ymm3
.byte 196,98,125,24,80,20 // vbroadcastss 0x14(%rax),%ymm10
@@ -16999,7 +16965,7 @@ _sk_parametric_a_avx:
.byte 196,195,101,74,217,128 // vblendvps %ymm8,%ymm9,%ymm3,%ymm3
.byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
.byte 196,193,100,95,216 // vmaxps %ymm8,%ymm3,%ymm3
- .byte 196,98,125,24,5,187,51,0,0 // vbroadcastss 0x33bb(%rip),%ymm8 # 6640 <_sk_callback_avx+0x33d>
+ .byte 196,98,125,24,5,21,51,0,0 // vbroadcastss 0x3315(%rip),%ymm8 # 6524 <_sk_callback_avx+0x33d>
.byte 196,193,100,93,216 // vminps %ymm8,%ymm3,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -17008,31 +16974,31 @@ HIDDEN _sk_lab_to_xyz_avx
.globl _sk_lab_to_xyz_avx
FUNCTION(_sk_lab_to_xyz_avx)
_sk_lab_to_xyz_avx:
- .byte 196,98,125,24,5,173,51,0,0 // vbroadcastss 0x33ad(%rip),%ymm8 # 6644 <_sk_callback_avx+0x341>
+ .byte 196,98,125,24,5,7,51,0,0 // vbroadcastss 0x3307(%rip),%ymm8 # 6528 <_sk_callback_avx+0x341>
.byte 196,193,124,89,192 // vmulps %ymm8,%ymm0,%ymm0
- .byte 196,98,125,24,5,163,51,0,0 // vbroadcastss 0x33a3(%rip),%ymm8 # 6648 <_sk_callback_avx+0x345>
+ .byte 196,98,125,24,5,253,50,0,0 // vbroadcastss 0x32fd(%rip),%ymm8 # 652c <_sk_callback_avx+0x345>
.byte 196,193,116,89,200 // vmulps %ymm8,%ymm1,%ymm1
- .byte 196,98,125,24,13,153,51,0,0 // vbroadcastss 0x3399(%rip),%ymm9 # 664c <_sk_callback_avx+0x349>
+ .byte 196,98,125,24,13,243,50,0,0 // vbroadcastss 0x32f3(%rip),%ymm9 # 6530 <_sk_callback_avx+0x349>
.byte 196,193,116,88,201 // vaddps %ymm9,%ymm1,%ymm1
.byte 196,193,108,89,208 // vmulps %ymm8,%ymm2,%ymm2
.byte 196,193,108,88,209 // vaddps %ymm9,%ymm2,%ymm2
- .byte 196,98,125,24,5,133,51,0,0 // vbroadcastss 0x3385(%rip),%ymm8 # 6650 <_sk_callback_avx+0x34d>
+ .byte 196,98,125,24,5,223,50,0,0 // vbroadcastss 0x32df(%rip),%ymm8 # 6534 <_sk_callback_avx+0x34d>
.byte 196,193,124,88,192 // vaddps %ymm8,%ymm0,%ymm0
- .byte 196,98,125,24,5,123,51,0,0 // vbroadcastss 0x337b(%rip),%ymm8 # 6654 <_sk_callback_avx+0x351>
+ .byte 196,98,125,24,5,213,50,0,0 // vbroadcastss 0x32d5(%rip),%ymm8 # 6538 <_sk_callback_avx+0x351>
.byte 196,193,124,89,192 // vmulps %ymm8,%ymm0,%ymm0
- .byte 196,98,125,24,5,113,51,0,0 // vbroadcastss 0x3371(%rip),%ymm8 # 6658 <_sk_callback_avx+0x355>
+ .byte 196,98,125,24,5,203,50,0,0 // vbroadcastss 0x32cb(%rip),%ymm8 # 653c <_sk_callback_avx+0x355>
.byte 196,193,116,89,200 // vmulps %ymm8,%ymm1,%ymm1
.byte 197,252,88,201 // vaddps %ymm1,%ymm0,%ymm1
- .byte 196,98,125,24,5,99,51,0,0 // vbroadcastss 0x3363(%rip),%ymm8 # 665c <_sk_callback_avx+0x359>
+ .byte 196,98,125,24,5,189,50,0,0 // vbroadcastss 0x32bd(%rip),%ymm8 # 6540 <_sk_callback_avx+0x359>
.byte 196,193,108,89,208 // vmulps %ymm8,%ymm2,%ymm2
.byte 197,252,92,210 // vsubps %ymm2,%ymm0,%ymm2
.byte 197,116,89,193 // vmulps %ymm1,%ymm1,%ymm8
.byte 196,65,116,89,192 // vmulps %ymm8,%ymm1,%ymm8
- .byte 196,98,125,24,13,76,51,0,0 // vbroadcastss 0x334c(%rip),%ymm9 # 6660 <_sk_callback_avx+0x35d>
+ .byte 196,98,125,24,13,166,50,0,0 // vbroadcastss 0x32a6(%rip),%ymm9 # 6544 <_sk_callback_avx+0x35d>
.byte 196,65,52,194,208,1 // vcmpltps %ymm8,%ymm9,%ymm10
- .byte 196,98,125,24,29,65,51,0,0 // vbroadcastss 0x3341(%rip),%ymm11 # 6664 <_sk_callback_avx+0x361>
+ .byte 196,98,125,24,29,155,50,0,0 // vbroadcastss 0x329b(%rip),%ymm11 # 6548 <_sk_callback_avx+0x361>
.byte 196,193,116,88,203 // vaddps %ymm11,%ymm1,%ymm1
- .byte 196,98,125,24,37,55,51,0,0 // vbroadcastss 0x3337(%rip),%ymm12 # 6668 <_sk_callback_avx+0x365>
+ .byte 196,98,125,24,37,145,50,0,0 // vbroadcastss 0x3291(%rip),%ymm12 # 654c <_sk_callback_avx+0x365>
.byte 196,193,116,89,204 // vmulps %ymm12,%ymm1,%ymm1
.byte 196,67,117,74,192,160 // vblendvps %ymm10,%ymm8,%ymm1,%ymm8
.byte 197,252,89,200 // vmulps %ymm0,%ymm0,%ymm1
@@ -17047,9 +17013,9 @@ _sk_lab_to_xyz_avx:
.byte 196,193,108,88,211 // vaddps %ymm11,%ymm2,%ymm2
.byte 196,193,108,89,212 // vmulps %ymm12,%ymm2,%ymm2
.byte 196,227,109,74,208,144 // vblendvps %ymm9,%ymm0,%ymm2,%ymm2
- .byte 196,226,125,24,5,237,50,0,0 // vbroadcastss 0x32ed(%rip),%ymm0 # 666c <_sk_callback_avx+0x369>
+ .byte 196,226,125,24,5,71,50,0,0 // vbroadcastss 0x3247(%rip),%ymm0 # 6550 <_sk_callback_avx+0x369>
.byte 197,188,89,192 // vmulps %ymm0,%ymm8,%ymm0
- .byte 196,98,125,24,5,228,50,0,0 // vbroadcastss 0x32e4(%rip),%ymm8 # 6670 <_sk_callback_avx+0x36d>
+ .byte 196,98,125,24,5,62,50,0,0 // vbroadcastss 0x323e(%rip),%ymm8 # 6554 <_sk_callback_avx+0x36d>
.byte 196,193,108,89,208 // vmulps %ymm8,%ymm2,%ymm2
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -17063,14 +17029,14 @@ _sk_load_a8_avx:
.byte 72,139,0 // mov (%rax),%rax
.byte 72,1,248 // add %rdi,%rax
.byte 77,133,192 // test %r8,%r8
- .byte 117,62 // jne 33e3 <_sk_load_a8_avx+0x4e>
+ .byte 117,62 // jne 336d <_sk_load_a8_avx+0x4e>
.byte 197,250,126,0 // vmovq (%rax),%xmm0
.byte 196,226,121,49,200 // vpmovzxbd %xmm0,%xmm1
.byte 196,227,121,4,192,229 // vpermilps $0xe5,%xmm0,%xmm0
.byte 196,226,121,49,192 // vpmovzxbd %xmm0,%xmm0
.byte 196,227,117,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,226,125,24,13,168,50,0,0 // vbroadcastss 0x32a8(%rip),%ymm1 # 6674 <_sk_callback_avx+0x371>
+ .byte 196,226,125,24,13,2,50,0,0 // vbroadcastss 0x3202(%rip),%ymm1 # 6558 <_sk_callback_avx+0x371>
.byte 197,252,89,217 // vmulps %ymm1,%ymm0,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 197,252,87,192 // vxorps %ymm0,%ymm0,%ymm0
@@ -17087,9 +17053,9 @@ _sk_load_a8_avx:
.byte 77,9,217 // or %r11,%r9
.byte 72,131,193,8 // add $0x8,%rcx
.byte 73,255,202 // dec %r10
- .byte 117,234 // jne 33eb <_sk_load_a8_avx+0x56>
+ .byte 117,234 // jne 3375 <_sk_load_a8_avx+0x56>
.byte 196,193,249,110,193 // vmovq %r9,%xmm0
- .byte 235,161 // jmp 33a9 <_sk_load_a8_avx+0x14>
+ .byte 235,161 // jmp 3333 <_sk_load_a8_avx+0x14>
HIDDEN _sk_gather_a8_avx
.globl _sk_gather_a8_avx
@@ -17139,7 +17105,7 @@ _sk_gather_a8_avx:
.byte 196,226,121,49,201 // vpmovzxbd %xmm1,%xmm1
.byte 196,227,125,24,193,1 // vinsertf128 $0x1,%xmm1,%ymm0,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,226,125,24,13,157,49,0,0 // vbroadcastss 0x319d(%rip),%ymm1 # 6678 <_sk_callback_avx+0x375>
+ .byte 196,226,125,24,13,247,48,0,0 // vbroadcastss 0x30f7(%rip),%ymm1 # 655c <_sk_callback_avx+0x375>
.byte 197,252,89,217 // vmulps %ymm1,%ymm0,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 197,252,87,192 // vxorps %ymm0,%ymm0,%ymm0
@@ -17157,14 +17123,14 @@ FUNCTION(_sk_store_a8_avx)
_sk_store_a8_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,16 // mov (%rax),%r10
- .byte 196,98,125,24,5,120,49,0,0 // vbroadcastss 0x3178(%rip),%ymm8 # 667c <_sk_callback_avx+0x379>
+ .byte 196,98,125,24,5,210,48,0,0 // vbroadcastss 0x30d2(%rip),%ymm8 # 6560 <_sk_callback_avx+0x379>
.byte 196,65,100,89,192 // vmulps %ymm8,%ymm3,%ymm8
.byte 196,65,125,91,192 // vcvtps2dq %ymm8,%ymm8
.byte 196,67,125,25,193,1 // vextractf128 $0x1,%ymm8,%xmm9
.byte 196,66,57,43,193 // vpackusdw %xmm9,%xmm8,%xmm8
.byte 196,65,57,103,192 // vpackuswb %xmm8,%xmm8,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,10 // jne 352d <_sk_store_a8_avx+0x37>
+ .byte 117,10 // jne 34b7 <_sk_store_a8_avx+0x37>
.byte 196,65,123,17,4,58 // vmovsd %xmm8,(%r10,%rdi,1)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -17172,10 +17138,10 @@ _sk_store_a8_avx:
.byte 65,128,224,7 // and $0x7,%r8b
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 119,236 // ja 3529 <_sk_store_a8_avx+0x33>
+ .byte 119,236 // ja 34b3 <_sk_store_a8_avx+0x33>
.byte 196,66,121,48,192 // vpmovzxbw %xmm8,%xmm8
.byte 69,15,182,192 // movzbl %r8b,%r8d
- .byte 76,141,13,67,0,0,0 // lea 0x43(%rip),%r9 # 3590 <_sk_store_a8_avx+0x9a>
+ .byte 76,141,13,69,0,0,0 // lea 0x45(%rip),%r9 # 351c <_sk_store_a8_avx+0x9c>
.byte 75,99,4,129 // movslq (%r9,%r8,4),%rax
.byte 76,1,200 // add %r9,%rax
.byte 255,224 // jmpq *%rax
@@ -17186,27 +17152,28 @@ _sk_store_a8_avx:
.byte 196,67,121,20,68,58,2,4 // vpextrb $0x4,%xmm8,0x2(%r10,%rdi,1)
.byte 196,67,121,20,68,58,1,2 // vpextrb $0x2,%xmm8,0x1(%r10,%rdi,1)
.byte 196,67,121,20,4,58,0 // vpextrb $0x0,%xmm8,(%r10,%rdi,1)
- .byte 235,154 // jmp 3529 <_sk_store_a8_avx+0x33>
- .byte 144 // nop
- .byte 246,255 // idiv %bh
+ .byte 235,154 // jmp 34b3 <_sk_store_a8_avx+0x33>
+ .byte 15,31,0 // nopl (%rax)
+ .byte 244 // hlt
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 238 // out %al,(%dx)
.byte 255 // (bad)
+ .byte 236 // in (%dx),%al
.byte 255 // (bad)
- .byte 255,230 // jmpq *%rsi
.byte 255 // (bad)
+ .byte 255,228 // jmpq *%rsp
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 222,255 // fdivrp %st,%st(7)
.byte 255 // (bad)
- .byte 255,214 // callq *%rsi
+ .byte 220,255 // fdivr %st,%st(7)
.byte 255 // (bad)
+ .byte 255,212 // callq *%rsp
.byte 255 // (bad)
- .byte 255,206 // dec %esi
.byte 255 // (bad)
+ .byte 255,204 // dec %esp
.byte 255 // (bad)
- .byte 255,198 // inc %esi
+ .byte 255 // (bad)
+ .byte 255,196 // inc %esp
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // .byte 0xff
@@ -17220,17 +17187,17 @@ _sk_load_g8_avx:
.byte 72,139,0 // mov (%rax),%rax
.byte 72,1,248 // add %rdi,%rax
.byte 77,133,192 // test %r8,%r8
- .byte 117,67 // jne 35ff <_sk_load_g8_avx+0x53>
+ .byte 117,67 // jne 358b <_sk_load_g8_avx+0x53>
.byte 197,250,126,0 // vmovq (%rax),%xmm0
.byte 196,226,121,49,200 // vpmovzxbd %xmm0,%xmm1
.byte 196,227,121,4,192,229 // vpermilps $0xe5,%xmm0,%xmm0
.byte 196,226,121,49,192 // vpmovzxbd %xmm0,%xmm0
.byte 196,227,117,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,226,125,24,13,157,48,0,0 // vbroadcastss 0x309d(%rip),%ymm1 # 6680 <_sk_callback_avx+0x37d>
+ .byte 196,226,125,24,13,245,47,0,0 // vbroadcastss 0x2ff5(%rip),%ymm1 # 6564 <_sk_callback_avx+0x37d>
.byte 197,252,89,193 // vmulps %ymm1,%ymm0,%ymm0
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,29,146,48,0,0 // vbroadcastss 0x3092(%rip),%ymm3 # 6684 <_sk_callback_avx+0x381>
+ .byte 196,226,125,24,29,234,47,0,0 // vbroadcastss 0x2fea(%rip),%ymm3 # 6568 <_sk_callback_avx+0x381>
.byte 76,137,193 // mov %r8,%rcx
.byte 197,252,40,200 // vmovaps %ymm0,%ymm1
.byte 197,252,40,208 // vmovaps %ymm0,%ymm2
@@ -17244,9 +17211,9 @@ _sk_load_g8_avx:
.byte 77,9,217 // or %r11,%r9
.byte 72,131,193,8 // add $0x8,%rcx
.byte 73,255,202 // dec %r10
- .byte 117,234 // jne 3607 <_sk_load_g8_avx+0x5b>
+ .byte 117,234 // jne 3593 <_sk_load_g8_avx+0x5b>
.byte 196,193,249,110,193 // vmovq %r9,%xmm0
- .byte 235,156 // jmp 35c0 <_sk_load_g8_avx+0x14>
+ .byte 235,156 // jmp 354c <_sk_load_g8_avx+0x14>
HIDDEN _sk_gather_g8_avx
.globl _sk_gather_g8_avx
@@ -17296,10 +17263,10 @@ _sk_gather_g8_avx:
.byte 196,226,121,49,201 // vpmovzxbd %xmm1,%xmm1
.byte 196,227,125,24,193,1 // vinsertf128 $0x1,%xmm1,%ymm0,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,226,125,24,13,145,47,0,0 // vbroadcastss 0x2f91(%rip),%ymm1 # 6688 <_sk_callback_avx+0x385>
+ .byte 196,226,125,24,13,233,46,0,0 // vbroadcastss 0x2ee9(%rip),%ymm1 # 656c <_sk_callback_avx+0x385>
.byte 197,252,89,193 // vmulps %ymm1,%ymm0,%ymm0
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,29,134,47,0,0 // vbroadcastss 0x2f86(%rip),%ymm3 # 668c <_sk_callback_avx+0x389>
+ .byte 196,226,125,24,29,222,46,0,0 // vbroadcastss 0x2ede(%rip),%ymm3 # 6570 <_sk_callback_avx+0x389>
.byte 197,252,40,200 // vmovaps %ymm0,%ymm1
.byte 197,252,40,208 // vmovaps %ymm0,%ymm2
.byte 91 // pop %rbx
@@ -17315,9 +17282,9 @@ _sk_gather_i8_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 73,137,192 // mov %rax,%r8
.byte 77,133,192 // test %r8,%r8
- .byte 116,5 // je 3726 <_sk_gather_i8_avx+0xf>
+ .byte 116,5 // je 36b2 <_sk_gather_i8_avx+0xf>
.byte 76,137,192 // mov %r8,%rax
- .byte 235,2 // jmp 3728 <_sk_gather_i8_avx+0x11>
+ .byte 235,2 // jmp 36b4 <_sk_gather_i8_avx+0x11>
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 65,87 // push %r15
.byte 65,86 // push %r14
@@ -17379,10 +17346,10 @@ _sk_gather_i8_avx:
.byte 196,163,121,34,4,163,2 // vpinsrd $0x2,(%rbx,%r12,4),%xmm0,%xmm0
.byte 196,163,121,34,28,19,3 // vpinsrd $0x3,(%rbx,%r10,1),%xmm0,%xmm3
.byte 196,227,61,24,195,1 // vinsertf128 $0x1,%xmm3,%ymm8,%ymm0
- .byte 197,124,40,21,14,48,0,0 // vmovaps 0x300e(%rip),%ymm10 # 6860 <_sk_callback_avx+0x55d>
+ .byte 197,124,40,21,226,47,0,0 // vmovaps 0x2fe2(%rip),%ymm10 # 67c0 <_sk_callback_avx+0x5d9>
.byte 196,193,124,84,194 // vandps %ymm10,%ymm0,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,98,125,24,13,44,46,0,0 // vbroadcastss 0x2e2c(%rip),%ymm9 # 6690 <_sk_callback_avx+0x38d>
+ .byte 196,98,125,24,13,132,45,0,0 // vbroadcastss 0x2d84(%rip),%ymm9 # 6574 <_sk_callback_avx+0x38d>
.byte 196,193,124,89,193 // vmulps %ymm9,%ymm0,%ymm0
.byte 196,193,113,114,208,8 // vpsrld $0x8,%xmm8,%xmm1
.byte 197,233,114,211,8 // vpsrld $0x8,%xmm3,%xmm2
@@ -17416,38 +17383,38 @@ _sk_load_565_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,16 // mov (%rax),%r10
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,128,0,0,0 // jne 395c <_sk_load_565_avx+0x8e>
+ .byte 15,133,128,0,0,0 // jne 38e8 <_sk_load_565_avx+0x8e>
.byte 196,193,122,111,4,122 // vmovdqu (%r10,%rdi,2),%xmm0
.byte 197,241,239,201 // vpxor %xmm1,%xmm1,%xmm1
.byte 197,249,105,201 // vpunpckhwd %xmm1,%xmm0,%xmm1
.byte 196,226,121,51,192 // vpmovzxwd %xmm0,%xmm0
.byte 196,227,125,24,209,1 // vinsertf128 $0x1,%xmm1,%ymm0,%ymm2
- .byte 196,226,125,24,5,150,45,0,0 // vbroadcastss 0x2d96(%rip),%ymm0 # 6694 <_sk_callback_avx+0x391>
+ .byte 196,226,125,24,5,238,44,0,0 // vbroadcastss 0x2cee(%rip),%ymm0 # 6578 <_sk_callback_avx+0x391>
.byte 197,236,84,192 // vandps %ymm0,%ymm2,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,226,125,24,13,137,45,0,0 // vbroadcastss 0x2d89(%rip),%ymm1 # 6698 <_sk_callback_avx+0x395>
+ .byte 196,226,125,24,13,225,44,0,0 // vbroadcastss 0x2ce1(%rip),%ymm1 # 657c <_sk_callback_avx+0x395>
.byte 197,252,89,193 // vmulps %ymm1,%ymm0,%ymm0
- .byte 196,226,125,24,13,128,45,0,0 // vbroadcastss 0x2d80(%rip),%ymm1 # 669c <_sk_callback_avx+0x399>
+ .byte 196,226,125,24,13,216,44,0,0 // vbroadcastss 0x2cd8(%rip),%ymm1 # 6580 <_sk_callback_avx+0x399>
.byte 197,236,84,201 // vandps %ymm1,%ymm2,%ymm1
.byte 197,252,91,201 // vcvtdq2ps %ymm1,%ymm1
- .byte 196,226,125,24,29,115,45,0,0 // vbroadcastss 0x2d73(%rip),%ymm3 # 66a0 <_sk_callback_avx+0x39d>
+ .byte 196,226,125,24,29,203,44,0,0 // vbroadcastss 0x2ccb(%rip),%ymm3 # 6584 <_sk_callback_avx+0x39d>
.byte 197,244,89,203 // vmulps %ymm3,%ymm1,%ymm1
- .byte 196,226,125,24,29,106,45,0,0 // vbroadcastss 0x2d6a(%rip),%ymm3 # 66a4 <_sk_callback_avx+0x3a1>
+ .byte 196,226,125,24,29,194,44,0,0 // vbroadcastss 0x2cc2(%rip),%ymm3 # 6588 <_sk_callback_avx+0x3a1>
.byte 197,236,84,211 // vandps %ymm3,%ymm2,%ymm2
.byte 197,252,91,210 // vcvtdq2ps %ymm2,%ymm2
- .byte 196,226,125,24,29,93,45,0,0 // vbroadcastss 0x2d5d(%rip),%ymm3 # 66a8 <_sk_callback_avx+0x3a5>
+ .byte 196,226,125,24,29,181,44,0,0 // vbroadcastss 0x2cb5(%rip),%ymm3 # 658c <_sk_callback_avx+0x3a5>
.byte 197,236,89,211 // vmulps %ymm3,%ymm2,%ymm2
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,29,82,45,0,0 // vbroadcastss 0x2d52(%rip),%ymm3 # 66ac <_sk_callback_avx+0x3a9>
+ .byte 196,226,125,24,29,170,44,0,0 // vbroadcastss 0x2caa(%rip),%ymm3 # 6590 <_sk_callback_avx+0x3a9>
.byte 255,224 // jmpq *%rax
.byte 65,137,200 // mov %ecx,%r8d
.byte 65,128,224,7 // and $0x7,%r8b
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 15,135,110,255,255,255 // ja 38e2 <_sk_load_565_avx+0x14>
+ .byte 15,135,110,255,255,255 // ja 386e <_sk_load_565_avx+0x14>
.byte 69,15,182,192 // movzbl %r8b,%r8d
- .byte 76,141,13,73,0,0,0 // lea 0x49(%rip),%r9 # 39c8 <_sk_load_565_avx+0xfa>
+ .byte 76,141,13,73,0,0,0 // lea 0x49(%rip),%r9 # 3954 <_sk_load_565_avx+0xfa>
.byte 75,99,4,129 // movslq (%r9,%r8,4),%rax
.byte 76,1,200 // add %r9,%rax
.byte 255,224 // jmpq *%rax
@@ -17459,7 +17426,7 @@ _sk_load_565_avx:
.byte 196,193,121,196,68,122,4,2 // vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
.byte 196,193,121,196,68,122,2,1 // vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
.byte 196,193,121,196,4,122,0 // vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- .byte 233,26,255,255,255 // jmpq 38e2 <_sk_load_565_avx+0x14>
+ .byte 233,26,255,255,255 // jmpq 386e <_sk_load_565_avx+0x14>
.byte 244 // hlt
.byte 255 // (bad)
.byte 255 // (bad)
@@ -17537,23 +17504,23 @@ _sk_gather_565_avx:
.byte 197,249,105,201 // vpunpckhwd %xmm1,%xmm0,%xmm1
.byte 196,226,121,51,192 // vpmovzxwd %xmm0,%xmm0
.byte 196,227,125,24,209,1 // vinsertf128 $0x1,%xmm1,%ymm0,%ymm2
- .byte 196,226,125,24,5,242,43,0,0 // vbroadcastss 0x2bf2(%rip),%ymm0 # 66b0 <_sk_callback_avx+0x3ad>
+ .byte 196,226,125,24,5,74,43,0,0 // vbroadcastss 0x2b4a(%rip),%ymm0 # 6594 <_sk_callback_avx+0x3ad>
.byte 197,236,84,192 // vandps %ymm0,%ymm2,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,226,125,24,13,229,43,0,0 // vbroadcastss 0x2be5(%rip),%ymm1 # 66b4 <_sk_callback_avx+0x3b1>
+ .byte 196,226,125,24,13,61,43,0,0 // vbroadcastss 0x2b3d(%rip),%ymm1 # 6598 <_sk_callback_avx+0x3b1>
.byte 197,252,89,193 // vmulps %ymm1,%ymm0,%ymm0
- .byte 196,226,125,24,13,220,43,0,0 // vbroadcastss 0x2bdc(%rip),%ymm1 # 66b8 <_sk_callback_avx+0x3b5>
+ .byte 196,226,125,24,13,52,43,0,0 // vbroadcastss 0x2b34(%rip),%ymm1 # 659c <_sk_callback_avx+0x3b5>
.byte 197,236,84,201 // vandps %ymm1,%ymm2,%ymm1
.byte 197,252,91,201 // vcvtdq2ps %ymm1,%ymm1
- .byte 196,226,125,24,29,207,43,0,0 // vbroadcastss 0x2bcf(%rip),%ymm3 # 66bc <_sk_callback_avx+0x3b9>
+ .byte 196,226,125,24,29,39,43,0,0 // vbroadcastss 0x2b27(%rip),%ymm3 # 65a0 <_sk_callback_avx+0x3b9>
.byte 197,244,89,203 // vmulps %ymm3,%ymm1,%ymm1
- .byte 196,226,125,24,29,198,43,0,0 // vbroadcastss 0x2bc6(%rip),%ymm3 # 66c0 <_sk_callback_avx+0x3bd>
+ .byte 196,226,125,24,29,30,43,0,0 // vbroadcastss 0x2b1e(%rip),%ymm3 # 65a4 <_sk_callback_avx+0x3bd>
.byte 197,236,84,211 // vandps %ymm3,%ymm2,%ymm2
.byte 197,252,91,210 // vcvtdq2ps %ymm2,%ymm2
- .byte 196,226,125,24,29,185,43,0,0 // vbroadcastss 0x2bb9(%rip),%ymm3 # 66c4 <_sk_callback_avx+0x3c1>
+ .byte 196,226,125,24,29,17,43,0,0 // vbroadcastss 0x2b11(%rip),%ymm3 # 65a8 <_sk_callback_avx+0x3c1>
.byte 197,236,89,211 // vmulps %ymm3,%ymm2,%ymm2
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,29,174,43,0,0 // vbroadcastss 0x2bae(%rip),%ymm3 # 66c8 <_sk_callback_avx+0x3c5>
+ .byte 196,226,125,24,29,6,43,0,0 // vbroadcastss 0x2b06(%rip),%ymm3 # 65ac <_sk_callback_avx+0x3c5>
.byte 91 // pop %rbx
.byte 65,92 // pop %r12
.byte 65,94 // pop %r14
@@ -17567,14 +17534,14 @@ FUNCTION(_sk_store_565_avx)
_sk_store_565_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,16 // mov (%rax),%r10
- .byte 196,98,125,24,5,154,43,0,0 // vbroadcastss 0x2b9a(%rip),%ymm8 # 66cc <_sk_callback_avx+0x3c9>
+ .byte 196,98,125,24,5,242,42,0,0 // vbroadcastss 0x2af2(%rip),%ymm8 # 65b0 <_sk_callback_avx+0x3c9>
.byte 196,65,124,89,200 // vmulps %ymm8,%ymm0,%ymm9
.byte 196,65,125,91,201 // vcvtps2dq %ymm9,%ymm9
.byte 196,193,41,114,241,11 // vpslld $0xb,%xmm9,%xmm10
.byte 196,67,125,25,201,1 // vextractf128 $0x1,%ymm9,%xmm9
.byte 196,193,49,114,241,11 // vpslld $0xb,%xmm9,%xmm9
.byte 196,67,45,24,201,1 // vinsertf128 $0x1,%xmm9,%ymm10,%ymm9
- .byte 196,98,125,24,21,115,43,0,0 // vbroadcastss 0x2b73(%rip),%ymm10 # 66d0 <_sk_callback_avx+0x3cd>
+ .byte 196,98,125,24,21,203,42,0,0 // vbroadcastss 0x2acb(%rip),%ymm10 # 65b4 <_sk_callback_avx+0x3cd>
.byte 196,65,116,89,210 // vmulps %ymm10,%ymm1,%ymm10
.byte 196,65,125,91,210 // vcvtps2dq %ymm10,%ymm10
.byte 196,193,33,114,242,5 // vpslld $0x5,%xmm10,%xmm11
@@ -17588,7 +17555,7 @@ _sk_store_565_avx:
.byte 196,67,125,25,193,1 // vextractf128 $0x1,%ymm8,%xmm9
.byte 196,66,57,43,193 // vpackusdw %xmm9,%xmm8,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,10 // jne 3bad <_sk_store_565_avx+0x89>
+ .byte 117,10 // jne 3b39 <_sk_store_565_avx+0x89>
.byte 196,65,122,127,4,122 // vmovdqu %xmm8,(%r10,%rdi,2)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -17596,9 +17563,9 @@ _sk_store_565_avx:
.byte 65,128,224,7 // and $0x7,%r8b
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 119,236 // ja 3ba9 <_sk_store_565_avx+0x85>
+ .byte 119,236 // ja 3b35 <_sk_store_565_avx+0x85>
.byte 69,15,182,192 // movzbl %r8b,%r8d
- .byte 76,141,13,68,0,0,0 // lea 0x44(%rip),%r9 # 3c0c <_sk_store_565_avx+0xe8>
+ .byte 76,141,13,68,0,0,0 // lea 0x44(%rip),%r9 # 3b98 <_sk_store_565_avx+0xe8>
.byte 75,99,4,129 // movslq (%r9,%r8,4),%rax
.byte 76,1,200 // add %r9,%rax
.byte 255,224 // jmpq *%rax
@@ -17609,7 +17576,7 @@ _sk_store_565_avx:
.byte 196,67,121,21,68,122,4,2 // vpextrw $0x2,%xmm8,0x4(%r10,%rdi,2)
.byte 196,67,121,21,68,122,2,1 // vpextrw $0x1,%xmm8,0x2(%r10,%rdi,2)
.byte 196,67,121,21,4,122,0 // vpextrw $0x0,%xmm8,(%r10,%rdi,2)
- .byte 235,159 // jmp 3ba9 <_sk_store_565_avx+0x85>
+ .byte 235,159 // jmp 3b35 <_sk_store_565_avx+0x85>
.byte 102,144 // xchg %ax,%ax
.byte 245 // cmc
.byte 255 // (bad)
@@ -17642,31 +17609,31 @@ _sk_load_4444_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,16 // mov (%rax),%r10
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,152,0,0,0 // jne 3cce <_sk_load_4444_avx+0xa6>
+ .byte 15,133,152,0,0,0 // jne 3c5a <_sk_load_4444_avx+0xa6>
.byte 196,193,122,111,4,122 // vmovdqu (%r10,%rdi,2),%xmm0
.byte 197,241,239,201 // vpxor %xmm1,%xmm1,%xmm1
.byte 197,249,105,201 // vpunpckhwd %xmm1,%xmm0,%xmm1
.byte 196,226,121,51,192 // vpmovzxwd %xmm0,%xmm0
.byte 196,227,125,24,217,1 // vinsertf128 $0x1,%xmm1,%ymm0,%ymm3
- .byte 196,226,125,24,5,124,42,0,0 // vbroadcastss 0x2a7c(%rip),%ymm0 # 66d4 <_sk_callback_avx+0x3d1>
+ .byte 196,226,125,24,5,212,41,0,0 // vbroadcastss 0x29d4(%rip),%ymm0 # 65b8 <_sk_callback_avx+0x3d1>
.byte 197,228,84,192 // vandps %ymm0,%ymm3,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,226,125,24,13,111,42,0,0 // vbroadcastss 0x2a6f(%rip),%ymm1 # 66d8 <_sk_callback_avx+0x3d5>
+ .byte 196,226,125,24,13,199,41,0,0 // vbroadcastss 0x29c7(%rip),%ymm1 # 65bc <_sk_callback_avx+0x3d5>
.byte 197,252,89,193 // vmulps %ymm1,%ymm0,%ymm0
- .byte 196,226,125,24,13,102,42,0,0 // vbroadcastss 0x2a66(%rip),%ymm1 # 66dc <_sk_callback_avx+0x3d9>
+ .byte 196,226,125,24,13,190,41,0,0 // vbroadcastss 0x29be(%rip),%ymm1 # 65c0 <_sk_callback_avx+0x3d9>
.byte 197,228,84,201 // vandps %ymm1,%ymm3,%ymm1
.byte 197,252,91,201 // vcvtdq2ps %ymm1,%ymm1
- .byte 196,226,125,24,21,89,42,0,0 // vbroadcastss 0x2a59(%rip),%ymm2 # 66e0 <_sk_callback_avx+0x3dd>
+ .byte 196,226,125,24,21,177,41,0,0 // vbroadcastss 0x29b1(%rip),%ymm2 # 65c4 <_sk_callback_avx+0x3dd>
.byte 197,244,89,202 // vmulps %ymm2,%ymm1,%ymm1
- .byte 196,226,125,24,21,80,42,0,0 // vbroadcastss 0x2a50(%rip),%ymm2 # 66e4 <_sk_callback_avx+0x3e1>
+ .byte 196,226,125,24,21,168,41,0,0 // vbroadcastss 0x29a8(%rip),%ymm2 # 65c8 <_sk_callback_avx+0x3e1>
.byte 197,228,84,210 // vandps %ymm2,%ymm3,%ymm2
.byte 197,252,91,210 // vcvtdq2ps %ymm2,%ymm2
- .byte 196,98,125,24,5,67,42,0,0 // vbroadcastss 0x2a43(%rip),%ymm8 # 66e8 <_sk_callback_avx+0x3e5>
+ .byte 196,98,125,24,5,155,41,0,0 // vbroadcastss 0x299b(%rip),%ymm8 # 65cc <_sk_callback_avx+0x3e5>
.byte 196,193,108,89,208 // vmulps %ymm8,%ymm2,%ymm2
- .byte 196,98,125,24,5,57,42,0,0 // vbroadcastss 0x2a39(%rip),%ymm8 # 66ec <_sk_callback_avx+0x3e9>
+ .byte 196,98,125,24,5,145,41,0,0 // vbroadcastss 0x2991(%rip),%ymm8 # 65d0 <_sk_callback_avx+0x3e9>
.byte 196,193,100,84,216 // vandps %ymm8,%ymm3,%ymm3
.byte 197,252,91,219 // vcvtdq2ps %ymm3,%ymm3
- .byte 196,98,125,24,5,43,42,0,0 // vbroadcastss 0x2a2b(%rip),%ymm8 # 66f0 <_sk_callback_avx+0x3ed>
+ .byte 196,98,125,24,5,131,41,0,0 // vbroadcastss 0x2983(%rip),%ymm8 # 65d4 <_sk_callback_avx+0x3ed>
.byte 196,193,100,89,216 // vmulps %ymm8,%ymm3,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -17675,9 +17642,9 @@ _sk_load_4444_avx:
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 15,135,86,255,255,255 // ja 3c3c <_sk_load_4444_avx+0x14>
+ .byte 15,135,86,255,255,255 // ja 3bc8 <_sk_load_4444_avx+0x14>
.byte 69,15,182,192 // movzbl %r8b,%r8d
- .byte 76,141,13,75,0,0,0 // lea 0x4b(%rip),%r9 # 3d3c <_sk_load_4444_avx+0x114>
+ .byte 76,141,13,75,0,0,0 // lea 0x4b(%rip),%r9 # 3cc8 <_sk_load_4444_avx+0x114>
.byte 75,99,4,129 // movslq (%r9,%r8,4),%rax
.byte 76,1,200 // add %r9,%rax
.byte 255,224 // jmpq *%rax
@@ -17689,7 +17656,7 @@ _sk_load_4444_avx:
.byte 196,193,121,196,68,122,4,2 // vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
.byte 196,193,121,196,68,122,2,1 // vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
.byte 196,193,121,196,4,122,0 // vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- .byte 233,2,255,255,255 // jmpq 3c3c <_sk_load_4444_avx+0x14>
+ .byte 233,2,255,255,255 // jmpq 3bc8 <_sk_load_4444_avx+0x14>
.byte 102,144 // xchg %ax,%ax
.byte 242,255 // repnz (bad)
.byte 255 // (bad)
@@ -17768,25 +17735,25 @@ _sk_gather_4444_avx:
.byte 197,249,105,201 // vpunpckhwd %xmm1,%xmm0,%xmm1
.byte 196,226,121,51,192 // vpmovzxwd %xmm0,%xmm0
.byte 196,227,125,24,217,1 // vinsertf128 $0x1,%xmm1,%ymm0,%ymm3
- .byte 196,226,125,24,5,194,40,0,0 // vbroadcastss 0x28c2(%rip),%ymm0 # 66f4 <_sk_callback_avx+0x3f1>
+ .byte 196,226,125,24,5,26,40,0,0 // vbroadcastss 0x281a(%rip),%ymm0 # 65d8 <_sk_callback_avx+0x3f1>
.byte 197,228,84,192 // vandps %ymm0,%ymm3,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,226,125,24,13,181,40,0,0 // vbroadcastss 0x28b5(%rip),%ymm1 # 66f8 <_sk_callback_avx+0x3f5>
+ .byte 196,226,125,24,13,13,40,0,0 // vbroadcastss 0x280d(%rip),%ymm1 # 65dc <_sk_callback_avx+0x3f5>
.byte 197,252,89,193 // vmulps %ymm1,%ymm0,%ymm0
- .byte 196,226,125,24,13,172,40,0,0 // vbroadcastss 0x28ac(%rip),%ymm1 # 66fc <_sk_callback_avx+0x3f9>
+ .byte 196,226,125,24,13,4,40,0,0 // vbroadcastss 0x2804(%rip),%ymm1 # 65e0 <_sk_callback_avx+0x3f9>
.byte 197,228,84,201 // vandps %ymm1,%ymm3,%ymm1
.byte 197,252,91,201 // vcvtdq2ps %ymm1,%ymm1
- .byte 196,226,125,24,21,159,40,0,0 // vbroadcastss 0x289f(%rip),%ymm2 # 6700 <_sk_callback_avx+0x3fd>
+ .byte 196,226,125,24,21,247,39,0,0 // vbroadcastss 0x27f7(%rip),%ymm2 # 65e4 <_sk_callback_avx+0x3fd>
.byte 197,244,89,202 // vmulps %ymm2,%ymm1,%ymm1
- .byte 196,226,125,24,21,150,40,0,0 // vbroadcastss 0x2896(%rip),%ymm2 # 6704 <_sk_callback_avx+0x401>
+ .byte 196,226,125,24,21,238,39,0,0 // vbroadcastss 0x27ee(%rip),%ymm2 # 65e8 <_sk_callback_avx+0x401>
.byte 197,228,84,210 // vandps %ymm2,%ymm3,%ymm2
.byte 197,252,91,210 // vcvtdq2ps %ymm2,%ymm2
- .byte 196,98,125,24,5,137,40,0,0 // vbroadcastss 0x2889(%rip),%ymm8 # 6708 <_sk_callback_avx+0x405>
+ .byte 196,98,125,24,5,225,39,0,0 // vbroadcastss 0x27e1(%rip),%ymm8 # 65ec <_sk_callback_avx+0x405>
.byte 196,193,108,89,208 // vmulps %ymm8,%ymm2,%ymm2
- .byte 196,98,125,24,5,127,40,0,0 // vbroadcastss 0x287f(%rip),%ymm8 # 670c <_sk_callback_avx+0x409>
+ .byte 196,98,125,24,5,215,39,0,0 // vbroadcastss 0x27d7(%rip),%ymm8 # 65f0 <_sk_callback_avx+0x409>
.byte 196,193,100,84,216 // vandps %ymm8,%ymm3,%ymm3
.byte 197,252,91,219 // vcvtdq2ps %ymm3,%ymm3
- .byte 196,98,125,24,5,113,40,0,0 // vbroadcastss 0x2871(%rip),%ymm8 # 6710 <_sk_callback_avx+0x40d>
+ .byte 196,98,125,24,5,201,39,0,0 // vbroadcastss 0x27c9(%rip),%ymm8 # 65f4 <_sk_callback_avx+0x40d>
.byte 196,193,100,89,216 // vmulps %ymm8,%ymm3,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 91 // pop %rbx
@@ -17802,7 +17769,7 @@ FUNCTION(_sk_store_4444_avx)
_sk_store_4444_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,16 // mov (%rax),%r10
- .byte 196,98,125,24,5,86,40,0,0 // vbroadcastss 0x2856(%rip),%ymm8 # 6714 <_sk_callback_avx+0x411>
+ .byte 196,98,125,24,5,174,39,0,0 // vbroadcastss 0x27ae(%rip),%ymm8 # 65f8 <_sk_callback_avx+0x411>
.byte 196,65,124,89,200 // vmulps %ymm8,%ymm0,%ymm9
.byte 196,65,125,91,201 // vcvtps2dq %ymm9,%ymm9
.byte 196,193,41,114,241,12 // vpslld $0xc,%xmm9,%xmm10
@@ -17829,7 +17796,7 @@ _sk_store_4444_avx:
.byte 196,67,125,25,193,1 // vextractf128 $0x1,%ymm8,%xmm9
.byte 196,66,57,43,193 // vpackusdw %xmm9,%xmm8,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,10 // jne 3f57 <_sk_store_4444_avx+0xa7>
+ .byte 117,10 // jne 3ee3 <_sk_store_4444_avx+0xa7>
.byte 196,65,122,127,4,122 // vmovdqu %xmm8,(%r10,%rdi,2)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -17837,9 +17804,9 @@ _sk_store_4444_avx:
.byte 65,128,224,7 // and $0x7,%r8b
.byte 65,254,200 // dec %r8b
.byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 119,236 // ja 3f53 <_sk_store_4444_avx+0xa3>
+ .byte 119,236 // ja 3edf <_sk_store_4444_avx+0xa3>
.byte 69,15,182,192 // movzbl %r8b,%r8d
- .byte 76,141,13,66,0,0,0 // lea 0x42(%rip),%r9 # 3fb4 <_sk_store_4444_avx+0x104>
+ .byte 76,141,13,66,0,0,0 // lea 0x42(%rip),%r9 # 3f40 <_sk_store_4444_avx+0x104>
.byte 75,99,4,129 // movslq (%r9,%r8,4),%rax
.byte 76,1,200 // add %r9,%rax
.byte 255,224 // jmpq *%rax
@@ -17850,7 +17817,7 @@ _sk_store_4444_avx:
.byte 196,67,121,21,68,122,4,2 // vpextrw $0x2,%xmm8,0x4(%r10,%rdi,2)
.byte 196,67,121,21,68,122,2,1 // vpextrw $0x1,%xmm8,0x2(%r10,%rdi,2)
.byte 196,67,121,21,4,122,0 // vpextrw $0x0,%xmm8,(%r10,%rdi,2)
- .byte 235,159 // jmp 3f53 <_sk_store_4444_avx+0xa3>
+ .byte 235,159 // jmp 3edf <_sk_store_4444_avx+0xa3>
.byte 247,255 // idiv %edi
.byte 255 // (bad)
.byte 255 // (bad)
@@ -17878,87 +17845,53 @@ HIDDEN _sk_load_8888_avx
.globl _sk_load_8888_avx
FUNCTION(_sk_load_8888_avx)
_sk_load_8888_avx:
+ .byte 73,137,200 // mov %rcx,%r8
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 76,139,16 // mov (%rax),%r10
- .byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,135,0,0,0 // jne 4065 <_sk_load_8888_avx+0x95>
- .byte 196,65,124,16,12,186 // vmovups (%r10,%rdi,4),%ymm9
- .byte 197,124,40,21,148,40,0,0 // vmovaps 0x2894(%rip),%ymm10 # 6880 <_sk_callback_avx+0x57d>
- .byte 196,193,52,84,194 // vandps %ymm10,%ymm9,%ymm0
+ .byte 76,141,12,189,0,0,0,0 // lea 0x0(,%rdi,4),%r9
+ .byte 76,3,8 // add (%rax),%r9
+ .byte 77,133,192 // test %r8,%r8
+ .byte 15,133,137,0,0,0 // jne 3ffe <_sk_load_8888_avx+0xa2>
+ .byte 196,193,124,16,25 // vmovups (%r9),%ymm3
+ .byte 197,124,40,21,94,40,0,0 // vmovaps 0x285e(%rip),%ymm10 # 67e0 <_sk_callback_avx+0x5f9>
+ .byte 196,193,100,84,194 // vandps %ymm10,%ymm3,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,98,125,24,5,26,39,0,0 // vbroadcastss 0x271a(%rip),%ymm8 # 6718 <_sk_callback_avx+0x415>
+ .byte 196,98,125,24,5,104,38,0,0 // vbroadcastss 0x2668(%rip),%ymm8 # 65fc <_sk_callback_avx+0x415>
.byte 196,193,124,89,192 // vmulps %ymm8,%ymm0,%ymm0
- .byte 196,193,113,114,209,8 // vpsrld $0x8,%xmm9,%xmm1
- .byte 196,99,125,25,203,1 // vextractf128 $0x1,%ymm9,%xmm3
- .byte 197,233,114,211,8 // vpsrld $0x8,%xmm3,%xmm2
+ .byte 197,241,114,211,8 // vpsrld $0x8,%xmm3,%xmm1
+ .byte 196,195,125,25,217,1 // vextractf128 $0x1,%ymm3,%xmm9
+ .byte 196,193,105,114,209,8 // vpsrld $0x8,%xmm9,%xmm2
.byte 196,227,117,24,202,1 // vinsertf128 $0x1,%xmm2,%ymm1,%ymm1
.byte 196,193,116,84,202 // vandps %ymm10,%ymm1,%ymm1
.byte 197,252,91,201 // vcvtdq2ps %ymm1,%ymm1
.byte 196,193,116,89,200 // vmulps %ymm8,%ymm1,%ymm1
- .byte 196,193,33,114,209,16 // vpsrld $0x10,%xmm9,%xmm11
- .byte 197,233,114,211,16 // vpsrld $0x10,%xmm3,%xmm2
+ .byte 197,161,114,211,16 // vpsrld $0x10,%xmm3,%xmm11
+ .byte 196,193,105,114,209,16 // vpsrld $0x10,%xmm9,%xmm2
.byte 196,227,37,24,210,1 // vinsertf128 $0x1,%xmm2,%ymm11,%ymm2
.byte 196,193,108,84,210 // vandps %ymm10,%ymm2,%ymm2
.byte 197,252,91,210 // vcvtdq2ps %ymm2,%ymm2
.byte 196,193,108,89,208 // vmulps %ymm8,%ymm2,%ymm2
- .byte 196,193,49,114,209,24 // vpsrld $0x18,%xmm9,%xmm9
- .byte 197,225,114,211,24 // vpsrld $0x18,%xmm3,%xmm3
- .byte 196,227,53,24,219,1 // vinsertf128 $0x1,%xmm3,%ymm9,%ymm3
+ .byte 197,169,114,211,24 // vpsrld $0x18,%xmm3,%xmm10
+ .byte 196,193,97,114,209,24 // vpsrld $0x18,%xmm9,%xmm3
+ .byte 196,227,45,24,219,1 // vinsertf128 $0x1,%xmm3,%ymm10,%ymm3
.byte 197,252,91,219 // vcvtdq2ps %ymm3,%ymm3
.byte 196,193,100,89,216 // vmulps %ymm8,%ymm3,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 76,137,193 // mov %r8,%rcx
.byte 255,224 // jmpq *%rax
- .byte 65,137,200 // mov %ecx,%r8d
- .byte 65,128,224,7 // and $0x7,%r8b
- .byte 196,65,52,87,201 // vxorps %ymm9,%ymm9,%ymm9
- .byte 65,254,200 // dec %r8b
- .byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 15,135,102,255,255,255 // ja 3fe4 <_sk_load_8888_avx+0x14>
- .byte 69,15,182,192 // movzbl %r8b,%r8d
- .byte 76,141,13,139,0,0,0 // lea 0x8b(%rip),%r9 # 4114 <_sk_load_8888_avx+0x144>
- .byte 75,99,4,129 // movslq (%r9,%r8,4),%rax
- .byte 76,1,200 // add %r9,%rax
- .byte 255,224 // jmpq *%rax
- .byte 196,193,121,110,68,186,24 // vmovd 0x18(%r10,%rdi,4),%xmm0
- .byte 197,249,112,192,68 // vpshufd $0x44,%xmm0,%xmm0
- .byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
- .byte 197,244,87,201 // vxorps %ymm1,%ymm1,%ymm1
- .byte 196,99,117,12,200,64 // vblendps $0x40,%ymm0,%ymm1,%ymm9
- .byte 196,99,125,25,200,1 // vextractf128 $0x1,%ymm9,%xmm0
- .byte 196,195,121,34,68,186,20,1 // vpinsrd $0x1,0x14(%r10,%rdi,4),%xmm0,%xmm0
- .byte 196,99,53,24,200,1 // vinsertf128 $0x1,%xmm0,%ymm9,%ymm9
- .byte 196,99,125,25,200,1 // vextractf128 $0x1,%ymm9,%xmm0
- .byte 196,195,121,34,68,186,16,0 // vpinsrd $0x0,0x10(%r10,%rdi,4),%xmm0,%xmm0
- .byte 196,99,53,24,200,1 // vinsertf128 $0x1,%xmm0,%ymm9,%ymm9
- .byte 196,195,49,34,68,186,12,3 // vpinsrd $0x3,0xc(%r10,%rdi,4),%xmm9,%xmm0
- .byte 196,99,53,12,200,15 // vblendps $0xf,%ymm0,%ymm9,%ymm9
- .byte 196,195,49,34,68,186,8,2 // vpinsrd $0x2,0x8(%r10,%rdi,4),%xmm9,%xmm0
- .byte 196,99,53,12,200,15 // vblendps $0xf,%ymm0,%ymm9,%ymm9
- .byte 196,195,49,34,68,186,4,1 // vpinsrd $0x1,0x4(%r10,%rdi,4),%xmm9,%xmm0
- .byte 196,99,53,12,200,15 // vblendps $0xf,%ymm0,%ymm9,%ymm9
- .byte 196,195,49,34,4,186,0 // vpinsrd $0x0,(%r10,%rdi,4),%xmm9,%xmm0
- .byte 196,99,53,12,200,15 // vblendps $0xf,%ymm0,%ymm9,%ymm9
- .byte 233,210,254,255,255 // jmpq 3fe4 <_sk_load_8888_avx+0x14>
- .byte 102,144 // xchg %ax,%ax
- .byte 236 // in (%dx),%al
- .byte 255 // (bad)
- .byte 255 // (bad)
- .byte 255 // (bad)
- .byte 222,255 // fdivrp %st,%st(7)
- .byte 255 // (bad)
- .byte 255,208 // callq *%rax
- .byte 255 // (bad)
- .byte 255 // (bad)
- .byte 255,194 // inc %edx
- .byte 255 // (bad)
- .byte 255 // (bad)
- .byte 255,174,255,255,255,154 // ljmp *-0x65000001(%rsi)
- .byte 255 // (bad)
- .byte 255 // (bad)
- .byte 255 // (bad)
- .byte 126,255 // jle 412d <_sk_load_8888_avx+0x15d>
- .byte 255 // (bad)
- .byte 255 // .byte 0xff
+ .byte 185,8,0,0,0 // mov $0x8,%ecx
+ .byte 68,41,193 // sub %r8d,%ecx
+ .byte 192,225,3 // shl $0x3,%cl
+ .byte 72,199,192,255,255,255,255 // mov $0xffffffffffffffff,%rax
+ .byte 72,211,232 // shr %cl,%rax
+ .byte 196,225,249,110,192 // vmovq %rax,%xmm0
+ .byte 196,226,121,48,192 // vpmovzxbw %xmm0,%xmm0
+ .byte 196,226,121,0,13,58,39,0,0 // vpshufb 0x273a(%rip),%xmm0,%xmm1 # 6760 <_sk_callback_avx+0x579>
+ .byte 196,226,121,33,201 // vpmovsxbd %xmm1,%xmm1
+ .byte 196,226,121,0,5,60,39,0,0 // vpshufb 0x273c(%rip),%xmm0,%xmm0 # 6770 <_sk_callback_avx+0x589>
+ .byte 196,226,121,33,192 // vpmovsxbd %xmm0,%xmm0
+ .byte 196,227,117,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
+ .byte 196,194,125,44,25 // vmaskmovps (%r9),%ymm0,%ymm3
+ .byte 233,49,255,255,255 // jmpq 3f7a <_sk_load_8888_avx+0x1e>
HIDDEN _sk_gather_8888_avx
.globl _sk_gather_8888_avx
@@ -18001,10 +17934,10 @@ _sk_gather_8888_avx:
.byte 196,131,121,34,4,152,2 // vpinsrd $0x2,(%r8,%r11,4),%xmm0,%xmm0
.byte 196,131,121,34,28,144,3 // vpinsrd $0x3,(%r8,%r10,4),%xmm0,%xmm3
.byte 196,227,61,24,195,1 // vinsertf128 $0x1,%xmm3,%ymm8,%ymm0
- .byte 197,124,40,21,190,38,0,0 // vmovaps 0x26be(%rip),%ymm10 # 68a0 <_sk_callback_avx+0x59d>
+ .byte 197,124,40,21,5,39,0,0 // vmovaps 0x2705(%rip),%ymm10 # 6800 <_sk_callback_avx+0x619>
.byte 196,193,124,84,194 // vandps %ymm10,%ymm0,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,98,125,24,13,40,37,0,0 // vbroadcastss 0x2528(%rip),%ymm9 # 671c <_sk_callback_avx+0x419>
+ .byte 196,98,125,24,13,243,36,0,0 // vbroadcastss 0x24f3(%rip),%ymm9 # 6600 <_sk_callback_avx+0x419>
.byte 196,193,124,89,193 // vmulps %ymm9,%ymm0,%ymm0
.byte 196,193,113,114,208,8 // vpsrld $0x8,%xmm8,%xmm1
.byte 197,233,114,211,8 // vpsrld $0x8,%xmm3,%xmm2
@@ -18034,9 +17967,11 @@ HIDDEN _sk_store_8888_avx
.globl _sk_store_8888_avx
FUNCTION(_sk_store_8888_avx)
_sk_store_8888_avx:
+ .byte 73,137,200 // mov %rcx,%r8
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 76,139,16 // mov (%rax),%r10
- .byte 196,98,125,24,5,182,36,0,0 // vbroadcastss 0x24b6(%rip),%ymm8 # 6720 <_sk_callback_avx+0x41d>
+ .byte 76,141,12,189,0,0,0,0 // lea 0x0(,%rdi,4),%r9
+ .byte 76,3,8 // add (%rax),%r9
+ .byte 196,98,125,24,5,118,36,0,0 // vbroadcastss 0x2476(%rip),%ymm8 # 6604 <_sk_callback_avx+0x41d>
.byte 196,65,124,89,200 // vmulps %ymm8,%ymm0,%ymm9
.byte 196,65,125,91,201 // vcvtps2dq %ymm9,%ymm9
.byte 196,65,116,89,208 // vmulps %ymm8,%ymm1,%ymm10
@@ -18060,56 +17995,26 @@ _sk_store_8888_avx:
.byte 196,67,37,24,192,1 // vinsertf128 $0x1,%xmm8,%ymm11,%ymm8
.byte 196,65,45,86,192 // vorpd %ymm8,%ymm10,%ymm8
.byte 196,65,53,86,192 // vorpd %ymm8,%ymm9,%ymm8
- .byte 72,133,201 // test %rcx,%rcx
- .byte 117,10 // jne 42f8 <_sk_store_8888_avx+0x9c>
- .byte 196,65,124,17,4,186 // vmovups %ymm8,(%r10,%rdi,4)
+ .byte 77,133,192 // test %r8,%r8
+ .byte 117,12 // jne 421e <_sk_store_8888_avx+0xa9>
+ .byte 196,65,124,17,1 // vmovups %ymm8,(%r9)
.byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 76,137,193 // mov %r8,%rcx
.byte 255,224 // jmpq *%rax
- .byte 65,137,200 // mov %ecx,%r8d
- .byte 65,128,224,7 // and $0x7,%r8b
- .byte 65,254,200 // dec %r8b
- .byte 65,128,248,6 // cmp $0x6,%r8b
- .byte 119,236 // ja 42f4 <_sk_store_8888_avx+0x98>
- .byte 69,15,182,192 // movzbl %r8b,%r8d
- .byte 76,141,13,85,0,0,0 // lea 0x55(%rip),%r9 # 4368 <_sk_store_8888_avx+0x10c>
- .byte 75,99,4,129 // movslq (%r9,%r8,4),%rax
- .byte 76,1,200 // add %r9,%rax
- .byte 255,224 // jmpq *%rax
- .byte 196,67,125,25,193,1 // vextractf128 $0x1,%ymm8,%xmm9
- .byte 196,67,121,22,76,186,24,2 // vpextrd $0x2,%xmm9,0x18(%r10,%rdi,4)
- .byte 196,67,125,25,193,1 // vextractf128 $0x1,%ymm8,%xmm9
- .byte 196,67,121,22,76,186,20,1 // vpextrd $0x1,%xmm9,0x14(%r10,%rdi,4)
- .byte 196,67,125,25,193,1 // vextractf128 $0x1,%ymm8,%xmm9
- .byte 196,65,122,17,76,186,16 // vmovss %xmm9,0x10(%r10,%rdi,4)
- .byte 196,67,121,22,68,186,12,3 // vpextrd $0x3,%xmm8,0xc(%r10,%rdi,4)
- .byte 196,67,121,22,68,186,8,2 // vpextrd $0x2,%xmm8,0x8(%r10,%rdi,4)
- .byte 196,67,121,22,68,186,4,1 // vpextrd $0x1,%xmm8,0x4(%r10,%rdi,4)
- .byte 196,65,121,126,4,186 // vmovd %xmm8,(%r10,%rdi,4)
- .byte 235,143 // jmp 42f4 <_sk_store_8888_avx+0x98>
- .byte 15,31,0 // nopl (%rax)
- .byte 245 // cmc
- .byte 255 // (bad)
- .byte 255 // (bad)
- .byte 255 // (bad)
- .byte 237 // in (%dx),%eax
- .byte 255 // (bad)
- .byte 255 // (bad)
- .byte 255,229 // jmpq *%rbp
- .byte 255 // (bad)
- .byte 255 // (bad)
- .byte 255 // (bad)
- .byte 221,255 // (bad)
- .byte 255 // (bad)
- .byte 255,208 // callq *%rax
- .byte 255 // (bad)
- .byte 255 // (bad)
- .byte 255,194 // inc %edx
- .byte 255 // (bad)
- .byte 255 // (bad)
- .byte 255 // .byte 0xff
- .byte 180,255 // mov $0xff,%ah
- .byte 255 // (bad)
- .byte 255 // .byte 0xff
+ .byte 185,8,0,0,0 // mov $0x8,%ecx
+ .byte 68,41,193 // sub %r8d,%ecx
+ .byte 192,225,3 // shl $0x3,%cl
+ .byte 72,199,192,255,255,255,255 // mov $0xffffffffffffffff,%rax
+ .byte 72,211,232 // shr %cl,%rax
+ .byte 196,97,249,110,200 // vmovq %rax,%xmm9
+ .byte 196,66,121,48,201 // vpmovzxbw %xmm9,%xmm9
+ .byte 196,98,49,0,21,58,37,0,0 // vpshufb 0x253a(%rip),%xmm9,%xmm10 # 6780 <_sk_callback_avx+0x599>
+ .byte 196,66,121,33,210 // vpmovsxbd %xmm10,%xmm10
+ .byte 196,98,49,0,13,60,37,0,0 // vpshufb 0x253c(%rip),%xmm9,%xmm9 # 6790 <_sk_callback_avx+0x5a9>
+ .byte 196,66,121,33,201 // vpmovsxbd %xmm9,%xmm9
+ .byte 196,67,45,24,201,1 // vinsertf128 $0x1,%xmm9,%ymm10,%ymm9
+ .byte 196,66,53,46,1 // vmaskmovps %ymm8,%ymm9,(%r9)
+ .byte 235,177 // jmp 4217 <_sk_store_8888_avx+0xa2>
HIDDEN _sk_load_f16_avx
.globl _sk_load_f16_avx
@@ -18123,7 +18028,7 @@ _sk_load_f16_avx:
.byte 197,252,17,116,36,192 // vmovups %ymm6,-0x40(%rsp)
.byte 197,252,17,108,36,160 // vmovups %ymm5,-0x60(%rsp)
.byte 197,254,127,100,36,128 // vmovdqu %ymm4,-0x80(%rsp)
- .byte 15,133,141,2,0,0 // jne 463b <_sk_load_f16_avx+0x2b7>
+ .byte 15,133,141,2,0,0 // jne 451d <_sk_load_f16_avx+0x2b7>
.byte 197,121,16,4,248 // vmovupd (%rax,%rdi,8),%xmm8
.byte 197,249,16,84,248,16 // vmovupd 0x10(%rax,%rdi,8),%xmm2
.byte 197,249,16,76,248,32 // vmovupd 0x20(%rax,%rdi,8),%xmm1
@@ -18141,13 +18046,13 @@ _sk_load_f16_avx:
.byte 197,249,105,201 // vpunpckhwd %xmm1,%xmm0,%xmm1
.byte 196,226,121,51,192 // vpmovzxwd %xmm0,%xmm0
.byte 196,227,125,24,193,1 // vinsertf128 $0x1,%xmm1,%ymm0,%ymm0
- .byte 196,98,125,24,37,29,35,0,0 // vbroadcastss 0x231d(%rip),%ymm12 # 6724 <_sk_callback_avx+0x421>
+ .byte 196,98,125,24,37,31,35,0,0 // vbroadcastss 0x231f(%rip),%ymm12 # 6608 <_sk_callback_avx+0x421>
.byte 196,193,124,84,204 // vandps %ymm12,%ymm0,%ymm1
.byte 197,252,87,193 // vxorps %ymm1,%ymm0,%ymm0
.byte 196,195,125,25,198,1 // vextractf128 $0x1,%ymm0,%xmm14
- .byte 196,98,121,24,29,9,35,0,0 // vbroadcastss 0x2309(%rip),%xmm11 # 6728 <_sk_callback_avx+0x425>
+ .byte 196,98,121,24,29,11,35,0,0 // vbroadcastss 0x230b(%rip),%xmm11 # 660c <_sk_callback_avx+0x425>
.byte 196,193,8,87,219 // vxorps %xmm11,%xmm14,%xmm3
- .byte 196,98,121,24,45,255,34,0,0 // vbroadcastss 0x22ff(%rip),%xmm13 # 672c <_sk_callback_avx+0x429>
+ .byte 196,98,121,24,45,1,35,0,0 // vbroadcastss 0x2301(%rip),%xmm13 # 6610 <_sk_callback_avx+0x429>
.byte 197,145,102,219 // vpcmpgtd %xmm3,%xmm13,%xmm3
.byte 196,65,120,87,211 // vxorps %xmm11,%xmm0,%xmm10
.byte 196,65,17,102,210 // vpcmpgtd %xmm10,%xmm13,%xmm10
@@ -18161,7 +18066,7 @@ _sk_load_f16_avx:
.byte 196,227,125,24,195,1 // vinsertf128 $0x1,%xmm3,%ymm0,%ymm0
.byte 197,252,86,193 // vorps %ymm1,%ymm0,%ymm0
.byte 196,227,125,25,193,1 // vextractf128 $0x1,%ymm0,%xmm1
- .byte 196,226,121,24,29,181,34,0,0 // vbroadcastss 0x22b5(%rip),%xmm3 # 6730 <_sk_callback_avx+0x42d>
+ .byte 196,226,121,24,29,183,34,0,0 // vbroadcastss 0x22b7(%rip),%xmm3 # 6614 <_sk_callback_avx+0x42d>
.byte 197,241,254,203 // vpaddd %xmm3,%xmm1,%xmm1
.byte 197,249,254,195 // vpaddd %xmm3,%xmm0,%xmm0
.byte 196,227,125,24,193,1 // vinsertf128 $0x1,%xmm1,%ymm0,%ymm0
@@ -18254,29 +18159,29 @@ _sk_load_f16_avx:
.byte 197,123,16,4,248 // vmovsd (%rax,%rdi,8),%xmm8
.byte 196,65,49,239,201 // vpxor %xmm9,%xmm9,%xmm9
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,79 // je 469a <_sk_load_f16_avx+0x316>
+ .byte 116,79 // je 457c <_sk_load_f16_avx+0x316>
.byte 197,57,22,68,248,8 // vmovhpd 0x8(%rax,%rdi,8),%xmm8,%xmm8
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,67 // jb 469a <_sk_load_f16_avx+0x316>
+ .byte 114,67 // jb 457c <_sk_load_f16_avx+0x316>
.byte 197,251,16,84,248,16 // vmovsd 0x10(%rax,%rdi,8),%xmm2
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 116,68 // je 46a7 <_sk_load_f16_avx+0x323>
+ .byte 116,68 // je 4589 <_sk_load_f16_avx+0x323>
.byte 197,233,22,84,248,24 // vmovhpd 0x18(%rax,%rdi,8),%xmm2,%xmm2
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,56 // jb 46a7 <_sk_load_f16_avx+0x323>
+ .byte 114,56 // jb 4589 <_sk_load_f16_avx+0x323>
.byte 197,251,16,76,248,32 // vmovsd 0x20(%rax,%rdi,8),%xmm1
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 15,132,70,253,255,255 // je 43c5 <_sk_load_f16_avx+0x41>
+ .byte 15,132,70,253,255,255 // je 42a7 <_sk_load_f16_avx+0x41>
.byte 197,241,22,76,248,40 // vmovhpd 0x28(%rax,%rdi,8),%xmm1,%xmm1
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 15,130,54,253,255,255 // jb 43c5 <_sk_load_f16_avx+0x41>
+ .byte 15,130,54,253,255,255 // jb 42a7 <_sk_load_f16_avx+0x41>
.byte 197,122,126,76,248,48 // vmovq 0x30(%rax,%rdi,8),%xmm9
- .byte 233,43,253,255,255 // jmpq 43c5 <_sk_load_f16_avx+0x41>
+ .byte 233,43,253,255,255 // jmpq 42a7 <_sk_load_f16_avx+0x41>
.byte 197,241,87,201 // vxorpd %xmm1,%xmm1,%xmm1
.byte 197,233,87,210 // vxorpd %xmm2,%xmm2,%xmm2
- .byte 233,30,253,255,255 // jmpq 43c5 <_sk_load_f16_avx+0x41>
+ .byte 233,30,253,255,255 // jmpq 42a7 <_sk_load_f16_avx+0x41>
.byte 197,241,87,201 // vxorpd %xmm1,%xmm1,%xmm1
- .byte 233,21,253,255,255 // jmpq 43c5 <_sk_load_f16_avx+0x41>
+ .byte 233,21,253,255,255 // jmpq 42a7 <_sk_load_f16_avx+0x41>
HIDDEN _sk_gather_f16_avx
.globl _sk_gather_f16_avx
@@ -18340,13 +18245,13 @@ _sk_gather_f16_avx:
.byte 197,249,105,210 // vpunpckhwd %xmm2,%xmm0,%xmm2
.byte 196,226,121,51,192 // vpmovzxwd %xmm0,%xmm0
.byte 196,227,125,24,194,1 // vinsertf128 $0x1,%xmm2,%ymm0,%ymm0
- .byte 196,98,125,24,37,121,31,0,0 // vbroadcastss 0x1f79(%rip),%ymm12 # 6734 <_sk_callback_avx+0x431>
+ .byte 196,98,125,24,37,123,31,0,0 // vbroadcastss 0x1f7b(%rip),%ymm12 # 6618 <_sk_callback_avx+0x431>
.byte 196,193,124,84,212 // vandps %ymm12,%ymm0,%ymm2
.byte 197,252,87,194 // vxorps %ymm2,%ymm0,%ymm0
.byte 196,195,125,25,198,1 // vextractf128 $0x1,%ymm0,%xmm14
- .byte 196,98,121,24,29,101,31,0,0 // vbroadcastss 0x1f65(%rip),%xmm11 # 6738 <_sk_callback_avx+0x435>
+ .byte 196,98,121,24,29,103,31,0,0 // vbroadcastss 0x1f67(%rip),%xmm11 # 661c <_sk_callback_avx+0x435>
.byte 196,193,8,87,219 // vxorps %xmm11,%xmm14,%xmm3
- .byte 196,98,121,24,45,91,31,0,0 // vbroadcastss 0x1f5b(%rip),%xmm13 # 673c <_sk_callback_avx+0x439>
+ .byte 196,98,121,24,45,93,31,0,0 // vbroadcastss 0x1f5d(%rip),%xmm13 # 6620 <_sk_callback_avx+0x439>
.byte 197,145,102,219 // vpcmpgtd %xmm3,%xmm13,%xmm3
.byte 196,65,120,87,211 // vxorps %xmm11,%xmm0,%xmm10
.byte 196,65,17,102,210 // vpcmpgtd %xmm10,%xmm13,%xmm10
@@ -18360,7 +18265,7 @@ _sk_gather_f16_avx:
.byte 196,227,125,24,195,1 // vinsertf128 $0x1,%xmm3,%ymm0,%ymm0
.byte 197,252,86,194 // vorps %ymm2,%ymm0,%ymm0
.byte 196,227,125,25,194,1 // vextractf128 $0x1,%ymm0,%xmm2
- .byte 196,226,121,24,29,17,31,0,0 // vbroadcastss 0x1f11(%rip),%xmm3 # 6740 <_sk_callback_avx+0x43d>
+ .byte 196,226,121,24,29,19,31,0,0 // vbroadcastss 0x1f13(%rip),%xmm3 # 6624 <_sk_callback_avx+0x43d>
.byte 197,233,254,211 // vpaddd %xmm3,%xmm2,%xmm2
.byte 197,249,254,195 // vpaddd %xmm3,%xmm0,%xmm0
.byte 196,227,125,24,194,1 // vinsertf128 $0x1,%xmm2,%ymm0,%ymm0
@@ -18464,12 +18369,12 @@ _sk_store_f16_avx:
.byte 197,252,17,52,36 // vmovups %ymm6,(%rsp)
.byte 197,252,17,108,36,224 // vmovups %ymm5,-0x20(%rsp)
.byte 197,252,17,100,36,192 // vmovups %ymm4,-0x40(%rsp)
- .byte 196,98,125,24,13,42,29,0,0 // vbroadcastss 0x1d2a(%rip),%ymm9 # 6744 <_sk_callback_avx+0x441>
+ .byte 196,98,125,24,13,44,29,0,0 // vbroadcastss 0x1d2c(%rip),%ymm9 # 6628 <_sk_callback_avx+0x441>
.byte 196,65,124,84,209 // vandps %ymm9,%ymm0,%ymm10
.byte 197,252,17,68,36,128 // vmovups %ymm0,-0x80(%rsp)
.byte 196,65,124,87,218 // vxorps %ymm10,%ymm0,%ymm11
.byte 196,67,125,25,220,1 // vextractf128 $0x1,%ymm11,%xmm12
- .byte 196,98,121,24,5,15,29,0,0 // vbroadcastss 0x1d0f(%rip),%xmm8 # 6748 <_sk_callback_avx+0x445>
+ .byte 196,98,121,24,5,17,29,0,0 // vbroadcastss 0x1d11(%rip),%xmm8 # 662c <_sk_callback_avx+0x445>
.byte 196,65,57,102,236 // vpcmpgtd %xmm12,%xmm8,%xmm13
.byte 196,65,57,102,243 // vpcmpgtd %xmm11,%xmm8,%xmm14
.byte 196,67,13,24,237,1 // vinsertf128 $0x1,%xmm13,%ymm14,%ymm13
@@ -18479,7 +18384,7 @@ _sk_store_f16_avx:
.byte 196,67,13,24,242,1 // vinsertf128 $0x1,%xmm10,%ymm14,%ymm14
.byte 196,193,33,114,211,13 // vpsrld $0xd,%xmm11,%xmm11
.byte 196,193,25,114,212,13 // vpsrld $0xd,%xmm12,%xmm12
- .byte 196,98,125,24,21,214,28,0,0 // vbroadcastss 0x1cd6(%rip),%ymm10 # 674c <_sk_callback_avx+0x449>
+ .byte 196,98,125,24,21,216,28,0,0 // vbroadcastss 0x1cd8(%rip),%ymm10 # 6630 <_sk_callback_avx+0x449>
.byte 196,65,12,86,242 // vorps %ymm10,%ymm14,%ymm14
.byte 196,67,125,25,247,1 // vextractf128 $0x1,%ymm14,%xmm15
.byte 196,65,1,254,228 // vpaddd %xmm12,%xmm15,%xmm12
@@ -18561,7 +18466,7 @@ _sk_store_f16_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 72,139,0 // mov (%rax),%rax
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,66 // jne 4c54 <_sk_store_f16_avx+0x25e>
+ .byte 117,66 // jne 4b36 <_sk_store_f16_avx+0x25e>
.byte 197,120,17,28,248 // vmovups %xmm11,(%rax,%rdi,8)
.byte 197,120,17,84,248,16 // vmovups %xmm10,0x10(%rax,%rdi,8)
.byte 197,120,17,76,248,32 // vmovups %xmm9,0x20(%rax,%rdi,8)
@@ -18577,22 +18482,22 @@ _sk_store_f16_avx:
.byte 255,224 // jmpq *%rax
.byte 197,121,214,28,248 // vmovq %xmm11,(%rax,%rdi,8)
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,202 // je 4c29 <_sk_store_f16_avx+0x233>
+ .byte 116,202 // je 4b0b <_sk_store_f16_avx+0x233>
.byte 197,121,23,92,248,8 // vmovhpd %xmm11,0x8(%rax,%rdi,8)
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,190 // jb 4c29 <_sk_store_f16_avx+0x233>
+ .byte 114,190 // jb 4b0b <_sk_store_f16_avx+0x233>
.byte 197,121,214,84,248,16 // vmovq %xmm10,0x10(%rax,%rdi,8)
- .byte 116,182 // je 4c29 <_sk_store_f16_avx+0x233>
+ .byte 116,182 // je 4b0b <_sk_store_f16_avx+0x233>
.byte 197,121,23,84,248,24 // vmovhpd %xmm10,0x18(%rax,%rdi,8)
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,170 // jb 4c29 <_sk_store_f16_avx+0x233>
+ .byte 114,170 // jb 4b0b <_sk_store_f16_avx+0x233>
.byte 197,121,214,76,248,32 // vmovq %xmm9,0x20(%rax,%rdi,8)
- .byte 116,162 // je 4c29 <_sk_store_f16_avx+0x233>
+ .byte 116,162 // je 4b0b <_sk_store_f16_avx+0x233>
.byte 197,121,23,76,248,40 // vmovhpd %xmm9,0x28(%rax,%rdi,8)
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 114,150 // jb 4c29 <_sk_store_f16_avx+0x233>
+ .byte 114,150 // jb 4b0b <_sk_store_f16_avx+0x233>
.byte 197,121,214,68,248,48 // vmovq %xmm8,0x30(%rax,%rdi,8)
- .byte 235,142 // jmp 4c29 <_sk_store_f16_avx+0x233>
+ .byte 235,142 // jmp 4b0b <_sk_store_f16_avx+0x233>
HIDDEN _sk_load_u16_be_avx
.globl _sk_load_u16_be_avx
@@ -18602,7 +18507,7 @@ _sk_load_u16_be_avx:
.byte 76,139,0 // mov (%rax),%r8
.byte 72,141,4,189,0,0,0,0 // lea 0x0(,%rdi,4),%rax
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,253,0,0,0 // jne 4dae <_sk_load_u16_be_avx+0x113>
+ .byte 15,133,253,0,0,0 // jne 4c90 <_sk_load_u16_be_avx+0x113>
.byte 196,65,121,16,4,64 // vmovupd (%r8,%rax,2),%xmm8
.byte 196,193,121,16,84,64,16 // vmovupd 0x10(%r8,%rax,2),%xmm2
.byte 196,193,121,16,92,64,32 // vmovupd 0x20(%r8,%rax,2),%xmm3
@@ -18624,7 +18529,7 @@ _sk_load_u16_be_avx:
.byte 196,226,121,51,192 // vpmovzxwd %xmm0,%xmm0
.byte 196,227,125,24,193,1 // vinsertf128 $0x1,%xmm1,%ymm0,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,98,125,24,29,46,26,0,0 // vbroadcastss 0x1a2e(%rip),%ymm11 # 6750 <_sk_callback_avx+0x44d>
+ .byte 196,98,125,24,29,48,26,0,0 // vbroadcastss 0x1a30(%rip),%ymm11 # 6634 <_sk_callback_avx+0x44d>
.byte 196,193,124,89,195 // vmulps %ymm11,%ymm0,%ymm0
.byte 197,177,109,202 // vpunpckhqdq %xmm2,%xmm9,%xmm1
.byte 197,233,113,241,8 // vpsllw $0x8,%xmm1,%xmm2
@@ -18658,29 +18563,29 @@ _sk_load_u16_be_avx:
.byte 196,65,123,16,4,64 // vmovsd (%r8,%rax,2),%xmm8
.byte 196,65,49,239,201 // vpxor %xmm9,%xmm9,%xmm9
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,85 // je 4e14 <_sk_load_u16_be_avx+0x179>
+ .byte 116,85 // je 4cf6 <_sk_load_u16_be_avx+0x179>
.byte 196,65,57,22,68,64,8 // vmovhpd 0x8(%r8,%rax,2),%xmm8,%xmm8
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,72 // jb 4e14 <_sk_load_u16_be_avx+0x179>
+ .byte 114,72 // jb 4cf6 <_sk_load_u16_be_avx+0x179>
.byte 196,193,123,16,84,64,16 // vmovsd 0x10(%r8,%rax,2),%xmm2
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 116,72 // je 4e21 <_sk_load_u16_be_avx+0x186>
+ .byte 116,72 // je 4d03 <_sk_load_u16_be_avx+0x186>
.byte 196,193,105,22,84,64,24 // vmovhpd 0x18(%r8,%rax,2),%xmm2,%xmm2
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,59 // jb 4e21 <_sk_load_u16_be_avx+0x186>
+ .byte 114,59 // jb 4d03 <_sk_load_u16_be_avx+0x186>
.byte 196,193,123,16,92,64,32 // vmovsd 0x20(%r8,%rax,2),%xmm3
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 15,132,213,254,255,255 // je 4ccc <_sk_load_u16_be_avx+0x31>
+ .byte 15,132,213,254,255,255 // je 4bae <_sk_load_u16_be_avx+0x31>
.byte 196,193,97,22,92,64,40 // vmovhpd 0x28(%r8,%rax,2),%xmm3,%xmm3
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 15,130,196,254,255,255 // jb 4ccc <_sk_load_u16_be_avx+0x31>
+ .byte 15,130,196,254,255,255 // jb 4bae <_sk_load_u16_be_avx+0x31>
.byte 196,65,122,126,76,64,48 // vmovq 0x30(%r8,%rax,2),%xmm9
- .byte 233,184,254,255,255 // jmpq 4ccc <_sk_load_u16_be_avx+0x31>
+ .byte 233,184,254,255,255 // jmpq 4bae <_sk_load_u16_be_avx+0x31>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
.byte 197,233,87,210 // vxorpd %xmm2,%xmm2,%xmm2
- .byte 233,171,254,255,255 // jmpq 4ccc <_sk_load_u16_be_avx+0x31>
+ .byte 233,171,254,255,255 // jmpq 4bae <_sk_load_u16_be_avx+0x31>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
- .byte 233,162,254,255,255 // jmpq 4ccc <_sk_load_u16_be_avx+0x31>
+ .byte 233,162,254,255,255 // jmpq 4bae <_sk_load_u16_be_avx+0x31>
HIDDEN _sk_load_rgb_u16_be_avx
.globl _sk_load_rgb_u16_be_avx
@@ -18690,7 +18595,7 @@ _sk_load_rgb_u16_be_avx:
.byte 76,139,0 // mov (%rax),%r8
.byte 72,141,4,127 // lea (%rdi,%rdi,2),%rax
.byte 72,133,201 // test %rcx,%rcx
- .byte 15,133,243,0,0,0 // jne 4f2f <_sk_load_rgb_u16_be_avx+0x105>
+ .byte 15,133,243,0,0,0 // jne 4e11 <_sk_load_rgb_u16_be_avx+0x105>
.byte 196,193,122,111,4,64 // vmovdqu (%r8,%rax,2),%xmm0
.byte 196,193,122,111,84,64,12 // vmovdqu 0xc(%r8,%rax,2),%xmm2
.byte 196,193,122,111,76,64,24 // vmovdqu 0x18(%r8,%rax,2),%xmm1
@@ -18717,7 +18622,7 @@ _sk_load_rgb_u16_be_avx:
.byte 196,226,121,51,192 // vpmovzxwd %xmm0,%xmm0
.byte 196,227,125,24,193,1 // vinsertf128 $0x1,%xmm1,%ymm0,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,98,125,24,29,142,24,0,0 // vbroadcastss 0x188e(%rip),%ymm11 # 6754 <_sk_callback_avx+0x451>
+ .byte 196,98,125,24,29,144,24,0,0 // vbroadcastss 0x1890(%rip),%ymm11 # 6638 <_sk_callback_avx+0x451>
.byte 196,193,124,89,195 // vmulps %ymm11,%ymm0,%ymm0
.byte 197,185,109,202 // vpunpckhqdq %xmm2,%xmm8,%xmm1
.byte 197,233,113,241,8 // vpsllw $0x8,%xmm1,%xmm2
@@ -18738,41 +18643,41 @@ _sk_load_rgb_u16_be_avx:
.byte 197,252,91,210 // vcvtdq2ps %ymm2,%ymm2
.byte 196,193,108,89,211 // vmulps %ymm11,%ymm2,%ymm2
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,29,43,24,0,0 // vbroadcastss 0x182b(%rip),%ymm3 # 6758 <_sk_callback_avx+0x455>
+ .byte 196,226,125,24,29,45,24,0,0 // vbroadcastss 0x182d(%rip),%ymm3 # 663c <_sk_callback_avx+0x455>
.byte 255,224 // jmpq *%rax
.byte 196,193,121,110,4,64 // vmovd (%r8,%rax,2),%xmm0
.byte 196,193,121,196,68,64,4,2 // vpinsrw $0x2,0x4(%r8,%rax,2),%xmm0,%xmm0
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 117,5 // jne 4f48 <_sk_load_rgb_u16_be_avx+0x11e>
- .byte 233,40,255,255,255 // jmpq 4e70 <_sk_load_rgb_u16_be_avx+0x46>
+ .byte 117,5 // jne 4e2a <_sk_load_rgb_u16_be_avx+0x11e>
+ .byte 233,40,255,255,255 // jmpq 4d52 <_sk_load_rgb_u16_be_avx+0x46>
.byte 196,193,121,110,76,64,6 // vmovd 0x6(%r8,%rax,2),%xmm1
.byte 196,65,113,196,68,64,10,2 // vpinsrw $0x2,0xa(%r8,%rax,2),%xmm1,%xmm8
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,26 // jb 4f77 <_sk_load_rgb_u16_be_avx+0x14d>
+ .byte 114,26 // jb 4e59 <_sk_load_rgb_u16_be_avx+0x14d>
.byte 196,193,121,110,76,64,12 // vmovd 0xc(%r8,%rax,2),%xmm1
.byte 196,193,113,196,84,64,16,2 // vpinsrw $0x2,0x10(%r8,%rax,2),%xmm1,%xmm2
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 117,10 // jne 4f7c <_sk_load_rgb_u16_be_avx+0x152>
- .byte 233,249,254,255,255 // jmpq 4e70 <_sk_load_rgb_u16_be_avx+0x46>
- .byte 233,244,254,255,255 // jmpq 4e70 <_sk_load_rgb_u16_be_avx+0x46>
+ .byte 117,10 // jne 4e5e <_sk_load_rgb_u16_be_avx+0x152>
+ .byte 233,249,254,255,255 // jmpq 4d52 <_sk_load_rgb_u16_be_avx+0x46>
+ .byte 233,244,254,255,255 // jmpq 4d52 <_sk_load_rgb_u16_be_avx+0x46>
.byte 196,193,121,110,76,64,18 // vmovd 0x12(%r8,%rax,2),%xmm1
.byte 196,65,113,196,76,64,22,2 // vpinsrw $0x2,0x16(%r8,%rax,2),%xmm1,%xmm9
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,26 // jb 4fab <_sk_load_rgb_u16_be_avx+0x181>
+ .byte 114,26 // jb 4e8d <_sk_load_rgb_u16_be_avx+0x181>
.byte 196,193,121,110,76,64,24 // vmovd 0x18(%r8,%rax,2),%xmm1
.byte 196,193,113,196,76,64,28,2 // vpinsrw $0x2,0x1c(%r8,%rax,2),%xmm1,%xmm1
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 117,10 // jne 4fb0 <_sk_load_rgb_u16_be_avx+0x186>
- .byte 233,197,254,255,255 // jmpq 4e70 <_sk_load_rgb_u16_be_avx+0x46>
- .byte 233,192,254,255,255 // jmpq 4e70 <_sk_load_rgb_u16_be_avx+0x46>
+ .byte 117,10 // jne 4e92 <_sk_load_rgb_u16_be_avx+0x186>
+ .byte 233,197,254,255,255 // jmpq 4d52 <_sk_load_rgb_u16_be_avx+0x46>
+ .byte 233,192,254,255,255 // jmpq 4d52 <_sk_load_rgb_u16_be_avx+0x46>
.byte 196,193,121,110,92,64,30 // vmovd 0x1e(%r8,%rax,2),%xmm3
.byte 196,65,97,196,92,64,34,2 // vpinsrw $0x2,0x22(%r8,%rax,2),%xmm3,%xmm11
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 114,20 // jb 4fd9 <_sk_load_rgb_u16_be_avx+0x1af>
+ .byte 114,20 // jb 4ebb <_sk_load_rgb_u16_be_avx+0x1af>
.byte 196,193,121,110,92,64,36 // vmovd 0x24(%r8,%rax,2),%xmm3
.byte 196,193,97,196,92,64,40,2 // vpinsrw $0x2,0x28(%r8,%rax,2),%xmm3,%xmm3
- .byte 233,151,254,255,255 // jmpq 4e70 <_sk_load_rgb_u16_be_avx+0x46>
- .byte 233,146,254,255,255 // jmpq 4e70 <_sk_load_rgb_u16_be_avx+0x46>
+ .byte 233,151,254,255,255 // jmpq 4d52 <_sk_load_rgb_u16_be_avx+0x46>
+ .byte 233,146,254,255,255 // jmpq 4d52 <_sk_load_rgb_u16_be_avx+0x46>
HIDDEN _sk_store_u16_be_avx
.globl _sk_store_u16_be_avx
@@ -18781,7 +18686,7 @@ _sk_store_u16_be_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,0 // mov (%rax),%r8
.byte 72,141,4,189,0,0,0,0 // lea 0x0(,%rdi,4),%rax
- .byte 196,98,125,24,5,104,23,0,0 // vbroadcastss 0x1768(%rip),%ymm8 # 675c <_sk_callback_avx+0x459>
+ .byte 196,98,125,24,5,106,23,0,0 // vbroadcastss 0x176a(%rip),%ymm8 # 6640 <_sk_callback_avx+0x459>
.byte 196,65,124,89,200 // vmulps %ymm8,%ymm0,%ymm9
.byte 196,65,125,91,201 // vcvtps2dq %ymm9,%ymm9
.byte 196,67,125,25,202,1 // vextractf128 $0x1,%ymm9,%xmm10
@@ -18819,7 +18724,7 @@ _sk_store_u16_be_avx:
.byte 196,65,17,98,200 // vpunpckldq %xmm8,%xmm13,%xmm9
.byte 196,65,17,106,192 // vpunpckhdq %xmm8,%xmm13,%xmm8
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,31 // jne 50d8 <_sk_store_u16_be_avx+0xfa>
+ .byte 117,31 // jne 4fba <_sk_store_u16_be_avx+0xfa>
.byte 196,65,120,17,28,64 // vmovups %xmm11,(%r8,%rax,2)
.byte 196,65,120,17,84,64,16 // vmovups %xmm10,0x10(%r8,%rax,2)
.byte 196,65,120,17,76,64,32 // vmovups %xmm9,0x20(%r8,%rax,2)
@@ -18828,22 +18733,22 @@ _sk_store_u16_be_avx:
.byte 255,224 // jmpq *%rax
.byte 196,65,121,214,28,64 // vmovq %xmm11,(%r8,%rax,2)
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,240 // je 50d4 <_sk_store_u16_be_avx+0xf6>
+ .byte 116,240 // je 4fb6 <_sk_store_u16_be_avx+0xf6>
.byte 196,65,121,23,92,64,8 // vmovhpd %xmm11,0x8(%r8,%rax,2)
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,227 // jb 50d4 <_sk_store_u16_be_avx+0xf6>
+ .byte 114,227 // jb 4fb6 <_sk_store_u16_be_avx+0xf6>
.byte 196,65,121,214,84,64,16 // vmovq %xmm10,0x10(%r8,%rax,2)
- .byte 116,218 // je 50d4 <_sk_store_u16_be_avx+0xf6>
+ .byte 116,218 // je 4fb6 <_sk_store_u16_be_avx+0xf6>
.byte 196,65,121,23,84,64,24 // vmovhpd %xmm10,0x18(%r8,%rax,2)
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,205 // jb 50d4 <_sk_store_u16_be_avx+0xf6>
+ .byte 114,205 // jb 4fb6 <_sk_store_u16_be_avx+0xf6>
.byte 196,65,121,214,76,64,32 // vmovq %xmm9,0x20(%r8,%rax,2)
- .byte 116,196 // je 50d4 <_sk_store_u16_be_avx+0xf6>
+ .byte 116,196 // je 4fb6 <_sk_store_u16_be_avx+0xf6>
.byte 196,65,121,23,76,64,40 // vmovhpd %xmm9,0x28(%r8,%rax,2)
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 114,183 // jb 50d4 <_sk_store_u16_be_avx+0xf6>
+ .byte 114,183 // jb 4fb6 <_sk_store_u16_be_avx+0xf6>
.byte 196,65,121,214,68,64,48 // vmovq %xmm8,0x30(%r8,%rax,2)
- .byte 235,174 // jmp 50d4 <_sk_store_u16_be_avx+0xf6>
+ .byte 235,174 // jmp 4fb6 <_sk_store_u16_be_avx+0xf6>
HIDDEN _sk_load_f32_avx
.globl _sk_load_f32_avx
@@ -18851,10 +18756,10 @@ FUNCTION(_sk_load_f32_avx)
_sk_load_f32_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 119,110 // ja 519c <_sk_load_f32_avx+0x76>
+ .byte 119,110 // ja 507e <_sk_load_f32_avx+0x76>
.byte 76,139,0 // mov (%rax),%r8
.byte 76,141,12,189,0,0,0,0 // lea 0x0(,%rdi,4),%r9
- .byte 76,141,21,132,0,0,0 // lea 0x84(%rip),%r10 # 51c4 <_sk_load_f32_avx+0x9e>
+ .byte 76,141,21,134,0,0,0 // lea 0x86(%rip),%r10 # 50a8 <_sk_load_f32_avx+0xa0>
.byte 73,99,4,138 // movslq (%r10,%rcx,4),%rax
.byte 76,1,208 // add %r10,%rax
.byte 255,224 // jmpq *%rax
@@ -18880,19 +18785,19 @@ _sk_load_f32_avx:
.byte 196,193,101,21,216 // vunpckhpd %ymm8,%ymm3,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
- .byte 133,255 // test %edi,%edi
- .byte 255 // (bad)
- .byte 255,204 // dec %esp
+ .byte 102,144 // xchg %ax,%ax
+ .byte 131,255,255 // cmp $0xffffffff,%edi
+ .byte 255,202 // dec %edx
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 191,255,255,255,178 // mov $0xb2ffffff,%edi
+ .byte 189,255,255,255,176 // mov $0xb0ffffff,%ebp
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,165,255,255,255,157 // jmpq *-0x62000001(%rbp)
+ .byte 255,163,255,255,255,155 // jmpq *-0x64000001(%rbx)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,149,255,255,255,141 // callq *-0x72000001(%rbp)
+ .byte 255,147,255,255,255,139 // callq *-0x74000001(%rbx)
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // .byte 0xff
@@ -18913,7 +18818,7 @@ _sk_store_f32_avx:
.byte 196,65,37,20,196 // vunpcklpd %ymm12,%ymm11,%ymm8
.byte 196,65,37,21,220 // vunpckhpd %ymm12,%ymm11,%ymm11
.byte 72,133,201 // test %rcx,%rcx
- .byte 117,55 // jne 5251 <_sk_store_f32_avx+0x6d>
+ .byte 117,55 // jne 5135 <_sk_store_f32_avx+0x6d>
.byte 196,67,45,24,225,1 // vinsertf128 $0x1,%xmm9,%ymm10,%ymm12
.byte 196,67,61,24,235,1 // vinsertf128 $0x1,%xmm11,%ymm8,%ymm13
.byte 196,67,45,6,201,49 // vperm2f128 $0x31,%ymm9,%ymm10,%ymm9
@@ -18926,22 +18831,22 @@ _sk_store_f32_avx:
.byte 255,224 // jmpq *%rax
.byte 196,65,121,17,20,128 // vmovupd %xmm10,(%r8,%rax,4)
.byte 72,131,249,1 // cmp $0x1,%rcx
- .byte 116,240 // je 524d <_sk_store_f32_avx+0x69>
+ .byte 116,240 // je 5131 <_sk_store_f32_avx+0x69>
.byte 196,65,121,17,76,128,16 // vmovupd %xmm9,0x10(%r8,%rax,4)
.byte 72,131,249,3 // cmp $0x3,%rcx
- .byte 114,227 // jb 524d <_sk_store_f32_avx+0x69>
+ .byte 114,227 // jb 5131 <_sk_store_f32_avx+0x69>
.byte 196,65,121,17,68,128,32 // vmovupd %xmm8,0x20(%r8,%rax,4)
- .byte 116,218 // je 524d <_sk_store_f32_avx+0x69>
+ .byte 116,218 // je 5131 <_sk_store_f32_avx+0x69>
.byte 196,65,121,17,92,128,48 // vmovupd %xmm11,0x30(%r8,%rax,4)
.byte 72,131,249,5 // cmp $0x5,%rcx
- .byte 114,205 // jb 524d <_sk_store_f32_avx+0x69>
+ .byte 114,205 // jb 5131 <_sk_store_f32_avx+0x69>
.byte 196,67,125,25,84,128,64,1 // vextractf128 $0x1,%ymm10,0x40(%r8,%rax,4)
- .byte 116,195 // je 524d <_sk_store_f32_avx+0x69>
+ .byte 116,195 // je 5131 <_sk_store_f32_avx+0x69>
.byte 196,67,125,25,76,128,80,1 // vextractf128 $0x1,%ymm9,0x50(%r8,%rax,4)
.byte 72,131,249,7 // cmp $0x7,%rcx
- .byte 114,181 // jb 524d <_sk_store_f32_avx+0x69>
+ .byte 114,181 // jb 5131 <_sk_store_f32_avx+0x69>
.byte 196,67,125,25,68,128,96,1 // vextractf128 $0x1,%ymm8,0x60(%r8,%rax,4)
- .byte 235,171 // jmp 524d <_sk_store_f32_avx+0x69>
+ .byte 235,171 // jmp 5131 <_sk_store_f32_avx+0x69>
HIDDEN _sk_clamp_x_avx
.globl _sk_clamp_x_avx
@@ -19047,12 +18952,12 @@ HIDDEN _sk_luminance_to_alpha_avx
.globl _sk_luminance_to_alpha_avx
FUNCTION(_sk_luminance_to_alpha_avx)
_sk_luminance_to_alpha_avx:
- .byte 196,226,125,24,29,143,19,0,0 // vbroadcastss 0x138f(%rip),%ymm3 # 6760 <_sk_callback_avx+0x45d>
+ .byte 196,226,125,24,29,143,19,0,0 // vbroadcastss 0x138f(%rip),%ymm3 # 6644 <_sk_callback_avx+0x45d>
.byte 197,252,89,195 // vmulps %ymm3,%ymm0,%ymm0
- .byte 196,226,125,24,29,134,19,0,0 // vbroadcastss 0x1386(%rip),%ymm3 # 6764 <_sk_callback_avx+0x461>
+ .byte 196,226,125,24,29,134,19,0,0 // vbroadcastss 0x1386(%rip),%ymm3 # 6648 <_sk_callback_avx+0x461>
.byte 197,244,89,203 // vmulps %ymm3,%ymm1,%ymm1
.byte 197,252,88,193 // vaddps %ymm1,%ymm0,%ymm0
- .byte 196,226,125,24,13,121,19,0,0 // vbroadcastss 0x1379(%rip),%ymm1 # 6768 <_sk_callback_avx+0x465>
+ .byte 196,226,125,24,13,121,19,0,0 // vbroadcastss 0x1379(%rip),%ymm1 # 664c <_sk_callback_avx+0x465>
.byte 197,236,89,201 // vmulps %ymm1,%ymm2,%ymm1
.byte 197,252,88,217 // vaddps %ymm1,%ymm0,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -19271,9 +19176,9 @@ _sk_evenly_spaced_gradient_avx:
.byte 72,139,24 // mov (%rax),%rbx
.byte 72,139,104,8 // mov 0x8(%rax),%rbp
.byte 72,255,203 // dec %rbx
- .byte 120,7 // js 5745 <_sk_evenly_spaced_gradient_avx+0x1f>
+ .byte 120,7 // js 5629 <_sk_evenly_spaced_gradient_avx+0x1f>
.byte 196,225,242,42,203 // vcvtsi2ss %rbx,%xmm1,%xmm1
- .byte 235,21 // jmp 575a <_sk_evenly_spaced_gradient_avx+0x34>
+ .byte 235,21 // jmp 563e <_sk_evenly_spaced_gradient_avx+0x34>
.byte 73,137,216 // mov %rbx,%r8
.byte 73,209,232 // shr %r8
.byte 131,227,1 // and $0x1,%ebx
@@ -19440,12 +19345,12 @@ _sk_gradient_avx:
.byte 76,139,0 // mov (%rax),%r8
.byte 197,244,87,201 // vxorps %ymm1,%ymm1,%ymm1
.byte 73,131,248,2 // cmp $0x2,%r8
- .byte 114,80 // jb 5ae8 <_sk_gradient_avx+0x69>
+ .byte 114,80 // jb 59cc <_sk_gradient_avx+0x69>
.byte 72,139,88,72 // mov 0x48(%rax),%rbx
.byte 73,255,200 // dec %r8
.byte 72,131,195,4 // add $0x4,%rbx
.byte 196,65,52,87,201 // vxorps %ymm9,%ymm9,%ymm9
- .byte 196,98,125,24,21,187,12,0,0 // vbroadcastss 0xcbb(%rip),%ymm10 # 676c <_sk_callback_avx+0x469>
+ .byte 196,98,125,24,21,187,12,0,0 // vbroadcastss 0xcbb(%rip),%ymm10 # 6650 <_sk_callback_avx+0x469>
.byte 197,244,87,201 // vxorps %ymm1,%ymm1,%ymm1
.byte 196,98,125,24,3 // vbroadcastss (%rbx),%ymm8
.byte 197,60,194,192,2 // vcmpleps %ymm0,%ymm8,%ymm8
@@ -19457,7 +19362,7 @@ _sk_gradient_avx:
.byte 196,227,117,24,202,1 // vinsertf128 $0x1,%xmm2,%ymm1,%ymm1
.byte 72,131,195,4 // add $0x4,%rbx
.byte 73,255,200 // dec %r8
- .byte 117,205 // jne 5ab5 <_sk_gradient_avx+0x36>
+ .byte 117,205 // jne 5999 <_sk_gradient_avx+0x36>
.byte 196,195,249,22,200,1 // vpextrq $0x1,%xmm1,%r8
.byte 69,137,193 // mov %r8d,%r9d
.byte 73,193,232,32 // shr $0x20,%r8
@@ -19639,27 +19544,27 @@ _sk_xy_to_unit_angle_avx:
.byte 196,65,52,95,226 // vmaxps %ymm10,%ymm9,%ymm12
.byte 196,65,36,94,220 // vdivps %ymm12,%ymm11,%ymm11
.byte 196,65,36,89,227 // vmulps %ymm11,%ymm11,%ymm12
- .byte 196,98,125,24,45,223,8,0,0 // vbroadcastss 0x8df(%rip),%ymm13 # 6770 <_sk_callback_avx+0x46d>
+ .byte 196,98,125,24,45,223,8,0,0 // vbroadcastss 0x8df(%rip),%ymm13 # 6654 <_sk_callback_avx+0x46d>
.byte 196,65,28,89,237 // vmulps %ymm13,%ymm12,%ymm13
- .byte 196,98,125,24,53,213,8,0,0 // vbroadcastss 0x8d5(%rip),%ymm14 # 6774 <_sk_callback_avx+0x471>
+ .byte 196,98,125,24,53,213,8,0,0 // vbroadcastss 0x8d5(%rip),%ymm14 # 6658 <_sk_callback_avx+0x471>
.byte 196,65,20,88,238 // vaddps %ymm14,%ymm13,%ymm13
.byte 196,65,28,89,237 // vmulps %ymm13,%ymm12,%ymm13
- .byte 196,98,125,24,53,198,8,0,0 // vbroadcastss 0x8c6(%rip),%ymm14 # 6778 <_sk_callback_avx+0x475>
+ .byte 196,98,125,24,53,198,8,0,0 // vbroadcastss 0x8c6(%rip),%ymm14 # 665c <_sk_callback_avx+0x475>
.byte 196,65,20,88,238 // vaddps %ymm14,%ymm13,%ymm13
.byte 196,65,28,89,229 // vmulps %ymm13,%ymm12,%ymm12
- .byte 196,98,125,24,45,183,8,0,0 // vbroadcastss 0x8b7(%rip),%ymm13 # 677c <_sk_callback_avx+0x479>
+ .byte 196,98,125,24,45,183,8,0,0 // vbroadcastss 0x8b7(%rip),%ymm13 # 6660 <_sk_callback_avx+0x479>
.byte 196,65,28,88,229 // vaddps %ymm13,%ymm12,%ymm12
.byte 196,65,36,89,220 // vmulps %ymm12,%ymm11,%ymm11
.byte 196,65,52,194,202,1 // vcmpltps %ymm10,%ymm9,%ymm9
- .byte 196,98,125,24,21,162,8,0,0 // vbroadcastss 0x8a2(%rip),%ymm10 # 6780 <_sk_callback_avx+0x47d>
+ .byte 196,98,125,24,21,162,8,0,0 // vbroadcastss 0x8a2(%rip),%ymm10 # 6664 <_sk_callback_avx+0x47d>
.byte 196,65,44,92,211 // vsubps %ymm11,%ymm10,%ymm10
.byte 196,67,37,74,202,144 // vblendvps %ymm9,%ymm10,%ymm11,%ymm9
.byte 196,193,124,194,192,1 // vcmpltps %ymm8,%ymm0,%ymm0
- .byte 196,98,125,24,21,140,8,0,0 // vbroadcastss 0x88c(%rip),%ymm10 # 6784 <_sk_callback_avx+0x481>
+ .byte 196,98,125,24,21,140,8,0,0 // vbroadcastss 0x88c(%rip),%ymm10 # 6668 <_sk_callback_avx+0x481>
.byte 196,65,44,92,209 // vsubps %ymm9,%ymm10,%ymm10
.byte 196,195,53,74,194,0 // vblendvps %ymm0,%ymm10,%ymm9,%ymm0
.byte 196,65,116,194,200,1 // vcmpltps %ymm8,%ymm1,%ymm9
- .byte 196,98,125,24,21,118,8,0,0 // vbroadcastss 0x876(%rip),%ymm10 # 6788 <_sk_callback_avx+0x485>
+ .byte 196,98,125,24,21,118,8,0,0 // vbroadcastss 0x876(%rip),%ymm10 # 666c <_sk_callback_avx+0x485>
.byte 197,44,92,208 // vsubps %ymm0,%ymm10,%ymm10
.byte 196,195,125,74,194,144 // vblendvps %ymm9,%ymm10,%ymm0,%ymm0
.byte 196,65,124,194,200,3 // vcmpunordps %ymm8,%ymm0,%ymm9
@@ -19683,7 +19588,7 @@ HIDDEN _sk_save_xy_avx
FUNCTION(_sk_save_xy_avx)
_sk_save_xy_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,98,125,24,5,64,8,0,0 // vbroadcastss 0x840(%rip),%ymm8 # 678c <_sk_callback_avx+0x489>
+ .byte 196,98,125,24,5,64,8,0,0 // vbroadcastss 0x840(%rip),%ymm8 # 6670 <_sk_callback_avx+0x489>
.byte 196,65,124,88,200 // vaddps %ymm8,%ymm0,%ymm9
.byte 196,67,125,8,209,1 // vroundps $0x1,%ymm9,%ymm10
.byte 196,65,52,92,202 // vsubps %ymm10,%ymm9,%ymm9
@@ -19720,9 +19625,9 @@ HIDDEN _sk_bilinear_nx_avx
FUNCTION(_sk_bilinear_nx_avx)
_sk_bilinear_nx_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,5,204,7,0,0 // vbroadcastss 0x7cc(%rip),%ymm0 # 6790 <_sk_callback_avx+0x48d>
+ .byte 196,226,125,24,5,204,7,0,0 // vbroadcastss 0x7cc(%rip),%ymm0 # 6674 <_sk_callback_avx+0x48d>
.byte 197,252,88,0 // vaddps (%rax),%ymm0,%ymm0
- .byte 196,98,125,24,5,195,7,0,0 // vbroadcastss 0x7c3(%rip),%ymm8 # 6794 <_sk_callback_avx+0x491>
+ .byte 196,98,125,24,5,195,7,0,0 // vbroadcastss 0x7c3(%rip),%ymm8 # 6678 <_sk_callback_avx+0x491>
.byte 197,60,92,64,64 // vsubps 0x40(%rax),%ymm8,%ymm8
.byte 197,124,17,128,128,0,0,0 // vmovups %ymm8,0x80(%rax)
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -19733,7 +19638,7 @@ HIDDEN _sk_bilinear_px_avx
FUNCTION(_sk_bilinear_px_avx)
_sk_bilinear_px_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,5,171,7,0,0 // vbroadcastss 0x7ab(%rip),%ymm0 # 6798 <_sk_callback_avx+0x495>
+ .byte 196,226,125,24,5,171,7,0,0 // vbroadcastss 0x7ab(%rip),%ymm0 # 667c <_sk_callback_avx+0x495>
.byte 197,252,88,0 // vaddps (%rax),%ymm0,%ymm0
.byte 197,124,16,64,64 // vmovups 0x40(%rax),%ymm8
.byte 197,124,17,128,128,0,0,0 // vmovups %ymm8,0x80(%rax)
@@ -19745,9 +19650,9 @@ HIDDEN _sk_bilinear_ny_avx
FUNCTION(_sk_bilinear_ny_avx)
_sk_bilinear_ny_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,13,143,7,0,0 // vbroadcastss 0x78f(%rip),%ymm1 # 679c <_sk_callback_avx+0x499>
+ .byte 196,226,125,24,13,143,7,0,0 // vbroadcastss 0x78f(%rip),%ymm1 # 6680 <_sk_callback_avx+0x499>
.byte 197,244,88,72,32 // vaddps 0x20(%rax),%ymm1,%ymm1
- .byte 196,98,125,24,5,133,7,0,0 // vbroadcastss 0x785(%rip),%ymm8 # 67a0 <_sk_callback_avx+0x49d>
+ .byte 196,98,125,24,5,133,7,0,0 // vbroadcastss 0x785(%rip),%ymm8 # 6684 <_sk_callback_avx+0x49d>
.byte 197,60,92,64,96 // vsubps 0x60(%rax),%ymm8,%ymm8
.byte 197,124,17,128,160,0,0,0 // vmovups %ymm8,0xa0(%rax)
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -19758,7 +19663,7 @@ HIDDEN _sk_bilinear_py_avx
FUNCTION(_sk_bilinear_py_avx)
_sk_bilinear_py_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,13,109,7,0,0 // vbroadcastss 0x76d(%rip),%ymm1 # 67a4 <_sk_callback_avx+0x4a1>
+ .byte 196,226,125,24,13,109,7,0,0 // vbroadcastss 0x76d(%rip),%ymm1 # 6688 <_sk_callback_avx+0x4a1>
.byte 197,244,88,72,32 // vaddps 0x20(%rax),%ymm1,%ymm1
.byte 197,124,16,64,96 // vmovups 0x60(%rax),%ymm8
.byte 197,124,17,128,160,0,0,0 // vmovups %ymm8,0xa0(%rax)
@@ -19770,14 +19675,14 @@ HIDDEN _sk_bicubic_n3x_avx
FUNCTION(_sk_bicubic_n3x_avx)
_sk_bicubic_n3x_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,5,80,7,0,0 // vbroadcastss 0x750(%rip),%ymm0 # 67a8 <_sk_callback_avx+0x4a5>
+ .byte 196,226,125,24,5,80,7,0,0 // vbroadcastss 0x750(%rip),%ymm0 # 668c <_sk_callback_avx+0x4a5>
.byte 197,252,88,0 // vaddps (%rax),%ymm0,%ymm0
- .byte 196,98,125,24,5,71,7,0,0 // vbroadcastss 0x747(%rip),%ymm8 # 67ac <_sk_callback_avx+0x4a9>
+ .byte 196,98,125,24,5,71,7,0,0 // vbroadcastss 0x747(%rip),%ymm8 # 6690 <_sk_callback_avx+0x4a9>
.byte 197,60,92,64,64 // vsubps 0x40(%rax),%ymm8,%ymm8
.byte 196,65,60,89,200 // vmulps %ymm8,%ymm8,%ymm9
- .byte 196,98,125,24,21,56,7,0,0 // vbroadcastss 0x738(%rip),%ymm10 # 67b0 <_sk_callback_avx+0x4ad>
+ .byte 196,98,125,24,21,56,7,0,0 // vbroadcastss 0x738(%rip),%ymm10 # 6694 <_sk_callback_avx+0x4ad>
.byte 196,65,60,89,194 // vmulps %ymm10,%ymm8,%ymm8
- .byte 196,98,125,24,21,46,7,0,0 // vbroadcastss 0x72e(%rip),%ymm10 # 67b4 <_sk_callback_avx+0x4b1>
+ .byte 196,98,125,24,21,46,7,0,0 // vbroadcastss 0x72e(%rip),%ymm10 # 6698 <_sk_callback_avx+0x4b1>
.byte 196,65,60,88,194 // vaddps %ymm10,%ymm8,%ymm8
.byte 196,65,52,89,192 // vmulps %ymm8,%ymm9,%ymm8
.byte 197,124,17,128,128,0,0,0 // vmovups %ymm8,0x80(%rax)
@@ -19789,19 +19694,19 @@ HIDDEN _sk_bicubic_n1x_avx
FUNCTION(_sk_bicubic_n1x_avx)
_sk_bicubic_n1x_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,5,17,7,0,0 // vbroadcastss 0x711(%rip),%ymm0 # 67b8 <_sk_callback_avx+0x4b5>
+ .byte 196,226,125,24,5,17,7,0,0 // vbroadcastss 0x711(%rip),%ymm0 # 669c <_sk_callback_avx+0x4b5>
.byte 197,252,88,0 // vaddps (%rax),%ymm0,%ymm0
- .byte 196,98,125,24,5,8,7,0,0 // vbroadcastss 0x708(%rip),%ymm8 # 67bc <_sk_callback_avx+0x4b9>
+ .byte 196,98,125,24,5,8,7,0,0 // vbroadcastss 0x708(%rip),%ymm8 # 66a0 <_sk_callback_avx+0x4b9>
.byte 197,60,92,64,64 // vsubps 0x40(%rax),%ymm8,%ymm8
- .byte 196,98,125,24,13,254,6,0,0 // vbroadcastss 0x6fe(%rip),%ymm9 # 67c0 <_sk_callback_avx+0x4bd>
+ .byte 196,98,125,24,13,254,6,0,0 // vbroadcastss 0x6fe(%rip),%ymm9 # 66a4 <_sk_callback_avx+0x4bd>
.byte 196,65,60,89,201 // vmulps %ymm9,%ymm8,%ymm9
- .byte 196,98,125,24,21,244,6,0,0 // vbroadcastss 0x6f4(%rip),%ymm10 # 67c4 <_sk_callback_avx+0x4c1>
+ .byte 196,98,125,24,21,244,6,0,0 // vbroadcastss 0x6f4(%rip),%ymm10 # 66a8 <_sk_callback_avx+0x4c1>
.byte 196,65,52,88,202 // vaddps %ymm10,%ymm9,%ymm9
.byte 196,65,60,89,201 // vmulps %ymm9,%ymm8,%ymm9
- .byte 196,98,125,24,21,229,6,0,0 // vbroadcastss 0x6e5(%rip),%ymm10 # 67c8 <_sk_callback_avx+0x4c5>
+ .byte 196,98,125,24,21,229,6,0,0 // vbroadcastss 0x6e5(%rip),%ymm10 # 66ac <_sk_callback_avx+0x4c5>
.byte 196,65,52,88,202 // vaddps %ymm10,%ymm9,%ymm9
.byte 196,65,60,89,193 // vmulps %ymm9,%ymm8,%ymm8
- .byte 196,98,125,24,13,214,6,0,0 // vbroadcastss 0x6d6(%rip),%ymm9 # 67cc <_sk_callback_avx+0x4c9>
+ .byte 196,98,125,24,13,214,6,0,0 // vbroadcastss 0x6d6(%rip),%ymm9 # 66b0 <_sk_callback_avx+0x4c9>
.byte 196,65,60,88,193 // vaddps %ymm9,%ymm8,%ymm8
.byte 197,124,17,128,128,0,0,0 // vmovups %ymm8,0x80(%rax)
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -19812,17 +19717,17 @@ HIDDEN _sk_bicubic_p1x_avx
FUNCTION(_sk_bicubic_p1x_avx)
_sk_bicubic_p1x_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,98,125,24,5,190,6,0,0 // vbroadcastss 0x6be(%rip),%ymm8 # 67d0 <_sk_callback_avx+0x4cd>
+ .byte 196,98,125,24,5,190,6,0,0 // vbroadcastss 0x6be(%rip),%ymm8 # 66b4 <_sk_callback_avx+0x4cd>
.byte 197,188,88,0 // vaddps (%rax),%ymm8,%ymm0
.byte 197,124,16,72,64 // vmovups 0x40(%rax),%ymm9
- .byte 196,98,125,24,21,176,6,0,0 // vbroadcastss 0x6b0(%rip),%ymm10 # 67d4 <_sk_callback_avx+0x4d1>
+ .byte 196,98,125,24,21,176,6,0,0 // vbroadcastss 0x6b0(%rip),%ymm10 # 66b8 <_sk_callback_avx+0x4d1>
.byte 196,65,52,89,210 // vmulps %ymm10,%ymm9,%ymm10
- .byte 196,98,125,24,29,166,6,0,0 // vbroadcastss 0x6a6(%rip),%ymm11 # 67d8 <_sk_callback_avx+0x4d5>
+ .byte 196,98,125,24,29,166,6,0,0 // vbroadcastss 0x6a6(%rip),%ymm11 # 66bc <_sk_callback_avx+0x4d5>
.byte 196,65,44,88,211 // vaddps %ymm11,%ymm10,%ymm10
.byte 196,65,52,89,210 // vmulps %ymm10,%ymm9,%ymm10
.byte 196,65,44,88,192 // vaddps %ymm8,%ymm10,%ymm8
.byte 196,65,52,89,192 // vmulps %ymm8,%ymm9,%ymm8
- .byte 196,98,125,24,13,141,6,0,0 // vbroadcastss 0x68d(%rip),%ymm9 # 67dc <_sk_callback_avx+0x4d9>
+ .byte 196,98,125,24,13,141,6,0,0 // vbroadcastss 0x68d(%rip),%ymm9 # 66c0 <_sk_callback_avx+0x4d9>
.byte 196,65,60,88,193 // vaddps %ymm9,%ymm8,%ymm8
.byte 197,124,17,128,128,0,0,0 // vmovups %ymm8,0x80(%rax)
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -19833,13 +19738,13 @@ HIDDEN _sk_bicubic_p3x_avx
FUNCTION(_sk_bicubic_p3x_avx)
_sk_bicubic_p3x_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,5,117,6,0,0 // vbroadcastss 0x675(%rip),%ymm0 # 67e0 <_sk_callback_avx+0x4dd>
+ .byte 196,226,125,24,5,117,6,0,0 // vbroadcastss 0x675(%rip),%ymm0 # 66c4 <_sk_callback_avx+0x4dd>
.byte 197,252,88,0 // vaddps (%rax),%ymm0,%ymm0
.byte 197,124,16,64,64 // vmovups 0x40(%rax),%ymm8
.byte 196,65,60,89,200 // vmulps %ymm8,%ymm8,%ymm9
- .byte 196,98,125,24,21,98,6,0,0 // vbroadcastss 0x662(%rip),%ymm10 # 67e4 <_sk_callback_avx+0x4e1>
+ .byte 196,98,125,24,21,98,6,0,0 // vbroadcastss 0x662(%rip),%ymm10 # 66c8 <_sk_callback_avx+0x4e1>
.byte 196,65,60,89,194 // vmulps %ymm10,%ymm8,%ymm8
- .byte 196,98,125,24,21,88,6,0,0 // vbroadcastss 0x658(%rip),%ymm10 # 67e8 <_sk_callback_avx+0x4e5>
+ .byte 196,98,125,24,21,88,6,0,0 // vbroadcastss 0x658(%rip),%ymm10 # 66cc <_sk_callback_avx+0x4e5>
.byte 196,65,60,88,194 // vaddps %ymm10,%ymm8,%ymm8
.byte 196,65,52,89,192 // vmulps %ymm8,%ymm9,%ymm8
.byte 197,124,17,128,128,0,0,0 // vmovups %ymm8,0x80(%rax)
@@ -19851,14 +19756,14 @@ HIDDEN _sk_bicubic_n3y_avx
FUNCTION(_sk_bicubic_n3y_avx)
_sk_bicubic_n3y_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,13,59,6,0,0 // vbroadcastss 0x63b(%rip),%ymm1 # 67ec <_sk_callback_avx+0x4e9>
+ .byte 196,226,125,24,13,59,6,0,0 // vbroadcastss 0x63b(%rip),%ymm1 # 66d0 <_sk_callback_avx+0x4e9>
.byte 197,244,88,72,32 // vaddps 0x20(%rax),%ymm1,%ymm1
- .byte 196,98,125,24,5,49,6,0,0 // vbroadcastss 0x631(%rip),%ymm8 # 67f0 <_sk_callback_avx+0x4ed>
+ .byte 196,98,125,24,5,49,6,0,0 // vbroadcastss 0x631(%rip),%ymm8 # 66d4 <_sk_callback_avx+0x4ed>
.byte 197,60,92,64,96 // vsubps 0x60(%rax),%ymm8,%ymm8
.byte 196,65,60,89,200 // vmulps %ymm8,%ymm8,%ymm9
- .byte 196,98,125,24,21,34,6,0,0 // vbroadcastss 0x622(%rip),%ymm10 # 67f4 <_sk_callback_avx+0x4f1>
+ .byte 196,98,125,24,21,34,6,0,0 // vbroadcastss 0x622(%rip),%ymm10 # 66d8 <_sk_callback_avx+0x4f1>
.byte 196,65,60,89,194 // vmulps %ymm10,%ymm8,%ymm8
- .byte 196,98,125,24,21,24,6,0,0 // vbroadcastss 0x618(%rip),%ymm10 # 67f8 <_sk_callback_avx+0x4f5>
+ .byte 196,98,125,24,21,24,6,0,0 // vbroadcastss 0x618(%rip),%ymm10 # 66dc <_sk_callback_avx+0x4f5>
.byte 196,65,60,88,194 // vaddps %ymm10,%ymm8,%ymm8
.byte 196,65,52,89,192 // vmulps %ymm8,%ymm9,%ymm8
.byte 197,124,17,128,160,0,0,0 // vmovups %ymm8,0xa0(%rax)
@@ -19870,19 +19775,19 @@ HIDDEN _sk_bicubic_n1y_avx
FUNCTION(_sk_bicubic_n1y_avx)
_sk_bicubic_n1y_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,13,251,5,0,0 // vbroadcastss 0x5fb(%rip),%ymm1 # 67fc <_sk_callback_avx+0x4f9>
+ .byte 196,226,125,24,13,251,5,0,0 // vbroadcastss 0x5fb(%rip),%ymm1 # 66e0 <_sk_callback_avx+0x4f9>
.byte 197,244,88,72,32 // vaddps 0x20(%rax),%ymm1,%ymm1
- .byte 196,98,125,24,5,241,5,0,0 // vbroadcastss 0x5f1(%rip),%ymm8 # 6800 <_sk_callback_avx+0x4fd>
+ .byte 196,98,125,24,5,241,5,0,0 // vbroadcastss 0x5f1(%rip),%ymm8 # 66e4 <_sk_callback_avx+0x4fd>
.byte 197,60,92,64,96 // vsubps 0x60(%rax),%ymm8,%ymm8
- .byte 196,98,125,24,13,231,5,0,0 // vbroadcastss 0x5e7(%rip),%ymm9 # 6804 <_sk_callback_avx+0x501>
+ .byte 196,98,125,24,13,231,5,0,0 // vbroadcastss 0x5e7(%rip),%ymm9 # 66e8 <_sk_callback_avx+0x501>
.byte 196,65,60,89,201 // vmulps %ymm9,%ymm8,%ymm9
- .byte 196,98,125,24,21,221,5,0,0 // vbroadcastss 0x5dd(%rip),%ymm10 # 6808 <_sk_callback_avx+0x505>
+ .byte 196,98,125,24,21,221,5,0,0 // vbroadcastss 0x5dd(%rip),%ymm10 # 66ec <_sk_callback_avx+0x505>
.byte 196,65,52,88,202 // vaddps %ymm10,%ymm9,%ymm9
.byte 196,65,60,89,201 // vmulps %ymm9,%ymm8,%ymm9
- .byte 196,98,125,24,21,206,5,0,0 // vbroadcastss 0x5ce(%rip),%ymm10 # 680c <_sk_callback_avx+0x509>
+ .byte 196,98,125,24,21,206,5,0,0 // vbroadcastss 0x5ce(%rip),%ymm10 # 66f0 <_sk_callback_avx+0x509>
.byte 196,65,52,88,202 // vaddps %ymm10,%ymm9,%ymm9
.byte 196,65,60,89,193 // vmulps %ymm9,%ymm8,%ymm8
- .byte 196,98,125,24,13,191,5,0,0 // vbroadcastss 0x5bf(%rip),%ymm9 # 6810 <_sk_callback_avx+0x50d>
+ .byte 196,98,125,24,13,191,5,0,0 // vbroadcastss 0x5bf(%rip),%ymm9 # 66f4 <_sk_callback_avx+0x50d>
.byte 196,65,60,88,193 // vaddps %ymm9,%ymm8,%ymm8
.byte 197,124,17,128,160,0,0,0 // vmovups %ymm8,0xa0(%rax)
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -19893,17 +19798,17 @@ HIDDEN _sk_bicubic_p1y_avx
FUNCTION(_sk_bicubic_p1y_avx)
_sk_bicubic_p1y_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,98,125,24,5,167,5,0,0 // vbroadcastss 0x5a7(%rip),%ymm8 # 6814 <_sk_callback_avx+0x511>
+ .byte 196,98,125,24,5,167,5,0,0 // vbroadcastss 0x5a7(%rip),%ymm8 # 66f8 <_sk_callback_avx+0x511>
.byte 197,188,88,72,32 // vaddps 0x20(%rax),%ymm8,%ymm1
.byte 197,124,16,72,96 // vmovups 0x60(%rax),%ymm9
- .byte 196,98,125,24,21,152,5,0,0 // vbroadcastss 0x598(%rip),%ymm10 # 6818 <_sk_callback_avx+0x515>
+ .byte 196,98,125,24,21,152,5,0,0 // vbroadcastss 0x598(%rip),%ymm10 # 66fc <_sk_callback_avx+0x515>
.byte 196,65,52,89,210 // vmulps %ymm10,%ymm9,%ymm10
- .byte 196,98,125,24,29,142,5,0,0 // vbroadcastss 0x58e(%rip),%ymm11 # 681c <_sk_callback_avx+0x519>
+ .byte 196,98,125,24,29,142,5,0,0 // vbroadcastss 0x58e(%rip),%ymm11 # 6700 <_sk_callback_avx+0x519>
.byte 196,65,44,88,211 // vaddps %ymm11,%ymm10,%ymm10
.byte 196,65,52,89,210 // vmulps %ymm10,%ymm9,%ymm10
.byte 196,65,44,88,192 // vaddps %ymm8,%ymm10,%ymm8
.byte 196,65,52,89,192 // vmulps %ymm8,%ymm9,%ymm8
- .byte 196,98,125,24,13,117,5,0,0 // vbroadcastss 0x575(%rip),%ymm9 # 6820 <_sk_callback_avx+0x51d>
+ .byte 196,98,125,24,13,117,5,0,0 // vbroadcastss 0x575(%rip),%ymm9 # 6704 <_sk_callback_avx+0x51d>
.byte 196,65,60,88,193 // vaddps %ymm9,%ymm8,%ymm8
.byte 197,124,17,128,160,0,0,0 // vmovups %ymm8,0xa0(%rax)
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -19914,13 +19819,13 @@ HIDDEN _sk_bicubic_p3y_avx
FUNCTION(_sk_bicubic_p3y_avx)
_sk_bicubic_p3y_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,13,93,5,0,0 // vbroadcastss 0x55d(%rip),%ymm1 # 6824 <_sk_callback_avx+0x521>
+ .byte 196,226,125,24,13,93,5,0,0 // vbroadcastss 0x55d(%rip),%ymm1 # 6708 <_sk_callback_avx+0x521>
.byte 197,244,88,72,32 // vaddps 0x20(%rax),%ymm1,%ymm1
.byte 197,124,16,64,96 // vmovups 0x60(%rax),%ymm8
.byte 196,65,60,89,200 // vmulps %ymm8,%ymm8,%ymm9
- .byte 196,98,125,24,21,73,5,0,0 // vbroadcastss 0x549(%rip),%ymm10 # 6828 <_sk_callback_avx+0x525>
+ .byte 196,98,125,24,21,73,5,0,0 // vbroadcastss 0x549(%rip),%ymm10 # 670c <_sk_callback_avx+0x525>
.byte 196,65,60,89,194 // vmulps %ymm10,%ymm8,%ymm8
- .byte 196,98,125,24,21,63,5,0,0 // vbroadcastss 0x53f(%rip),%ymm10 # 682c <_sk_callback_avx+0x529>
+ .byte 196,98,125,24,21,63,5,0,0 // vbroadcastss 0x53f(%rip),%ymm10 # 6710 <_sk_callback_avx+0x529>
.byte 196,65,60,88,194 // vaddps %ymm10,%ymm8,%ymm8
.byte 196,65,52,89,192 // vmulps %ymm8,%ymm9,%ymm8
.byte 197,124,17,128,160,0,0,0 // vmovups %ymm8,0xa0(%rax)
@@ -20044,25 +19949,25 @@ BALIGN4
.byte 153 // cltd
.byte 153 // cltd
.byte 62,61,10,23,63,174 // ds cmp $0xae3f170a,%eax
- .byte 71,225,61 // rex.RXB loope 64d9 <.literal4+0xb1>
+ .byte 71,225,61 // rex.RXB loope 63bd <.literal4+0xb1>
.byte 0,0 // add %al,(%rax)
.byte 128,63,154 // cmpb $0x9a,(%rdi)
.byte 153 // cltd
.byte 153 // cltd
.byte 62,61,10,23,63,174 // ds cmp $0xae3f170a,%eax
- .byte 71,225,61 // rex.RXB loope 64e9 <.literal4+0xc1>
+ .byte 71,225,61 // rex.RXB loope 63cd <.literal4+0xc1>
.byte 0,0 // add %al,(%rax)
.byte 128,63,154 // cmpb $0x9a,(%rdi)
.byte 153 // cltd
.byte 153 // cltd
.byte 62,61,10,23,63,174 // ds cmp $0xae3f170a,%eax
- .byte 71,225,61 // rex.RXB loope 64f9 <.literal4+0xd1>
+ .byte 71,225,61 // rex.RXB loope 63dd <.literal4+0xd1>
.byte 0,0 // add %al,(%rax)
.byte 128,63,154 // cmpb $0x9a,(%rdi)
.byte 153 // cltd
.byte 153 // cltd
.byte 62,61,10,23,63,174 // ds cmp $0xae3f170a,%eax
- .byte 71,225,61 // rex.RXB loope 6509 <.literal4+0xe1>
+ .byte 71,225,61 // rex.RXB loope 63ed <.literal4+0xe1>
.byte 0,0 // add %al,(%rax)
.byte 128,63,0 // cmpb $0x0,(%rdi)
.byte 0,128,63,0,0,128 // add %al,-0x7fffffc1(%rax)
@@ -20110,7 +20015,7 @@ BALIGN4
.byte 190,129,128,128,59 // mov $0x3b808081,%esi
.byte 129,128,128,59,0,248,0,0,8,33 // addl $0x21080000,-0x7ffc480(%rax)
.byte 132,55 // test %dh,(%rdi)
- .byte 224,7 // loopne 6555 <.literal4+0x12d>
+ .byte 224,7 // loopne 6439 <.literal4+0x12d>
.byte 0,0 // add %al,(%rax)
.byte 33,8 // and %ecx,(%rax)
.byte 2,58 // add (%rdx),%bh
@@ -20126,10 +20031,10 @@ BALIGN4
.byte 129,128,128,59,129,128,128,59,0,0 // addl $0x3b80,-0x7f7ec480(%rax)
.byte 0,52,255 // add %dh,(%rdi,%rdi,8)
.byte 255 // (bad)
- .byte 127,0 // jg 657c <.literal4+0x154>
+ .byte 127,0 // jg 6460 <.literal4+0x154>
.byte 0,0 // add %al,(%rax)
.byte 0,63 // add %bh,(%rdi)
- .byte 119,115 // ja 65f5 <.literal4+0x1cd>
+ .byte 119,115 // ja 64d9 <.literal4+0x1cd>
.byte 248 // clc
.byte 194,117,191 // retq $0xbf75
.byte 191,63,249,68,180 // mov $0xb444f93f,%edi
@@ -20143,10 +20048,10 @@ BALIGN4
.byte 0,128,63,0,0,0 // add %al,0x3f(%rax)
.byte 52,255 // xor $0xff,%al
.byte 255 // (bad)
- .byte 127,0 // jg 65b0 <.literal4+0x188>
+ .byte 127,0 // jg 6494 <.literal4+0x188>
.byte 0,0 // add %al,(%rax)
.byte 0,63 // add %bh,(%rdi)
- .byte 119,115 // ja 6629 <.literal4+0x201>
+ .byte 119,115 // ja 650d <.literal4+0x201>
.byte 248 // clc
.byte 194,117,191 // retq $0xbf75
.byte 191,63,249,68,180 // mov $0xb444f93f,%edi
@@ -20160,10 +20065,10 @@ BALIGN4
.byte 0,128,63,0,0,0 // add %al,0x3f(%rax)
.byte 52,255 // xor $0xff,%al
.byte 255 // (bad)
- .byte 127,0 // jg 65e4 <.literal4+0x1bc>
+ .byte 127,0 // jg 64c8 <.literal4+0x1bc>
.byte 0,0 // add %al,(%rax)
.byte 0,63 // add %bh,(%rdi)
- .byte 119,115 // ja 665d <.literal4+0x235>
+ .byte 119,115 // ja 6541 <.literal4+0x235>
.byte 248 // clc
.byte 194,117,191 // retq $0xbf75
.byte 191,63,249,68,180 // mov $0xb444f93f,%edi
@@ -20177,10 +20082,10 @@ BALIGN4
.byte 0,128,63,0,0,0 // add %al,0x3f(%rax)
.byte 52,255 // xor $0xff,%al
.byte 255 // (bad)
- .byte 127,0 // jg 6618 <.literal4+0x1f0>
+ .byte 127,0 // jg 64fc <.literal4+0x1f0>
.byte 0,0 // add %al,(%rax)
.byte 0,63 // add %bh,(%rdi)
- .byte 119,115 // ja 6691 <.literal4+0x269>
+ .byte 119,115 // ja 6575 <.literal4+0x269>
.byte 248 // clc
.byte 194,117,191 // retq $0xbf75
.byte 191,63,249,68,180 // mov $0xb444f93f,%edi
@@ -20193,7 +20098,7 @@ BALIGN4
.byte 0,75,0 // add %cl,0x0(%rbx)
.byte 0,128,63,0,0,200 // add %al,-0x37ffffc1(%rax)
.byte 66,0,0 // rex.X add %al,(%rax)
- .byte 127,67 // jg 668f <.literal4+0x267>
+ .byte 127,67 // jg 6573 <.literal4+0x267>
.byte 0,0 // add %al,(%rax)
.byte 0,195 // add %al,%bl
.byte 0,0 // add %al,(%rax)
@@ -20205,10 +20110,10 @@ BALIGN4
.byte 190,80,128,3,62 // mov $0x3e038050,%esi
.byte 31 // (bad)
.byte 215 // xlat %ds:(%rbx)
- .byte 118,63 // jbe 66af <.literal4+0x287>
+ .byte 118,63 // jbe 6593 <.literal4+0x287>
.byte 246,64,83,63 // testb $0x3f,0x53(%rax)
.byte 129,128,128,59,129,128,128,59,0,0 // addl $0x3b80,-0x7f7ec480(%rax)
- .byte 127,67 // jg 66c3 <.literal4+0x29b>
+ .byte 127,67 // jg 65a7 <.literal4+0x29b>
.byte 129,128,128,59,0,0,128,63,129,128 // addl $0x80813f80,0x3b80(%rax)
.byte 128,59,0 // cmpb $0x0,(%rbx)
.byte 0,128,63,129,128,128 // add %al,-0x7f7f7ec1(%rax)
@@ -20217,7 +20122,7 @@ BALIGN4
.byte 0,0 // add %al,(%rax)
.byte 8,33 // or %ah,(%rcx)
.byte 132,55 // test %dh,(%rdi)
- .byte 224,7 // loopne 66a5 <.literal4+0x27d>
+ .byte 224,7 // loopne 6589 <.literal4+0x27d>
.byte 0,0 // add %al,(%rax)
.byte 33,8 // and %ecx,(%rax)
.byte 2,58 // add (%rdx),%bh
@@ -20229,7 +20134,7 @@ BALIGN4
.byte 0,0 // add %al,(%rax)
.byte 8,33 // or %ah,(%rcx)
.byte 132,55 // test %dh,(%rdi)
- .byte 224,7 // loopne 66c1 <.literal4+0x299>
+ .byte 224,7 // loopne 65a5 <.literal4+0x299>
.byte 0,0 // add %al,(%rax)
.byte 33,8 // and %ecx,(%rax)
.byte 2,58 // add (%rdx),%bh
@@ -20240,7 +20145,7 @@ BALIGN4
.byte 0,0 // add %al,(%rax)
.byte 248 // clc
.byte 65,0,0 // add %al,(%r8)
- .byte 124,66 // jl 6716 <.literal4+0x2ee>
+ .byte 124,66 // jl 65fa <.literal4+0x2ee>
.byte 0,240 // add %dh,%al
.byte 0,0 // add %al,(%rax)
.byte 137,136,136,55,0,15 // mov %ecx,0xf003788(%rax)
@@ -20258,9 +20163,9 @@ BALIGN4
.byte 137,136,136,59,15,0 // mov %ecx,0xf3b88(%rax)
.byte 0,0 // add %al,(%rax)
.byte 137,136,136,61,0,0 // mov %ecx,0x3d88(%rax)
- .byte 112,65 // jo 6759 <.literal4+0x331>
+ .byte 112,65 // jo 663d <.literal4+0x331>
.byte 129,128,128,59,129,128,128,59,0,0 // addl $0x3b80,-0x7f7ec480(%rax)
- .byte 127,67 // jg 6767 <.literal4+0x33f>
+ .byte 127,67 // jg 664b <.literal4+0x33f>
.byte 0,128,0,0,0,0 // add %al,0x0(%rax)
.byte 0,128,0,4,0,128 // add %al,-0x7ffffc00(%rax)
.byte 0,0 // add %al,(%rax)
@@ -20276,7 +20181,7 @@ BALIGN4
.byte 0,128,55,0,0,128 // add %al,-0x7fffffc9(%rax)
.byte 63 // (bad)
.byte 0,255 // add %bh,%bh
- .byte 127,71 // jg 67a7 <.literal4+0x37f>
+ .byte 127,71 // jg 668b <.literal4+0x37f>
.byte 208 // (bad)
.byte 179,89 // mov $0x59,%bl
.byte 62,89 // ds pop %rcx
@@ -20363,39 +20268,73 @@ BALIGN4
.byte 170 // stos %al,%es:(%rdi)
.byte 190 // .byte 0xbe
-BALIGN32
- .byte 255,0 // incl (%rax)
+BALIGN16
+ .byte 0,2 // add %al,(%rdx)
+ .byte 4,6 // add $0x6,%al
.byte 0,0 // add %al,(%rax)
- .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
- .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
- .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
- .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
- .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
- .byte 255,0 // incl (%rax)
+ .byte 8,10 // or %cl,(%rdx)
+ .byte 12,14 // or $0xe,%al
.byte 0,0 // add %al,(%rax)
- .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
- .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
- .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
- .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
- .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 255,0 // incl (%rax)
+ .byte 255,0 // incl (%rax)
+ .byte 255,0 // incl (%rax)
+ .byte 255,0 // incl (%rax)
+ .byte 255,0 // incl (%rax)
+ .byte 255,0 // incl (%rax)
+ .byte 255,0 // incl (%rax)
+ .byte 255,0 // incl (%rax)
+ .byte 255,0 // incl (%rax)
+ .byte 255,0 // incl (%rax)
+ .byte 255,0 // incl (%rax)
+ .byte 255,0 // incl (%rax)
+ .byte 0,2 // add %al,(%rdx)
+ .byte 4,6 // add $0x6,%al
+ .byte 0,0 // add %al,(%rax)
+ .byte 0,0 // add %al,(%rax)
+ .byte 0,0 // add %al,(%rax)
+ .byte 0,0 // add %al,(%rax)
+ .byte 0,0 // add %al,(%rax)
+ .byte 0,0 // add %al,(%rax)
+ .byte 8,10 // or %cl,(%rdx)
+ .byte 12,14 // or $0xe,%al
+ .byte 0,0 // add %al,(%rax)
+ .byte 0,0 // add %al,(%rax)
+ .byte 0,0 // add %al,(%rax)
+ .byte 0,0 // add %al,(%rax)
+ .byte 0,0 // add %al,(%rax)
+ .byte 0,0 // add %al,(%rax)
+ .byte 0,2 // add %al,(%rdx)
+ .byte 4,6 // add $0x6,%al
+ .byte 0,0 // add %al,(%rax)
.byte 0,0 // add %al,(%rax)
+ .byte 0,0 // add %al,(%rax)
+ .byte 0,0 // add %al,(%rax)
+ .byte 0,0 // add %al,(%rax)
+ .byte 0,0 // add %al,(%rax)
+ .byte 8,10 // or %cl,(%rdx)
+ .byte 12,14 // or $0xe,%al
+ .byte 0,0 // add %al,(%rax)
+ .byte 0,0 // add %al,(%rax)
+ .byte 0,0 // add %al,(%rax)
+ .byte 0,0 // add %al,(%rax)
+ .byte 0,0 // add %al,(%rax)
+ .byte 0,0 // add %al,(%rax)
+
+BALIGN32
.byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
@@ -20428,24 +20367,38 @@ BALIGN32
.byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
-
-BALIGN16
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
BALIGN32
HIDDEN _sk_start_pipeline_sse41
diff --git a/src/jumper/SkJumper_generated_win.S b/src/jumper/SkJumper_generated_win.S
index 268cd261e4..0177a13f78 100644
--- a/src/jumper/SkJumper_generated_win.S
+++ b/src/jumper/SkJumper_generated_win.S
@@ -1647,8 +1647,8 @@ _sk_load_tables_hsw LABEL PROC
DB 76,3,8 ; add (%rax),%r9
DB 77,133,192 ; test %r8,%r8
DB 117,105 ; jne 1b0a <_sk_load_tables_hsw+0x7e>
- DB 196,193,126,111,25 ; vmovdqu (%r9),%ymm3
- DB 197,229,219,13,18,50,0,0 ; vpand 0x3212(%rip),%ymm3,%ymm1 # 4cc0 <_sk_callback_hsw+0x513>
+ DB 196,193,124,16,25 ; vmovups (%r9),%ymm3
+ DB 197,228,84,13,18,50,0,0 ; vandps 0x3212(%rip),%ymm3,%ymm1 # 4cc0 <_sk_callback_hsw+0x513>
DB 196,65,61,118,192 ; vpcmpeqd %ymm8,%ymm8,%ymm8
DB 72,139,72,8 ; mov 0x8(%rax),%rcx
DB 76,139,72,16 ; mov 0x10(%rax),%r9
@@ -1674,7 +1674,7 @@ _sk_load_tables_hsw LABEL PROC
DB 73,211,234 ; shr %cl,%r10
DB 196,193,249,110,194 ; vmovq %r10,%xmm0
DB 196,226,125,33,192 ; vpmovsxbd %xmm0,%ymm0
- DB 196,194,125,140,25 ; vpmaskmovd (%r9),%ymm0,%ymm3
+ DB 196,194,125,44,25 ; vmaskmovps (%r9),%ymm0,%ymm3
DB 233,115,255,255,255 ; jmpq 1aa6 <_sk_load_tables_hsw+0x1a>
PUBLIC _sk_load_tables_u16_be_hsw
@@ -3147,8 +3147,8 @@ _sk_load_8888_hsw LABEL PROC
DB 76,3,8 ; add (%rax),%r9
DB 77,133,192 ; test %r8,%r8
DB 117,88 ; jne 342d <_sk_load_8888_hsw+0x6d>
- DB 196,193,126,111,25 ; vmovdqu (%r9),%ymm3
- DB 197,229,219,5,158,25,0,0 ; vpand 0x199e(%rip),%ymm3,%ymm0 # 4d80 <_sk_callback_hsw+0x5d3>
+ DB 196,193,124,16,25 ; vmovups (%r9),%ymm3
+ DB 197,228,84,5,158,25,0,0 ; vandps 0x199e(%rip),%ymm3,%ymm0 # 4d80 <_sk_callback_hsw+0x5d3>
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
DB 196,98,125,24,5,205,23,0,0 ; vbroadcastss 0x17cd(%rip),%ymm8 # 4bbc <_sk_callback_hsw+0x40f>
DB 196,193,124,89,192 ; vmulps %ymm8,%ymm0,%ymm0
@@ -3171,7 +3171,7 @@ _sk_load_8888_hsw LABEL PROC
DB 72,211,232 ; shr %cl,%rax
DB 196,225,249,110,192 ; vmovq %rax,%xmm0
DB 196,226,125,33,192 ; vpmovsxbd %xmm0,%ymm0
- DB 196,194,125,140,25 ; vpmaskmovd (%r9),%ymm0,%ymm3
+ DB 196,194,125,44,25 ; vmaskmovps (%r9),%ymm0,%ymm3
DB 235,135 ; jmp 33da <_sk_load_8888_hsw+0x1a>
PUBLIC _sk_gather_8888_hsw
@@ -3224,7 +3224,7 @@ _sk_store_8888_hsw LABEL PROC
DB 196,65,53,235,192 ; vpor %ymm8,%ymm9,%ymm8
DB 77,133,192 ; test %r8,%r8
DB 117,12 ; jne 353c <_sk_store_8888_hsw+0x73>
- DB 196,65,126,127,1 ; vmovdqu %ymm8,(%r9)
+ DB 196,65,124,17,1 ; vmovups %ymm8,(%r9)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,137,193 ; mov %r8,%rcx
DB 255,224 ; jmpq *%rax
@@ -3235,7 +3235,7 @@ _sk_store_8888_hsw LABEL PROC
DB 72,211,232 ; shr %cl,%rax
DB 196,97,249,110,200 ; vmovq %rax,%xmm9
DB 196,66,125,33,201 ; vpmovsxbd %xmm9,%ymm9
- DB 196,66,53,142,1 ; vpmaskmovd %ymm8,%ymm9,(%r9)
+ DB 196,66,53,46,1 ; vmaskmovps %ymm8,%ymm9,(%r9)
DB 235,211 ; jmp 3535 <_sk_store_8888_hsw+0x6c>
PUBLIC _sk_load_f16_hsw
@@ -5076,14 +5076,14 @@ _sk_seed_shader_avx LABEL PROC
DB 197,249,112,192,0 ; vpshufd $0x0,%xmm0,%xmm0
DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,226,125,24,13,152,99,0,0 ; vbroadcastss 0x6398(%rip),%ymm1 # 64f8 <_sk_callback_avx+0x119>
+ DB 196,226,125,24,13,132,98,0,0 ; vbroadcastss 0x6284(%rip),%ymm1 # 63e4 <_sk_callback_avx+0x119>
DB 197,252,88,193 ; vaddps %ymm1,%ymm0,%ymm0
DB 197,252,88,2 ; vaddps (%rdx),%ymm0,%ymm0
DB 196,226,125,24,16 ; vbroadcastss (%rax),%ymm2
DB 197,252,91,210 ; vcvtdq2ps %ymm2,%ymm2
DB 197,236,88,201 ; vaddps %ymm1,%ymm2,%ymm1
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,21,124,99,0,0 ; vbroadcastss 0x637c(%rip),%ymm2 # 64fc <_sk_callback_avx+0x11d>
+ DB 196,226,125,24,21,104,98,0,0 ; vbroadcastss 0x6268(%rip),%ymm2 # 63e8 <_sk_callback_avx+0x11d>
DB 197,228,87,219 ; vxorps %ymm3,%ymm3,%ymm3
DB 197,220,87,228 ; vxorps %ymm4,%ymm4,%ymm4
DB 197,212,87,237 ; vxorps %ymm5,%ymm5,%ymm5
@@ -5104,7 +5104,7 @@ _sk_dither_avx LABEL PROC
DB 76,139,0 ; mov (%rax),%r8
DB 196,66,125,24,8 ; vbroadcastss (%r8),%ymm9
DB 196,65,60,87,209 ; vxorps %ymm9,%ymm8,%ymm10
- DB 196,98,125,24,29,45,99,0,0 ; vbroadcastss 0x632d(%rip),%ymm11 # 6500 <_sk_callback_avx+0x121>
+ DB 196,98,125,24,29,25,98,0,0 ; vbroadcastss 0x6219(%rip),%ymm11 # 63ec <_sk_callback_avx+0x121>
DB 196,65,44,84,203 ; vandps %ymm11,%ymm10,%ymm9
DB 196,193,25,114,241,5 ; vpslld $0x5,%xmm9,%xmm12
DB 196,67,125,25,201,1 ; vextractf128 $0x1,%ymm9,%xmm9
@@ -5115,8 +5115,8 @@ _sk_dither_avx LABEL PROC
DB 196,67,125,25,219,1 ; vextractf128 $0x1,%ymm11,%xmm11
DB 196,193,33,114,243,4 ; vpslld $0x4,%xmm11,%xmm11
DB 196,67,29,24,219,1 ; vinsertf128 $0x1,%xmm11,%ymm12,%ymm11
- DB 196,98,125,24,37,238,98,0,0 ; vbroadcastss 0x62ee(%rip),%ymm12 # 6504 <_sk_callback_avx+0x125>
- DB 196,98,125,24,45,233,98,0,0 ; vbroadcastss 0x62e9(%rip),%ymm13 # 6508 <_sk_callback_avx+0x129>
+ DB 196,98,125,24,37,218,97,0,0 ; vbroadcastss 0x61da(%rip),%ymm12 # 63f0 <_sk_callback_avx+0x125>
+ DB 196,98,125,24,45,213,97,0,0 ; vbroadcastss 0x61d5(%rip),%ymm13 # 63f4 <_sk_callback_avx+0x129>
DB 196,65,44,84,245 ; vandps %ymm13,%ymm10,%ymm14
DB 196,193,1,114,246,2 ; vpslld $0x2,%xmm14,%xmm15
DB 196,67,125,25,246,1 ; vextractf128 $0x1,%ymm14,%xmm14
@@ -5143,9 +5143,9 @@ _sk_dither_avx LABEL PROC
DB 196,65,12,86,202 ; vorps %ymm10,%ymm14,%ymm9
DB 196,65,60,86,193 ; vorps %ymm9,%ymm8,%ymm8
DB 196,65,124,91,192 ; vcvtdq2ps %ymm8,%ymm8
- DB 196,98,125,24,13,84,98,0,0 ; vbroadcastss 0x6254(%rip),%ymm9 # 650c <_sk_callback_avx+0x12d>
+ DB 196,98,125,24,13,64,97,0,0 ; vbroadcastss 0x6140(%rip),%ymm9 # 63f8 <_sk_callback_avx+0x12d>
DB 196,65,60,89,193 ; vmulps %ymm9,%ymm8,%ymm8
- DB 196,98,125,24,13,74,98,0,0 ; vbroadcastss 0x624a(%rip),%ymm9 # 6510 <_sk_callback_avx+0x131>
+ DB 196,98,125,24,13,54,97,0,0 ; vbroadcastss 0x6136(%rip),%ymm9 # 63fc <_sk_callback_avx+0x131>
DB 196,65,60,88,193 ; vaddps %ymm9,%ymm8,%ymm8
DB 196,98,125,24,72,8 ; vbroadcastss 0x8(%rax),%ymm9
DB 196,65,52,89,192 ; vmulps %ymm8,%ymm9,%ymm8
@@ -5204,7 +5204,7 @@ _sk_clear_avx LABEL PROC
PUBLIC _sk_srcatop_avx
_sk_srcatop_avx LABEL PROC
DB 197,252,89,199 ; vmulps %ymm7,%ymm0,%ymm0
- DB 196,98,125,24,5,161,97,0,0 ; vbroadcastss 0x61a1(%rip),%ymm8 # 6514 <_sk_callback_avx+0x135>
+ DB 196,98,125,24,5,141,96,0,0 ; vbroadcastss 0x608d(%rip),%ymm8 # 6400 <_sk_callback_avx+0x135>
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
DB 197,60,89,204 ; vmulps %ymm4,%ymm8,%ymm9
DB 197,180,88,192 ; vaddps %ymm0,%ymm9,%ymm0
@@ -5223,7 +5223,7 @@ _sk_srcatop_avx LABEL PROC
PUBLIC _sk_dstatop_avx
_sk_dstatop_avx LABEL PROC
DB 197,100,89,196 ; vmulps %ymm4,%ymm3,%ymm8
- DB 196,98,125,24,13,99,97,0,0 ; vbroadcastss 0x6163(%rip),%ymm9 # 6518 <_sk_callback_avx+0x139>
+ DB 196,98,125,24,13,79,96,0,0 ; vbroadcastss 0x604f(%rip),%ymm9 # 6404 <_sk_callback_avx+0x139>
DB 197,52,92,207 ; vsubps %ymm7,%ymm9,%ymm9
DB 197,180,89,192 ; vmulps %ymm0,%ymm9,%ymm0
DB 197,188,88,192 ; vaddps %ymm0,%ymm8,%ymm0
@@ -5259,7 +5259,7 @@ _sk_dstin_avx LABEL PROC
PUBLIC _sk_srcout_avx
_sk_srcout_avx LABEL PROC
- DB 196,98,125,24,5,2,97,0,0 ; vbroadcastss 0x6102(%rip),%ymm8 # 651c <_sk_callback_avx+0x13d>
+ DB 196,98,125,24,5,238,95,0,0 ; vbroadcastss 0x5fee(%rip),%ymm8 # 6408 <_sk_callback_avx+0x13d>
DB 197,60,92,199 ; vsubps %ymm7,%ymm8,%ymm8
DB 197,188,89,192 ; vmulps %ymm0,%ymm8,%ymm0
DB 197,188,89,201 ; vmulps %ymm1,%ymm8,%ymm1
@@ -5270,7 +5270,7 @@ _sk_srcout_avx LABEL PROC
PUBLIC _sk_dstout_avx
_sk_dstout_avx LABEL PROC
- DB 196,226,125,24,5,229,96,0,0 ; vbroadcastss 0x60e5(%rip),%ymm0 # 6520 <_sk_callback_avx+0x141>
+ DB 196,226,125,24,5,209,95,0,0 ; vbroadcastss 0x5fd1(%rip),%ymm0 # 640c <_sk_callback_avx+0x141>
DB 197,252,92,219 ; vsubps %ymm3,%ymm0,%ymm3
DB 197,228,89,196 ; vmulps %ymm4,%ymm3,%ymm0
DB 197,228,89,205 ; vmulps %ymm5,%ymm3,%ymm1
@@ -5281,7 +5281,7 @@ _sk_dstout_avx LABEL PROC
PUBLIC _sk_srcover_avx
_sk_srcover_avx LABEL PROC
- DB 196,98,125,24,5,200,96,0,0 ; vbroadcastss 0x60c8(%rip),%ymm8 # 6524 <_sk_callback_avx+0x145>
+ DB 196,98,125,24,5,180,95,0,0 ; vbroadcastss 0x5fb4(%rip),%ymm8 # 6410 <_sk_callback_avx+0x145>
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
DB 197,60,89,204 ; vmulps %ymm4,%ymm8,%ymm9
DB 197,180,88,192 ; vaddps %ymm0,%ymm9,%ymm0
@@ -5296,7 +5296,7 @@ _sk_srcover_avx LABEL PROC
PUBLIC _sk_dstover_avx
_sk_dstover_avx LABEL PROC
- DB 196,98,125,24,5,155,96,0,0 ; vbroadcastss 0x609b(%rip),%ymm8 # 6528 <_sk_callback_avx+0x149>
+ DB 196,98,125,24,5,135,95,0,0 ; vbroadcastss 0x5f87(%rip),%ymm8 # 6414 <_sk_callback_avx+0x149>
DB 197,60,92,199 ; vsubps %ymm7,%ymm8,%ymm8
DB 197,188,89,192 ; vmulps %ymm0,%ymm8,%ymm0
DB 197,252,88,196 ; vaddps %ymm4,%ymm0,%ymm0
@@ -5320,7 +5320,7 @@ _sk_modulate_avx LABEL PROC
PUBLIC _sk_multiply_avx
_sk_multiply_avx LABEL PROC
- DB 196,98,125,24,5,90,96,0,0 ; vbroadcastss 0x605a(%rip),%ymm8 # 652c <_sk_callback_avx+0x14d>
+ DB 196,98,125,24,5,70,95,0,0 ; vbroadcastss 0x5f46(%rip),%ymm8 # 6418 <_sk_callback_avx+0x14d>
DB 197,60,92,207 ; vsubps %ymm7,%ymm8,%ymm9
DB 197,52,89,208 ; vmulps %ymm0,%ymm9,%ymm10
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
@@ -5374,7 +5374,7 @@ _sk_screen_avx LABEL PROC
PUBLIC _sk_xor__avx
_sk_xor__avx LABEL PROC
- DB 196,98,125,24,5,169,95,0,0 ; vbroadcastss 0x5fa9(%rip),%ymm8 # 6530 <_sk_callback_avx+0x151>
+ DB 196,98,125,24,5,149,94,0,0 ; vbroadcastss 0x5e95(%rip),%ymm8 # 641c <_sk_callback_avx+0x151>
DB 197,60,92,207 ; vsubps %ymm7,%ymm8,%ymm9
DB 197,180,89,192 ; vmulps %ymm0,%ymm9,%ymm0
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
@@ -5409,7 +5409,7 @@ _sk_darken_avx LABEL PROC
DB 197,100,89,206 ; vmulps %ymm6,%ymm3,%ymm9
DB 196,193,108,95,209 ; vmaxps %ymm9,%ymm2,%ymm2
DB 197,188,92,210 ; vsubps %ymm2,%ymm8,%ymm2
- DB 196,98,125,24,5,41,95,0,0 ; vbroadcastss 0x5f29(%rip),%ymm8 # 6534 <_sk_callback_avx+0x155>
+ DB 196,98,125,24,5,21,94,0,0 ; vbroadcastss 0x5e15(%rip),%ymm8 # 6420 <_sk_callback_avx+0x155>
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
DB 197,60,89,199 ; vmulps %ymm7,%ymm8,%ymm8
DB 197,188,88,219 ; vaddps %ymm3,%ymm8,%ymm3
@@ -5433,7 +5433,7 @@ _sk_lighten_avx LABEL PROC
DB 197,100,89,206 ; vmulps %ymm6,%ymm3,%ymm9
DB 196,193,108,93,209 ; vminps %ymm9,%ymm2,%ymm2
DB 197,188,92,210 ; vsubps %ymm2,%ymm8,%ymm2
- DB 196,98,125,24,5,213,94,0,0 ; vbroadcastss 0x5ed5(%rip),%ymm8 # 6538 <_sk_callback_avx+0x159>
+ DB 196,98,125,24,5,193,93,0,0 ; vbroadcastss 0x5dc1(%rip),%ymm8 # 6424 <_sk_callback_avx+0x159>
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
DB 197,60,89,199 ; vmulps %ymm7,%ymm8,%ymm8
DB 197,188,88,219 ; vaddps %ymm3,%ymm8,%ymm3
@@ -5460,7 +5460,7 @@ _sk_difference_avx LABEL PROC
DB 196,193,108,93,209 ; vminps %ymm9,%ymm2,%ymm2
DB 197,236,88,210 ; vaddps %ymm2,%ymm2,%ymm2
DB 197,188,92,210 ; vsubps %ymm2,%ymm8,%ymm2
- DB 196,98,125,24,5,117,94,0,0 ; vbroadcastss 0x5e75(%rip),%ymm8 # 653c <_sk_callback_avx+0x15d>
+ DB 196,98,125,24,5,97,93,0,0 ; vbroadcastss 0x5d61(%rip),%ymm8 # 6428 <_sk_callback_avx+0x15d>
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
DB 197,60,89,199 ; vmulps %ymm7,%ymm8,%ymm8
DB 197,188,88,219 ; vaddps %ymm3,%ymm8,%ymm3
@@ -5481,7 +5481,7 @@ _sk_exclusion_avx LABEL PROC
DB 197,236,89,214 ; vmulps %ymm6,%ymm2,%ymm2
DB 197,236,88,210 ; vaddps %ymm2,%ymm2,%ymm2
DB 197,188,92,210 ; vsubps %ymm2,%ymm8,%ymm2
- DB 196,98,125,24,5,48,94,0,0 ; vbroadcastss 0x5e30(%rip),%ymm8 # 6540 <_sk_callback_avx+0x161>
+ DB 196,98,125,24,5,28,93,0,0 ; vbroadcastss 0x5d1c(%rip),%ymm8 # 642c <_sk_callback_avx+0x161>
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
DB 197,60,89,199 ; vmulps %ymm7,%ymm8,%ymm8
DB 197,188,88,219 ; vaddps %ymm3,%ymm8,%ymm3
@@ -5490,7 +5490,7 @@ _sk_exclusion_avx LABEL PROC
PUBLIC _sk_colorburn_avx
_sk_colorburn_avx LABEL PROC
- DB 196,98,125,24,5,27,94,0,0 ; vbroadcastss 0x5e1b(%rip),%ymm8 # 6544 <_sk_callback_avx+0x165>
+ DB 196,98,125,24,5,7,93,0,0 ; vbroadcastss 0x5d07(%rip),%ymm8 # 6430 <_sk_callback_avx+0x165>
DB 197,60,92,207 ; vsubps %ymm7,%ymm8,%ymm9
DB 197,52,89,216 ; vmulps %ymm0,%ymm9,%ymm11
DB 196,65,44,87,210 ; vxorps %ymm10,%ymm10,%ymm10
@@ -5550,7 +5550,7 @@ _sk_colorburn_avx LABEL PROC
PUBLIC _sk_colordodge_avx
_sk_colordodge_avx LABEL PROC
DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
- DB 196,98,125,24,13,23,93,0,0 ; vbroadcastss 0x5d17(%rip),%ymm9 # 6548 <_sk_callback_avx+0x169>
+ DB 196,98,125,24,13,3,92,0,0 ; vbroadcastss 0x5c03(%rip),%ymm9 # 6434 <_sk_callback_avx+0x169>
DB 197,52,92,215 ; vsubps %ymm7,%ymm9,%ymm10
DB 197,44,89,216 ; vmulps %ymm0,%ymm10,%ymm11
DB 197,52,92,203 ; vsubps %ymm3,%ymm9,%ymm9
@@ -5605,7 +5605,7 @@ _sk_colordodge_avx LABEL PROC
PUBLIC _sk_hardlight_avx
_sk_hardlight_avx LABEL PROC
- DB 196,98,125,24,5,41,92,0,0 ; vbroadcastss 0x5c29(%rip),%ymm8 # 654c <_sk_callback_avx+0x16d>
+ DB 196,98,125,24,5,21,91,0,0 ; vbroadcastss 0x5b15(%rip),%ymm8 # 6438 <_sk_callback_avx+0x16d>
DB 197,60,92,215 ; vsubps %ymm7,%ymm8,%ymm10
DB 197,44,89,200 ; vmulps %ymm0,%ymm10,%ymm9
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
@@ -5658,7 +5658,7 @@ _sk_hardlight_avx LABEL PROC
PUBLIC _sk_overlay_avx
_sk_overlay_avx LABEL PROC
- DB 196,98,125,24,5,82,91,0,0 ; vbroadcastss 0x5b52(%rip),%ymm8 # 6550 <_sk_callback_avx+0x171>
+ DB 196,98,125,24,5,62,90,0,0 ; vbroadcastss 0x5a3e(%rip),%ymm8 # 643c <_sk_callback_avx+0x171>
DB 197,60,92,215 ; vsubps %ymm7,%ymm8,%ymm10
DB 197,44,89,200 ; vmulps %ymm0,%ymm10,%ymm9
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
@@ -5723,10 +5723,10 @@ _sk_softlight_avx LABEL PROC
DB 196,65,60,88,192 ; vaddps %ymm8,%ymm8,%ymm8
DB 196,65,60,89,216 ; vmulps %ymm8,%ymm8,%ymm11
DB 196,65,60,88,195 ; vaddps %ymm11,%ymm8,%ymm8
- DB 196,98,125,24,29,69,90,0,0 ; vbroadcastss 0x5a45(%rip),%ymm11 # 6558 <_sk_callback_avx+0x179>
+ DB 196,98,125,24,29,49,89,0,0 ; vbroadcastss 0x5931(%rip),%ymm11 # 6444 <_sk_callback_avx+0x179>
DB 196,65,28,88,235 ; vaddps %ymm11,%ymm12,%ymm13
DB 196,65,20,89,192 ; vmulps %ymm8,%ymm13,%ymm8
- DB 196,98,125,24,45,54,90,0,0 ; vbroadcastss 0x5a36(%rip),%ymm13 # 655c <_sk_callback_avx+0x17d>
+ DB 196,98,125,24,45,34,89,0,0 ; vbroadcastss 0x5922(%rip),%ymm13 # 6448 <_sk_callback_avx+0x17d>
DB 196,65,28,89,245 ; vmulps %ymm13,%ymm12,%ymm14
DB 196,65,12,88,192 ; vaddps %ymm8,%ymm14,%ymm8
DB 196,65,124,82,244 ; vrsqrtps %ymm12,%ymm14
@@ -5737,7 +5737,7 @@ _sk_softlight_avx LABEL PROC
DB 197,4,194,255,2 ; vcmpleps %ymm7,%ymm15,%ymm15
DB 196,67,13,74,240,240 ; vblendvps %ymm15,%ymm8,%ymm14,%ymm14
DB 197,116,88,249 ; vaddps %ymm1,%ymm1,%ymm15
- DB 196,98,125,24,5,244,89,0,0 ; vbroadcastss 0x59f4(%rip),%ymm8 # 6554 <_sk_callback_avx+0x175>
+ DB 196,98,125,24,5,224,88,0,0 ; vbroadcastss 0x58e0(%rip),%ymm8 # 6440 <_sk_callback_avx+0x175>
DB 196,65,60,92,228 ; vsubps %ymm12,%ymm8,%ymm12
DB 197,132,92,195 ; vsubps %ymm3,%ymm15,%ymm0
DB 196,65,124,89,228 ; vmulps %ymm12,%ymm0,%ymm12
@@ -5864,12 +5864,12 @@ _sk_hue_avx LABEL PROC
DB 196,65,28,89,219 ; vmulps %ymm11,%ymm12,%ymm11
DB 196,65,36,94,222 ; vdivps %ymm14,%ymm11,%ymm11
DB 196,67,37,74,224,240 ; vblendvps %ymm15,%ymm8,%ymm11,%ymm12
- DB 196,98,125,24,53,190,87,0,0 ; vbroadcastss 0x57be(%rip),%ymm14 # 6560 <_sk_callback_avx+0x181>
+ DB 196,98,125,24,53,170,86,0,0 ; vbroadcastss 0x56aa(%rip),%ymm14 # 644c <_sk_callback_avx+0x181>
DB 196,65,92,89,222 ; vmulps %ymm14,%ymm4,%ymm11
- DB 196,98,125,24,61,180,87,0,0 ; vbroadcastss 0x57b4(%rip),%ymm15 # 6564 <_sk_callback_avx+0x185>
+ DB 196,98,125,24,61,160,86,0,0 ; vbroadcastss 0x56a0(%rip),%ymm15 # 6450 <_sk_callback_avx+0x185>
DB 196,65,84,89,239 ; vmulps %ymm15,%ymm5,%ymm13
DB 196,65,36,88,221 ; vaddps %ymm13,%ymm11,%ymm11
- DB 196,226,125,24,5,165,87,0,0 ; vbroadcastss 0x57a5(%rip),%ymm0 # 6568 <_sk_callback_avx+0x189>
+ DB 196,226,125,24,5,145,86,0,0 ; vbroadcastss 0x5691(%rip),%ymm0 # 6454 <_sk_callback_avx+0x189>
DB 197,76,89,232 ; vmulps %ymm0,%ymm6,%ymm13
DB 196,65,36,88,221 ; vaddps %ymm13,%ymm11,%ymm11
DB 196,65,52,89,238 ; vmulps %ymm14,%ymm9,%ymm13
@@ -5930,7 +5930,7 @@ _sk_hue_avx LABEL PROC
DB 196,65,36,95,208 ; vmaxps %ymm8,%ymm11,%ymm10
DB 196,195,109,74,209,240 ; vblendvps %ymm15,%ymm9,%ymm2,%ymm2
DB 196,193,108,95,208 ; vmaxps %ymm8,%ymm2,%ymm2
- DB 196,98,125,24,5,126,86,0,0 ; vbroadcastss 0x567e(%rip),%ymm8 # 656c <_sk_callback_avx+0x18d>
+ DB 196,98,125,24,5,106,85,0,0 ; vbroadcastss 0x556a(%rip),%ymm8 # 6458 <_sk_callback_avx+0x18d>
DB 197,60,92,207 ; vsubps %ymm7,%ymm8,%ymm9
DB 197,180,89,201 ; vmulps %ymm1,%ymm9,%ymm1
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
@@ -5987,12 +5987,12 @@ _sk_saturation_avx LABEL PROC
DB 196,65,28,89,219 ; vmulps %ymm11,%ymm12,%ymm11
DB 196,65,36,94,222 ; vdivps %ymm14,%ymm11,%ymm11
DB 196,67,37,74,224,240 ; vblendvps %ymm15,%ymm8,%ymm11,%ymm12
- DB 196,98,125,24,53,134,85,0,0 ; vbroadcastss 0x5586(%rip),%ymm14 # 6570 <_sk_callback_avx+0x191>
+ DB 196,98,125,24,53,114,84,0,0 ; vbroadcastss 0x5472(%rip),%ymm14 # 645c <_sk_callback_avx+0x191>
DB 196,65,92,89,222 ; vmulps %ymm14,%ymm4,%ymm11
- DB 196,98,125,24,61,124,85,0,0 ; vbroadcastss 0x557c(%rip),%ymm15 # 6574 <_sk_callback_avx+0x195>
+ DB 196,98,125,24,61,104,84,0,0 ; vbroadcastss 0x5468(%rip),%ymm15 # 6460 <_sk_callback_avx+0x195>
DB 196,65,84,89,239 ; vmulps %ymm15,%ymm5,%ymm13
DB 196,65,36,88,221 ; vaddps %ymm13,%ymm11,%ymm11
- DB 196,226,125,24,5,109,85,0,0 ; vbroadcastss 0x556d(%rip),%ymm0 # 6578 <_sk_callback_avx+0x199>
+ DB 196,226,125,24,5,89,84,0,0 ; vbroadcastss 0x5459(%rip),%ymm0 # 6464 <_sk_callback_avx+0x199>
DB 197,76,89,232 ; vmulps %ymm0,%ymm6,%ymm13
DB 196,65,36,88,221 ; vaddps %ymm13,%ymm11,%ymm11
DB 196,65,52,89,238 ; vmulps %ymm14,%ymm9,%ymm13
@@ -6053,7 +6053,7 @@ _sk_saturation_avx LABEL PROC
DB 196,65,36,95,208 ; vmaxps %ymm8,%ymm11,%ymm10
DB 196,195,109,74,209,240 ; vblendvps %ymm15,%ymm9,%ymm2,%ymm2
DB 196,193,108,95,208 ; vmaxps %ymm8,%ymm2,%ymm2
- DB 196,98,125,24,5,70,84,0,0 ; vbroadcastss 0x5446(%rip),%ymm8 # 657c <_sk_callback_avx+0x19d>
+ DB 196,98,125,24,5,50,83,0,0 ; vbroadcastss 0x5332(%rip),%ymm8 # 6468 <_sk_callback_avx+0x19d>
DB 197,60,92,207 ; vsubps %ymm7,%ymm8,%ymm9
DB 197,180,89,201 ; vmulps %ymm1,%ymm9,%ymm1
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
@@ -6082,12 +6082,12 @@ _sk_color_avx LABEL PROC
DB 197,252,17,68,36,32 ; vmovups %ymm0,0x20(%rsp)
DB 197,124,89,199 ; vmulps %ymm7,%ymm0,%ymm8
DB 197,116,89,207 ; vmulps %ymm7,%ymm1,%ymm9
- DB 196,98,125,24,45,214,83,0,0 ; vbroadcastss 0x53d6(%rip),%ymm13 # 6580 <_sk_callback_avx+0x1a1>
+ DB 196,98,125,24,45,194,82,0,0 ; vbroadcastss 0x52c2(%rip),%ymm13 # 646c <_sk_callback_avx+0x1a1>
DB 196,65,92,89,213 ; vmulps %ymm13,%ymm4,%ymm10
- DB 196,98,125,24,53,204,83,0,0 ; vbroadcastss 0x53cc(%rip),%ymm14 # 6584 <_sk_callback_avx+0x1a5>
+ DB 196,98,125,24,53,184,82,0,0 ; vbroadcastss 0x52b8(%rip),%ymm14 # 6470 <_sk_callback_avx+0x1a5>
DB 196,65,84,89,222 ; vmulps %ymm14,%ymm5,%ymm11
DB 196,65,44,88,211 ; vaddps %ymm11,%ymm10,%ymm10
- DB 196,98,125,24,61,189,83,0,0 ; vbroadcastss 0x53bd(%rip),%ymm15 # 6588 <_sk_callback_avx+0x1a9>
+ DB 196,98,125,24,61,169,82,0,0 ; vbroadcastss 0x52a9(%rip),%ymm15 # 6474 <_sk_callback_avx+0x1a9>
DB 196,65,76,89,223 ; vmulps %ymm15,%ymm6,%ymm11
DB 196,193,44,88,195 ; vaddps %ymm11,%ymm10,%ymm0
DB 196,65,60,89,221 ; vmulps %ymm13,%ymm8,%ymm11
@@ -6150,7 +6150,7 @@ _sk_color_avx LABEL PROC
DB 196,65,44,95,207 ; vmaxps %ymm15,%ymm10,%ymm9
DB 196,195,37,74,192,0 ; vblendvps %ymm0,%ymm8,%ymm11,%ymm0
DB 196,65,124,95,199 ; vmaxps %ymm15,%ymm0,%ymm8
- DB 196,226,125,24,5,132,82,0,0 ; vbroadcastss 0x5284(%rip),%ymm0 # 658c <_sk_callback_avx+0x1ad>
+ DB 196,226,125,24,5,112,81,0,0 ; vbroadcastss 0x5170(%rip),%ymm0 # 6478 <_sk_callback_avx+0x1ad>
DB 197,124,92,215 ; vsubps %ymm7,%ymm0,%ymm10
DB 197,172,89,84,36,32 ; vmulps 0x20(%rsp),%ymm10,%ymm2
DB 197,124,92,219 ; vsubps %ymm3,%ymm0,%ymm11
@@ -6180,12 +6180,12 @@ _sk_luminosity_avx LABEL PROC
DB 197,252,40,208 ; vmovaps %ymm0,%ymm2
DB 197,100,89,196 ; vmulps %ymm4,%ymm3,%ymm8
DB 197,100,89,205 ; vmulps %ymm5,%ymm3,%ymm9
- DB 196,98,125,24,45,16,82,0,0 ; vbroadcastss 0x5210(%rip),%ymm13 # 6590 <_sk_callback_avx+0x1b1>
+ DB 196,98,125,24,45,252,80,0,0 ; vbroadcastss 0x50fc(%rip),%ymm13 # 647c <_sk_callback_avx+0x1b1>
DB 196,65,108,89,213 ; vmulps %ymm13,%ymm2,%ymm10
- DB 196,98,125,24,53,6,82,0,0 ; vbroadcastss 0x5206(%rip),%ymm14 # 6594 <_sk_callback_avx+0x1b5>
+ DB 196,98,125,24,53,242,80,0,0 ; vbroadcastss 0x50f2(%rip),%ymm14 # 6480 <_sk_callback_avx+0x1b5>
DB 196,65,116,89,222 ; vmulps %ymm14,%ymm1,%ymm11
DB 196,65,44,88,211 ; vaddps %ymm11,%ymm10,%ymm10
- DB 196,98,125,24,61,247,81,0,0 ; vbroadcastss 0x51f7(%rip),%ymm15 # 6598 <_sk_callback_avx+0x1b9>
+ DB 196,98,125,24,61,227,80,0,0 ; vbroadcastss 0x50e3(%rip),%ymm15 # 6484 <_sk_callback_avx+0x1b9>
DB 196,65,28,89,223 ; vmulps %ymm15,%ymm12,%ymm11
DB 196,193,44,88,195 ; vaddps %ymm11,%ymm10,%ymm0
DB 196,65,60,89,221 ; vmulps %ymm13,%ymm8,%ymm11
@@ -6248,7 +6248,7 @@ _sk_luminosity_avx LABEL PROC
DB 196,65,44,95,207 ; vmaxps %ymm15,%ymm10,%ymm9
DB 196,195,37,74,192,0 ; vblendvps %ymm0,%ymm8,%ymm11,%ymm0
DB 196,65,124,95,199 ; vmaxps %ymm15,%ymm0,%ymm8
- DB 196,226,125,24,5,190,80,0,0 ; vbroadcastss 0x50be(%rip),%ymm0 # 659c <_sk_callback_avx+0x1bd>
+ DB 196,226,125,24,5,170,79,0,0 ; vbroadcastss 0x4faa(%rip),%ymm0 # 6488 <_sk_callback_avx+0x1bd>
DB 197,124,92,215 ; vsubps %ymm7,%ymm0,%ymm10
DB 197,172,89,210 ; vmulps %ymm2,%ymm10,%ymm2
DB 197,124,92,219 ; vsubps %ymm3,%ymm0,%ymm11
@@ -6281,7 +6281,7 @@ _sk_clamp_0_avx LABEL PROC
PUBLIC _sk_clamp_1_avx
_sk_clamp_1_avx LABEL PROC
- DB 196,98,125,24,5,78,80,0,0 ; vbroadcastss 0x504e(%rip),%ymm8 # 65a0 <_sk_callback_avx+0x1c1>
+ DB 196,98,125,24,5,58,79,0,0 ; vbroadcastss 0x4f3a(%rip),%ymm8 # 648c <_sk_callback_avx+0x1c1>
DB 196,193,124,93,192 ; vminps %ymm8,%ymm0,%ymm0
DB 196,193,116,93,200 ; vminps %ymm8,%ymm1,%ymm1
DB 196,193,108,93,208 ; vminps %ymm8,%ymm2,%ymm2
@@ -6291,7 +6291,7 @@ _sk_clamp_1_avx LABEL PROC
PUBLIC _sk_clamp_a_avx
_sk_clamp_a_avx LABEL PROC
- DB 196,98,125,24,5,49,80,0,0 ; vbroadcastss 0x5031(%rip),%ymm8 # 65a4 <_sk_callback_avx+0x1c5>
+ DB 196,98,125,24,5,29,79,0,0 ; vbroadcastss 0x4f1d(%rip),%ymm8 # 6490 <_sk_callback_avx+0x1c5>
DB 196,193,100,93,216 ; vminps %ymm8,%ymm3,%ymm3
DB 197,252,93,195 ; vminps %ymm3,%ymm0,%ymm0
DB 197,244,93,203 ; vminps %ymm3,%ymm1,%ymm1
@@ -6363,7 +6363,7 @@ PUBLIC _sk_unpremul_avx
_sk_unpremul_avx LABEL PROC
DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
DB 196,65,100,194,200,0 ; vcmpeqps %ymm8,%ymm3,%ymm9
- DB 196,98,125,24,21,121,79,0,0 ; vbroadcastss 0x4f79(%rip),%ymm10 # 65a8 <_sk_callback_avx+0x1c9>
+ DB 196,98,125,24,21,101,78,0,0 ; vbroadcastss 0x4e65(%rip),%ymm10 # 6494 <_sk_callback_avx+0x1c9>
DB 197,44,94,211 ; vdivps %ymm3,%ymm10,%ymm10
DB 196,67,45,74,192,144 ; vblendvps %ymm9,%ymm8,%ymm10,%ymm8
DB 197,188,89,192 ; vmulps %ymm0,%ymm8,%ymm0
@@ -6374,17 +6374,17 @@ _sk_unpremul_avx LABEL PROC
PUBLIC _sk_from_srgb_avx
_sk_from_srgb_avx LABEL PROC
- DB 196,98,125,24,5,90,79,0,0 ; vbroadcastss 0x4f5a(%rip),%ymm8 # 65ac <_sk_callback_avx+0x1cd>
+ DB 196,98,125,24,5,70,78,0,0 ; vbroadcastss 0x4e46(%rip),%ymm8 # 6498 <_sk_callback_avx+0x1cd>
DB 196,65,124,89,200 ; vmulps %ymm8,%ymm0,%ymm9
DB 197,124,89,208 ; vmulps %ymm0,%ymm0,%ymm10
- DB 196,98,125,24,29,76,79,0,0 ; vbroadcastss 0x4f4c(%rip),%ymm11 # 65b0 <_sk_callback_avx+0x1d1>
+ DB 196,98,125,24,29,56,78,0,0 ; vbroadcastss 0x4e38(%rip),%ymm11 # 649c <_sk_callback_avx+0x1d1>
DB 196,65,124,89,227 ; vmulps %ymm11,%ymm0,%ymm12
- DB 196,98,125,24,45,66,79,0,0 ; vbroadcastss 0x4f42(%rip),%ymm13 # 65b4 <_sk_callback_avx+0x1d5>
+ DB 196,98,125,24,45,46,78,0,0 ; vbroadcastss 0x4e2e(%rip),%ymm13 # 64a0 <_sk_callback_avx+0x1d5>
DB 196,65,28,88,229 ; vaddps %ymm13,%ymm12,%ymm12
DB 196,65,44,89,212 ; vmulps %ymm12,%ymm10,%ymm10
- DB 196,98,125,24,37,51,79,0,0 ; vbroadcastss 0x4f33(%rip),%ymm12 # 65b8 <_sk_callback_avx+0x1d9>
+ DB 196,98,125,24,37,31,78,0,0 ; vbroadcastss 0x4e1f(%rip),%ymm12 # 64a4 <_sk_callback_avx+0x1d9>
DB 196,65,44,88,212 ; vaddps %ymm12,%ymm10,%ymm10
- DB 196,98,125,24,53,41,79,0,0 ; vbroadcastss 0x4f29(%rip),%ymm14 # 65bc <_sk_callback_avx+0x1dd>
+ DB 196,98,125,24,53,21,78,0,0 ; vbroadcastss 0x4e15(%rip),%ymm14 # 64a8 <_sk_callback_avx+0x1dd>
DB 196,193,124,194,198,1 ; vcmpltps %ymm14,%ymm0,%ymm0
DB 196,195,45,74,193,0 ; vblendvps %ymm0,%ymm9,%ymm10,%ymm0
DB 196,65,116,89,200 ; vmulps %ymm8,%ymm1,%ymm9
@@ -6409,20 +6409,20 @@ _sk_from_srgb_avx LABEL PROC
PUBLIC _sk_to_srgb_avx
_sk_to_srgb_avx LABEL PROC
DB 197,124,82,200 ; vrsqrtps %ymm0,%ymm9
- DB 196,98,125,24,5,190,78,0,0 ; vbroadcastss 0x4ebe(%rip),%ymm8 # 65c0 <_sk_callback_avx+0x1e1>
+ DB 196,98,125,24,5,170,77,0,0 ; vbroadcastss 0x4daa(%rip),%ymm8 # 64ac <_sk_callback_avx+0x1e1>
DB 196,65,124,89,208 ; vmulps %ymm8,%ymm0,%ymm10
- DB 196,98,125,24,29,180,78,0,0 ; vbroadcastss 0x4eb4(%rip),%ymm11 # 65c4 <_sk_callback_avx+0x1e5>
+ DB 196,98,125,24,29,160,77,0,0 ; vbroadcastss 0x4da0(%rip),%ymm11 # 64b0 <_sk_callback_avx+0x1e5>
DB 196,65,52,89,227 ; vmulps %ymm11,%ymm9,%ymm12
- DB 196,98,125,24,45,170,78,0,0 ; vbroadcastss 0x4eaa(%rip),%ymm13 # 65c8 <_sk_callback_avx+0x1e9>
+ DB 196,98,125,24,45,150,77,0,0 ; vbroadcastss 0x4d96(%rip),%ymm13 # 64b4 <_sk_callback_avx+0x1e9>
DB 196,65,28,88,229 ; vaddps %ymm13,%ymm12,%ymm12
DB 196,65,52,89,228 ; vmulps %ymm12,%ymm9,%ymm12
- DB 196,98,125,24,53,155,78,0,0 ; vbroadcastss 0x4e9b(%rip),%ymm14 # 65cc <_sk_callback_avx+0x1ed>
+ DB 196,98,125,24,53,135,77,0,0 ; vbroadcastss 0x4d87(%rip),%ymm14 # 64b8 <_sk_callback_avx+0x1ed>
DB 196,65,28,88,230 ; vaddps %ymm14,%ymm12,%ymm12
- DB 196,98,125,24,61,145,78,0,0 ; vbroadcastss 0x4e91(%rip),%ymm15 # 65d0 <_sk_callback_avx+0x1f1>
+ DB 196,98,125,24,61,125,77,0,0 ; vbroadcastss 0x4d7d(%rip),%ymm15 # 64bc <_sk_callback_avx+0x1f1>
DB 196,65,52,88,207 ; vaddps %ymm15,%ymm9,%ymm9
DB 196,65,124,83,201 ; vrcpps %ymm9,%ymm9
DB 196,65,52,89,204 ; vmulps %ymm12,%ymm9,%ymm9
- DB 196,98,125,24,37,125,78,0,0 ; vbroadcastss 0x4e7d(%rip),%ymm12 # 65d4 <_sk_callback_avx+0x1f5>
+ DB 196,98,125,24,37,105,77,0,0 ; vbroadcastss 0x4d69(%rip),%ymm12 # 64c0 <_sk_callback_avx+0x1f5>
DB 196,193,124,194,196,1 ; vcmpltps %ymm12,%ymm0,%ymm0
DB 196,195,53,74,194,0 ; vblendvps %ymm0,%ymm10,%ymm9,%ymm0
DB 197,124,82,201 ; vrsqrtps %ymm1,%ymm9
@@ -6457,7 +6457,7 @@ _sk_rgb_to_hsl_avx LABEL PROC
DB 197,124,93,201 ; vminps %ymm1,%ymm0,%ymm9
DB 197,52,93,202 ; vminps %ymm2,%ymm9,%ymm9
DB 196,65,60,92,209 ; vsubps %ymm9,%ymm8,%ymm10
- DB 196,98,125,24,29,227,77,0,0 ; vbroadcastss 0x4de3(%rip),%ymm11 # 65d8 <_sk_callback_avx+0x1f9>
+ DB 196,98,125,24,29,207,76,0,0 ; vbroadcastss 0x4ccf(%rip),%ymm11 # 64c4 <_sk_callback_avx+0x1f9>
DB 196,65,36,94,218 ; vdivps %ymm10,%ymm11,%ymm11
DB 197,116,92,226 ; vsubps %ymm2,%ymm1,%ymm12
DB 196,65,28,89,227 ; vmulps %ymm11,%ymm12,%ymm12
@@ -6467,19 +6467,19 @@ _sk_rgb_to_hsl_avx LABEL PROC
DB 196,193,108,89,211 ; vmulps %ymm11,%ymm2,%ymm2
DB 197,252,92,201 ; vsubps %ymm1,%ymm0,%ymm1
DB 196,193,116,89,203 ; vmulps %ymm11,%ymm1,%ymm1
- DB 196,98,125,24,29,188,77,0,0 ; vbroadcastss 0x4dbc(%rip),%ymm11 # 65e4 <_sk_callback_avx+0x205>
+ DB 196,98,125,24,29,168,76,0,0 ; vbroadcastss 0x4ca8(%rip),%ymm11 # 64d0 <_sk_callback_avx+0x205>
DB 196,193,116,88,203 ; vaddps %ymm11,%ymm1,%ymm1
- DB 196,98,125,24,29,170,77,0,0 ; vbroadcastss 0x4daa(%rip),%ymm11 # 65e0 <_sk_callback_avx+0x201>
+ DB 196,98,125,24,29,150,76,0,0 ; vbroadcastss 0x4c96(%rip),%ymm11 # 64cc <_sk_callback_avx+0x201>
DB 196,193,108,88,211 ; vaddps %ymm11,%ymm2,%ymm2
DB 196,227,117,74,202,224 ; vblendvps %ymm14,%ymm2,%ymm1,%ymm1
- DB 196,226,125,24,21,146,77,0,0 ; vbroadcastss 0x4d92(%rip),%ymm2 # 65dc <_sk_callback_avx+0x1fd>
+ DB 196,226,125,24,21,126,76,0,0 ; vbroadcastss 0x4c7e(%rip),%ymm2 # 64c8 <_sk_callback_avx+0x1fd>
DB 196,65,12,87,246 ; vxorps %ymm14,%ymm14,%ymm14
DB 196,227,13,74,210,208 ; vblendvps %ymm13,%ymm2,%ymm14,%ymm2
DB 197,188,194,192,0 ; vcmpeqps %ymm0,%ymm8,%ymm0
DB 196,193,108,88,212 ; vaddps %ymm12,%ymm2,%ymm2
DB 196,227,117,74,194,0 ; vblendvps %ymm0,%ymm2,%ymm1,%ymm0
DB 196,193,60,88,201 ; vaddps %ymm9,%ymm8,%ymm1
- DB 196,98,125,24,37,121,77,0,0 ; vbroadcastss 0x4d79(%rip),%ymm12 # 65ec <_sk_callback_avx+0x20d>
+ DB 196,98,125,24,37,101,76,0,0 ; vbroadcastss 0x4c65(%rip),%ymm12 # 64d8 <_sk_callback_avx+0x20d>
DB 196,193,116,89,212 ; vmulps %ymm12,%ymm1,%ymm2
DB 197,28,194,226,1 ; vcmpltps %ymm2,%ymm12,%ymm12
DB 196,65,36,92,216 ; vsubps %ymm8,%ymm11,%ymm11
@@ -6489,7 +6489,7 @@ _sk_rgb_to_hsl_avx LABEL PROC
DB 197,172,94,201 ; vdivps %ymm1,%ymm10,%ymm1
DB 196,195,125,74,198,128 ; vblendvps %ymm8,%ymm14,%ymm0,%ymm0
DB 196,195,117,74,206,128 ; vblendvps %ymm8,%ymm14,%ymm1,%ymm1
- DB 196,98,125,24,5,60,77,0,0 ; vbroadcastss 0x4d3c(%rip),%ymm8 # 65e8 <_sk_callback_avx+0x209>
+ DB 196,98,125,24,5,40,76,0,0 ; vbroadcastss 0x4c28(%rip),%ymm8 # 64d4 <_sk_callback_avx+0x209>
DB 196,193,124,89,192 ; vmulps %ymm8,%ymm0,%ymm0
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -6504,7 +6504,7 @@ _sk_hsl_to_rgb_avx LABEL PROC
DB 197,252,17,28,36 ; vmovups %ymm3,(%rsp)
DB 197,252,40,225 ; vmovaps %ymm1,%ymm4
DB 197,252,40,216 ; vmovaps %ymm0,%ymm3
- DB 196,98,125,24,5,3,77,0,0 ; vbroadcastss 0x4d03(%rip),%ymm8 # 65f0 <_sk_callback_avx+0x211>
+ DB 196,98,125,24,5,239,75,0,0 ; vbroadcastss 0x4bef(%rip),%ymm8 # 64dc <_sk_callback_avx+0x211>
DB 197,60,194,202,2 ; vcmpleps %ymm2,%ymm8,%ymm9
DB 197,92,89,210 ; vmulps %ymm2,%ymm4,%ymm10
DB 196,65,92,92,218 ; vsubps %ymm10,%ymm4,%ymm11
@@ -6512,23 +6512,23 @@ _sk_hsl_to_rgb_avx LABEL PROC
DB 197,52,88,210 ; vaddps %ymm2,%ymm9,%ymm10
DB 197,108,88,202 ; vaddps %ymm2,%ymm2,%ymm9
DB 196,65,52,92,202 ; vsubps %ymm10,%ymm9,%ymm9
- DB 196,98,125,24,29,221,76,0,0 ; vbroadcastss 0x4cdd(%rip),%ymm11 # 65f4 <_sk_callback_avx+0x215>
+ DB 196,98,125,24,29,201,75,0,0 ; vbroadcastss 0x4bc9(%rip),%ymm11 # 64e0 <_sk_callback_avx+0x215>
DB 196,65,100,88,219 ; vaddps %ymm11,%ymm3,%ymm11
DB 196,67,125,8,227,1 ; vroundps $0x1,%ymm11,%ymm12
DB 196,65,36,92,252 ; vsubps %ymm12,%ymm11,%ymm15
DB 196,65,44,92,217 ; vsubps %ymm9,%ymm10,%ymm11
- DB 196,98,125,24,37,199,76,0,0 ; vbroadcastss 0x4cc7(%rip),%ymm12 # 65fc <_sk_callback_avx+0x21d>
+ DB 196,98,125,24,37,179,75,0,0 ; vbroadcastss 0x4bb3(%rip),%ymm12 # 64e8 <_sk_callback_avx+0x21d>
DB 196,193,4,89,196 ; vmulps %ymm12,%ymm15,%ymm0
- DB 196,98,125,24,45,189,76,0,0 ; vbroadcastss 0x4cbd(%rip),%ymm13 # 6600 <_sk_callback_avx+0x221>
+ DB 196,98,125,24,45,169,75,0,0 ; vbroadcastss 0x4ba9(%rip),%ymm13 # 64ec <_sk_callback_avx+0x221>
DB 197,20,92,240 ; vsubps %ymm0,%ymm13,%ymm14
DB 196,65,36,89,246 ; vmulps %ymm14,%ymm11,%ymm14
DB 196,65,52,88,246 ; vaddps %ymm14,%ymm9,%ymm14
- DB 196,226,125,24,13,158,76,0,0 ; vbroadcastss 0x4c9e(%rip),%ymm1 # 65f8 <_sk_callback_avx+0x219>
+ DB 196,226,125,24,13,138,75,0,0 ; vbroadcastss 0x4b8a(%rip),%ymm1 # 64e4 <_sk_callback_avx+0x219>
DB 196,193,116,194,255,2 ; vcmpleps %ymm15,%ymm1,%ymm7
DB 196,195,13,74,249,112 ; vblendvps %ymm7,%ymm9,%ymm14,%ymm7
DB 196,65,60,194,247,2 ; vcmpleps %ymm15,%ymm8,%ymm14
DB 196,227,45,74,255,224 ; vblendvps %ymm14,%ymm7,%ymm10,%ymm7
- DB 196,98,125,24,53,137,76,0,0 ; vbroadcastss 0x4c89(%rip),%ymm14 # 6604 <_sk_callback_avx+0x225>
+ DB 196,98,125,24,53,117,75,0,0 ; vbroadcastss 0x4b75(%rip),%ymm14 # 64f0 <_sk_callback_avx+0x225>
DB 196,65,12,194,255,2 ; vcmpleps %ymm15,%ymm14,%ymm15
DB 196,193,124,89,195 ; vmulps %ymm11,%ymm0,%ymm0
DB 197,180,88,192 ; vaddps %ymm0,%ymm9,%ymm0
@@ -6547,7 +6547,7 @@ _sk_hsl_to_rgb_avx LABEL PROC
DB 197,164,89,247 ; vmulps %ymm7,%ymm11,%ymm6
DB 197,180,88,246 ; vaddps %ymm6,%ymm9,%ymm6
DB 196,227,77,74,237,0 ; vblendvps %ymm0,%ymm5,%ymm6,%ymm5
- DB 196,226,125,24,5,43,76,0,0 ; vbroadcastss 0x4c2b(%rip),%ymm0 # 6608 <_sk_callback_avx+0x229>
+ DB 196,226,125,24,5,23,75,0,0 ; vbroadcastss 0x4b17(%rip),%ymm0 # 64f4 <_sk_callback_avx+0x229>
DB 197,228,88,192 ; vaddps %ymm0,%ymm3,%ymm0
DB 196,227,125,8,216,1 ; vroundps $0x1,%ymm0,%ymm3
DB 197,252,92,195 ; vsubps %ymm3,%ymm0,%ymm0
@@ -6602,7 +6602,7 @@ _sk_scale_u8_avx LABEL PROC
DB 196,66,121,49,192 ; vpmovzxbd %xmm8,%xmm8
DB 196,67,53,24,192,1 ; vinsertf128 $0x1,%xmm8,%ymm9,%ymm8
DB 196,65,124,91,192 ; vcvtdq2ps %ymm8,%ymm8
- DB 196,98,125,24,13,78,75,0,0 ; vbroadcastss 0x4b4e(%rip),%ymm9 # 660c <_sk_callback_avx+0x22d>
+ DB 196,98,125,24,13,58,74,0,0 ; vbroadcastss 0x4a3a(%rip),%ymm9 # 64f8 <_sk_callback_avx+0x22d>
DB 196,65,60,89,193 ; vmulps %ymm9,%ymm8,%ymm8
DB 197,188,89,192 ; vmulps %ymm0,%ymm8,%ymm0
DB 197,188,89,201 ; vmulps %ymm1,%ymm8,%ymm1
@@ -6657,7 +6657,7 @@ _sk_lerp_u8_avx LABEL PROC
DB 196,66,121,49,192 ; vpmovzxbd %xmm8,%xmm8
DB 196,67,53,24,192,1 ; vinsertf128 $0x1,%xmm8,%ymm9,%ymm8
DB 196,65,124,91,192 ; vcvtdq2ps %ymm8,%ymm8
- DB 196,98,125,24,13,154,74,0,0 ; vbroadcastss 0x4a9a(%rip),%ymm9 # 6610 <_sk_callback_avx+0x231>
+ DB 196,98,125,24,13,134,73,0,0 ; vbroadcastss 0x4986(%rip),%ymm9 # 64fc <_sk_callback_avx+0x231>
DB 196,65,60,89,193 ; vmulps %ymm9,%ymm8,%ymm8
DB 197,252,92,196 ; vsubps %ymm4,%ymm0,%ymm0
DB 196,193,124,89,192 ; vmulps %ymm8,%ymm0,%ymm0
@@ -6698,20 +6698,20 @@ _sk_lerp_565_avx LABEL PROC
DB 196,65,57,105,201 ; vpunpckhwd %xmm9,%xmm8,%xmm9
DB 196,66,121,51,192 ; vpmovzxwd %xmm8,%xmm8
DB 196,67,61,24,193,1 ; vinsertf128 $0x1,%xmm9,%ymm8,%ymm8
- DB 196,98,125,24,13,4,74,0,0 ; vbroadcastss 0x4a04(%rip),%ymm9 # 6614 <_sk_callback_avx+0x235>
+ DB 196,98,125,24,13,240,72,0,0 ; vbroadcastss 0x48f0(%rip),%ymm9 # 6500 <_sk_callback_avx+0x235>
DB 196,65,60,84,201 ; vandps %ymm9,%ymm8,%ymm9
DB 196,65,124,91,201 ; vcvtdq2ps %ymm9,%ymm9
- DB 196,98,125,24,21,245,73,0,0 ; vbroadcastss 0x49f5(%rip),%ymm10 # 6618 <_sk_callback_avx+0x239>
+ DB 196,98,125,24,21,225,72,0,0 ; vbroadcastss 0x48e1(%rip),%ymm10 # 6504 <_sk_callback_avx+0x239>
DB 196,65,52,89,202 ; vmulps %ymm10,%ymm9,%ymm9
- DB 196,98,125,24,21,235,73,0,0 ; vbroadcastss 0x49eb(%rip),%ymm10 # 661c <_sk_callback_avx+0x23d>
+ DB 196,98,125,24,21,215,72,0,0 ; vbroadcastss 0x48d7(%rip),%ymm10 # 6508 <_sk_callback_avx+0x23d>
DB 196,65,60,84,210 ; vandps %ymm10,%ymm8,%ymm10
DB 196,65,124,91,210 ; vcvtdq2ps %ymm10,%ymm10
- DB 196,98,125,24,29,220,73,0,0 ; vbroadcastss 0x49dc(%rip),%ymm11 # 6620 <_sk_callback_avx+0x241>
+ DB 196,98,125,24,29,200,72,0,0 ; vbroadcastss 0x48c8(%rip),%ymm11 # 650c <_sk_callback_avx+0x241>
DB 196,65,44,89,211 ; vmulps %ymm11,%ymm10,%ymm10
- DB 196,98,125,24,29,210,73,0,0 ; vbroadcastss 0x49d2(%rip),%ymm11 # 6624 <_sk_callback_avx+0x245>
+ DB 196,98,125,24,29,190,72,0,0 ; vbroadcastss 0x48be(%rip),%ymm11 # 6510 <_sk_callback_avx+0x245>
DB 196,65,60,84,195 ; vandps %ymm11,%ymm8,%ymm8
DB 196,65,124,91,192 ; vcvtdq2ps %ymm8,%ymm8
- DB 196,98,125,24,29,195,73,0,0 ; vbroadcastss 0x49c3(%rip),%ymm11 # 6628 <_sk_callback_avx+0x249>
+ DB 196,98,125,24,29,175,72,0,0 ; vbroadcastss 0x48af(%rip),%ymm11 # 6514 <_sk_callback_avx+0x249>
DB 196,65,60,89,195 ; vmulps %ymm11,%ymm8,%ymm8
DB 197,252,92,196 ; vsubps %ymm4,%ymm0,%ymm0
DB 196,193,124,89,193 ; vmulps %ymm9,%ymm0,%ymm0
@@ -6780,19 +6780,22 @@ _sk_lerp_565_avx LABEL PROC
PUBLIC _sk_load_tables_avx
_sk_load_tables_avx LABEL PROC
+ DB 73,137,200 ; mov %rcx,%r8
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 76,139,0 ; mov (%rax),%r8
- DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,26,2,0,0 ; jne 1f70 <_sk_load_tables_avx+0x228>
- DB 196,65,124,16,4,184 ; vmovups (%r8,%rdi,4),%ymm8
+ DB 76,141,12,189,0,0,0,0 ; lea 0x0(,%rdi,4),%r9
+ DB 76,3,8 ; add (%rax),%r9
+ DB 77,133,192 ; test %r8,%r8
+ DB 15,133,31,2,0,0 ; jne 1f80 <_sk_load_tables_avx+0x238>
+ DB 196,65,124,16,17 ; vmovups (%r9),%ymm10
DB 85 ; push %rbp
DB 65,87 ; push %r15
DB 65,86 ; push %r14
DB 65,85 ; push %r13
DB 65,84 ; push %r12
DB 83 ; push %rbx
- DB 197,124,40,13,146,75,0,0 ; vmovaps 0x4b92(%rip),%ymm9 # 6900 <_sk_callback_avx+0x521>
- DB 196,193,60,84,193 ; vandps %ymm9,%ymm8,%ymm0
+ DB 80 ; push %rax
+ DB 197,124,40,13,7,75,0,0 ; vmovaps 0x4b07(%rip),%ymm9 # 6880 <_sk_callback_avx+0x5b5>
+ DB 196,193,44,84,193 ; vandps %ymm9,%ymm10,%ymm0
DB 196,193,249,126,193 ; vmovq %xmm0,%r9
DB 69,137,203 ; mov %r9d,%r11d
DB 196,195,249,22,194,1 ; vpextrq $0x1,%xmm0,%r10
@@ -6800,26 +6803,26 @@ _sk_load_tables_avx LABEL PROC
DB 73,193,234,32 ; shr $0x20,%r10
DB 73,193,233,32 ; shr $0x20,%r9
DB 196,227,125,25,192,1 ; vextractf128 $0x1,%ymm0,%xmm0
- DB 196,193,249,126,196 ; vmovq %xmm0,%r12
- DB 69,137,231 ; mov %r12d,%r15d
- DB 196,227,249,22,195,1 ; vpextrq $0x1,%xmm0,%rbx
- DB 65,137,221 ; mov %ebx,%r13d
+ DB 196,225,249,126,195 ; vmovq %xmm0,%rbx
+ DB 65,137,223 ; mov %ebx,%r15d
+ DB 196,227,249,22,193,1 ; vpextrq $0x1,%xmm0,%rcx
+ DB 65,137,205 ; mov %ecx,%r13d
+ DB 72,193,233,32 ; shr $0x20,%rcx
DB 72,193,235,32 ; shr $0x20,%rbx
- DB 73,193,236,32 ; shr $0x20,%r12
DB 72,139,104,8 ; mov 0x8(%rax),%rbp
- DB 76,139,64,16 ; mov 0x10(%rax),%r8
+ DB 76,139,96,16 ; mov 0x10(%rax),%r12
DB 196,161,122,16,68,189,0 ; vmovss 0x0(%rbp,%r15,4),%xmm0
- DB 196,163,121,33,68,165,0,16 ; vinsertps $0x10,0x0(%rbp,%r12,4),%xmm0,%xmm0
+ DB 196,227,121,33,68,157,0,16 ; vinsertps $0x10,0x0(%rbp,%rbx,4),%xmm0,%xmm0
DB 196,163,121,33,68,173,0,32 ; vinsertps $0x20,0x0(%rbp,%r13,4),%xmm0,%xmm0
- DB 196,227,121,33,68,157,0,48 ; vinsertps $0x30,0x0(%rbp,%rbx,4),%xmm0,%xmm0
+ DB 196,227,121,33,68,141,0,48 ; vinsertps $0x30,0x0(%rbp,%rcx,4),%xmm0,%xmm0
DB 196,161,122,16,76,157,0 ; vmovss 0x0(%rbp,%r11,4),%xmm1
DB 196,163,113,33,76,141,0,16 ; vinsertps $0x10,0x0(%rbp,%r9,4),%xmm1,%xmm1
DB 196,163,113,33,76,181,0,32 ; vinsertps $0x20,0x0(%rbp,%r14,4),%xmm1,%xmm1
DB 196,163,113,33,76,149,0,48 ; vinsertps $0x30,0x0(%rbp,%r10,4),%xmm1,%xmm1
DB 196,227,117,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
- DB 196,193,113,114,208,8 ; vpsrld $0x8,%xmm8,%xmm1
- DB 196,67,125,25,194,1 ; vextractf128 $0x1,%ymm8,%xmm10
- DB 196,193,105,114,210,8 ; vpsrld $0x8,%xmm10,%xmm2
+ DB 196,193,113,114,210,8 ; vpsrld $0x8,%xmm10,%xmm1
+ DB 196,67,125,25,208,1 ; vextractf128 $0x1,%ymm10,%xmm8
+ DB 196,193,105,114,208,8 ; vpsrld $0x8,%xmm8,%xmm2
DB 196,227,117,24,202,1 ; vinsertf128 $0x1,%xmm2,%ymm1,%ymm1
DB 196,193,116,84,201 ; vandps %ymm9,%ymm1,%ymm1
DB 196,193,249,126,201 ; vmovq %xmm1,%r9
@@ -6829,36 +6832,36 @@ _sk_load_tables_avx LABEL PROC
DB 73,193,234,32 ; shr $0x20,%r10
DB 73,193,233,32 ; shr $0x20,%r9
DB 196,227,125,25,201,1 ; vextractf128 $0x1,%ymm1,%xmm1
- DB 196,225,249,126,205 ; vmovq %xmm1,%rbp
- DB 65,137,239 ; mov %ebp,%r15d
- DB 196,227,249,22,203,1 ; vpextrq $0x1,%xmm1,%rbx
- DB 65,137,220 ; mov %ebx,%r12d
- DB 72,193,235,32 ; shr $0x20,%rbx
+ DB 196,225,249,126,203 ; vmovq %xmm1,%rbx
+ DB 65,137,223 ; mov %ebx,%r15d
+ DB 196,227,249,22,205,1 ; vpextrq $0x1,%xmm1,%rbp
+ DB 137,233 ; mov %ebp,%ecx
DB 72,193,237,32 ; shr $0x20,%rbp
- DB 196,129,122,16,12,184 ; vmovss (%r8,%r15,4),%xmm1
- DB 196,195,113,33,12,168,16 ; vinsertps $0x10,(%r8,%rbp,4),%xmm1,%xmm1
- DB 196,129,122,16,20,160 ; vmovss (%r8,%r12,4),%xmm2
+ DB 72,193,235,32 ; shr $0x20,%rbx
+ DB 196,129,122,16,12,188 ; vmovss (%r12,%r15,4),%xmm1
+ DB 196,195,113,33,12,156,16 ; vinsertps $0x10,(%r12,%rbx,4),%xmm1,%xmm1
+ DB 196,193,122,16,20,140 ; vmovss (%r12,%rcx,4),%xmm2
DB 196,227,113,33,202,32 ; vinsertps $0x20,%xmm2,%xmm1,%xmm1
- DB 196,193,122,16,20,152 ; vmovss (%r8,%rbx,4),%xmm2
+ DB 196,193,122,16,20,172 ; vmovss (%r12,%rbp,4),%xmm2
DB 196,227,113,33,202,48 ; vinsertps $0x30,%xmm2,%xmm1,%xmm1
- DB 196,129,122,16,20,152 ; vmovss (%r8,%r11,4),%xmm2
- DB 196,131,105,33,20,136,16 ; vinsertps $0x10,(%r8,%r9,4),%xmm2,%xmm2
- DB 196,129,122,16,28,176 ; vmovss (%r8,%r14,4),%xmm3
+ DB 196,129,122,16,20,156 ; vmovss (%r12,%r11,4),%xmm2
+ DB 196,131,105,33,20,140,16 ; vinsertps $0x10,(%r12,%r9,4),%xmm2,%xmm2
+ DB 196,129,122,16,28,180 ; vmovss (%r12,%r14,4),%xmm3
DB 196,227,105,33,211,32 ; vinsertps $0x20,%xmm3,%xmm2,%xmm2
- DB 196,129,122,16,28,144 ; vmovss (%r8,%r10,4),%xmm3
+ DB 196,129,122,16,28,148 ; vmovss (%r12,%r10,4),%xmm3
DB 196,227,105,33,211,48 ; vinsertps $0x30,%xmm3,%xmm2,%xmm2
DB 196,227,109,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm2,%ymm1
DB 72,139,64,24 ; mov 0x18(%rax),%rax
- DB 196,193,105,114,208,16 ; vpsrld $0x10,%xmm8,%xmm2
- DB 196,193,97,114,210,16 ; vpsrld $0x10,%xmm10,%xmm3
+ DB 196,193,105,114,210,16 ; vpsrld $0x10,%xmm10,%xmm2
+ DB 196,193,97,114,208,16 ; vpsrld $0x10,%xmm8,%xmm3
DB 196,227,109,24,211,1 ; vinsertf128 $0x1,%xmm3,%ymm2,%ymm2
DB 196,193,108,84,209 ; vandps %ymm9,%ymm2,%ymm2
- DB 196,193,249,126,208 ; vmovq %xmm2,%r8
- DB 69,137,194 ; mov %r8d,%r10d
- DB 196,195,249,22,209,1 ; vpextrq $0x1,%xmm2,%r9
- DB 69,137,203 ; mov %r9d,%r11d
+ DB 196,193,249,126,209 ; vmovq %xmm2,%r9
+ DB 69,137,202 ; mov %r9d,%r10d
+ DB 196,227,249,22,209,1 ; vpextrq $0x1,%xmm2,%rcx
+ DB 65,137,203 ; mov %ecx,%r11d
+ DB 72,193,233,32 ; shr $0x20,%rcx
DB 73,193,233,32 ; shr $0x20,%r9
- DB 73,193,232,32 ; shr $0x20,%r8
DB 196,227,125,25,210,1 ; vextractf128 $0x1,%ymm2,%xmm2
DB 196,225,249,126,213 ; vmovq %xmm2,%rbp
DB 65,137,238 ; mov %ebp,%r14d
@@ -6873,19 +6876,21 @@ _sk_load_tables_avx LABEL PROC
DB 197,250,16,28,152 ; vmovss (%rax,%rbx,4),%xmm3
DB 196,99,105,33,203,48 ; vinsertps $0x30,%xmm3,%xmm2,%xmm9
DB 196,161,122,16,28,144 ; vmovss (%rax,%r10,4),%xmm3
- DB 196,163,97,33,28,128,16 ; vinsertps $0x10,(%rax,%r8,4),%xmm3,%xmm3
+ DB 196,163,97,33,28,136,16 ; vinsertps $0x10,(%rax,%r9,4),%xmm3,%xmm3
DB 196,161,122,16,20,152 ; vmovss (%rax,%r11,4),%xmm2
DB 196,227,97,33,210,32 ; vinsertps $0x20,%xmm2,%xmm3,%xmm2
- DB 196,161,122,16,28,136 ; vmovss (%rax,%r9,4),%xmm3
+ DB 197,250,16,28,136 ; vmovss (%rax,%rcx,4),%xmm3
DB 196,227,105,33,211,48 ; vinsertps $0x30,%xmm3,%xmm2,%xmm2
DB 196,195,109,24,209,1 ; vinsertf128 $0x1,%xmm9,%ymm2,%ymm2
- DB 196,193,57,114,208,24 ; vpsrld $0x18,%xmm8,%xmm8
- DB 196,193,97,114,210,24 ; vpsrld $0x18,%xmm10,%xmm3
- DB 196,227,61,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm8,%ymm3
+ DB 196,193,49,114,210,24 ; vpsrld $0x18,%xmm10,%xmm9
+ DB 196,193,97,114,208,24 ; vpsrld $0x18,%xmm8,%xmm3
+ DB 196,227,53,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm9,%ymm3
DB 197,252,91,219 ; vcvtdq2ps %ymm3,%ymm3
- DB 196,98,125,24,5,207,70,0,0 ; vbroadcastss 0x46cf(%rip),%ymm8 # 662c <_sk_callback_avx+0x24d>
+ DB 196,98,125,24,5,178,69,0,0 ; vbroadcastss 0x45b2(%rip),%ymm8 # 6518 <_sk_callback_avx+0x24d>
DB 196,193,100,89,216 ; vmulps %ymm8,%ymm3,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 76,137,193 ; mov %r8,%rcx
+ DB 72,131,196,8 ; add $0x8,%rsp
DB 91 ; pop %rbx
DB 65,92 ; pop %r12
DB 65,93 ; pop %r13
@@ -6893,57 +6898,20 @@ _sk_load_tables_avx LABEL PROC
DB 65,95 ; pop %r15
DB 93 ; pop %rbp
DB 255,224 ; jmpq *%rax
- DB 65,137,201 ; mov %ecx,%r9d
- DB 65,128,225,7 ; and $0x7,%r9b
- DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
- DB 65,254,201 ; dec %r9b
- DB 65,128,249,6 ; cmp $0x6,%r9b
- DB 15,135,211,253,255,255 ; ja 1d5c <_sk_load_tables_avx+0x14>
- DB 69,15,182,201 ; movzbl %r9b,%r9d
- DB 76,141,21,140,0,0,0 ; lea 0x8c(%rip),%r10 # 2020 <_sk_load_tables_avx+0x2d8>
- DB 79,99,12,138 ; movslq (%r10,%r9,4),%r9
- DB 77,1,209 ; add %r10,%r9
- DB 65,255,225 ; jmpq *%r9
- DB 196,193,121,110,68,184,24 ; vmovd 0x18(%r8,%rdi,4),%xmm0
- DB 197,249,112,192,68 ; vpshufd $0x44,%xmm0,%xmm0
- DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
- DB 197,244,87,201 ; vxorps %ymm1,%ymm1,%ymm1
- DB 196,99,117,12,192,64 ; vblendps $0x40,%ymm0,%ymm1,%ymm8
- DB 196,99,125,25,192,1 ; vextractf128 $0x1,%ymm8,%xmm0
- DB 196,195,121,34,68,184,20,1 ; vpinsrd $0x1,0x14(%r8,%rdi,4),%xmm0,%xmm0
- DB 196,99,61,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm8,%ymm8
- DB 196,99,125,25,192,1 ; vextractf128 $0x1,%ymm8,%xmm0
- DB 196,195,121,34,68,184,16,0 ; vpinsrd $0x0,0x10(%r8,%rdi,4),%xmm0,%xmm0
- DB 196,99,61,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm8,%ymm8
- DB 196,195,57,34,68,184,12,3 ; vpinsrd $0x3,0xc(%r8,%rdi,4),%xmm8,%xmm0
- DB 196,99,61,12,192,15 ; vblendps $0xf,%ymm0,%ymm8,%ymm8
- DB 196,195,57,34,68,184,8,2 ; vpinsrd $0x2,0x8(%r8,%rdi,4),%xmm8,%xmm0
- DB 196,99,61,12,192,15 ; vblendps $0xf,%ymm0,%ymm8,%ymm8
- DB 196,195,57,34,68,184,4,1 ; vpinsrd $0x1,0x4(%r8,%rdi,4),%xmm8,%xmm0
- DB 196,99,61,12,192,15 ; vblendps $0xf,%ymm0,%ymm8,%ymm8
- DB 196,195,57,34,4,184,0 ; vpinsrd $0x0,(%r8,%rdi,4),%xmm8,%xmm0
- DB 196,99,61,12,192,15 ; vblendps $0xf,%ymm0,%ymm8,%ymm8
- DB 233,62,253,255,255 ; jmpq 1d5c <_sk_load_tables_avx+0x14>
- DB 102,144 ; xchg %ax,%ax
- DB 236 ; in (%dx),%al
- DB 255 ; (bad)
- DB 255 ; (bad)
- DB 255 ; (bad)
- DB 222,255 ; fdivrp %st,%st(7)
- DB 255 ; (bad)
- DB 255,208 ; callq *%rax
- DB 255 ; (bad)
- DB 255 ; (bad)
- DB 255,194 ; inc %edx
- DB 255 ; (bad)
- DB 255 ; (bad)
- DB 255,174,255,255,255,154 ; ljmp *-0x65000001(%rsi)
- DB 255 ; (bad)
- DB 255 ; (bad)
- DB 255 ; (bad)
- DB 126,255 ; jle 2039 <_sk_load_tables_avx+0x2f1>
- DB 255 ; (bad)
- DB 255 ; .byte 0xff
+ DB 185,8,0,0,0 ; mov $0x8,%ecx
+ DB 68,41,193 ; sub %r8d,%ecx
+ DB 192,225,3 ; shl $0x3,%cl
+ DB 73,199,194,255,255,255,255 ; mov $0xffffffffffffffff,%r10
+ DB 73,211,234 ; shr %cl,%r10
+ DB 196,193,249,110,194 ; vmovq %r10,%xmm0
+ DB 196,226,121,48,192 ; vpmovzxbw %xmm0,%xmm0
+ DB 196,226,121,0,13,72,72,0,0 ; vpshufb 0x4848(%rip),%xmm0,%xmm1 # 67f0 <_sk_callback_avx+0x525>
+ DB 196,226,121,33,201 ; vpmovsxbd %xmm1,%xmm1
+ DB 196,226,121,0,5,74,72,0,0 ; vpshufb 0x484a(%rip),%xmm0,%xmm0 # 6800 <_sk_callback_avx+0x535>
+ DB 196,226,121,33,192 ; vpmovsxbd %xmm0,%xmm0
+ DB 196,227,117,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
+ DB 196,66,125,44,17 ; vmaskmovps (%r9),%ymm0,%ymm10
+ DB 233,155,253,255,255 ; jmpq 1d66 <_sk_load_tables_avx+0x1e>
PUBLIC _sk_load_tables_u16_be_avx
_sk_load_tables_u16_be_avx LABEL PROC
@@ -6951,7 +6919,7 @@ _sk_load_tables_u16_be_avx LABEL PROC
DB 76,139,0 ; mov (%rax),%r8
DB 76,141,12,189,0,0,0,0 ; lea 0x0(,%rdi,4),%r9
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,113,2,0,0 ; jne 22c3 <_sk_load_tables_u16_be_avx+0x287>
+ DB 15,133,113,2,0,0 ; jne 2252 <_sk_load_tables_u16_be_avx+0x287>
DB 196,1,121,16,4,72 ; vmovupd (%r8,%r9,2),%xmm8
DB 196,129,121,16,84,72,16 ; vmovupd 0x10(%r8,%r9,2),%xmm2
DB 196,129,121,16,92,72,32 ; vmovupd 0x20(%r8,%r9,2),%xmm3
@@ -6973,7 +6941,7 @@ _sk_load_tables_u16_be_avx LABEL PROC
DB 197,177,108,208 ; vpunpcklqdq %xmm0,%xmm9,%xmm2
DB 197,177,109,200 ; vpunpckhqdq %xmm0,%xmm9,%xmm1
DB 196,65,57,108,212 ; vpunpcklqdq %xmm12,%xmm8,%xmm10
- DB 197,121,111,29,210,72,0,0 ; vmovdqa 0x48d2(%rip),%xmm11 # 6980 <_sk_callback_avx+0x5a1>
+ DB 197,121,111,29,211,71,0,0 ; vmovdqa 0x47d3(%rip),%xmm11 # 6810 <_sk_callback_avx+0x545>
DB 196,193,105,219,195 ; vpand %xmm11,%xmm2,%xmm0
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 196,193,121,105,209 ; vpunpckhwd %xmm9,%xmm0,%xmm2
@@ -7072,7 +7040,7 @@ _sk_load_tables_u16_be_avx LABEL PROC
DB 196,226,121,51,219 ; vpmovzxwd %xmm3,%xmm3
DB 196,195,101,24,216,1 ; vinsertf128 $0x1,%xmm8,%ymm3,%ymm3
DB 197,252,91,219 ; vcvtdq2ps %ymm3,%ymm3
- DB 196,98,125,24,5,128,67,0,0 ; vbroadcastss 0x4380(%rip),%ymm8 # 6630 <_sk_callback_avx+0x251>
+ DB 196,98,125,24,5,221,66,0,0 ; vbroadcastss 0x42dd(%rip),%ymm8 # 651c <_sk_callback_avx+0x251>
DB 196,193,100,89,216 ; vmulps %ymm8,%ymm3,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
DB 91 ; pop %rbx
@@ -7085,29 +7053,29 @@ _sk_load_tables_u16_be_avx LABEL PROC
DB 196,1,123,16,4,72 ; vmovsd (%r8,%r9,2),%xmm8
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,85 ; je 2329 <_sk_load_tables_u16_be_avx+0x2ed>
+ DB 116,85 ; je 22b8 <_sk_load_tables_u16_be_avx+0x2ed>
DB 196,1,57,22,68,72,8 ; vmovhpd 0x8(%r8,%r9,2),%xmm8,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,72 ; jb 2329 <_sk_load_tables_u16_be_avx+0x2ed>
+ DB 114,72 ; jb 22b8 <_sk_load_tables_u16_be_avx+0x2ed>
DB 196,129,123,16,84,72,16 ; vmovsd 0x10(%r8,%r9,2),%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 116,72 ; je 2336 <_sk_load_tables_u16_be_avx+0x2fa>
+ DB 116,72 ; je 22c5 <_sk_load_tables_u16_be_avx+0x2fa>
DB 196,129,105,22,84,72,24 ; vmovhpd 0x18(%r8,%r9,2),%xmm2,%xmm2
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,59 ; jb 2336 <_sk_load_tables_u16_be_avx+0x2fa>
+ DB 114,59 ; jb 22c5 <_sk_load_tables_u16_be_avx+0x2fa>
DB 196,129,123,16,92,72,32 ; vmovsd 0x20(%r8,%r9,2),%xmm3
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 15,132,97,253,255,255 ; je 206d <_sk_load_tables_u16_be_avx+0x31>
+ DB 15,132,97,253,255,255 ; je 1ffc <_sk_load_tables_u16_be_avx+0x31>
DB 196,129,97,22,92,72,40 ; vmovhpd 0x28(%r8,%r9,2),%xmm3,%xmm3
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 15,130,80,253,255,255 ; jb 206d <_sk_load_tables_u16_be_avx+0x31>
+ DB 15,130,80,253,255,255 ; jb 1ffc <_sk_load_tables_u16_be_avx+0x31>
DB 196,1,122,126,76,72,48 ; vmovq 0x30(%r8,%r9,2),%xmm9
- DB 233,68,253,255,255 ; jmpq 206d <_sk_load_tables_u16_be_avx+0x31>
+ DB 233,68,253,255,255 ; jmpq 1ffc <_sk_load_tables_u16_be_avx+0x31>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2
- DB 233,55,253,255,255 ; jmpq 206d <_sk_load_tables_u16_be_avx+0x31>
+ DB 233,55,253,255,255 ; jmpq 1ffc <_sk_load_tables_u16_be_avx+0x31>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
- DB 233,46,253,255,255 ; jmpq 206d <_sk_load_tables_u16_be_avx+0x31>
+ DB 233,46,253,255,255 ; jmpq 1ffc <_sk_load_tables_u16_be_avx+0x31>
PUBLIC _sk_load_tables_rgb_u16_be_avx
_sk_load_tables_rgb_u16_be_avx LABEL PROC
@@ -7115,7 +7083,7 @@ _sk_load_tables_rgb_u16_be_avx LABEL PROC
DB 76,139,0 ; mov (%rax),%r8
DB 76,141,12,127 ; lea (%rdi,%rdi,2),%r9
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,93,2,0,0 ; jne 25ae <_sk_load_tables_rgb_u16_be_avx+0x26f>
+ DB 15,133,93,2,0,0 ; jne 253d <_sk_load_tables_rgb_u16_be_avx+0x26f>
DB 196,129,122,111,4,72 ; vmovdqu (%r8,%r9,2),%xmm0
DB 196,129,122,111,84,72,12 ; vmovdqu 0xc(%r8,%r9,2),%xmm2
DB 196,129,122,111,76,72,24 ; vmovdqu 0x18(%r8,%r9,2),%xmm1
@@ -7142,7 +7110,7 @@ _sk_load_tables_rgb_u16_be_avx LABEL PROC
DB 197,185,108,202 ; vpunpcklqdq %xmm2,%xmm8,%xmm1
DB 197,185,109,210 ; vpunpckhqdq %xmm2,%xmm8,%xmm2
DB 197,121,108,195 ; vpunpcklqdq %xmm3,%xmm0,%xmm8
- DB 197,121,111,13,203,69,0,0 ; vmovdqa 0x45cb(%rip),%xmm9 # 6990 <_sk_callback_avx+0x5b1>
+ DB 197,121,111,13,204,68,0,0 ; vmovdqa 0x44cc(%rip),%xmm9 # 6820 <_sk_callback_avx+0x555>
DB 196,193,113,219,193 ; vpand %xmm9,%xmm1,%xmm0
DB 196,65,41,239,210 ; vpxor %xmm10,%xmm10,%xmm10
DB 196,193,121,105,202 ; vpunpckhwd %xmm10,%xmm0,%xmm1
@@ -7234,7 +7202,7 @@ _sk_load_tables_rgb_u16_be_avx LABEL PROC
DB 196,227,105,33,211,48 ; vinsertps $0x30,%xmm3,%xmm2,%xmm2
DB 196,195,109,24,208,1 ; vinsertf128 $0x1,%xmm8,%ymm2,%ymm2
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,29,146,64,0,0 ; vbroadcastss 0x4092(%rip),%ymm3 # 6634 <_sk_callback_avx+0x255>
+ DB 196,226,125,24,29,239,63,0,0 ; vbroadcastss 0x3fef(%rip),%ymm3 # 6520 <_sk_callback_avx+0x255>
DB 91 ; pop %rbx
DB 65,92 ; pop %r12
DB 65,93 ; pop %r13
@@ -7245,36 +7213,36 @@ _sk_load_tables_rgb_u16_be_avx LABEL PROC
DB 196,129,121,110,4,72 ; vmovd (%r8,%r9,2),%xmm0
DB 196,129,121,196,68,72,4,2 ; vpinsrw $0x2,0x4(%r8,%r9,2),%xmm0,%xmm0
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 117,5 ; jne 25c7 <_sk_load_tables_rgb_u16_be_avx+0x288>
- DB 233,190,253,255,255 ; jmpq 2385 <_sk_load_tables_rgb_u16_be_avx+0x46>
+ DB 117,5 ; jne 2556 <_sk_load_tables_rgb_u16_be_avx+0x288>
+ DB 233,190,253,255,255 ; jmpq 2314 <_sk_load_tables_rgb_u16_be_avx+0x46>
DB 196,129,121,110,76,72,6 ; vmovd 0x6(%r8,%r9,2),%xmm1
DB 196,1,113,196,68,72,10,2 ; vpinsrw $0x2,0xa(%r8,%r9,2),%xmm1,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,26 ; jb 25f6 <_sk_load_tables_rgb_u16_be_avx+0x2b7>
+ DB 114,26 ; jb 2585 <_sk_load_tables_rgb_u16_be_avx+0x2b7>
DB 196,129,121,110,76,72,12 ; vmovd 0xc(%r8,%r9,2),%xmm1
DB 196,129,113,196,84,72,16,2 ; vpinsrw $0x2,0x10(%r8,%r9,2),%xmm1,%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 117,10 ; jne 25fb <_sk_load_tables_rgb_u16_be_avx+0x2bc>
- DB 233,143,253,255,255 ; jmpq 2385 <_sk_load_tables_rgb_u16_be_avx+0x46>
- DB 233,138,253,255,255 ; jmpq 2385 <_sk_load_tables_rgb_u16_be_avx+0x46>
+ DB 117,10 ; jne 258a <_sk_load_tables_rgb_u16_be_avx+0x2bc>
+ DB 233,143,253,255,255 ; jmpq 2314 <_sk_load_tables_rgb_u16_be_avx+0x46>
+ DB 233,138,253,255,255 ; jmpq 2314 <_sk_load_tables_rgb_u16_be_avx+0x46>
DB 196,129,121,110,76,72,18 ; vmovd 0x12(%r8,%r9,2),%xmm1
DB 196,1,113,196,76,72,22,2 ; vpinsrw $0x2,0x16(%r8,%r9,2),%xmm1,%xmm9
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,26 ; jb 262a <_sk_load_tables_rgb_u16_be_avx+0x2eb>
+ DB 114,26 ; jb 25b9 <_sk_load_tables_rgb_u16_be_avx+0x2eb>
DB 196,129,121,110,76,72,24 ; vmovd 0x18(%r8,%r9,2),%xmm1
DB 196,129,113,196,76,72,28,2 ; vpinsrw $0x2,0x1c(%r8,%r9,2),%xmm1,%xmm1
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 117,10 ; jne 262f <_sk_load_tables_rgb_u16_be_avx+0x2f0>
- DB 233,91,253,255,255 ; jmpq 2385 <_sk_load_tables_rgb_u16_be_avx+0x46>
- DB 233,86,253,255,255 ; jmpq 2385 <_sk_load_tables_rgb_u16_be_avx+0x46>
+ DB 117,10 ; jne 25be <_sk_load_tables_rgb_u16_be_avx+0x2f0>
+ DB 233,91,253,255,255 ; jmpq 2314 <_sk_load_tables_rgb_u16_be_avx+0x46>
+ DB 233,86,253,255,255 ; jmpq 2314 <_sk_load_tables_rgb_u16_be_avx+0x46>
DB 196,129,121,110,92,72,30 ; vmovd 0x1e(%r8,%r9,2),%xmm3
DB 196,1,97,196,92,72,34,2 ; vpinsrw $0x2,0x22(%r8,%r9,2),%xmm3,%xmm11
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,20 ; jb 2658 <_sk_load_tables_rgb_u16_be_avx+0x319>
+ DB 114,20 ; jb 25e7 <_sk_load_tables_rgb_u16_be_avx+0x319>
DB 196,129,121,110,92,72,36 ; vmovd 0x24(%r8,%r9,2),%xmm3
DB 196,129,97,196,92,72,40,2 ; vpinsrw $0x2,0x28(%r8,%r9,2),%xmm3,%xmm3
- DB 233,45,253,255,255 ; jmpq 2385 <_sk_load_tables_rgb_u16_be_avx+0x46>
- DB 233,40,253,255,255 ; jmpq 2385 <_sk_load_tables_rgb_u16_be_avx+0x46>
+ DB 233,45,253,255,255 ; jmpq 2314 <_sk_load_tables_rgb_u16_be_avx+0x46>
+ DB 233,40,253,255,255 ; jmpq 2314 <_sk_load_tables_rgb_u16_be_avx+0x46>
PUBLIC _sk_byte_tables_avx
_sk_byte_tables_avx LABEL PROC
@@ -7285,7 +7253,7 @@ _sk_byte_tables_avx LABEL PROC
DB 65,84 ; push %r12
DB 83 ; push %rbx
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,98,125,24,5,198,63,0,0 ; vbroadcastss 0x3fc6(%rip),%ymm8 # 6638 <_sk_callback_avx+0x259>
+ DB 196,98,125,24,5,35,63,0,0 ; vbroadcastss 0x3f23(%rip),%ymm8 # 6524 <_sk_callback_avx+0x259>
DB 196,193,124,89,192 ; vmulps %ymm8,%ymm0,%ymm0
DB 197,253,91,192 ; vcvtps2dq %ymm0,%ymm0
DB 196,195,249,22,192,1 ; vpextrq $0x1,%xmm0,%r8
@@ -7322,7 +7290,7 @@ _sk_byte_tables_avx LABEL PROC
DB 196,226,121,49,192 ; vpmovzxbd %xmm0,%xmm0
DB 196,227,53,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm9,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,98,125,24,13,20,63,0,0 ; vbroadcastss 0x3f14(%rip),%ymm9 # 663c <_sk_callback_avx+0x25d>
+ DB 196,98,125,24,13,113,62,0,0 ; vbroadcastss 0x3e71(%rip),%ymm9 # 6528 <_sk_callback_avx+0x25d>
DB 196,193,124,89,193 ; vmulps %ymm9,%ymm0,%ymm0
DB 196,193,116,89,200 ; vmulps %ymm8,%ymm1,%ymm1
DB 197,253,91,201 ; vcvtps2dq %ymm1,%ymm1
@@ -7482,7 +7450,7 @@ _sk_byte_tables_rgb_avx LABEL PROC
DB 196,226,121,49,192 ; vpmovzxbd %xmm0,%xmm0
DB 196,227,53,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm9,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,98,125,24,13,58,60,0,0 ; vbroadcastss 0x3c3a(%rip),%ymm9 # 6640 <_sk_callback_avx+0x261>
+ DB 196,98,125,24,13,151,59,0,0 ; vbroadcastss 0x3b97(%rip),%ymm9 # 652c <_sk_callback_avx+0x261>
DB 196,193,124,89,193 ; vmulps %ymm9,%ymm0,%ymm0
DB 197,188,89,201 ; vmulps %ymm1,%ymm8,%ymm1
DB 197,253,91,201 ; vcvtps2dq %ymm1,%ymm1
@@ -7769,36 +7737,36 @@ _sk_parametric_r_avx LABEL PROC
DB 196,193,124,88,195 ; vaddps %ymm11,%ymm0,%ymm0
DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10
DB 197,124,91,216 ; vcvtdq2ps %ymm0,%ymm11
- DB 196,98,125,24,37,152,55,0,0 ; vbroadcastss 0x3798(%rip),%ymm12 # 6644 <_sk_callback_avx+0x265>
+ DB 196,98,125,24,37,245,54,0,0 ; vbroadcastss 0x36f5(%rip),%ymm12 # 6530 <_sk_callback_avx+0x265>
DB 196,65,36,89,220 ; vmulps %ymm12,%ymm11,%ymm11
- DB 196,98,125,24,37,142,55,0,0 ; vbroadcastss 0x378e(%rip),%ymm12 # 6648 <_sk_callback_avx+0x269>
+ DB 196,98,125,24,37,235,54,0,0 ; vbroadcastss 0x36eb(%rip),%ymm12 # 6534 <_sk_callback_avx+0x269>
DB 196,193,124,84,196 ; vandps %ymm12,%ymm0,%ymm0
- DB 196,98,125,24,37,132,55,0,0 ; vbroadcastss 0x3784(%rip),%ymm12 # 664c <_sk_callback_avx+0x26d>
+ DB 196,98,125,24,37,225,54,0,0 ; vbroadcastss 0x36e1(%rip),%ymm12 # 6538 <_sk_callback_avx+0x26d>
DB 196,193,124,86,196 ; vorps %ymm12,%ymm0,%ymm0
- DB 196,98,125,24,37,122,55,0,0 ; vbroadcastss 0x377a(%rip),%ymm12 # 6650 <_sk_callback_avx+0x271>
+ DB 196,98,125,24,37,215,54,0,0 ; vbroadcastss 0x36d7(%rip),%ymm12 # 653c <_sk_callback_avx+0x271>
DB 196,65,36,88,220 ; vaddps %ymm12,%ymm11,%ymm11
- DB 196,98,125,24,37,112,55,0,0 ; vbroadcastss 0x3770(%rip),%ymm12 # 6654 <_sk_callback_avx+0x275>
+ DB 196,98,125,24,37,205,54,0,0 ; vbroadcastss 0x36cd(%rip),%ymm12 # 6540 <_sk_callback_avx+0x275>
DB 196,65,124,89,228 ; vmulps %ymm12,%ymm0,%ymm12
DB 196,65,36,92,220 ; vsubps %ymm12,%ymm11,%ymm11
- DB 196,98,125,24,37,97,55,0,0 ; vbroadcastss 0x3761(%rip),%ymm12 # 6658 <_sk_callback_avx+0x279>
+ DB 196,98,125,24,37,190,54,0,0 ; vbroadcastss 0x36be(%rip),%ymm12 # 6544 <_sk_callback_avx+0x279>
DB 196,193,124,88,196 ; vaddps %ymm12,%ymm0,%ymm0
- DB 196,98,125,24,37,87,55,0,0 ; vbroadcastss 0x3757(%rip),%ymm12 # 665c <_sk_callback_avx+0x27d>
+ DB 196,98,125,24,37,180,54,0,0 ; vbroadcastss 0x36b4(%rip),%ymm12 # 6548 <_sk_callback_avx+0x27d>
DB 197,156,94,192 ; vdivps %ymm0,%ymm12,%ymm0
DB 197,164,92,192 ; vsubps %ymm0,%ymm11,%ymm0
DB 197,172,89,192 ; vmulps %ymm0,%ymm10,%ymm0
DB 196,99,125,8,208,1 ; vroundps $0x1,%ymm0,%ymm10
DB 196,65,124,92,210 ; vsubps %ymm10,%ymm0,%ymm10
- DB 196,98,125,24,29,59,55,0,0 ; vbroadcastss 0x373b(%rip),%ymm11 # 6660 <_sk_callback_avx+0x281>
+ DB 196,98,125,24,29,152,54,0,0 ; vbroadcastss 0x3698(%rip),%ymm11 # 654c <_sk_callback_avx+0x281>
DB 196,193,124,88,195 ; vaddps %ymm11,%ymm0,%ymm0
- DB 196,98,125,24,29,49,55,0,0 ; vbroadcastss 0x3731(%rip),%ymm11 # 6664 <_sk_callback_avx+0x285>
+ DB 196,98,125,24,29,142,54,0,0 ; vbroadcastss 0x368e(%rip),%ymm11 # 6550 <_sk_callback_avx+0x285>
DB 196,65,44,89,219 ; vmulps %ymm11,%ymm10,%ymm11
DB 196,193,124,92,195 ; vsubps %ymm11,%ymm0,%ymm0
- DB 196,98,125,24,29,34,55,0,0 ; vbroadcastss 0x3722(%rip),%ymm11 # 6668 <_sk_callback_avx+0x289>
+ DB 196,98,125,24,29,127,54,0,0 ; vbroadcastss 0x367f(%rip),%ymm11 # 6554 <_sk_callback_avx+0x289>
DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10
- DB 196,98,125,24,29,24,55,0,0 ; vbroadcastss 0x3718(%rip),%ymm11 # 666c <_sk_callback_avx+0x28d>
+ DB 196,98,125,24,29,117,54,0,0 ; vbroadcastss 0x3675(%rip),%ymm11 # 6558 <_sk_callback_avx+0x28d>
DB 196,65,36,94,210 ; vdivps %ymm10,%ymm11,%ymm10
DB 196,193,124,88,194 ; vaddps %ymm10,%ymm0,%ymm0
- DB 196,98,125,24,21,9,55,0,0 ; vbroadcastss 0x3709(%rip),%ymm10 # 6670 <_sk_callback_avx+0x291>
+ DB 196,98,125,24,21,102,54,0,0 ; vbroadcastss 0x3666(%rip),%ymm10 # 655c <_sk_callback_avx+0x291>
DB 196,193,124,89,194 ; vmulps %ymm10,%ymm0,%ymm0
DB 197,253,91,192 ; vcvtps2dq %ymm0,%ymm0
DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
@@ -7806,7 +7774,7 @@ _sk_parametric_r_avx LABEL PROC
DB 196,195,125,74,193,128 ; vblendvps %ymm8,%ymm9,%ymm0,%ymm0
DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
DB 196,193,124,95,192 ; vmaxps %ymm8,%ymm0,%ymm0
- DB 196,98,125,24,5,224,54,0,0 ; vbroadcastss 0x36e0(%rip),%ymm8 # 6674 <_sk_callback_avx+0x295>
+ DB 196,98,125,24,5,61,54,0,0 ; vbroadcastss 0x363d(%rip),%ymm8 # 6560 <_sk_callback_avx+0x295>
DB 196,193,124,93,192 ; vminps %ymm8,%ymm0,%ymm0
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -7826,36 +7794,36 @@ _sk_parametric_g_avx LABEL PROC
DB 196,193,116,88,203 ; vaddps %ymm11,%ymm1,%ymm1
DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10
DB 197,124,91,217 ; vcvtdq2ps %ymm1,%ymm11
- DB 196,98,125,24,37,145,54,0,0 ; vbroadcastss 0x3691(%rip),%ymm12 # 6678 <_sk_callback_avx+0x299>
+ DB 196,98,125,24,37,238,53,0,0 ; vbroadcastss 0x35ee(%rip),%ymm12 # 6564 <_sk_callback_avx+0x299>
DB 196,65,36,89,220 ; vmulps %ymm12,%ymm11,%ymm11
- DB 196,98,125,24,37,135,54,0,0 ; vbroadcastss 0x3687(%rip),%ymm12 # 667c <_sk_callback_avx+0x29d>
+ DB 196,98,125,24,37,228,53,0,0 ; vbroadcastss 0x35e4(%rip),%ymm12 # 6568 <_sk_callback_avx+0x29d>
DB 196,193,116,84,204 ; vandps %ymm12,%ymm1,%ymm1
- DB 196,98,125,24,37,125,54,0,0 ; vbroadcastss 0x367d(%rip),%ymm12 # 6680 <_sk_callback_avx+0x2a1>
+ DB 196,98,125,24,37,218,53,0,0 ; vbroadcastss 0x35da(%rip),%ymm12 # 656c <_sk_callback_avx+0x2a1>
DB 196,193,116,86,204 ; vorps %ymm12,%ymm1,%ymm1
- DB 196,98,125,24,37,115,54,0,0 ; vbroadcastss 0x3673(%rip),%ymm12 # 6684 <_sk_callback_avx+0x2a5>
+ DB 196,98,125,24,37,208,53,0,0 ; vbroadcastss 0x35d0(%rip),%ymm12 # 6570 <_sk_callback_avx+0x2a5>
DB 196,65,36,88,220 ; vaddps %ymm12,%ymm11,%ymm11
- DB 196,98,125,24,37,105,54,0,0 ; vbroadcastss 0x3669(%rip),%ymm12 # 6688 <_sk_callback_avx+0x2a9>
+ DB 196,98,125,24,37,198,53,0,0 ; vbroadcastss 0x35c6(%rip),%ymm12 # 6574 <_sk_callback_avx+0x2a9>
DB 196,65,116,89,228 ; vmulps %ymm12,%ymm1,%ymm12
DB 196,65,36,92,220 ; vsubps %ymm12,%ymm11,%ymm11
- DB 196,98,125,24,37,90,54,0,0 ; vbroadcastss 0x365a(%rip),%ymm12 # 668c <_sk_callback_avx+0x2ad>
+ DB 196,98,125,24,37,183,53,0,0 ; vbroadcastss 0x35b7(%rip),%ymm12 # 6578 <_sk_callback_avx+0x2ad>
DB 196,193,116,88,204 ; vaddps %ymm12,%ymm1,%ymm1
- DB 196,98,125,24,37,80,54,0,0 ; vbroadcastss 0x3650(%rip),%ymm12 # 6690 <_sk_callback_avx+0x2b1>
+ DB 196,98,125,24,37,173,53,0,0 ; vbroadcastss 0x35ad(%rip),%ymm12 # 657c <_sk_callback_avx+0x2b1>
DB 197,156,94,201 ; vdivps %ymm1,%ymm12,%ymm1
DB 197,164,92,201 ; vsubps %ymm1,%ymm11,%ymm1
DB 197,172,89,201 ; vmulps %ymm1,%ymm10,%ymm1
DB 196,99,125,8,209,1 ; vroundps $0x1,%ymm1,%ymm10
DB 196,65,116,92,210 ; vsubps %ymm10,%ymm1,%ymm10
- DB 196,98,125,24,29,52,54,0,0 ; vbroadcastss 0x3634(%rip),%ymm11 # 6694 <_sk_callback_avx+0x2b5>
+ DB 196,98,125,24,29,145,53,0,0 ; vbroadcastss 0x3591(%rip),%ymm11 # 6580 <_sk_callback_avx+0x2b5>
DB 196,193,116,88,203 ; vaddps %ymm11,%ymm1,%ymm1
- DB 196,98,125,24,29,42,54,0,0 ; vbroadcastss 0x362a(%rip),%ymm11 # 6698 <_sk_callback_avx+0x2b9>
+ DB 196,98,125,24,29,135,53,0,0 ; vbroadcastss 0x3587(%rip),%ymm11 # 6584 <_sk_callback_avx+0x2b9>
DB 196,65,44,89,219 ; vmulps %ymm11,%ymm10,%ymm11
DB 196,193,116,92,203 ; vsubps %ymm11,%ymm1,%ymm1
- DB 196,98,125,24,29,27,54,0,0 ; vbroadcastss 0x361b(%rip),%ymm11 # 669c <_sk_callback_avx+0x2bd>
+ DB 196,98,125,24,29,120,53,0,0 ; vbroadcastss 0x3578(%rip),%ymm11 # 6588 <_sk_callback_avx+0x2bd>
DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10
- DB 196,98,125,24,29,17,54,0,0 ; vbroadcastss 0x3611(%rip),%ymm11 # 66a0 <_sk_callback_avx+0x2c1>
+ DB 196,98,125,24,29,110,53,0,0 ; vbroadcastss 0x356e(%rip),%ymm11 # 658c <_sk_callback_avx+0x2c1>
DB 196,65,36,94,210 ; vdivps %ymm10,%ymm11,%ymm10
DB 196,193,116,88,202 ; vaddps %ymm10,%ymm1,%ymm1
- DB 196,98,125,24,21,2,54,0,0 ; vbroadcastss 0x3602(%rip),%ymm10 # 66a4 <_sk_callback_avx+0x2c5>
+ DB 196,98,125,24,21,95,53,0,0 ; vbroadcastss 0x355f(%rip),%ymm10 # 6590 <_sk_callback_avx+0x2c5>
DB 196,193,116,89,202 ; vmulps %ymm10,%ymm1,%ymm1
DB 197,253,91,201 ; vcvtps2dq %ymm1,%ymm1
DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
@@ -7863,7 +7831,7 @@ _sk_parametric_g_avx LABEL PROC
DB 196,195,117,74,201,128 ; vblendvps %ymm8,%ymm9,%ymm1,%ymm1
DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
DB 196,193,116,95,200 ; vmaxps %ymm8,%ymm1,%ymm1
- DB 196,98,125,24,5,217,53,0,0 ; vbroadcastss 0x35d9(%rip),%ymm8 # 66a8 <_sk_callback_avx+0x2c9>
+ DB 196,98,125,24,5,54,53,0,0 ; vbroadcastss 0x3536(%rip),%ymm8 # 6594 <_sk_callback_avx+0x2c9>
DB 196,193,116,93,200 ; vminps %ymm8,%ymm1,%ymm1
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -7883,36 +7851,36 @@ _sk_parametric_b_avx LABEL PROC
DB 196,193,108,88,211 ; vaddps %ymm11,%ymm2,%ymm2
DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10
DB 197,124,91,218 ; vcvtdq2ps %ymm2,%ymm11
- DB 196,98,125,24,37,138,53,0,0 ; vbroadcastss 0x358a(%rip),%ymm12 # 66ac <_sk_callback_avx+0x2cd>
+ DB 196,98,125,24,37,231,52,0,0 ; vbroadcastss 0x34e7(%rip),%ymm12 # 6598 <_sk_callback_avx+0x2cd>
DB 196,65,36,89,220 ; vmulps %ymm12,%ymm11,%ymm11
- DB 196,98,125,24,37,128,53,0,0 ; vbroadcastss 0x3580(%rip),%ymm12 # 66b0 <_sk_callback_avx+0x2d1>
+ DB 196,98,125,24,37,221,52,0,0 ; vbroadcastss 0x34dd(%rip),%ymm12 # 659c <_sk_callback_avx+0x2d1>
DB 196,193,108,84,212 ; vandps %ymm12,%ymm2,%ymm2
- DB 196,98,125,24,37,118,53,0,0 ; vbroadcastss 0x3576(%rip),%ymm12 # 66b4 <_sk_callback_avx+0x2d5>
+ DB 196,98,125,24,37,211,52,0,0 ; vbroadcastss 0x34d3(%rip),%ymm12 # 65a0 <_sk_callback_avx+0x2d5>
DB 196,193,108,86,212 ; vorps %ymm12,%ymm2,%ymm2
- DB 196,98,125,24,37,108,53,0,0 ; vbroadcastss 0x356c(%rip),%ymm12 # 66b8 <_sk_callback_avx+0x2d9>
+ DB 196,98,125,24,37,201,52,0,0 ; vbroadcastss 0x34c9(%rip),%ymm12 # 65a4 <_sk_callback_avx+0x2d9>
DB 196,65,36,88,220 ; vaddps %ymm12,%ymm11,%ymm11
- DB 196,98,125,24,37,98,53,0,0 ; vbroadcastss 0x3562(%rip),%ymm12 # 66bc <_sk_callback_avx+0x2dd>
+ DB 196,98,125,24,37,191,52,0,0 ; vbroadcastss 0x34bf(%rip),%ymm12 # 65a8 <_sk_callback_avx+0x2dd>
DB 196,65,108,89,228 ; vmulps %ymm12,%ymm2,%ymm12
DB 196,65,36,92,220 ; vsubps %ymm12,%ymm11,%ymm11
- DB 196,98,125,24,37,83,53,0,0 ; vbroadcastss 0x3553(%rip),%ymm12 # 66c0 <_sk_callback_avx+0x2e1>
+ DB 196,98,125,24,37,176,52,0,0 ; vbroadcastss 0x34b0(%rip),%ymm12 # 65ac <_sk_callback_avx+0x2e1>
DB 196,193,108,88,212 ; vaddps %ymm12,%ymm2,%ymm2
- DB 196,98,125,24,37,73,53,0,0 ; vbroadcastss 0x3549(%rip),%ymm12 # 66c4 <_sk_callback_avx+0x2e5>
+ DB 196,98,125,24,37,166,52,0,0 ; vbroadcastss 0x34a6(%rip),%ymm12 # 65b0 <_sk_callback_avx+0x2e5>
DB 197,156,94,210 ; vdivps %ymm2,%ymm12,%ymm2
DB 197,164,92,210 ; vsubps %ymm2,%ymm11,%ymm2
DB 197,172,89,210 ; vmulps %ymm2,%ymm10,%ymm2
DB 196,99,125,8,210,1 ; vroundps $0x1,%ymm2,%ymm10
DB 196,65,108,92,210 ; vsubps %ymm10,%ymm2,%ymm10
- DB 196,98,125,24,29,45,53,0,0 ; vbroadcastss 0x352d(%rip),%ymm11 # 66c8 <_sk_callback_avx+0x2e9>
+ DB 196,98,125,24,29,138,52,0,0 ; vbroadcastss 0x348a(%rip),%ymm11 # 65b4 <_sk_callback_avx+0x2e9>
DB 196,193,108,88,211 ; vaddps %ymm11,%ymm2,%ymm2
- DB 196,98,125,24,29,35,53,0,0 ; vbroadcastss 0x3523(%rip),%ymm11 # 66cc <_sk_callback_avx+0x2ed>
+ DB 196,98,125,24,29,128,52,0,0 ; vbroadcastss 0x3480(%rip),%ymm11 # 65b8 <_sk_callback_avx+0x2ed>
DB 196,65,44,89,219 ; vmulps %ymm11,%ymm10,%ymm11
DB 196,193,108,92,211 ; vsubps %ymm11,%ymm2,%ymm2
- DB 196,98,125,24,29,20,53,0,0 ; vbroadcastss 0x3514(%rip),%ymm11 # 66d0 <_sk_callback_avx+0x2f1>
+ DB 196,98,125,24,29,113,52,0,0 ; vbroadcastss 0x3471(%rip),%ymm11 # 65bc <_sk_callback_avx+0x2f1>
DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10
- DB 196,98,125,24,29,10,53,0,0 ; vbroadcastss 0x350a(%rip),%ymm11 # 66d4 <_sk_callback_avx+0x2f5>
+ DB 196,98,125,24,29,103,52,0,0 ; vbroadcastss 0x3467(%rip),%ymm11 # 65c0 <_sk_callback_avx+0x2f5>
DB 196,65,36,94,210 ; vdivps %ymm10,%ymm11,%ymm10
DB 196,193,108,88,210 ; vaddps %ymm10,%ymm2,%ymm2
- DB 196,98,125,24,21,251,52,0,0 ; vbroadcastss 0x34fb(%rip),%ymm10 # 66d8 <_sk_callback_avx+0x2f9>
+ DB 196,98,125,24,21,88,52,0,0 ; vbroadcastss 0x3458(%rip),%ymm10 # 65c4 <_sk_callback_avx+0x2f9>
DB 196,193,108,89,210 ; vmulps %ymm10,%ymm2,%ymm2
DB 197,253,91,210 ; vcvtps2dq %ymm2,%ymm2
DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
@@ -7920,7 +7888,7 @@ _sk_parametric_b_avx LABEL PROC
DB 196,195,109,74,209,128 ; vblendvps %ymm8,%ymm9,%ymm2,%ymm2
DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
DB 196,193,108,95,208 ; vmaxps %ymm8,%ymm2,%ymm2
- DB 196,98,125,24,5,210,52,0,0 ; vbroadcastss 0x34d2(%rip),%ymm8 # 66dc <_sk_callback_avx+0x2fd>
+ DB 196,98,125,24,5,47,52,0,0 ; vbroadcastss 0x342f(%rip),%ymm8 # 65c8 <_sk_callback_avx+0x2fd>
DB 196,193,108,93,208 ; vminps %ymm8,%ymm2,%ymm2
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -7940,36 +7908,36 @@ _sk_parametric_a_avx LABEL PROC
DB 196,193,100,88,219 ; vaddps %ymm11,%ymm3,%ymm3
DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10
DB 197,124,91,219 ; vcvtdq2ps %ymm3,%ymm11
- DB 196,98,125,24,37,131,52,0,0 ; vbroadcastss 0x3483(%rip),%ymm12 # 66e0 <_sk_callback_avx+0x301>
+ DB 196,98,125,24,37,224,51,0,0 ; vbroadcastss 0x33e0(%rip),%ymm12 # 65cc <_sk_callback_avx+0x301>
DB 196,65,36,89,220 ; vmulps %ymm12,%ymm11,%ymm11
- DB 196,98,125,24,37,121,52,0,0 ; vbroadcastss 0x3479(%rip),%ymm12 # 66e4 <_sk_callback_avx+0x305>
+ DB 196,98,125,24,37,214,51,0,0 ; vbroadcastss 0x33d6(%rip),%ymm12 # 65d0 <_sk_callback_avx+0x305>
DB 196,193,100,84,220 ; vandps %ymm12,%ymm3,%ymm3
- DB 196,98,125,24,37,111,52,0,0 ; vbroadcastss 0x346f(%rip),%ymm12 # 66e8 <_sk_callback_avx+0x309>
+ DB 196,98,125,24,37,204,51,0,0 ; vbroadcastss 0x33cc(%rip),%ymm12 # 65d4 <_sk_callback_avx+0x309>
DB 196,193,100,86,220 ; vorps %ymm12,%ymm3,%ymm3
- DB 196,98,125,24,37,101,52,0,0 ; vbroadcastss 0x3465(%rip),%ymm12 # 66ec <_sk_callback_avx+0x30d>
+ DB 196,98,125,24,37,194,51,0,0 ; vbroadcastss 0x33c2(%rip),%ymm12 # 65d8 <_sk_callback_avx+0x30d>
DB 196,65,36,88,220 ; vaddps %ymm12,%ymm11,%ymm11
- DB 196,98,125,24,37,91,52,0,0 ; vbroadcastss 0x345b(%rip),%ymm12 # 66f0 <_sk_callback_avx+0x311>
+ DB 196,98,125,24,37,184,51,0,0 ; vbroadcastss 0x33b8(%rip),%ymm12 # 65dc <_sk_callback_avx+0x311>
DB 196,65,100,89,228 ; vmulps %ymm12,%ymm3,%ymm12
DB 196,65,36,92,220 ; vsubps %ymm12,%ymm11,%ymm11
- DB 196,98,125,24,37,76,52,0,0 ; vbroadcastss 0x344c(%rip),%ymm12 # 66f4 <_sk_callback_avx+0x315>
+ DB 196,98,125,24,37,169,51,0,0 ; vbroadcastss 0x33a9(%rip),%ymm12 # 65e0 <_sk_callback_avx+0x315>
DB 196,193,100,88,220 ; vaddps %ymm12,%ymm3,%ymm3
- DB 196,98,125,24,37,66,52,0,0 ; vbroadcastss 0x3442(%rip),%ymm12 # 66f8 <_sk_callback_avx+0x319>
+ DB 196,98,125,24,37,159,51,0,0 ; vbroadcastss 0x339f(%rip),%ymm12 # 65e4 <_sk_callback_avx+0x319>
DB 197,156,94,219 ; vdivps %ymm3,%ymm12,%ymm3
DB 197,164,92,219 ; vsubps %ymm3,%ymm11,%ymm3
DB 197,172,89,219 ; vmulps %ymm3,%ymm10,%ymm3
DB 196,99,125,8,211,1 ; vroundps $0x1,%ymm3,%ymm10
DB 196,65,100,92,210 ; vsubps %ymm10,%ymm3,%ymm10
- DB 196,98,125,24,29,38,52,0,0 ; vbroadcastss 0x3426(%rip),%ymm11 # 66fc <_sk_callback_avx+0x31d>
+ DB 196,98,125,24,29,131,51,0,0 ; vbroadcastss 0x3383(%rip),%ymm11 # 65e8 <_sk_callback_avx+0x31d>
DB 196,193,100,88,219 ; vaddps %ymm11,%ymm3,%ymm3
- DB 196,98,125,24,29,28,52,0,0 ; vbroadcastss 0x341c(%rip),%ymm11 # 6700 <_sk_callback_avx+0x321>
+ DB 196,98,125,24,29,121,51,0,0 ; vbroadcastss 0x3379(%rip),%ymm11 # 65ec <_sk_callback_avx+0x321>
DB 196,65,44,89,219 ; vmulps %ymm11,%ymm10,%ymm11
DB 196,193,100,92,219 ; vsubps %ymm11,%ymm3,%ymm3
- DB 196,98,125,24,29,13,52,0,0 ; vbroadcastss 0x340d(%rip),%ymm11 # 6704 <_sk_callback_avx+0x325>
+ DB 196,98,125,24,29,106,51,0,0 ; vbroadcastss 0x336a(%rip),%ymm11 # 65f0 <_sk_callback_avx+0x325>
DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10
- DB 196,98,125,24,29,3,52,0,0 ; vbroadcastss 0x3403(%rip),%ymm11 # 6708 <_sk_callback_avx+0x329>
+ DB 196,98,125,24,29,96,51,0,0 ; vbroadcastss 0x3360(%rip),%ymm11 # 65f4 <_sk_callback_avx+0x329>
DB 196,65,36,94,210 ; vdivps %ymm10,%ymm11,%ymm10
DB 196,193,100,88,218 ; vaddps %ymm10,%ymm3,%ymm3
- DB 196,98,125,24,21,244,51,0,0 ; vbroadcastss 0x33f4(%rip),%ymm10 # 670c <_sk_callback_avx+0x32d>
+ DB 196,98,125,24,21,81,51,0,0 ; vbroadcastss 0x3351(%rip),%ymm10 # 65f8 <_sk_callback_avx+0x32d>
DB 196,193,100,89,218 ; vmulps %ymm10,%ymm3,%ymm3
DB 197,253,91,219 ; vcvtps2dq %ymm3,%ymm3
DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
@@ -7977,38 +7945,38 @@ _sk_parametric_a_avx LABEL PROC
DB 196,195,101,74,217,128 ; vblendvps %ymm8,%ymm9,%ymm3,%ymm3
DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
DB 196,193,100,95,216 ; vmaxps %ymm8,%ymm3,%ymm3
- DB 196,98,125,24,5,203,51,0,0 ; vbroadcastss 0x33cb(%rip),%ymm8 # 6710 <_sk_callback_avx+0x331>
+ DB 196,98,125,24,5,40,51,0,0 ; vbroadcastss 0x3328(%rip),%ymm8 # 65fc <_sk_callback_avx+0x331>
DB 196,193,100,93,216 ; vminps %ymm8,%ymm3,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
PUBLIC _sk_lab_to_xyz_avx
_sk_lab_to_xyz_avx LABEL PROC
- DB 196,98,125,24,5,189,51,0,0 ; vbroadcastss 0x33bd(%rip),%ymm8 # 6714 <_sk_callback_avx+0x335>
+ DB 196,98,125,24,5,26,51,0,0 ; vbroadcastss 0x331a(%rip),%ymm8 # 6600 <_sk_callback_avx+0x335>
DB 196,193,124,89,192 ; vmulps %ymm8,%ymm0,%ymm0
- DB 196,98,125,24,5,179,51,0,0 ; vbroadcastss 0x33b3(%rip),%ymm8 # 6718 <_sk_callback_avx+0x339>
+ DB 196,98,125,24,5,16,51,0,0 ; vbroadcastss 0x3310(%rip),%ymm8 # 6604 <_sk_callback_avx+0x339>
DB 196,193,116,89,200 ; vmulps %ymm8,%ymm1,%ymm1
- DB 196,98,125,24,13,169,51,0,0 ; vbroadcastss 0x33a9(%rip),%ymm9 # 671c <_sk_callback_avx+0x33d>
+ DB 196,98,125,24,13,6,51,0,0 ; vbroadcastss 0x3306(%rip),%ymm9 # 6608 <_sk_callback_avx+0x33d>
DB 196,193,116,88,201 ; vaddps %ymm9,%ymm1,%ymm1
DB 196,193,108,89,208 ; vmulps %ymm8,%ymm2,%ymm2
DB 196,193,108,88,209 ; vaddps %ymm9,%ymm2,%ymm2
- DB 196,98,125,24,5,149,51,0,0 ; vbroadcastss 0x3395(%rip),%ymm8 # 6720 <_sk_callback_avx+0x341>
+ DB 196,98,125,24,5,242,50,0,0 ; vbroadcastss 0x32f2(%rip),%ymm8 # 660c <_sk_callback_avx+0x341>
DB 196,193,124,88,192 ; vaddps %ymm8,%ymm0,%ymm0
- DB 196,98,125,24,5,139,51,0,0 ; vbroadcastss 0x338b(%rip),%ymm8 # 6724 <_sk_callback_avx+0x345>
+ DB 196,98,125,24,5,232,50,0,0 ; vbroadcastss 0x32e8(%rip),%ymm8 # 6610 <_sk_callback_avx+0x345>
DB 196,193,124,89,192 ; vmulps %ymm8,%ymm0,%ymm0
- DB 196,98,125,24,5,129,51,0,0 ; vbroadcastss 0x3381(%rip),%ymm8 # 6728 <_sk_callback_avx+0x349>
+ DB 196,98,125,24,5,222,50,0,0 ; vbroadcastss 0x32de(%rip),%ymm8 # 6614 <_sk_callback_avx+0x349>
DB 196,193,116,89,200 ; vmulps %ymm8,%ymm1,%ymm1
DB 197,252,88,201 ; vaddps %ymm1,%ymm0,%ymm1
- DB 196,98,125,24,5,115,51,0,0 ; vbroadcastss 0x3373(%rip),%ymm8 # 672c <_sk_callback_avx+0x34d>
+ DB 196,98,125,24,5,208,50,0,0 ; vbroadcastss 0x32d0(%rip),%ymm8 # 6618 <_sk_callback_avx+0x34d>
DB 196,193,108,89,208 ; vmulps %ymm8,%ymm2,%ymm2
DB 197,252,92,210 ; vsubps %ymm2,%ymm0,%ymm2
DB 197,116,89,193 ; vmulps %ymm1,%ymm1,%ymm8
DB 196,65,116,89,192 ; vmulps %ymm8,%ymm1,%ymm8
- DB 196,98,125,24,13,92,51,0,0 ; vbroadcastss 0x335c(%rip),%ymm9 # 6730 <_sk_callback_avx+0x351>
+ DB 196,98,125,24,13,185,50,0,0 ; vbroadcastss 0x32b9(%rip),%ymm9 # 661c <_sk_callback_avx+0x351>
DB 196,65,52,194,208,1 ; vcmpltps %ymm8,%ymm9,%ymm10
- DB 196,98,125,24,29,81,51,0,0 ; vbroadcastss 0x3351(%rip),%ymm11 # 6734 <_sk_callback_avx+0x355>
+ DB 196,98,125,24,29,174,50,0,0 ; vbroadcastss 0x32ae(%rip),%ymm11 # 6620 <_sk_callback_avx+0x355>
DB 196,193,116,88,203 ; vaddps %ymm11,%ymm1,%ymm1
- DB 196,98,125,24,37,71,51,0,0 ; vbroadcastss 0x3347(%rip),%ymm12 # 6738 <_sk_callback_avx+0x359>
+ DB 196,98,125,24,37,164,50,0,0 ; vbroadcastss 0x32a4(%rip),%ymm12 # 6624 <_sk_callback_avx+0x359>
DB 196,193,116,89,204 ; vmulps %ymm12,%ymm1,%ymm1
DB 196,67,117,74,192,160 ; vblendvps %ymm10,%ymm8,%ymm1,%ymm8
DB 197,252,89,200 ; vmulps %ymm0,%ymm0,%ymm1
@@ -8023,9 +7991,9 @@ _sk_lab_to_xyz_avx LABEL PROC
DB 196,193,108,88,211 ; vaddps %ymm11,%ymm2,%ymm2
DB 196,193,108,89,212 ; vmulps %ymm12,%ymm2,%ymm2
DB 196,227,109,74,208,144 ; vblendvps %ymm9,%ymm0,%ymm2,%ymm2
- DB 196,226,125,24,5,253,50,0,0 ; vbroadcastss 0x32fd(%rip),%ymm0 # 673c <_sk_callback_avx+0x35d>
+ DB 196,226,125,24,5,90,50,0,0 ; vbroadcastss 0x325a(%rip),%ymm0 # 6628 <_sk_callback_avx+0x35d>
DB 197,188,89,192 ; vmulps %ymm0,%ymm8,%ymm0
- DB 196,98,125,24,5,244,50,0,0 ; vbroadcastss 0x32f4(%rip),%ymm8 # 6740 <_sk_callback_avx+0x361>
+ DB 196,98,125,24,5,81,50,0,0 ; vbroadcastss 0x3251(%rip),%ymm8 # 662c <_sk_callback_avx+0x361>
DB 196,193,108,89,208 ; vmulps %ymm8,%ymm2,%ymm2
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -8037,14 +8005,14 @@ _sk_load_a8_avx LABEL PROC
DB 72,139,0 ; mov (%rax),%rax
DB 72,1,248 ; add %rdi,%rax
DB 77,133,192 ; test %r8,%r8
- DB 117,62 ; jne 34a3 <_sk_load_a8_avx+0x4e>
+ DB 117,62 ; jne 3432 <_sk_load_a8_avx+0x4e>
DB 197,250,126,0 ; vmovq (%rax),%xmm0
DB 196,226,121,49,200 ; vpmovzxbd %xmm0,%xmm1
DB 196,227,121,4,192,229 ; vpermilps $0xe5,%xmm0,%xmm0
DB 196,226,121,49,192 ; vpmovzxbd %xmm0,%xmm0
DB 196,227,117,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,226,125,24,13,184,50,0,0 ; vbroadcastss 0x32b8(%rip),%ymm1 # 6744 <_sk_callback_avx+0x365>
+ DB 196,226,125,24,13,21,50,0,0 ; vbroadcastss 0x3215(%rip),%ymm1 # 6630 <_sk_callback_avx+0x365>
DB 197,252,89,217 ; vmulps %ymm1,%ymm0,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
DB 197,252,87,192 ; vxorps %ymm0,%ymm0,%ymm0
@@ -8061,9 +8029,9 @@ _sk_load_a8_avx LABEL PROC
DB 77,9,217 ; or %r11,%r9
DB 72,131,193,8 ; add $0x8,%rcx
DB 73,255,202 ; dec %r10
- DB 117,234 ; jne 34ab <_sk_load_a8_avx+0x56>
+ DB 117,234 ; jne 343a <_sk_load_a8_avx+0x56>
DB 196,193,249,110,193 ; vmovq %r9,%xmm0
- DB 235,161 ; jmp 3469 <_sk_load_a8_avx+0x14>
+ DB 235,161 ; jmp 33f8 <_sk_load_a8_avx+0x14>
PUBLIC _sk_gather_a8_avx
_sk_gather_a8_avx LABEL PROC
@@ -8111,7 +8079,7 @@ _sk_gather_a8_avx LABEL PROC
DB 196,226,121,49,201 ; vpmovzxbd %xmm1,%xmm1
DB 196,227,125,24,193,1 ; vinsertf128 $0x1,%xmm1,%ymm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,226,125,24,13,173,49,0,0 ; vbroadcastss 0x31ad(%rip),%ymm1 # 6748 <_sk_callback_avx+0x369>
+ DB 196,226,125,24,13,10,49,0,0 ; vbroadcastss 0x310a(%rip),%ymm1 # 6634 <_sk_callback_avx+0x369>
DB 197,252,89,217 ; vmulps %ymm1,%ymm0,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
DB 197,252,87,192 ; vxorps %ymm0,%ymm0,%ymm0
@@ -8127,14 +8095,14 @@ PUBLIC _sk_store_a8_avx
_sk_store_a8_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
- DB 196,98,125,24,5,136,49,0,0 ; vbroadcastss 0x3188(%rip),%ymm8 # 674c <_sk_callback_avx+0x36d>
+ DB 196,98,125,24,5,229,48,0,0 ; vbroadcastss 0x30e5(%rip),%ymm8 # 6638 <_sk_callback_avx+0x36d>
DB 196,65,100,89,192 ; vmulps %ymm8,%ymm3,%ymm8
DB 196,65,125,91,192 ; vcvtps2dq %ymm8,%ymm8
DB 196,67,125,25,193,1 ; vextractf128 $0x1,%ymm8,%xmm9
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 196,65,57,103,192 ; vpackuswb %xmm8,%xmm8,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 35ed <_sk_store_a8_avx+0x37>
+ DB 117,10 ; jne 357c <_sk_store_a8_avx+0x37>
DB 196,65,123,17,4,58 ; vmovsd %xmm8,(%r10,%rdi,1)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -8142,10 +8110,10 @@ _sk_store_a8_avx LABEL PROC
DB 65,128,224,7 ; and $0x7,%r8b
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 35e9 <_sk_store_a8_avx+0x33>
+ DB 119,236 ; ja 3578 <_sk_store_a8_avx+0x33>
DB 196,66,121,48,192 ; vpmovzxbw %xmm8,%xmm8
DB 69,15,182,192 ; movzbl %r8b,%r8d
- DB 76,141,13,67,0,0,0 ; lea 0x43(%rip),%r9 # 3650 <_sk_store_a8_avx+0x9a>
+ DB 76,141,13,68,0,0,0 ; lea 0x44(%rip),%r9 # 35e0 <_sk_store_a8_avx+0x9b>
DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax
DB 76,1,200 ; add %r9,%rax
DB 255,224 ; jmpq *%rax
@@ -8156,27 +8124,28 @@ _sk_store_a8_avx LABEL PROC
DB 196,67,121,20,68,58,2,4 ; vpextrb $0x4,%xmm8,0x2(%r10,%rdi,1)
DB 196,67,121,20,68,58,1,2 ; vpextrb $0x2,%xmm8,0x1(%r10,%rdi,1)
DB 196,67,121,20,4,58,0 ; vpextrb $0x0,%xmm8,(%r10,%rdi,1)
- DB 235,154 ; jmp 35e9 <_sk_store_a8_avx+0x33>
- DB 144 ; nop
- DB 246,255 ; idiv %bh
+ DB 235,154 ; jmp 3578 <_sk_store_a8_avx+0x33>
+ DB 102,144 ; xchg %ax,%ax
+ DB 245 ; cmc
DB 255 ; (bad)
DB 255 ; (bad)
- DB 238 ; out %al,(%dx)
DB 255 ; (bad)
+ DB 237 ; in (%dx),%eax
DB 255 ; (bad)
- DB 255,230 ; jmpq *%rsi
DB 255 ; (bad)
+ DB 255,229 ; jmpq *%rbp
DB 255 ; (bad)
DB 255 ; (bad)
- DB 222,255 ; fdivrp %st,%st(7)
DB 255 ; (bad)
- DB 255,214 ; callq *%rsi
+ DB 221,255 ; (bad)
DB 255 ; (bad)
+ DB 255,213 ; callq *%rbp
DB 255 ; (bad)
- DB 255,206 ; dec %esi
DB 255 ; (bad)
+ DB 255,205 ; dec %ebp
DB 255 ; (bad)
- DB 255,198 ; inc %esi
+ DB 255 ; (bad)
+ DB 255,197 ; inc %ebp
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; .byte 0xff
@@ -8188,17 +8157,17 @@ _sk_load_g8_avx LABEL PROC
DB 72,139,0 ; mov (%rax),%rax
DB 72,1,248 ; add %rdi,%rax
DB 77,133,192 ; test %r8,%r8
- DB 117,67 ; jne 36bf <_sk_load_g8_avx+0x53>
+ DB 117,67 ; jne 364f <_sk_load_g8_avx+0x53>
DB 197,250,126,0 ; vmovq (%rax),%xmm0
DB 196,226,121,49,200 ; vpmovzxbd %xmm0,%xmm1
DB 196,227,121,4,192,229 ; vpermilps $0xe5,%xmm0,%xmm0
DB 196,226,121,49,192 ; vpmovzxbd %xmm0,%xmm0
DB 196,227,117,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,226,125,24,13,173,48,0,0 ; vbroadcastss 0x30ad(%rip),%ymm1 # 6750 <_sk_callback_avx+0x371>
+ DB 196,226,125,24,13,9,48,0,0 ; vbroadcastss 0x3009(%rip),%ymm1 # 663c <_sk_callback_avx+0x371>
DB 197,252,89,193 ; vmulps %ymm1,%ymm0,%ymm0
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,29,162,48,0,0 ; vbroadcastss 0x30a2(%rip),%ymm3 # 6754 <_sk_callback_avx+0x375>
+ DB 196,226,125,24,29,254,47,0,0 ; vbroadcastss 0x2ffe(%rip),%ymm3 # 6640 <_sk_callback_avx+0x375>
DB 76,137,193 ; mov %r8,%rcx
DB 197,252,40,200 ; vmovaps %ymm0,%ymm1
DB 197,252,40,208 ; vmovaps %ymm0,%ymm2
@@ -8212,9 +8181,9 @@ _sk_load_g8_avx LABEL PROC
DB 77,9,217 ; or %r11,%r9
DB 72,131,193,8 ; add $0x8,%rcx
DB 73,255,202 ; dec %r10
- DB 117,234 ; jne 36c7 <_sk_load_g8_avx+0x5b>
+ DB 117,234 ; jne 3657 <_sk_load_g8_avx+0x5b>
DB 196,193,249,110,193 ; vmovq %r9,%xmm0
- DB 235,156 ; jmp 3680 <_sk_load_g8_avx+0x14>
+ DB 235,156 ; jmp 3610 <_sk_load_g8_avx+0x14>
PUBLIC _sk_gather_g8_avx
_sk_gather_g8_avx LABEL PROC
@@ -8262,10 +8231,10 @@ _sk_gather_g8_avx LABEL PROC
DB 196,226,121,49,201 ; vpmovzxbd %xmm1,%xmm1
DB 196,227,125,24,193,1 ; vinsertf128 $0x1,%xmm1,%ymm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,226,125,24,13,161,47,0,0 ; vbroadcastss 0x2fa1(%rip),%ymm1 # 6758 <_sk_callback_avx+0x379>
+ DB 196,226,125,24,13,253,46,0,0 ; vbroadcastss 0x2efd(%rip),%ymm1 # 6644 <_sk_callback_avx+0x379>
DB 197,252,89,193 ; vmulps %ymm1,%ymm0,%ymm0
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,29,150,47,0,0 ; vbroadcastss 0x2f96(%rip),%ymm3 # 675c <_sk_callback_avx+0x37d>
+ DB 196,226,125,24,29,242,46,0,0 ; vbroadcastss 0x2ef2(%rip),%ymm3 # 6648 <_sk_callback_avx+0x37d>
DB 197,252,40,200 ; vmovaps %ymm0,%ymm1
DB 197,252,40,208 ; vmovaps %ymm0,%ymm2
DB 91 ; pop %rbx
@@ -8279,9 +8248,9 @@ _sk_gather_i8_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 73,137,192 ; mov %rax,%r8
DB 77,133,192 ; test %r8,%r8
- DB 116,5 ; je 37e6 <_sk_gather_i8_avx+0xf>
+ DB 116,5 ; je 3776 <_sk_gather_i8_avx+0xf>
DB 76,137,192 ; mov %r8,%rax
- DB 235,2 ; jmp 37e8 <_sk_gather_i8_avx+0x11>
+ DB 235,2 ; jmp 3778 <_sk_gather_i8_avx+0x11>
DB 72,173 ; lods %ds:(%rsi),%rax
DB 65,87 ; push %r15
DB 65,86 ; push %r14
@@ -8343,10 +8312,10 @@ _sk_gather_i8_avx LABEL PROC
DB 196,163,121,34,4,163,2 ; vpinsrd $0x2,(%rbx,%r12,4),%xmm0,%xmm0
DB 196,163,121,34,28,19,3 ; vpinsrd $0x3,(%rbx,%r10,1),%xmm0,%xmm3
DB 196,227,61,24,195,1 ; vinsertf128 $0x1,%xmm3,%ymm8,%ymm0
- DB 197,124,40,21,14,48,0,0 ; vmovaps 0x300e(%rip),%ymm10 # 6920 <_sk_callback_avx+0x541>
+ DB 197,124,40,21,254,47,0,0 ; vmovaps 0x2ffe(%rip),%ymm10 # 68a0 <_sk_callback_avx+0x5d5>
DB 196,193,124,84,194 ; vandps %ymm10,%ymm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,98,125,24,13,60,46,0,0 ; vbroadcastss 0x2e3c(%rip),%ymm9 # 6760 <_sk_callback_avx+0x381>
+ DB 196,98,125,24,13,152,45,0,0 ; vbroadcastss 0x2d98(%rip),%ymm9 # 664c <_sk_callback_avx+0x381>
DB 196,193,124,89,193 ; vmulps %ymm9,%ymm0,%ymm0
DB 196,193,113,114,208,8 ; vpsrld $0x8,%xmm8,%xmm1
DB 197,233,114,211,8 ; vpsrld $0x8,%xmm3,%xmm2
@@ -8378,38 +8347,38 @@ _sk_load_565_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,128,0,0,0 ; jne 3a1c <_sk_load_565_avx+0x8e>
+ DB 15,133,128,0,0,0 ; jne 39ac <_sk_load_565_avx+0x8e>
DB 196,193,122,111,4,122 ; vmovdqu (%r10,%rdi,2),%xmm0
DB 197,241,239,201 ; vpxor %xmm1,%xmm1,%xmm1
DB 197,249,105,201 ; vpunpckhwd %xmm1,%xmm0,%xmm1
DB 196,226,121,51,192 ; vpmovzxwd %xmm0,%xmm0
DB 196,227,125,24,209,1 ; vinsertf128 $0x1,%xmm1,%ymm0,%ymm2
- DB 196,226,125,24,5,166,45,0,0 ; vbroadcastss 0x2da6(%rip),%ymm0 # 6764 <_sk_callback_avx+0x385>
+ DB 196,226,125,24,5,2,45,0,0 ; vbroadcastss 0x2d02(%rip),%ymm0 # 6650 <_sk_callback_avx+0x385>
DB 197,236,84,192 ; vandps %ymm0,%ymm2,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,226,125,24,13,153,45,0,0 ; vbroadcastss 0x2d99(%rip),%ymm1 # 6768 <_sk_callback_avx+0x389>
+ DB 196,226,125,24,13,245,44,0,0 ; vbroadcastss 0x2cf5(%rip),%ymm1 # 6654 <_sk_callback_avx+0x389>
DB 197,252,89,193 ; vmulps %ymm1,%ymm0,%ymm0
- DB 196,226,125,24,13,144,45,0,0 ; vbroadcastss 0x2d90(%rip),%ymm1 # 676c <_sk_callback_avx+0x38d>
+ DB 196,226,125,24,13,236,44,0,0 ; vbroadcastss 0x2cec(%rip),%ymm1 # 6658 <_sk_callback_avx+0x38d>
DB 197,236,84,201 ; vandps %ymm1,%ymm2,%ymm1
DB 197,252,91,201 ; vcvtdq2ps %ymm1,%ymm1
- DB 196,226,125,24,29,131,45,0,0 ; vbroadcastss 0x2d83(%rip),%ymm3 # 6770 <_sk_callback_avx+0x391>
+ DB 196,226,125,24,29,223,44,0,0 ; vbroadcastss 0x2cdf(%rip),%ymm3 # 665c <_sk_callback_avx+0x391>
DB 197,244,89,203 ; vmulps %ymm3,%ymm1,%ymm1
- DB 196,226,125,24,29,122,45,0,0 ; vbroadcastss 0x2d7a(%rip),%ymm3 # 6774 <_sk_callback_avx+0x395>
+ DB 196,226,125,24,29,214,44,0,0 ; vbroadcastss 0x2cd6(%rip),%ymm3 # 6660 <_sk_callback_avx+0x395>
DB 197,236,84,211 ; vandps %ymm3,%ymm2,%ymm2
DB 197,252,91,210 ; vcvtdq2ps %ymm2,%ymm2
- DB 196,226,125,24,29,109,45,0,0 ; vbroadcastss 0x2d6d(%rip),%ymm3 # 6778 <_sk_callback_avx+0x399>
+ DB 196,226,125,24,29,201,44,0,0 ; vbroadcastss 0x2cc9(%rip),%ymm3 # 6664 <_sk_callback_avx+0x399>
DB 197,236,89,211 ; vmulps %ymm3,%ymm2,%ymm2
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,29,98,45,0,0 ; vbroadcastss 0x2d62(%rip),%ymm3 # 677c <_sk_callback_avx+0x39d>
+ DB 196,226,125,24,29,190,44,0,0 ; vbroadcastss 0x2cbe(%rip),%ymm3 # 6668 <_sk_callback_avx+0x39d>
DB 255,224 ; jmpq *%rax
DB 65,137,200 ; mov %ecx,%r8d
DB 65,128,224,7 ; and $0x7,%r8b
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 15,135,110,255,255,255 ; ja 39a2 <_sk_load_565_avx+0x14>
+ DB 15,135,110,255,255,255 ; ja 3932 <_sk_load_565_avx+0x14>
DB 69,15,182,192 ; movzbl %r8b,%r8d
- DB 76,141,13,73,0,0,0 ; lea 0x49(%rip),%r9 # 3a88 <_sk_load_565_avx+0xfa>
+ DB 76,141,13,73,0,0,0 ; lea 0x49(%rip),%r9 # 3a18 <_sk_load_565_avx+0xfa>
DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax
DB 76,1,200 ; add %r9,%rax
DB 255,224 ; jmpq *%rax
@@ -8421,7 +8390,7 @@ _sk_load_565_avx LABEL PROC
DB 196,193,121,196,68,122,4,2 ; vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,68,122,2,1 ; vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,4,122,0 ; vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- DB 233,26,255,255,255 ; jmpq 39a2 <_sk_load_565_avx+0x14>
+ DB 233,26,255,255,255 ; jmpq 3932 <_sk_load_565_avx+0x14>
DB 244 ; hlt
DB 255 ; (bad)
DB 255 ; (bad)
@@ -8497,23 +8466,23 @@ _sk_gather_565_avx LABEL PROC
DB 197,249,105,201 ; vpunpckhwd %xmm1,%xmm0,%xmm1
DB 196,226,121,51,192 ; vpmovzxwd %xmm0,%xmm0
DB 196,227,125,24,209,1 ; vinsertf128 $0x1,%xmm1,%ymm0,%ymm2
- DB 196,226,125,24,5,2,44,0,0 ; vbroadcastss 0x2c02(%rip),%ymm0 # 6780 <_sk_callback_avx+0x3a1>
+ DB 196,226,125,24,5,94,43,0,0 ; vbroadcastss 0x2b5e(%rip),%ymm0 # 666c <_sk_callback_avx+0x3a1>
DB 197,236,84,192 ; vandps %ymm0,%ymm2,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,226,125,24,13,245,43,0,0 ; vbroadcastss 0x2bf5(%rip),%ymm1 # 6784 <_sk_callback_avx+0x3a5>
+ DB 196,226,125,24,13,81,43,0,0 ; vbroadcastss 0x2b51(%rip),%ymm1 # 6670 <_sk_callback_avx+0x3a5>
DB 197,252,89,193 ; vmulps %ymm1,%ymm0,%ymm0
- DB 196,226,125,24,13,236,43,0,0 ; vbroadcastss 0x2bec(%rip),%ymm1 # 6788 <_sk_callback_avx+0x3a9>
+ DB 196,226,125,24,13,72,43,0,0 ; vbroadcastss 0x2b48(%rip),%ymm1 # 6674 <_sk_callback_avx+0x3a9>
DB 197,236,84,201 ; vandps %ymm1,%ymm2,%ymm1
DB 197,252,91,201 ; vcvtdq2ps %ymm1,%ymm1
- DB 196,226,125,24,29,223,43,0,0 ; vbroadcastss 0x2bdf(%rip),%ymm3 # 678c <_sk_callback_avx+0x3ad>
+ DB 196,226,125,24,29,59,43,0,0 ; vbroadcastss 0x2b3b(%rip),%ymm3 # 6678 <_sk_callback_avx+0x3ad>
DB 197,244,89,203 ; vmulps %ymm3,%ymm1,%ymm1
- DB 196,226,125,24,29,214,43,0,0 ; vbroadcastss 0x2bd6(%rip),%ymm3 # 6790 <_sk_callback_avx+0x3b1>
+ DB 196,226,125,24,29,50,43,0,0 ; vbroadcastss 0x2b32(%rip),%ymm3 # 667c <_sk_callback_avx+0x3b1>
DB 197,236,84,211 ; vandps %ymm3,%ymm2,%ymm2
DB 197,252,91,210 ; vcvtdq2ps %ymm2,%ymm2
- DB 196,226,125,24,29,201,43,0,0 ; vbroadcastss 0x2bc9(%rip),%ymm3 # 6794 <_sk_callback_avx+0x3b5>
+ DB 196,226,125,24,29,37,43,0,0 ; vbroadcastss 0x2b25(%rip),%ymm3 # 6680 <_sk_callback_avx+0x3b5>
DB 197,236,89,211 ; vmulps %ymm3,%ymm2,%ymm2
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,29,190,43,0,0 ; vbroadcastss 0x2bbe(%rip),%ymm3 # 6798 <_sk_callback_avx+0x3b9>
+ DB 196,226,125,24,29,26,43,0,0 ; vbroadcastss 0x2b1a(%rip),%ymm3 # 6684 <_sk_callback_avx+0x3b9>
DB 91 ; pop %rbx
DB 65,92 ; pop %r12
DB 65,94 ; pop %r14
@@ -8525,14 +8494,14 @@ PUBLIC _sk_store_565_avx
_sk_store_565_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
- DB 196,98,125,24,5,170,43,0,0 ; vbroadcastss 0x2baa(%rip),%ymm8 # 679c <_sk_callback_avx+0x3bd>
+ DB 196,98,125,24,5,6,43,0,0 ; vbroadcastss 0x2b06(%rip),%ymm8 # 6688 <_sk_callback_avx+0x3bd>
DB 196,65,124,89,200 ; vmulps %ymm8,%ymm0,%ymm9
DB 196,65,125,91,201 ; vcvtps2dq %ymm9,%ymm9
DB 196,193,41,114,241,11 ; vpslld $0xb,%xmm9,%xmm10
DB 196,67,125,25,201,1 ; vextractf128 $0x1,%ymm9,%xmm9
DB 196,193,49,114,241,11 ; vpslld $0xb,%xmm9,%xmm9
DB 196,67,45,24,201,1 ; vinsertf128 $0x1,%xmm9,%ymm10,%ymm9
- DB 196,98,125,24,21,131,43,0,0 ; vbroadcastss 0x2b83(%rip),%ymm10 # 67a0 <_sk_callback_avx+0x3c1>
+ DB 196,98,125,24,21,223,42,0,0 ; vbroadcastss 0x2adf(%rip),%ymm10 # 668c <_sk_callback_avx+0x3c1>
DB 196,65,116,89,210 ; vmulps %ymm10,%ymm1,%ymm10
DB 196,65,125,91,210 ; vcvtps2dq %ymm10,%ymm10
DB 196,193,33,114,242,5 ; vpslld $0x5,%xmm10,%xmm11
@@ -8546,7 +8515,7 @@ _sk_store_565_avx LABEL PROC
DB 196,67,125,25,193,1 ; vextractf128 $0x1,%ymm8,%xmm9
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 3c6d <_sk_store_565_avx+0x89>
+ DB 117,10 ; jne 3bfd <_sk_store_565_avx+0x89>
DB 196,65,122,127,4,122 ; vmovdqu %xmm8,(%r10,%rdi,2)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -8554,9 +8523,9 @@ _sk_store_565_avx LABEL PROC
DB 65,128,224,7 ; and $0x7,%r8b
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 3c69 <_sk_store_565_avx+0x85>
+ DB 119,236 ; ja 3bf9 <_sk_store_565_avx+0x85>
DB 69,15,182,192 ; movzbl %r8b,%r8d
- DB 76,141,13,68,0,0,0 ; lea 0x44(%rip),%r9 # 3ccc <_sk_store_565_avx+0xe8>
+ DB 76,141,13,68,0,0,0 ; lea 0x44(%rip),%r9 # 3c5c <_sk_store_565_avx+0xe8>
DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax
DB 76,1,200 ; add %r9,%rax
DB 255,224 ; jmpq *%rax
@@ -8567,7 +8536,7 @@ _sk_store_565_avx LABEL PROC
DB 196,67,121,21,68,122,4,2 ; vpextrw $0x2,%xmm8,0x4(%r10,%rdi,2)
DB 196,67,121,21,68,122,2,1 ; vpextrw $0x1,%xmm8,0x2(%r10,%rdi,2)
DB 196,67,121,21,4,122,0 ; vpextrw $0x0,%xmm8,(%r10,%rdi,2)
- DB 235,159 ; jmp 3c69 <_sk_store_565_avx+0x85>
+ DB 235,159 ; jmp 3bf9 <_sk_store_565_avx+0x85>
DB 102,144 ; xchg %ax,%ax
DB 245 ; cmc
DB 255 ; (bad)
@@ -8598,31 +8567,31 @@ _sk_load_4444_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,152,0,0,0 ; jne 3d8e <_sk_load_4444_avx+0xa6>
+ DB 15,133,152,0,0,0 ; jne 3d1e <_sk_load_4444_avx+0xa6>
DB 196,193,122,111,4,122 ; vmovdqu (%r10,%rdi,2),%xmm0
DB 197,241,239,201 ; vpxor %xmm1,%xmm1,%xmm1
DB 197,249,105,201 ; vpunpckhwd %xmm1,%xmm0,%xmm1
DB 196,226,121,51,192 ; vpmovzxwd %xmm0,%xmm0
DB 196,227,125,24,217,1 ; vinsertf128 $0x1,%xmm1,%ymm0,%ymm3
- DB 196,226,125,24,5,140,42,0,0 ; vbroadcastss 0x2a8c(%rip),%ymm0 # 67a4 <_sk_callback_avx+0x3c5>
+ DB 196,226,125,24,5,232,41,0,0 ; vbroadcastss 0x29e8(%rip),%ymm0 # 6690 <_sk_callback_avx+0x3c5>
DB 197,228,84,192 ; vandps %ymm0,%ymm3,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,226,125,24,13,127,42,0,0 ; vbroadcastss 0x2a7f(%rip),%ymm1 # 67a8 <_sk_callback_avx+0x3c9>
+ DB 196,226,125,24,13,219,41,0,0 ; vbroadcastss 0x29db(%rip),%ymm1 # 6694 <_sk_callback_avx+0x3c9>
DB 197,252,89,193 ; vmulps %ymm1,%ymm0,%ymm0
- DB 196,226,125,24,13,118,42,0,0 ; vbroadcastss 0x2a76(%rip),%ymm1 # 67ac <_sk_callback_avx+0x3cd>
+ DB 196,226,125,24,13,210,41,0,0 ; vbroadcastss 0x29d2(%rip),%ymm1 # 6698 <_sk_callback_avx+0x3cd>
DB 197,228,84,201 ; vandps %ymm1,%ymm3,%ymm1
DB 197,252,91,201 ; vcvtdq2ps %ymm1,%ymm1
- DB 196,226,125,24,21,105,42,0,0 ; vbroadcastss 0x2a69(%rip),%ymm2 # 67b0 <_sk_callback_avx+0x3d1>
+ DB 196,226,125,24,21,197,41,0,0 ; vbroadcastss 0x29c5(%rip),%ymm2 # 669c <_sk_callback_avx+0x3d1>
DB 197,244,89,202 ; vmulps %ymm2,%ymm1,%ymm1
- DB 196,226,125,24,21,96,42,0,0 ; vbroadcastss 0x2a60(%rip),%ymm2 # 67b4 <_sk_callback_avx+0x3d5>
+ DB 196,226,125,24,21,188,41,0,0 ; vbroadcastss 0x29bc(%rip),%ymm2 # 66a0 <_sk_callback_avx+0x3d5>
DB 197,228,84,210 ; vandps %ymm2,%ymm3,%ymm2
DB 197,252,91,210 ; vcvtdq2ps %ymm2,%ymm2
- DB 196,98,125,24,5,83,42,0,0 ; vbroadcastss 0x2a53(%rip),%ymm8 # 67b8 <_sk_callback_avx+0x3d9>
+ DB 196,98,125,24,5,175,41,0,0 ; vbroadcastss 0x29af(%rip),%ymm8 # 66a4 <_sk_callback_avx+0x3d9>
DB 196,193,108,89,208 ; vmulps %ymm8,%ymm2,%ymm2
- DB 196,98,125,24,5,73,42,0,0 ; vbroadcastss 0x2a49(%rip),%ymm8 # 67bc <_sk_callback_avx+0x3dd>
+ DB 196,98,125,24,5,165,41,0,0 ; vbroadcastss 0x29a5(%rip),%ymm8 # 66a8 <_sk_callback_avx+0x3dd>
DB 196,193,100,84,216 ; vandps %ymm8,%ymm3,%ymm3
DB 197,252,91,219 ; vcvtdq2ps %ymm3,%ymm3
- DB 196,98,125,24,5,59,42,0,0 ; vbroadcastss 0x2a3b(%rip),%ymm8 # 67c0 <_sk_callback_avx+0x3e1>
+ DB 196,98,125,24,5,151,41,0,0 ; vbroadcastss 0x2997(%rip),%ymm8 # 66ac <_sk_callback_avx+0x3e1>
DB 196,193,100,89,216 ; vmulps %ymm8,%ymm3,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -8631,9 +8600,9 @@ _sk_load_4444_avx LABEL PROC
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 15,135,86,255,255,255 ; ja 3cfc <_sk_load_4444_avx+0x14>
+ DB 15,135,86,255,255,255 ; ja 3c8c <_sk_load_4444_avx+0x14>
DB 69,15,182,192 ; movzbl %r8b,%r8d
- DB 76,141,13,75,0,0,0 ; lea 0x4b(%rip),%r9 # 3dfc <_sk_load_4444_avx+0x114>
+ DB 76,141,13,75,0,0,0 ; lea 0x4b(%rip),%r9 # 3d8c <_sk_load_4444_avx+0x114>
DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax
DB 76,1,200 ; add %r9,%rax
DB 255,224 ; jmpq *%rax
@@ -8645,7 +8614,7 @@ _sk_load_4444_avx LABEL PROC
DB 196,193,121,196,68,122,4,2 ; vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,68,122,2,1 ; vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,4,122,0 ; vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- DB 233,2,255,255,255 ; jmpq 3cfc <_sk_load_4444_avx+0x14>
+ DB 233,2,255,255,255 ; jmpq 3c8c <_sk_load_4444_avx+0x14>
DB 102,144 ; xchg %ax,%ax
DB 242,255 ; repnz (bad)
DB 255 ; (bad)
@@ -8722,25 +8691,25 @@ _sk_gather_4444_avx LABEL PROC
DB 197,249,105,201 ; vpunpckhwd %xmm1,%xmm0,%xmm1
DB 196,226,121,51,192 ; vpmovzxwd %xmm0,%xmm0
DB 196,227,125,24,217,1 ; vinsertf128 $0x1,%xmm1,%ymm0,%ymm3
- DB 196,226,125,24,5,210,40,0,0 ; vbroadcastss 0x28d2(%rip),%ymm0 # 67c4 <_sk_callback_avx+0x3e5>
+ DB 196,226,125,24,5,46,40,0,0 ; vbroadcastss 0x282e(%rip),%ymm0 # 66b0 <_sk_callback_avx+0x3e5>
DB 197,228,84,192 ; vandps %ymm0,%ymm3,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,226,125,24,13,197,40,0,0 ; vbroadcastss 0x28c5(%rip),%ymm1 # 67c8 <_sk_callback_avx+0x3e9>
+ DB 196,226,125,24,13,33,40,0,0 ; vbroadcastss 0x2821(%rip),%ymm1 # 66b4 <_sk_callback_avx+0x3e9>
DB 197,252,89,193 ; vmulps %ymm1,%ymm0,%ymm0
- DB 196,226,125,24,13,188,40,0,0 ; vbroadcastss 0x28bc(%rip),%ymm1 # 67cc <_sk_callback_avx+0x3ed>
+ DB 196,226,125,24,13,24,40,0,0 ; vbroadcastss 0x2818(%rip),%ymm1 # 66b8 <_sk_callback_avx+0x3ed>
DB 197,228,84,201 ; vandps %ymm1,%ymm3,%ymm1
DB 197,252,91,201 ; vcvtdq2ps %ymm1,%ymm1
- DB 196,226,125,24,21,175,40,0,0 ; vbroadcastss 0x28af(%rip),%ymm2 # 67d0 <_sk_callback_avx+0x3f1>
+ DB 196,226,125,24,21,11,40,0,0 ; vbroadcastss 0x280b(%rip),%ymm2 # 66bc <_sk_callback_avx+0x3f1>
DB 197,244,89,202 ; vmulps %ymm2,%ymm1,%ymm1
- DB 196,226,125,24,21,166,40,0,0 ; vbroadcastss 0x28a6(%rip),%ymm2 # 67d4 <_sk_callback_avx+0x3f5>
+ DB 196,226,125,24,21,2,40,0,0 ; vbroadcastss 0x2802(%rip),%ymm2 # 66c0 <_sk_callback_avx+0x3f5>
DB 197,228,84,210 ; vandps %ymm2,%ymm3,%ymm2
DB 197,252,91,210 ; vcvtdq2ps %ymm2,%ymm2
- DB 196,98,125,24,5,153,40,0,0 ; vbroadcastss 0x2899(%rip),%ymm8 # 67d8 <_sk_callback_avx+0x3f9>
+ DB 196,98,125,24,5,245,39,0,0 ; vbroadcastss 0x27f5(%rip),%ymm8 # 66c4 <_sk_callback_avx+0x3f9>
DB 196,193,108,89,208 ; vmulps %ymm8,%ymm2,%ymm2
- DB 196,98,125,24,5,143,40,0,0 ; vbroadcastss 0x288f(%rip),%ymm8 # 67dc <_sk_callback_avx+0x3fd>
+ DB 196,98,125,24,5,235,39,0,0 ; vbroadcastss 0x27eb(%rip),%ymm8 # 66c8 <_sk_callback_avx+0x3fd>
DB 196,193,100,84,216 ; vandps %ymm8,%ymm3,%ymm3
DB 197,252,91,219 ; vcvtdq2ps %ymm3,%ymm3
- DB 196,98,125,24,5,129,40,0,0 ; vbroadcastss 0x2881(%rip),%ymm8 # 67e0 <_sk_callback_avx+0x401>
+ DB 196,98,125,24,5,221,39,0,0 ; vbroadcastss 0x27dd(%rip),%ymm8 # 66cc <_sk_callback_avx+0x401>
DB 196,193,100,89,216 ; vmulps %ymm8,%ymm3,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
DB 91 ; pop %rbx
@@ -8754,7 +8723,7 @@ PUBLIC _sk_store_4444_avx
_sk_store_4444_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
- DB 196,98,125,24,5,102,40,0,0 ; vbroadcastss 0x2866(%rip),%ymm8 # 67e4 <_sk_callback_avx+0x405>
+ DB 196,98,125,24,5,194,39,0,0 ; vbroadcastss 0x27c2(%rip),%ymm8 # 66d0 <_sk_callback_avx+0x405>
DB 196,65,124,89,200 ; vmulps %ymm8,%ymm0,%ymm9
DB 196,65,125,91,201 ; vcvtps2dq %ymm9,%ymm9
DB 196,193,41,114,241,12 ; vpslld $0xc,%xmm9,%xmm10
@@ -8781,7 +8750,7 @@ _sk_store_4444_avx LABEL PROC
DB 196,67,125,25,193,1 ; vextractf128 $0x1,%ymm8,%xmm9
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 4017 <_sk_store_4444_avx+0xa7>
+ DB 117,10 ; jne 3fa7 <_sk_store_4444_avx+0xa7>
DB 196,65,122,127,4,122 ; vmovdqu %xmm8,(%r10,%rdi,2)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -8789,9 +8758,9 @@ _sk_store_4444_avx LABEL PROC
DB 65,128,224,7 ; and $0x7,%r8b
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 4013 <_sk_store_4444_avx+0xa3>
+ DB 119,236 ; ja 3fa3 <_sk_store_4444_avx+0xa3>
DB 69,15,182,192 ; movzbl %r8b,%r8d
- DB 76,141,13,66,0,0,0 ; lea 0x42(%rip),%r9 # 4074 <_sk_store_4444_avx+0x104>
+ DB 76,141,13,66,0,0,0 ; lea 0x42(%rip),%r9 # 4004 <_sk_store_4444_avx+0x104>
DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax
DB 76,1,200 ; add %r9,%rax
DB 255,224 ; jmpq *%rax
@@ -8802,7 +8771,7 @@ _sk_store_4444_avx LABEL PROC
DB 196,67,121,21,68,122,4,2 ; vpextrw $0x2,%xmm8,0x4(%r10,%rdi,2)
DB 196,67,121,21,68,122,2,1 ; vpextrw $0x1,%xmm8,0x2(%r10,%rdi,2)
DB 196,67,121,21,4,122,0 ; vpextrw $0x0,%xmm8,(%r10,%rdi,2)
- DB 235,159 ; jmp 4013 <_sk_store_4444_avx+0xa3>
+ DB 235,159 ; jmp 3fa3 <_sk_store_4444_avx+0xa3>
DB 247,255 ; idiv %edi
DB 255 ; (bad)
DB 255 ; (bad)
@@ -8828,87 +8797,55 @@ _sk_store_4444_avx LABEL PROC
PUBLIC _sk_load_8888_avx
_sk_load_8888_avx LABEL PROC
+ DB 80 ; push %rax
+ DB 73,137,200 ; mov %rcx,%r8
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 76,139,16 ; mov (%rax),%r10
- DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,135,0,0,0 ; jne 4125 <_sk_load_8888_avx+0x95>
- DB 196,65,124,16,12,186 ; vmovups (%r10,%rdi,4),%ymm9
- DB 197,124,40,21,148,40,0,0 ; vmovaps 0x2894(%rip),%ymm10 # 6940 <_sk_callback_avx+0x561>
- DB 196,193,52,84,194 ; vandps %ymm10,%ymm9,%ymm0
+ DB 76,141,12,189,0,0,0,0 ; lea 0x0(,%rdi,4),%r9
+ DB 76,3,8 ; add (%rax),%r9
+ DB 77,133,192 ; test %r8,%r8
+ DB 15,133,139,0,0,0 ; jne 40c5 <_sk_load_8888_avx+0xa5>
+ DB 196,193,124,16,25 ; vmovups (%r9),%ymm3
+ DB 197,124,40,21,121,40,0,0 ; vmovaps 0x2879(%rip),%ymm10 # 68c0 <_sk_callback_avx+0x5f5>
+ DB 196,193,100,84,194 ; vandps %ymm10,%ymm3,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,98,125,24,5,42,39,0,0 ; vbroadcastss 0x272a(%rip),%ymm8 # 67e8 <_sk_callback_avx+0x409>
+ DB 196,98,125,24,5,123,38,0,0 ; vbroadcastss 0x267b(%rip),%ymm8 # 66d4 <_sk_callback_avx+0x409>
DB 196,193,124,89,192 ; vmulps %ymm8,%ymm0,%ymm0
- DB 196,193,113,114,209,8 ; vpsrld $0x8,%xmm9,%xmm1
- DB 196,99,125,25,203,1 ; vextractf128 $0x1,%ymm9,%xmm3
- DB 197,233,114,211,8 ; vpsrld $0x8,%xmm3,%xmm2
+ DB 197,241,114,211,8 ; vpsrld $0x8,%xmm3,%xmm1
+ DB 196,195,125,25,217,1 ; vextractf128 $0x1,%ymm3,%xmm9
+ DB 196,193,105,114,209,8 ; vpsrld $0x8,%xmm9,%xmm2
DB 196,227,117,24,202,1 ; vinsertf128 $0x1,%xmm2,%ymm1,%ymm1
DB 196,193,116,84,202 ; vandps %ymm10,%ymm1,%ymm1
DB 197,252,91,201 ; vcvtdq2ps %ymm1,%ymm1
DB 196,193,116,89,200 ; vmulps %ymm8,%ymm1,%ymm1
- DB 196,193,33,114,209,16 ; vpsrld $0x10,%xmm9,%xmm11
- DB 197,233,114,211,16 ; vpsrld $0x10,%xmm3,%xmm2
+ DB 197,161,114,211,16 ; vpsrld $0x10,%xmm3,%xmm11
+ DB 196,193,105,114,209,16 ; vpsrld $0x10,%xmm9,%xmm2
DB 196,227,37,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm11,%ymm2
DB 196,193,108,84,210 ; vandps %ymm10,%ymm2,%ymm2
DB 197,252,91,210 ; vcvtdq2ps %ymm2,%ymm2
DB 196,193,108,89,208 ; vmulps %ymm8,%ymm2,%ymm2
- DB 196,193,49,114,209,24 ; vpsrld $0x18,%xmm9,%xmm9
- DB 197,225,114,211,24 ; vpsrld $0x18,%xmm3,%xmm3
- DB 196,227,53,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm9,%ymm3
+ DB 197,169,114,211,24 ; vpsrld $0x18,%xmm3,%xmm10
+ DB 196,193,97,114,209,24 ; vpsrld $0x18,%xmm9,%xmm3
+ DB 196,227,45,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm10,%ymm3
DB 197,252,91,219 ; vcvtdq2ps %ymm3,%ymm3
DB 196,193,100,89,216 ; vmulps %ymm8,%ymm3,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 76,137,193 ; mov %r8,%rcx
+ DB 65,88 ; pop %r8
DB 255,224 ; jmpq *%rax
- DB 65,137,200 ; mov %ecx,%r8d
- DB 65,128,224,7 ; and $0x7,%r8b
- DB 196,65,52,87,201 ; vxorps %ymm9,%ymm9,%ymm9
- DB 65,254,200 ; dec %r8b
- DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 15,135,102,255,255,255 ; ja 40a4 <_sk_load_8888_avx+0x14>
- DB 69,15,182,192 ; movzbl %r8b,%r8d
- DB 76,141,13,139,0,0,0 ; lea 0x8b(%rip),%r9 # 41d4 <_sk_load_8888_avx+0x144>
- DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax
- DB 76,1,200 ; add %r9,%rax
- DB 255,224 ; jmpq *%rax
- DB 196,193,121,110,68,186,24 ; vmovd 0x18(%r10,%rdi,4),%xmm0
- DB 197,249,112,192,68 ; vpshufd $0x44,%xmm0,%xmm0
- DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
- DB 197,244,87,201 ; vxorps %ymm1,%ymm1,%ymm1
- DB 196,99,117,12,200,64 ; vblendps $0x40,%ymm0,%ymm1,%ymm9
- DB 196,99,125,25,200,1 ; vextractf128 $0x1,%ymm9,%xmm0
- DB 196,195,121,34,68,186,20,1 ; vpinsrd $0x1,0x14(%r10,%rdi,4),%xmm0,%xmm0
- DB 196,99,53,24,200,1 ; vinsertf128 $0x1,%xmm0,%ymm9,%ymm9
- DB 196,99,125,25,200,1 ; vextractf128 $0x1,%ymm9,%xmm0
- DB 196,195,121,34,68,186,16,0 ; vpinsrd $0x0,0x10(%r10,%rdi,4),%xmm0,%xmm0
- DB 196,99,53,24,200,1 ; vinsertf128 $0x1,%xmm0,%ymm9,%ymm9
- DB 196,195,49,34,68,186,12,3 ; vpinsrd $0x3,0xc(%r10,%rdi,4),%xmm9,%xmm0
- DB 196,99,53,12,200,15 ; vblendps $0xf,%ymm0,%ymm9,%ymm9
- DB 196,195,49,34,68,186,8,2 ; vpinsrd $0x2,0x8(%r10,%rdi,4),%xmm9,%xmm0
- DB 196,99,53,12,200,15 ; vblendps $0xf,%ymm0,%ymm9,%ymm9
- DB 196,195,49,34,68,186,4,1 ; vpinsrd $0x1,0x4(%r10,%rdi,4),%xmm9,%xmm0
- DB 196,99,53,12,200,15 ; vblendps $0xf,%ymm0,%ymm9,%ymm9
- DB 196,195,49,34,4,186,0 ; vpinsrd $0x0,(%r10,%rdi,4),%xmm9,%xmm0
- DB 196,99,53,12,200,15 ; vblendps $0xf,%ymm0,%ymm9,%ymm9
- DB 233,210,254,255,255 ; jmpq 40a4 <_sk_load_8888_avx+0x14>
- DB 102,144 ; xchg %ax,%ax
- DB 236 ; in (%dx),%al
- DB 255 ; (bad)
- DB 255 ; (bad)
- DB 255 ; (bad)
- DB 222,255 ; fdivrp %st,%st(7)
- DB 255 ; (bad)
- DB 255,208 ; callq *%rax
- DB 255 ; (bad)
- DB 255 ; (bad)
- DB 255,194 ; inc %edx
- DB 255 ; (bad)
- DB 255 ; (bad)
- DB 255,174,255,255,255,154 ; ljmp *-0x65000001(%rsi)
- DB 255 ; (bad)
- DB 255 ; (bad)
- DB 255 ; (bad)
- DB 126,255 ; jle 41ed <_sk_load_8888_avx+0x15d>
- DB 255 ; (bad)
- DB 255 ; .byte 0xff
+ DB 185,8,0,0,0 ; mov $0x8,%ecx
+ DB 68,41,193 ; sub %r8d,%ecx
+ DB 192,225,3 ; shl $0x3,%cl
+ DB 72,199,192,255,255,255,255 ; mov $0xffffffffffffffff,%rax
+ DB 72,211,232 ; shr %cl,%rax
+ DB 196,225,249,110,192 ; vmovq %rax,%xmm0
+ DB 196,226,121,48,192 ; vpmovzxbw %xmm0,%xmm0
+ DB 196,226,121,0,13,67,39,0,0 ; vpshufb 0x2743(%rip),%xmm0,%xmm1 # 6830 <_sk_callback_avx+0x565>
+ DB 196,226,121,33,201 ; vpmovsxbd %xmm1,%xmm1
+ DB 196,226,121,0,5,69,39,0,0 ; vpshufb 0x2745(%rip),%xmm0,%xmm0 # 6840 <_sk_callback_avx+0x575>
+ DB 196,226,121,33,192 ; vpmovsxbd %xmm0,%xmm0
+ DB 196,227,117,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
+ DB 196,194,125,44,25 ; vmaskmovps (%r9),%ymm0,%ymm3
+ DB 233,47,255,255,255 ; jmpq 403f <_sk_load_8888_avx+0x1f>
PUBLIC _sk_gather_8888_avx
_sk_gather_8888_avx LABEL PROC
@@ -8949,10 +8886,10 @@ _sk_gather_8888_avx LABEL PROC
DB 196,131,121,34,4,152,2 ; vpinsrd $0x2,(%r8,%r11,4),%xmm0,%xmm0
DB 196,131,121,34,28,144,3 ; vpinsrd $0x3,(%r8,%r10,4),%xmm0,%xmm3
DB 196,227,61,24,195,1 ; vinsertf128 $0x1,%xmm3,%ymm8,%ymm0
- DB 197,124,40,21,190,38,0,0 ; vmovaps 0x26be(%rip),%ymm10 # 6960 <_sk_callback_avx+0x581>
+ DB 197,124,40,21,30,39,0,0 ; vmovaps 0x271e(%rip),%ymm10 # 68e0 <_sk_callback_avx+0x615>
DB 196,193,124,84,194 ; vandps %ymm10,%ymm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,98,125,24,13,56,37,0,0 ; vbroadcastss 0x2538(%rip),%ymm9 # 67ec <_sk_callback_avx+0x40d>
+ DB 196,98,125,24,13,4,37,0,0 ; vbroadcastss 0x2504(%rip),%ymm9 # 66d8 <_sk_callback_avx+0x40d>
DB 196,193,124,89,193 ; vmulps %ymm9,%ymm0,%ymm0
DB 196,193,113,114,208,8 ; vpsrld $0x8,%xmm8,%xmm1
DB 197,233,114,211,8 ; vpsrld $0x8,%xmm3,%xmm2
@@ -8980,9 +8917,12 @@ _sk_gather_8888_avx LABEL PROC
PUBLIC _sk_store_8888_avx
_sk_store_8888_avx LABEL PROC
+ DB 80 ; push %rax
+ DB 73,137,200 ; mov %rcx,%r8
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 76,139,16 ; mov (%rax),%r10
- DB 196,98,125,24,5,198,36,0,0 ; vbroadcastss 0x24c6(%rip),%ymm8 # 67f0 <_sk_callback_avx+0x411>
+ DB 76,141,12,189,0,0,0,0 ; lea 0x0(,%rdi,4),%r9
+ DB 76,3,8 ; add (%rax),%r9
+ DB 196,98,125,24,5,134,36,0,0 ; vbroadcastss 0x2486(%rip),%ymm8 # 66dc <_sk_callback_avx+0x411>
DB 196,65,124,89,200 ; vmulps %ymm8,%ymm0,%ymm9
DB 196,65,125,91,201 ; vcvtps2dq %ymm9,%ymm9
DB 196,65,116,89,208 ; vmulps %ymm8,%ymm1,%ymm10
@@ -9006,56 +8946,27 @@ _sk_store_8888_avx LABEL PROC
DB 196,67,37,24,192,1 ; vinsertf128 $0x1,%xmm8,%ymm11,%ymm8
DB 196,65,45,86,192 ; vorpd %ymm8,%ymm10,%ymm8
DB 196,65,53,86,192 ; vorpd %ymm8,%ymm9,%ymm8
- DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 43b8 <_sk_store_8888_avx+0x9c>
- DB 196,65,124,17,4,186 ; vmovups %ymm8,(%r10,%rdi,4)
+ DB 77,133,192 ; test %r8,%r8
+ DB 117,14 ; jne 42e8 <_sk_store_8888_avx+0xac>
+ DB 196,65,124,17,1 ; vmovups %ymm8,(%r9)
DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 76,137,193 ; mov %r8,%rcx
+ DB 65,88 ; pop %r8
DB 255,224 ; jmpq *%rax
- DB 65,137,200 ; mov %ecx,%r8d
- DB 65,128,224,7 ; and $0x7,%r8b
- DB 65,254,200 ; dec %r8b
- DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 43b4 <_sk_store_8888_avx+0x98>
- DB 69,15,182,192 ; movzbl %r8b,%r8d
- DB 76,141,13,85,0,0,0 ; lea 0x55(%rip),%r9 # 4428 <_sk_store_8888_avx+0x10c>
- DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax
- DB 76,1,200 ; add %r9,%rax
- DB 255,224 ; jmpq *%rax
- DB 196,67,125,25,193,1 ; vextractf128 $0x1,%ymm8,%xmm9
- DB 196,67,121,22,76,186,24,2 ; vpextrd $0x2,%xmm9,0x18(%r10,%rdi,4)
- DB 196,67,125,25,193,1 ; vextractf128 $0x1,%ymm8,%xmm9
- DB 196,67,121,22,76,186,20,1 ; vpextrd $0x1,%xmm9,0x14(%r10,%rdi,4)
- DB 196,67,125,25,193,1 ; vextractf128 $0x1,%ymm8,%xmm9
- DB 196,65,122,17,76,186,16 ; vmovss %xmm9,0x10(%r10,%rdi,4)
- DB 196,67,121,22,68,186,12,3 ; vpextrd $0x3,%xmm8,0xc(%r10,%rdi,4)
- DB 196,67,121,22,68,186,8,2 ; vpextrd $0x2,%xmm8,0x8(%r10,%rdi,4)
- DB 196,67,121,22,68,186,4,1 ; vpextrd $0x1,%xmm8,0x4(%r10,%rdi,4)
- DB 196,65,121,126,4,186 ; vmovd %xmm8,(%r10,%rdi,4)
- DB 235,143 ; jmp 43b4 <_sk_store_8888_avx+0x98>
- DB 15,31,0 ; nopl (%rax)
- DB 245 ; cmc
- DB 255 ; (bad)
- DB 255 ; (bad)
- DB 255 ; (bad)
- DB 237 ; in (%dx),%eax
- DB 255 ; (bad)
- DB 255 ; (bad)
- DB 255,229 ; jmpq *%rbp
- DB 255 ; (bad)
- DB 255 ; (bad)
- DB 255 ; (bad)
- DB 221,255 ; (bad)
- DB 255 ; (bad)
- DB 255,208 ; callq *%rax
- DB 255 ; (bad)
- DB 255 ; (bad)
- DB 255,194 ; inc %edx
- DB 255 ; (bad)
- DB 255 ; (bad)
- DB 255 ; .byte 0xff
- DB 180,255 ; mov $0xff,%ah
- DB 255 ; (bad)
- DB 255 ; .byte 0xff
+ DB 185,8,0,0,0 ; mov $0x8,%ecx
+ DB 68,41,193 ; sub %r8d,%ecx
+ DB 192,225,3 ; shl $0x3,%cl
+ DB 72,199,192,255,255,255,255 ; mov $0xffffffffffffffff,%rax
+ DB 72,211,232 ; shr %cl,%rax
+ DB 196,97,249,110,200 ; vmovq %rax,%xmm9
+ DB 196,66,121,48,201 ; vpmovzxbw %xmm9,%xmm9
+ DB 196,98,49,0,21,64,37,0,0 ; vpshufb 0x2540(%rip),%xmm9,%xmm10 # 6850 <_sk_callback_avx+0x585>
+ DB 196,66,121,33,210 ; vpmovsxbd %xmm10,%xmm10
+ DB 196,98,49,0,13,66,37,0,0 ; vpshufb 0x2542(%rip),%xmm9,%xmm9 # 6860 <_sk_callback_avx+0x595>
+ DB 196,66,121,33,201 ; vpmovsxbd %xmm9,%xmm9
+ DB 196,67,45,24,201,1 ; vinsertf128 $0x1,%xmm9,%ymm10,%ymm9
+ DB 196,66,53,46,1 ; vmaskmovps %ymm8,%ymm9,(%r9)
+ DB 235,175 ; jmp 42df <_sk_store_8888_avx+0xa3>
PUBLIC _sk_load_f16_avx
_sk_load_f16_avx LABEL PROC
@@ -9067,7 +8978,7 @@ _sk_load_f16_avx LABEL PROC
DB 197,252,17,116,36,64 ; vmovups %ymm6,0x40(%rsp)
DB 197,252,17,108,36,32 ; vmovups %ymm5,0x20(%rsp)
DB 197,254,127,36,36 ; vmovdqu %ymm4,(%rsp)
- DB 15,133,143,2,0,0 ; jne 46ff <_sk_load_f16_avx+0x2bb>
+ DB 15,133,143,2,0,0 ; jne 45eb <_sk_load_f16_avx+0x2bb>
DB 197,121,16,4,248 ; vmovupd (%rax,%rdi,8),%xmm8
DB 197,249,16,84,248,16 ; vmovupd 0x10(%rax,%rdi,8),%xmm2
DB 197,249,16,76,248,32 ; vmovupd 0x20(%rax,%rdi,8),%xmm1
@@ -9085,13 +8996,13 @@ _sk_load_f16_avx LABEL PROC
DB 197,249,105,201 ; vpunpckhwd %xmm1,%xmm0,%xmm1
DB 196,226,121,51,192 ; vpmovzxwd %xmm0,%xmm0
DB 196,227,125,24,193,1 ; vinsertf128 $0x1,%xmm1,%ymm0,%ymm0
- DB 196,98,125,24,37,43,35,0,0 ; vbroadcastss 0x232b(%rip),%ymm12 # 67f4 <_sk_callback_avx+0x415>
+ DB 196,98,125,24,37,43,35,0,0 ; vbroadcastss 0x232b(%rip),%ymm12 # 66e0 <_sk_callback_avx+0x415>
DB 196,193,124,84,204 ; vandps %ymm12,%ymm0,%ymm1
DB 197,252,87,193 ; vxorps %ymm1,%ymm0,%ymm0
DB 196,195,125,25,198,1 ; vextractf128 $0x1,%ymm0,%xmm14
- DB 196,98,121,24,29,23,35,0,0 ; vbroadcastss 0x2317(%rip),%xmm11 # 67f8 <_sk_callback_avx+0x419>
+ DB 196,98,121,24,29,23,35,0,0 ; vbroadcastss 0x2317(%rip),%xmm11 # 66e4 <_sk_callback_avx+0x419>
DB 196,193,8,87,219 ; vxorps %xmm11,%xmm14,%xmm3
- DB 196,98,121,24,45,13,35,0,0 ; vbroadcastss 0x230d(%rip),%xmm13 # 67fc <_sk_callback_avx+0x41d>
+ DB 196,98,121,24,45,13,35,0,0 ; vbroadcastss 0x230d(%rip),%xmm13 # 66e8 <_sk_callback_avx+0x41d>
DB 197,145,102,219 ; vpcmpgtd %xmm3,%xmm13,%xmm3
DB 196,65,120,87,211 ; vxorps %xmm11,%xmm0,%xmm10
DB 196,65,17,102,210 ; vpcmpgtd %xmm10,%xmm13,%xmm10
@@ -9105,7 +9016,7 @@ _sk_load_f16_avx LABEL PROC
DB 196,227,125,24,195,1 ; vinsertf128 $0x1,%xmm3,%ymm0,%ymm0
DB 197,252,86,193 ; vorps %ymm1,%ymm0,%ymm0
DB 196,227,125,25,193,1 ; vextractf128 $0x1,%ymm0,%xmm1
- DB 196,226,121,24,29,195,34,0,0 ; vbroadcastss 0x22c3(%rip),%xmm3 # 6800 <_sk_callback_avx+0x421>
+ DB 196,226,121,24,29,195,34,0,0 ; vbroadcastss 0x22c3(%rip),%xmm3 # 66ec <_sk_callback_avx+0x421>
DB 197,241,254,203 ; vpaddd %xmm3,%xmm1,%xmm1
DB 197,249,254,195 ; vpaddd %xmm3,%xmm0,%xmm0
DB 196,227,125,24,193,1 ; vinsertf128 $0x1,%xmm1,%ymm0,%ymm0
@@ -9198,29 +9109,29 @@ _sk_load_f16_avx LABEL PROC
DB 197,123,16,4,248 ; vmovsd (%rax,%rdi,8),%xmm8
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,79 ; je 475e <_sk_load_f16_avx+0x31a>
+ DB 116,79 ; je 464a <_sk_load_f16_avx+0x31a>
DB 197,57,22,68,248,8 ; vmovhpd 0x8(%rax,%rdi,8),%xmm8,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,67 ; jb 475e <_sk_load_f16_avx+0x31a>
+ DB 114,67 ; jb 464a <_sk_load_f16_avx+0x31a>
DB 197,251,16,84,248,16 ; vmovsd 0x10(%rax,%rdi,8),%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 116,68 ; je 476b <_sk_load_f16_avx+0x327>
+ DB 116,68 ; je 4657 <_sk_load_f16_avx+0x327>
DB 197,233,22,84,248,24 ; vmovhpd 0x18(%rax,%rdi,8),%xmm2,%xmm2
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,56 ; jb 476b <_sk_load_f16_avx+0x327>
+ DB 114,56 ; jb 4657 <_sk_load_f16_avx+0x327>
DB 197,251,16,76,248,32 ; vmovsd 0x20(%rax,%rdi,8),%xmm1
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 15,132,68,253,255,255 ; je 4487 <_sk_load_f16_avx+0x43>
+ DB 15,132,68,253,255,255 ; je 4373 <_sk_load_f16_avx+0x43>
DB 197,241,22,76,248,40 ; vmovhpd 0x28(%rax,%rdi,8),%xmm1,%xmm1
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 15,130,52,253,255,255 ; jb 4487 <_sk_load_f16_avx+0x43>
+ DB 15,130,52,253,255,255 ; jb 4373 <_sk_load_f16_avx+0x43>
DB 197,122,126,76,248,48 ; vmovq 0x30(%rax,%rdi,8),%xmm9
- DB 233,41,253,255,255 ; jmpq 4487 <_sk_load_f16_avx+0x43>
+ DB 233,41,253,255,255 ; jmpq 4373 <_sk_load_f16_avx+0x43>
DB 197,241,87,201 ; vxorpd %xmm1,%xmm1,%xmm1
DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2
- DB 233,28,253,255,255 ; jmpq 4487 <_sk_load_f16_avx+0x43>
+ DB 233,28,253,255,255 ; jmpq 4373 <_sk_load_f16_avx+0x43>
DB 197,241,87,201 ; vxorpd %xmm1,%xmm1,%xmm1
- DB 233,19,253,255,255 ; jmpq 4487 <_sk_load_f16_avx+0x43>
+ DB 233,19,253,255,255 ; jmpq 4373 <_sk_load_f16_avx+0x43>
PUBLIC _sk_gather_f16_avx
_sk_gather_f16_avx LABEL PROC
@@ -9282,13 +9193,13 @@ _sk_gather_f16_avx LABEL PROC
DB 197,249,105,210 ; vpunpckhwd %xmm2,%xmm0,%xmm2
DB 196,226,121,51,192 ; vpmovzxwd %xmm0,%xmm0
DB 196,227,125,24,194,1 ; vinsertf128 $0x1,%xmm2,%ymm0,%ymm0
- DB 196,98,125,24,37,131,31,0,0 ; vbroadcastss 0x1f83(%rip),%ymm12 # 6804 <_sk_callback_avx+0x425>
+ DB 196,98,125,24,37,131,31,0,0 ; vbroadcastss 0x1f83(%rip),%ymm12 # 66f0 <_sk_callback_avx+0x425>
DB 196,193,124,84,212 ; vandps %ymm12,%ymm0,%ymm2
DB 197,252,87,194 ; vxorps %ymm2,%ymm0,%ymm0
DB 196,195,125,25,198,1 ; vextractf128 $0x1,%ymm0,%xmm14
- DB 196,98,121,24,29,111,31,0,0 ; vbroadcastss 0x1f6f(%rip),%xmm11 # 6808 <_sk_callback_avx+0x429>
+ DB 196,98,121,24,29,111,31,0,0 ; vbroadcastss 0x1f6f(%rip),%xmm11 # 66f4 <_sk_callback_avx+0x429>
DB 196,193,8,87,219 ; vxorps %xmm11,%xmm14,%xmm3
- DB 196,98,121,24,45,101,31,0,0 ; vbroadcastss 0x1f65(%rip),%xmm13 # 680c <_sk_callback_avx+0x42d>
+ DB 196,98,121,24,45,101,31,0,0 ; vbroadcastss 0x1f65(%rip),%xmm13 # 66f8 <_sk_callback_avx+0x42d>
DB 197,145,102,219 ; vpcmpgtd %xmm3,%xmm13,%xmm3
DB 196,65,120,87,211 ; vxorps %xmm11,%xmm0,%xmm10
DB 196,65,17,102,210 ; vpcmpgtd %xmm10,%xmm13,%xmm10
@@ -9302,7 +9213,7 @@ _sk_gather_f16_avx LABEL PROC
DB 196,227,125,24,195,1 ; vinsertf128 $0x1,%xmm3,%ymm0,%ymm0
DB 197,252,86,194 ; vorps %ymm2,%ymm0,%ymm0
DB 196,227,125,25,194,1 ; vextractf128 $0x1,%ymm0,%xmm2
- DB 196,226,121,24,29,27,31,0,0 ; vbroadcastss 0x1f1b(%rip),%xmm3 # 6810 <_sk_callback_avx+0x431>
+ DB 196,226,121,24,29,27,31,0,0 ; vbroadcastss 0x1f1b(%rip),%xmm3 # 66fc <_sk_callback_avx+0x431>
DB 197,233,254,211 ; vpaddd %xmm3,%xmm2,%xmm2
DB 197,249,254,195 ; vpaddd %xmm3,%xmm0,%xmm0
DB 196,227,125,24,194,1 ; vinsertf128 $0x1,%xmm2,%ymm0,%ymm0
@@ -9404,12 +9315,12 @@ _sk_store_f16_avx LABEL PROC
DB 197,252,17,180,36,128,0,0,0 ; vmovups %ymm6,0x80(%rsp)
DB 197,252,17,108,36,96 ; vmovups %ymm5,0x60(%rsp)
DB 197,252,17,100,36,64 ; vmovups %ymm4,0x40(%rsp)
- DB 196,98,125,24,13,40,29,0,0 ; vbroadcastss 0x1d28(%rip),%ymm9 # 6814 <_sk_callback_avx+0x435>
+ DB 196,98,125,24,13,40,29,0,0 ; vbroadcastss 0x1d28(%rip),%ymm9 # 6700 <_sk_callback_avx+0x435>
DB 196,65,124,84,209 ; vandps %ymm9,%ymm0,%ymm10
DB 197,252,17,4,36 ; vmovups %ymm0,(%rsp)
DB 196,65,124,87,218 ; vxorps %ymm10,%ymm0,%ymm11
DB 196,67,125,25,220,1 ; vextractf128 $0x1,%ymm11,%xmm12
- DB 196,98,121,24,5,14,29,0,0 ; vbroadcastss 0x1d0e(%rip),%xmm8 # 6818 <_sk_callback_avx+0x439>
+ DB 196,98,121,24,5,14,29,0,0 ; vbroadcastss 0x1d0e(%rip),%xmm8 # 6704 <_sk_callback_avx+0x439>
DB 196,65,57,102,236 ; vpcmpgtd %xmm12,%xmm8,%xmm13
DB 196,65,57,102,243 ; vpcmpgtd %xmm11,%xmm8,%xmm14
DB 196,67,13,24,237,1 ; vinsertf128 $0x1,%xmm13,%ymm14,%ymm13
@@ -9419,7 +9330,7 @@ _sk_store_f16_avx LABEL PROC
DB 196,67,13,24,242,1 ; vinsertf128 $0x1,%xmm10,%ymm14,%ymm14
DB 196,193,33,114,211,13 ; vpsrld $0xd,%xmm11,%xmm11
DB 196,193,25,114,212,13 ; vpsrld $0xd,%xmm12,%xmm12
- DB 196,98,125,24,21,213,28,0,0 ; vbroadcastss 0x1cd5(%rip),%ymm10 # 681c <_sk_callback_avx+0x43d>
+ DB 196,98,125,24,21,213,28,0,0 ; vbroadcastss 0x1cd5(%rip),%ymm10 # 6708 <_sk_callback_avx+0x43d>
DB 196,65,12,86,242 ; vorps %ymm10,%ymm14,%ymm14
DB 196,67,125,25,247,1 ; vextractf128 $0x1,%ymm14,%xmm15
DB 196,65,1,254,228 ; vpaddd %xmm12,%xmm15,%xmm12
@@ -9501,7 +9412,7 @@ _sk_store_f16_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 72,139,0 ; mov (%rax),%rax
DB 72,133,201 ; test %rcx,%rcx
- DB 117,75 ; jne 4d2e <_sk_store_f16_avx+0x270>
+ DB 117,75 ; jne 4c1a <_sk_store_f16_avx+0x270>
DB 197,120,17,28,248 ; vmovups %xmm11,(%rax,%rdi,8)
DB 197,120,17,84,248,16 ; vmovups %xmm10,0x10(%rax,%rdi,8)
DB 197,120,17,76,248,32 ; vmovups %xmm9,0x20(%rax,%rdi,8)
@@ -9517,22 +9428,22 @@ _sk_store_f16_avx LABEL PROC
DB 255,224 ; jmpq *%rax
DB 197,121,214,28,248 ; vmovq %xmm11,(%rax,%rdi,8)
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,193 ; je 4cfa <_sk_store_f16_avx+0x23c>
+ DB 116,193 ; je 4be6 <_sk_store_f16_avx+0x23c>
DB 197,121,23,92,248,8 ; vmovhpd %xmm11,0x8(%rax,%rdi,8)
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,181 ; jb 4cfa <_sk_store_f16_avx+0x23c>
+ DB 114,181 ; jb 4be6 <_sk_store_f16_avx+0x23c>
DB 197,121,214,84,248,16 ; vmovq %xmm10,0x10(%rax,%rdi,8)
- DB 116,173 ; je 4cfa <_sk_store_f16_avx+0x23c>
+ DB 116,173 ; je 4be6 <_sk_store_f16_avx+0x23c>
DB 197,121,23,84,248,24 ; vmovhpd %xmm10,0x18(%rax,%rdi,8)
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,161 ; jb 4cfa <_sk_store_f16_avx+0x23c>
+ DB 114,161 ; jb 4be6 <_sk_store_f16_avx+0x23c>
DB 197,121,214,76,248,32 ; vmovq %xmm9,0x20(%rax,%rdi,8)
- DB 116,153 ; je 4cfa <_sk_store_f16_avx+0x23c>
+ DB 116,153 ; je 4be6 <_sk_store_f16_avx+0x23c>
DB 197,121,23,76,248,40 ; vmovhpd %xmm9,0x28(%rax,%rdi,8)
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,141 ; jb 4cfa <_sk_store_f16_avx+0x23c>
+ DB 114,141 ; jb 4be6 <_sk_store_f16_avx+0x23c>
DB 197,121,214,68,248,48 ; vmovq %xmm8,0x30(%rax,%rdi,8)
- DB 235,133 ; jmp 4cfa <_sk_store_f16_avx+0x23c>
+ DB 235,133 ; jmp 4be6 <_sk_store_f16_avx+0x23c>
PUBLIC _sk_load_u16_be_avx
_sk_load_u16_be_avx LABEL PROC
@@ -9540,7 +9451,7 @@ _sk_load_u16_be_avx LABEL PROC
DB 76,139,0 ; mov (%rax),%r8
DB 72,141,4,189,0,0,0,0 ; lea 0x0(,%rdi,4),%rax
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,253,0,0,0 ; jne 4e88 <_sk_load_u16_be_avx+0x113>
+ DB 15,133,253,0,0,0 ; jne 4d74 <_sk_load_u16_be_avx+0x113>
DB 196,65,121,16,4,64 ; vmovupd (%r8,%rax,2),%xmm8
DB 196,193,121,16,84,64,16 ; vmovupd 0x10(%r8,%rax,2),%xmm2
DB 196,193,121,16,92,64,32 ; vmovupd 0x20(%r8,%rax,2),%xmm3
@@ -9562,7 +9473,7 @@ _sk_load_u16_be_avx LABEL PROC
DB 196,226,121,51,192 ; vpmovzxwd %xmm0,%xmm0
DB 196,227,125,24,193,1 ; vinsertf128 $0x1,%xmm1,%ymm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,98,125,24,29,36,26,0,0 ; vbroadcastss 0x1a24(%rip),%ymm11 # 6820 <_sk_callback_avx+0x441>
+ DB 196,98,125,24,29,36,26,0,0 ; vbroadcastss 0x1a24(%rip),%ymm11 # 670c <_sk_callback_avx+0x441>
DB 196,193,124,89,195 ; vmulps %ymm11,%ymm0,%ymm0
DB 197,177,109,202 ; vpunpckhqdq %xmm2,%xmm9,%xmm1
DB 197,233,113,241,8 ; vpsllw $0x8,%xmm1,%xmm2
@@ -9596,29 +9507,29 @@ _sk_load_u16_be_avx LABEL PROC
DB 196,65,123,16,4,64 ; vmovsd (%r8,%rax,2),%xmm8
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,85 ; je 4eee <_sk_load_u16_be_avx+0x179>
+ DB 116,85 ; je 4dda <_sk_load_u16_be_avx+0x179>
DB 196,65,57,22,68,64,8 ; vmovhpd 0x8(%r8,%rax,2),%xmm8,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,72 ; jb 4eee <_sk_load_u16_be_avx+0x179>
+ DB 114,72 ; jb 4dda <_sk_load_u16_be_avx+0x179>
DB 196,193,123,16,84,64,16 ; vmovsd 0x10(%r8,%rax,2),%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 116,72 ; je 4efb <_sk_load_u16_be_avx+0x186>
+ DB 116,72 ; je 4de7 <_sk_load_u16_be_avx+0x186>
DB 196,193,105,22,84,64,24 ; vmovhpd 0x18(%r8,%rax,2),%xmm2,%xmm2
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,59 ; jb 4efb <_sk_load_u16_be_avx+0x186>
+ DB 114,59 ; jb 4de7 <_sk_load_u16_be_avx+0x186>
DB 196,193,123,16,92,64,32 ; vmovsd 0x20(%r8,%rax,2),%xmm3
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 15,132,213,254,255,255 ; je 4da6 <_sk_load_u16_be_avx+0x31>
+ DB 15,132,213,254,255,255 ; je 4c92 <_sk_load_u16_be_avx+0x31>
DB 196,193,97,22,92,64,40 ; vmovhpd 0x28(%r8,%rax,2),%xmm3,%xmm3
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 15,130,196,254,255,255 ; jb 4da6 <_sk_load_u16_be_avx+0x31>
+ DB 15,130,196,254,255,255 ; jb 4c92 <_sk_load_u16_be_avx+0x31>
DB 196,65,122,126,76,64,48 ; vmovq 0x30(%r8,%rax,2),%xmm9
- DB 233,184,254,255,255 ; jmpq 4da6 <_sk_load_u16_be_avx+0x31>
+ DB 233,184,254,255,255 ; jmpq 4c92 <_sk_load_u16_be_avx+0x31>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2
- DB 233,171,254,255,255 ; jmpq 4da6 <_sk_load_u16_be_avx+0x31>
+ DB 233,171,254,255,255 ; jmpq 4c92 <_sk_load_u16_be_avx+0x31>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
- DB 233,162,254,255,255 ; jmpq 4da6 <_sk_load_u16_be_avx+0x31>
+ DB 233,162,254,255,255 ; jmpq 4c92 <_sk_load_u16_be_avx+0x31>
PUBLIC _sk_load_rgb_u16_be_avx
_sk_load_rgb_u16_be_avx LABEL PROC
@@ -9626,7 +9537,7 @@ _sk_load_rgb_u16_be_avx LABEL PROC
DB 76,139,0 ; mov (%rax),%r8
DB 72,141,4,127 ; lea (%rdi,%rdi,2),%rax
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,243,0,0,0 ; jne 5009 <_sk_load_rgb_u16_be_avx+0x105>
+ DB 15,133,243,0,0,0 ; jne 4ef5 <_sk_load_rgb_u16_be_avx+0x105>
DB 196,193,122,111,4,64 ; vmovdqu (%r8,%rax,2),%xmm0
DB 196,193,122,111,84,64,12 ; vmovdqu 0xc(%r8,%rax,2),%xmm2
DB 196,193,122,111,76,64,24 ; vmovdqu 0x18(%r8,%rax,2),%xmm1
@@ -9653,7 +9564,7 @@ _sk_load_rgb_u16_be_avx LABEL PROC
DB 196,226,121,51,192 ; vpmovzxwd %xmm0,%xmm0
DB 196,227,125,24,193,1 ; vinsertf128 $0x1,%xmm1,%ymm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,98,125,24,29,132,24,0,0 ; vbroadcastss 0x1884(%rip),%ymm11 # 6824 <_sk_callback_avx+0x445>
+ DB 196,98,125,24,29,132,24,0,0 ; vbroadcastss 0x1884(%rip),%ymm11 # 6710 <_sk_callback_avx+0x445>
DB 196,193,124,89,195 ; vmulps %ymm11,%ymm0,%ymm0
DB 197,185,109,202 ; vpunpckhqdq %xmm2,%xmm8,%xmm1
DB 197,233,113,241,8 ; vpsllw $0x8,%xmm1,%xmm2
@@ -9674,48 +9585,48 @@ _sk_load_rgb_u16_be_avx LABEL PROC
DB 197,252,91,210 ; vcvtdq2ps %ymm2,%ymm2
DB 196,193,108,89,211 ; vmulps %ymm11,%ymm2,%ymm2
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,29,33,24,0,0 ; vbroadcastss 0x1821(%rip),%ymm3 # 6828 <_sk_callback_avx+0x449>
+ DB 196,226,125,24,29,33,24,0,0 ; vbroadcastss 0x1821(%rip),%ymm3 # 6714 <_sk_callback_avx+0x449>
DB 255,224 ; jmpq *%rax
DB 196,193,121,110,4,64 ; vmovd (%r8,%rax,2),%xmm0
DB 196,193,121,196,68,64,4,2 ; vpinsrw $0x2,0x4(%r8,%rax,2),%xmm0,%xmm0
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 117,5 ; jne 5022 <_sk_load_rgb_u16_be_avx+0x11e>
- DB 233,40,255,255,255 ; jmpq 4f4a <_sk_load_rgb_u16_be_avx+0x46>
+ DB 117,5 ; jne 4f0e <_sk_load_rgb_u16_be_avx+0x11e>
+ DB 233,40,255,255,255 ; jmpq 4e36 <_sk_load_rgb_u16_be_avx+0x46>
DB 196,193,121,110,76,64,6 ; vmovd 0x6(%r8,%rax,2),%xmm1
DB 196,65,113,196,68,64,10,2 ; vpinsrw $0x2,0xa(%r8,%rax,2),%xmm1,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,26 ; jb 5051 <_sk_load_rgb_u16_be_avx+0x14d>
+ DB 114,26 ; jb 4f3d <_sk_load_rgb_u16_be_avx+0x14d>
DB 196,193,121,110,76,64,12 ; vmovd 0xc(%r8,%rax,2),%xmm1
DB 196,193,113,196,84,64,16,2 ; vpinsrw $0x2,0x10(%r8,%rax,2),%xmm1,%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 117,10 ; jne 5056 <_sk_load_rgb_u16_be_avx+0x152>
- DB 233,249,254,255,255 ; jmpq 4f4a <_sk_load_rgb_u16_be_avx+0x46>
- DB 233,244,254,255,255 ; jmpq 4f4a <_sk_load_rgb_u16_be_avx+0x46>
+ DB 117,10 ; jne 4f42 <_sk_load_rgb_u16_be_avx+0x152>
+ DB 233,249,254,255,255 ; jmpq 4e36 <_sk_load_rgb_u16_be_avx+0x46>
+ DB 233,244,254,255,255 ; jmpq 4e36 <_sk_load_rgb_u16_be_avx+0x46>
DB 196,193,121,110,76,64,18 ; vmovd 0x12(%r8,%rax,2),%xmm1
DB 196,65,113,196,76,64,22,2 ; vpinsrw $0x2,0x16(%r8,%rax,2),%xmm1,%xmm9
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,26 ; jb 5085 <_sk_load_rgb_u16_be_avx+0x181>
+ DB 114,26 ; jb 4f71 <_sk_load_rgb_u16_be_avx+0x181>
DB 196,193,121,110,76,64,24 ; vmovd 0x18(%r8,%rax,2),%xmm1
DB 196,193,113,196,76,64,28,2 ; vpinsrw $0x2,0x1c(%r8,%rax,2),%xmm1,%xmm1
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 117,10 ; jne 508a <_sk_load_rgb_u16_be_avx+0x186>
- DB 233,197,254,255,255 ; jmpq 4f4a <_sk_load_rgb_u16_be_avx+0x46>
- DB 233,192,254,255,255 ; jmpq 4f4a <_sk_load_rgb_u16_be_avx+0x46>
+ DB 117,10 ; jne 4f76 <_sk_load_rgb_u16_be_avx+0x186>
+ DB 233,197,254,255,255 ; jmpq 4e36 <_sk_load_rgb_u16_be_avx+0x46>
+ DB 233,192,254,255,255 ; jmpq 4e36 <_sk_load_rgb_u16_be_avx+0x46>
DB 196,193,121,110,92,64,30 ; vmovd 0x1e(%r8,%rax,2),%xmm3
DB 196,65,97,196,92,64,34,2 ; vpinsrw $0x2,0x22(%r8,%rax,2),%xmm3,%xmm11
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,20 ; jb 50b3 <_sk_load_rgb_u16_be_avx+0x1af>
+ DB 114,20 ; jb 4f9f <_sk_load_rgb_u16_be_avx+0x1af>
DB 196,193,121,110,92,64,36 ; vmovd 0x24(%r8,%rax,2),%xmm3
DB 196,193,97,196,92,64,40,2 ; vpinsrw $0x2,0x28(%r8,%rax,2),%xmm3,%xmm3
- DB 233,151,254,255,255 ; jmpq 4f4a <_sk_load_rgb_u16_be_avx+0x46>
- DB 233,146,254,255,255 ; jmpq 4f4a <_sk_load_rgb_u16_be_avx+0x46>
+ DB 233,151,254,255,255 ; jmpq 4e36 <_sk_load_rgb_u16_be_avx+0x46>
+ DB 233,146,254,255,255 ; jmpq 4e36 <_sk_load_rgb_u16_be_avx+0x46>
PUBLIC _sk_store_u16_be_avx
_sk_store_u16_be_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,0 ; mov (%rax),%r8
DB 72,141,4,189,0,0,0,0 ; lea 0x0(,%rdi,4),%rax
- DB 196,98,125,24,5,94,23,0,0 ; vbroadcastss 0x175e(%rip),%ymm8 # 682c <_sk_callback_avx+0x44d>
+ DB 196,98,125,24,5,94,23,0,0 ; vbroadcastss 0x175e(%rip),%ymm8 # 6718 <_sk_callback_avx+0x44d>
DB 196,65,124,89,200 ; vmulps %ymm8,%ymm0,%ymm9
DB 196,65,125,91,201 ; vcvtps2dq %ymm9,%ymm9
DB 196,67,125,25,202,1 ; vextractf128 $0x1,%ymm9,%xmm10
@@ -9753,7 +9664,7 @@ _sk_store_u16_be_avx LABEL PROC
DB 196,65,17,98,200 ; vpunpckldq %xmm8,%xmm13,%xmm9
DB 196,65,17,106,192 ; vpunpckhdq %xmm8,%xmm13,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,31 ; jne 51b2 <_sk_store_u16_be_avx+0xfa>
+ DB 117,31 ; jne 509e <_sk_store_u16_be_avx+0xfa>
DB 196,65,120,17,28,64 ; vmovups %xmm11,(%r8,%rax,2)
DB 196,65,120,17,84,64,16 ; vmovups %xmm10,0x10(%r8,%rax,2)
DB 196,65,120,17,76,64,32 ; vmovups %xmm9,0x20(%r8,%rax,2)
@@ -9762,31 +9673,31 @@ _sk_store_u16_be_avx LABEL PROC
DB 255,224 ; jmpq *%rax
DB 196,65,121,214,28,64 ; vmovq %xmm11,(%r8,%rax,2)
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,240 ; je 51ae <_sk_store_u16_be_avx+0xf6>
+ DB 116,240 ; je 509a <_sk_store_u16_be_avx+0xf6>
DB 196,65,121,23,92,64,8 ; vmovhpd %xmm11,0x8(%r8,%rax,2)
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,227 ; jb 51ae <_sk_store_u16_be_avx+0xf6>
+ DB 114,227 ; jb 509a <_sk_store_u16_be_avx+0xf6>
DB 196,65,121,214,84,64,16 ; vmovq %xmm10,0x10(%r8,%rax,2)
- DB 116,218 ; je 51ae <_sk_store_u16_be_avx+0xf6>
+ DB 116,218 ; je 509a <_sk_store_u16_be_avx+0xf6>
DB 196,65,121,23,84,64,24 ; vmovhpd %xmm10,0x18(%r8,%rax,2)
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,205 ; jb 51ae <_sk_store_u16_be_avx+0xf6>
+ DB 114,205 ; jb 509a <_sk_store_u16_be_avx+0xf6>
DB 196,65,121,214,76,64,32 ; vmovq %xmm9,0x20(%r8,%rax,2)
- DB 116,196 ; je 51ae <_sk_store_u16_be_avx+0xf6>
+ DB 116,196 ; je 509a <_sk_store_u16_be_avx+0xf6>
DB 196,65,121,23,76,64,40 ; vmovhpd %xmm9,0x28(%r8,%rax,2)
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,183 ; jb 51ae <_sk_store_u16_be_avx+0xf6>
+ DB 114,183 ; jb 509a <_sk_store_u16_be_avx+0xf6>
DB 196,65,121,214,68,64,48 ; vmovq %xmm8,0x30(%r8,%rax,2)
- DB 235,174 ; jmp 51ae <_sk_store_u16_be_avx+0xf6>
+ DB 235,174 ; jmp 509a <_sk_store_u16_be_avx+0xf6>
PUBLIC _sk_load_f32_avx
_sk_load_f32_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 119,110 ; ja 5276 <_sk_load_f32_avx+0x76>
+ DB 119,110 ; ja 5162 <_sk_load_f32_avx+0x76>
DB 76,139,0 ; mov (%rax),%r8
DB 76,141,12,189,0,0,0,0 ; lea 0x0(,%rdi,4),%r9
- DB 76,141,21,134,0,0,0 ; lea 0x86(%rip),%r10 # 52a0 <_sk_load_f32_avx+0xa0>
+ DB 76,141,21,134,0,0,0 ; lea 0x86(%rip),%r10 # 518c <_sk_load_f32_avx+0xa0>
DB 73,99,4,138 ; movslq (%r10,%rcx,4),%rax
DB 76,1,208 ; add %r10,%rax
DB 255,224 ; jmpq *%rax
@@ -9843,7 +9754,7 @@ _sk_store_f32_avx LABEL PROC
DB 196,65,37,20,196 ; vunpcklpd %ymm12,%ymm11,%ymm8
DB 196,65,37,21,220 ; vunpckhpd %ymm12,%ymm11,%ymm11
DB 72,133,201 ; test %rcx,%rcx
- DB 117,55 ; jne 532d <_sk_store_f32_avx+0x6d>
+ DB 117,55 ; jne 5219 <_sk_store_f32_avx+0x6d>
DB 196,67,45,24,225,1 ; vinsertf128 $0x1,%xmm9,%ymm10,%ymm12
DB 196,67,61,24,235,1 ; vinsertf128 $0x1,%xmm11,%ymm8,%ymm13
DB 196,67,45,6,201,49 ; vperm2f128 $0x31,%ymm9,%ymm10,%ymm9
@@ -9856,22 +9767,22 @@ _sk_store_f32_avx LABEL PROC
DB 255,224 ; jmpq *%rax
DB 196,65,121,17,20,128 ; vmovupd %xmm10,(%r8,%rax,4)
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,240 ; je 5329 <_sk_store_f32_avx+0x69>
+ DB 116,240 ; je 5215 <_sk_store_f32_avx+0x69>
DB 196,65,121,17,76,128,16 ; vmovupd %xmm9,0x10(%r8,%rax,4)
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,227 ; jb 5329 <_sk_store_f32_avx+0x69>
+ DB 114,227 ; jb 5215 <_sk_store_f32_avx+0x69>
DB 196,65,121,17,68,128,32 ; vmovupd %xmm8,0x20(%r8,%rax,4)
- DB 116,218 ; je 5329 <_sk_store_f32_avx+0x69>
+ DB 116,218 ; je 5215 <_sk_store_f32_avx+0x69>
DB 196,65,121,17,92,128,48 ; vmovupd %xmm11,0x30(%r8,%rax,4)
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,205 ; jb 5329 <_sk_store_f32_avx+0x69>
+ DB 114,205 ; jb 5215 <_sk_store_f32_avx+0x69>
DB 196,67,125,25,84,128,64,1 ; vextractf128 $0x1,%ymm10,0x40(%r8,%rax,4)
- DB 116,195 ; je 5329 <_sk_store_f32_avx+0x69>
+ DB 116,195 ; je 5215 <_sk_store_f32_avx+0x69>
DB 196,67,125,25,76,128,80,1 ; vextractf128 $0x1,%ymm9,0x50(%r8,%rax,4)
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,181 ; jb 5329 <_sk_store_f32_avx+0x69>
+ DB 114,181 ; jb 5215 <_sk_store_f32_avx+0x69>
DB 196,67,125,25,68,128,96,1 ; vextractf128 $0x1,%ymm8,0x60(%r8,%rax,4)
- DB 235,171 ; jmp 5329 <_sk_store_f32_avx+0x69>
+ DB 235,171 ; jmp 5215 <_sk_store_f32_avx+0x69>
PUBLIC _sk_clamp_x_avx
_sk_clamp_x_avx LABEL PROC
@@ -9963,12 +9874,12 @@ _sk_mirror_y_avx LABEL PROC
PUBLIC _sk_luminance_to_alpha_avx
_sk_luminance_to_alpha_avx LABEL PROC
- DB 196,226,125,24,29,131,19,0,0 ; vbroadcastss 0x1383(%rip),%ymm3 # 6830 <_sk_callback_avx+0x451>
+ DB 196,226,125,24,29,131,19,0,0 ; vbroadcastss 0x1383(%rip),%ymm3 # 671c <_sk_callback_avx+0x451>
DB 197,252,89,195 ; vmulps %ymm3,%ymm0,%ymm0
- DB 196,226,125,24,29,122,19,0,0 ; vbroadcastss 0x137a(%rip),%ymm3 # 6834 <_sk_callback_avx+0x455>
+ DB 196,226,125,24,29,122,19,0,0 ; vbroadcastss 0x137a(%rip),%ymm3 # 6720 <_sk_callback_avx+0x455>
DB 197,244,89,203 ; vmulps %ymm3,%ymm1,%ymm1
DB 197,252,88,193 ; vaddps %ymm1,%ymm0,%ymm0
- DB 196,226,125,24,13,109,19,0,0 ; vbroadcastss 0x136d(%rip),%ymm1 # 6838 <_sk_callback_avx+0x459>
+ DB 196,226,125,24,13,109,19,0,0 ; vbroadcastss 0x136d(%rip),%ymm1 # 6724 <_sk_callback_avx+0x459>
DB 197,236,89,201 ; vmulps %ymm1,%ymm2,%ymm1
DB 197,252,88,217 ; vaddps %ymm1,%ymm0,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -10175,9 +10086,9 @@ _sk_evenly_spaced_gradient_avx LABEL PROC
DB 72,139,24 ; mov (%rax),%rbx
DB 72,139,104,8 ; mov 0x8(%rax),%rbp
DB 72,255,203 ; dec %rbx
- DB 120,7 ; js 5821 <_sk_evenly_spaced_gradient_avx+0x1f>
+ DB 120,7 ; js 570d <_sk_evenly_spaced_gradient_avx+0x1f>
DB 196,225,242,42,203 ; vcvtsi2ss %rbx,%xmm1,%xmm1
- DB 235,21 ; jmp 5836 <_sk_evenly_spaced_gradient_avx+0x34>
+ DB 235,21 ; jmp 5722 <_sk_evenly_spaced_gradient_avx+0x34>
DB 73,137,216 ; mov %rbx,%r8
DB 73,209,232 ; shr %r8
DB 131,227,1 ; and $0x1,%ebx
@@ -10342,12 +10253,12 @@ _sk_gradient_avx LABEL PROC
DB 76,139,0 ; mov (%rax),%r8
DB 197,244,87,201 ; vxorps %ymm1,%ymm1,%ymm1
DB 73,131,248,2 ; cmp $0x2,%r8
- DB 114,80 ; jb 5bc4 <_sk_gradient_avx+0x69>
+ DB 114,80 ; jb 5ab0 <_sk_gradient_avx+0x69>
DB 72,139,88,72 ; mov 0x48(%rax),%rbx
DB 73,255,200 ; dec %r8
DB 72,131,195,4 ; add $0x4,%rbx
DB 196,65,52,87,201 ; vxorps %ymm9,%ymm9,%ymm9
- DB 196,98,125,24,21,175,12,0,0 ; vbroadcastss 0xcaf(%rip),%ymm10 # 683c <_sk_callback_avx+0x45d>
+ DB 196,98,125,24,21,175,12,0,0 ; vbroadcastss 0xcaf(%rip),%ymm10 # 6728 <_sk_callback_avx+0x45d>
DB 197,244,87,201 ; vxorps %ymm1,%ymm1,%ymm1
DB 196,98,125,24,3 ; vbroadcastss (%rbx),%ymm8
DB 197,60,194,192,2 ; vcmpleps %ymm0,%ymm8,%ymm8
@@ -10359,7 +10270,7 @@ _sk_gradient_avx LABEL PROC
DB 196,227,117,24,202,1 ; vinsertf128 $0x1,%xmm2,%ymm1,%ymm1
DB 72,131,195,4 ; add $0x4,%rbx
DB 73,255,200 ; dec %r8
- DB 117,205 ; jne 5b91 <_sk_gradient_avx+0x36>
+ DB 117,205 ; jne 5a7d <_sk_gradient_avx+0x36>
DB 196,195,249,22,200,1 ; vpextrq $0x1,%xmm1,%r8
DB 69,137,193 ; mov %r8d,%r9d
DB 73,193,232,32 ; shr $0x20,%r8
@@ -10537,27 +10448,27 @@ _sk_xy_to_unit_angle_avx LABEL PROC
DB 196,65,52,95,226 ; vmaxps %ymm10,%ymm9,%ymm12
DB 196,65,36,94,220 ; vdivps %ymm12,%ymm11,%ymm11
DB 196,65,36,89,227 ; vmulps %ymm11,%ymm11,%ymm12
- DB 196,98,125,24,45,211,8,0,0 ; vbroadcastss 0x8d3(%rip),%ymm13 # 6840 <_sk_callback_avx+0x461>
+ DB 196,98,125,24,45,211,8,0,0 ; vbroadcastss 0x8d3(%rip),%ymm13 # 672c <_sk_callback_avx+0x461>
DB 196,65,28,89,237 ; vmulps %ymm13,%ymm12,%ymm13
- DB 196,98,125,24,53,201,8,0,0 ; vbroadcastss 0x8c9(%rip),%ymm14 # 6844 <_sk_callback_avx+0x465>
+ DB 196,98,125,24,53,201,8,0,0 ; vbroadcastss 0x8c9(%rip),%ymm14 # 6730 <_sk_callback_avx+0x465>
DB 196,65,20,88,238 ; vaddps %ymm14,%ymm13,%ymm13
DB 196,65,28,89,237 ; vmulps %ymm13,%ymm12,%ymm13
- DB 196,98,125,24,53,186,8,0,0 ; vbroadcastss 0x8ba(%rip),%ymm14 # 6848 <_sk_callback_avx+0x469>
+ DB 196,98,125,24,53,186,8,0,0 ; vbroadcastss 0x8ba(%rip),%ymm14 # 6734 <_sk_callback_avx+0x469>
DB 196,65,20,88,238 ; vaddps %ymm14,%ymm13,%ymm13
DB 196,65,28,89,229 ; vmulps %ymm13,%ymm12,%ymm12
- DB 196,98,125,24,45,171,8,0,0 ; vbroadcastss 0x8ab(%rip),%ymm13 # 684c <_sk_callback_avx+0x46d>
+ DB 196,98,125,24,45,171,8,0,0 ; vbroadcastss 0x8ab(%rip),%ymm13 # 6738 <_sk_callback_avx+0x46d>
DB 196,65,28,88,229 ; vaddps %ymm13,%ymm12,%ymm12
DB 196,65,36,89,220 ; vmulps %ymm12,%ymm11,%ymm11
DB 196,65,52,194,202,1 ; vcmpltps %ymm10,%ymm9,%ymm9
- DB 196,98,125,24,21,150,8,0,0 ; vbroadcastss 0x896(%rip),%ymm10 # 6850 <_sk_callback_avx+0x471>
+ DB 196,98,125,24,21,150,8,0,0 ; vbroadcastss 0x896(%rip),%ymm10 # 673c <_sk_callback_avx+0x471>
DB 196,65,44,92,211 ; vsubps %ymm11,%ymm10,%ymm10
DB 196,67,37,74,202,144 ; vblendvps %ymm9,%ymm10,%ymm11,%ymm9
DB 196,193,124,194,192,1 ; vcmpltps %ymm8,%ymm0,%ymm0
- DB 196,98,125,24,21,128,8,0,0 ; vbroadcastss 0x880(%rip),%ymm10 # 6854 <_sk_callback_avx+0x475>
+ DB 196,98,125,24,21,128,8,0,0 ; vbroadcastss 0x880(%rip),%ymm10 # 6740 <_sk_callback_avx+0x475>
DB 196,65,44,92,209 ; vsubps %ymm9,%ymm10,%ymm10
DB 196,195,53,74,194,0 ; vblendvps %ymm0,%ymm10,%ymm9,%ymm0
DB 196,65,116,194,200,1 ; vcmpltps %ymm8,%ymm1,%ymm9
- DB 196,98,125,24,21,106,8,0,0 ; vbroadcastss 0x86a(%rip),%ymm10 # 6858 <_sk_callback_avx+0x479>
+ DB 196,98,125,24,21,106,8,0,0 ; vbroadcastss 0x86a(%rip),%ymm10 # 6744 <_sk_callback_avx+0x479>
DB 197,44,92,208 ; vsubps %ymm0,%ymm10,%ymm10
DB 196,195,125,74,194,144 ; vblendvps %ymm9,%ymm10,%ymm0,%ymm0
DB 196,65,124,194,200,3 ; vcmpunordps %ymm8,%ymm0,%ymm9
@@ -10577,7 +10488,7 @@ _sk_xy_to_radius_avx LABEL PROC
PUBLIC _sk_save_xy_avx
_sk_save_xy_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,98,125,24,5,52,8,0,0 ; vbroadcastss 0x834(%rip),%ymm8 # 685c <_sk_callback_avx+0x47d>
+ DB 196,98,125,24,5,52,8,0,0 ; vbroadcastss 0x834(%rip),%ymm8 # 6748 <_sk_callback_avx+0x47d>
DB 196,65,124,88,200 ; vaddps %ymm8,%ymm0,%ymm9
DB 196,67,125,8,209,1 ; vroundps $0x1,%ymm9,%ymm10
DB 196,65,52,92,202 ; vsubps %ymm10,%ymm9,%ymm9
@@ -10610,9 +10521,9 @@ _sk_accumulate_avx LABEL PROC
PUBLIC _sk_bilinear_nx_avx
_sk_bilinear_nx_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,5,192,7,0,0 ; vbroadcastss 0x7c0(%rip),%ymm0 # 6860 <_sk_callback_avx+0x481>
+ DB 196,226,125,24,5,192,7,0,0 ; vbroadcastss 0x7c0(%rip),%ymm0 # 674c <_sk_callback_avx+0x481>
DB 197,252,88,0 ; vaddps (%rax),%ymm0,%ymm0
- DB 196,98,125,24,5,183,7,0,0 ; vbroadcastss 0x7b7(%rip),%ymm8 # 6864 <_sk_callback_avx+0x485>
+ DB 196,98,125,24,5,183,7,0,0 ; vbroadcastss 0x7b7(%rip),%ymm8 # 6750 <_sk_callback_avx+0x485>
DB 197,60,92,64,64 ; vsubps 0x40(%rax),%ymm8,%ymm8
DB 197,124,17,128,128,0,0,0 ; vmovups %ymm8,0x80(%rax)
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -10621,7 +10532,7 @@ _sk_bilinear_nx_avx LABEL PROC
PUBLIC _sk_bilinear_px_avx
_sk_bilinear_px_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,5,159,7,0,0 ; vbroadcastss 0x79f(%rip),%ymm0 # 6868 <_sk_callback_avx+0x489>
+ DB 196,226,125,24,5,159,7,0,0 ; vbroadcastss 0x79f(%rip),%ymm0 # 6754 <_sk_callback_avx+0x489>
DB 197,252,88,0 ; vaddps (%rax),%ymm0,%ymm0
DB 197,124,16,64,64 ; vmovups 0x40(%rax),%ymm8
DB 197,124,17,128,128,0,0,0 ; vmovups %ymm8,0x80(%rax)
@@ -10631,9 +10542,9 @@ _sk_bilinear_px_avx LABEL PROC
PUBLIC _sk_bilinear_ny_avx
_sk_bilinear_ny_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,13,131,7,0,0 ; vbroadcastss 0x783(%rip),%ymm1 # 686c <_sk_callback_avx+0x48d>
+ DB 196,226,125,24,13,131,7,0,0 ; vbroadcastss 0x783(%rip),%ymm1 # 6758 <_sk_callback_avx+0x48d>
DB 197,244,88,72,32 ; vaddps 0x20(%rax),%ymm1,%ymm1
- DB 196,98,125,24,5,121,7,0,0 ; vbroadcastss 0x779(%rip),%ymm8 # 6870 <_sk_callback_avx+0x491>
+ DB 196,98,125,24,5,121,7,0,0 ; vbroadcastss 0x779(%rip),%ymm8 # 675c <_sk_callback_avx+0x491>
DB 197,60,92,64,96 ; vsubps 0x60(%rax),%ymm8,%ymm8
DB 197,124,17,128,160,0,0,0 ; vmovups %ymm8,0xa0(%rax)
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -10642,7 +10553,7 @@ _sk_bilinear_ny_avx LABEL PROC
PUBLIC _sk_bilinear_py_avx
_sk_bilinear_py_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,13,97,7,0,0 ; vbroadcastss 0x761(%rip),%ymm1 # 6874 <_sk_callback_avx+0x495>
+ DB 196,226,125,24,13,97,7,0,0 ; vbroadcastss 0x761(%rip),%ymm1 # 6760 <_sk_callback_avx+0x495>
DB 197,244,88,72,32 ; vaddps 0x20(%rax),%ymm1,%ymm1
DB 197,124,16,64,96 ; vmovups 0x60(%rax),%ymm8
DB 197,124,17,128,160,0,0,0 ; vmovups %ymm8,0xa0(%rax)
@@ -10652,14 +10563,14 @@ _sk_bilinear_py_avx LABEL PROC
PUBLIC _sk_bicubic_n3x_avx
_sk_bicubic_n3x_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,5,68,7,0,0 ; vbroadcastss 0x744(%rip),%ymm0 # 6878 <_sk_callback_avx+0x499>
+ DB 196,226,125,24,5,68,7,0,0 ; vbroadcastss 0x744(%rip),%ymm0 # 6764 <_sk_callback_avx+0x499>
DB 197,252,88,0 ; vaddps (%rax),%ymm0,%ymm0
- DB 196,98,125,24,5,59,7,0,0 ; vbroadcastss 0x73b(%rip),%ymm8 # 687c <_sk_callback_avx+0x49d>
+ DB 196,98,125,24,5,59,7,0,0 ; vbroadcastss 0x73b(%rip),%ymm8 # 6768 <_sk_callback_avx+0x49d>
DB 197,60,92,64,64 ; vsubps 0x40(%rax),%ymm8,%ymm8
DB 196,65,60,89,200 ; vmulps %ymm8,%ymm8,%ymm9
- DB 196,98,125,24,21,44,7,0,0 ; vbroadcastss 0x72c(%rip),%ymm10 # 6880 <_sk_callback_avx+0x4a1>
+ DB 196,98,125,24,21,44,7,0,0 ; vbroadcastss 0x72c(%rip),%ymm10 # 676c <_sk_callback_avx+0x4a1>
DB 196,65,60,89,194 ; vmulps %ymm10,%ymm8,%ymm8
- DB 196,98,125,24,21,34,7,0,0 ; vbroadcastss 0x722(%rip),%ymm10 # 6884 <_sk_callback_avx+0x4a5>
+ DB 196,98,125,24,21,34,7,0,0 ; vbroadcastss 0x722(%rip),%ymm10 # 6770 <_sk_callback_avx+0x4a5>
DB 196,65,60,88,194 ; vaddps %ymm10,%ymm8,%ymm8
DB 196,65,52,89,192 ; vmulps %ymm8,%ymm9,%ymm8
DB 197,124,17,128,128,0,0,0 ; vmovups %ymm8,0x80(%rax)
@@ -10669,19 +10580,19 @@ _sk_bicubic_n3x_avx LABEL PROC
PUBLIC _sk_bicubic_n1x_avx
_sk_bicubic_n1x_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,5,5,7,0,0 ; vbroadcastss 0x705(%rip),%ymm0 # 6888 <_sk_callback_avx+0x4a9>
+ DB 196,226,125,24,5,5,7,0,0 ; vbroadcastss 0x705(%rip),%ymm0 # 6774 <_sk_callback_avx+0x4a9>
DB 197,252,88,0 ; vaddps (%rax),%ymm0,%ymm0
- DB 196,98,125,24,5,252,6,0,0 ; vbroadcastss 0x6fc(%rip),%ymm8 # 688c <_sk_callback_avx+0x4ad>
+ DB 196,98,125,24,5,252,6,0,0 ; vbroadcastss 0x6fc(%rip),%ymm8 # 6778 <_sk_callback_avx+0x4ad>
DB 197,60,92,64,64 ; vsubps 0x40(%rax),%ymm8,%ymm8
- DB 196,98,125,24,13,242,6,0,0 ; vbroadcastss 0x6f2(%rip),%ymm9 # 6890 <_sk_callback_avx+0x4b1>
+ DB 196,98,125,24,13,242,6,0,0 ; vbroadcastss 0x6f2(%rip),%ymm9 # 677c <_sk_callback_avx+0x4b1>
DB 196,65,60,89,201 ; vmulps %ymm9,%ymm8,%ymm9
- DB 196,98,125,24,21,232,6,0,0 ; vbroadcastss 0x6e8(%rip),%ymm10 # 6894 <_sk_callback_avx+0x4b5>
+ DB 196,98,125,24,21,232,6,0,0 ; vbroadcastss 0x6e8(%rip),%ymm10 # 6780 <_sk_callback_avx+0x4b5>
DB 196,65,52,88,202 ; vaddps %ymm10,%ymm9,%ymm9
DB 196,65,60,89,201 ; vmulps %ymm9,%ymm8,%ymm9
- DB 196,98,125,24,21,217,6,0,0 ; vbroadcastss 0x6d9(%rip),%ymm10 # 6898 <_sk_callback_avx+0x4b9>
+ DB 196,98,125,24,21,217,6,0,0 ; vbroadcastss 0x6d9(%rip),%ymm10 # 6784 <_sk_callback_avx+0x4b9>
DB 196,65,52,88,202 ; vaddps %ymm10,%ymm9,%ymm9
DB 196,65,60,89,193 ; vmulps %ymm9,%ymm8,%ymm8
- DB 196,98,125,24,13,202,6,0,0 ; vbroadcastss 0x6ca(%rip),%ymm9 # 689c <_sk_callback_avx+0x4bd>
+ DB 196,98,125,24,13,202,6,0,0 ; vbroadcastss 0x6ca(%rip),%ymm9 # 6788 <_sk_callback_avx+0x4bd>
DB 196,65,60,88,193 ; vaddps %ymm9,%ymm8,%ymm8
DB 197,124,17,128,128,0,0,0 ; vmovups %ymm8,0x80(%rax)
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -10690,17 +10601,17 @@ _sk_bicubic_n1x_avx LABEL PROC
PUBLIC _sk_bicubic_p1x_avx
_sk_bicubic_p1x_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,98,125,24,5,178,6,0,0 ; vbroadcastss 0x6b2(%rip),%ymm8 # 68a0 <_sk_callback_avx+0x4c1>
+ DB 196,98,125,24,5,178,6,0,0 ; vbroadcastss 0x6b2(%rip),%ymm8 # 678c <_sk_callback_avx+0x4c1>
DB 197,188,88,0 ; vaddps (%rax),%ymm8,%ymm0
DB 197,124,16,72,64 ; vmovups 0x40(%rax),%ymm9
- DB 196,98,125,24,21,164,6,0,0 ; vbroadcastss 0x6a4(%rip),%ymm10 # 68a4 <_sk_callback_avx+0x4c5>
+ DB 196,98,125,24,21,164,6,0,0 ; vbroadcastss 0x6a4(%rip),%ymm10 # 6790 <_sk_callback_avx+0x4c5>
DB 196,65,52,89,210 ; vmulps %ymm10,%ymm9,%ymm10
- DB 196,98,125,24,29,154,6,0,0 ; vbroadcastss 0x69a(%rip),%ymm11 # 68a8 <_sk_callback_avx+0x4c9>
+ DB 196,98,125,24,29,154,6,0,0 ; vbroadcastss 0x69a(%rip),%ymm11 # 6794 <_sk_callback_avx+0x4c9>
DB 196,65,44,88,211 ; vaddps %ymm11,%ymm10,%ymm10
DB 196,65,52,89,210 ; vmulps %ymm10,%ymm9,%ymm10
DB 196,65,44,88,192 ; vaddps %ymm8,%ymm10,%ymm8
DB 196,65,52,89,192 ; vmulps %ymm8,%ymm9,%ymm8
- DB 196,98,125,24,13,129,6,0,0 ; vbroadcastss 0x681(%rip),%ymm9 # 68ac <_sk_callback_avx+0x4cd>
+ DB 196,98,125,24,13,129,6,0,0 ; vbroadcastss 0x681(%rip),%ymm9 # 6798 <_sk_callback_avx+0x4cd>
DB 196,65,60,88,193 ; vaddps %ymm9,%ymm8,%ymm8
DB 197,124,17,128,128,0,0,0 ; vmovups %ymm8,0x80(%rax)
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -10709,13 +10620,13 @@ _sk_bicubic_p1x_avx LABEL PROC
PUBLIC _sk_bicubic_p3x_avx
_sk_bicubic_p3x_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,5,105,6,0,0 ; vbroadcastss 0x669(%rip),%ymm0 # 68b0 <_sk_callback_avx+0x4d1>
+ DB 196,226,125,24,5,105,6,0,0 ; vbroadcastss 0x669(%rip),%ymm0 # 679c <_sk_callback_avx+0x4d1>
DB 197,252,88,0 ; vaddps (%rax),%ymm0,%ymm0
DB 197,124,16,64,64 ; vmovups 0x40(%rax),%ymm8
DB 196,65,60,89,200 ; vmulps %ymm8,%ymm8,%ymm9
- DB 196,98,125,24,21,86,6,0,0 ; vbroadcastss 0x656(%rip),%ymm10 # 68b4 <_sk_callback_avx+0x4d5>
+ DB 196,98,125,24,21,86,6,0,0 ; vbroadcastss 0x656(%rip),%ymm10 # 67a0 <_sk_callback_avx+0x4d5>
DB 196,65,60,89,194 ; vmulps %ymm10,%ymm8,%ymm8
- DB 196,98,125,24,21,76,6,0,0 ; vbroadcastss 0x64c(%rip),%ymm10 # 68b8 <_sk_callback_avx+0x4d9>
+ DB 196,98,125,24,21,76,6,0,0 ; vbroadcastss 0x64c(%rip),%ymm10 # 67a4 <_sk_callback_avx+0x4d9>
DB 196,65,60,88,194 ; vaddps %ymm10,%ymm8,%ymm8
DB 196,65,52,89,192 ; vmulps %ymm8,%ymm9,%ymm8
DB 197,124,17,128,128,0,0,0 ; vmovups %ymm8,0x80(%rax)
@@ -10725,14 +10636,14 @@ _sk_bicubic_p3x_avx LABEL PROC
PUBLIC _sk_bicubic_n3y_avx
_sk_bicubic_n3y_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,13,47,6,0,0 ; vbroadcastss 0x62f(%rip),%ymm1 # 68bc <_sk_callback_avx+0x4dd>
+ DB 196,226,125,24,13,47,6,0,0 ; vbroadcastss 0x62f(%rip),%ymm1 # 67a8 <_sk_callback_avx+0x4dd>
DB 197,244,88,72,32 ; vaddps 0x20(%rax),%ymm1,%ymm1
- DB 196,98,125,24,5,37,6,0,0 ; vbroadcastss 0x625(%rip),%ymm8 # 68c0 <_sk_callback_avx+0x4e1>
+ DB 196,98,125,24,5,37,6,0,0 ; vbroadcastss 0x625(%rip),%ymm8 # 67ac <_sk_callback_avx+0x4e1>
DB 197,60,92,64,96 ; vsubps 0x60(%rax),%ymm8,%ymm8
DB 196,65,60,89,200 ; vmulps %ymm8,%ymm8,%ymm9
- DB 196,98,125,24,21,22,6,0,0 ; vbroadcastss 0x616(%rip),%ymm10 # 68c4 <_sk_callback_avx+0x4e5>
+ DB 196,98,125,24,21,22,6,0,0 ; vbroadcastss 0x616(%rip),%ymm10 # 67b0 <_sk_callback_avx+0x4e5>
DB 196,65,60,89,194 ; vmulps %ymm10,%ymm8,%ymm8
- DB 196,98,125,24,21,12,6,0,0 ; vbroadcastss 0x60c(%rip),%ymm10 # 68c8 <_sk_callback_avx+0x4e9>
+ DB 196,98,125,24,21,12,6,0,0 ; vbroadcastss 0x60c(%rip),%ymm10 # 67b4 <_sk_callback_avx+0x4e9>
DB 196,65,60,88,194 ; vaddps %ymm10,%ymm8,%ymm8
DB 196,65,52,89,192 ; vmulps %ymm8,%ymm9,%ymm8
DB 197,124,17,128,160,0,0,0 ; vmovups %ymm8,0xa0(%rax)
@@ -10742,19 +10653,19 @@ _sk_bicubic_n3y_avx LABEL PROC
PUBLIC _sk_bicubic_n1y_avx
_sk_bicubic_n1y_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,13,239,5,0,0 ; vbroadcastss 0x5ef(%rip),%ymm1 # 68cc <_sk_callback_avx+0x4ed>
+ DB 196,226,125,24,13,239,5,0,0 ; vbroadcastss 0x5ef(%rip),%ymm1 # 67b8 <_sk_callback_avx+0x4ed>
DB 197,244,88,72,32 ; vaddps 0x20(%rax),%ymm1,%ymm1
- DB 196,98,125,24,5,229,5,0,0 ; vbroadcastss 0x5e5(%rip),%ymm8 # 68d0 <_sk_callback_avx+0x4f1>
+ DB 196,98,125,24,5,229,5,0,0 ; vbroadcastss 0x5e5(%rip),%ymm8 # 67bc <_sk_callback_avx+0x4f1>
DB 197,60,92,64,96 ; vsubps 0x60(%rax),%ymm8,%ymm8
- DB 196,98,125,24,13,219,5,0,0 ; vbroadcastss 0x5db(%rip),%ymm9 # 68d4 <_sk_callback_avx+0x4f5>
+ DB 196,98,125,24,13,219,5,0,0 ; vbroadcastss 0x5db(%rip),%ymm9 # 67c0 <_sk_callback_avx+0x4f5>
DB 196,65,60,89,201 ; vmulps %ymm9,%ymm8,%ymm9
- DB 196,98,125,24,21,209,5,0,0 ; vbroadcastss 0x5d1(%rip),%ymm10 # 68d8 <_sk_callback_avx+0x4f9>
+ DB 196,98,125,24,21,209,5,0,0 ; vbroadcastss 0x5d1(%rip),%ymm10 # 67c4 <_sk_callback_avx+0x4f9>
DB 196,65,52,88,202 ; vaddps %ymm10,%ymm9,%ymm9
DB 196,65,60,89,201 ; vmulps %ymm9,%ymm8,%ymm9
- DB 196,98,125,24,21,194,5,0,0 ; vbroadcastss 0x5c2(%rip),%ymm10 # 68dc <_sk_callback_avx+0x4fd>
+ DB 196,98,125,24,21,194,5,0,0 ; vbroadcastss 0x5c2(%rip),%ymm10 # 67c8 <_sk_callback_avx+0x4fd>
DB 196,65,52,88,202 ; vaddps %ymm10,%ymm9,%ymm9
DB 196,65,60,89,193 ; vmulps %ymm9,%ymm8,%ymm8
- DB 196,98,125,24,13,179,5,0,0 ; vbroadcastss 0x5b3(%rip),%ymm9 # 68e0 <_sk_callback_avx+0x501>
+ DB 196,98,125,24,13,179,5,0,0 ; vbroadcastss 0x5b3(%rip),%ymm9 # 67cc <_sk_callback_avx+0x501>
DB 196,65,60,88,193 ; vaddps %ymm9,%ymm8,%ymm8
DB 197,124,17,128,160,0,0,0 ; vmovups %ymm8,0xa0(%rax)
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -10763,17 +10674,17 @@ _sk_bicubic_n1y_avx LABEL PROC
PUBLIC _sk_bicubic_p1y_avx
_sk_bicubic_p1y_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,98,125,24,5,155,5,0,0 ; vbroadcastss 0x59b(%rip),%ymm8 # 68e4 <_sk_callback_avx+0x505>
+ DB 196,98,125,24,5,155,5,0,0 ; vbroadcastss 0x59b(%rip),%ymm8 # 67d0 <_sk_callback_avx+0x505>
DB 197,188,88,72,32 ; vaddps 0x20(%rax),%ymm8,%ymm1
DB 197,124,16,72,96 ; vmovups 0x60(%rax),%ymm9
- DB 196,98,125,24,21,140,5,0,0 ; vbroadcastss 0x58c(%rip),%ymm10 # 68e8 <_sk_callback_avx+0x509>
+ DB 196,98,125,24,21,140,5,0,0 ; vbroadcastss 0x58c(%rip),%ymm10 # 67d4 <_sk_callback_avx+0x509>
DB 196,65,52,89,210 ; vmulps %ymm10,%ymm9,%ymm10
- DB 196,98,125,24,29,130,5,0,0 ; vbroadcastss 0x582(%rip),%ymm11 # 68ec <_sk_callback_avx+0x50d>
+ DB 196,98,125,24,29,130,5,0,0 ; vbroadcastss 0x582(%rip),%ymm11 # 67d8 <_sk_callback_avx+0x50d>
DB 196,65,44,88,211 ; vaddps %ymm11,%ymm10,%ymm10
DB 196,65,52,89,210 ; vmulps %ymm10,%ymm9,%ymm10
DB 196,65,44,88,192 ; vaddps %ymm8,%ymm10,%ymm8
DB 196,65,52,89,192 ; vmulps %ymm8,%ymm9,%ymm8
- DB 196,98,125,24,13,105,5,0,0 ; vbroadcastss 0x569(%rip),%ymm9 # 68f0 <_sk_callback_avx+0x511>
+ DB 196,98,125,24,13,105,5,0,0 ; vbroadcastss 0x569(%rip),%ymm9 # 67dc <_sk_callback_avx+0x511>
DB 196,65,60,88,193 ; vaddps %ymm9,%ymm8,%ymm8
DB 197,124,17,128,160,0,0,0 ; vmovups %ymm8,0xa0(%rax)
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -10782,13 +10693,13 @@ _sk_bicubic_p1y_avx LABEL PROC
PUBLIC _sk_bicubic_p3y_avx
_sk_bicubic_p3y_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,13,81,5,0,0 ; vbroadcastss 0x551(%rip),%ymm1 # 68f4 <_sk_callback_avx+0x515>
+ DB 196,226,125,24,13,81,5,0,0 ; vbroadcastss 0x551(%rip),%ymm1 # 67e0 <_sk_callback_avx+0x515>
DB 197,244,88,72,32 ; vaddps 0x20(%rax),%ymm1,%ymm1
DB 197,124,16,64,96 ; vmovups 0x60(%rax),%ymm8
DB 196,65,60,89,200 ; vmulps %ymm8,%ymm8,%ymm9
- DB 196,98,125,24,21,61,5,0,0 ; vbroadcastss 0x53d(%rip),%ymm10 # 68f8 <_sk_callback_avx+0x519>
+ DB 196,98,125,24,21,61,5,0,0 ; vbroadcastss 0x53d(%rip),%ymm10 # 67e4 <_sk_callback_avx+0x519>
DB 196,65,60,89,194 ; vmulps %ymm10,%ymm8,%ymm8
- DB 196,98,125,24,21,51,5,0,0 ; vbroadcastss 0x533(%rip),%ymm10 # 68fc <_sk_callback_avx+0x51d>
+ DB 196,98,125,24,21,51,5,0,0 ; vbroadcastss 0x533(%rip),%ymm10 # 67e8 <_sk_callback_avx+0x51d>
DB 196,65,60,88,194 ; vaddps %ymm10,%ymm8,%ymm8
DB 196,65,52,89,192 ; vmulps %ymm8,%ymm9,%ymm8
DB 197,124,17,128,160,0,0,0 ; vmovups %ymm8,0xa0(%rax)
@@ -10902,25 +10813,25 @@ ALIGN 4
DB 153 ; cltd
DB 153 ; cltd
DB 62,61,10,23,63,174 ; ds cmp $0xae3f170a,%eax
- DB 71,225,61 ; rex.RXB loope 65a9 <.literal4+0xb1>
+ DB 71,225,61 ; rex.RXB loope 6495 <.literal4+0xb1>
DB 0,0 ; add %al,(%rax)
DB 128,63,154 ; cmpb $0x9a,(%rdi)
DB 153 ; cltd
DB 153 ; cltd
DB 62,61,10,23,63,174 ; ds cmp $0xae3f170a,%eax
- DB 71,225,61 ; rex.RXB loope 65b9 <.literal4+0xc1>
+ DB 71,225,61 ; rex.RXB loope 64a5 <.literal4+0xc1>
DB 0,0 ; add %al,(%rax)
DB 128,63,154 ; cmpb $0x9a,(%rdi)
DB 153 ; cltd
DB 153 ; cltd
DB 62,61,10,23,63,174 ; ds cmp $0xae3f170a,%eax
- DB 71,225,61 ; rex.RXB loope 65c9 <.literal4+0xd1>
+ DB 71,225,61 ; rex.RXB loope 64b5 <.literal4+0xd1>
DB 0,0 ; add %al,(%rax)
DB 128,63,154 ; cmpb $0x9a,(%rdi)
DB 153 ; cltd
DB 153 ; cltd
DB 62,61,10,23,63,174 ; ds cmp $0xae3f170a,%eax
- DB 71,225,61 ; rex.RXB loope 65d9 <.literal4+0xe1>
+ DB 71,225,61 ; rex.RXB loope 64c5 <.literal4+0xe1>
DB 0,0 ; add %al,(%rax)
DB 128,63,0 ; cmpb $0x0,(%rdi)
DB 0,128,63,0,0,128 ; add %al,-0x7fffffc1(%rax)
@@ -10968,7 +10879,7 @@ ALIGN 4
DB 190,129,128,128,59 ; mov $0x3b808081,%esi
DB 129,128,128,59,0,248,0,0,8,33 ; addl $0x21080000,-0x7ffc480(%rax)
DB 132,55 ; test %dh,(%rdi)
- DB 224,7 ; loopne 6625 <.literal4+0x12d>
+ DB 224,7 ; loopne 6511 <.literal4+0x12d>
DB 0,0 ; add %al,(%rax)
DB 33,8 ; and %ecx,(%rax)
DB 2,58 ; add (%rdx),%bh
@@ -10984,10 +10895,10 @@ ALIGN 4
DB 129,128,128,59,129,128,128,59,0,0 ; addl $0x3b80,-0x7f7ec480(%rax)
DB 0,52,255 ; add %dh,(%rdi,%rdi,8)
DB 255 ; (bad)
- DB 127,0 ; jg 664c <.literal4+0x154>
+ DB 127,0 ; jg 6538 <.literal4+0x154>
DB 0,0 ; add %al,(%rax)
DB 0,63 ; add %bh,(%rdi)
- DB 119,115 ; ja 66c5 <.literal4+0x1cd>
+ DB 119,115 ; ja 65b1 <.literal4+0x1cd>
DB 248 ; clc
DB 194,117,191 ; retq $0xbf75
DB 191,63,249,68,180 ; mov $0xb444f93f,%edi
@@ -11001,10 +10912,10 @@ ALIGN 4
DB 0,128,63,0,0,0 ; add %al,0x3f(%rax)
DB 52,255 ; xor $0xff,%al
DB 255 ; (bad)
- DB 127,0 ; jg 6680 <.literal4+0x188>
+ DB 127,0 ; jg 656c <.literal4+0x188>
DB 0,0 ; add %al,(%rax)
DB 0,63 ; add %bh,(%rdi)
- DB 119,115 ; ja 66f9 <.literal4+0x201>
+ DB 119,115 ; ja 65e5 <.literal4+0x201>
DB 248 ; clc
DB 194,117,191 ; retq $0xbf75
DB 191,63,249,68,180 ; mov $0xb444f93f,%edi
@@ -11018,10 +10929,10 @@ ALIGN 4
DB 0,128,63,0,0,0 ; add %al,0x3f(%rax)
DB 52,255 ; xor $0xff,%al
DB 255 ; (bad)
- DB 127,0 ; jg 66b4 <.literal4+0x1bc>
+ DB 127,0 ; jg 65a0 <.literal4+0x1bc>
DB 0,0 ; add %al,(%rax)
DB 0,63 ; add %bh,(%rdi)
- DB 119,115 ; ja 672d <.literal4+0x235>
+ DB 119,115 ; ja 6619 <.literal4+0x235>
DB 248 ; clc
DB 194,117,191 ; retq $0xbf75
DB 191,63,249,68,180 ; mov $0xb444f93f,%edi
@@ -11035,10 +10946,10 @@ ALIGN 4
DB 0,128,63,0,0,0 ; add %al,0x3f(%rax)
DB 52,255 ; xor $0xff,%al
DB 255 ; (bad)
- DB 127,0 ; jg 66e8 <.literal4+0x1f0>
+ DB 127,0 ; jg 65d4 <.literal4+0x1f0>
DB 0,0 ; add %al,(%rax)
DB 0,63 ; add %bh,(%rdi)
- DB 119,115 ; ja 6761 <.literal4+0x269>
+ DB 119,115 ; ja 664d <.literal4+0x269>
DB 248 ; clc
DB 194,117,191 ; retq $0xbf75
DB 191,63,249,68,180 ; mov $0xb444f93f,%edi
@@ -11051,7 +10962,7 @@ ALIGN 4
DB 0,75,0 ; add %cl,0x0(%rbx)
DB 0,128,63,0,0,200 ; add %al,-0x37ffffc1(%rax)
DB 66,0,0 ; rex.X add %al,(%rax)
- DB 127,67 ; jg 675f <.literal4+0x267>
+ DB 127,67 ; jg 664b <.literal4+0x267>
DB 0,0 ; add %al,(%rax)
DB 0,195 ; add %al,%bl
DB 0,0 ; add %al,(%rax)
@@ -11063,10 +10974,10 @@ ALIGN 4
DB 190,80,128,3,62 ; mov $0x3e038050,%esi
DB 31 ; (bad)
DB 215 ; xlat %ds:(%rbx)
- DB 118,63 ; jbe 677f <.literal4+0x287>
+ DB 118,63 ; jbe 666b <.literal4+0x287>
DB 246,64,83,63 ; testb $0x3f,0x53(%rax)
DB 129,128,128,59,129,128,128,59,0,0 ; addl $0x3b80,-0x7f7ec480(%rax)
- DB 127,67 ; jg 6793 <.literal4+0x29b>
+ DB 127,67 ; jg 667f <.literal4+0x29b>
DB 129,128,128,59,0,0,128,63,129,128 ; addl $0x80813f80,0x3b80(%rax)
DB 128,59,0 ; cmpb $0x0,(%rbx)
DB 0,128,63,129,128,128 ; add %al,-0x7f7f7ec1(%rax)
@@ -11075,7 +10986,7 @@ ALIGN 4
DB 0,0 ; add %al,(%rax)
DB 8,33 ; or %ah,(%rcx)
DB 132,55 ; test %dh,(%rdi)
- DB 224,7 ; loopne 6775 <.literal4+0x27d>
+ DB 224,7 ; loopne 6661 <.literal4+0x27d>
DB 0,0 ; add %al,(%rax)
DB 33,8 ; and %ecx,(%rax)
DB 2,58 ; add (%rdx),%bh
@@ -11087,7 +10998,7 @@ ALIGN 4
DB 0,0 ; add %al,(%rax)
DB 8,33 ; or %ah,(%rcx)
DB 132,55 ; test %dh,(%rdi)
- DB 224,7 ; loopne 6791 <.literal4+0x299>
+ DB 224,7 ; loopne 667d <.literal4+0x299>
DB 0,0 ; add %al,(%rax)
DB 33,8 ; and %ecx,(%rax)
DB 2,58 ; add (%rdx),%bh
@@ -11098,7 +11009,7 @@ ALIGN 4
DB 0,0 ; add %al,(%rax)
DB 248 ; clc
DB 65,0,0 ; add %al,(%r8)
- DB 124,66 ; jl 67e6 <.literal4+0x2ee>
+ DB 124,66 ; jl 66d2 <.literal4+0x2ee>
DB 0,240 ; add %dh,%al
DB 0,0 ; add %al,(%rax)
DB 137,136,136,55,0,15 ; mov %ecx,0xf003788(%rax)
@@ -11116,9 +11027,9 @@ ALIGN 4
DB 137,136,136,59,15,0 ; mov %ecx,0xf3b88(%rax)
DB 0,0 ; add %al,(%rax)
DB 137,136,136,61,0,0 ; mov %ecx,0x3d88(%rax)
- DB 112,65 ; jo 6829 <.literal4+0x331>
+ DB 112,65 ; jo 6715 <.literal4+0x331>
DB 129,128,128,59,129,128,128,59,0,0 ; addl $0x3b80,-0x7f7ec480(%rax)
- DB 127,67 ; jg 6837 <.literal4+0x33f>
+ DB 127,67 ; jg 6723 <.literal4+0x33f>
DB 0,128,0,0,0,0 ; add %al,0x0(%rax)
DB 0,128,0,4,0,128 ; add %al,-0x7ffffc00(%rax)
DB 0,0 ; add %al,(%rax)
@@ -11134,7 +11045,7 @@ ALIGN 4
DB 0,128,55,0,0,128 ; add %al,-0x7fffffc9(%rax)
DB 63 ; (bad)
DB 0,255 ; add %bh,%bh
- DB 127,71 ; jg 6877 <.literal4+0x37f>
+ DB 127,71 ; jg 6763 <.literal4+0x37f>
DB 208 ; (bad)
DB 179,89 ; mov $0x59,%bl
DB 62,89 ; ds pop %rcx
@@ -11221,39 +11132,73 @@ ALIGN 4
DB 170 ; stos %al,%es:(%rdi)
DB 190 ; .byte 0xbe
-ALIGN 32
- DB 255,0 ; incl (%rax)
+ALIGN 16
+ DB 0,2 ; add %al,(%rdx)
+ DB 4,6 ; add $0x6,%al
DB 0,0 ; add %al,(%rax)
- DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
- DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
- DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
- DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
- DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
- DB 255,0 ; incl (%rax)
+ DB 8,10 ; or %cl,(%rdx)
+ DB 12,14 ; or $0xe,%al
DB 0,0 ; add %al,(%rax)
- DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
- DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
- DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
- DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
- DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 255,0 ; incl (%rax)
+ DB 255,0 ; incl (%rax)
+ DB 255,0 ; incl (%rax)
+ DB 255,0 ; incl (%rax)
+ DB 255,0 ; incl (%rax)
+ DB 255,0 ; incl (%rax)
+ DB 255,0 ; incl (%rax)
+ DB 255,0 ; incl (%rax)
+ DB 255,0 ; incl (%rax)
+ DB 255,0 ; incl (%rax)
+ DB 255,0 ; incl (%rax)
+ DB 255,0 ; incl (%rax)
+ DB 0,2 ; add %al,(%rdx)
+ DB 4,6 ; add $0x6,%al
+ DB 0,0 ; add %al,(%rax)
+ DB 0,0 ; add %al,(%rax)
+ DB 0,0 ; add %al,(%rax)
+ DB 0,0 ; add %al,(%rax)
+ DB 0,0 ; add %al,(%rax)
+ DB 0,0 ; add %al,(%rax)
+ DB 8,10 ; or %cl,(%rdx)
+ DB 12,14 ; or $0xe,%al
+ DB 0,0 ; add %al,(%rax)
+ DB 0,0 ; add %al,(%rax)
+ DB 0,0 ; add %al,(%rax)
+ DB 0,0 ; add %al,(%rax)
+ DB 0,0 ; add %al,(%rax)
+ DB 0,0 ; add %al,(%rax)
+ DB 0,2 ; add %al,(%rdx)
+ DB 4,6 ; add $0x6,%al
+ DB 0,0 ; add %al,(%rax)
DB 0,0 ; add %al,(%rax)
+ DB 0,0 ; add %al,(%rax)
+ DB 0,0 ; add %al,(%rax)
+ DB 0,0 ; add %al,(%rax)
+ DB 0,0 ; add %al,(%rax)
+ DB 8,10 ; or %cl,(%rdx)
+ DB 12,14 ; or $0xe,%al
+ DB 0,0 ; add %al,(%rax)
+ DB 0,0 ; add %al,(%rax)
+ DB 0,0 ; add %al,(%rax)
+ DB 0,0 ; add %al,(%rax)
+ DB 0,0 ; add %al,(%rax)
+ DB 0,0 ; add %al,(%rax)
+
+ALIGN 32
DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
@@ -11286,24 +11231,38 @@ ALIGN 32
DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
-
-ALIGN 16
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
ALIGN 32
PUBLIC _sk_start_pipeline_sse41
diff --git a/src/jumper/SkJumper_stages.cpp b/src/jumper/SkJumper_stages.cpp
index fa11869a6e..d1747018bf 100644
--- a/src/jumper/SkJumper_stages.cpp
+++ b/src/jumper/SkJumper_stages.cpp
@@ -217,8 +217,8 @@ SI void store(T* dst, V v, size_t tail) {
}
#endif
-// AVX2 adds some mask loads and stores that make for shorter, faster code.
-#if defined(JUMPER) && defined(__AVX2__)
+// AVX adds some mask loads and stores that make for shorter, faster code.
+#if defined(JUMPER) && defined(__AVX__)
SI U32 mask(size_t tail) {
// We go a little out of our way to avoid needing large constant values here.
@@ -227,14 +227,16 @@ SI void store(T* dst, V v, size_t tail) {
uint64_t mask = 0xffffffffffffffff >> 8*(kStride-tail);
// Sign-extend each mask lane to its full width, 0x00000000 or 0xffffffff.
- return _mm256_cvtepi8_epi32(_mm_cvtsi64_si128((int64_t)mask));
+ using S8 = int8_t __attribute__((ext_vector_type(8)));
+ using S32 = int32_t __attribute__((ext_vector_type(8)));
+ return (U32)__builtin_convertvector(unaligned_load<S8>(&mask), S32);
}
template <>
inline U32 load(const uint32_t* src, size_t tail) {
__builtin_assume(tail < kStride);
if (__builtin_expect(tail, 0)) {
- return _mm256_maskload_epi32((const int*)src, mask(tail));
+ return (U32)_mm256_maskload_ps((const float*)src, mask(tail));
}
return unaligned_load<U32>(src);
}
@@ -243,7 +245,7 @@ SI void store(T* dst, V v, size_t tail) {
inline void store(uint32_t* dst, U32 v, size_t tail) {
__builtin_assume(tail < kStride);
if (__builtin_expect(tail, 0)) {
- return _mm256_maskstore_epi32((int*)dst, mask(tail), v);
+ return _mm256_maskstore_ps((float*)dst, mask(tail), (F)v);
}
unaligned_store(dst, v);
}