aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/jumper
diff options
context:
space:
mode:
authorGravatar Mike Klein <mtklein@chromium.org>2017-08-11 14:22:12 -0400
committerGravatar Skia Commit-Bot <skia-commit-bot@chromium.org>2017-08-11 18:52:36 +0000
commitc10850f4e77182f0f3324ca0da207c8693625b4b (patch)
tree8261dc979570c9aff2edbf47e48d0f058161d897 /src/jumper
parent6382f455f0bb447debc670c10360d7aed970b728 (diff)
remove mask load() and store()
They appear to be slower than the generic load() and store() now. [blendmode_mask_Hue] 14.7ms @0 15.6ms @95 39.6ms @100 [blendmode_rect_Hue] 31.5ms @0 37.6ms @95 39.5ms @100 ~~> [blendmode_mask_Hue] 14.7ms @0 15.2ms @95 39.5ms @100 [blendmode_rect_Hue] 30.5ms @0 32.6ms @95 37.8ms @100 Change-Id: I674b75087b8139debead71f3016631bcb0cb0047 Reviewed-on: https://skia-review.googlesource.com/33800 Reviewed-by: Florin Malita <fmalita@chromium.org> Commit-Queue: Mike Klein <mtklein@chromium.org>
Diffstat (limited to 'src/jumper')
-rw-r--r--src/jumper/SkJumper_generated.S4309
-rw-r--r--src/jumper/SkJumper_generated_win.S4332
-rw-r--r--src/jumper/SkJumper_stages.cpp34
3 files changed, 4836 insertions, 3839 deletions
diff --git a/src/jumper/SkJumper_generated.S b/src/jumper/SkJumper_generated.S
index 0cc69f8f1f..bb2e8da1de 100644
--- a/src/jumper/SkJumper_generated.S
+++ b/src/jumper/SkJumper_generated.S
@@ -16599,7 +16599,7 @@ _sk_seed_shader_hsw:
.byte 197,249,110,194 // vmovd %edx,%xmm0
.byte 196,226,125,88,192 // vpbroadcastd %xmm0,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,226,125,24,13,94,103,0,0 // vbroadcastss 0x675e(%rip),%ymm1 # 6828 <_sk_clut_4D_hsw+0x89e>
+ .byte 196,226,125,24,13,42,108,0,0 // vbroadcastss 0x6c2a(%rip),%ymm1 # 6cf4 <_sk_clut_4D_hsw+0x89e>
.byte 197,252,88,193 // vaddps %ymm1,%ymm0,%ymm0
.byte 197,252,88,7 // vaddps (%rdi),%ymm0,%ymm0
.byte 197,249,110,209 // vmovd %ecx,%xmm2
@@ -16607,7 +16607,7 @@ _sk_seed_shader_hsw:
.byte 197,252,91,210 // vcvtdq2ps %ymm2,%ymm2
.byte 197,236,88,201 // vaddps %ymm1,%ymm2,%ymm1
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,21,62,103,0,0 // vbroadcastss 0x673e(%rip),%ymm2 # 682c <_sk_clut_4D_hsw+0x8a2>
+ .byte 196,226,125,24,21,10,108,0,0 // vbroadcastss 0x6c0a(%rip),%ymm2 # 6cf8 <_sk_clut_4D_hsw+0x8a2>
.byte 197,228,87,219 // vxorps %ymm3,%ymm3,%ymm3
.byte 197,220,87,228 // vxorps %ymm4,%ymm4,%ymm4
.byte 197,212,87,237 // vxorps %ymm5,%ymm5,%ymm5
@@ -16626,13 +16626,13 @@ _sk_dither_hsw:
.byte 197,121,110,201 // vmovd %ecx,%xmm9
.byte 196,66,125,88,201 // vpbroadcastd %xmm9,%ymm9
.byte 196,65,53,239,200 // vpxor %ymm8,%ymm9,%ymm9
- .byte 196,98,125,88,21,5,103,0,0 // vpbroadcastd 0x6705(%rip),%ymm10 # 6830 <_sk_clut_4D_hsw+0x8a6>
+ .byte 196,98,125,88,21,209,107,0,0 // vpbroadcastd 0x6bd1(%rip),%ymm10 # 6cfc <_sk_clut_4D_hsw+0x8a6>
.byte 196,65,53,219,218 // vpand %ymm10,%ymm9,%ymm11
.byte 196,193,37,114,243,5 // vpslld $0x5,%ymm11,%ymm11
.byte 196,65,61,219,210 // vpand %ymm10,%ymm8,%ymm10
.byte 196,193,45,114,242,4 // vpslld $0x4,%ymm10,%ymm10
- .byte 196,98,125,88,37,234,102,0,0 // vpbroadcastd 0x66ea(%rip),%ymm12 # 6834 <_sk_clut_4D_hsw+0x8aa>
- .byte 196,98,125,88,45,229,102,0,0 // vpbroadcastd 0x66e5(%rip),%ymm13 # 6838 <_sk_clut_4D_hsw+0x8ae>
+ .byte 196,98,125,88,37,182,107,0,0 // vpbroadcastd 0x6bb6(%rip),%ymm12 # 6d00 <_sk_clut_4D_hsw+0x8aa>
+ .byte 196,98,125,88,45,177,107,0,0 // vpbroadcastd 0x6bb1(%rip),%ymm13 # 6d04 <_sk_clut_4D_hsw+0x8ae>
.byte 196,65,53,219,245 // vpand %ymm13,%ymm9,%ymm14
.byte 196,193,13,114,246,2 // vpslld $0x2,%ymm14,%ymm14
.byte 196,65,61,219,237 // vpand %ymm13,%ymm8,%ymm13
@@ -16647,8 +16647,8 @@ _sk_dither_hsw:
.byte 196,65,61,235,194 // vpor %ymm10,%ymm8,%ymm8
.byte 196,65,61,235,193 // vpor %ymm9,%ymm8,%ymm8
.byte 196,65,124,91,192 // vcvtdq2ps %ymm8,%ymm8
- .byte 196,98,125,24,13,151,102,0,0 // vbroadcastss 0x6697(%rip),%ymm9 # 683c <_sk_clut_4D_hsw+0x8b2>
- .byte 196,98,125,24,21,146,102,0,0 // vbroadcastss 0x6692(%rip),%ymm10 # 6840 <_sk_clut_4D_hsw+0x8b6>
+ .byte 196,98,125,24,13,99,107,0,0 // vbroadcastss 0x6b63(%rip),%ymm9 # 6d08 <_sk_clut_4D_hsw+0x8b2>
+ .byte 196,98,125,24,21,94,107,0,0 // vbroadcastss 0x6b5e(%rip),%ymm10 # 6d0c <_sk_clut_4D_hsw+0x8b6>
.byte 196,66,61,184,209 // vfmadd231ps %ymm9,%ymm8,%ymm10
.byte 196,98,125,24,0 // vbroadcastss (%rax),%ymm8
.byte 196,65,60,89,194 // vmulps %ymm10,%ymm8,%ymm8
@@ -16682,7 +16682,7 @@ HIDDEN _sk_black_color_hsw
FUNCTION(_sk_black_color_hsw)
_sk_black_color_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,29,50,102,0,0 // vbroadcastss 0x6632(%rip),%ymm3 # 6844 <_sk_clut_4D_hsw+0x8ba>
+ .byte 196,226,125,24,29,254,106,0,0 // vbroadcastss 0x6afe(%rip),%ymm3 # 6d10 <_sk_clut_4D_hsw+0x8ba>
.byte 197,252,87,192 // vxorps %ymm0,%ymm0,%ymm0
.byte 197,244,87,201 // vxorps %ymm1,%ymm1,%ymm1
.byte 197,236,87,210 // vxorps %ymm2,%ymm2,%ymm2
@@ -16693,7 +16693,7 @@ HIDDEN _sk_white_color_hsw
FUNCTION(_sk_white_color_hsw)
_sk_white_color_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,5,29,102,0,0 // vbroadcastss 0x661d(%rip),%ymm0 # 6848 <_sk_clut_4D_hsw+0x8be>
+ .byte 196,226,125,24,5,233,106,0,0 // vbroadcastss 0x6ae9(%rip),%ymm0 # 6d14 <_sk_clut_4D_hsw+0x8be>
.byte 197,252,40,200 // vmovaps %ymm0,%ymm1
.byte 197,252,40,208 // vmovaps %ymm0,%ymm2
.byte 197,252,40,216 // vmovaps %ymm0,%ymm3
@@ -16739,7 +16739,7 @@ HIDDEN _sk_srcatop_hsw
FUNCTION(_sk_srcatop_hsw)
_sk_srcatop_hsw:
.byte 197,252,89,199 // vmulps %ymm7,%ymm0,%ymm0
- .byte 196,98,125,24,5,192,101,0,0 // vbroadcastss 0x65c0(%rip),%ymm8 # 684c <_sk_clut_4D_hsw+0x8c2>
+ .byte 196,98,125,24,5,140,106,0,0 // vbroadcastss 0x6a8c(%rip),%ymm8 # 6d18 <_sk_clut_4D_hsw+0x8c2>
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
.byte 196,226,61,184,196 // vfmadd231ps %ymm4,%ymm8,%ymm0
.byte 197,244,89,207 // vmulps %ymm7,%ymm1,%ymm1
@@ -16755,7 +16755,7 @@ HIDDEN _sk_dstatop_hsw
.globl _sk_dstatop_hsw
FUNCTION(_sk_dstatop_hsw)
_sk_dstatop_hsw:
- .byte 196,98,125,24,5,147,101,0,0 // vbroadcastss 0x6593(%rip),%ymm8 # 6850 <_sk_clut_4D_hsw+0x8c6>
+ .byte 196,98,125,24,5,95,106,0,0 // vbroadcastss 0x6a5f(%rip),%ymm8 # 6d1c <_sk_clut_4D_hsw+0x8c6>
.byte 197,60,92,199 // vsubps %ymm7,%ymm8,%ymm8
.byte 197,188,89,192 // vmulps %ymm0,%ymm8,%ymm0
.byte 196,226,101,184,196 // vfmadd231ps %ymm4,%ymm3,%ymm0
@@ -16794,7 +16794,7 @@ HIDDEN _sk_srcout_hsw
.globl _sk_srcout_hsw
FUNCTION(_sk_srcout_hsw)
_sk_srcout_hsw:
- .byte 196,98,125,24,5,58,101,0,0 // vbroadcastss 0x653a(%rip),%ymm8 # 6854 <_sk_clut_4D_hsw+0x8ca>
+ .byte 196,98,125,24,5,6,106,0,0 // vbroadcastss 0x6a06(%rip),%ymm8 # 6d20 <_sk_clut_4D_hsw+0x8ca>
.byte 197,60,92,199 // vsubps %ymm7,%ymm8,%ymm8
.byte 197,188,89,192 // vmulps %ymm0,%ymm8,%ymm0
.byte 197,188,89,201 // vmulps %ymm1,%ymm8,%ymm1
@@ -16807,7 +16807,7 @@ HIDDEN _sk_dstout_hsw
.globl _sk_dstout_hsw
FUNCTION(_sk_dstout_hsw)
_sk_dstout_hsw:
- .byte 196,226,125,24,5,29,101,0,0 // vbroadcastss 0x651d(%rip),%ymm0 # 6858 <_sk_clut_4D_hsw+0x8ce>
+ .byte 196,226,125,24,5,233,105,0,0 // vbroadcastss 0x69e9(%rip),%ymm0 # 6d24 <_sk_clut_4D_hsw+0x8ce>
.byte 197,252,92,219 // vsubps %ymm3,%ymm0,%ymm3
.byte 197,228,89,196 // vmulps %ymm4,%ymm3,%ymm0
.byte 197,228,89,205 // vmulps %ymm5,%ymm3,%ymm1
@@ -16820,7 +16820,7 @@ HIDDEN _sk_srcover_hsw
.globl _sk_srcover_hsw
FUNCTION(_sk_srcover_hsw)
_sk_srcover_hsw:
- .byte 196,98,125,24,5,0,101,0,0 // vbroadcastss 0x6500(%rip),%ymm8 # 685c <_sk_clut_4D_hsw+0x8d2>
+ .byte 196,98,125,24,5,204,105,0,0 // vbroadcastss 0x69cc(%rip),%ymm8 # 6d28 <_sk_clut_4D_hsw+0x8d2>
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
.byte 196,194,93,184,192 // vfmadd231ps %ymm8,%ymm4,%ymm0
.byte 196,194,85,184,200 // vfmadd231ps %ymm8,%ymm5,%ymm1
@@ -16833,7 +16833,7 @@ HIDDEN _sk_dstover_hsw
.globl _sk_dstover_hsw
FUNCTION(_sk_dstover_hsw)
_sk_dstover_hsw:
- .byte 196,98,125,24,5,223,100,0,0 // vbroadcastss 0x64df(%rip),%ymm8 # 6860 <_sk_clut_4D_hsw+0x8d6>
+ .byte 196,98,125,24,5,171,105,0,0 // vbroadcastss 0x69ab(%rip),%ymm8 # 6d2c <_sk_clut_4D_hsw+0x8d6>
.byte 197,60,92,199 // vsubps %ymm7,%ymm8,%ymm8
.byte 196,226,61,168,196 // vfmadd213ps %ymm4,%ymm8,%ymm0
.byte 196,226,61,168,205 // vfmadd213ps %ymm5,%ymm8,%ymm1
@@ -16857,7 +16857,7 @@ HIDDEN _sk_multiply_hsw
.globl _sk_multiply_hsw
FUNCTION(_sk_multiply_hsw)
_sk_multiply_hsw:
- .byte 196,98,125,24,5,170,100,0,0 // vbroadcastss 0x64aa(%rip),%ymm8 # 6864 <_sk_clut_4D_hsw+0x8da>
+ .byte 196,98,125,24,5,118,105,0,0 // vbroadcastss 0x6976(%rip),%ymm8 # 6d30 <_sk_clut_4D_hsw+0x8da>
.byte 197,60,92,207 // vsubps %ymm7,%ymm8,%ymm9
.byte 197,52,89,208 // vmulps %ymm0,%ymm9,%ymm10
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
@@ -16905,7 +16905,7 @@ HIDDEN _sk_xor__hsw
.globl _sk_xor__hsw
FUNCTION(_sk_xor__hsw)
_sk_xor__hsw:
- .byte 196,98,125,24,5,37,100,0,0 // vbroadcastss 0x6425(%rip),%ymm8 # 6868 <_sk_clut_4D_hsw+0x8de>
+ .byte 196,98,125,24,5,241,104,0,0 // vbroadcastss 0x68f1(%rip),%ymm8 # 6d34 <_sk_clut_4D_hsw+0x8de>
.byte 197,60,92,207 // vsubps %ymm7,%ymm8,%ymm9
.byte 197,180,89,192 // vmulps %ymm0,%ymm9,%ymm0
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
@@ -16939,7 +16939,7 @@ _sk_darken_hsw:
.byte 197,100,89,206 // vmulps %ymm6,%ymm3,%ymm9
.byte 196,193,108,95,209 // vmaxps %ymm9,%ymm2,%ymm2
.byte 197,188,92,210 // vsubps %ymm2,%ymm8,%ymm2
- .byte 196,98,125,24,5,173,99,0,0 // vbroadcastss 0x63ad(%rip),%ymm8 # 686c <_sk_clut_4D_hsw+0x8e2>
+ .byte 196,98,125,24,5,121,104,0,0 // vbroadcastss 0x6879(%rip),%ymm8 # 6d38 <_sk_clut_4D_hsw+0x8e2>
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
.byte 196,194,69,184,216 // vfmadd231ps %ymm8,%ymm7,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -16964,7 +16964,7 @@ _sk_lighten_hsw:
.byte 197,100,89,206 // vmulps %ymm6,%ymm3,%ymm9
.byte 196,193,108,93,209 // vminps %ymm9,%ymm2,%ymm2
.byte 197,188,92,210 // vsubps %ymm2,%ymm8,%ymm2
- .byte 196,98,125,24,5,92,99,0,0 // vbroadcastss 0x635c(%rip),%ymm8 # 6870 <_sk_clut_4D_hsw+0x8e6>
+ .byte 196,98,125,24,5,40,104,0,0 // vbroadcastss 0x6828(%rip),%ymm8 # 6d3c <_sk_clut_4D_hsw+0x8e6>
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
.byte 196,194,69,184,216 // vfmadd231ps %ymm8,%ymm7,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -16992,7 +16992,7 @@ _sk_difference_hsw:
.byte 196,193,108,93,209 // vminps %ymm9,%ymm2,%ymm2
.byte 197,236,88,210 // vaddps %ymm2,%ymm2,%ymm2
.byte 197,188,92,210 // vsubps %ymm2,%ymm8,%ymm2
- .byte 196,98,125,24,5,255,98,0,0 // vbroadcastss 0x62ff(%rip),%ymm8 # 6874 <_sk_clut_4D_hsw+0x8ea>
+ .byte 196,98,125,24,5,203,103,0,0 // vbroadcastss 0x67cb(%rip),%ymm8 # 6d40 <_sk_clut_4D_hsw+0x8ea>
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
.byte 196,194,69,184,216 // vfmadd231ps %ymm8,%ymm7,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -17014,7 +17014,7 @@ _sk_exclusion_hsw:
.byte 197,236,89,214 // vmulps %ymm6,%ymm2,%ymm2
.byte 197,236,88,210 // vaddps %ymm2,%ymm2,%ymm2
.byte 197,188,92,210 // vsubps %ymm2,%ymm8,%ymm2
- .byte 196,98,125,24,5,189,98,0,0 // vbroadcastss 0x62bd(%rip),%ymm8 # 6878 <_sk_clut_4D_hsw+0x8ee>
+ .byte 196,98,125,24,5,137,103,0,0 // vbroadcastss 0x6789(%rip),%ymm8 # 6d44 <_sk_clut_4D_hsw+0x8ee>
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
.byte 196,194,69,184,216 // vfmadd231ps %ymm8,%ymm7,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -17024,7 +17024,7 @@ HIDDEN _sk_colorburn_hsw
.globl _sk_colorburn_hsw
FUNCTION(_sk_colorburn_hsw)
_sk_colorburn_hsw:
- .byte 196,98,125,24,5,171,98,0,0 // vbroadcastss 0x62ab(%rip),%ymm8 # 687c <_sk_clut_4D_hsw+0x8f2>
+ .byte 196,98,125,24,5,119,103,0,0 // vbroadcastss 0x6777(%rip),%ymm8 # 6d48 <_sk_clut_4D_hsw+0x8f2>
.byte 197,60,92,207 // vsubps %ymm7,%ymm8,%ymm9
.byte 197,52,89,216 // vmulps %ymm0,%ymm9,%ymm11
.byte 196,65,44,87,210 // vxorps %ymm10,%ymm10,%ymm10
@@ -17082,7 +17082,7 @@ HIDDEN _sk_colordodge_hsw
FUNCTION(_sk_colordodge_hsw)
_sk_colordodge_hsw:
.byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
- .byte 196,98,125,24,13,182,97,0,0 // vbroadcastss 0x61b6(%rip),%ymm9 # 6880 <_sk_clut_4D_hsw+0x8f6>
+ .byte 196,98,125,24,13,130,102,0,0 // vbroadcastss 0x6682(%rip),%ymm9 # 6d4c <_sk_clut_4D_hsw+0x8f6>
.byte 197,52,92,215 // vsubps %ymm7,%ymm9,%ymm10
.byte 197,44,89,216 // vmulps %ymm0,%ymm10,%ymm11
.byte 197,52,92,203 // vsubps %ymm3,%ymm9,%ymm9
@@ -17135,7 +17135,7 @@ HIDDEN _sk_hardlight_hsw
.globl _sk_hardlight_hsw
FUNCTION(_sk_hardlight_hsw)
_sk_hardlight_hsw:
- .byte 196,98,125,24,5,215,96,0,0 // vbroadcastss 0x60d7(%rip),%ymm8 # 6884 <_sk_clut_4D_hsw+0x8fa>
+ .byte 196,98,125,24,5,163,101,0,0 // vbroadcastss 0x65a3(%rip),%ymm8 # 6d50 <_sk_clut_4D_hsw+0x8fa>
.byte 197,60,92,215 // vsubps %ymm7,%ymm8,%ymm10
.byte 197,44,89,216 // vmulps %ymm0,%ymm10,%ymm11
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
@@ -17186,7 +17186,7 @@ HIDDEN _sk_overlay_hsw
.globl _sk_overlay_hsw
FUNCTION(_sk_overlay_hsw)
_sk_overlay_hsw:
- .byte 196,98,125,24,5,15,96,0,0 // vbroadcastss 0x600f(%rip),%ymm8 # 6888 <_sk_clut_4D_hsw+0x8fe>
+ .byte 196,98,125,24,5,219,100,0,0 // vbroadcastss 0x64db(%rip),%ymm8 # 6d54 <_sk_clut_4D_hsw+0x8fe>
.byte 197,60,92,215 // vsubps %ymm7,%ymm8,%ymm10
.byte 197,44,89,216 // vmulps %ymm0,%ymm10,%ymm11
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
@@ -17247,10 +17247,10 @@ _sk_softlight_hsw:
.byte 196,65,20,88,197 // vaddps %ymm13,%ymm13,%ymm8
.byte 196,65,60,88,192 // vaddps %ymm8,%ymm8,%ymm8
.byte 196,66,61,168,192 // vfmadd213ps %ymm8,%ymm8,%ymm8
- .byte 196,98,125,24,29,26,95,0,0 // vbroadcastss 0x5f1a(%rip),%ymm11 # 6890 <_sk_clut_4D_hsw+0x906>
+ .byte 196,98,125,24,29,230,99,0,0 // vbroadcastss 0x63e6(%rip),%ymm11 # 6d5c <_sk_clut_4D_hsw+0x906>
.byte 196,65,20,88,227 // vaddps %ymm11,%ymm13,%ymm12
.byte 196,65,28,89,192 // vmulps %ymm8,%ymm12,%ymm8
- .byte 196,98,125,24,37,11,95,0,0 // vbroadcastss 0x5f0b(%rip),%ymm12 # 6894 <_sk_clut_4D_hsw+0x90a>
+ .byte 196,98,125,24,37,215,99,0,0 // vbroadcastss 0x63d7(%rip),%ymm12 # 6d60 <_sk_clut_4D_hsw+0x90a>
.byte 196,66,21,184,196 // vfmadd231ps %ymm12,%ymm13,%ymm8
.byte 196,65,124,82,245 // vrsqrtps %ymm13,%ymm14
.byte 196,65,124,83,246 // vrcpps %ymm14,%ymm14
@@ -17260,7 +17260,7 @@ _sk_softlight_hsw:
.byte 197,4,194,255,2 // vcmpleps %ymm7,%ymm15,%ymm15
.byte 196,67,13,74,240,240 // vblendvps %ymm15,%ymm8,%ymm14,%ymm14
.byte 197,116,88,249 // vaddps %ymm1,%ymm1,%ymm15
- .byte 196,98,125,24,5,206,94,0,0 // vbroadcastss 0x5ece(%rip),%ymm8 # 688c <_sk_clut_4D_hsw+0x902>
+ .byte 196,98,125,24,5,154,99,0,0 // vbroadcastss 0x639a(%rip),%ymm8 # 6d58 <_sk_clut_4D_hsw+0x902>
.byte 196,65,60,92,237 // vsubps %ymm13,%ymm8,%ymm13
.byte 197,132,92,195 // vsubps %ymm3,%ymm15,%ymm0
.byte 196,98,125,168,235 // vfmadd213ps %ymm3,%ymm0,%ymm13
@@ -17373,11 +17373,11 @@ _sk_hue_hsw:
.byte 196,65,28,89,210 // vmulps %ymm10,%ymm12,%ymm10
.byte 196,65,44,94,214 // vdivps %ymm14,%ymm10,%ymm10
.byte 196,67,45,74,224,240 // vblendvps %ymm15,%ymm8,%ymm10,%ymm12
- .byte 196,98,125,24,53,210,92,0,0 // vbroadcastss 0x5cd2(%rip),%ymm14 # 6898 <_sk_clut_4D_hsw+0x90e>
- .byte 196,98,125,24,61,205,92,0,0 // vbroadcastss 0x5ccd(%rip),%ymm15 # 689c <_sk_clut_4D_hsw+0x912>
+ .byte 196,98,125,24,53,158,97,0,0 // vbroadcastss 0x619e(%rip),%ymm14 # 6d64 <_sk_clut_4D_hsw+0x90e>
+ .byte 196,98,125,24,61,153,97,0,0 // vbroadcastss 0x6199(%rip),%ymm15 # 6d68 <_sk_clut_4D_hsw+0x912>
.byte 196,65,84,89,239 // vmulps %ymm15,%ymm5,%ymm13
.byte 196,66,93,184,238 // vfmadd231ps %ymm14,%ymm4,%ymm13
- .byte 196,226,125,24,5,190,92,0,0 // vbroadcastss 0x5cbe(%rip),%ymm0 # 68a0 <_sk_clut_4D_hsw+0x916>
+ .byte 196,226,125,24,5,138,97,0,0 // vbroadcastss 0x618a(%rip),%ymm0 # 6d6c <_sk_clut_4D_hsw+0x916>
.byte 196,98,77,184,232 // vfmadd231ps %ymm0,%ymm6,%ymm13
.byte 196,65,116,89,215 // vmulps %ymm15,%ymm1,%ymm10
.byte 196,66,53,184,214 // vfmadd231ps %ymm14,%ymm9,%ymm10
@@ -17432,7 +17432,7 @@ _sk_hue_hsw:
.byte 196,193,124,95,192 // vmaxps %ymm8,%ymm0,%ymm0
.byte 196,65,36,95,200 // vmaxps %ymm8,%ymm11,%ymm9
.byte 196,65,116,95,192 // vmaxps %ymm8,%ymm1,%ymm8
- .byte 196,226,125,24,13,171,91,0,0 // vbroadcastss 0x5bab(%rip),%ymm1 # 68a4 <_sk_clut_4D_hsw+0x91a>
+ .byte 196,226,125,24,13,119,96,0,0 // vbroadcastss 0x6077(%rip),%ymm1 # 6d70 <_sk_clut_4D_hsw+0x91a>
.byte 197,116,92,215 // vsubps %ymm7,%ymm1,%ymm10
.byte 197,172,89,210 // vmulps %ymm2,%ymm10,%ymm2
.byte 197,116,92,219 // vsubps %ymm3,%ymm1,%ymm11
@@ -17486,11 +17486,11 @@ _sk_saturation_hsw:
.byte 196,65,28,89,210 // vmulps %ymm10,%ymm12,%ymm10
.byte 196,65,44,94,214 // vdivps %ymm14,%ymm10,%ymm10
.byte 196,67,45,74,224,240 // vblendvps %ymm15,%ymm8,%ymm10,%ymm12
- .byte 196,98,125,24,53,194,90,0,0 // vbroadcastss 0x5ac2(%rip),%ymm14 # 68a8 <_sk_clut_4D_hsw+0x91e>
- .byte 196,98,125,24,61,189,90,0,0 // vbroadcastss 0x5abd(%rip),%ymm15 # 68ac <_sk_clut_4D_hsw+0x922>
+ .byte 196,98,125,24,53,142,95,0,0 // vbroadcastss 0x5f8e(%rip),%ymm14 # 6d74 <_sk_clut_4D_hsw+0x91e>
+ .byte 196,98,125,24,61,137,95,0,0 // vbroadcastss 0x5f89(%rip),%ymm15 # 6d78 <_sk_clut_4D_hsw+0x922>
.byte 196,65,84,89,239 // vmulps %ymm15,%ymm5,%ymm13
.byte 196,66,93,184,238 // vfmadd231ps %ymm14,%ymm4,%ymm13
- .byte 196,226,125,24,5,174,90,0,0 // vbroadcastss 0x5aae(%rip),%ymm0 # 68b0 <_sk_clut_4D_hsw+0x926>
+ .byte 196,226,125,24,5,122,95,0,0 // vbroadcastss 0x5f7a(%rip),%ymm0 # 6d7c <_sk_clut_4D_hsw+0x926>
.byte 196,98,77,184,232 // vfmadd231ps %ymm0,%ymm6,%ymm13
.byte 196,65,116,89,215 // vmulps %ymm15,%ymm1,%ymm10
.byte 196,66,53,184,214 // vfmadd231ps %ymm14,%ymm9,%ymm10
@@ -17545,7 +17545,7 @@ _sk_saturation_hsw:
.byte 196,193,124,95,192 // vmaxps %ymm8,%ymm0,%ymm0
.byte 196,65,36,95,200 // vmaxps %ymm8,%ymm11,%ymm9
.byte 196,65,116,95,192 // vmaxps %ymm8,%ymm1,%ymm8
- .byte 196,226,125,24,13,155,89,0,0 // vbroadcastss 0x599b(%rip),%ymm1 # 68b4 <_sk_clut_4D_hsw+0x92a>
+ .byte 196,226,125,24,13,103,94,0,0 // vbroadcastss 0x5e67(%rip),%ymm1 # 6d80 <_sk_clut_4D_hsw+0x92a>
.byte 197,116,92,215 // vsubps %ymm7,%ymm1,%ymm10
.byte 197,172,89,210 // vmulps %ymm2,%ymm10,%ymm2
.byte 197,116,92,219 // vsubps %ymm3,%ymm1,%ymm11
@@ -17573,11 +17573,11 @@ _sk_color_hsw:
.byte 197,108,89,199 // vmulps %ymm7,%ymm2,%ymm8
.byte 197,116,89,215 // vmulps %ymm7,%ymm1,%ymm10
.byte 197,52,89,223 // vmulps %ymm7,%ymm9,%ymm11
- .byte 196,98,125,24,45,52,89,0,0 // vbroadcastss 0x5934(%rip),%ymm13 # 68b8 <_sk_clut_4D_hsw+0x92e>
- .byte 196,98,125,24,53,47,89,0,0 // vbroadcastss 0x592f(%rip),%ymm14 # 68bc <_sk_clut_4D_hsw+0x932>
+ .byte 196,98,125,24,45,0,94,0,0 // vbroadcastss 0x5e00(%rip),%ymm13 # 6d84 <_sk_clut_4D_hsw+0x92e>
+ .byte 196,98,125,24,53,251,93,0,0 // vbroadcastss 0x5dfb(%rip),%ymm14 # 6d88 <_sk_clut_4D_hsw+0x932>
.byte 196,65,84,89,230 // vmulps %ymm14,%ymm5,%ymm12
.byte 196,66,93,184,229 // vfmadd231ps %ymm13,%ymm4,%ymm12
- .byte 196,98,125,24,61,32,89,0,0 // vbroadcastss 0x5920(%rip),%ymm15 # 68c0 <_sk_clut_4D_hsw+0x936>
+ .byte 196,98,125,24,61,236,93,0,0 // vbroadcastss 0x5dec(%rip),%ymm15 # 6d8c <_sk_clut_4D_hsw+0x936>
.byte 196,66,77,184,231 // vfmadd231ps %ymm15,%ymm6,%ymm12
.byte 196,65,44,89,206 // vmulps %ymm14,%ymm10,%ymm9
.byte 196,66,61,184,205 // vfmadd231ps %ymm13,%ymm8,%ymm9
@@ -17633,7 +17633,7 @@ _sk_color_hsw:
.byte 196,193,116,95,206 // vmaxps %ymm14,%ymm1,%ymm1
.byte 196,65,44,95,198 // vmaxps %ymm14,%ymm10,%ymm8
.byte 196,65,124,95,206 // vmaxps %ymm14,%ymm0,%ymm9
- .byte 196,226,125,24,5,2,88,0,0 // vbroadcastss 0x5802(%rip),%ymm0 # 68c4 <_sk_clut_4D_hsw+0x93a>
+ .byte 196,226,125,24,5,206,92,0,0 // vbroadcastss 0x5cce(%rip),%ymm0 # 6d90 <_sk_clut_4D_hsw+0x93a>
.byte 197,124,92,215 // vsubps %ymm7,%ymm0,%ymm10
.byte 197,172,89,210 // vmulps %ymm2,%ymm10,%ymm2
.byte 197,124,92,219 // vsubps %ymm3,%ymm0,%ymm11
@@ -17661,11 +17661,11 @@ _sk_luminosity_hsw:
.byte 197,100,89,196 // vmulps %ymm4,%ymm3,%ymm8
.byte 197,100,89,213 // vmulps %ymm5,%ymm3,%ymm10
.byte 197,100,89,222 // vmulps %ymm6,%ymm3,%ymm11
- .byte 196,98,125,24,45,155,87,0,0 // vbroadcastss 0x579b(%rip),%ymm13 # 68c8 <_sk_clut_4D_hsw+0x93e>
- .byte 196,98,125,24,53,150,87,0,0 // vbroadcastss 0x5796(%rip),%ymm14 # 68cc <_sk_clut_4D_hsw+0x942>
+ .byte 196,98,125,24,45,103,92,0,0 // vbroadcastss 0x5c67(%rip),%ymm13 # 6d94 <_sk_clut_4D_hsw+0x93e>
+ .byte 196,98,125,24,53,98,92,0,0 // vbroadcastss 0x5c62(%rip),%ymm14 # 6d98 <_sk_clut_4D_hsw+0x942>
.byte 196,65,116,89,230 // vmulps %ymm14,%ymm1,%ymm12
.byte 196,66,109,184,229 // vfmadd231ps %ymm13,%ymm2,%ymm12
- .byte 196,98,125,24,61,135,87,0,0 // vbroadcastss 0x5787(%rip),%ymm15 # 68d0 <_sk_clut_4D_hsw+0x946>
+ .byte 196,98,125,24,61,83,92,0,0 // vbroadcastss 0x5c53(%rip),%ymm15 # 6d9c <_sk_clut_4D_hsw+0x946>
.byte 196,66,53,184,231 // vfmadd231ps %ymm15,%ymm9,%ymm12
.byte 196,65,44,89,206 // vmulps %ymm14,%ymm10,%ymm9
.byte 196,66,61,184,205 // vfmadd231ps %ymm13,%ymm8,%ymm9
@@ -17721,7 +17721,7 @@ _sk_luminosity_hsw:
.byte 196,193,116,95,206 // vmaxps %ymm14,%ymm1,%ymm1
.byte 196,65,44,95,198 // vmaxps %ymm14,%ymm10,%ymm8
.byte 196,65,124,95,206 // vmaxps %ymm14,%ymm0,%ymm9
- .byte 196,226,125,24,5,105,86,0,0 // vbroadcastss 0x5669(%rip),%ymm0 # 68d4 <_sk_clut_4D_hsw+0x94a>
+ .byte 196,226,125,24,5,53,91,0,0 // vbroadcastss 0x5b35(%rip),%ymm0 # 6da0 <_sk_clut_4D_hsw+0x94a>
.byte 197,124,92,215 // vsubps %ymm7,%ymm0,%ymm10
.byte 197,172,89,210 // vmulps %ymm2,%ymm10,%ymm2
.byte 197,124,92,219 // vsubps %ymm3,%ymm0,%ymm11
@@ -17742,29 +17742,28 @@ HIDDEN _sk_srcover_rgba_8888_hsw
.globl _sk_srcover_rgba_8888_hsw
FUNCTION(_sk_srcover_rgba_8888_hsw)
_sk_srcover_rgba_8888_hsw:
- .byte 73,137,201 // mov %rcx,%r9
+ .byte 83 // push %rbx
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,99,80,8 // movslq 0x8(%rax),%r10
- .byte 73,99,201 // movslq %r9d,%rcx
- .byte 73,15,175,202 // imul %r10,%rcx
- .byte 72,193,225,2 // shl $0x2,%rcx
- .byte 72,3,8 // add (%rax),%rcx
- .byte 72,99,194 // movslq %edx,%rax
- .byte 72,141,4,129 // lea (%rcx,%rax,4),%rax
+ .byte 76,99,201 // movslq %ecx,%r9
+ .byte 77,15,175,202 // imul %r10,%r9
+ .byte 73,193,225,2 // shl $0x2,%r9
+ .byte 76,3,8 // add (%rax),%r9
+ .byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 15,133,178,0,0,0 // jne 1386 <_sk_srcover_rgba_8888_hsw+0xd9>
- .byte 197,252,16,56 // vmovups (%rax),%ymm7
- .byte 197,196,84,37,96,90,0,0 // vandps 0x5a60(%rip),%ymm7,%ymm4 # 6d40 <_sk_clut_4D_hsw+0xdb6>
+ .byte 15,133,180,0,0,0 // jne 1382 <_sk_srcover_rgba_8888_hsw+0xd5>
+ .byte 196,193,126,111,60,153 // vmovdqu (%r9,%rbx,4),%ymm7
+ .byte 197,197,219,37,36,95,0,0 // vpand 0x5f24(%rip),%ymm7,%ymm4 # 7200 <_sk_clut_4D_hsw+0xdaa>
.byte 197,252,91,228 // vcvtdq2ps %ymm4,%ymm4
- .byte 196,226,69,0,45,115,90,0,0 // vpshufb 0x5a73(%rip),%ymm7,%ymm5 # 6d60 <_sk_clut_4D_hsw+0xdd6>
+ .byte 196,226,69,0,45,55,95,0,0 // vpshufb 0x5f37(%rip),%ymm7,%ymm5 # 7220 <_sk_clut_4D_hsw+0xdca>
.byte 197,252,91,237 // vcvtdq2ps %ymm5,%ymm5
- .byte 196,226,69,0,53,134,90,0,0 // vpshufb 0x5a86(%rip),%ymm7,%ymm6 # 6d80 <_sk_clut_4D_hsw+0xdf6>
+ .byte 196,226,69,0,53,74,95,0,0 // vpshufb 0x5f4a(%rip),%ymm7,%ymm6 # 7240 <_sk_clut_4D_hsw+0xdea>
.byte 197,252,91,246 // vcvtdq2ps %ymm6,%ymm6
.byte 197,197,114,215,24 // vpsrld $0x18,%ymm7,%ymm7
.byte 197,252,91,255 // vcvtdq2ps %ymm7,%ymm7
- .byte 196,98,125,24,5,200,85,0,0 // vbroadcastss 0x55c8(%rip),%ymm8 # 68d8 <_sk_clut_4D_hsw+0x94e>
+ .byte 196,98,125,24,5,152,90,0,0 // vbroadcastss 0x5a98(%rip),%ymm8 # 6da4 <_sk_clut_4D_hsw+0x94e>
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
- .byte 196,98,125,24,13,191,85,0,0 // vbroadcastss 0x55bf(%rip),%ymm9 # 68dc <_sk_clut_4D_hsw+0x952>
+ .byte 196,98,125,24,13,143,90,0,0 // vbroadcastss 0x5a8f(%rip),%ymm9 # 6da8 <_sk_clut_4D_hsw+0x952>
.byte 196,193,124,89,193 // vmulps %ymm9,%ymm0,%ymm0
.byte 196,194,93,184,192 // vfmadd231ps %ymm8,%ymm4,%ymm0
.byte 196,193,116,89,201 // vmulps %ymm9,%ymm1,%ymm1
@@ -17784,29 +17783,96 @@ _sk_srcover_rgba_8888_hsw:
.byte 196,65,53,235,202 // vpor %ymm10,%ymm9,%ymm9
.byte 196,65,61,235,193 // vpor %ymm9,%ymm8,%ymm8
.byte 77,133,192 // test %r8,%r8
- .byte 117,52 // jne 13af <_sk_srcover_rgba_8888_hsw+0x102>
- .byte 197,124,17,0 // vmovups %ymm8,(%rax)
+ .byte 117,66 // jne 13b9 <_sk_srcover_rgba_8888_hsw+0x10c>
+ .byte 196,65,126,127,4,153 // vmovdqu %ymm8,(%r9,%rbx,4)
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 76,137,201 // mov %r9,%rcx
- .byte 255,224 // jmpq *%rax
- .byte 185,8,0,0,0 // mov $0x8,%ecx
- .byte 68,41,193 // sub %r8d,%ecx
- .byte 192,225,3 // shl $0x3,%cl
- .byte 73,199,194,255,255,255,255 // mov $0xffffffffffffffff,%r10
- .byte 73,211,234 // shr %cl,%r10
- .byte 196,193,249,110,226 // vmovq %r10,%xmm4
- .byte 196,226,125,33,228 // vpmovsxbd %xmm4,%ymm4
- .byte 196,226,93,44,56 // vmaskmovps (%rax),%ymm4,%ymm7
- .byte 233,41,255,255,255 // jmpq 12d8 <_sk_srcover_rgba_8888_hsw+0x2b>
- .byte 185,8,0,0,0 // mov $0x8,%ecx
- .byte 68,41,193 // sub %r8d,%ecx
- .byte 192,225,3 // shl $0x3,%cl
- .byte 73,199,194,255,255,255,255 // mov $0xffffffffffffffff,%r10
- .byte 73,211,234 // shr %cl,%r10
- .byte 196,65,249,110,202 // vmovq %r10,%xmm9
- .byte 196,66,125,33,201 // vpmovsxbd %xmm9,%ymm9
- .byte 196,98,53,46,0 // vmaskmovps %ymm8,%ymm9,(%rax)
- .byte 235,170 // jmp 137f <_sk_srcover_rgba_8888_hsw+0xd2>
+ .byte 91 // pop %rbx
+ .byte 255,224 // jmpq *%rax
+ .byte 69,137,194 // mov %r8d,%r10d
+ .byte 65,128,226,7 // and $0x7,%r10b
+ .byte 197,197,239,255 // vpxor %ymm7,%ymm7,%ymm7
+ .byte 65,254,202 // dec %r10b
+ .byte 65,128,250,6 // cmp $0x6,%r10b
+ .byte 15,135,58,255,255,255 // ja 12d4 <_sk_srcover_rgba_8888_hsw+0x27>
+ .byte 69,15,182,210 // movzbl %r10b,%r10d
+ .byte 76,141,29,255,0,0,0 // lea 0xff(%rip),%r11 # 14a4 <_sk_srcover_rgba_8888_hsw+0x1f7>
+ .byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
+ .byte 76,1,216 // add %r11,%rax
+ .byte 255,224 // jmpq *%rax
+ .byte 196,193,121,110,60,153 // vmovd (%r9,%rbx,4),%xmm7
+ .byte 233,27,255,255,255 // jmpq 12d4 <_sk_srcover_rgba_8888_hsw+0x27>
+ .byte 69,137,194 // mov %r8d,%r10d
+ .byte 65,128,226,7 // and $0x7,%r10b
+ .byte 65,254,202 // dec %r10b
+ .byte 65,128,250,6 // cmp $0x6,%r10b
+ .byte 119,180 // ja 137d <_sk_srcover_rgba_8888_hsw+0xd0>
+ .byte 65,15,182,194 // movzbl %r10b,%eax
+ .byte 76,141,21,236,0,0,0 // lea 0xec(%rip),%r10 # 14c0 <_sk_srcover_rgba_8888_hsw+0x213>
+ .byte 73,99,4,130 // movslq (%r10,%rax,4),%rax
+ .byte 76,1,208 // add %r10,%rax
+ .byte 255,224 // jmpq *%rax
+ .byte 196,65,121,126,4,153 // vmovd %xmm8,(%r9,%rbx,4)
+ .byte 235,152 // jmp 137d <_sk_srcover_rgba_8888_hsw+0xd0>
+ .byte 196,193,121,110,100,153,8 // vmovd 0x8(%r9,%rbx,4),%xmm4
+ .byte 196,226,121,89,228 // vpbroadcastq %xmm4,%xmm4
+ .byte 197,213,239,237 // vpxor %ymm5,%ymm5,%ymm5
+ .byte 196,227,85,2,252,4 // vpblendd $0x4,%ymm4,%ymm5,%ymm7
+ .byte 196,193,122,126,36,153 // vmovq (%r9,%rbx,4),%xmm4
+ .byte 196,227,69,2,252,3 // vpblendd $0x3,%ymm4,%ymm7,%ymm7
+ .byte 233,200,254,255,255 // jmpq 12d4 <_sk_srcover_rgba_8888_hsw+0x27>
+ .byte 196,193,121,110,100,153,24 // vmovd 0x18(%r9,%rbx,4),%xmm4
+ .byte 196,226,125,89,228 // vpbroadcastq %xmm4,%ymm4
+ .byte 197,213,239,237 // vpxor %ymm5,%ymm5,%ymm5
+ .byte 196,227,85,2,252,64 // vpblendd $0x40,%ymm4,%ymm5,%ymm7
+ .byte 196,227,125,57,252,1 // vextracti128 $0x1,%ymm7,%xmm4
+ .byte 196,195,89,34,100,153,20,1 // vpinsrd $0x1,0x14(%r9,%rbx,4),%xmm4,%xmm4
+ .byte 196,227,69,56,252,1 // vinserti128 $0x1,%xmm4,%ymm7,%ymm7
+ .byte 196,227,125,57,252,1 // vextracti128 $0x1,%ymm7,%xmm4
+ .byte 196,195,89,34,100,153,16,0 // vpinsrd $0x0,0x10(%r9,%rbx,4),%xmm4,%xmm4
+ .byte 196,227,69,56,252,1 // vinserti128 $0x1,%xmm4,%ymm7,%ymm7
+ .byte 196,193,122,111,36,153 // vmovdqu (%r9,%rbx,4),%xmm4
+ .byte 196,227,93,2,255,240 // vpblendd $0xf0,%ymm7,%ymm4,%ymm7
+ .byte 233,121,254,255,255 // jmpq 12d4 <_sk_srcover_rgba_8888_hsw+0x27>
+ .byte 196,67,121,22,68,153,8,2 // vpextrd $0x2,%xmm8,0x8(%r9,%rbx,4)
+ .byte 196,65,121,214,4,153 // vmovq %xmm8,(%r9,%rbx,4)
+ .byte 233,15,255,255,255 // jmpq 137d <_sk_srcover_rgba_8888_hsw+0xd0>
+ .byte 196,67,125,57,193,1 // vextracti128 $0x1,%ymm8,%xmm9
+ .byte 196,67,121,22,76,153,24,2 // vpextrd $0x2,%xmm9,0x18(%r9,%rbx,4)
+ .byte 196,67,125,57,193,1 // vextracti128 $0x1,%ymm8,%xmm9
+ .byte 196,67,121,22,76,153,20,1 // vpextrd $0x1,%xmm9,0x14(%r9,%rbx,4)
+ .byte 196,67,125,57,193,1 // vextracti128 $0x1,%ymm8,%xmm9
+ .byte 196,65,121,126,76,153,16 // vmovd %xmm9,0x10(%r9,%rbx,4)
+ .byte 196,65,122,127,4,153 // vmovdqu %xmm8,(%r9,%rbx,4)
+ .byte 233,219,254,255,255 // jmpq 137d <_sk_srcover_rgba_8888_hsw+0xd0>
+ .byte 102,144 // xchg %ax,%ax
+ .byte 10,255 // or %bh,%bh
+ .byte 255 // (bad)
+ .byte 255,87,255 // callq *-0x1(%rdi)
+ .byte 255 // (bad)
+ .byte 255,65,255 // incl -0x1(%rcx)
+ .byte 255 // (bad)
+ .byte 255,166,255,255,255,146 // jmpq *-0x6d000001(%rsi)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 126,255 // jle 14b9 <_sk_srcover_rgba_8888_hsw+0x20c>
+ .byte 255 // (bad)
+ .byte 255,104,255 // ljmp *-0x1(%rax)
+ .byte 255 // (bad)
+ .byte 255,29,255,255,255,163 // lcall *-0x5c000001(%rip) # ffffffffa40014c4 <_sk_clut_4D_hsw+0xffffffffa3ffb06e>
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,155,255,255,255,215 // lcall *-0x28000001(%rbx)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,202 // dec %edx
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 188,255,255,255,174 // mov $0xaeffffff,%esp
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // .byte 0xff
HIDDEN _sk_clamp_0_hsw
.globl _sk_clamp_0_hsw
@@ -17824,7 +17890,7 @@ HIDDEN _sk_clamp_1_hsw
.globl _sk_clamp_1_hsw
FUNCTION(_sk_clamp_1_hsw)
_sk_clamp_1_hsw:
- .byte 196,98,125,24,5,229,84,0,0 // vbroadcastss 0x54e5(%rip),%ymm8 # 68e0 <_sk_clut_4D_hsw+0x956>
+ .byte 196,98,125,24,5,170,88,0,0 // vbroadcastss 0x58aa(%rip),%ymm8 # 6dac <_sk_clut_4D_hsw+0x956>
.byte 196,193,124,93,192 // vminps %ymm8,%ymm0,%ymm0
.byte 196,193,116,93,200 // vminps %ymm8,%ymm1,%ymm1
.byte 196,193,108,93,208 // vminps %ymm8,%ymm2,%ymm2
@@ -17836,7 +17902,7 @@ HIDDEN _sk_clamp_a_hsw
.globl _sk_clamp_a_hsw
FUNCTION(_sk_clamp_a_hsw)
_sk_clamp_a_hsw:
- .byte 196,98,125,24,5,200,84,0,0 // vbroadcastss 0x54c8(%rip),%ymm8 # 68e4 <_sk_clut_4D_hsw+0x95a>
+ .byte 196,98,125,24,5,141,88,0,0 // vbroadcastss 0x588d(%rip),%ymm8 # 6db0 <_sk_clut_4D_hsw+0x95a>
.byte 196,193,100,93,216 // vminps %ymm8,%ymm3,%ymm3
.byte 197,252,93,195 // vminps %ymm3,%ymm0,%ymm0
.byte 197,244,93,203 // vminps %ymm3,%ymm1,%ymm1
@@ -17848,7 +17914,7 @@ HIDDEN _sk_clamp_a_dst_hsw
.globl _sk_clamp_a_dst_hsw
FUNCTION(_sk_clamp_a_dst_hsw)
_sk_clamp_a_dst_hsw:
- .byte 196,98,125,24,5,174,84,0,0 // vbroadcastss 0x54ae(%rip),%ymm8 # 68e8 <_sk_clut_4D_hsw+0x95e>
+ .byte 196,98,125,24,5,115,88,0,0 // vbroadcastss 0x5873(%rip),%ymm8 # 6db4 <_sk_clut_4D_hsw+0x95e>
.byte 196,193,68,93,248 // vminps %ymm8,%ymm7,%ymm7
.byte 197,220,93,231 // vminps %ymm7,%ymm4,%ymm4
.byte 197,212,93,239 // vminps %ymm7,%ymm5,%ymm5
@@ -17881,7 +17947,7 @@ HIDDEN _sk_invert_hsw
.globl _sk_invert_hsw
FUNCTION(_sk_invert_hsw)
_sk_invert_hsw:
- .byte 196,98,125,24,5,109,84,0,0 // vbroadcastss 0x546d(%rip),%ymm8 # 68ec <_sk_clut_4D_hsw+0x962>
+ .byte 196,98,125,24,5,50,88,0,0 // vbroadcastss 0x5832(%rip),%ymm8 # 6db8 <_sk_clut_4D_hsw+0x962>
.byte 197,188,92,192 // vsubps %ymm0,%ymm8,%ymm0
.byte 197,188,92,201 // vsubps %ymm1,%ymm8,%ymm1
.byte 197,188,92,210 // vsubps %ymm2,%ymm8,%ymm2
@@ -17937,7 +18003,7 @@ FUNCTION(_sk_unpremul_hsw)
_sk_unpremul_hsw:
.byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
.byte 196,65,100,194,200,0 // vcmpeqps %ymm8,%ymm3,%ymm9
- .byte 196,98,125,24,21,1,84,0,0 // vbroadcastss 0x5401(%rip),%ymm10 # 68f0 <_sk_clut_4D_hsw+0x966>
+ .byte 196,98,125,24,21,198,87,0,0 // vbroadcastss 0x57c6(%rip),%ymm10 # 6dbc <_sk_clut_4D_hsw+0x966>
.byte 197,44,94,211 // vdivps %ymm3,%ymm10,%ymm10
.byte 196,67,45,74,192,144 // vblendvps %ymm9,%ymm8,%ymm10,%ymm8
.byte 197,188,89,192 // vmulps %ymm0,%ymm8,%ymm0
@@ -17950,16 +18016,16 @@ HIDDEN _sk_from_srgb_hsw
.globl _sk_from_srgb_hsw
FUNCTION(_sk_from_srgb_hsw)
_sk_from_srgb_hsw:
- .byte 196,98,125,24,5,226,83,0,0 // vbroadcastss 0x53e2(%rip),%ymm8 # 68f4 <_sk_clut_4D_hsw+0x96a>
+ .byte 196,98,125,24,5,167,87,0,0 // vbroadcastss 0x57a7(%rip),%ymm8 # 6dc0 <_sk_clut_4D_hsw+0x96a>
.byte 196,65,124,89,200 // vmulps %ymm8,%ymm0,%ymm9
.byte 197,124,89,208 // vmulps %ymm0,%ymm0,%ymm10
- .byte 196,98,125,24,29,212,83,0,0 // vbroadcastss 0x53d4(%rip),%ymm11 # 68f8 <_sk_clut_4D_hsw+0x96e>
- .byte 196,98,125,24,37,207,83,0,0 // vbroadcastss 0x53cf(%rip),%ymm12 # 68fc <_sk_clut_4D_hsw+0x972>
+ .byte 196,98,125,24,29,153,87,0,0 // vbroadcastss 0x5799(%rip),%ymm11 # 6dc4 <_sk_clut_4D_hsw+0x96e>
+ .byte 196,98,125,24,37,148,87,0,0 // vbroadcastss 0x5794(%rip),%ymm12 # 6dc8 <_sk_clut_4D_hsw+0x972>
.byte 196,65,124,40,236 // vmovaps %ymm12,%ymm13
.byte 196,66,125,168,235 // vfmadd213ps %ymm11,%ymm0,%ymm13
- .byte 196,98,125,24,53,192,83,0,0 // vbroadcastss 0x53c0(%rip),%ymm14 # 6900 <_sk_clut_4D_hsw+0x976>
+ .byte 196,98,125,24,53,133,87,0,0 // vbroadcastss 0x5785(%rip),%ymm14 # 6dcc <_sk_clut_4D_hsw+0x976>
.byte 196,66,45,168,238 // vfmadd213ps %ymm14,%ymm10,%ymm13
- .byte 196,98,125,24,21,182,83,0,0 // vbroadcastss 0x53b6(%rip),%ymm10 # 6904 <_sk_clut_4D_hsw+0x97a>
+ .byte 196,98,125,24,21,123,87,0,0 // vbroadcastss 0x577b(%rip),%ymm10 # 6dd0 <_sk_clut_4D_hsw+0x97a>
.byte 196,193,124,194,194,1 // vcmpltps %ymm10,%ymm0,%ymm0
.byte 196,195,21,74,193,0 // vblendvps %ymm0,%ymm9,%ymm13,%ymm0
.byte 196,65,116,89,200 // vmulps %ymm8,%ymm1,%ymm9
@@ -17982,16 +18048,16 @@ HIDDEN _sk_from_srgb_dst_hsw
.globl _sk_from_srgb_dst_hsw
FUNCTION(_sk_from_srgb_dst_hsw)
_sk_from_srgb_dst_hsw:
- .byte 196,98,125,24,5,94,83,0,0 // vbroadcastss 0x535e(%rip),%ymm8 # 6908 <_sk_clut_4D_hsw+0x97e>
+ .byte 196,98,125,24,5,35,87,0,0 // vbroadcastss 0x5723(%rip),%ymm8 # 6dd4 <_sk_clut_4D_hsw+0x97e>
.byte 196,65,92,89,200 // vmulps %ymm8,%ymm4,%ymm9
.byte 197,92,89,212 // vmulps %ymm4,%ymm4,%ymm10
- .byte 196,98,125,24,29,80,83,0,0 // vbroadcastss 0x5350(%rip),%ymm11 # 690c <_sk_clut_4D_hsw+0x982>
- .byte 196,98,125,24,37,75,83,0,0 // vbroadcastss 0x534b(%rip),%ymm12 # 6910 <_sk_clut_4D_hsw+0x986>
+ .byte 196,98,125,24,29,21,87,0,0 // vbroadcastss 0x5715(%rip),%ymm11 # 6dd8 <_sk_clut_4D_hsw+0x982>
+ .byte 196,98,125,24,37,16,87,0,0 // vbroadcastss 0x5710(%rip),%ymm12 # 6ddc <_sk_clut_4D_hsw+0x986>
.byte 196,65,124,40,236 // vmovaps %ymm12,%ymm13
.byte 196,66,93,168,235 // vfmadd213ps %ymm11,%ymm4,%ymm13
- .byte 196,98,125,24,53,60,83,0,0 // vbroadcastss 0x533c(%rip),%ymm14 # 6914 <_sk_clut_4D_hsw+0x98a>
+ .byte 196,98,125,24,53,1,87,0,0 // vbroadcastss 0x5701(%rip),%ymm14 # 6de0 <_sk_clut_4D_hsw+0x98a>
.byte 196,66,45,168,238 // vfmadd213ps %ymm14,%ymm10,%ymm13
- .byte 196,98,125,24,21,50,83,0,0 // vbroadcastss 0x5332(%rip),%ymm10 # 6918 <_sk_clut_4D_hsw+0x98e>
+ .byte 196,98,125,24,21,247,86,0,0 // vbroadcastss 0x56f7(%rip),%ymm10 # 6de4 <_sk_clut_4D_hsw+0x98e>
.byte 196,193,92,194,226,1 // vcmpltps %ymm10,%ymm4,%ymm4
.byte 196,195,21,74,225,64 // vblendvps %ymm4,%ymm9,%ymm13,%ymm4
.byte 196,65,84,89,200 // vmulps %ymm8,%ymm5,%ymm9
@@ -18015,19 +18081,19 @@ HIDDEN _sk_to_srgb_hsw
FUNCTION(_sk_to_srgb_hsw)
_sk_to_srgb_hsw:
.byte 197,124,82,200 // vrsqrtps %ymm0,%ymm9
- .byte 196,98,125,24,5,214,82,0,0 // vbroadcastss 0x52d6(%rip),%ymm8 # 691c <_sk_clut_4D_hsw+0x992>
+ .byte 196,98,125,24,5,155,86,0,0 // vbroadcastss 0x569b(%rip),%ymm8 # 6de8 <_sk_clut_4D_hsw+0x992>
.byte 196,65,124,89,208 // vmulps %ymm8,%ymm0,%ymm10
- .byte 196,98,125,24,29,204,82,0,0 // vbroadcastss 0x52cc(%rip),%ymm11 # 6920 <_sk_clut_4D_hsw+0x996>
- .byte 196,98,125,24,37,199,82,0,0 // vbroadcastss 0x52c7(%rip),%ymm12 # 6924 <_sk_clut_4D_hsw+0x99a>
+ .byte 196,98,125,24,29,145,86,0,0 // vbroadcastss 0x5691(%rip),%ymm11 # 6dec <_sk_clut_4D_hsw+0x996>
+ .byte 196,98,125,24,37,140,86,0,0 // vbroadcastss 0x568c(%rip),%ymm12 # 6df0 <_sk_clut_4D_hsw+0x99a>
.byte 196,65,124,40,236 // vmovaps %ymm12,%ymm13
.byte 196,66,53,168,235 // vfmadd213ps %ymm11,%ymm9,%ymm13
- .byte 196,98,125,24,53,184,82,0,0 // vbroadcastss 0x52b8(%rip),%ymm14 # 6928 <_sk_clut_4D_hsw+0x99e>
+ .byte 196,98,125,24,53,125,86,0,0 // vbroadcastss 0x567d(%rip),%ymm14 # 6df4 <_sk_clut_4D_hsw+0x99e>
.byte 196,66,53,168,238 // vfmadd213ps %ymm14,%ymm9,%ymm13
- .byte 196,98,125,24,61,174,82,0,0 // vbroadcastss 0x52ae(%rip),%ymm15 # 692c <_sk_clut_4D_hsw+0x9a2>
+ .byte 196,98,125,24,61,115,86,0,0 // vbroadcastss 0x5673(%rip),%ymm15 # 6df8 <_sk_clut_4D_hsw+0x9a2>
.byte 196,65,52,88,207 // vaddps %ymm15,%ymm9,%ymm9
.byte 196,65,124,83,201 // vrcpps %ymm9,%ymm9
.byte 196,65,20,89,201 // vmulps %ymm9,%ymm13,%ymm9
- .byte 196,98,125,24,45,154,82,0,0 // vbroadcastss 0x529a(%rip),%ymm13 # 6930 <_sk_clut_4D_hsw+0x9a6>
+ .byte 196,98,125,24,45,95,86,0,0 // vbroadcastss 0x565f(%rip),%ymm13 # 6dfc <_sk_clut_4D_hsw+0x9a6>
.byte 196,193,124,194,197,1 // vcmpltps %ymm13,%ymm0,%ymm0
.byte 196,195,53,74,194,0 // vblendvps %ymm0,%ymm10,%ymm9,%ymm0
.byte 197,124,82,201 // vrsqrtps %ymm1,%ymm9
@@ -18061,26 +18127,26 @@ _sk_rgb_to_hsl_hsw:
.byte 197,124,93,201 // vminps %ymm1,%ymm0,%ymm9
.byte 197,52,93,202 // vminps %ymm2,%ymm9,%ymm9
.byte 196,65,60,92,209 // vsubps %ymm9,%ymm8,%ymm10
- .byte 196,98,125,24,29,15,82,0,0 // vbroadcastss 0x520f(%rip),%ymm11 # 6934 <_sk_clut_4D_hsw+0x9aa>
+ .byte 196,98,125,24,29,212,85,0,0 // vbroadcastss 0x55d4(%rip),%ymm11 # 6e00 <_sk_clut_4D_hsw+0x9aa>
.byte 196,65,36,94,218 // vdivps %ymm10,%ymm11,%ymm11
.byte 197,116,92,226 // vsubps %ymm2,%ymm1,%ymm12
.byte 197,116,194,234,1 // vcmpltps %ymm2,%ymm1,%ymm13
- .byte 196,98,125,24,53,252,81,0,0 // vbroadcastss 0x51fc(%rip),%ymm14 # 6938 <_sk_clut_4D_hsw+0x9ae>
+ .byte 196,98,125,24,53,193,85,0,0 // vbroadcastss 0x55c1(%rip),%ymm14 # 6e04 <_sk_clut_4D_hsw+0x9ae>
.byte 196,65,4,87,255 // vxorps %ymm15,%ymm15,%ymm15
.byte 196,67,5,74,238,208 // vblendvps %ymm13,%ymm14,%ymm15,%ymm13
.byte 196,66,37,168,229 // vfmadd213ps %ymm13,%ymm11,%ymm12
.byte 197,236,92,208 // vsubps %ymm0,%ymm2,%ymm2
.byte 197,124,92,233 // vsubps %ymm1,%ymm0,%ymm13
- .byte 196,98,125,24,53,227,81,0,0 // vbroadcastss 0x51e3(%rip),%ymm14 # 6940 <_sk_clut_4D_hsw+0x9b6>
+ .byte 196,98,125,24,53,168,85,0,0 // vbroadcastss 0x55a8(%rip),%ymm14 # 6e0c <_sk_clut_4D_hsw+0x9b6>
.byte 196,66,37,168,238 // vfmadd213ps %ymm14,%ymm11,%ymm13
- .byte 196,98,125,24,53,209,81,0,0 // vbroadcastss 0x51d1(%rip),%ymm14 # 693c <_sk_clut_4D_hsw+0x9b2>
+ .byte 196,98,125,24,53,150,85,0,0 // vbroadcastss 0x5596(%rip),%ymm14 # 6e08 <_sk_clut_4D_hsw+0x9b2>
.byte 196,194,37,168,214 // vfmadd213ps %ymm14,%ymm11,%ymm2
.byte 197,188,194,201,0 // vcmpeqps %ymm1,%ymm8,%ymm1
.byte 196,227,21,74,202,16 // vblendvps %ymm1,%ymm2,%ymm13,%ymm1
.byte 197,188,194,192,0 // vcmpeqps %ymm0,%ymm8,%ymm0
.byte 196,195,117,74,196,0 // vblendvps %ymm0,%ymm12,%ymm1,%ymm0
.byte 196,193,60,88,201 // vaddps %ymm9,%ymm8,%ymm1
- .byte 196,98,125,24,29,180,81,0,0 // vbroadcastss 0x51b4(%rip),%ymm11 # 6948 <_sk_clut_4D_hsw+0x9be>
+ .byte 196,98,125,24,29,121,85,0,0 // vbroadcastss 0x5579(%rip),%ymm11 # 6e14 <_sk_clut_4D_hsw+0x9be>
.byte 196,193,116,89,211 // vmulps %ymm11,%ymm1,%ymm2
.byte 197,36,194,218,1 // vcmpltps %ymm2,%ymm11,%ymm11
.byte 196,65,12,92,224 // vsubps %ymm8,%ymm14,%ymm12
@@ -18090,7 +18156,7 @@ _sk_rgb_to_hsl_hsw:
.byte 197,172,94,201 // vdivps %ymm1,%ymm10,%ymm1
.byte 196,195,125,74,199,128 // vblendvps %ymm8,%ymm15,%ymm0,%ymm0
.byte 196,195,117,74,207,128 // vblendvps %ymm8,%ymm15,%ymm1,%ymm1
- .byte 196,98,125,24,5,119,81,0,0 // vbroadcastss 0x5177(%rip),%ymm8 # 6944 <_sk_clut_4D_hsw+0x9ba>
+ .byte 196,98,125,24,5,60,85,0,0 // vbroadcastss 0x553c(%rip),%ymm8 # 6e10 <_sk_clut_4D_hsw+0x9ba>
.byte 196,193,124,89,192 // vmulps %ymm8,%ymm0,%ymm0
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -18107,30 +18173,30 @@ _sk_hsl_to_rgb_hsw:
.byte 197,252,17,92,36,128 // vmovups %ymm3,-0x80(%rsp)
.byte 197,252,40,233 // vmovaps %ymm1,%ymm5
.byte 197,252,40,224 // vmovaps %ymm0,%ymm4
- .byte 196,98,125,24,5,68,81,0,0 // vbroadcastss 0x5144(%rip),%ymm8 # 694c <_sk_clut_4D_hsw+0x9c2>
+ .byte 196,98,125,24,5,9,85,0,0 // vbroadcastss 0x5509(%rip),%ymm8 # 6e18 <_sk_clut_4D_hsw+0x9c2>
.byte 197,60,194,202,2 // vcmpleps %ymm2,%ymm8,%ymm9
.byte 197,84,89,210 // vmulps %ymm2,%ymm5,%ymm10
.byte 196,65,84,92,218 // vsubps %ymm10,%ymm5,%ymm11
.byte 196,67,45,74,203,144 // vblendvps %ymm9,%ymm11,%ymm10,%ymm9
.byte 197,52,88,210 // vaddps %ymm2,%ymm9,%ymm10
- .byte 196,98,125,24,13,39,81,0,0 // vbroadcastss 0x5127(%rip),%ymm9 # 6950 <_sk_clut_4D_hsw+0x9c6>
+ .byte 196,98,125,24,13,236,84,0,0 // vbroadcastss 0x54ec(%rip),%ymm9 # 6e1c <_sk_clut_4D_hsw+0x9c6>
.byte 196,66,109,170,202 // vfmsub213ps %ymm10,%ymm2,%ymm9
- .byte 196,98,125,24,29,29,81,0,0 // vbroadcastss 0x511d(%rip),%ymm11 # 6954 <_sk_clut_4D_hsw+0x9ca>
+ .byte 196,98,125,24,29,226,84,0,0 // vbroadcastss 0x54e2(%rip),%ymm11 # 6e20 <_sk_clut_4D_hsw+0x9ca>
.byte 196,65,92,88,219 // vaddps %ymm11,%ymm4,%ymm11
.byte 196,67,125,8,227,1 // vroundps $0x1,%ymm11,%ymm12
.byte 196,65,36,92,252 // vsubps %ymm12,%ymm11,%ymm15
.byte 196,65,44,92,217 // vsubps %ymm9,%ymm10,%ymm11
- .byte 196,98,125,24,45,7,81,0,0 // vbroadcastss 0x5107(%rip),%ymm13 # 695c <_sk_clut_4D_hsw+0x9d2>
+ .byte 196,98,125,24,45,204,84,0,0 // vbroadcastss 0x54cc(%rip),%ymm13 # 6e28 <_sk_clut_4D_hsw+0x9d2>
.byte 196,193,4,89,197 // vmulps %ymm13,%ymm15,%ymm0
- .byte 196,98,125,24,53,253,80,0,0 // vbroadcastss 0x50fd(%rip),%ymm14 # 6960 <_sk_clut_4D_hsw+0x9d6>
+ .byte 196,98,125,24,53,194,84,0,0 // vbroadcastss 0x54c2(%rip),%ymm14 # 6e2c <_sk_clut_4D_hsw+0x9d6>
.byte 197,12,92,224 // vsubps %ymm0,%ymm14,%ymm12
.byte 196,66,37,168,225 // vfmadd213ps %ymm9,%ymm11,%ymm12
- .byte 196,226,125,24,29,227,80,0,0 // vbroadcastss 0x50e3(%rip),%ymm3 # 6958 <_sk_clut_4D_hsw+0x9ce>
+ .byte 196,226,125,24,29,168,84,0,0 // vbroadcastss 0x54a8(%rip),%ymm3 # 6e24 <_sk_clut_4D_hsw+0x9ce>
.byte 196,193,100,194,255,2 // vcmpleps %ymm15,%ymm3,%ymm7
.byte 196,195,29,74,249,112 // vblendvps %ymm7,%ymm9,%ymm12,%ymm7
.byte 196,65,60,194,231,2 // vcmpleps %ymm15,%ymm8,%ymm12
.byte 196,227,45,74,255,192 // vblendvps %ymm12,%ymm7,%ymm10,%ymm7
- .byte 196,98,125,24,37,206,80,0,0 // vbroadcastss 0x50ce(%rip),%ymm12 # 6964 <_sk_clut_4D_hsw+0x9da>
+ .byte 196,98,125,24,37,147,84,0,0 // vbroadcastss 0x5493(%rip),%ymm12 # 6e30 <_sk_clut_4D_hsw+0x9da>
.byte 196,65,28,194,255,2 // vcmpleps %ymm15,%ymm12,%ymm15
.byte 196,194,37,168,193 // vfmadd213ps %ymm9,%ymm11,%ymm0
.byte 196,99,125,74,255,240 // vblendvps %ymm15,%ymm7,%ymm0,%ymm15
@@ -18146,7 +18212,7 @@ _sk_hsl_to_rgb_hsw:
.byte 197,156,194,192,2 // vcmpleps %ymm0,%ymm12,%ymm0
.byte 196,194,37,168,249 // vfmadd213ps %ymm9,%ymm11,%ymm7
.byte 196,227,69,74,201,0 // vblendvps %ymm0,%ymm1,%ymm7,%ymm1
- .byte 196,226,125,24,5,122,80,0,0 // vbroadcastss 0x507a(%rip),%ymm0 # 6968 <_sk_clut_4D_hsw+0x9de>
+ .byte 196,226,125,24,5,63,84,0,0 // vbroadcastss 0x543f(%rip),%ymm0 # 6e34 <_sk_clut_4D_hsw+0x9de>
.byte 197,220,88,192 // vaddps %ymm0,%ymm4,%ymm0
.byte 196,227,125,8,224,1 // vroundps $0x1,%ymm0,%ymm4
.byte 197,252,92,196 // vsubps %ymm4,%ymm0,%ymm0
@@ -18199,12 +18265,12 @@ _sk_scale_u8_hsw:
.byte 76,3,8 // add (%rax),%r9
.byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 117,59 // jne 19df <_sk_scale_u8_hsw+0x54>
+ .byte 117,59 // jne 1ae6 <_sk_scale_u8_hsw+0x54>
.byte 196,66,121,48,4,25 // vpmovzxbw (%r9,%rbx,1),%xmm8
- .byte 197,57,219,5,142,86,0,0 // vpand 0x568e(%rip),%xmm8,%xmm8 # 7040 <_sk_clut_4D_hsw+0x10b6>
+ .byte 197,57,219,5,71,90,0,0 // vpand 0x5a47(%rip),%xmm8,%xmm8 # 7500 <_sk_clut_4D_hsw+0x10aa>
.byte 196,66,125,51,192 // vpmovzxwd %xmm8,%ymm8
.byte 196,65,124,91,192 // vcvtdq2ps %ymm8,%ymm8
- .byte 196,98,125,24,13,167,79,0,0 // vbroadcastss 0x4fa7(%rip),%ymm9 # 696c <_sk_clut_4D_hsw+0x9e2>
+ .byte 196,98,125,24,13,108,83,0,0 // vbroadcastss 0x536c(%rip),%ymm9 # 6e38 <_sk_clut_4D_hsw+0x9e2>
.byte 196,65,60,89,193 // vmulps %ymm9,%ymm8,%ymm8
.byte 197,188,89,192 // vmulps %ymm0,%ymm8,%ymm0
.byte 197,188,89,201 // vmulps %ymm1,%ymm8,%ymm1
@@ -18218,15 +18284,15 @@ _sk_scale_u8_hsw:
.byte 196,65,57,239,192 // vpxor %xmm8,%xmm8,%xmm8
.byte 65,254,202 // dec %r10b
.byte 65,128,250,6 // cmp $0x6,%r10b
- .byte 119,182 // ja 19aa <_sk_scale_u8_hsw+0x1f>
+ .byte 119,182 // ja 1ab1 <_sk_scale_u8_hsw+0x1f>
.byte 69,15,182,210 // movzbl %r10b,%r10d
- .byte 76,141,29,121,0,0,0 // lea 0x79(%rip),%r11 # 1a78 <_sk_scale_u8_hsw+0xed>
+ .byte 76,141,29,122,0,0,0 // lea 0x7a(%rip),%r11 # 1b80 <_sk_scale_u8_hsw+0xee>
.byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
.byte 65,15,182,4,25 // movzbl (%r9,%rbx,1),%eax
.byte 197,121,110,192 // vmovd %eax,%xmm8
- .byte 235,151 // jmp 19aa <_sk_scale_u8_hsw+0x1f>
+ .byte 235,151 // jmp 1ab1 <_sk_scale_u8_hsw+0x1f>
.byte 65,15,182,68,25,2 // movzbl 0x2(%r9,%rbx,1),%eax
.byte 196,65,57,239,192 // vpxor %xmm8,%xmm8,%xmm8
.byte 197,57,196,192,2 // vpinsrw $0x2,%eax,%xmm8,%xmm8
@@ -18234,7 +18300,7 @@ _sk_scale_u8_hsw:
.byte 197,121,110,200 // vmovd %eax,%xmm9
.byte 196,66,121,48,201 // vpmovzxbw %xmm9,%xmm9
.byte 196,67,57,2,193,1 // vpblendd $0x1,%xmm9,%xmm8,%xmm8
- .byte 233,110,255,255,255 // jmpq 19aa <_sk_scale_u8_hsw+0x1f>
+ .byte 233,110,255,255,255 // jmpq 1ab1 <_sk_scale_u8_hsw+0x1f>
.byte 65,15,182,68,25,6 // movzbl 0x6(%r9,%rbx,1),%eax
.byte 196,65,57,239,192 // vpxor %xmm8,%xmm8,%xmm8
.byte 197,57,196,192,6 // vpinsrw $0x6,%eax,%xmm8,%xmm8
@@ -18245,24 +18311,22 @@ _sk_scale_u8_hsw:
.byte 196,65,121,110,12,25 // vmovd (%r9,%rbx,1),%xmm9
.byte 196,66,121,48,201 // vpmovzxbw %xmm9,%xmm9
.byte 196,67,49,2,192,12 // vpblendd $0xc,%xmm8,%xmm9,%xmm8
- .byte 233,50,255,255,255 // jmpq 19aa <_sk_scale_u8_hsw+0x1f>
+ .byte 233,50,255,255,255 // jmpq 1ab1 <_sk_scale_u8_hsw+0x1f>
.byte 144 // nop
+ .byte 143 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,171,255,255,255,155 // ljmp *-0x64000001(%rbx)
- .byte 255 // (bad)
- .byte 255 // (bad)
+ .byte 255,170,255,255,255,154 // ljmp *-0x65000001(%rdx)
.byte 255 // (bad)
- .byte 234 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
+ .byte 233,255,255,255,222 // jmpq ffffffffdf001b90 <_sk_clut_4D_hsw+0xffffffffdeffb73a>
.byte 255 // (bad)
- .byte 223,255 // (bad)
.byte 255 // (bad)
- .byte 255,212 // callq *%rsp
+ .byte 255,211 // callq *%rbx
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,196 // inc %esp
+ .byte 255,195 // inc %ebx
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // .byte 0xff
@@ -18296,12 +18360,12 @@ _sk_lerp_u8_hsw:
.byte 76,3,8 // add (%rax),%r9
.byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 117,79 // jne 1b2b <_sk_lerp_u8_hsw+0x68>
+ .byte 117,79 // jne 1c33 <_sk_lerp_u8_hsw+0x68>
.byte 196,66,121,48,4,25 // vpmovzxbw (%r9,%rbx,1),%xmm8
- .byte 197,57,219,5,102,85,0,0 // vpand 0x5566(%rip),%xmm8,%xmm8 # 7050 <_sk_clut_4D_hsw+0x10c6>
+ .byte 197,57,219,5,30,89,0,0 // vpand 0x591e(%rip),%xmm8,%xmm8 # 7510 <_sk_clut_4D_hsw+0x10ba>
.byte 196,66,125,51,192 // vpmovzxwd %xmm8,%ymm8
.byte 196,65,124,91,192 // vcvtdq2ps %ymm8,%ymm8
- .byte 196,98,125,24,13,115,78,0,0 // vbroadcastss 0x4e73(%rip),%ymm9 # 6970 <_sk_clut_4D_hsw+0x9e6>
+ .byte 196,98,125,24,13,55,82,0,0 // vbroadcastss 0x5237(%rip),%ymm9 # 6e3c <_sk_clut_4D_hsw+0x9e6>
.byte 196,65,60,89,193 // vmulps %ymm9,%ymm8,%ymm8
.byte 197,252,92,196 // vsubps %ymm4,%ymm0,%ymm0
.byte 196,226,61,168,196 // vfmadd213ps %ymm4,%ymm8,%ymm0
@@ -18319,15 +18383,15 @@ _sk_lerp_u8_hsw:
.byte 196,65,57,239,192 // vpxor %xmm8,%xmm8,%xmm8
.byte 65,254,202 // dec %r10b
.byte 65,128,250,6 // cmp $0x6,%r10b
- .byte 119,162 // ja 1ae2 <_sk_lerp_u8_hsw+0x1f>
+ .byte 119,162 // ja 1bea <_sk_lerp_u8_hsw+0x1f>
.byte 69,15,182,210 // movzbl %r10b,%r10d
- .byte 76,141,29,121,0,0,0 // lea 0x79(%rip),%r11 # 1bc4 <_sk_lerp_u8_hsw+0x101>
+ .byte 76,141,29,121,0,0,0 // lea 0x79(%rip),%r11 # 1ccc <_sk_lerp_u8_hsw+0x101>
.byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
.byte 65,15,182,4,25 // movzbl (%r9,%rbx,1),%eax
.byte 197,121,110,192 // vmovd %eax,%xmm8
- .byte 235,131 // jmp 1ae2 <_sk_lerp_u8_hsw+0x1f>
+ .byte 235,131 // jmp 1bea <_sk_lerp_u8_hsw+0x1f>
.byte 65,15,182,68,25,2 // movzbl 0x2(%r9,%rbx,1),%eax
.byte 196,65,57,239,192 // vpxor %xmm8,%xmm8,%xmm8
.byte 197,57,196,192,2 // vpinsrw $0x2,%eax,%xmm8,%xmm8
@@ -18335,7 +18399,7 @@ _sk_lerp_u8_hsw:
.byte 197,121,110,200 // vmovd %eax,%xmm9
.byte 196,66,121,48,201 // vpmovzxbw %xmm9,%xmm9
.byte 196,67,57,2,193,1 // vpblendd $0x1,%xmm9,%xmm8,%xmm8
- .byte 233,90,255,255,255 // jmpq 1ae2 <_sk_lerp_u8_hsw+0x1f>
+ .byte 233,90,255,255,255 // jmpq 1bea <_sk_lerp_u8_hsw+0x1f>
.byte 65,15,182,68,25,6 // movzbl 0x6(%r9,%rbx,1),%eax
.byte 196,65,57,239,192 // vpxor %xmm8,%xmm8,%xmm8
.byte 197,57,196,192,6 // vpinsrw $0x6,%eax,%xmm8,%xmm8
@@ -18346,7 +18410,7 @@ _sk_lerp_u8_hsw:
.byte 196,65,121,110,12,25 // vmovd (%r9,%rbx,1),%xmm9
.byte 196,66,121,48,201 // vpmovzxbw %xmm9,%xmm9
.byte 196,67,49,2,192,12 // vpblendd $0xc,%xmm8,%xmm9,%xmm8
- .byte 233,30,255,255,255 // jmpq 1ae2 <_sk_lerp_u8_hsw+0x1f>
+ .byte 233,30,255,255,255 // jmpq 1bea <_sk_lerp_u8_hsw+0x1f>
.byte 144 // nop
.byte 255 // (bad)
.byte 255 // (bad)
@@ -18381,23 +18445,23 @@ _sk_lerp_565_hsw:
.byte 76,3,8 // add (%rax),%r9
.byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 15,133,170,0,0,0 // jne 1caa <_sk_lerp_565_hsw+0xca>
+ .byte 15,133,170,0,0,0 // jne 1db2 <_sk_lerp_565_hsw+0xca>
.byte 196,65,122,111,4,89 // vmovdqu (%r9,%rbx,2),%xmm8
.byte 196,66,125,51,192 // vpmovzxwd %xmm8,%ymm8
- .byte 196,98,125,88,13,96,77,0,0 // vpbroadcastd 0x4d60(%rip),%ymm9 # 6974 <_sk_clut_4D_hsw+0x9ea>
+ .byte 196,98,125,88,13,36,81,0,0 // vpbroadcastd 0x5124(%rip),%ymm9 # 6e40 <_sk_clut_4D_hsw+0x9ea>
.byte 196,65,61,219,201 // vpand %ymm9,%ymm8,%ymm9
.byte 196,65,124,91,201 // vcvtdq2ps %ymm9,%ymm9
- .byte 196,98,125,24,21,81,77,0,0 // vbroadcastss 0x4d51(%rip),%ymm10 # 6978 <_sk_clut_4D_hsw+0x9ee>
+ .byte 196,98,125,24,21,21,81,0,0 // vbroadcastss 0x5115(%rip),%ymm10 # 6e44 <_sk_clut_4D_hsw+0x9ee>
.byte 196,65,52,89,202 // vmulps %ymm10,%ymm9,%ymm9
- .byte 196,98,125,88,21,71,77,0,0 // vpbroadcastd 0x4d47(%rip),%ymm10 # 697c <_sk_clut_4D_hsw+0x9f2>
+ .byte 196,98,125,88,21,11,81,0,0 // vpbroadcastd 0x510b(%rip),%ymm10 # 6e48 <_sk_clut_4D_hsw+0x9f2>
.byte 196,65,61,219,210 // vpand %ymm10,%ymm8,%ymm10
.byte 196,65,124,91,210 // vcvtdq2ps %ymm10,%ymm10
- .byte 196,98,125,24,29,56,77,0,0 // vbroadcastss 0x4d38(%rip),%ymm11 # 6980 <_sk_clut_4D_hsw+0x9f6>
+ .byte 196,98,125,24,29,252,80,0,0 // vbroadcastss 0x50fc(%rip),%ymm11 # 6e4c <_sk_clut_4D_hsw+0x9f6>
.byte 196,65,44,89,211 // vmulps %ymm11,%ymm10,%ymm10
- .byte 196,98,125,88,29,46,77,0,0 // vpbroadcastd 0x4d2e(%rip),%ymm11 # 6984 <_sk_clut_4D_hsw+0x9fa>
+ .byte 196,98,125,88,29,242,80,0,0 // vpbroadcastd 0x50f2(%rip),%ymm11 # 6e50 <_sk_clut_4D_hsw+0x9fa>
.byte 196,65,61,219,195 // vpand %ymm11,%ymm8,%ymm8
.byte 196,65,124,91,192 // vcvtdq2ps %ymm8,%ymm8
- .byte 196,98,125,24,29,31,77,0,0 // vbroadcastss 0x4d1f(%rip),%ymm11 # 6988 <_sk_clut_4D_hsw+0x9fe>
+ .byte 196,98,125,24,29,227,80,0,0 // vbroadcastss 0x50e3(%rip),%ymm11 # 6e54 <_sk_clut_4D_hsw+0x9fe>
.byte 196,65,60,89,195 // vmulps %ymm11,%ymm8,%ymm8
.byte 197,252,92,196 // vsubps %ymm4,%ymm0,%ymm0
.byte 196,226,53,168,196 // vfmadd213ps %ymm4,%ymm9,%ymm0
@@ -18419,27 +18483,27 @@ _sk_lerp_565_hsw:
.byte 196,65,57,239,192 // vpxor %xmm8,%xmm8,%xmm8
.byte 65,254,202 // dec %r10b
.byte 65,128,250,6 // cmp $0x6,%r10b
- .byte 15,135,67,255,255,255 // ja 1c06 <_sk_lerp_565_hsw+0x26>
+ .byte 15,135,67,255,255,255 // ja 1d0e <_sk_lerp_565_hsw+0x26>
.byte 69,15,182,210 // movzbl %r10b,%r10d
- .byte 76,141,29,102,0,0,0 // lea 0x66(%rip),%r11 # 1d34 <_sk_lerp_565_hsw+0x154>
+ .byte 76,141,29,102,0,0,0 // lea 0x66(%rip),%r11 # 1e3c <_sk_lerp_565_hsw+0x154>
.byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
.byte 65,15,183,4,89 // movzwl (%r9,%rbx,2),%eax
.byte 197,121,110,192 // vmovd %eax,%xmm8
- .byte 233,33,255,255,255 // jmpq 1c06 <_sk_lerp_565_hsw+0x26>
+ .byte 233,33,255,255,255 // jmpq 1d0e <_sk_lerp_565_hsw+0x26>
.byte 196,65,57,239,192 // vpxor %xmm8,%xmm8,%xmm8
.byte 196,65,57,196,68,89,4,2 // vpinsrw $0x2,0x4(%r9,%rbx,2),%xmm8,%xmm8
.byte 196,65,121,110,12,89 // vmovd (%r9,%rbx,2),%xmm9
.byte 196,67,57,2,193,1 // vpblendd $0x1,%xmm9,%xmm8,%xmm8
- .byte 233,3,255,255,255 // jmpq 1c06 <_sk_lerp_565_hsw+0x26>
+ .byte 233,3,255,255,255 // jmpq 1d0e <_sk_lerp_565_hsw+0x26>
.byte 196,65,57,239,192 // vpxor %xmm8,%xmm8,%xmm8
.byte 196,65,57,196,68,89,12,6 // vpinsrw $0x6,0xc(%r9,%rbx,2),%xmm8,%xmm8
.byte 196,65,57,196,68,89,10,5 // vpinsrw $0x5,0xa(%r9,%rbx,2),%xmm8,%xmm8
.byte 196,65,57,196,68,89,8,4 // vpinsrw $0x4,0x8(%r9,%rbx,2),%xmm8,%xmm8
.byte 196,65,122,126,12,89 // vmovq (%r9,%rbx,2),%xmm9
.byte 196,67,49,2,192,12 // vpblendd $0xc,%xmm8,%xmm9,%xmm8
- .byte 233,213,254,255,255 // jmpq 1c06 <_sk_lerp_565_hsw+0x26>
+ .byte 233,213,254,255,255 // jmpq 1d0e <_sk_lerp_565_hsw+0x26>
.byte 15,31,0 // nopl (%rax)
.byte 163,255,255,255,190,255,255,255,177 // movabs %eax,0xb1ffffffbeffffff
.byte 255 // (bad)
@@ -18463,41 +18527,82 @@ HIDDEN _sk_load_tables_hsw
.globl _sk_load_tables_hsw
FUNCTION(_sk_load_tables_hsw)
_sk_load_tables_hsw:
- .byte 73,137,201 // mov %rcx,%r9
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 76,141,20,149,0,0,0,0 // lea 0x0(,%rdx,4),%r10
- .byte 76,3,16 // add (%rax),%r10
+ .byte 76,139,8 // mov (%rax),%r9
.byte 77,133,192 // test %r8,%r8
- .byte 117,105 // jne 1dce <_sk_load_tables_hsw+0x7e>
- .byte 196,193,124,16,26 // vmovups (%r10),%ymm3
- .byte 197,228,84,13,46,80,0,0 // vandps 0x502e(%rip),%ymm3,%ymm1 # 6da0 <_sk_clut_4D_hsw+0xe16>
+ .byte 117,103 // jne 1ec9 <_sk_load_tables_hsw+0x71>
+ .byte 196,193,126,111,28,145 // vmovdqu (%r9,%rdx,4),%ymm3
+ .byte 197,229,219,13,240,83,0,0 // vpand 0x53f0(%rip),%ymm3,%ymm1 # 7260 <_sk_clut_4D_hsw+0xe0a>
.byte 196,65,61,118,192 // vpcmpeqd %ymm8,%ymm8,%ymm8
- .byte 72,139,72,8 // mov 0x8(%rax),%rcx
+ .byte 76,139,72,8 // mov 0x8(%rax),%r9
.byte 76,139,80,16 // mov 0x10(%rax),%r10
.byte 197,237,118,210 // vpcmpeqd %ymm2,%ymm2,%ymm2
- .byte 196,226,109,146,4,137 // vgatherdps %ymm2,(%rcx,%ymm1,4),%ymm0
- .byte 196,226,101,0,21,46,80,0,0 // vpshufb 0x502e(%rip),%ymm3,%ymm2 # 6dc0 <_sk_clut_4D_hsw+0xe36>
+ .byte 196,194,109,146,4,137 // vgatherdps %ymm2,(%r9,%ymm1,4),%ymm0
+ .byte 196,226,101,0,21,240,83,0,0 // vpshufb 0x53f0(%rip),%ymm3,%ymm2 # 7280 <_sk_clut_4D_hsw+0xe2a>
.byte 196,65,53,118,201 // vpcmpeqd %ymm9,%ymm9,%ymm9
.byte 196,194,53,146,12,146 // vgatherdps %ymm9,(%r10,%ymm2,4),%ymm1
.byte 72,139,64,24 // mov 0x18(%rax),%rax
- .byte 196,98,101,0,13,54,80,0,0 // vpshufb 0x5036(%rip),%ymm3,%ymm9 # 6de0 <_sk_clut_4D_hsw+0xe56>
+ .byte 196,98,101,0,13,248,83,0,0 // vpshufb 0x53f8(%rip),%ymm3,%ymm9 # 72a0 <_sk_clut_4D_hsw+0xe4a>
.byte 196,162,61,146,20,136 // vgatherdps %ymm8,(%rax,%ymm9,4),%ymm2
.byte 197,229,114,211,24 // vpsrld $0x18,%ymm3,%ymm3
.byte 197,252,91,219 // vcvtdq2ps %ymm3,%ymm3
- .byte 196,98,125,24,5,202,75,0,0 // vbroadcastss 0x4bca(%rip),%ymm8 # 698c <_sk_clut_4D_hsw+0xa02>
+ .byte 196,98,125,24,5,152,79,0,0 // vbroadcastss 0x4f98(%rip),%ymm8 # 6e58 <_sk_clut_4D_hsw+0xa02>
.byte 196,193,100,89,216 // vmulps %ymm8,%ymm3,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 76,137,201 // mov %r9,%rcx
.byte 255,224 // jmpq *%rax
- .byte 185,8,0,0,0 // mov $0x8,%ecx
- .byte 68,41,193 // sub %r8d,%ecx
- .byte 192,225,3 // shl $0x3,%cl
- .byte 73,199,195,255,255,255,255 // mov $0xffffffffffffffff,%r11
- .byte 73,211,235 // shr %cl,%r11
- .byte 196,193,249,110,195 // vmovq %r11,%xmm0
- .byte 196,226,125,33,192 // vpmovsxbd %xmm0,%ymm0
- .byte 196,194,125,44,26 // vmaskmovps (%r10),%ymm0,%ymm3
- .byte 233,115,255,255,255 // jmpq 1d6a <_sk_load_tables_hsw+0x1a>
+ .byte 69,137,194 // mov %r8d,%r10d
+ .byte 65,128,226,7 // and $0x7,%r10b
+ .byte 197,229,239,219 // vpxor %ymm3,%ymm3,%ymm3
+ .byte 65,254,202 // dec %r10b
+ .byte 65,128,250,6 // cmp $0x6,%r10b
+ .byte 119,139 // ja 1e68 <_sk_load_tables_hsw+0x10>
+ .byte 69,15,182,210 // movzbl %r10b,%r10d
+ .byte 76,141,29,140,0,0,0 // lea 0x8c(%rip),%r11 # 1f74 <_sk_load_tables_hsw+0x11c>
+ .byte 79,99,20,147 // movslq (%r11,%r10,4),%r10
+ .byte 77,1,218 // add %r11,%r10
+ .byte 65,255,226 // jmpq *%r10
+ .byte 196,193,121,110,28,145 // vmovd (%r9,%rdx,4),%xmm3
+ .byte 233,107,255,255,255 // jmpq 1e68 <_sk_load_tables_hsw+0x10>
+ .byte 196,193,121,110,68,145,8 // vmovd 0x8(%r9,%rdx,4),%xmm0
+ .byte 196,226,121,89,192 // vpbroadcastq %xmm0,%xmm0
+ .byte 197,245,239,201 // vpxor %ymm1,%ymm1,%ymm1
+ .byte 196,227,117,2,216,4 // vpblendd $0x4,%ymm0,%ymm1,%ymm3
+ .byte 196,193,122,126,4,145 // vmovq (%r9,%rdx,4),%xmm0
+ .byte 196,227,101,2,216,3 // vpblendd $0x3,%ymm0,%ymm3,%ymm3
+ .byte 233,68,255,255,255 // jmpq 1e68 <_sk_load_tables_hsw+0x10>
+ .byte 196,193,121,110,68,145,24 // vmovd 0x18(%r9,%rdx,4),%xmm0
+ .byte 196,226,125,89,192 // vpbroadcastq %xmm0,%ymm0
+ .byte 197,245,239,201 // vpxor %ymm1,%ymm1,%ymm1
+ .byte 196,227,117,2,216,64 // vpblendd $0x40,%ymm0,%ymm1,%ymm3
+ .byte 196,227,125,57,216,1 // vextracti128 $0x1,%ymm3,%xmm0
+ .byte 196,195,121,34,68,145,20,1 // vpinsrd $0x1,0x14(%r9,%rdx,4),%xmm0,%xmm0
+ .byte 196,227,101,56,216,1 // vinserti128 $0x1,%xmm0,%ymm3,%ymm3
+ .byte 196,227,125,57,216,1 // vextracti128 $0x1,%ymm3,%xmm0
+ .byte 196,195,121,34,68,145,16,0 // vpinsrd $0x0,0x10(%r9,%rdx,4),%xmm0,%xmm0
+ .byte 196,227,101,56,216,1 // vinserti128 $0x1,%xmm0,%ymm3,%ymm3
+ .byte 196,193,122,111,4,145 // vmovdqu (%r9,%rdx,4),%xmm0
+ .byte 196,227,125,2,219,240 // vpblendd $0xf0,%ymm3,%ymm0,%ymm3
+ .byte 233,245,254,255,255 // jmpq 1e68 <_sk_load_tables_hsw+0x10>
+ .byte 144 // nop
+ .byte 126,255 // jle 1f75 <_sk_load_tables_hsw+0x11d>
+ .byte 255 // (bad)
+ .byte 255,159,255,255,255,137 // lcall *-0x76000001(%rdi)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 238 // out %al,(%dx)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 218,255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,198 // inc %esi
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // .byte 0xff
+ .byte 176,255 // mov $0xff,%al
+ .byte 255 // (bad)
+ .byte 255 // .byte 0xff
HIDDEN _sk_load_tables_u16_be_hsw
.globl _sk_load_tables_u16_be_hsw
@@ -18507,7 +18612,7 @@ _sk_load_tables_u16_be_hsw:
.byte 76,139,8 // mov (%rax),%r9
.byte 76,141,20,149,0,0,0,0 // lea 0x0(,%rdx,4),%r10
.byte 77,133,192 // test %r8,%r8
- .byte 15,133,201,0,0,0 // jne 1ed6 <_sk_load_tables_u16_be_hsw+0xdf>
+ .byte 15,133,201,0,0,0 // jne 206f <_sk_load_tables_u16_be_hsw+0xdf>
.byte 196,1,121,16,4,81 // vmovupd (%r9,%r10,2),%xmm8
.byte 196,129,121,16,84,81,16 // vmovupd 0x10(%r9,%r10,2),%xmm2
.byte 196,129,121,16,92,81,32 // vmovupd 0x20(%r9,%r10,2),%xmm3
@@ -18523,7 +18628,7 @@ _sk_load_tables_u16_be_hsw:
.byte 197,185,108,200 // vpunpcklqdq %xmm0,%xmm8,%xmm1
.byte 197,185,109,208 // vpunpckhqdq %xmm0,%xmm8,%xmm2
.byte 197,49,108,195 // vpunpcklqdq %xmm3,%xmm9,%xmm8
- .byte 197,121,111,21,2,82,0,0 // vmovdqa 0x5202(%rip),%xmm10 # 7060 <_sk_clut_4D_hsw+0x10d6>
+ .byte 197,121,111,21,41,85,0,0 // vmovdqa 0x5529(%rip),%xmm10 # 7520 <_sk_clut_4D_hsw+0x10ca>
.byte 196,193,113,219,194 // vpand %xmm10,%xmm1,%xmm0
.byte 196,226,125,51,200 // vpmovzxwd %xmm0,%ymm1
.byte 196,65,37,118,219 // vpcmpeqd %ymm11,%ymm11,%ymm11
@@ -18545,36 +18650,36 @@ _sk_load_tables_u16_be_hsw:
.byte 197,185,235,219 // vpor %xmm3,%xmm8,%xmm3
.byte 196,226,125,51,219 // vpmovzxwd %xmm3,%ymm3
.byte 197,252,91,219 // vcvtdq2ps %ymm3,%ymm3
- .byte 196,98,125,24,5,195,74,0,0 // vbroadcastss 0x4ac3(%rip),%ymm8 # 6990 <_sk_clut_4D_hsw+0xa06>
+ .byte 196,98,125,24,5,246,77,0,0 // vbroadcastss 0x4df6(%rip),%ymm8 # 6e5c <_sk_clut_4D_hsw+0xa06>
.byte 196,193,100,89,216 // vmulps %ymm8,%ymm3,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
.byte 196,1,123,16,4,81 // vmovsd (%r9,%r10,2),%xmm8
.byte 196,65,49,239,201 // vpxor %xmm9,%xmm9,%xmm9
.byte 73,131,248,1 // cmp $0x1,%r8
- .byte 116,85 // je 1f3c <_sk_load_tables_u16_be_hsw+0x145>
+ .byte 116,85 // je 20d5 <_sk_load_tables_u16_be_hsw+0x145>
.byte 196,1,57,22,68,81,8 // vmovhpd 0x8(%r9,%r10,2),%xmm8,%xmm8
.byte 73,131,248,3 // cmp $0x3,%r8
- .byte 114,72 // jb 1f3c <_sk_load_tables_u16_be_hsw+0x145>
+ .byte 114,72 // jb 20d5 <_sk_load_tables_u16_be_hsw+0x145>
.byte 196,129,123,16,84,81,16 // vmovsd 0x10(%r9,%r10,2),%xmm2
.byte 73,131,248,3 // cmp $0x3,%r8
- .byte 116,72 // je 1f49 <_sk_load_tables_u16_be_hsw+0x152>
+ .byte 116,72 // je 20e2 <_sk_load_tables_u16_be_hsw+0x152>
.byte 196,129,105,22,84,81,24 // vmovhpd 0x18(%r9,%r10,2),%xmm2,%xmm2
.byte 73,131,248,5 // cmp $0x5,%r8
- .byte 114,59 // jb 1f49 <_sk_load_tables_u16_be_hsw+0x152>
+ .byte 114,59 // jb 20e2 <_sk_load_tables_u16_be_hsw+0x152>
.byte 196,129,123,16,92,81,32 // vmovsd 0x20(%r9,%r10,2),%xmm3
.byte 73,131,248,5 // cmp $0x5,%r8
- .byte 15,132,9,255,255,255 // je 1e28 <_sk_load_tables_u16_be_hsw+0x31>
+ .byte 15,132,9,255,255,255 // je 1fc1 <_sk_load_tables_u16_be_hsw+0x31>
.byte 196,129,97,22,92,81,40 // vmovhpd 0x28(%r9,%r10,2),%xmm3,%xmm3
.byte 73,131,248,7 // cmp $0x7,%r8
- .byte 15,130,248,254,255,255 // jb 1e28 <_sk_load_tables_u16_be_hsw+0x31>
+ .byte 15,130,248,254,255,255 // jb 1fc1 <_sk_load_tables_u16_be_hsw+0x31>
.byte 196,1,122,126,76,81,48 // vmovq 0x30(%r9,%r10,2),%xmm9
- .byte 233,236,254,255,255 // jmpq 1e28 <_sk_load_tables_u16_be_hsw+0x31>
+ .byte 233,236,254,255,255 // jmpq 1fc1 <_sk_load_tables_u16_be_hsw+0x31>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
.byte 197,233,87,210 // vxorpd %xmm2,%xmm2,%xmm2
- .byte 233,223,254,255,255 // jmpq 1e28 <_sk_load_tables_u16_be_hsw+0x31>
+ .byte 233,223,254,255,255 // jmpq 1fc1 <_sk_load_tables_u16_be_hsw+0x31>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
- .byte 233,214,254,255,255 // jmpq 1e28 <_sk_load_tables_u16_be_hsw+0x31>
+ .byte 233,214,254,255,255 // jmpq 1fc1 <_sk_load_tables_u16_be_hsw+0x31>
HIDDEN _sk_load_tables_rgb_u16_be_hsw
.globl _sk_load_tables_rgb_u16_be_hsw
@@ -18584,7 +18689,7 @@ _sk_load_tables_rgb_u16_be_hsw:
.byte 76,139,8 // mov (%rax),%r9
.byte 76,141,20,82 // lea (%rdx,%rdx,2),%r10
.byte 77,133,192 // test %r8,%r8
- .byte 15,133,193,0,0,0 // jne 2025 <_sk_load_tables_rgb_u16_be_hsw+0xd3>
+ .byte 15,133,193,0,0,0 // jne 21be <_sk_load_tables_rgb_u16_be_hsw+0xd3>
.byte 196,129,122,111,4,81 // vmovdqu (%r9,%r10,2),%xmm0
.byte 196,129,122,111,84,81,12 // vmovdqu 0xc(%r9,%r10,2),%xmm2
.byte 196,129,122,111,76,81,24 // vmovdqu 0x18(%r9,%r10,2),%xmm1
@@ -18605,7 +18710,7 @@ _sk_load_tables_rgb_u16_be_hsw:
.byte 197,185,108,218 // vpunpcklqdq %xmm2,%xmm8,%xmm3
.byte 197,185,109,210 // vpunpckhqdq %xmm2,%xmm8,%xmm2
.byte 197,121,108,193 // vpunpcklqdq %xmm1,%xmm0,%xmm8
- .byte 197,121,111,13,162,80,0,0 // vmovdqa 0x50a2(%rip),%xmm9 # 7070 <_sk_clut_4D_hsw+0x10e6>
+ .byte 197,121,111,13,201,83,0,0 // vmovdqa 0x53c9(%rip),%xmm9 # 7530 <_sk_clut_4D_hsw+0x10da>
.byte 196,193,97,219,193 // vpand %xmm9,%xmm3,%xmm0
.byte 196,226,125,51,200 // vpmovzxwd %xmm0,%ymm1
.byte 197,229,118,219 // vpcmpeqd %ymm3,%ymm3,%ymm3
@@ -18622,48 +18727,48 @@ _sk_load_tables_rgb_u16_be_hsw:
.byte 196,98,125,51,194 // vpmovzxwd %xmm2,%ymm8
.byte 196,162,101,146,20,128 // vgatherdps %ymm3,(%rax,%ymm8,4),%ymm2
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,29,113,73,0,0 // vbroadcastss 0x4971(%rip),%ymm3 # 6994 <_sk_clut_4D_hsw+0xa0a>
+ .byte 196,226,125,24,29,164,76,0,0 // vbroadcastss 0x4ca4(%rip),%ymm3 # 6e60 <_sk_clut_4D_hsw+0xa0a>
.byte 255,224 // jmpq *%rax
.byte 196,129,121,110,4,81 // vmovd (%r9,%r10,2),%xmm0
.byte 196,129,121,196,68,81,4,2 // vpinsrw $0x2,0x4(%r9,%r10,2),%xmm0,%xmm0
.byte 73,131,248,1 // cmp $0x1,%r8
- .byte 117,5 // jne 203e <_sk_load_tables_rgb_u16_be_hsw+0xec>
- .byte 233,90,255,255,255 // jmpq 1f98 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+ .byte 117,5 // jne 21d7 <_sk_load_tables_rgb_u16_be_hsw+0xec>
+ .byte 233,90,255,255,255 // jmpq 2131 <_sk_load_tables_rgb_u16_be_hsw+0x46>
.byte 196,129,121,110,76,81,6 // vmovd 0x6(%r9,%r10,2),%xmm1
.byte 196,1,113,196,68,81,10,2 // vpinsrw $0x2,0xa(%r9,%r10,2),%xmm1,%xmm8
.byte 73,131,248,3 // cmp $0x3,%r8
- .byte 114,26 // jb 206d <_sk_load_tables_rgb_u16_be_hsw+0x11b>
+ .byte 114,26 // jb 2206 <_sk_load_tables_rgb_u16_be_hsw+0x11b>
.byte 196,129,121,110,76,81,12 // vmovd 0xc(%r9,%r10,2),%xmm1
.byte 196,129,113,196,84,81,16,2 // vpinsrw $0x2,0x10(%r9,%r10,2),%xmm1,%xmm2
.byte 73,131,248,3 // cmp $0x3,%r8
- .byte 117,10 // jne 2072 <_sk_load_tables_rgb_u16_be_hsw+0x120>
- .byte 233,43,255,255,255 // jmpq 1f98 <_sk_load_tables_rgb_u16_be_hsw+0x46>
- .byte 233,38,255,255,255 // jmpq 1f98 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+ .byte 117,10 // jne 220b <_sk_load_tables_rgb_u16_be_hsw+0x120>
+ .byte 233,43,255,255,255 // jmpq 2131 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+ .byte 233,38,255,255,255 // jmpq 2131 <_sk_load_tables_rgb_u16_be_hsw+0x46>
.byte 196,129,121,110,76,81,18 // vmovd 0x12(%r9,%r10,2),%xmm1
.byte 196,1,113,196,76,81,22,2 // vpinsrw $0x2,0x16(%r9,%r10,2),%xmm1,%xmm9
.byte 73,131,248,5 // cmp $0x5,%r8
- .byte 114,26 // jb 20a1 <_sk_load_tables_rgb_u16_be_hsw+0x14f>
+ .byte 114,26 // jb 223a <_sk_load_tables_rgb_u16_be_hsw+0x14f>
.byte 196,129,121,110,76,81,24 // vmovd 0x18(%r9,%r10,2),%xmm1
.byte 196,129,113,196,76,81,28,2 // vpinsrw $0x2,0x1c(%r9,%r10,2),%xmm1,%xmm1
.byte 73,131,248,5 // cmp $0x5,%r8
- .byte 117,10 // jne 20a6 <_sk_load_tables_rgb_u16_be_hsw+0x154>
- .byte 233,247,254,255,255 // jmpq 1f98 <_sk_load_tables_rgb_u16_be_hsw+0x46>
- .byte 233,242,254,255,255 // jmpq 1f98 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+ .byte 117,10 // jne 223f <_sk_load_tables_rgb_u16_be_hsw+0x154>
+ .byte 233,247,254,255,255 // jmpq 2131 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+ .byte 233,242,254,255,255 // jmpq 2131 <_sk_load_tables_rgb_u16_be_hsw+0x46>
.byte 196,129,121,110,92,81,30 // vmovd 0x1e(%r9,%r10,2),%xmm3
.byte 196,1,97,196,92,81,34,2 // vpinsrw $0x2,0x22(%r9,%r10,2),%xmm3,%xmm11
.byte 73,131,248,7 // cmp $0x7,%r8
- .byte 114,20 // jb 20cf <_sk_load_tables_rgb_u16_be_hsw+0x17d>
+ .byte 114,20 // jb 2268 <_sk_load_tables_rgb_u16_be_hsw+0x17d>
.byte 196,129,121,110,92,81,36 // vmovd 0x24(%r9,%r10,2),%xmm3
.byte 196,129,97,196,92,81,40,2 // vpinsrw $0x2,0x28(%r9,%r10,2),%xmm3,%xmm3
- .byte 233,201,254,255,255 // jmpq 1f98 <_sk_load_tables_rgb_u16_be_hsw+0x46>
- .byte 233,196,254,255,255 // jmpq 1f98 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+ .byte 233,201,254,255,255 // jmpq 2131 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+ .byte 233,196,254,255,255 // jmpq 2131 <_sk_load_tables_rgb_u16_be_hsw+0x46>
HIDDEN _sk_byte_tables_hsw
.globl _sk_byte_tables_hsw
FUNCTION(_sk_byte_tables_hsw)
_sk_byte_tables_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,98,125,24,5,185,72,0,0 // vbroadcastss 0x48b9(%rip),%ymm8 # 6998 <_sk_clut_4D_hsw+0xa0e>
+ .byte 196,98,125,24,5,236,75,0,0 // vbroadcastss 0x4bec(%rip),%ymm8 # 6e64 <_sk_clut_4D_hsw+0xa0e>
.byte 196,193,124,89,192 // vmulps %ymm8,%ymm0,%ymm0
.byte 197,125,91,200 // vcvtps2dq %ymm0,%ymm9
.byte 196,65,249,126,201 // vmovq %xmm9,%r9
@@ -18785,7 +18890,7 @@ _sk_byte_tables_hsw:
.byte 67,15,182,4,26 // movzbl (%r10,%r11,1),%eax
.byte 196,194,125,49,193 // vpmovzxbd %xmm9,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,98,125,24,5,94,70,0,0 // vbroadcastss 0x465e(%rip),%ymm8 # 699c <_sk_clut_4D_hsw+0xa12>
+ .byte 196,98,125,24,5,145,73,0,0 // vbroadcastss 0x4991(%rip),%ymm8 # 6e68 <_sk_clut_4D_hsw+0xa12>
.byte 196,193,124,89,192 // vmulps %ymm8,%ymm0,%ymm0
.byte 196,226,125,49,201 // vpmovzxbd %xmm1,%ymm1
.byte 197,252,91,201 // vcvtdq2ps %ymm1,%ymm1
@@ -18901,7 +19006,7 @@ _sk_byte_tables_rgb_hsw:
.byte 67,15,182,4,26 // movzbl (%r10,%r11,1),%eax
.byte 196,194,125,49,193 // vpmovzxbd %xmm9,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,98,125,24,5,74,68,0,0 // vbroadcastss 0x444a(%rip),%ymm8 # 69a0 <_sk_clut_4D_hsw+0xa16>
+ .byte 196,98,125,24,5,125,71,0,0 // vbroadcastss 0x477d(%rip),%ymm8 # 6e6c <_sk_clut_4D_hsw+0xa16>
.byte 196,193,124,89,192 // vmulps %ymm8,%ymm0,%ymm0
.byte 196,226,125,49,201 // vpmovzxbd %xmm1,%ymm1
.byte 197,252,91,201 // vcvtdq2ps %ymm1,%ymm1
@@ -19000,33 +19105,33 @@ _sk_parametric_r_hsw:
.byte 196,66,125,168,211 // vfmadd213ps %ymm11,%ymm0,%ymm10
.byte 196,226,125,24,0 // vbroadcastss (%rax),%ymm0
.byte 196,65,124,91,218 // vcvtdq2ps %ymm10,%ymm11
- .byte 196,98,125,24,37,36,67,0,0 // vbroadcastss 0x4324(%rip),%ymm12 # 69a4 <_sk_clut_4D_hsw+0xa1a>
- .byte 196,98,125,24,45,31,67,0,0 // vbroadcastss 0x431f(%rip),%ymm13 # 69a8 <_sk_clut_4D_hsw+0xa1e>
+ .byte 196,98,125,24,37,87,70,0,0 // vbroadcastss 0x4657(%rip),%ymm12 # 6e70 <_sk_clut_4D_hsw+0xa1a>
+ .byte 196,98,125,24,45,82,70,0,0 // vbroadcastss 0x4652(%rip),%ymm13 # 6e74 <_sk_clut_4D_hsw+0xa1e>
.byte 196,65,44,84,213 // vandps %ymm13,%ymm10,%ymm10
- .byte 196,98,125,24,45,21,67,0,0 // vbroadcastss 0x4315(%rip),%ymm13 # 69ac <_sk_clut_4D_hsw+0xa22>
+ .byte 196,98,125,24,45,72,70,0,0 // vbroadcastss 0x4648(%rip),%ymm13 # 6e78 <_sk_clut_4D_hsw+0xa22>
.byte 196,65,44,86,213 // vorps %ymm13,%ymm10,%ymm10
- .byte 196,98,125,24,45,11,67,0,0 // vbroadcastss 0x430b(%rip),%ymm13 # 69b0 <_sk_clut_4D_hsw+0xa26>
+ .byte 196,98,125,24,45,62,70,0,0 // vbroadcastss 0x463e(%rip),%ymm13 # 6e7c <_sk_clut_4D_hsw+0xa26>
.byte 196,66,37,184,236 // vfmadd231ps %ymm12,%ymm11,%ymm13
- .byte 196,98,125,24,29,1,67,0,0 // vbroadcastss 0x4301(%rip),%ymm11 # 69b4 <_sk_clut_4D_hsw+0xa2a>
+ .byte 196,98,125,24,29,52,70,0,0 // vbroadcastss 0x4634(%rip),%ymm11 # 6e80 <_sk_clut_4D_hsw+0xa2a>
.byte 196,66,45,172,221 // vfnmadd213ps %ymm13,%ymm10,%ymm11
- .byte 196,98,125,24,37,247,66,0,0 // vbroadcastss 0x42f7(%rip),%ymm12 # 69b8 <_sk_clut_4D_hsw+0xa2e>
+ .byte 196,98,125,24,37,42,70,0,0 // vbroadcastss 0x462a(%rip),%ymm12 # 6e84 <_sk_clut_4D_hsw+0xa2e>
.byte 196,65,44,88,212 // vaddps %ymm12,%ymm10,%ymm10
- .byte 196,98,125,24,37,237,66,0,0 // vbroadcastss 0x42ed(%rip),%ymm12 # 69bc <_sk_clut_4D_hsw+0xa32>
+ .byte 196,98,125,24,37,32,70,0,0 // vbroadcastss 0x4620(%rip),%ymm12 # 6e88 <_sk_clut_4D_hsw+0xa32>
.byte 196,65,28,94,210 // vdivps %ymm10,%ymm12,%ymm10
.byte 196,65,36,92,210 // vsubps %ymm10,%ymm11,%ymm10
.byte 196,193,124,89,194 // vmulps %ymm10,%ymm0,%ymm0
.byte 196,99,125,8,208,1 // vroundps $0x1,%ymm0,%ymm10
.byte 196,65,124,92,210 // vsubps %ymm10,%ymm0,%ymm10
- .byte 196,98,125,24,29,206,66,0,0 // vbroadcastss 0x42ce(%rip),%ymm11 # 69c0 <_sk_clut_4D_hsw+0xa36>
+ .byte 196,98,125,24,29,1,70,0,0 // vbroadcastss 0x4601(%rip),%ymm11 # 6e8c <_sk_clut_4D_hsw+0xa36>
.byte 196,193,124,88,195 // vaddps %ymm11,%ymm0,%ymm0
- .byte 196,98,125,24,29,196,66,0,0 // vbroadcastss 0x42c4(%rip),%ymm11 # 69c4 <_sk_clut_4D_hsw+0xa3a>
+ .byte 196,98,125,24,29,247,69,0,0 // vbroadcastss 0x45f7(%rip),%ymm11 # 6e90 <_sk_clut_4D_hsw+0xa3a>
.byte 196,98,45,172,216 // vfnmadd213ps %ymm0,%ymm10,%ymm11
- .byte 196,226,125,24,5,186,66,0,0 // vbroadcastss 0x42ba(%rip),%ymm0 # 69c8 <_sk_clut_4D_hsw+0xa3e>
+ .byte 196,226,125,24,5,237,69,0,0 // vbroadcastss 0x45ed(%rip),%ymm0 # 6e94 <_sk_clut_4D_hsw+0xa3e>
.byte 196,193,124,92,194 // vsubps %ymm10,%ymm0,%ymm0
- .byte 196,98,125,24,21,176,66,0,0 // vbroadcastss 0x42b0(%rip),%ymm10 # 69cc <_sk_clut_4D_hsw+0xa42>
+ .byte 196,98,125,24,21,227,69,0,0 // vbroadcastss 0x45e3(%rip),%ymm10 # 6e98 <_sk_clut_4D_hsw+0xa42>
.byte 197,172,94,192 // vdivps %ymm0,%ymm10,%ymm0
.byte 197,164,88,192 // vaddps %ymm0,%ymm11,%ymm0
- .byte 196,98,125,24,21,163,66,0,0 // vbroadcastss 0x42a3(%rip),%ymm10 # 69d0 <_sk_clut_4D_hsw+0xa46>
+ .byte 196,98,125,24,21,214,69,0,0 // vbroadcastss 0x45d6(%rip),%ymm10 # 6e9c <_sk_clut_4D_hsw+0xa46>
.byte 196,193,124,89,194 // vmulps %ymm10,%ymm0,%ymm0
.byte 197,253,91,192 // vcvtps2dq %ymm0,%ymm0
.byte 196,98,125,24,80,20 // vbroadcastss 0x14(%rax),%ymm10
@@ -19034,7 +19139,7 @@ _sk_parametric_r_hsw:
.byte 196,195,125,74,193,128 // vblendvps %ymm8,%ymm9,%ymm0,%ymm0
.byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
.byte 196,193,124,95,192 // vmaxps %ymm8,%ymm0,%ymm0
- .byte 196,98,125,24,5,122,66,0,0 // vbroadcastss 0x427a(%rip),%ymm8 # 69d4 <_sk_clut_4D_hsw+0xa4a>
+ .byte 196,98,125,24,5,173,69,0,0 // vbroadcastss 0x45ad(%rip),%ymm8 # 6ea0 <_sk_clut_4D_hsw+0xa4a>
.byte 196,193,124,93,192 // vminps %ymm8,%ymm0,%ymm0
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -19054,33 +19159,33 @@ _sk_parametric_g_hsw:
.byte 196,66,117,168,211 // vfmadd213ps %ymm11,%ymm1,%ymm10
.byte 196,226,125,24,8 // vbroadcastss (%rax),%ymm1
.byte 196,65,124,91,218 // vcvtdq2ps %ymm10,%ymm11
- .byte 196,98,125,24,37,50,66,0,0 // vbroadcastss 0x4232(%rip),%ymm12 # 69d8 <_sk_clut_4D_hsw+0xa4e>
- .byte 196,98,125,24,45,45,66,0,0 // vbroadcastss 0x422d(%rip),%ymm13 # 69dc <_sk_clut_4D_hsw+0xa52>
+ .byte 196,98,125,24,37,101,69,0,0 // vbroadcastss 0x4565(%rip),%ymm12 # 6ea4 <_sk_clut_4D_hsw+0xa4e>
+ .byte 196,98,125,24,45,96,69,0,0 // vbroadcastss 0x4560(%rip),%ymm13 # 6ea8 <_sk_clut_4D_hsw+0xa52>
.byte 196,65,44,84,213 // vandps %ymm13,%ymm10,%ymm10
- .byte 196,98,125,24,45,35,66,0,0 // vbroadcastss 0x4223(%rip),%ymm13 # 69e0 <_sk_clut_4D_hsw+0xa56>
+ .byte 196,98,125,24,45,86,69,0,0 // vbroadcastss 0x4556(%rip),%ymm13 # 6eac <_sk_clut_4D_hsw+0xa56>
.byte 196,65,44,86,213 // vorps %ymm13,%ymm10,%ymm10
- .byte 196,98,125,24,45,25,66,0,0 // vbroadcastss 0x4219(%rip),%ymm13 # 69e4 <_sk_clut_4D_hsw+0xa5a>
+ .byte 196,98,125,24,45,76,69,0,0 // vbroadcastss 0x454c(%rip),%ymm13 # 6eb0 <_sk_clut_4D_hsw+0xa5a>
.byte 196,66,37,184,236 // vfmadd231ps %ymm12,%ymm11,%ymm13
- .byte 196,98,125,24,29,15,66,0,0 // vbroadcastss 0x420f(%rip),%ymm11 # 69e8 <_sk_clut_4D_hsw+0xa5e>
+ .byte 196,98,125,24,29,66,69,0,0 // vbroadcastss 0x4542(%rip),%ymm11 # 6eb4 <_sk_clut_4D_hsw+0xa5e>
.byte 196,66,45,172,221 // vfnmadd213ps %ymm13,%ymm10,%ymm11
- .byte 196,98,125,24,37,5,66,0,0 // vbroadcastss 0x4205(%rip),%ymm12 # 69ec <_sk_clut_4D_hsw+0xa62>
+ .byte 196,98,125,24,37,56,69,0,0 // vbroadcastss 0x4538(%rip),%ymm12 # 6eb8 <_sk_clut_4D_hsw+0xa62>
.byte 196,65,44,88,212 // vaddps %ymm12,%ymm10,%ymm10
- .byte 196,98,125,24,37,251,65,0,0 // vbroadcastss 0x41fb(%rip),%ymm12 # 69f0 <_sk_clut_4D_hsw+0xa66>
+ .byte 196,98,125,24,37,46,69,0,0 // vbroadcastss 0x452e(%rip),%ymm12 # 6ebc <_sk_clut_4D_hsw+0xa66>
.byte 196,65,28,94,210 // vdivps %ymm10,%ymm12,%ymm10
.byte 196,65,36,92,210 // vsubps %ymm10,%ymm11,%ymm10
.byte 196,193,116,89,202 // vmulps %ymm10,%ymm1,%ymm1
.byte 196,99,125,8,209,1 // vroundps $0x1,%ymm1,%ymm10
.byte 196,65,116,92,210 // vsubps %ymm10,%ymm1,%ymm10
- .byte 196,98,125,24,29,220,65,0,0 // vbroadcastss 0x41dc(%rip),%ymm11 # 69f4 <_sk_clut_4D_hsw+0xa6a>
+ .byte 196,98,125,24,29,15,69,0,0 // vbroadcastss 0x450f(%rip),%ymm11 # 6ec0 <_sk_clut_4D_hsw+0xa6a>
.byte 196,193,116,88,203 // vaddps %ymm11,%ymm1,%ymm1
- .byte 196,98,125,24,29,210,65,0,0 // vbroadcastss 0x41d2(%rip),%ymm11 # 69f8 <_sk_clut_4D_hsw+0xa6e>
+ .byte 196,98,125,24,29,5,69,0,0 // vbroadcastss 0x4505(%rip),%ymm11 # 6ec4 <_sk_clut_4D_hsw+0xa6e>
.byte 196,98,45,172,217 // vfnmadd213ps %ymm1,%ymm10,%ymm11
- .byte 196,226,125,24,13,200,65,0,0 // vbroadcastss 0x41c8(%rip),%ymm1 # 69fc <_sk_clut_4D_hsw+0xa72>
+ .byte 196,226,125,24,13,251,68,0,0 // vbroadcastss 0x44fb(%rip),%ymm1 # 6ec8 <_sk_clut_4D_hsw+0xa72>
.byte 196,193,116,92,202 // vsubps %ymm10,%ymm1,%ymm1
- .byte 196,98,125,24,21,190,65,0,0 // vbroadcastss 0x41be(%rip),%ymm10 # 6a00 <_sk_clut_4D_hsw+0xa76>
+ .byte 196,98,125,24,21,241,68,0,0 // vbroadcastss 0x44f1(%rip),%ymm10 # 6ecc <_sk_clut_4D_hsw+0xa76>
.byte 197,172,94,201 // vdivps %ymm1,%ymm10,%ymm1
.byte 197,164,88,201 // vaddps %ymm1,%ymm11,%ymm1
- .byte 196,98,125,24,21,177,65,0,0 // vbroadcastss 0x41b1(%rip),%ymm10 # 6a04 <_sk_clut_4D_hsw+0xa7a>
+ .byte 196,98,125,24,21,228,68,0,0 // vbroadcastss 0x44e4(%rip),%ymm10 # 6ed0 <_sk_clut_4D_hsw+0xa7a>
.byte 196,193,116,89,202 // vmulps %ymm10,%ymm1,%ymm1
.byte 197,253,91,201 // vcvtps2dq %ymm1,%ymm1
.byte 196,98,125,24,80,20 // vbroadcastss 0x14(%rax),%ymm10
@@ -19088,7 +19193,7 @@ _sk_parametric_g_hsw:
.byte 196,195,117,74,201,128 // vblendvps %ymm8,%ymm9,%ymm1,%ymm1
.byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
.byte 196,193,116,95,200 // vmaxps %ymm8,%ymm1,%ymm1
- .byte 196,98,125,24,5,136,65,0,0 // vbroadcastss 0x4188(%rip),%ymm8 # 6a08 <_sk_clut_4D_hsw+0xa7e>
+ .byte 196,98,125,24,5,187,68,0,0 // vbroadcastss 0x44bb(%rip),%ymm8 # 6ed4 <_sk_clut_4D_hsw+0xa7e>
.byte 196,193,116,93,200 // vminps %ymm8,%ymm1,%ymm1
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -19108,33 +19213,33 @@ _sk_parametric_b_hsw:
.byte 196,66,109,168,211 // vfmadd213ps %ymm11,%ymm2,%ymm10
.byte 196,226,125,24,16 // vbroadcastss (%rax),%ymm2
.byte 196,65,124,91,218 // vcvtdq2ps %ymm10,%ymm11
- .byte 196,98,125,24,37,64,65,0,0 // vbroadcastss 0x4140(%rip),%ymm12 # 6a0c <_sk_clut_4D_hsw+0xa82>
- .byte 196,98,125,24,45,59,65,0,0 // vbroadcastss 0x413b(%rip),%ymm13 # 6a10 <_sk_clut_4D_hsw+0xa86>
+ .byte 196,98,125,24,37,115,68,0,0 // vbroadcastss 0x4473(%rip),%ymm12 # 6ed8 <_sk_clut_4D_hsw+0xa82>
+ .byte 196,98,125,24,45,110,68,0,0 // vbroadcastss 0x446e(%rip),%ymm13 # 6edc <_sk_clut_4D_hsw+0xa86>
.byte 196,65,44,84,213 // vandps %ymm13,%ymm10,%ymm10
- .byte 196,98,125,24,45,49,65,0,0 // vbroadcastss 0x4131(%rip),%ymm13 # 6a14 <_sk_clut_4D_hsw+0xa8a>
+ .byte 196,98,125,24,45,100,68,0,0 // vbroadcastss 0x4464(%rip),%ymm13 # 6ee0 <_sk_clut_4D_hsw+0xa8a>
.byte 196,65,44,86,213 // vorps %ymm13,%ymm10,%ymm10
- .byte 196,98,125,24,45,39,65,0,0 // vbroadcastss 0x4127(%rip),%ymm13 # 6a18 <_sk_clut_4D_hsw+0xa8e>
+ .byte 196,98,125,24,45,90,68,0,0 // vbroadcastss 0x445a(%rip),%ymm13 # 6ee4 <_sk_clut_4D_hsw+0xa8e>
.byte 196,66,37,184,236 // vfmadd231ps %ymm12,%ymm11,%ymm13
- .byte 196,98,125,24,29,29,65,0,0 // vbroadcastss 0x411d(%rip),%ymm11 # 6a1c <_sk_clut_4D_hsw+0xa92>
+ .byte 196,98,125,24,29,80,68,0,0 // vbroadcastss 0x4450(%rip),%ymm11 # 6ee8 <_sk_clut_4D_hsw+0xa92>
.byte 196,66,45,172,221 // vfnmadd213ps %ymm13,%ymm10,%ymm11
- .byte 196,98,125,24,37,19,65,0,0 // vbroadcastss 0x4113(%rip),%ymm12 # 6a20 <_sk_clut_4D_hsw+0xa96>
+ .byte 196,98,125,24,37,70,68,0,0 // vbroadcastss 0x4446(%rip),%ymm12 # 6eec <_sk_clut_4D_hsw+0xa96>
.byte 196,65,44,88,212 // vaddps %ymm12,%ymm10,%ymm10
- .byte 196,98,125,24,37,9,65,0,0 // vbroadcastss 0x4109(%rip),%ymm12 # 6a24 <_sk_clut_4D_hsw+0xa9a>
+ .byte 196,98,125,24,37,60,68,0,0 // vbroadcastss 0x443c(%rip),%ymm12 # 6ef0 <_sk_clut_4D_hsw+0xa9a>
.byte 196,65,28,94,210 // vdivps %ymm10,%ymm12,%ymm10
.byte 196,65,36,92,210 // vsubps %ymm10,%ymm11,%ymm10
.byte 196,193,108,89,210 // vmulps %ymm10,%ymm2,%ymm2
.byte 196,99,125,8,210,1 // vroundps $0x1,%ymm2,%ymm10
.byte 196,65,108,92,210 // vsubps %ymm10,%ymm2,%ymm10
- .byte 196,98,125,24,29,234,64,0,0 // vbroadcastss 0x40ea(%rip),%ymm11 # 6a28 <_sk_clut_4D_hsw+0xa9e>
+ .byte 196,98,125,24,29,29,68,0,0 // vbroadcastss 0x441d(%rip),%ymm11 # 6ef4 <_sk_clut_4D_hsw+0xa9e>
.byte 196,193,108,88,211 // vaddps %ymm11,%ymm2,%ymm2
- .byte 196,98,125,24,29,224,64,0,0 // vbroadcastss 0x40e0(%rip),%ymm11 # 6a2c <_sk_clut_4D_hsw+0xaa2>
+ .byte 196,98,125,24,29,19,68,0,0 // vbroadcastss 0x4413(%rip),%ymm11 # 6ef8 <_sk_clut_4D_hsw+0xaa2>
.byte 196,98,45,172,218 // vfnmadd213ps %ymm2,%ymm10,%ymm11
- .byte 196,226,125,24,21,214,64,0,0 // vbroadcastss 0x40d6(%rip),%ymm2 # 6a30 <_sk_clut_4D_hsw+0xaa6>
+ .byte 196,226,125,24,21,9,68,0,0 // vbroadcastss 0x4409(%rip),%ymm2 # 6efc <_sk_clut_4D_hsw+0xaa6>
.byte 196,193,108,92,210 // vsubps %ymm10,%ymm2,%ymm2
- .byte 196,98,125,24,21,204,64,0,0 // vbroadcastss 0x40cc(%rip),%ymm10 # 6a34 <_sk_clut_4D_hsw+0xaaa>
+ .byte 196,98,125,24,21,255,67,0,0 // vbroadcastss 0x43ff(%rip),%ymm10 # 6f00 <_sk_clut_4D_hsw+0xaaa>
.byte 197,172,94,210 // vdivps %ymm2,%ymm10,%ymm2
.byte 197,164,88,210 // vaddps %ymm2,%ymm11,%ymm2
- .byte 196,98,125,24,21,191,64,0,0 // vbroadcastss 0x40bf(%rip),%ymm10 # 6a38 <_sk_clut_4D_hsw+0xaae>
+ .byte 196,98,125,24,21,242,67,0,0 // vbroadcastss 0x43f2(%rip),%ymm10 # 6f04 <_sk_clut_4D_hsw+0xaae>
.byte 196,193,108,89,210 // vmulps %ymm10,%ymm2,%ymm2
.byte 197,253,91,210 // vcvtps2dq %ymm2,%ymm2
.byte 196,98,125,24,80,20 // vbroadcastss 0x14(%rax),%ymm10
@@ -19142,7 +19247,7 @@ _sk_parametric_b_hsw:
.byte 196,195,109,74,209,128 // vblendvps %ymm8,%ymm9,%ymm2,%ymm2
.byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
.byte 196,193,108,95,208 // vmaxps %ymm8,%ymm2,%ymm2
- .byte 196,98,125,24,5,150,64,0,0 // vbroadcastss 0x4096(%rip),%ymm8 # 6a3c <_sk_clut_4D_hsw+0xab2>
+ .byte 196,98,125,24,5,201,67,0,0 // vbroadcastss 0x43c9(%rip),%ymm8 # 6f08 <_sk_clut_4D_hsw+0xab2>
.byte 196,193,108,93,208 // vminps %ymm8,%ymm2,%ymm2
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -19162,33 +19267,33 @@ _sk_parametric_a_hsw:
.byte 196,66,101,168,211 // vfmadd213ps %ymm11,%ymm3,%ymm10
.byte 196,226,125,24,24 // vbroadcastss (%rax),%ymm3
.byte 196,65,124,91,218 // vcvtdq2ps %ymm10,%ymm11
- .byte 196,98,125,24,37,78,64,0,0 // vbroadcastss 0x404e(%rip),%ymm12 # 6a40 <_sk_clut_4D_hsw+0xab6>
- .byte 196,98,125,24,45,73,64,0,0 // vbroadcastss 0x4049(%rip),%ymm13 # 6a44 <_sk_clut_4D_hsw+0xaba>
+ .byte 196,98,125,24,37,129,67,0,0 // vbroadcastss 0x4381(%rip),%ymm12 # 6f0c <_sk_clut_4D_hsw+0xab6>
+ .byte 196,98,125,24,45,124,67,0,0 // vbroadcastss 0x437c(%rip),%ymm13 # 6f10 <_sk_clut_4D_hsw+0xaba>
.byte 196,65,44,84,213 // vandps %ymm13,%ymm10,%ymm10
- .byte 196,98,125,24,45,63,64,0,0 // vbroadcastss 0x403f(%rip),%ymm13 # 6a48 <_sk_clut_4D_hsw+0xabe>
+ .byte 196,98,125,24,45,114,67,0,0 // vbroadcastss 0x4372(%rip),%ymm13 # 6f14 <_sk_clut_4D_hsw+0xabe>
.byte 196,65,44,86,213 // vorps %ymm13,%ymm10,%ymm10
- .byte 196,98,125,24,45,53,64,0,0 // vbroadcastss 0x4035(%rip),%ymm13 # 6a4c <_sk_clut_4D_hsw+0xac2>
+ .byte 196,98,125,24,45,104,67,0,0 // vbroadcastss 0x4368(%rip),%ymm13 # 6f18 <_sk_clut_4D_hsw+0xac2>
.byte 196,66,37,184,236 // vfmadd231ps %ymm12,%ymm11,%ymm13
- .byte 196,98,125,24,29,43,64,0,0 // vbroadcastss 0x402b(%rip),%ymm11 # 6a50 <_sk_clut_4D_hsw+0xac6>
+ .byte 196,98,125,24,29,94,67,0,0 // vbroadcastss 0x435e(%rip),%ymm11 # 6f1c <_sk_clut_4D_hsw+0xac6>
.byte 196,66,45,172,221 // vfnmadd213ps %ymm13,%ymm10,%ymm11
- .byte 196,98,125,24,37,33,64,0,0 // vbroadcastss 0x4021(%rip),%ymm12 # 6a54 <_sk_clut_4D_hsw+0xaca>
+ .byte 196,98,125,24,37,84,67,0,0 // vbroadcastss 0x4354(%rip),%ymm12 # 6f20 <_sk_clut_4D_hsw+0xaca>
.byte 196,65,44,88,212 // vaddps %ymm12,%ymm10,%ymm10
- .byte 196,98,125,24,37,23,64,0,0 // vbroadcastss 0x4017(%rip),%ymm12 # 6a58 <_sk_clut_4D_hsw+0xace>
+ .byte 196,98,125,24,37,74,67,0,0 // vbroadcastss 0x434a(%rip),%ymm12 # 6f24 <_sk_clut_4D_hsw+0xace>
.byte 196,65,28,94,210 // vdivps %ymm10,%ymm12,%ymm10
.byte 196,65,36,92,210 // vsubps %ymm10,%ymm11,%ymm10
.byte 196,193,100,89,218 // vmulps %ymm10,%ymm3,%ymm3
.byte 196,99,125,8,211,1 // vroundps $0x1,%ymm3,%ymm10
.byte 196,65,100,92,210 // vsubps %ymm10,%ymm3,%ymm10
- .byte 196,98,125,24,29,248,63,0,0 // vbroadcastss 0x3ff8(%rip),%ymm11 # 6a5c <_sk_clut_4D_hsw+0xad2>
+ .byte 196,98,125,24,29,43,67,0,0 // vbroadcastss 0x432b(%rip),%ymm11 # 6f28 <_sk_clut_4D_hsw+0xad2>
.byte 196,193,100,88,219 // vaddps %ymm11,%ymm3,%ymm3
- .byte 196,98,125,24,29,238,63,0,0 // vbroadcastss 0x3fee(%rip),%ymm11 # 6a60 <_sk_clut_4D_hsw+0xad6>
+ .byte 196,98,125,24,29,33,67,0,0 // vbroadcastss 0x4321(%rip),%ymm11 # 6f2c <_sk_clut_4D_hsw+0xad6>
.byte 196,98,45,172,219 // vfnmadd213ps %ymm3,%ymm10,%ymm11
- .byte 196,226,125,24,29,228,63,0,0 // vbroadcastss 0x3fe4(%rip),%ymm3 # 6a64 <_sk_clut_4D_hsw+0xada>
+ .byte 196,226,125,24,29,23,67,0,0 // vbroadcastss 0x4317(%rip),%ymm3 # 6f30 <_sk_clut_4D_hsw+0xada>
.byte 196,193,100,92,218 // vsubps %ymm10,%ymm3,%ymm3
- .byte 196,98,125,24,21,218,63,0,0 // vbroadcastss 0x3fda(%rip),%ymm10 # 6a68 <_sk_clut_4D_hsw+0xade>
+ .byte 196,98,125,24,21,13,67,0,0 // vbroadcastss 0x430d(%rip),%ymm10 # 6f34 <_sk_clut_4D_hsw+0xade>
.byte 197,172,94,219 // vdivps %ymm3,%ymm10,%ymm3
.byte 197,164,88,219 // vaddps %ymm3,%ymm11,%ymm3
- .byte 196,98,125,24,21,205,63,0,0 // vbroadcastss 0x3fcd(%rip),%ymm10 # 6a6c <_sk_clut_4D_hsw+0xae2>
+ .byte 196,98,125,24,21,0,67,0,0 // vbroadcastss 0x4300(%rip),%ymm10 # 6f38 <_sk_clut_4D_hsw+0xae2>
.byte 196,193,100,89,218 // vmulps %ymm10,%ymm3,%ymm3
.byte 197,253,91,219 // vcvtps2dq %ymm3,%ymm3
.byte 196,98,125,24,80,20 // vbroadcastss 0x14(%rax),%ymm10
@@ -19196,7 +19301,7 @@ _sk_parametric_a_hsw:
.byte 196,195,101,74,217,128 // vblendvps %ymm8,%ymm9,%ymm3,%ymm3
.byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
.byte 196,193,100,95,216 // vmaxps %ymm8,%ymm3,%ymm3
- .byte 196,98,125,24,5,164,63,0,0 // vbroadcastss 0x3fa4(%rip),%ymm8 # 6a70 <_sk_clut_4D_hsw+0xae6>
+ .byte 196,98,125,24,5,215,66,0,0 // vbroadcastss 0x42d7(%rip),%ymm8 # 6f3c <_sk_clut_4D_hsw+0xae6>
.byte 196,193,100,93,216 // vminps %ymm8,%ymm3,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -19215,35 +19320,35 @@ _sk_gamma_hsw:
.byte 197,252,40,241 // vmovaps %ymm1,%ymm6
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 197,124,91,208 // vcvtdq2ps %ymm0,%ymm10
- .byte 196,98,125,24,29,103,63,0,0 // vbroadcastss 0x3f67(%rip),%ymm11 # 6a74 <_sk_clut_4D_hsw+0xaea>
- .byte 196,226,125,24,45,98,63,0,0 // vbroadcastss 0x3f62(%rip),%ymm5 # 6a78 <_sk_clut_4D_hsw+0xaee>
+ .byte 196,98,125,24,29,154,66,0,0 // vbroadcastss 0x429a(%rip),%ymm11 # 6f40 <_sk_clut_4D_hsw+0xaea>
+ .byte 196,226,125,24,45,149,66,0,0 // vbroadcastss 0x4295(%rip),%ymm5 # 6f44 <_sk_clut_4D_hsw+0xaee>
.byte 197,124,84,205 // vandps %ymm5,%ymm0,%ymm9
- .byte 196,226,125,24,37,89,63,0,0 // vbroadcastss 0x3f59(%rip),%ymm4 # 6a7c <_sk_clut_4D_hsw+0xaf2>
+ .byte 196,226,125,24,37,140,66,0,0 // vbroadcastss 0x428c(%rip),%ymm4 # 6f48 <_sk_clut_4D_hsw+0xaf2>
.byte 197,52,86,228 // vorps %ymm4,%ymm9,%ymm12
- .byte 196,98,125,24,45,80,63,0,0 // vbroadcastss 0x3f50(%rip),%ymm13 # 6a80 <_sk_clut_4D_hsw+0xaf6>
+ .byte 196,98,125,24,45,131,66,0,0 // vbroadcastss 0x4283(%rip),%ymm13 # 6f4c <_sk_clut_4D_hsw+0xaf6>
.byte 196,66,37,168,213 // vfmadd213ps %ymm13,%ymm11,%ymm10
- .byte 196,98,125,24,13,70,63,0,0 // vbroadcastss 0x3f46(%rip),%ymm9 # 6a84 <_sk_clut_4D_hsw+0xafa>
+ .byte 196,98,125,24,13,121,66,0,0 // vbroadcastss 0x4279(%rip),%ymm9 # 6f50 <_sk_clut_4D_hsw+0xafa>
.byte 196,66,29,188,209 // vfnmadd231ps %ymm9,%ymm12,%ymm10
.byte 197,124,91,246 // vcvtdq2ps %ymm6,%ymm14
.byte 196,66,37,168,245 // vfmadd213ps %ymm13,%ymm11,%ymm14
.byte 197,124,91,255 // vcvtdq2ps %ymm7,%ymm15
.byte 196,66,37,168,253 // vfmadd213ps %ymm13,%ymm11,%ymm15
- .byte 196,98,125,24,29,42,63,0,0 // vbroadcastss 0x3f2a(%rip),%ymm11 # 6a88 <_sk_clut_4D_hsw+0xafe>
+ .byte 196,98,125,24,29,93,66,0,0 // vbroadcastss 0x425d(%rip),%ymm11 # 6f54 <_sk_clut_4D_hsw+0xafe>
.byte 196,65,28,88,227 // vaddps %ymm11,%ymm12,%ymm12
- .byte 196,98,125,24,45,32,63,0,0 // vbroadcastss 0x3f20(%rip),%ymm13 # 6a8c <_sk_clut_4D_hsw+0xb02>
+ .byte 196,98,125,24,45,83,66,0,0 // vbroadcastss 0x4253(%rip),%ymm13 # 6f58 <_sk_clut_4D_hsw+0xb02>
.byte 196,65,20,94,228 // vdivps %ymm12,%ymm13,%ymm12
.byte 196,65,44,92,212 // vsubps %ymm12,%ymm10,%ymm10
.byte 196,98,125,24,32 // vbroadcastss (%rax),%ymm12
.byte 196,65,44,89,212 // vmulps %ymm12,%ymm10,%ymm10
.byte 196,67,125,8,194,1 // vroundps $0x1,%ymm10,%ymm8
.byte 196,65,44,92,192 // vsubps %ymm8,%ymm10,%ymm8
- .byte 196,226,125,24,21,252,62,0,0 // vbroadcastss 0x3efc(%rip),%ymm2 # 6a90 <_sk_clut_4D_hsw+0xb06>
+ .byte 196,226,125,24,21,47,66,0,0 // vbroadcastss 0x422f(%rip),%ymm2 # 6f5c <_sk_clut_4D_hsw+0xb06>
.byte 197,44,88,210 // vaddps %ymm2,%ymm10,%ymm10
- .byte 196,226,125,24,29,243,62,0,0 // vbroadcastss 0x3ef3(%rip),%ymm3 # 6a94 <_sk_clut_4D_hsw+0xb0a>
+ .byte 196,226,125,24,29,38,66,0,0 // vbroadcastss 0x4226(%rip),%ymm3 # 6f60 <_sk_clut_4D_hsw+0xb0a>
.byte 196,98,61,188,211 // vfnmadd231ps %ymm3,%ymm8,%ymm10
- .byte 196,226,125,24,13,233,62,0,0 // vbroadcastss 0x3ee9(%rip),%ymm1 # 6a98 <_sk_clut_4D_hsw+0xb0e>
+ .byte 196,226,125,24,13,28,66,0,0 // vbroadcastss 0x421c(%rip),%ymm1 # 6f64 <_sk_clut_4D_hsw+0xb0e>
.byte 196,65,116,92,192 // vsubps %ymm8,%ymm1,%ymm8
- .byte 196,226,125,24,5,223,62,0,0 // vbroadcastss 0x3edf(%rip),%ymm0 # 6a9c <_sk_clut_4D_hsw+0xb12>
+ .byte 196,226,125,24,5,18,66,0,0 // vbroadcastss 0x4212(%rip),%ymm0 # 6f68 <_sk_clut_4D_hsw+0xb12>
.byte 196,65,124,94,192 // vdivps %ymm8,%ymm0,%ymm8
.byte 196,65,44,88,192 // vaddps %ymm8,%ymm10,%ymm8
.byte 197,204,84,245 // vandps %ymm5,%ymm6,%ymm6
@@ -19274,7 +19379,7 @@ _sk_gamma_hsw:
.byte 197,244,92,205 // vsubps %ymm5,%ymm1,%ymm1
.byte 197,252,94,193 // vdivps %ymm1,%ymm0,%ymm0
.byte 197,236,88,192 // vaddps %ymm0,%ymm2,%ymm0
- .byte 196,226,125,24,13,80,62,0,0 // vbroadcastss 0x3e50(%rip),%ymm1 # 6aa0 <_sk_clut_4D_hsw+0xb16>
+ .byte 196,226,125,24,13,131,65,0,0 // vbroadcastss 0x4183(%rip),%ymm1 # 6f6c <_sk_clut_4D_hsw+0xb16>
.byte 197,188,89,209 // vmulps %ymm1,%ymm8,%ymm2
.byte 197,204,89,217 // vmulps %ymm1,%ymm6,%ymm3
.byte 197,252,89,225 // vmulps %ymm1,%ymm0,%ymm4
@@ -19294,26 +19399,26 @@ HIDDEN _sk_lab_to_xyz_hsw
.globl _sk_lab_to_xyz_hsw
FUNCTION(_sk_lab_to_xyz_hsw)
_sk_lab_to_xyz_hsw:
- .byte 196,98,125,24,5,14,62,0,0 // vbroadcastss 0x3e0e(%rip),%ymm8 # 6aa4 <_sk_clut_4D_hsw+0xb1a>
- .byte 196,98,125,24,13,9,62,0,0 // vbroadcastss 0x3e09(%rip),%ymm9 # 6aa8 <_sk_clut_4D_hsw+0xb1e>
- .byte 196,98,125,24,21,4,62,0,0 // vbroadcastss 0x3e04(%rip),%ymm10 # 6aac <_sk_clut_4D_hsw+0xb22>
+ .byte 196,98,125,24,5,65,65,0,0 // vbroadcastss 0x4141(%rip),%ymm8 # 6f70 <_sk_clut_4D_hsw+0xb1a>
+ .byte 196,98,125,24,13,60,65,0,0 // vbroadcastss 0x413c(%rip),%ymm9 # 6f74 <_sk_clut_4D_hsw+0xb1e>
+ .byte 196,98,125,24,21,55,65,0,0 // vbroadcastss 0x4137(%rip),%ymm10 # 6f78 <_sk_clut_4D_hsw+0xb22>
.byte 196,194,53,168,202 // vfmadd213ps %ymm10,%ymm9,%ymm1
.byte 196,194,53,168,210 // vfmadd213ps %ymm10,%ymm9,%ymm2
- .byte 196,98,125,24,13,245,61,0,0 // vbroadcastss 0x3df5(%rip),%ymm9 # 6ab0 <_sk_clut_4D_hsw+0xb26>
+ .byte 196,98,125,24,13,40,65,0,0 // vbroadcastss 0x4128(%rip),%ymm9 # 6f7c <_sk_clut_4D_hsw+0xb26>
.byte 196,66,125,184,200 // vfmadd231ps %ymm8,%ymm0,%ymm9
- .byte 196,226,125,24,5,235,61,0,0 // vbroadcastss 0x3deb(%rip),%ymm0 # 6ab4 <_sk_clut_4D_hsw+0xb2a>
+ .byte 196,226,125,24,5,30,65,0,0 // vbroadcastss 0x411e(%rip),%ymm0 # 6f80 <_sk_clut_4D_hsw+0xb2a>
.byte 197,180,89,192 // vmulps %ymm0,%ymm9,%ymm0
- .byte 196,98,125,24,5,226,61,0,0 // vbroadcastss 0x3de2(%rip),%ymm8 # 6ab8 <_sk_clut_4D_hsw+0xb2e>
+ .byte 196,98,125,24,5,21,65,0,0 // vbroadcastss 0x4115(%rip),%ymm8 # 6f84 <_sk_clut_4D_hsw+0xb2e>
.byte 196,98,117,168,192 // vfmadd213ps %ymm0,%ymm1,%ymm8
- .byte 196,98,125,24,13,216,61,0,0 // vbroadcastss 0x3dd8(%rip),%ymm9 # 6abc <_sk_clut_4D_hsw+0xb32>
+ .byte 196,98,125,24,13,11,65,0,0 // vbroadcastss 0x410b(%rip),%ymm9 # 6f88 <_sk_clut_4D_hsw+0xb32>
.byte 196,98,109,172,200 // vfnmadd213ps %ymm0,%ymm2,%ymm9
.byte 196,193,60,89,200 // vmulps %ymm8,%ymm8,%ymm1
.byte 197,188,89,201 // vmulps %ymm1,%ymm8,%ymm1
- .byte 196,226,125,24,21,197,61,0,0 // vbroadcastss 0x3dc5(%rip),%ymm2 # 6ac0 <_sk_clut_4D_hsw+0xb36>
+ .byte 196,226,125,24,21,248,64,0,0 // vbroadcastss 0x40f8(%rip),%ymm2 # 6f8c <_sk_clut_4D_hsw+0xb36>
.byte 197,108,194,209,1 // vcmpltps %ymm1,%ymm2,%ymm10
- .byte 196,98,125,24,29,187,61,0,0 // vbroadcastss 0x3dbb(%rip),%ymm11 # 6ac4 <_sk_clut_4D_hsw+0xb3a>
+ .byte 196,98,125,24,29,238,64,0,0 // vbroadcastss 0x40ee(%rip),%ymm11 # 6f90 <_sk_clut_4D_hsw+0xb3a>
.byte 196,65,60,88,195 // vaddps %ymm11,%ymm8,%ymm8
- .byte 196,98,125,24,37,177,61,0,0 // vbroadcastss 0x3db1(%rip),%ymm12 # 6ac8 <_sk_clut_4D_hsw+0xb3e>
+ .byte 196,98,125,24,37,228,64,0,0 // vbroadcastss 0x40e4(%rip),%ymm12 # 6f94 <_sk_clut_4D_hsw+0xb3e>
.byte 196,65,60,89,196 // vmulps %ymm12,%ymm8,%ymm8
.byte 196,99,61,74,193,160 // vblendvps %ymm10,%ymm1,%ymm8,%ymm8
.byte 197,252,89,200 // vmulps %ymm0,%ymm0,%ymm1
@@ -19328,9 +19433,9 @@ _sk_lab_to_xyz_hsw:
.byte 196,65,52,88,203 // vaddps %ymm11,%ymm9,%ymm9
.byte 196,65,52,89,204 // vmulps %ymm12,%ymm9,%ymm9
.byte 196,227,53,74,208,32 // vblendvps %ymm2,%ymm0,%ymm9,%ymm2
- .byte 196,226,125,24,5,102,61,0,0 // vbroadcastss 0x3d66(%rip),%ymm0 # 6acc <_sk_clut_4D_hsw+0xb42>
+ .byte 196,226,125,24,5,153,64,0,0 // vbroadcastss 0x4099(%rip),%ymm0 # 6f98 <_sk_clut_4D_hsw+0xb42>
.byte 197,188,89,192 // vmulps %ymm0,%ymm8,%ymm0
- .byte 196,98,125,24,5,93,61,0,0 // vbroadcastss 0x3d5d(%rip),%ymm8 # 6ad0 <_sk_clut_4D_hsw+0xb46>
+ .byte 196,98,125,24,5,144,64,0,0 // vbroadcastss 0x4090(%rip),%ymm8 # 6f9c <_sk_clut_4D_hsw+0xb46>
.byte 196,193,108,89,208 // vmulps %ymm8,%ymm2,%ymm2
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -19347,12 +19452,12 @@ _sk_load_a8_hsw:
.byte 76,3,8 // add (%rax),%r9
.byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 117,53 // jne 2dca <_sk_load_a8_hsw+0x4e>
+ .byte 117,53 // jne 2f63 <_sk_load_a8_hsw+0x4e>
.byte 196,194,121,48,4,25 // vpmovzxbw (%r9,%rbx,1),%xmm0
- .byte 197,249,219,5,221,66,0,0 // vpand 0x42dd(%rip),%xmm0,%xmm0 # 7080 <_sk_clut_4D_hsw+0x10f6>
+ .byte 197,249,219,5,4,70,0,0 // vpand 0x4604(%rip),%xmm0,%xmm0 # 7540 <_sk_clut_4D_hsw+0x10ea>
.byte 196,226,125,51,192 // vpmovzxwd %xmm0,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,226,125,24,13,31,61,0,0 // vbroadcastss 0x3d1f(%rip),%ymm1 # 6ad4 <_sk_clut_4D_hsw+0xb4a>
+ .byte 196,226,125,24,13,82,64,0,0 // vbroadcastss 0x4052(%rip),%ymm1 # 6fa0 <_sk_clut_4D_hsw+0xb4a>
.byte 197,252,89,217 // vmulps %ymm1,%ymm0,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 197,252,87,192 // vxorps %ymm0,%ymm0,%ymm0
@@ -19365,15 +19470,15 @@ _sk_load_a8_hsw:
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 65,254,202 // dec %r10b
.byte 65,128,250,6 // cmp $0x6,%r10b
- .byte 119,189 // ja 2d9b <_sk_load_a8_hsw+0x1f>
+ .byte 119,189 // ja 2f34 <_sk_load_a8_hsw+0x1f>
.byte 69,15,182,210 // movzbl %r10b,%r10d
- .byte 76,141,29,119,0,0,0 // lea 0x77(%rip),%r11 # 2e60 <_sk_load_a8_hsw+0xe4>
+ .byte 76,141,29,122,0,0,0 // lea 0x7a(%rip),%r11 # 2ffc <_sk_load_a8_hsw+0xe7>
.byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
.byte 65,15,182,4,25 // movzbl (%r9,%rbx,1),%eax
.byte 197,249,110,192 // vmovd %eax,%xmm0
- .byte 235,158 // jmp 2d9b <_sk_load_a8_hsw+0x1f>
+ .byte 235,158 // jmp 2f34 <_sk_load_a8_hsw+0x1f>
.byte 65,15,182,68,25,2 // movzbl 0x2(%r9,%rbx,1),%eax
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 197,249,196,192,2 // vpinsrw $0x2,%eax,%xmm0,%xmm0
@@ -19381,7 +19486,7 @@ _sk_load_a8_hsw:
.byte 197,249,110,200 // vmovd %eax,%xmm1
.byte 196,226,121,48,201 // vpmovzxbw %xmm1,%xmm1
.byte 196,227,121,2,193,1 // vpblendd $0x1,%xmm1,%xmm0,%xmm0
- .byte 233,118,255,255,255 // jmpq 2d9b <_sk_load_a8_hsw+0x1f>
+ .byte 233,118,255,255,255 // jmpq 2f34 <_sk_load_a8_hsw+0x1f>
.byte 65,15,182,68,25,6 // movzbl 0x6(%r9,%rbx,1),%eax
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 197,249,196,192,6 // vpinsrw $0x6,%eax,%xmm0,%xmm0
@@ -19392,23 +19497,24 @@ _sk_load_a8_hsw:
.byte 196,193,121,110,12,25 // vmovd (%r9,%rbx,1),%xmm1
.byte 196,226,121,48,201 // vpmovzxbw %xmm1,%xmm1
.byte 196,227,113,2,192,12 // vpblendd $0xc,%xmm0,%xmm1,%xmm0
- .byte 233,59,255,255,255 // jmpq 2d9b <_sk_load_a8_hsw+0x1f>
- .byte 146 // xchg %eax,%edx
+ .byte 233,59,255,255,255 // jmpq 2f34 <_sk_load_a8_hsw+0x1f>
+ .byte 15,31,0 // nopl (%rax)
+ .byte 143 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,172,255,255,255,157,255 // ljmp *-0x620001(%rdi,%rdi,8)
+ .byte 255,169,255,255,255,154 // ljmp *-0x65000001(%rcx)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 234 // (bad)
+ .byte 255,231 // jmpq *%rdi
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 223,255 // (bad)
+ .byte 220,255 // fdivr %st,%st(7)
.byte 255 // (bad)
- .byte 255,212 // callq *%rsp
+ .byte 255,209 // callq *%rcx
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,197 // inc %ebp
+ .byte 255,194 // inc %edx
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // .byte 0xff
@@ -19425,12 +19531,12 @@ _sk_load_a8_dst_hsw:
.byte 76,3,8 // add (%rax),%r9
.byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 117,53 // jne 2eca <_sk_load_a8_dst_hsw+0x4e>
+ .byte 117,53 // jne 3066 <_sk_load_a8_dst_hsw+0x4e>
.byte 196,194,121,48,36,25 // vpmovzxbw (%r9,%rbx,1),%xmm4
- .byte 197,217,219,37,237,65,0,0 // vpand 0x41ed(%rip),%xmm4,%xmm4 # 7090 <_sk_clut_4D_hsw+0x1106>
+ .byte 197,217,219,37,17,69,0,0 // vpand 0x4511(%rip),%xmm4,%xmm4 # 7550 <_sk_clut_4D_hsw+0x10fa>
.byte 196,226,125,51,228 // vpmovzxwd %xmm4,%ymm4
.byte 197,252,91,228 // vcvtdq2ps %ymm4,%ymm4
- .byte 196,226,125,24,45,35,60,0,0 // vbroadcastss 0x3c23(%rip),%ymm5 # 6ad8 <_sk_clut_4D_hsw+0xb4e>
+ .byte 196,226,125,24,45,83,63,0,0 // vbroadcastss 0x3f53(%rip),%ymm5 # 6fa4 <_sk_clut_4D_hsw+0xb4e>
.byte 197,220,89,253 // vmulps %ymm5,%ymm4,%ymm7
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 197,220,87,228 // vxorps %ymm4,%ymm4,%ymm4
@@ -19443,15 +19549,15 @@ _sk_load_a8_dst_hsw:
.byte 197,217,239,228 // vpxor %xmm4,%xmm4,%xmm4
.byte 65,254,202 // dec %r10b
.byte 65,128,250,6 // cmp $0x6,%r10b
- .byte 119,189 // ja 2e9b <_sk_load_a8_dst_hsw+0x1f>
+ .byte 119,189 // ja 3037 <_sk_load_a8_dst_hsw+0x1f>
.byte 69,15,182,210 // movzbl %r10b,%r10d
- .byte 76,141,29,119,0,0,0 // lea 0x77(%rip),%r11 # 2f60 <_sk_load_a8_dst_hsw+0xe4>
+ .byte 76,141,29,119,0,0,0 // lea 0x77(%rip),%r11 # 30fc <_sk_load_a8_dst_hsw+0xe4>
.byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
.byte 65,15,182,4,25 // movzbl (%r9,%rbx,1),%eax
.byte 197,249,110,224 // vmovd %eax,%xmm4
- .byte 235,158 // jmp 2e9b <_sk_load_a8_dst_hsw+0x1f>
+ .byte 235,158 // jmp 3037 <_sk_load_a8_dst_hsw+0x1f>
.byte 65,15,182,68,25,2 // movzbl 0x2(%r9,%rbx,1),%eax
.byte 197,217,239,228 // vpxor %xmm4,%xmm4,%xmm4
.byte 197,217,196,224,2 // vpinsrw $0x2,%eax,%xmm4,%xmm4
@@ -19459,7 +19565,7 @@ _sk_load_a8_dst_hsw:
.byte 197,249,110,232 // vmovd %eax,%xmm5
.byte 196,226,121,48,237 // vpmovzxbw %xmm5,%xmm5
.byte 196,227,89,2,229,1 // vpblendd $0x1,%xmm5,%xmm4,%xmm4
- .byte 233,118,255,255,255 // jmpq 2e9b <_sk_load_a8_dst_hsw+0x1f>
+ .byte 233,118,255,255,255 // jmpq 3037 <_sk_load_a8_dst_hsw+0x1f>
.byte 65,15,182,68,25,6 // movzbl 0x6(%r9,%rbx,1),%eax
.byte 197,217,239,228 // vpxor %xmm4,%xmm4,%xmm4
.byte 197,217,196,224,6 // vpinsrw $0x6,%eax,%xmm4,%xmm4
@@ -19470,7 +19576,7 @@ _sk_load_a8_dst_hsw:
.byte 196,193,121,110,44,25 // vmovd (%r9,%rbx,1),%xmm5
.byte 196,226,121,48,237 // vpmovzxbw %xmm5,%xmm5
.byte 196,227,81,2,228,12 // vpblendd $0xc,%xmm4,%xmm5,%xmm4
- .byte 233,59,255,255,255 // jmpq 2e9b <_sk_load_a8_dst_hsw+0x1f>
+ .byte 233,59,255,255,255 // jmpq 3037 <_sk_load_a8_dst_hsw+0x1f>
.byte 146 // xchg %eax,%edx
.byte 255 // (bad)
.byte 255 // (bad)
@@ -19531,7 +19637,7 @@ _sk_gather_a8_hsw:
.byte 196,227,121,32,192,7 // vpinsrb $0x7,%eax,%xmm0,%xmm0
.byte 196,226,125,49,192 // vpmovzxbd %xmm0,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,226,125,24,13,170,58,0,0 // vbroadcastss 0x3aaa(%rip),%ymm1 # 6adc <_sk_clut_4D_hsw+0xb52>
+ .byte 196,226,125,24,13,218,61,0,0 // vbroadcastss 0x3dda(%rip),%ymm1 # 6fa8 <_sk_clut_4D_hsw+0xb52>
.byte 197,252,89,217 // vmulps %ymm1,%ymm0,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 197,252,87,192 // vxorps %ymm0,%ymm0,%ymm0
@@ -19550,14 +19656,14 @@ _sk_store_a8_hsw:
.byte 77,15,175,202 // imul %r10,%r9
.byte 76,3,8 // add (%rax),%r9
.byte 72,99,218 // movslq %edx,%rbx
- .byte 196,98,125,24,5,125,58,0,0 // vbroadcastss 0x3a7d(%rip),%ymm8 # 6ae0 <_sk_clut_4D_hsw+0xb56>
+ .byte 196,98,125,24,5,173,61,0,0 // vbroadcastss 0x3dad(%rip),%ymm8 # 6fac <_sk_clut_4D_hsw+0xb56>
.byte 196,65,100,89,192 // vmulps %ymm8,%ymm3,%ymm8
.byte 196,65,125,91,192 // vcvtps2dq %ymm8,%ymm8
.byte 196,67,125,25,193,1 // vextractf128 $0x1,%ymm8,%xmm9
.byte 196,66,57,43,193 // vpackusdw %xmm9,%xmm8,%xmm8
.byte 196,65,57,103,192 // vpackuswb %xmm8,%xmm8,%xmm8
.byte 77,133,192 // test %r8,%r8
- .byte 117,11 // jne 308d <_sk_store_a8_hsw+0x47>
+ .byte 117,11 // jne 3229 <_sk_store_a8_hsw+0x47>
.byte 196,65,123,17,4,25 // vmovsd %xmm8,(%r9,%rbx,1)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 91 // pop %rbx
@@ -19566,25 +19672,25 @@ _sk_store_a8_hsw:
.byte 65,128,226,7 // and $0x7,%r10b
.byte 65,254,202 // dec %r10b
.byte 65,128,250,6 // cmp $0x6,%r10b
- .byte 119,235 // ja 3088 <_sk_store_a8_hsw+0x42>
+ .byte 119,235 // ja 3224 <_sk_store_a8_hsw+0x42>
.byte 196,66,121,48,192 // vpmovzxbw %xmm8,%xmm8
.byte 69,15,182,210 // movzbl %r10b,%r10d
- .byte 76,141,29,87,0,0,0 // lea 0x57(%rip),%r11 # 3104 <_sk_store_a8_hsw+0xbe>
+ .byte 76,141,29,87,0,0,0 // lea 0x57(%rip),%r11 # 32a0 <_sk_store_a8_hsw+0xbe>
.byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
.byte 196,67,121,20,4,25,0 // vpextrb $0x0,%xmm8,(%r9,%rbx,1)
- .byte 235,201 // jmp 3088 <_sk_store_a8_hsw+0x42>
+ .byte 235,201 // jmp 3224 <_sk_store_a8_hsw+0x42>
.byte 196,67,121,20,68,25,2,4 // vpextrb $0x4,%xmm8,0x2(%r9,%rbx,1)
- .byte 196,98,57,0,5,208,63,0,0 // vpshufb 0x3fd0(%rip),%xmm8,%xmm8 # 70a0 <_sk_clut_4D_hsw+0x1116>
+ .byte 196,98,57,0,5,244,66,0,0 // vpshufb 0x42f4(%rip),%xmm8,%xmm8 # 7560 <_sk_clut_4D_hsw+0x110a>
.byte 196,67,121,21,4,25,0 // vpextrw $0x0,%xmm8,(%r9,%rbx,1)
- .byte 235,175 // jmp 3088 <_sk_store_a8_hsw+0x42>
+ .byte 235,175 // jmp 3224 <_sk_store_a8_hsw+0x42>
.byte 196,67,121,20,68,25,6,12 // vpextrb $0xc,%xmm8,0x6(%r9,%rbx,1)
.byte 196,67,121,20,68,25,5,10 // vpextrb $0xa,%xmm8,0x5(%r9,%rbx,1)
.byte 196,67,121,20,68,25,4,8 // vpextrb $0x8,%xmm8,0x4(%r9,%rbx,1)
- .byte 196,98,57,0,5,182,63,0,0 // vpshufb 0x3fb6(%rip),%xmm8,%xmm8 # 70b0 <_sk_clut_4D_hsw+0x1126>
+ .byte 196,98,57,0,5,218,66,0,0 // vpshufb 0x42da(%rip),%xmm8,%xmm8 # 7570 <_sk_clut_4D_hsw+0x111a>
.byte 196,65,121,126,4,25 // vmovd %xmm8,(%r9,%rbx,1)
- .byte 235,134 // jmp 3088 <_sk_store_a8_hsw+0x42>
+ .byte 235,134 // jmp 3224 <_sk_store_a8_hsw+0x42>
.byte 102,144 // xchg %ax,%ax
.byte 178,255 // mov $0xff,%dl
.byte 255 // (bad)
@@ -19618,15 +19724,15 @@ _sk_load_g8_hsw:
.byte 76,3,8 // add (%rax),%r9
.byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 117,58 // jne 3173 <_sk_load_g8_hsw+0x53>
+ .byte 117,58 // jne 330f <_sk_load_g8_hsw+0x53>
.byte 196,194,121,48,4,25 // vpmovzxbw (%r9,%rbx,1),%xmm0
- .byte 197,249,219,5,121,63,0,0 // vpand 0x3f79(%rip),%xmm0,%xmm0 # 70c0 <_sk_clut_4D_hsw+0x1136>
+ .byte 197,249,219,5,157,66,0,0 // vpand 0x429d(%rip),%xmm0,%xmm0 # 7580 <_sk_clut_4D_hsw+0x112a>
.byte 196,226,125,51,192 // vpmovzxwd %xmm0,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,226,125,24,13,139,57,0,0 // vbroadcastss 0x398b(%rip),%ymm1 # 6ae4 <_sk_clut_4D_hsw+0xb5a>
+ .byte 196,226,125,24,13,187,60,0,0 // vbroadcastss 0x3cbb(%rip),%ymm1 # 6fb0 <_sk_clut_4D_hsw+0xb5a>
.byte 197,252,89,193 // vmulps %ymm1,%ymm0,%ymm0
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,29,128,57,0,0 // vbroadcastss 0x3980(%rip),%ymm3 # 6ae8 <_sk_clut_4D_hsw+0xb5e>
+ .byte 196,226,125,24,29,176,60,0,0 // vbroadcastss 0x3cb0(%rip),%ymm3 # 6fb4 <_sk_clut_4D_hsw+0xb5e>
.byte 197,252,40,200 // vmovaps %ymm0,%ymm1
.byte 197,252,40,208 // vmovaps %ymm0,%ymm2
.byte 91 // pop %rbx
@@ -19636,15 +19742,15 @@ _sk_load_g8_hsw:
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 65,254,202 // dec %r10b
.byte 65,128,250,6 // cmp $0x6,%r10b
- .byte 119,184 // ja 313f <_sk_load_g8_hsw+0x1f>
+ .byte 119,184 // ja 32db <_sk_load_g8_hsw+0x1f>
.byte 69,15,182,210 // movzbl %r10b,%r10d
- .byte 76,141,29,122,0,0,0 // lea 0x7a(%rip),%r11 # 320c <_sk_load_g8_hsw+0xec>
+ .byte 76,141,29,122,0,0,0 // lea 0x7a(%rip),%r11 # 33a8 <_sk_load_g8_hsw+0xec>
.byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
.byte 65,15,182,4,25 // movzbl (%r9,%rbx,1),%eax
.byte 197,249,110,192 // vmovd %eax,%xmm0
- .byte 235,153 // jmp 313f <_sk_load_g8_hsw+0x1f>
+ .byte 235,153 // jmp 32db <_sk_load_g8_hsw+0x1f>
.byte 65,15,182,68,25,2 // movzbl 0x2(%r9,%rbx,1),%eax
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 197,249,196,192,2 // vpinsrw $0x2,%eax,%xmm0,%xmm0
@@ -19652,7 +19758,7 @@ _sk_load_g8_hsw:
.byte 197,249,110,200 // vmovd %eax,%xmm1
.byte 196,226,121,48,201 // vpmovzxbw %xmm1,%xmm1
.byte 196,227,121,2,193,1 // vpblendd $0x1,%xmm1,%xmm0,%xmm0
- .byte 233,113,255,255,255 // jmpq 313f <_sk_load_g8_hsw+0x1f>
+ .byte 233,113,255,255,255 // jmpq 32db <_sk_load_g8_hsw+0x1f>
.byte 65,15,182,68,25,6 // movzbl 0x6(%r9,%rbx,1),%eax
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 197,249,196,192,6 // vpinsrw $0x6,%eax,%xmm0,%xmm0
@@ -19663,7 +19769,7 @@ _sk_load_g8_hsw:
.byte 196,193,121,110,12,25 // vmovd (%r9,%rbx,1),%xmm1
.byte 196,226,121,48,201 // vpmovzxbw %xmm1,%xmm1
.byte 196,227,113,2,192,12 // vpblendd $0xc,%xmm0,%xmm1,%xmm0
- .byte 233,54,255,255,255 // jmpq 313f <_sk_load_g8_hsw+0x1f>
+ .byte 233,54,255,255,255 // jmpq 32db <_sk_load_g8_hsw+0x1f>
.byte 15,31,0 // nopl (%rax)
.byte 143 // (bad)
.byte 255 // (bad)
@@ -19697,15 +19803,15 @@ _sk_load_g8_dst_hsw:
.byte 76,3,8 // add (%rax),%r9
.byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 117,58 // jne 327b <_sk_load_g8_dst_hsw+0x53>
+ .byte 117,58 // jne 3417 <_sk_load_g8_dst_hsw+0x53>
.byte 196,194,121,48,36,25 // vpmovzxbw (%r9,%rbx,1),%xmm4
- .byte 197,217,219,37,129,62,0,0 // vpand 0x3e81(%rip),%xmm4,%xmm4 # 70d0 <_sk_clut_4D_hsw+0x1146>
+ .byte 197,217,219,37,165,65,0,0 // vpand 0x41a5(%rip),%xmm4,%xmm4 # 7590 <_sk_clut_4D_hsw+0x113a>
.byte 196,226,125,51,228 // vpmovzxwd %xmm4,%ymm4
.byte 197,252,91,228 // vcvtdq2ps %ymm4,%ymm4
- .byte 196,226,125,24,45,139,56,0,0 // vbroadcastss 0x388b(%rip),%ymm5 # 6aec <_sk_clut_4D_hsw+0xb62>
+ .byte 196,226,125,24,45,187,59,0,0 // vbroadcastss 0x3bbb(%rip),%ymm5 # 6fb8 <_sk_clut_4D_hsw+0xb62>
.byte 197,220,89,229 // vmulps %ymm5,%ymm4,%ymm4
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,61,128,56,0,0 // vbroadcastss 0x3880(%rip),%ymm7 # 6af0 <_sk_clut_4D_hsw+0xb66>
+ .byte 196,226,125,24,61,176,59,0,0 // vbroadcastss 0x3bb0(%rip),%ymm7 # 6fbc <_sk_clut_4D_hsw+0xb66>
.byte 197,252,40,236 // vmovaps %ymm4,%ymm5
.byte 197,252,40,244 // vmovaps %ymm4,%ymm6
.byte 91 // pop %rbx
@@ -19715,15 +19821,15 @@ _sk_load_g8_dst_hsw:
.byte 197,217,239,228 // vpxor %xmm4,%xmm4,%xmm4
.byte 65,254,202 // dec %r10b
.byte 65,128,250,6 // cmp $0x6,%r10b
- .byte 119,184 // ja 3247 <_sk_load_g8_dst_hsw+0x1f>
+ .byte 119,184 // ja 33e3 <_sk_load_g8_dst_hsw+0x1f>
.byte 69,15,182,210 // movzbl %r10b,%r10d
- .byte 76,141,29,122,0,0,0 // lea 0x7a(%rip),%r11 # 3314 <_sk_load_g8_dst_hsw+0xec>
+ .byte 76,141,29,122,0,0,0 // lea 0x7a(%rip),%r11 # 34b0 <_sk_load_g8_dst_hsw+0xec>
.byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
.byte 65,15,182,4,25 // movzbl (%r9,%rbx,1),%eax
.byte 197,249,110,224 // vmovd %eax,%xmm4
- .byte 235,153 // jmp 3247 <_sk_load_g8_dst_hsw+0x1f>
+ .byte 235,153 // jmp 33e3 <_sk_load_g8_dst_hsw+0x1f>
.byte 65,15,182,68,25,2 // movzbl 0x2(%r9,%rbx,1),%eax
.byte 197,217,239,228 // vpxor %xmm4,%xmm4,%xmm4
.byte 197,217,196,224,2 // vpinsrw $0x2,%eax,%xmm4,%xmm4
@@ -19731,7 +19837,7 @@ _sk_load_g8_dst_hsw:
.byte 197,249,110,232 // vmovd %eax,%xmm5
.byte 196,226,121,48,237 // vpmovzxbw %xmm5,%xmm5
.byte 196,227,89,2,229,1 // vpblendd $0x1,%xmm5,%xmm4,%xmm4
- .byte 233,113,255,255,255 // jmpq 3247 <_sk_load_g8_dst_hsw+0x1f>
+ .byte 233,113,255,255,255 // jmpq 33e3 <_sk_load_g8_dst_hsw+0x1f>
.byte 65,15,182,68,25,6 // movzbl 0x6(%r9,%rbx,1),%eax
.byte 197,217,239,228 // vpxor %xmm4,%xmm4,%xmm4
.byte 197,217,196,224,6 // vpinsrw $0x6,%eax,%xmm4,%xmm4
@@ -19742,7 +19848,7 @@ _sk_load_g8_dst_hsw:
.byte 196,193,121,110,44,25 // vmovd (%r9,%rbx,1),%xmm5
.byte 196,226,121,48,237 // vpmovzxbw %xmm5,%xmm5
.byte 196,227,81,2,228,12 // vpblendd $0xc,%xmm4,%xmm5,%xmm4
- .byte 233,54,255,255,255 // jmpq 3247 <_sk_load_g8_dst_hsw+0x1f>
+ .byte 233,54,255,255,255 // jmpq 33e3 <_sk_load_g8_dst_hsw+0x1f>
.byte 15,31,0 // nopl (%rax)
.byte 143 // (bad)
.byte 255 // (bad)
@@ -19804,10 +19910,10 @@ _sk_gather_g8_hsw:
.byte 196,227,121,32,192,7 // vpinsrb $0x7,%eax,%xmm0,%xmm0
.byte 196,226,125,49,192 // vpmovzxbd %xmm0,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,226,125,24,13,14,55,0,0 // vbroadcastss 0x370e(%rip),%ymm1 # 6af4 <_sk_clut_4D_hsw+0xb6a>
+ .byte 196,226,125,24,13,62,58,0,0 // vbroadcastss 0x3a3e(%rip),%ymm1 # 6fc0 <_sk_clut_4D_hsw+0xb6a>
.byte 197,252,89,193 // vmulps %ymm1,%ymm0,%ymm0
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,29,3,55,0,0 // vbroadcastss 0x3703(%rip),%ymm3 # 6af8 <_sk_clut_4D_hsw+0xb6e>
+ .byte 196,226,125,24,29,51,58,0,0 // vbroadcastss 0x3a33(%rip),%ymm3 # 6fc4 <_sk_clut_4D_hsw+0xb6e>
.byte 197,252,40,200 // vmovaps %ymm0,%ymm1
.byte 197,252,40,208 // vmovaps %ymm0,%ymm2
.byte 255,224 // jmpq *%rax
@@ -19825,26 +19931,26 @@ _sk_load_565_hsw:
.byte 76,3,8 // add (%rax),%r9
.byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 117,115 // jne 348e <_sk_load_565_hsw+0x8f>
+ .byte 117,115 // jne 362a <_sk_load_565_hsw+0x8f>
.byte 196,193,122,111,4,89 // vmovdqu (%r9,%rbx,2),%xmm0
.byte 196,226,125,51,208 // vpmovzxwd %xmm0,%ymm2
- .byte 196,226,125,88,5,205,54,0,0 // vpbroadcastd 0x36cd(%rip),%ymm0 # 6afc <_sk_clut_4D_hsw+0xb72>
+ .byte 196,226,125,88,5,253,57,0,0 // vpbroadcastd 0x39fd(%rip),%ymm0 # 6fc8 <_sk_clut_4D_hsw+0xb72>
.byte 197,237,219,192 // vpand %ymm0,%ymm2,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,226,125,24,13,192,54,0,0 // vbroadcastss 0x36c0(%rip),%ymm1 # 6b00 <_sk_clut_4D_hsw+0xb76>
+ .byte 196,226,125,24,13,240,57,0,0 // vbroadcastss 0x39f0(%rip),%ymm1 # 6fcc <_sk_clut_4D_hsw+0xb76>
.byte 197,252,89,193 // vmulps %ymm1,%ymm0,%ymm0
- .byte 196,226,125,88,13,183,54,0,0 // vpbroadcastd 0x36b7(%rip),%ymm1 # 6b04 <_sk_clut_4D_hsw+0xb7a>
+ .byte 196,226,125,88,13,231,57,0,0 // vpbroadcastd 0x39e7(%rip),%ymm1 # 6fd0 <_sk_clut_4D_hsw+0xb7a>
.byte 197,237,219,201 // vpand %ymm1,%ymm2,%ymm1
.byte 197,252,91,201 // vcvtdq2ps %ymm1,%ymm1
- .byte 196,226,125,24,29,170,54,0,0 // vbroadcastss 0x36aa(%rip),%ymm3 # 6b08 <_sk_clut_4D_hsw+0xb7e>
+ .byte 196,226,125,24,29,218,57,0,0 // vbroadcastss 0x39da(%rip),%ymm3 # 6fd4 <_sk_clut_4D_hsw+0xb7e>
.byte 197,244,89,203 // vmulps %ymm3,%ymm1,%ymm1
- .byte 196,226,125,88,29,161,54,0,0 // vpbroadcastd 0x36a1(%rip),%ymm3 # 6b0c <_sk_clut_4D_hsw+0xb82>
+ .byte 196,226,125,88,29,209,57,0,0 // vpbroadcastd 0x39d1(%rip),%ymm3 # 6fd8 <_sk_clut_4D_hsw+0xb82>
.byte 197,237,219,211 // vpand %ymm3,%ymm2,%ymm2
.byte 197,252,91,210 // vcvtdq2ps %ymm2,%ymm2
- .byte 196,226,125,24,29,148,54,0,0 // vbroadcastss 0x3694(%rip),%ymm3 # 6b10 <_sk_clut_4D_hsw+0xb86>
+ .byte 196,226,125,24,29,196,57,0,0 // vbroadcastss 0x39c4(%rip),%ymm3 # 6fdc <_sk_clut_4D_hsw+0xb86>
.byte 197,236,89,211 // vmulps %ymm3,%ymm2,%ymm2
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,29,137,54,0,0 // vbroadcastss 0x3689(%rip),%ymm3 # 6b14 <_sk_clut_4D_hsw+0xb8a>
+ .byte 196,226,125,24,29,185,57,0,0 // vbroadcastss 0x39b9(%rip),%ymm3 # 6fe0 <_sk_clut_4D_hsw+0xb8a>
.byte 91 // pop %rbx
.byte 255,224 // jmpq *%rax
.byte 69,137,194 // mov %r8d,%r10d
@@ -19852,27 +19958,27 @@ _sk_load_565_hsw:
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 65,254,202 // dec %r10b
.byte 65,128,250,6 // cmp $0x6,%r10b
- .byte 15,135,123,255,255,255 // ja 3421 <_sk_load_565_hsw+0x22>
+ .byte 15,135,123,255,255,255 // ja 35bd <_sk_load_565_hsw+0x22>
.byte 69,15,182,210 // movzbl %r10b,%r10d
- .byte 76,141,29,99,0,0,0 // lea 0x63(%rip),%r11 # 3514 <_sk_load_565_hsw+0x115>
+ .byte 76,141,29,99,0,0,0 // lea 0x63(%rip),%r11 # 36b0 <_sk_load_565_hsw+0x115>
.byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
.byte 65,15,183,4,89 // movzwl (%r9,%rbx,2),%eax
.byte 197,249,110,192 // vmovd %eax,%xmm0
- .byte 233,89,255,255,255 // jmpq 3421 <_sk_load_565_hsw+0x22>
+ .byte 233,89,255,255,255 // jmpq 35bd <_sk_load_565_hsw+0x22>
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 196,193,121,196,68,89,4,2 // vpinsrw $0x2,0x4(%r9,%rbx,2),%xmm0,%xmm0
.byte 196,193,121,110,12,89 // vmovd (%r9,%rbx,2),%xmm1
.byte 196,227,121,2,193,1 // vpblendd $0x1,%xmm1,%xmm0,%xmm0
- .byte 233,60,255,255,255 // jmpq 3421 <_sk_load_565_hsw+0x22>
+ .byte 233,60,255,255,255 // jmpq 35bd <_sk_load_565_hsw+0x22>
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 196,193,121,196,68,89,12,6 // vpinsrw $0x6,0xc(%r9,%rbx,2),%xmm0,%xmm0
.byte 196,193,121,196,68,89,10,5 // vpinsrw $0x5,0xa(%r9,%rbx,2),%xmm0,%xmm0
.byte 196,193,121,196,68,89,8,4 // vpinsrw $0x4,0x8(%r9,%rbx,2),%xmm0,%xmm0
.byte 196,193,122,126,12,89 // vmovq (%r9,%rbx,2),%xmm1
.byte 196,227,113,2,192,12 // vpblendd $0xc,%xmm0,%xmm1,%xmm0
- .byte 233,15,255,255,255 // jmpq 3421 <_sk_load_565_hsw+0x22>
+ .byte 233,15,255,255,255 // jmpq 35bd <_sk_load_565_hsw+0x22>
.byte 102,144 // xchg %ax,%ax
.byte 166 // cmpsb %es:(%rdi),%ds:(%rsi)
.byte 255 // (bad)
@@ -19906,26 +20012,26 @@ _sk_load_565_dst_hsw:
.byte 76,3,8 // add (%rax),%r9
.byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 117,115 // jne 35bf <_sk_load_565_dst_hsw+0x8f>
+ .byte 117,115 // jne 375b <_sk_load_565_dst_hsw+0x8f>
.byte 196,193,122,111,36,89 // vmovdqu (%r9,%rbx,2),%xmm4
.byte 196,226,125,51,244 // vpmovzxwd %xmm4,%ymm6
- .byte 196,226,125,88,37,184,53,0,0 // vpbroadcastd 0x35b8(%rip),%ymm4 # 6b18 <_sk_clut_4D_hsw+0xb8e>
+ .byte 196,226,125,88,37,232,56,0,0 // vpbroadcastd 0x38e8(%rip),%ymm4 # 6fe4 <_sk_clut_4D_hsw+0xb8e>
.byte 197,205,219,228 // vpand %ymm4,%ymm6,%ymm4
.byte 197,252,91,228 // vcvtdq2ps %ymm4,%ymm4
- .byte 196,226,125,24,45,171,53,0,0 // vbroadcastss 0x35ab(%rip),%ymm5 # 6b1c <_sk_clut_4D_hsw+0xb92>
+ .byte 196,226,125,24,45,219,56,0,0 // vbroadcastss 0x38db(%rip),%ymm5 # 6fe8 <_sk_clut_4D_hsw+0xb92>
.byte 197,220,89,229 // vmulps %ymm5,%ymm4,%ymm4
- .byte 196,226,125,88,45,162,53,0,0 // vpbroadcastd 0x35a2(%rip),%ymm5 # 6b20 <_sk_clut_4D_hsw+0xb96>
+ .byte 196,226,125,88,45,210,56,0,0 // vpbroadcastd 0x38d2(%rip),%ymm5 # 6fec <_sk_clut_4D_hsw+0xb96>
.byte 197,205,219,237 // vpand %ymm5,%ymm6,%ymm5
.byte 197,252,91,237 // vcvtdq2ps %ymm5,%ymm5
- .byte 196,226,125,24,61,149,53,0,0 // vbroadcastss 0x3595(%rip),%ymm7 # 6b24 <_sk_clut_4D_hsw+0xb9a>
+ .byte 196,226,125,24,61,197,56,0,0 // vbroadcastss 0x38c5(%rip),%ymm7 # 6ff0 <_sk_clut_4D_hsw+0xb9a>
.byte 197,212,89,239 // vmulps %ymm7,%ymm5,%ymm5
- .byte 196,226,125,88,61,140,53,0,0 // vpbroadcastd 0x358c(%rip),%ymm7 # 6b28 <_sk_clut_4D_hsw+0xb9e>
+ .byte 196,226,125,88,61,188,56,0,0 // vpbroadcastd 0x38bc(%rip),%ymm7 # 6ff4 <_sk_clut_4D_hsw+0xb9e>
.byte 197,205,219,247 // vpand %ymm7,%ymm6,%ymm6
.byte 197,252,91,246 // vcvtdq2ps %ymm6,%ymm6
- .byte 196,226,125,24,61,127,53,0,0 // vbroadcastss 0x357f(%rip),%ymm7 # 6b2c <_sk_clut_4D_hsw+0xba2>
+ .byte 196,226,125,24,61,175,56,0,0 // vbroadcastss 0x38af(%rip),%ymm7 # 6ff8 <_sk_clut_4D_hsw+0xba2>
.byte 197,204,89,247 // vmulps %ymm7,%ymm6,%ymm6
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,61,116,53,0,0 // vbroadcastss 0x3574(%rip),%ymm7 # 6b30 <_sk_clut_4D_hsw+0xba6>
+ .byte 196,226,125,24,61,164,56,0,0 // vbroadcastss 0x38a4(%rip),%ymm7 # 6ffc <_sk_clut_4D_hsw+0xba6>
.byte 91 // pop %rbx
.byte 255,224 // jmpq *%rax
.byte 69,137,194 // mov %r8d,%r10d
@@ -19933,27 +20039,27 @@ _sk_load_565_dst_hsw:
.byte 197,217,239,228 // vpxor %xmm4,%xmm4,%xmm4
.byte 65,254,202 // dec %r10b
.byte 65,128,250,6 // cmp $0x6,%r10b
- .byte 15,135,123,255,255,255 // ja 3552 <_sk_load_565_dst_hsw+0x22>
+ .byte 15,135,123,255,255,255 // ja 36ee <_sk_load_565_dst_hsw+0x22>
.byte 69,15,182,210 // movzbl %r10b,%r10d
- .byte 76,141,29,98,0,0,0 // lea 0x62(%rip),%r11 # 3644 <_sk_load_565_dst_hsw+0x114>
+ .byte 76,141,29,98,0,0,0 // lea 0x62(%rip),%r11 # 37e0 <_sk_load_565_dst_hsw+0x114>
.byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
.byte 65,15,183,4,89 // movzwl (%r9,%rbx,2),%eax
.byte 197,249,110,224 // vmovd %eax,%xmm4
- .byte 233,89,255,255,255 // jmpq 3552 <_sk_load_565_dst_hsw+0x22>
+ .byte 233,89,255,255,255 // jmpq 36ee <_sk_load_565_dst_hsw+0x22>
.byte 197,217,239,228 // vpxor %xmm4,%xmm4,%xmm4
.byte 196,193,89,196,100,89,4,2 // vpinsrw $0x2,0x4(%r9,%rbx,2),%xmm4,%xmm4
.byte 196,193,121,110,44,89 // vmovd (%r9,%rbx,2),%xmm5
.byte 196,227,89,2,229,1 // vpblendd $0x1,%xmm5,%xmm4,%xmm4
- .byte 233,60,255,255,255 // jmpq 3552 <_sk_load_565_dst_hsw+0x22>
+ .byte 233,60,255,255,255 // jmpq 36ee <_sk_load_565_dst_hsw+0x22>
.byte 197,217,239,228 // vpxor %xmm4,%xmm4,%xmm4
.byte 196,193,89,196,100,89,12,6 // vpinsrw $0x6,0xc(%r9,%rbx,2),%xmm4,%xmm4
.byte 196,193,89,196,100,89,10,5 // vpinsrw $0x5,0xa(%r9,%rbx,2),%xmm4,%xmm4
.byte 196,193,89,196,100,89,8,4 // vpinsrw $0x4,0x8(%r9,%rbx,2),%xmm4,%xmm4
.byte 196,193,122,126,44,89 // vmovq (%r9,%rbx,2),%xmm5
.byte 196,227,81,2,228,12 // vpblendd $0xc,%xmm4,%xmm5,%xmm4
- .byte 233,15,255,255,255 // jmpq 3552 <_sk_load_565_dst_hsw+0x22>
+ .byte 233,15,255,255,255 // jmpq 36ee <_sk_load_565_dst_hsw+0x22>
.byte 144 // nop
.byte 167 // cmpsl %es:(%rdi),%ds:(%rsi)
.byte 255 // (bad)
@@ -20016,23 +20122,23 @@ _sk_gather_565_hsw:
.byte 67,15,183,4,89 // movzwl (%r9,%r11,2),%eax
.byte 197,249,196,192,7 // vpinsrw $0x7,%eax,%xmm0,%xmm0
.byte 196,226,125,51,208 // vpmovzxwd %xmm0,%ymm2
- .byte 196,226,125,88,5,35,52,0,0 // vpbroadcastd 0x3423(%rip),%ymm0 # 6b34 <_sk_clut_4D_hsw+0xbaa>
+ .byte 196,226,125,88,5,83,55,0,0 // vpbroadcastd 0x3753(%rip),%ymm0 # 7000 <_sk_clut_4D_hsw+0xbaa>
.byte 197,237,219,192 // vpand %ymm0,%ymm2,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,226,125,24,13,22,52,0,0 // vbroadcastss 0x3416(%rip),%ymm1 # 6b38 <_sk_clut_4D_hsw+0xbae>
+ .byte 196,226,125,24,13,70,55,0,0 // vbroadcastss 0x3746(%rip),%ymm1 # 7004 <_sk_clut_4D_hsw+0xbae>
.byte 197,252,89,193 // vmulps %ymm1,%ymm0,%ymm0
- .byte 196,226,125,88,13,13,52,0,0 // vpbroadcastd 0x340d(%rip),%ymm1 # 6b3c <_sk_clut_4D_hsw+0xbb2>
+ .byte 196,226,125,88,13,61,55,0,0 // vpbroadcastd 0x373d(%rip),%ymm1 # 7008 <_sk_clut_4D_hsw+0xbb2>
.byte 197,237,219,201 // vpand %ymm1,%ymm2,%ymm1
.byte 197,252,91,201 // vcvtdq2ps %ymm1,%ymm1
- .byte 196,226,125,24,29,0,52,0,0 // vbroadcastss 0x3400(%rip),%ymm3 # 6b40 <_sk_clut_4D_hsw+0xbb6>
+ .byte 196,226,125,24,29,48,55,0,0 // vbroadcastss 0x3730(%rip),%ymm3 # 700c <_sk_clut_4D_hsw+0xbb6>
.byte 197,244,89,203 // vmulps %ymm3,%ymm1,%ymm1
- .byte 196,226,125,88,29,247,51,0,0 // vpbroadcastd 0x33f7(%rip),%ymm3 # 6b44 <_sk_clut_4D_hsw+0xbba>
+ .byte 196,226,125,88,29,39,55,0,0 // vpbroadcastd 0x3727(%rip),%ymm3 # 7010 <_sk_clut_4D_hsw+0xbba>
.byte 197,237,219,211 // vpand %ymm3,%ymm2,%ymm2
.byte 197,252,91,210 // vcvtdq2ps %ymm2,%ymm2
- .byte 196,226,125,24,29,234,51,0,0 // vbroadcastss 0x33ea(%rip),%ymm3 # 6b48 <_sk_clut_4D_hsw+0xbbe>
+ .byte 196,226,125,24,29,26,55,0,0 // vbroadcastss 0x371a(%rip),%ymm3 # 7014 <_sk_clut_4D_hsw+0xbbe>
.byte 197,236,89,211 // vmulps %ymm3,%ymm2,%ymm2
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,29,223,51,0,0 // vbroadcastss 0x33df(%rip),%ymm3 # 6b4c <_sk_clut_4D_hsw+0xbc2>
+ .byte 196,226,125,24,29,15,55,0,0 // vbroadcastss 0x370f(%rip),%ymm3 # 7018 <_sk_clut_4D_hsw+0xbc2>
.byte 255,224 // jmpq *%rax
HIDDEN _sk_store_565_hsw
@@ -20047,11 +20153,11 @@ _sk_store_565_hsw:
.byte 77,1,201 // add %r9,%r9
.byte 76,3,8 // add (%rax),%r9
.byte 72,99,218 // movslq %edx,%rbx
- .byte 196,98,125,24,5,193,51,0,0 // vbroadcastss 0x33c1(%rip),%ymm8 # 6b50 <_sk_clut_4D_hsw+0xbc6>
+ .byte 196,98,125,24,5,241,54,0,0 // vbroadcastss 0x36f1(%rip),%ymm8 # 701c <_sk_clut_4D_hsw+0xbc6>
.byte 196,65,124,89,200 // vmulps %ymm8,%ymm0,%ymm9
.byte 196,65,125,91,201 // vcvtps2dq %ymm9,%ymm9
.byte 196,193,53,114,241,11 // vpslld $0xb,%ymm9,%ymm9
- .byte 196,98,125,24,21,172,51,0,0 // vbroadcastss 0x33ac(%rip),%ymm10 # 6b54 <_sk_clut_4D_hsw+0xbca>
+ .byte 196,98,125,24,21,220,54,0,0 // vbroadcastss 0x36dc(%rip),%ymm10 # 7020 <_sk_clut_4D_hsw+0xbca>
.byte 196,65,116,89,210 // vmulps %ymm10,%ymm1,%ymm10
.byte 196,65,125,91,210 // vcvtps2dq %ymm10,%ymm10
.byte 196,193,45,114,242,5 // vpslld $0x5,%ymm10,%ymm10
@@ -20062,7 +20168,7 @@ _sk_store_565_hsw:
.byte 196,67,125,57,193,1 // vextracti128 $0x1,%ymm8,%xmm9
.byte 196,66,57,43,193 // vpackusdw %xmm9,%xmm8,%xmm8
.byte 77,133,192 // test %r8,%r8
- .byte 117,11 // jne 37e7 <_sk_store_565_hsw+0x78>
+ .byte 117,11 // jne 3983 <_sk_store_565_hsw+0x78>
.byte 196,65,122,127,4,89 // vmovdqu %xmm8,(%r9,%rbx,2)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 91 // pop %rbx
@@ -20071,22 +20177,22 @@ _sk_store_565_hsw:
.byte 65,128,226,7 // and $0x7,%r10b
.byte 65,254,202 // dec %r10b
.byte 65,128,250,6 // cmp $0x6,%r10b
- .byte 119,235 // ja 37e2 <_sk_store_565_hsw+0x73>
+ .byte 119,235 // ja 397e <_sk_store_565_hsw+0x73>
.byte 69,15,182,210 // movzbl %r10b,%r10d
- .byte 76,141,29,66,0,0,0 // lea 0x42(%rip),%r11 # 3844 <_sk_store_565_hsw+0xd5>
+ .byte 76,141,29,66,0,0,0 // lea 0x42(%rip),%r11 # 39e0 <_sk_store_565_hsw+0xd5>
.byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
.byte 196,67,121,21,4,89,0 // vpextrw $0x0,%xmm8,(%r9,%rbx,2)
- .byte 235,206 // jmp 37e2 <_sk_store_565_hsw+0x73>
+ .byte 235,206 // jmp 397e <_sk_store_565_hsw+0x73>
.byte 196,67,121,21,68,89,4,2 // vpextrw $0x2,%xmm8,0x4(%r9,%rbx,2)
.byte 196,65,121,126,4,89 // vmovd %xmm8,(%r9,%rbx,2)
- .byte 235,190 // jmp 37e2 <_sk_store_565_hsw+0x73>
+ .byte 235,190 // jmp 397e <_sk_store_565_hsw+0x73>
.byte 196,67,121,21,68,89,12,6 // vpextrw $0x6,%xmm8,0xc(%r9,%rbx,2)
.byte 196,67,121,21,68,89,10,5 // vpextrw $0x5,%xmm8,0xa(%r9,%rbx,2)
.byte 196,67,121,21,68,89,8,4 // vpextrw $0x4,%xmm8,0x8(%r9,%rbx,2)
.byte 196,65,121,214,4,89 // vmovq %xmm8,(%r9,%rbx,2)
- .byte 235,158 // jmp 37e2 <_sk_store_565_hsw+0x73>
+ .byte 235,158 // jmp 397e <_sk_store_565_hsw+0x73>
.byte 199 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
@@ -20104,7 +20210,7 @@ _sk_store_565_hsw:
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 232,255,255,255,224 // callq ffffffffe100385c <_sk_clut_4D_hsw+0xffffffffe0ffd8d2>
+ .byte 232,255,255,255,224 // callq ffffffffe10039f8 <_sk_clut_4D_hsw+0xffffffffe0ffd5a2>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // .byte 0xff
@@ -20122,28 +20228,28 @@ _sk_load_4444_hsw:
.byte 76,3,8 // add (%rax),%r9
.byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 15,133,139,0,0,0 // jne 390b <_sk_load_4444_hsw+0xab>
+ .byte 15,133,139,0,0,0 // jne 3aa7 <_sk_load_4444_hsw+0xab>
.byte 196,193,122,111,4,89 // vmovdqu (%r9,%rbx,2),%xmm0
.byte 196,226,125,51,216 // vpmovzxwd %xmm0,%ymm3
- .byte 196,226,125,88,5,196,50,0,0 // vpbroadcastd 0x32c4(%rip),%ymm0 # 6b58 <_sk_clut_4D_hsw+0xbce>
+ .byte 196,226,125,88,5,244,53,0,0 // vpbroadcastd 0x35f4(%rip),%ymm0 # 7024 <_sk_clut_4D_hsw+0xbce>
.byte 197,229,219,192 // vpand %ymm0,%ymm3,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,226,125,24,13,183,50,0,0 // vbroadcastss 0x32b7(%rip),%ymm1 # 6b5c <_sk_clut_4D_hsw+0xbd2>
+ .byte 196,226,125,24,13,231,53,0,0 // vbroadcastss 0x35e7(%rip),%ymm1 # 7028 <_sk_clut_4D_hsw+0xbd2>
.byte 197,252,89,193 // vmulps %ymm1,%ymm0,%ymm0
- .byte 196,226,125,88,13,174,50,0,0 // vpbroadcastd 0x32ae(%rip),%ymm1 # 6b60 <_sk_clut_4D_hsw+0xbd6>
+ .byte 196,226,125,88,13,222,53,0,0 // vpbroadcastd 0x35de(%rip),%ymm1 # 702c <_sk_clut_4D_hsw+0xbd6>
.byte 197,229,219,201 // vpand %ymm1,%ymm3,%ymm1
.byte 197,252,91,201 // vcvtdq2ps %ymm1,%ymm1
- .byte 196,226,125,24,21,161,50,0,0 // vbroadcastss 0x32a1(%rip),%ymm2 # 6b64 <_sk_clut_4D_hsw+0xbda>
+ .byte 196,226,125,24,21,209,53,0,0 // vbroadcastss 0x35d1(%rip),%ymm2 # 7030 <_sk_clut_4D_hsw+0xbda>
.byte 197,244,89,202 // vmulps %ymm2,%ymm1,%ymm1
- .byte 196,226,125,88,21,152,50,0,0 // vpbroadcastd 0x3298(%rip),%ymm2 # 6b68 <_sk_clut_4D_hsw+0xbde>
+ .byte 196,226,125,88,21,200,53,0,0 // vpbroadcastd 0x35c8(%rip),%ymm2 # 7034 <_sk_clut_4D_hsw+0xbde>
.byte 197,229,219,210 // vpand %ymm2,%ymm3,%ymm2
.byte 197,252,91,210 // vcvtdq2ps %ymm2,%ymm2
- .byte 196,98,125,24,5,139,50,0,0 // vbroadcastss 0x328b(%rip),%ymm8 # 6b6c <_sk_clut_4D_hsw+0xbe2>
+ .byte 196,98,125,24,5,187,53,0,0 // vbroadcastss 0x35bb(%rip),%ymm8 # 7038 <_sk_clut_4D_hsw+0xbe2>
.byte 196,193,108,89,208 // vmulps %ymm8,%ymm2,%ymm2
- .byte 196,98,125,88,5,129,50,0,0 // vpbroadcastd 0x3281(%rip),%ymm8 # 6b70 <_sk_clut_4D_hsw+0xbe6>
+ .byte 196,98,125,88,5,177,53,0,0 // vpbroadcastd 0x35b1(%rip),%ymm8 # 703c <_sk_clut_4D_hsw+0xbe6>
.byte 196,193,101,219,216 // vpand %ymm8,%ymm3,%ymm3
.byte 197,252,91,219 // vcvtdq2ps %ymm3,%ymm3
- .byte 196,98,125,24,5,115,50,0,0 // vbroadcastss 0x3273(%rip),%ymm8 # 6b74 <_sk_clut_4D_hsw+0xbea>
+ .byte 196,98,125,24,5,163,53,0,0 // vbroadcastss 0x35a3(%rip),%ymm8 # 7040 <_sk_clut_4D_hsw+0xbea>
.byte 196,193,100,89,216 // vmulps %ymm8,%ymm3,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 91 // pop %rbx
@@ -20153,27 +20259,27 @@ _sk_load_4444_hsw:
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 65,254,202 // dec %r10b
.byte 65,128,250,6 // cmp $0x6,%r10b
- .byte 15,135,99,255,255,255 // ja 3886 <_sk_load_4444_hsw+0x26>
+ .byte 15,135,99,255,255,255 // ja 3a22 <_sk_load_4444_hsw+0x26>
.byte 69,15,182,210 // movzbl %r10b,%r10d
- .byte 76,141,29,98,0,0,0 // lea 0x62(%rip),%r11 # 3990 <_sk_load_4444_hsw+0x130>
+ .byte 76,141,29,98,0,0,0 // lea 0x62(%rip),%r11 # 3b2c <_sk_load_4444_hsw+0x130>
.byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
.byte 65,15,183,4,89 // movzwl (%r9,%rbx,2),%eax
.byte 197,249,110,192 // vmovd %eax,%xmm0
- .byte 233,65,255,255,255 // jmpq 3886 <_sk_load_4444_hsw+0x26>
+ .byte 233,65,255,255,255 // jmpq 3a22 <_sk_load_4444_hsw+0x26>
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 196,193,121,196,68,89,4,2 // vpinsrw $0x2,0x4(%r9,%rbx,2),%xmm0,%xmm0
.byte 196,193,121,110,12,89 // vmovd (%r9,%rbx,2),%xmm1
.byte 196,227,121,2,193,1 // vpblendd $0x1,%xmm1,%xmm0,%xmm0
- .byte 233,36,255,255,255 // jmpq 3886 <_sk_load_4444_hsw+0x26>
+ .byte 233,36,255,255,255 // jmpq 3a22 <_sk_load_4444_hsw+0x26>
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 196,193,121,196,68,89,12,6 // vpinsrw $0x6,0xc(%r9,%rbx,2),%xmm0,%xmm0
.byte 196,193,121,196,68,89,10,5 // vpinsrw $0x5,0xa(%r9,%rbx,2),%xmm0,%xmm0
.byte 196,193,121,196,68,89,8,4 // vpinsrw $0x4,0x8(%r9,%rbx,2),%xmm0,%xmm0
.byte 196,193,122,126,12,89 // vmovq (%r9,%rbx,2),%xmm1
.byte 196,227,113,2,192,12 // vpblendd $0xc,%xmm0,%xmm1,%xmm0
- .byte 233,247,254,255,255 // jmpq 3886 <_sk_load_4444_hsw+0x26>
+ .byte 233,247,254,255,255 // jmpq 3a22 <_sk_load_4444_hsw+0x26>
.byte 144 // nop
.byte 167 // cmpsl %es:(%rdi),%ds:(%rsi)
.byte 255 // (bad)
@@ -20208,28 +20314,28 @@ _sk_load_4444_dst_hsw:
.byte 76,3,8 // add (%rax),%r9
.byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 15,133,139,0,0,0 // jne 3a57 <_sk_load_4444_dst_hsw+0xab>
+ .byte 15,133,139,0,0,0 // jne 3bf3 <_sk_load_4444_dst_hsw+0xab>
.byte 196,193,122,111,36,89 // vmovdqu (%r9,%rbx,2),%xmm4
.byte 196,226,125,51,252 // vpmovzxwd %xmm4,%ymm7
- .byte 196,226,125,88,37,152,49,0,0 // vpbroadcastd 0x3198(%rip),%ymm4 # 6b78 <_sk_clut_4D_hsw+0xbee>
+ .byte 196,226,125,88,37,200,52,0,0 // vpbroadcastd 0x34c8(%rip),%ymm4 # 7044 <_sk_clut_4D_hsw+0xbee>
.byte 197,197,219,228 // vpand %ymm4,%ymm7,%ymm4
.byte 197,252,91,228 // vcvtdq2ps %ymm4,%ymm4
- .byte 196,226,125,24,45,139,49,0,0 // vbroadcastss 0x318b(%rip),%ymm5 # 6b7c <_sk_clut_4D_hsw+0xbf2>
+ .byte 196,226,125,24,45,187,52,0,0 // vbroadcastss 0x34bb(%rip),%ymm5 # 7048 <_sk_clut_4D_hsw+0xbf2>
.byte 197,220,89,229 // vmulps %ymm5,%ymm4,%ymm4
- .byte 196,226,125,88,45,130,49,0,0 // vpbroadcastd 0x3182(%rip),%ymm5 # 6b80 <_sk_clut_4D_hsw+0xbf6>
+ .byte 196,226,125,88,45,178,52,0,0 // vpbroadcastd 0x34b2(%rip),%ymm5 # 704c <_sk_clut_4D_hsw+0xbf6>
.byte 197,197,219,237 // vpand %ymm5,%ymm7,%ymm5
.byte 197,252,91,237 // vcvtdq2ps %ymm5,%ymm5
- .byte 196,226,125,24,53,117,49,0,0 // vbroadcastss 0x3175(%rip),%ymm6 # 6b84 <_sk_clut_4D_hsw+0xbfa>
+ .byte 196,226,125,24,53,165,52,0,0 // vbroadcastss 0x34a5(%rip),%ymm6 # 7050 <_sk_clut_4D_hsw+0xbfa>
.byte 197,212,89,238 // vmulps %ymm6,%ymm5,%ymm5
- .byte 196,226,125,88,53,108,49,0,0 // vpbroadcastd 0x316c(%rip),%ymm6 # 6b88 <_sk_clut_4D_hsw+0xbfe>
+ .byte 196,226,125,88,53,156,52,0,0 // vpbroadcastd 0x349c(%rip),%ymm6 # 7054 <_sk_clut_4D_hsw+0xbfe>
.byte 197,197,219,246 // vpand %ymm6,%ymm7,%ymm6
.byte 197,252,91,246 // vcvtdq2ps %ymm6,%ymm6
- .byte 196,98,125,24,5,95,49,0,0 // vbroadcastss 0x315f(%rip),%ymm8 # 6b8c <_sk_clut_4D_hsw+0xc02>
+ .byte 196,98,125,24,5,143,52,0,0 // vbroadcastss 0x348f(%rip),%ymm8 # 7058 <_sk_clut_4D_hsw+0xc02>
.byte 196,193,76,89,240 // vmulps %ymm8,%ymm6,%ymm6
- .byte 196,98,125,88,5,85,49,0,0 // vpbroadcastd 0x3155(%rip),%ymm8 # 6b90 <_sk_clut_4D_hsw+0xc06>
+ .byte 196,98,125,88,5,133,52,0,0 // vpbroadcastd 0x3485(%rip),%ymm8 # 705c <_sk_clut_4D_hsw+0xc06>
.byte 196,193,69,219,248 // vpand %ymm8,%ymm7,%ymm7
.byte 197,252,91,255 // vcvtdq2ps %ymm7,%ymm7
- .byte 196,98,125,24,5,71,49,0,0 // vbroadcastss 0x3147(%rip),%ymm8 # 6b94 <_sk_clut_4D_hsw+0xc0a>
+ .byte 196,98,125,24,5,119,52,0,0 // vbroadcastss 0x3477(%rip),%ymm8 # 7060 <_sk_clut_4D_hsw+0xc0a>
.byte 196,193,68,89,248 // vmulps %ymm8,%ymm7,%ymm7
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 91 // pop %rbx
@@ -20239,27 +20345,27 @@ _sk_load_4444_dst_hsw:
.byte 197,217,239,228 // vpxor %xmm4,%xmm4,%xmm4
.byte 65,254,202 // dec %r10b
.byte 65,128,250,6 // cmp $0x6,%r10b
- .byte 15,135,99,255,255,255 // ja 39d2 <_sk_load_4444_dst_hsw+0x26>
+ .byte 15,135,99,255,255,255 // ja 3b6e <_sk_load_4444_dst_hsw+0x26>
.byte 69,15,182,210 // movzbl %r10b,%r10d
- .byte 76,141,29,98,0,0,0 // lea 0x62(%rip),%r11 # 3adc <_sk_load_4444_dst_hsw+0x130>
+ .byte 76,141,29,98,0,0,0 // lea 0x62(%rip),%r11 # 3c78 <_sk_load_4444_dst_hsw+0x130>
.byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
.byte 65,15,183,4,89 // movzwl (%r9,%rbx,2),%eax
.byte 197,249,110,224 // vmovd %eax,%xmm4
- .byte 233,65,255,255,255 // jmpq 39d2 <_sk_load_4444_dst_hsw+0x26>
+ .byte 233,65,255,255,255 // jmpq 3b6e <_sk_load_4444_dst_hsw+0x26>
.byte 197,217,239,228 // vpxor %xmm4,%xmm4,%xmm4
.byte 196,193,89,196,100,89,4,2 // vpinsrw $0x2,0x4(%r9,%rbx,2),%xmm4,%xmm4
.byte 196,193,121,110,44,89 // vmovd (%r9,%rbx,2),%xmm5
.byte 196,227,89,2,229,1 // vpblendd $0x1,%xmm5,%xmm4,%xmm4
- .byte 233,36,255,255,255 // jmpq 39d2 <_sk_load_4444_dst_hsw+0x26>
+ .byte 233,36,255,255,255 // jmpq 3b6e <_sk_load_4444_dst_hsw+0x26>
.byte 197,217,239,228 // vpxor %xmm4,%xmm4,%xmm4
.byte 196,193,89,196,100,89,12,6 // vpinsrw $0x6,0xc(%r9,%rbx,2),%xmm4,%xmm4
.byte 196,193,89,196,100,89,10,5 // vpinsrw $0x5,0xa(%r9,%rbx,2),%xmm4,%xmm4
.byte 196,193,89,196,100,89,8,4 // vpinsrw $0x4,0x8(%r9,%rbx,2),%xmm4,%xmm4
.byte 196,193,122,126,44,89 // vmovq (%r9,%rbx,2),%xmm5
.byte 196,227,81,2,228,12 // vpblendd $0xc,%xmm4,%xmm5,%xmm4
- .byte 233,247,254,255,255 // jmpq 39d2 <_sk_load_4444_dst_hsw+0x26>
+ .byte 233,247,254,255,255 // jmpq 3b6e <_sk_load_4444_dst_hsw+0x26>
.byte 144 // nop
.byte 167 // cmpsl %es:(%rdi),%ds:(%rsi)
.byte 255 // (bad)
@@ -20322,25 +20428,25 @@ _sk_gather_4444_hsw:
.byte 67,15,183,4,89 // movzwl (%r9,%r11,2),%eax
.byte 197,249,196,192,7 // vpinsrw $0x7,%eax,%xmm0,%xmm0
.byte 196,226,125,51,216 // vpmovzxwd %xmm0,%ymm3
- .byte 196,226,125,88,5,239,47,0,0 // vpbroadcastd 0x2fef(%rip),%ymm0 # 6b98 <_sk_clut_4D_hsw+0xc0e>
+ .byte 196,226,125,88,5,31,51,0,0 // vpbroadcastd 0x331f(%rip),%ymm0 # 7064 <_sk_clut_4D_hsw+0xc0e>
.byte 197,229,219,192 // vpand %ymm0,%ymm3,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,226,125,24,13,226,47,0,0 // vbroadcastss 0x2fe2(%rip),%ymm1 # 6b9c <_sk_clut_4D_hsw+0xc12>
+ .byte 196,226,125,24,13,18,51,0,0 // vbroadcastss 0x3312(%rip),%ymm1 # 7068 <_sk_clut_4D_hsw+0xc12>
.byte 197,252,89,193 // vmulps %ymm1,%ymm0,%ymm0
- .byte 196,226,125,88,13,217,47,0,0 // vpbroadcastd 0x2fd9(%rip),%ymm1 # 6ba0 <_sk_clut_4D_hsw+0xc16>
+ .byte 196,226,125,88,13,9,51,0,0 // vpbroadcastd 0x3309(%rip),%ymm1 # 706c <_sk_clut_4D_hsw+0xc16>
.byte 197,229,219,201 // vpand %ymm1,%ymm3,%ymm1
.byte 197,252,91,201 // vcvtdq2ps %ymm1,%ymm1
- .byte 196,226,125,24,21,204,47,0,0 // vbroadcastss 0x2fcc(%rip),%ymm2 # 6ba4 <_sk_clut_4D_hsw+0xc1a>
+ .byte 196,226,125,24,21,252,50,0,0 // vbroadcastss 0x32fc(%rip),%ymm2 # 7070 <_sk_clut_4D_hsw+0xc1a>
.byte 197,244,89,202 // vmulps %ymm2,%ymm1,%ymm1
- .byte 196,226,125,88,21,195,47,0,0 // vpbroadcastd 0x2fc3(%rip),%ymm2 # 6ba8 <_sk_clut_4D_hsw+0xc1e>
+ .byte 196,226,125,88,21,243,50,0,0 // vpbroadcastd 0x32f3(%rip),%ymm2 # 7074 <_sk_clut_4D_hsw+0xc1e>
.byte 197,229,219,210 // vpand %ymm2,%ymm3,%ymm2
.byte 197,252,91,210 // vcvtdq2ps %ymm2,%ymm2
- .byte 196,98,125,24,5,182,47,0,0 // vbroadcastss 0x2fb6(%rip),%ymm8 # 6bac <_sk_clut_4D_hsw+0xc22>
+ .byte 196,98,125,24,5,230,50,0,0 // vbroadcastss 0x32e6(%rip),%ymm8 # 7078 <_sk_clut_4D_hsw+0xc22>
.byte 196,193,108,89,208 // vmulps %ymm8,%ymm2,%ymm2
- .byte 196,98,125,88,5,172,47,0,0 // vpbroadcastd 0x2fac(%rip),%ymm8 # 6bb0 <_sk_clut_4D_hsw+0xc26>
+ .byte 196,98,125,88,5,220,50,0,0 // vpbroadcastd 0x32dc(%rip),%ymm8 # 707c <_sk_clut_4D_hsw+0xc26>
.byte 196,193,101,219,216 // vpand %ymm8,%ymm3,%ymm3
.byte 197,252,91,219 // vcvtdq2ps %ymm3,%ymm3
- .byte 196,98,125,24,5,158,47,0,0 // vbroadcastss 0x2f9e(%rip),%ymm8 # 6bb4 <_sk_clut_4D_hsw+0xc2a>
+ .byte 196,98,125,24,5,206,50,0,0 // vbroadcastss 0x32ce(%rip),%ymm8 # 7080 <_sk_clut_4D_hsw+0xc2a>
.byte 196,193,100,89,216 // vmulps %ymm8,%ymm3,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -20357,7 +20463,7 @@ _sk_store_4444_hsw:
.byte 77,1,201 // add %r9,%r9
.byte 76,3,8 // add (%rax),%r9
.byte 72,99,218 // movslq %edx,%rbx
- .byte 196,98,125,24,5,121,47,0,0 // vbroadcastss 0x2f79(%rip),%ymm8 # 6bb8 <_sk_clut_4D_hsw+0xc2e>
+ .byte 196,98,125,24,5,169,50,0,0 // vbroadcastss 0x32a9(%rip),%ymm8 # 7084 <_sk_clut_4D_hsw+0xc2e>
.byte 196,65,124,89,200 // vmulps %ymm8,%ymm0,%ymm9
.byte 196,65,125,91,201 // vcvtps2dq %ymm9,%ymm9
.byte 196,193,53,114,241,12 // vpslld $0xc,%ymm9,%ymm9
@@ -20375,7 +20481,7 @@ _sk_store_4444_hsw:
.byte 196,67,125,57,193,1 // vextracti128 $0x1,%ymm8,%xmm9
.byte 196,66,57,43,193 // vpackusdw %xmm9,%xmm8,%xmm8
.byte 77,133,192 // test %r8,%r8
- .byte 117,11 // jne 3ca3 <_sk_store_4444_hsw+0x84>
+ .byte 117,11 // jne 3e3f <_sk_store_4444_hsw+0x84>
.byte 196,65,122,127,4,89 // vmovdqu %xmm8,(%r9,%rbx,2)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 91 // pop %rbx
@@ -20384,22 +20490,22 @@ _sk_store_4444_hsw:
.byte 65,128,226,7 // and $0x7,%r10b
.byte 65,254,202 // dec %r10b
.byte 65,128,250,6 // cmp $0x6,%r10b
- .byte 119,235 // ja 3c9e <_sk_store_4444_hsw+0x7f>
+ .byte 119,235 // ja 3e3a <_sk_store_4444_hsw+0x7f>
.byte 69,15,182,210 // movzbl %r10b,%r10d
- .byte 76,141,29,66,0,0,0 // lea 0x42(%rip),%r11 # 3d00 <_sk_store_4444_hsw+0xe1>
+ .byte 76,141,29,66,0,0,0 // lea 0x42(%rip),%r11 # 3e9c <_sk_store_4444_hsw+0xe1>
.byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
.byte 196,67,121,21,4,89,0 // vpextrw $0x0,%xmm8,(%r9,%rbx,2)
- .byte 235,206 // jmp 3c9e <_sk_store_4444_hsw+0x7f>
+ .byte 235,206 // jmp 3e3a <_sk_store_4444_hsw+0x7f>
.byte 196,67,121,21,68,89,4,2 // vpextrw $0x2,%xmm8,0x4(%r9,%rbx,2)
.byte 196,65,121,126,4,89 // vmovd %xmm8,(%r9,%rbx,2)
- .byte 235,190 // jmp 3c9e <_sk_store_4444_hsw+0x7f>
+ .byte 235,190 // jmp 3e3a <_sk_store_4444_hsw+0x7f>
.byte 196,67,121,21,68,89,12,6 // vpextrw $0x6,%xmm8,0xc(%r9,%rbx,2)
.byte 196,67,121,21,68,89,10,5 // vpextrw $0x5,%xmm8,0xa(%r9,%rbx,2)
.byte 196,67,121,21,68,89,8,4 // vpextrw $0x4,%xmm8,0x8(%r9,%rbx,2)
.byte 196,65,121,214,4,89 // vmovq %xmm8,(%r9,%rbx,2)
- .byte 235,158 // jmp 3c9e <_sk_store_4444_hsw+0x7f>
+ .byte 235,158 // jmp 3e3a <_sk_store_4444_hsw+0x7f>
.byte 199 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
@@ -20417,7 +20523,7 @@ _sk_store_4444_hsw:
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 232,255,255,255,224 // callq ffffffffe1003d18 <_sk_clut_4D_hsw+0xffffffffe0ffdd8e>
+ .byte 232,255,255,255,224 // callq ffffffffe1003eb4 <_sk_clut_4D_hsw+0xffffffffe0ffda5e>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // .byte 0xff
@@ -20426,85 +20532,173 @@ HIDDEN _sk_load_8888_hsw
.globl _sk_load_8888_hsw
FUNCTION(_sk_load_8888_hsw)
_sk_load_8888_hsw:
- .byte 73,137,201 // mov %rcx,%r9
+ .byte 83 // push %rbx
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,99,80,8 // movslq 0x8(%rax),%r10
- .byte 73,99,201 // movslq %r9d,%rcx
- .byte 73,15,175,202 // imul %r10,%rcx
- .byte 72,193,225,2 // shl $0x2,%rcx
- .byte 72,3,8 // add (%rax),%rcx
- .byte 72,99,194 // movslq %edx,%rax
- .byte 72,141,4,129 // lea (%rcx,%rax,4),%rax
+ .byte 76,99,201 // movslq %ecx,%r9
+ .byte 77,15,175,202 // imul %r10,%r9
+ .byte 73,193,225,2 // shl $0x2,%r9
+ .byte 76,3,8 // add (%rax),%r9
+ .byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 117,87 // jne 3d96 <_sk_load_8888_hsw+0x7a>
- .byte 197,252,16,24 // vmovups (%rax),%ymm3
- .byte 197,228,84,5,181,48,0,0 // vandps 0x30b5(%rip),%ymm3,%ymm0 # 6e00 <_sk_clut_4D_hsw+0xe76>
+ .byte 117,87 // jne 3f2c <_sk_load_8888_hsw+0x74>
+ .byte 196,193,126,111,28,153 // vmovdqu (%r9,%rbx,4),%ymm3
+ .byte 197,229,219,5,221,51,0,0 // vpand 0x33dd(%rip),%ymm3,%ymm0 # 72c0 <_sk_clut_4D_hsw+0xe6a>
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,98,125,24,5,100,46,0,0 // vbroadcastss 0x2e64(%rip),%ymm8 # 6bbc <_sk_clut_4D_hsw+0xc32>
+ .byte 196,98,125,24,5,152,49,0,0 // vbroadcastss 0x3198(%rip),%ymm8 # 7088 <_sk_clut_4D_hsw+0xc32>
.byte 196,193,124,89,192 // vmulps %ymm8,%ymm0,%ymm0
- .byte 196,226,101,0,13,186,48,0,0 // vpshufb 0x30ba(%rip),%ymm3,%ymm1 # 6e20 <_sk_clut_4D_hsw+0xe96>
+ .byte 196,226,101,0,13,226,51,0,0 // vpshufb 0x33e2(%rip),%ymm3,%ymm1 # 72e0 <_sk_clut_4D_hsw+0xe8a>
.byte 197,252,91,201 // vcvtdq2ps %ymm1,%ymm1
.byte 196,193,116,89,200 // vmulps %ymm8,%ymm1,%ymm1
- .byte 196,226,101,0,21,200,48,0,0 // vpshufb 0x30c8(%rip),%ymm3,%ymm2 # 6e40 <_sk_clut_4D_hsw+0xeb6>
+ .byte 196,226,101,0,21,240,51,0,0 // vpshufb 0x33f0(%rip),%ymm3,%ymm2 # 7300 <_sk_clut_4D_hsw+0xeaa>
.byte 197,252,91,210 // vcvtdq2ps %ymm2,%ymm2
.byte 196,193,108,89,208 // vmulps %ymm8,%ymm2,%ymm2
.byte 197,229,114,211,24 // vpsrld $0x18,%ymm3,%ymm3
.byte 197,252,91,219 // vcvtdq2ps %ymm3,%ymm3
.byte 196,193,100,89,216 // vmulps %ymm8,%ymm3,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 76,137,201 // mov %r9,%rcx
+ .byte 91 // pop %rbx
+ .byte 255,224 // jmpq *%rax
+ .byte 69,137,194 // mov %r8d,%r10d
+ .byte 65,128,226,7 // and $0x7,%r10b
+ .byte 197,229,239,219 // vpxor %ymm3,%ymm3,%ymm3
+ .byte 65,254,202 // dec %r10b
+ .byte 65,128,250,6 // cmp $0x6,%r10b
+ .byte 119,155 // ja 3edb <_sk_load_8888_hsw+0x23>
+ .byte 69,15,182,210 // movzbl %r10b,%r10d
+ .byte 76,141,29,141,0,0,0 // lea 0x8d(%rip),%r11 # 3fd8 <_sk_load_8888_hsw+0x120>
+ .byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
+ .byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
- .byte 185,8,0,0,0 // mov $0x8,%ecx
- .byte 68,41,193 // sub %r8d,%ecx
- .byte 192,225,3 // shl $0x3,%cl
- .byte 73,199,194,255,255,255,255 // mov $0xffffffffffffffff,%r10
- .byte 73,211,234 // shr %cl,%r10
- .byte 196,193,249,110,194 // vmovq %r10,%xmm0
- .byte 196,226,125,33,192 // vpmovsxbd %xmm0,%ymm0
- .byte 196,226,125,44,24 // vmaskmovps (%rax),%ymm0,%ymm3
- .byte 235,135 // jmp 3d43 <_sk_load_8888_hsw+0x27>
+ .byte 196,193,121,110,28,153 // vmovd (%r9,%rbx,4),%xmm3
+ .byte 233,124,255,255,255 // jmpq 3edb <_sk_load_8888_hsw+0x23>
+ .byte 196,193,121,110,68,153,8 // vmovd 0x8(%r9,%rbx,4),%xmm0
+ .byte 196,226,121,89,192 // vpbroadcastq %xmm0,%xmm0
+ .byte 197,245,239,201 // vpxor %ymm1,%ymm1,%ymm1
+ .byte 196,227,117,2,216,4 // vpblendd $0x4,%ymm0,%ymm1,%ymm3
+ .byte 196,193,122,126,4,153 // vmovq (%r9,%rbx,4),%xmm0
+ .byte 196,227,101,2,216,3 // vpblendd $0x3,%ymm0,%ymm3,%ymm3
+ .byte 233,85,255,255,255 // jmpq 3edb <_sk_load_8888_hsw+0x23>
+ .byte 196,193,121,110,68,153,24 // vmovd 0x18(%r9,%rbx,4),%xmm0
+ .byte 196,226,125,89,192 // vpbroadcastq %xmm0,%ymm0
+ .byte 197,245,239,201 // vpxor %ymm1,%ymm1,%ymm1
+ .byte 196,227,117,2,216,64 // vpblendd $0x40,%ymm0,%ymm1,%ymm3
+ .byte 196,227,125,57,216,1 // vextracti128 $0x1,%ymm3,%xmm0
+ .byte 196,195,121,34,68,153,20,1 // vpinsrd $0x1,0x14(%r9,%rbx,4),%xmm0,%xmm0
+ .byte 196,227,101,56,216,1 // vinserti128 $0x1,%xmm0,%ymm3,%ymm3
+ .byte 196,227,125,57,216,1 // vextracti128 $0x1,%ymm3,%xmm0
+ .byte 196,195,121,34,68,153,16,0 // vpinsrd $0x0,0x10(%r9,%rbx,4),%xmm0,%xmm0
+ .byte 196,227,101,56,216,1 // vinserti128 $0x1,%xmm0,%ymm3,%ymm3
+ .byte 196,193,122,111,4,153 // vmovdqu (%r9,%rbx,4),%xmm0
+ .byte 196,227,125,2,219,240 // vpblendd $0xf0,%ymm3,%ymm0,%ymm3
+ .byte 233,6,255,255,255 // jmpq 3edb <_sk_load_8888_hsw+0x23>
+ .byte 15,31,0 // nopl (%rax)
+ .byte 124,255 // jl 3fd9 <_sk_load_8888_hsw+0x121>
+ .byte 255 // (bad)
+ .byte 255,157,255,255,255,135 // lcall *-0x78000001(%rbp)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 236 // in (%dx),%al
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 216,255 // fdivr %st(7),%st
+ .byte 255 // (bad)
+ .byte 255,196 // inc %esp
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // .byte 0xff
+ .byte 174 // scas %es:(%rdi),%al
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // .byte 0xff
HIDDEN _sk_load_8888_dst_hsw
.globl _sk_load_8888_dst_hsw
FUNCTION(_sk_load_8888_dst_hsw)
_sk_load_8888_dst_hsw:
- .byte 73,137,201 // mov %rcx,%r9
+ .byte 83 // push %rbx
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,99,80,8 // movslq 0x8(%rax),%r10
- .byte 73,99,201 // movslq %r9d,%rcx
- .byte 73,15,175,202 // imul %r10,%rcx
- .byte 72,193,225,2 // shl $0x2,%rcx
- .byte 72,3,8 // add (%rax),%rcx
- .byte 72,99,194 // movslq %edx,%rax
- .byte 72,141,4,129 // lea (%rcx,%rax,4),%rax
+ .byte 76,99,201 // movslq %ecx,%r9
+ .byte 77,15,175,202 // imul %r10,%r9
+ .byte 73,193,225,2 // shl $0x2,%r9
+ .byte 76,3,8 // add (%rax),%r9
+ .byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 117,87 // jne 3e36 <_sk_load_8888_dst_hsw+0x7a>
- .byte 197,252,16,56 // vmovups (%rax),%ymm7
- .byte 197,196,84,37,117,48,0,0 // vandps 0x3075(%rip),%ymm7,%ymm4 # 6e60 <_sk_clut_4D_hsw+0xed6>
+ .byte 117,87 // jne 4068 <_sk_load_8888_dst_hsw+0x74>
+ .byte 196,193,126,111,60,153 // vmovdqu (%r9,%rbx,4),%ymm7
+ .byte 197,197,219,37,1,51,0,0 // vpand 0x3301(%rip),%ymm7,%ymm4 # 7320 <_sk_clut_4D_hsw+0xeca>
.byte 197,252,91,228 // vcvtdq2ps %ymm4,%ymm4
- .byte 196,98,125,24,5,200,45,0,0 // vbroadcastss 0x2dc8(%rip),%ymm8 # 6bc0 <_sk_clut_4D_hsw+0xc36>
+ .byte 196,98,125,24,5,96,48,0,0 // vbroadcastss 0x3060(%rip),%ymm8 # 708c <_sk_clut_4D_hsw+0xc36>
.byte 196,193,92,89,224 // vmulps %ymm8,%ymm4,%ymm4
- .byte 196,226,69,0,45,122,48,0,0 // vpshufb 0x307a(%rip),%ymm7,%ymm5 # 6e80 <_sk_clut_4D_hsw+0xef6>
+ .byte 196,226,69,0,45,6,51,0,0 // vpshufb 0x3306(%rip),%ymm7,%ymm5 # 7340 <_sk_clut_4D_hsw+0xeea>
.byte 197,252,91,237 // vcvtdq2ps %ymm5,%ymm5
.byte 196,193,84,89,232 // vmulps %ymm8,%ymm5,%ymm5
- .byte 196,226,69,0,53,136,48,0,0 // vpshufb 0x3088(%rip),%ymm7,%ymm6 # 6ea0 <_sk_clut_4D_hsw+0xf16>
+ .byte 196,226,69,0,53,20,51,0,0 // vpshufb 0x3314(%rip),%ymm7,%ymm6 # 7360 <_sk_clut_4D_hsw+0xf0a>
.byte 197,252,91,246 // vcvtdq2ps %ymm6,%ymm6
.byte 196,193,76,89,240 // vmulps %ymm8,%ymm6,%ymm6
.byte 197,197,114,215,24 // vpsrld $0x18,%ymm7,%ymm7
.byte 197,252,91,255 // vcvtdq2ps %ymm7,%ymm7
.byte 196,193,68,89,248 // vmulps %ymm8,%ymm7,%ymm7
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 76,137,201 // mov %r9,%rcx
+ .byte 91 // pop %rbx
+ .byte 255,224 // jmpq *%rax
+ .byte 69,137,194 // mov %r8d,%r10d
+ .byte 65,128,226,7 // and $0x7,%r10b
+ .byte 197,197,239,255 // vpxor %ymm7,%ymm7,%ymm7
+ .byte 65,254,202 // dec %r10b
+ .byte 65,128,250,6 // cmp $0x6,%r10b
+ .byte 119,155 // ja 4017 <_sk_load_8888_dst_hsw+0x23>
+ .byte 69,15,182,210 // movzbl %r10b,%r10d
+ .byte 76,141,29,141,0,0,0 // lea 0x8d(%rip),%r11 # 4114 <_sk_load_8888_dst_hsw+0x120>
+ .byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
+ .byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
- .byte 185,8,0,0,0 // mov $0x8,%ecx
- .byte 68,41,193 // sub %r8d,%ecx
- .byte 192,225,3 // shl $0x3,%cl
- .byte 73,199,194,255,255,255,255 // mov $0xffffffffffffffff,%r10
- .byte 73,211,234 // shr %cl,%r10
- .byte 196,193,249,110,226 // vmovq %r10,%xmm4
- .byte 196,226,125,33,228 // vpmovsxbd %xmm4,%ymm4
- .byte 196,226,93,44,56 // vmaskmovps (%rax),%ymm4,%ymm7
- .byte 235,135 // jmp 3de3 <_sk_load_8888_dst_hsw+0x27>
+ .byte 196,193,121,110,60,153 // vmovd (%r9,%rbx,4),%xmm7
+ .byte 233,124,255,255,255 // jmpq 4017 <_sk_load_8888_dst_hsw+0x23>
+ .byte 196,193,121,110,100,153,8 // vmovd 0x8(%r9,%rbx,4),%xmm4
+ .byte 196,226,121,89,228 // vpbroadcastq %xmm4,%xmm4
+ .byte 197,213,239,237 // vpxor %ymm5,%ymm5,%ymm5
+ .byte 196,227,85,2,252,4 // vpblendd $0x4,%ymm4,%ymm5,%ymm7
+ .byte 196,193,122,126,36,153 // vmovq (%r9,%rbx,4),%xmm4
+ .byte 196,227,69,2,252,3 // vpblendd $0x3,%ymm4,%ymm7,%ymm7
+ .byte 233,85,255,255,255 // jmpq 4017 <_sk_load_8888_dst_hsw+0x23>
+ .byte 196,193,121,110,100,153,24 // vmovd 0x18(%r9,%rbx,4),%xmm4
+ .byte 196,226,125,89,228 // vpbroadcastq %xmm4,%ymm4
+ .byte 197,213,239,237 // vpxor %ymm5,%ymm5,%ymm5
+ .byte 196,227,85,2,252,64 // vpblendd $0x40,%ymm4,%ymm5,%ymm7
+ .byte 196,227,125,57,252,1 // vextracti128 $0x1,%ymm7,%xmm4
+ .byte 196,195,89,34,100,153,20,1 // vpinsrd $0x1,0x14(%r9,%rbx,4),%xmm4,%xmm4
+ .byte 196,227,69,56,252,1 // vinserti128 $0x1,%xmm4,%ymm7,%ymm7
+ .byte 196,227,125,57,252,1 // vextracti128 $0x1,%ymm7,%xmm4
+ .byte 196,195,89,34,100,153,16,0 // vpinsrd $0x0,0x10(%r9,%rbx,4),%xmm4,%xmm4
+ .byte 196,227,69,56,252,1 // vinserti128 $0x1,%xmm4,%ymm7,%ymm7
+ .byte 196,193,122,111,36,153 // vmovdqu (%r9,%rbx,4),%xmm4
+ .byte 196,227,93,2,255,240 // vpblendd $0xf0,%ymm7,%ymm4,%ymm7
+ .byte 233,6,255,255,255 // jmpq 4017 <_sk_load_8888_dst_hsw+0x23>
+ .byte 15,31,0 // nopl (%rax)
+ .byte 124,255 // jl 4115 <_sk_load_8888_dst_hsw+0x121>
+ .byte 255 // (bad)
+ .byte 255,157,255,255,255,135 // lcall *-0x78000001(%rbp)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 236 // in (%dx),%al
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 216,255 // fdivr %st(7),%st
+ .byte 255 // (bad)
+ .byte 255,196 // inc %esp
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // .byte 0xff
+ .byte 174 // scas %es:(%rdi),%al
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // .byte 0xff
HIDDEN _sk_gather_8888_hsw
.globl _sk_gather_8888_hsw
@@ -20519,14 +20713,14 @@ _sk_gather_8888_hsw:
.byte 197,245,254,192 // vpaddd %ymm0,%ymm1,%ymm0
.byte 197,245,118,201 // vpcmpeqd %ymm1,%ymm1,%ymm1
.byte 196,194,117,144,28,129 // vpgatherdd %ymm1,(%r9,%ymm0,4),%ymm3
- .byte 197,229,219,5,54,48,0,0 // vpand 0x3036(%rip),%ymm3,%ymm0 # 6ec0 <_sk_clut_4D_hsw+0xf36>
+ .byte 197,229,219,5,34,50,0,0 // vpand 0x3222(%rip),%ymm3,%ymm0 # 7380 <_sk_clut_4D_hsw+0xf2a>
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,98,125,24,5,45,45,0,0 // vbroadcastss 0x2d2d(%rip),%ymm8 # 6bc4 <_sk_clut_4D_hsw+0xc3a>
+ .byte 196,98,125,24,5,37,47,0,0 // vbroadcastss 0x2f25(%rip),%ymm8 # 7090 <_sk_clut_4D_hsw+0xc3a>
.byte 196,193,124,89,192 // vmulps %ymm8,%ymm0,%ymm0
- .byte 196,226,101,0,13,59,48,0,0 // vpshufb 0x303b(%rip),%ymm3,%ymm1 # 6ee0 <_sk_clut_4D_hsw+0xf56>
+ .byte 196,226,101,0,13,39,50,0,0 // vpshufb 0x3227(%rip),%ymm3,%ymm1 # 73a0 <_sk_clut_4D_hsw+0xf4a>
.byte 197,252,91,201 // vcvtdq2ps %ymm1,%ymm1
.byte 196,193,116,89,200 // vmulps %ymm8,%ymm1,%ymm1
- .byte 196,226,101,0,21,73,48,0,0 // vpshufb 0x3049(%rip),%ymm3,%ymm2 # 6f00 <_sk_clut_4D_hsw+0xf76>
+ .byte 196,226,101,0,21,53,50,0,0 // vpshufb 0x3235(%rip),%ymm3,%ymm2 # 73c0 <_sk_clut_4D_hsw+0xf6a>
.byte 197,252,91,210 // vcvtdq2ps %ymm2,%ymm2
.byte 196,193,108,89,208 // vmulps %ymm8,%ymm2,%ymm2
.byte 197,229,114,211,24 // vpsrld $0x18,%ymm3,%ymm3
@@ -20539,16 +20733,15 @@ HIDDEN _sk_store_8888_hsw
.globl _sk_store_8888_hsw
FUNCTION(_sk_store_8888_hsw)
_sk_store_8888_hsw:
- .byte 73,137,201 // mov %rcx,%r9
+ .byte 83 // push %rbx
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,99,80,8 // movslq 0x8(%rax),%r10
- .byte 73,99,201 // movslq %r9d,%rcx
- .byte 73,15,175,202 // imul %r10,%rcx
- .byte 72,193,225,2 // shl $0x2,%rcx
- .byte 72,3,8 // add (%rax),%rcx
- .byte 72,99,194 // movslq %edx,%rax
- .byte 72,141,4,129 // lea (%rcx,%rax,4),%rax
- .byte 196,98,125,24,5,207,44,0,0 // vbroadcastss 0x2ccf(%rip),%ymm8 # 6bc8 <_sk_clut_4D_hsw+0xc3e>
+ .byte 76,99,201 // movslq %ecx,%r9
+ .byte 77,15,175,202 // imul %r10,%r9
+ .byte 73,193,225,2 // shl $0x2,%r9
+ .byte 76,3,8 // add (%rax),%r9
+ .byte 72,99,218 // movslq %edx,%rbx
+ .byte 196,98,125,24,5,205,46,0,0 // vbroadcastss 0x2ecd(%rip),%ymm8 # 7094 <_sk_clut_4D_hsw+0xc3e>
.byte 196,65,124,89,200 // vmulps %ymm8,%ymm0,%ymm9
.byte 196,65,125,91,201 // vcvtps2dq %ymm9,%ymm9
.byte 196,65,116,89,208 // vmulps %ymm8,%ymm1,%ymm10
@@ -20564,104 +20757,224 @@ _sk_store_8888_hsw:
.byte 196,65,45,235,192 // vpor %ymm8,%ymm10,%ymm8
.byte 196,65,53,235,192 // vpor %ymm8,%ymm9,%ymm8
.byte 77,133,192 // test %r8,%r8
- .byte 117,11 // jne 3f52 <_sk_store_8888_hsw+0x80>
- .byte 197,124,17,0 // vmovups %ymm8,(%rax)
+ .byte 117,11 // jne 4220 <_sk_store_8888_hsw+0x7a>
+ .byte 196,65,126,127,4,153 // vmovdqu %ymm8,(%r9,%rbx,4)
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 76,137,201 // mov %r9,%rcx
+ .byte 91 // pop %rbx
.byte 255,224 // jmpq *%rax
- .byte 185,8,0,0,0 // mov $0x8,%ecx
- .byte 68,41,193 // sub %r8d,%ecx
- .byte 192,225,3 // shl $0x3,%cl
- .byte 73,199,194,255,255,255,255 // mov $0xffffffffffffffff,%r10
- .byte 73,211,234 // shr %cl,%r10
- .byte 196,65,249,110,202 // vmovq %r10,%xmm9
- .byte 196,66,125,33,201 // vpmovsxbd %xmm9,%ymm9
- .byte 196,98,53,46,0 // vmaskmovps %ymm8,%ymm9,(%rax)
- .byte 235,211 // jmp 3f4b <_sk_store_8888_hsw+0x79>
+ .byte 69,137,194 // mov %r8d,%r10d
+ .byte 65,128,226,7 // and $0x7,%r10b
+ .byte 65,254,202 // dec %r10b
+ .byte 65,128,250,6 // cmp $0x6,%r10b
+ .byte 119,235 // ja 421b <_sk_store_8888_hsw+0x75>
+ .byte 69,15,182,210 // movzbl %r10b,%r10d
+ .byte 76,141,29,85,0,0,0 // lea 0x55(%rip),%r11 # 4290 <_sk_store_8888_hsw+0xea>
+ .byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
+ .byte 76,1,216 // add %r11,%rax
+ .byte 255,224 // jmpq *%rax
+ .byte 196,65,121,126,4,153 // vmovd %xmm8,(%r9,%rbx,4)
+ .byte 235,207 // jmp 421b <_sk_store_8888_hsw+0x75>
+ .byte 196,67,121,22,68,153,8,2 // vpextrd $0x2,%xmm8,0x8(%r9,%rbx,4)
+ .byte 196,65,121,214,4,153 // vmovq %xmm8,(%r9,%rbx,4)
+ .byte 235,191 // jmp 421b <_sk_store_8888_hsw+0x75>
+ .byte 196,67,125,57,193,1 // vextracti128 $0x1,%ymm8,%xmm9
+ .byte 196,67,121,22,76,153,24,2 // vpextrd $0x2,%xmm9,0x18(%r9,%rbx,4)
+ .byte 196,67,125,57,193,1 // vextracti128 $0x1,%ymm8,%xmm9
+ .byte 196,67,121,22,76,153,20,1 // vpextrd $0x1,%xmm9,0x14(%r9,%rbx,4)
+ .byte 196,67,125,57,193,1 // vextracti128 $0x1,%ymm8,%xmm9
+ .byte 196,65,121,126,76,153,16 // vmovd %xmm9,0x10(%r9,%rbx,4)
+ .byte 196,65,122,127,4,153 // vmovdqu %xmm8,(%r9,%rbx,4)
+ .byte 235,142 // jmp 421b <_sk_store_8888_hsw+0x75>
+ .byte 15,31,0 // nopl (%rax)
+ .byte 180,255 // mov $0xff,%ah
+ .byte 255 // (bad)
+ .byte 255,196 // inc %esp
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 188,255,255,255,245 // mov $0xf5ffffff,%esp
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 232,255,255,255,218 // callq ffffffffdb0042a4 <_sk_clut_4D_hsw+0xffffffffdaffde4e>
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,204 // dec %esp
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // .byte 0xff
HIDDEN _sk_load_bgra_hsw
.globl _sk_load_bgra_hsw
FUNCTION(_sk_load_bgra_hsw)
_sk_load_bgra_hsw:
- .byte 73,137,201 // mov %rcx,%r9
+ .byte 83 // push %rbx
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,99,80,8 // movslq 0x8(%rax),%r10
- .byte 73,99,201 // movslq %r9d,%rcx
- .byte 73,15,175,202 // imul %r10,%rcx
- .byte 72,193,225,2 // shl $0x2,%rcx
- .byte 72,3,8 // add (%rax),%rcx
- .byte 72,99,194 // movslq %edx,%rax
- .byte 72,141,4,129 // lea (%rcx,%rax,4),%rax
+ .byte 76,99,201 // movslq %ecx,%r9
+ .byte 77,15,175,202 // imul %r10,%r9
+ .byte 73,193,225,2 // shl $0x2,%r9
+ .byte 76,3,8 // add (%rax),%r9
+ .byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 117,87 // jne 3ff2 <_sk_load_bgra_hsw+0x7a>
- .byte 197,252,16,24 // vmovups (%rax),%ymm3
- .byte 197,228,84,5,121,47,0,0 // vandps 0x2f79(%rip),%ymm3,%ymm0 # 6f20 <_sk_clut_4D_hsw+0xf96>
+ .byte 117,87 // jne 4320 <_sk_load_bgra_hsw+0x74>
+ .byte 196,193,126,111,28,153 // vmovdqu (%r9,%rbx,4),%ymm3
+ .byte 197,229,219,5,9,49,0,0 // vpand 0x3109(%rip),%ymm3,%ymm0 # 73e0 <_sk_clut_4D_hsw+0xf8a>
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,98,125,24,5,24,44,0,0 // vbroadcastss 0x2c18(%rip),%ymm8 # 6bcc <_sk_clut_4D_hsw+0xc42>
+ .byte 196,98,125,24,5,180,45,0,0 // vbroadcastss 0x2db4(%rip),%ymm8 # 7098 <_sk_clut_4D_hsw+0xc42>
.byte 196,193,124,89,208 // vmulps %ymm8,%ymm0,%ymm2
- .byte 196,226,101,0,5,126,47,0,0 // vpshufb 0x2f7e(%rip),%ymm3,%ymm0 # 6f40 <_sk_clut_4D_hsw+0xfb6>
+ .byte 196,226,101,0,5,14,49,0,0 // vpshufb 0x310e(%rip),%ymm3,%ymm0 # 7400 <_sk_clut_4D_hsw+0xfaa>
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
.byte 196,193,124,89,200 // vmulps %ymm8,%ymm0,%ymm1
- .byte 196,226,101,0,5,140,47,0,0 // vpshufb 0x2f8c(%rip),%ymm3,%ymm0 # 6f60 <_sk_clut_4D_hsw+0xfd6>
+ .byte 196,226,101,0,5,28,49,0,0 // vpshufb 0x311c(%rip),%ymm3,%ymm0 # 7420 <_sk_clut_4D_hsw+0xfca>
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
.byte 196,193,124,89,192 // vmulps %ymm8,%ymm0,%ymm0
.byte 197,229,114,211,24 // vpsrld $0x18,%ymm3,%ymm3
.byte 197,252,91,219 // vcvtdq2ps %ymm3,%ymm3
.byte 196,193,100,89,216 // vmulps %ymm8,%ymm3,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 76,137,201 // mov %r9,%rcx
+ .byte 91 // pop %rbx
+ .byte 255,224 // jmpq *%rax
+ .byte 69,137,194 // mov %r8d,%r10d
+ .byte 65,128,226,7 // and $0x7,%r10b
+ .byte 197,229,239,219 // vpxor %ymm3,%ymm3,%ymm3
+ .byte 65,254,202 // dec %r10b
+ .byte 65,128,250,6 // cmp $0x6,%r10b
+ .byte 119,155 // ja 42cf <_sk_load_bgra_hsw+0x23>
+ .byte 69,15,182,210 // movzbl %r10b,%r10d
+ .byte 76,141,29,141,0,0,0 // lea 0x8d(%rip),%r11 # 43cc <_sk_load_bgra_hsw+0x120>
+ .byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
+ .byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
- .byte 185,8,0,0,0 // mov $0x8,%ecx
- .byte 68,41,193 // sub %r8d,%ecx
- .byte 192,225,3 // shl $0x3,%cl
- .byte 73,199,194,255,255,255,255 // mov $0xffffffffffffffff,%r10
- .byte 73,211,234 // shr %cl,%r10
- .byte 196,193,249,110,194 // vmovq %r10,%xmm0
- .byte 196,226,125,33,192 // vpmovsxbd %xmm0,%ymm0
- .byte 196,226,125,44,24 // vmaskmovps (%rax),%ymm0,%ymm3
- .byte 235,135 // jmp 3f9f <_sk_load_bgra_hsw+0x27>
+ .byte 196,193,121,110,28,153 // vmovd (%r9,%rbx,4),%xmm3
+ .byte 233,124,255,255,255 // jmpq 42cf <_sk_load_bgra_hsw+0x23>
+ .byte 196,193,121,110,68,153,8 // vmovd 0x8(%r9,%rbx,4),%xmm0
+ .byte 196,226,121,89,192 // vpbroadcastq %xmm0,%xmm0
+ .byte 197,245,239,201 // vpxor %ymm1,%ymm1,%ymm1
+ .byte 196,227,117,2,216,4 // vpblendd $0x4,%ymm0,%ymm1,%ymm3
+ .byte 196,193,122,126,4,153 // vmovq (%r9,%rbx,4),%xmm0
+ .byte 196,227,101,2,216,3 // vpblendd $0x3,%ymm0,%ymm3,%ymm3
+ .byte 233,85,255,255,255 // jmpq 42cf <_sk_load_bgra_hsw+0x23>
+ .byte 196,193,121,110,68,153,24 // vmovd 0x18(%r9,%rbx,4),%xmm0
+ .byte 196,226,125,89,192 // vpbroadcastq %xmm0,%ymm0
+ .byte 197,245,239,201 // vpxor %ymm1,%ymm1,%ymm1
+ .byte 196,227,117,2,216,64 // vpblendd $0x40,%ymm0,%ymm1,%ymm3
+ .byte 196,227,125,57,216,1 // vextracti128 $0x1,%ymm3,%xmm0
+ .byte 196,195,121,34,68,153,20,1 // vpinsrd $0x1,0x14(%r9,%rbx,4),%xmm0,%xmm0
+ .byte 196,227,101,56,216,1 // vinserti128 $0x1,%xmm0,%ymm3,%ymm3
+ .byte 196,227,125,57,216,1 // vextracti128 $0x1,%ymm3,%xmm0
+ .byte 196,195,121,34,68,153,16,0 // vpinsrd $0x0,0x10(%r9,%rbx,4),%xmm0,%xmm0
+ .byte 196,227,101,56,216,1 // vinserti128 $0x1,%xmm0,%ymm3,%ymm3
+ .byte 196,193,122,111,4,153 // vmovdqu (%r9,%rbx,4),%xmm0
+ .byte 196,227,125,2,219,240 // vpblendd $0xf0,%ymm3,%ymm0,%ymm3
+ .byte 233,6,255,255,255 // jmpq 42cf <_sk_load_bgra_hsw+0x23>
+ .byte 15,31,0 // nopl (%rax)
+ .byte 124,255 // jl 43cd <_sk_load_bgra_hsw+0x121>
+ .byte 255 // (bad)
+ .byte 255,157,255,255,255,135 // lcall *-0x78000001(%rbp)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 236 // in (%dx),%al
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 216,255 // fdivr %st(7),%st
+ .byte 255 // (bad)
+ .byte 255,196 // inc %esp
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // .byte 0xff
+ .byte 174 // scas %es:(%rdi),%al
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // .byte 0xff
HIDDEN _sk_load_bgra_dst_hsw
.globl _sk_load_bgra_dst_hsw
FUNCTION(_sk_load_bgra_dst_hsw)
_sk_load_bgra_dst_hsw:
- .byte 73,137,201 // mov %rcx,%r9
+ .byte 83 // push %rbx
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,99,80,8 // movslq 0x8(%rax),%r10
- .byte 73,99,201 // movslq %r9d,%rcx
- .byte 73,15,175,202 // imul %r10,%rcx
- .byte 72,193,225,2 // shl $0x2,%rcx
- .byte 72,3,8 // add (%rax),%rcx
- .byte 72,99,194 // movslq %edx,%rax
- .byte 72,141,4,129 // lea (%rcx,%rax,4),%rax
+ .byte 76,99,201 // movslq %ecx,%r9
+ .byte 77,15,175,202 // imul %r10,%r9
+ .byte 73,193,225,2 // shl $0x2,%r9
+ .byte 76,3,8 // add (%rax),%r9
+ .byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 117,87 // jne 4092 <_sk_load_bgra_dst_hsw+0x7a>
- .byte 197,252,16,56 // vmovups (%rax),%ymm7
- .byte 197,196,84,37,57,47,0,0 // vandps 0x2f39(%rip),%ymm7,%ymm4 # 6f80 <_sk_clut_4D_hsw+0xff6>
+ .byte 117,87 // jne 445c <_sk_load_bgra_dst_hsw+0x74>
+ .byte 196,193,126,111,60,153 // vmovdqu (%r9,%rbx,4),%ymm7
+ .byte 197,197,219,37,45,48,0,0 // vpand 0x302d(%rip),%ymm7,%ymm4 # 7440 <_sk_clut_4D_hsw+0xfea>
.byte 197,252,91,228 // vcvtdq2ps %ymm4,%ymm4
- .byte 196,98,125,24,5,124,43,0,0 // vbroadcastss 0x2b7c(%rip),%ymm8 # 6bd0 <_sk_clut_4D_hsw+0xc46>
+ .byte 196,98,125,24,5,124,44,0,0 // vbroadcastss 0x2c7c(%rip),%ymm8 # 709c <_sk_clut_4D_hsw+0xc46>
.byte 196,193,92,89,240 // vmulps %ymm8,%ymm4,%ymm6
- .byte 196,226,69,0,37,62,47,0,0 // vpshufb 0x2f3e(%rip),%ymm7,%ymm4 # 6fa0 <_sk_clut_4D_hsw+0x1016>
+ .byte 196,226,69,0,37,50,48,0,0 // vpshufb 0x3032(%rip),%ymm7,%ymm4 # 7460 <_sk_clut_4D_hsw+0x100a>
.byte 197,252,91,228 // vcvtdq2ps %ymm4,%ymm4
.byte 196,193,92,89,232 // vmulps %ymm8,%ymm4,%ymm5
- .byte 196,226,69,0,37,76,47,0,0 // vpshufb 0x2f4c(%rip),%ymm7,%ymm4 # 6fc0 <_sk_clut_4D_hsw+0x1036>
+ .byte 196,226,69,0,37,64,48,0,0 // vpshufb 0x3040(%rip),%ymm7,%ymm4 # 7480 <_sk_clut_4D_hsw+0x102a>
.byte 197,252,91,228 // vcvtdq2ps %ymm4,%ymm4
.byte 196,193,92,89,224 // vmulps %ymm8,%ymm4,%ymm4
.byte 197,197,114,215,24 // vpsrld $0x18,%ymm7,%ymm7
.byte 197,252,91,255 // vcvtdq2ps %ymm7,%ymm7
.byte 196,193,68,89,248 // vmulps %ymm8,%ymm7,%ymm7
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 76,137,201 // mov %r9,%rcx
+ .byte 91 // pop %rbx
.byte 255,224 // jmpq *%rax
- .byte 185,8,0,0,0 // mov $0x8,%ecx
- .byte 68,41,193 // sub %r8d,%ecx
- .byte 192,225,3 // shl $0x3,%cl
- .byte 73,199,194,255,255,255,255 // mov $0xffffffffffffffff,%r10
- .byte 73,211,234 // shr %cl,%r10
- .byte 196,193,249,110,226 // vmovq %r10,%xmm4
- .byte 196,226,125,33,228 // vpmovsxbd %xmm4,%ymm4
- .byte 196,226,93,44,56 // vmaskmovps (%rax),%ymm4,%ymm7
- .byte 235,135 // jmp 403f <_sk_load_bgra_dst_hsw+0x27>
+ .byte 69,137,194 // mov %r8d,%r10d
+ .byte 65,128,226,7 // and $0x7,%r10b
+ .byte 197,197,239,255 // vpxor %ymm7,%ymm7,%ymm7
+ .byte 65,254,202 // dec %r10b
+ .byte 65,128,250,6 // cmp $0x6,%r10b
+ .byte 119,155 // ja 440b <_sk_load_bgra_dst_hsw+0x23>
+ .byte 69,15,182,210 // movzbl %r10b,%r10d
+ .byte 76,141,29,141,0,0,0 // lea 0x8d(%rip),%r11 # 4508 <_sk_load_bgra_dst_hsw+0x120>
+ .byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
+ .byte 76,1,216 // add %r11,%rax
+ .byte 255,224 // jmpq *%rax
+ .byte 196,193,121,110,60,153 // vmovd (%r9,%rbx,4),%xmm7
+ .byte 233,124,255,255,255 // jmpq 440b <_sk_load_bgra_dst_hsw+0x23>
+ .byte 196,193,121,110,100,153,8 // vmovd 0x8(%r9,%rbx,4),%xmm4
+ .byte 196,226,121,89,228 // vpbroadcastq %xmm4,%xmm4
+ .byte 197,213,239,237 // vpxor %ymm5,%ymm5,%ymm5
+ .byte 196,227,85,2,252,4 // vpblendd $0x4,%ymm4,%ymm5,%ymm7
+ .byte 196,193,122,126,36,153 // vmovq (%r9,%rbx,4),%xmm4
+ .byte 196,227,69,2,252,3 // vpblendd $0x3,%ymm4,%ymm7,%ymm7
+ .byte 233,85,255,255,255 // jmpq 440b <_sk_load_bgra_dst_hsw+0x23>
+ .byte 196,193,121,110,100,153,24 // vmovd 0x18(%r9,%rbx,4),%xmm4
+ .byte 196,226,125,89,228 // vpbroadcastq %xmm4,%ymm4
+ .byte 197,213,239,237 // vpxor %ymm5,%ymm5,%ymm5
+ .byte 196,227,85,2,252,64 // vpblendd $0x40,%ymm4,%ymm5,%ymm7
+ .byte 196,227,125,57,252,1 // vextracti128 $0x1,%ymm7,%xmm4
+ .byte 196,195,89,34,100,153,20,1 // vpinsrd $0x1,0x14(%r9,%rbx,4),%xmm4,%xmm4
+ .byte 196,227,69,56,252,1 // vinserti128 $0x1,%xmm4,%ymm7,%ymm7
+ .byte 196,227,125,57,252,1 // vextracti128 $0x1,%ymm7,%xmm4
+ .byte 196,195,89,34,100,153,16,0 // vpinsrd $0x0,0x10(%r9,%rbx,4),%xmm4,%xmm4
+ .byte 196,227,69,56,252,1 // vinserti128 $0x1,%xmm4,%ymm7,%ymm7
+ .byte 196,193,122,111,36,153 // vmovdqu (%r9,%rbx,4),%xmm4
+ .byte 196,227,93,2,255,240 // vpblendd $0xf0,%ymm7,%ymm4,%ymm7
+ .byte 233,6,255,255,255 // jmpq 440b <_sk_load_bgra_dst_hsw+0x23>
+ .byte 15,31,0 // nopl (%rax)
+ .byte 124,255 // jl 4509 <_sk_load_bgra_dst_hsw+0x121>
+ .byte 255 // (bad)
+ .byte 255,157,255,255,255,135 // lcall *-0x78000001(%rbp)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 236 // in (%dx),%al
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 216,255 // fdivr %st(7),%st
+ .byte 255 // (bad)
+ .byte 255,196 // inc %esp
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // .byte 0xff
+ .byte 174 // scas %es:(%rdi),%al
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // .byte 0xff
HIDDEN _sk_gather_bgra_hsw
.globl _sk_gather_bgra_hsw
@@ -20676,14 +20989,14 @@ _sk_gather_bgra_hsw:
.byte 197,245,254,192 // vpaddd %ymm0,%ymm1,%ymm0
.byte 197,245,118,201 // vpcmpeqd %ymm1,%ymm1,%ymm1
.byte 196,194,117,144,28,129 // vpgatherdd %ymm1,(%r9,%ymm0,4),%ymm3
- .byte 197,229,219,5,250,46,0,0 // vpand 0x2efa(%rip),%ymm3,%ymm0 # 6fe0 <_sk_clut_4D_hsw+0x1056>
+ .byte 197,229,219,5,78,47,0,0 // vpand 0x2f4e(%rip),%ymm3,%ymm0 # 74a0 <_sk_clut_4D_hsw+0x104a>
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,98,125,24,5,225,42,0,0 // vbroadcastss 0x2ae1(%rip),%ymm8 # 6bd4 <_sk_clut_4D_hsw+0xc4a>
+ .byte 196,98,125,24,5,65,43,0,0 // vbroadcastss 0x2b41(%rip),%ymm8 # 70a0 <_sk_clut_4D_hsw+0xc4a>
.byte 196,193,124,89,208 // vmulps %ymm8,%ymm0,%ymm2
- .byte 196,226,101,0,5,255,46,0,0 // vpshufb 0x2eff(%rip),%ymm3,%ymm0 # 7000 <_sk_clut_4D_hsw+0x1076>
+ .byte 196,226,101,0,5,83,47,0,0 // vpshufb 0x2f53(%rip),%ymm3,%ymm0 # 74c0 <_sk_clut_4D_hsw+0x106a>
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
.byte 196,193,124,89,200 // vmulps %ymm8,%ymm0,%ymm1
- .byte 196,226,101,0,5,13,47,0,0 // vpshufb 0x2f0d(%rip),%ymm3,%ymm0 # 7020 <_sk_clut_4D_hsw+0x1096>
+ .byte 196,226,101,0,5,97,47,0,0 // vpshufb 0x2f61(%rip),%ymm3,%ymm0 # 74e0 <_sk_clut_4D_hsw+0x108a>
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
.byte 196,193,124,89,192 // vmulps %ymm8,%ymm0,%ymm0
.byte 197,229,114,211,24 // vpsrld $0x18,%ymm3,%ymm3
@@ -20696,16 +21009,15 @@ HIDDEN _sk_store_bgra_hsw
.globl _sk_store_bgra_hsw
FUNCTION(_sk_store_bgra_hsw)
_sk_store_bgra_hsw:
- .byte 73,137,201 // mov %rcx,%r9
+ .byte 83 // push %rbx
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,99,80,8 // movslq 0x8(%rax),%r10
- .byte 73,99,201 // movslq %r9d,%rcx
- .byte 73,15,175,202 // imul %r10,%rcx
- .byte 72,193,225,2 // shl $0x2,%rcx
- .byte 72,3,8 // add (%rax),%rcx
- .byte 72,99,194 // movslq %edx,%rax
- .byte 72,141,4,129 // lea (%rcx,%rax,4),%rax
- .byte 196,98,125,24,5,131,42,0,0 // vbroadcastss 0x2a83(%rip),%ymm8 # 6bd8 <_sk_clut_4D_hsw+0xc4e>
+ .byte 76,99,201 // movslq %ecx,%r9
+ .byte 77,15,175,202 // imul %r10,%r9
+ .byte 73,193,225,2 // shl $0x2,%r9
+ .byte 76,3,8 // add (%rax),%r9
+ .byte 72,99,218 // movslq %edx,%rbx
+ .byte 196,98,125,24,5,233,42,0,0 // vbroadcastss 0x2ae9(%rip),%ymm8 # 70a4 <_sk_clut_4D_hsw+0xc4e>
.byte 196,65,108,89,200 // vmulps %ymm8,%ymm2,%ymm9
.byte 196,65,125,91,201 // vcvtps2dq %ymm9,%ymm9
.byte 196,65,116,89,208 // vmulps %ymm8,%ymm1,%ymm10
@@ -20721,20 +21033,52 @@ _sk_store_bgra_hsw:
.byte 196,65,45,235,192 // vpor %ymm8,%ymm10,%ymm8
.byte 196,65,53,235,192 // vpor %ymm8,%ymm9,%ymm8
.byte 77,133,192 // test %r8,%r8
- .byte 117,11 // jne 41ae <_sk_store_bgra_hsw+0x80>
- .byte 197,124,17,0 // vmovups %ymm8,(%rax)
+ .byte 117,11 // jne 4614 <_sk_store_bgra_hsw+0x7a>
+ .byte 196,65,126,127,4,153 // vmovdqu %ymm8,(%r9,%rbx,4)
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 76,137,201 // mov %r9,%rcx
+ .byte 91 // pop %rbx
+ .byte 255,224 // jmpq *%rax
+ .byte 69,137,194 // mov %r8d,%r10d
+ .byte 65,128,226,7 // and $0x7,%r10b
+ .byte 65,254,202 // dec %r10b
+ .byte 65,128,250,6 // cmp $0x6,%r10b
+ .byte 119,235 // ja 460f <_sk_store_bgra_hsw+0x75>
+ .byte 69,15,182,210 // movzbl %r10b,%r10d
+ .byte 76,141,29,85,0,0,0 // lea 0x55(%rip),%r11 # 4684 <_sk_store_bgra_hsw+0xea>
+ .byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
+ .byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
- .byte 185,8,0,0,0 // mov $0x8,%ecx
- .byte 68,41,193 // sub %r8d,%ecx
- .byte 192,225,3 // shl $0x3,%cl
- .byte 73,199,194,255,255,255,255 // mov $0xffffffffffffffff,%r10
- .byte 73,211,234 // shr %cl,%r10
- .byte 196,65,249,110,202 // vmovq %r10,%xmm9
- .byte 196,66,125,33,201 // vpmovsxbd %xmm9,%ymm9
- .byte 196,98,53,46,0 // vmaskmovps %ymm8,%ymm9,(%rax)
- .byte 235,211 // jmp 41a7 <_sk_store_bgra_hsw+0x79>
+ .byte 196,65,121,126,4,153 // vmovd %xmm8,(%r9,%rbx,4)
+ .byte 235,207 // jmp 460f <_sk_store_bgra_hsw+0x75>
+ .byte 196,67,121,22,68,153,8,2 // vpextrd $0x2,%xmm8,0x8(%r9,%rbx,4)
+ .byte 196,65,121,214,4,153 // vmovq %xmm8,(%r9,%rbx,4)
+ .byte 235,191 // jmp 460f <_sk_store_bgra_hsw+0x75>
+ .byte 196,67,125,57,193,1 // vextracti128 $0x1,%ymm8,%xmm9
+ .byte 196,67,121,22,76,153,24,2 // vpextrd $0x2,%xmm9,0x18(%r9,%rbx,4)
+ .byte 196,67,125,57,193,1 // vextracti128 $0x1,%ymm8,%xmm9
+ .byte 196,67,121,22,76,153,20,1 // vpextrd $0x1,%xmm9,0x14(%r9,%rbx,4)
+ .byte 196,67,125,57,193,1 // vextracti128 $0x1,%ymm8,%xmm9
+ .byte 196,65,121,126,76,153,16 // vmovd %xmm9,0x10(%r9,%rbx,4)
+ .byte 196,65,122,127,4,153 // vmovdqu %xmm8,(%r9,%rbx,4)
+ .byte 235,142 // jmp 460f <_sk_store_bgra_hsw+0x75>
+ .byte 15,31,0 // nopl (%rax)
+ .byte 180,255 // mov $0xff,%ah
+ .byte 255 // (bad)
+ .byte 255,196 // inc %esp
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 188,255,255,255,245 // mov $0xf5ffffff,%esp
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 232,255,255,255,218 // callq ffffffffdb004698 <_sk_clut_4D_hsw+0xffffffffdaffe242>
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,204 // dec %esp
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // .byte 0xff
HIDDEN _sk_load_f16_hsw
.globl _sk_load_f16_hsw
@@ -20748,7 +21092,7 @@ _sk_load_f16_hsw:
.byte 76,3,8 // add (%rax),%r9
.byte 72,99,194 // movslq %edx,%rax
.byte 77,133,192 // test %r8,%r8
- .byte 117,101 // jne 4255 <_sk_load_f16_hsw+0x81>
+ .byte 117,101 // jne 4721 <_sk_load_f16_hsw+0x81>
.byte 196,65,121,16,4,193 // vmovupd (%r9,%rax,8),%xmm8
.byte 196,193,121,16,84,193,16 // vmovupd 0x10(%r9,%rax,8),%xmm2
.byte 196,193,121,16,92,193,32 // vmovupd 0x20(%r9,%rax,8),%xmm3
@@ -20774,29 +21118,29 @@ _sk_load_f16_hsw:
.byte 196,65,123,16,4,193 // vmovsd (%r9,%rax,8),%xmm8
.byte 196,65,49,239,201 // vpxor %xmm9,%xmm9,%xmm9
.byte 73,131,248,1 // cmp $0x1,%r8
- .byte 116,85 // je 42bb <_sk_load_f16_hsw+0xe7>
+ .byte 116,85 // je 4787 <_sk_load_f16_hsw+0xe7>
.byte 196,65,57,22,68,193,8 // vmovhpd 0x8(%r9,%rax,8),%xmm8,%xmm8
.byte 73,131,248,3 // cmp $0x3,%r8
- .byte 114,72 // jb 42bb <_sk_load_f16_hsw+0xe7>
+ .byte 114,72 // jb 4787 <_sk_load_f16_hsw+0xe7>
.byte 196,193,123,16,84,193,16 // vmovsd 0x10(%r9,%rax,8),%xmm2
.byte 73,131,248,3 // cmp $0x3,%r8
- .byte 116,72 // je 42c8 <_sk_load_f16_hsw+0xf4>
+ .byte 116,72 // je 4794 <_sk_load_f16_hsw+0xf4>
.byte 196,193,105,22,84,193,24 // vmovhpd 0x18(%r9,%rax,8),%xmm2,%xmm2
.byte 73,131,248,5 // cmp $0x5,%r8
- .byte 114,59 // jb 42c8 <_sk_load_f16_hsw+0xf4>
+ .byte 114,59 // jb 4794 <_sk_load_f16_hsw+0xf4>
.byte 196,193,123,16,92,193,32 // vmovsd 0x20(%r9,%rax,8),%xmm3
.byte 73,131,248,5 // cmp $0x5,%r8
- .byte 15,132,109,255,255,255 // je 420b <_sk_load_f16_hsw+0x37>
+ .byte 15,132,109,255,255,255 // je 46d7 <_sk_load_f16_hsw+0x37>
.byte 196,193,97,22,92,193,40 // vmovhpd 0x28(%r9,%rax,8),%xmm3,%xmm3
.byte 73,131,248,7 // cmp $0x7,%r8
- .byte 15,130,92,255,255,255 // jb 420b <_sk_load_f16_hsw+0x37>
+ .byte 15,130,92,255,255,255 // jb 46d7 <_sk_load_f16_hsw+0x37>
.byte 196,65,122,126,76,193,48 // vmovq 0x30(%r9,%rax,8),%xmm9
- .byte 233,80,255,255,255 // jmpq 420b <_sk_load_f16_hsw+0x37>
+ .byte 233,80,255,255,255 // jmpq 46d7 <_sk_load_f16_hsw+0x37>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
.byte 197,233,87,210 // vxorpd %xmm2,%xmm2,%xmm2
- .byte 233,67,255,255,255 // jmpq 420b <_sk_load_f16_hsw+0x37>
+ .byte 233,67,255,255,255 // jmpq 46d7 <_sk_load_f16_hsw+0x37>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
- .byte 233,58,255,255,255 // jmpq 420b <_sk_load_f16_hsw+0x37>
+ .byte 233,58,255,255,255 // jmpq 46d7 <_sk_load_f16_hsw+0x37>
HIDDEN _sk_load_f16_dst_hsw
.globl _sk_load_f16_dst_hsw
@@ -20810,7 +21154,7 @@ _sk_load_f16_dst_hsw:
.byte 76,3,8 // add (%rax),%r9
.byte 72,99,194 // movslq %edx,%rax
.byte 77,133,192 // test %r8,%r8
- .byte 117,101 // jne 4352 <_sk_load_f16_dst_hsw+0x81>
+ .byte 117,101 // jne 481e <_sk_load_f16_dst_hsw+0x81>
.byte 196,65,121,16,4,193 // vmovupd (%r9,%rax,8),%xmm8
.byte 196,193,121,16,116,193,16 // vmovupd 0x10(%r9,%rax,8),%xmm6
.byte 196,193,121,16,124,193,32 // vmovupd 0x20(%r9,%rax,8),%xmm7
@@ -20836,29 +21180,29 @@ _sk_load_f16_dst_hsw:
.byte 196,65,123,16,4,193 // vmovsd (%r9,%rax,8),%xmm8
.byte 196,65,49,239,201 // vpxor %xmm9,%xmm9,%xmm9
.byte 73,131,248,1 // cmp $0x1,%r8
- .byte 116,85 // je 43b8 <_sk_load_f16_dst_hsw+0xe7>
+ .byte 116,85 // je 4884 <_sk_load_f16_dst_hsw+0xe7>
.byte 196,65,57,22,68,193,8 // vmovhpd 0x8(%r9,%rax,8),%xmm8,%xmm8
.byte 73,131,248,3 // cmp $0x3,%r8
- .byte 114,72 // jb 43b8 <_sk_load_f16_dst_hsw+0xe7>
+ .byte 114,72 // jb 4884 <_sk_load_f16_dst_hsw+0xe7>
.byte 196,193,123,16,116,193,16 // vmovsd 0x10(%r9,%rax,8),%xmm6
.byte 73,131,248,3 // cmp $0x3,%r8
- .byte 116,72 // je 43c5 <_sk_load_f16_dst_hsw+0xf4>
+ .byte 116,72 // je 4891 <_sk_load_f16_dst_hsw+0xf4>
.byte 196,193,73,22,116,193,24 // vmovhpd 0x18(%r9,%rax,8),%xmm6,%xmm6
.byte 73,131,248,5 // cmp $0x5,%r8
- .byte 114,59 // jb 43c5 <_sk_load_f16_dst_hsw+0xf4>
+ .byte 114,59 // jb 4891 <_sk_load_f16_dst_hsw+0xf4>
.byte 196,193,123,16,124,193,32 // vmovsd 0x20(%r9,%rax,8),%xmm7
.byte 73,131,248,5 // cmp $0x5,%r8
- .byte 15,132,109,255,255,255 // je 4308 <_sk_load_f16_dst_hsw+0x37>
+ .byte 15,132,109,255,255,255 // je 47d4 <_sk_load_f16_dst_hsw+0x37>
.byte 196,193,65,22,124,193,40 // vmovhpd 0x28(%r9,%rax,8),%xmm7,%xmm7
.byte 73,131,248,7 // cmp $0x7,%r8
- .byte 15,130,92,255,255,255 // jb 4308 <_sk_load_f16_dst_hsw+0x37>
+ .byte 15,130,92,255,255,255 // jb 47d4 <_sk_load_f16_dst_hsw+0x37>
.byte 196,65,122,126,76,193,48 // vmovq 0x30(%r9,%rax,8),%xmm9
- .byte 233,80,255,255,255 // jmpq 4308 <_sk_load_f16_dst_hsw+0x37>
+ .byte 233,80,255,255,255 // jmpq 47d4 <_sk_load_f16_dst_hsw+0x37>
.byte 197,193,87,255 // vxorpd %xmm7,%xmm7,%xmm7
.byte 197,201,87,246 // vxorpd %xmm6,%xmm6,%xmm6
- .byte 233,67,255,255,255 // jmpq 4308 <_sk_load_f16_dst_hsw+0x37>
+ .byte 233,67,255,255,255 // jmpq 47d4 <_sk_load_f16_dst_hsw+0x37>
.byte 197,193,87,255 // vxorpd %xmm7,%xmm7,%xmm7
- .byte 233,58,255,255,255 // jmpq 4308 <_sk_load_f16_dst_hsw+0x37>
+ .byte 233,58,255,255,255 // jmpq 47d4 <_sk_load_f16_dst_hsw+0x37>
HIDDEN _sk_gather_f16_hsw
.globl _sk_gather_f16_hsw
@@ -20921,7 +21265,7 @@ _sk_store_f16_hsw:
.byte 196,65,57,98,205 // vpunpckldq %xmm13,%xmm8,%xmm9
.byte 196,65,57,106,197 // vpunpckhdq %xmm13,%xmm8,%xmm8
.byte 77,133,192 // test %r8,%r8
- .byte 117,31 // jne 44d3 <_sk_store_f16_hsw+0x7b>
+ .byte 117,31 // jne 499f <_sk_store_f16_hsw+0x7b>
.byte 196,65,120,17,28,193 // vmovups %xmm11,(%r9,%rax,8)
.byte 196,65,120,17,84,193,16 // vmovups %xmm10,0x10(%r9,%rax,8)
.byte 196,65,120,17,76,193,32 // vmovups %xmm9,0x20(%r9,%rax,8)
@@ -20930,22 +21274,22 @@ _sk_store_f16_hsw:
.byte 255,224 // jmpq *%rax
.byte 196,65,121,214,28,193 // vmovq %xmm11,(%r9,%rax,8)
.byte 73,131,248,1 // cmp $0x1,%r8
- .byte 116,240 // je 44cf <_sk_store_f16_hsw+0x77>
+ .byte 116,240 // je 499b <_sk_store_f16_hsw+0x77>
.byte 196,65,121,23,92,193,8 // vmovhpd %xmm11,0x8(%r9,%rax,8)
.byte 73,131,248,3 // cmp $0x3,%r8
- .byte 114,227 // jb 44cf <_sk_store_f16_hsw+0x77>
+ .byte 114,227 // jb 499b <_sk_store_f16_hsw+0x77>
.byte 196,65,121,214,84,193,16 // vmovq %xmm10,0x10(%r9,%rax,8)
- .byte 116,218 // je 44cf <_sk_store_f16_hsw+0x77>
+ .byte 116,218 // je 499b <_sk_store_f16_hsw+0x77>
.byte 196,65,121,23,84,193,24 // vmovhpd %xmm10,0x18(%r9,%rax,8)
.byte 73,131,248,5 // cmp $0x5,%r8
- .byte 114,205 // jb 44cf <_sk_store_f16_hsw+0x77>
+ .byte 114,205 // jb 499b <_sk_store_f16_hsw+0x77>
.byte 196,65,121,214,76,193,32 // vmovq %xmm9,0x20(%r9,%rax,8)
- .byte 116,196 // je 44cf <_sk_store_f16_hsw+0x77>
+ .byte 116,196 // je 499b <_sk_store_f16_hsw+0x77>
.byte 196,65,121,23,76,193,40 // vmovhpd %xmm9,0x28(%r9,%rax,8)
.byte 73,131,248,7 // cmp $0x7,%r8
- .byte 114,183 // jb 44cf <_sk_store_f16_hsw+0x77>
+ .byte 114,183 // jb 499b <_sk_store_f16_hsw+0x77>
.byte 196,65,121,214,68,193,48 // vmovq %xmm8,0x30(%r9,%rax,8)
- .byte 235,174 // jmp 44cf <_sk_store_f16_hsw+0x77>
+ .byte 235,174 // jmp 499b <_sk_store_f16_hsw+0x77>
HIDDEN _sk_load_u16_be_hsw
.globl _sk_load_u16_be_hsw
@@ -20960,7 +21304,7 @@ _sk_load_u16_be_hsw:
.byte 76,3,8 // add (%rax),%r9
.byte 73,99,194 // movslq %r10d,%rax
.byte 77,133,192 // test %r8,%r8
- .byte 15,133,204,0,0,0 // jne 4614 <_sk_load_u16_be_hsw+0xf3>
+ .byte 15,133,204,0,0,0 // jne 4ae0 <_sk_load_u16_be_hsw+0xf3>
.byte 196,65,121,16,4,65 // vmovupd (%r9,%rax,2),%xmm8
.byte 196,193,121,16,84,65,16 // vmovupd 0x10(%r9,%rax,2),%xmm2
.byte 196,193,121,16,92,65,32 // vmovupd 0x20(%r9,%rax,2),%xmm3
@@ -20979,7 +21323,7 @@ _sk_load_u16_be_hsw:
.byte 197,241,235,192 // vpor %xmm0,%xmm1,%xmm0
.byte 196,226,125,51,192 // vpmovzxwd %xmm0,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,98,125,24,21,51,38,0,0 // vbroadcastss 0x2633(%rip),%ymm10 # 6bdc <_sk_clut_4D_hsw+0xc52>
+ .byte 196,98,125,24,21,51,38,0,0 // vbroadcastss 0x2633(%rip),%ymm10 # 70a8 <_sk_clut_4D_hsw+0xc52>
.byte 196,193,124,89,194 // vmulps %ymm10,%ymm0,%ymm0
.byte 197,185,109,202 // vpunpckhqdq %xmm2,%xmm8,%xmm1
.byte 197,233,113,241,8 // vpsllw $0x8,%xmm1,%xmm2
@@ -21007,29 +21351,29 @@ _sk_load_u16_be_hsw:
.byte 196,65,123,16,4,65 // vmovsd (%r9,%rax,2),%xmm8
.byte 196,65,49,239,201 // vpxor %xmm9,%xmm9,%xmm9
.byte 73,131,248,1 // cmp $0x1,%r8
- .byte 116,85 // je 467a <_sk_load_u16_be_hsw+0x159>
+ .byte 116,85 // je 4b46 <_sk_load_u16_be_hsw+0x159>
.byte 196,65,57,22,68,65,8 // vmovhpd 0x8(%r9,%rax,2),%xmm8,%xmm8
.byte 73,131,248,3 // cmp $0x3,%r8
- .byte 114,72 // jb 467a <_sk_load_u16_be_hsw+0x159>
+ .byte 114,72 // jb 4b46 <_sk_load_u16_be_hsw+0x159>
.byte 196,193,123,16,84,65,16 // vmovsd 0x10(%r9,%rax,2),%xmm2
.byte 73,131,248,3 // cmp $0x3,%r8
- .byte 116,72 // je 4687 <_sk_load_u16_be_hsw+0x166>
+ .byte 116,72 // je 4b53 <_sk_load_u16_be_hsw+0x166>
.byte 196,193,105,22,84,65,24 // vmovhpd 0x18(%r9,%rax,2),%xmm2,%xmm2
.byte 73,131,248,5 // cmp $0x5,%r8
- .byte 114,59 // jb 4687 <_sk_load_u16_be_hsw+0x166>
+ .byte 114,59 // jb 4b53 <_sk_load_u16_be_hsw+0x166>
.byte 196,193,123,16,92,65,32 // vmovsd 0x20(%r9,%rax,2),%xmm3
.byte 73,131,248,5 // cmp $0x5,%r8
- .byte 15,132,6,255,255,255 // je 4563 <_sk_load_u16_be_hsw+0x42>
+ .byte 15,132,6,255,255,255 // je 4a2f <_sk_load_u16_be_hsw+0x42>
.byte 196,193,97,22,92,65,40 // vmovhpd 0x28(%r9,%rax,2),%xmm3,%xmm3
.byte 73,131,248,7 // cmp $0x7,%r8
- .byte 15,130,245,254,255,255 // jb 4563 <_sk_load_u16_be_hsw+0x42>
+ .byte 15,130,245,254,255,255 // jb 4a2f <_sk_load_u16_be_hsw+0x42>
.byte 196,65,122,126,76,65,48 // vmovq 0x30(%r9,%rax,2),%xmm9
- .byte 233,233,254,255,255 // jmpq 4563 <_sk_load_u16_be_hsw+0x42>
+ .byte 233,233,254,255,255 // jmpq 4a2f <_sk_load_u16_be_hsw+0x42>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
.byte 197,233,87,210 // vxorpd %xmm2,%xmm2,%xmm2
- .byte 233,220,254,255,255 // jmpq 4563 <_sk_load_u16_be_hsw+0x42>
+ .byte 233,220,254,255,255 // jmpq 4a2f <_sk_load_u16_be_hsw+0x42>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
- .byte 233,211,254,255,255 // jmpq 4563 <_sk_load_u16_be_hsw+0x42>
+ .byte 233,211,254,255,255 // jmpq 4a2f <_sk_load_u16_be_hsw+0x42>
HIDDEN _sk_load_rgb_u16_be_hsw
.globl _sk_load_rgb_u16_be_hsw
@@ -21046,7 +21390,7 @@ _sk_load_rgb_u16_be_hsw:
.byte 72,141,4,64 // lea (%rax,%rax,2),%rax
.byte 72,193,248,32 // sar $0x20,%rax
.byte 77,133,192 // test %r8,%r8
- .byte 15,133,204,0,0,0 // jne 4787 <_sk_load_rgb_u16_be_hsw+0xf7>
+ .byte 15,133,204,0,0,0 // jne 4c53 <_sk_load_rgb_u16_be_hsw+0xf7>
.byte 196,193,122,111,4,65 // vmovdqu (%r9,%rax,2),%xmm0
.byte 196,193,122,111,84,65,12 // vmovdqu 0xc(%r9,%rax,2),%xmm2
.byte 196,193,122,111,76,65,24 // vmovdqu 0x18(%r9,%rax,2),%xmm1
@@ -21070,7 +21414,7 @@ _sk_load_rgb_u16_be_hsw:
.byte 197,241,235,192 // vpor %xmm0,%xmm1,%xmm0
.byte 196,226,125,51,192 // vpmovzxwd %xmm0,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,98,125,24,21,171,36,0,0 // vbroadcastss 0x24ab(%rip),%ymm10 # 6be0 <_sk_clut_4D_hsw+0xc56>
+ .byte 196,98,125,24,21,171,36,0,0 // vbroadcastss 0x24ab(%rip),%ymm10 # 70ac <_sk_clut_4D_hsw+0xc56>
.byte 196,193,124,89,194 // vmulps %ymm10,%ymm0,%ymm0
.byte 197,185,109,202 // vpunpckhqdq %xmm2,%xmm8,%xmm1
.byte 197,233,113,241,8 // vpsllw $0x8,%xmm1,%xmm2
@@ -21087,41 +21431,41 @@ _sk_load_rgb_u16_be_hsw:
.byte 197,252,91,210 // vcvtdq2ps %ymm2,%ymm2
.byte 196,193,108,89,210 // vmulps %ymm10,%ymm2,%ymm2
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,29,95,36,0,0 // vbroadcastss 0x245f(%rip),%ymm3 # 6be4 <_sk_clut_4D_hsw+0xc5a>
+ .byte 196,226,125,24,29,95,36,0,0 // vbroadcastss 0x245f(%rip),%ymm3 # 70b0 <_sk_clut_4D_hsw+0xc5a>
.byte 255,224 // jmpq *%rax
.byte 196,193,121,110,4,65 // vmovd (%r9,%rax,2),%xmm0
.byte 196,193,121,196,68,65,4,2 // vpinsrw $0x2,0x4(%r9,%rax,2),%xmm0,%xmm0
.byte 73,131,248,1 // cmp $0x1,%r8
- .byte 117,5 // jne 47a0 <_sk_load_rgb_u16_be_hsw+0x110>
- .byte 233,79,255,255,255 // jmpq 46ef <_sk_load_rgb_u16_be_hsw+0x5f>
+ .byte 117,5 // jne 4c6c <_sk_load_rgb_u16_be_hsw+0x110>
+ .byte 233,79,255,255,255 // jmpq 4bbb <_sk_load_rgb_u16_be_hsw+0x5f>
.byte 196,193,121,110,76,65,6 // vmovd 0x6(%r9,%rax,2),%xmm1
.byte 196,65,113,196,68,65,10,2 // vpinsrw $0x2,0xa(%r9,%rax,2),%xmm1,%xmm8
.byte 73,131,248,3 // cmp $0x3,%r8
- .byte 114,26 // jb 47cf <_sk_load_rgb_u16_be_hsw+0x13f>
+ .byte 114,26 // jb 4c9b <_sk_load_rgb_u16_be_hsw+0x13f>
.byte 196,193,121,110,76,65,12 // vmovd 0xc(%r9,%rax,2),%xmm1
.byte 196,193,113,196,84,65,16,2 // vpinsrw $0x2,0x10(%r9,%rax,2),%xmm1,%xmm2
.byte 73,131,248,3 // cmp $0x3,%r8
- .byte 117,10 // jne 47d4 <_sk_load_rgb_u16_be_hsw+0x144>
- .byte 233,32,255,255,255 // jmpq 46ef <_sk_load_rgb_u16_be_hsw+0x5f>
- .byte 233,27,255,255,255 // jmpq 46ef <_sk_load_rgb_u16_be_hsw+0x5f>
+ .byte 117,10 // jne 4ca0 <_sk_load_rgb_u16_be_hsw+0x144>
+ .byte 233,32,255,255,255 // jmpq 4bbb <_sk_load_rgb_u16_be_hsw+0x5f>
+ .byte 233,27,255,255,255 // jmpq 4bbb <_sk_load_rgb_u16_be_hsw+0x5f>
.byte 196,193,121,110,76,65,18 // vmovd 0x12(%r9,%rax,2),%xmm1
.byte 196,65,113,196,76,65,22,2 // vpinsrw $0x2,0x16(%r9,%rax,2),%xmm1,%xmm9
.byte 73,131,248,5 // cmp $0x5,%r8
- .byte 114,26 // jb 4803 <_sk_load_rgb_u16_be_hsw+0x173>
+ .byte 114,26 // jb 4ccf <_sk_load_rgb_u16_be_hsw+0x173>
.byte 196,193,121,110,76,65,24 // vmovd 0x18(%r9,%rax,2),%xmm1
.byte 196,193,113,196,76,65,28,2 // vpinsrw $0x2,0x1c(%r9,%rax,2),%xmm1,%xmm1
.byte 73,131,248,5 // cmp $0x5,%r8
- .byte 117,10 // jne 4808 <_sk_load_rgb_u16_be_hsw+0x178>
- .byte 233,236,254,255,255 // jmpq 46ef <_sk_load_rgb_u16_be_hsw+0x5f>
- .byte 233,231,254,255,255 // jmpq 46ef <_sk_load_rgb_u16_be_hsw+0x5f>
+ .byte 117,10 // jne 4cd4 <_sk_load_rgb_u16_be_hsw+0x178>
+ .byte 233,236,254,255,255 // jmpq 4bbb <_sk_load_rgb_u16_be_hsw+0x5f>
+ .byte 233,231,254,255,255 // jmpq 4bbb <_sk_load_rgb_u16_be_hsw+0x5f>
.byte 196,193,121,110,92,65,30 // vmovd 0x1e(%r9,%rax,2),%xmm3
.byte 196,65,97,196,92,65,34,2 // vpinsrw $0x2,0x22(%r9,%rax,2),%xmm3,%xmm11
.byte 73,131,248,7 // cmp $0x7,%r8
- .byte 114,20 // jb 4831 <_sk_load_rgb_u16_be_hsw+0x1a1>
+ .byte 114,20 // jb 4cfd <_sk_load_rgb_u16_be_hsw+0x1a1>
.byte 196,193,121,110,92,65,36 // vmovd 0x24(%r9,%rax,2),%xmm3
.byte 196,193,97,196,92,65,40,2 // vpinsrw $0x2,0x28(%r9,%rax,2),%xmm3,%xmm3
- .byte 233,190,254,255,255 // jmpq 46ef <_sk_load_rgb_u16_be_hsw+0x5f>
- .byte 233,185,254,255,255 // jmpq 46ef <_sk_load_rgb_u16_be_hsw+0x5f>
+ .byte 233,190,254,255,255 // jmpq 4bbb <_sk_load_rgb_u16_be_hsw+0x5f>
+ .byte 233,185,254,255,255 // jmpq 4bbb <_sk_load_rgb_u16_be_hsw+0x5f>
HIDDEN _sk_store_u16_be_hsw
.globl _sk_store_u16_be_hsw
@@ -21135,7 +21479,7 @@ _sk_store_u16_be_hsw:
.byte 77,1,201 // add %r9,%r9
.byte 76,3,8 // add (%rax),%r9
.byte 73,99,194 // movslq %r10d,%rax
- .byte 196,98,125,24,5,139,35,0,0 // vbroadcastss 0x238b(%rip),%ymm8 # 6be8 <_sk_clut_4D_hsw+0xc5e>
+ .byte 196,98,125,24,5,139,35,0,0 // vbroadcastss 0x238b(%rip),%ymm8 # 70b4 <_sk_clut_4D_hsw+0xc5e>
.byte 196,65,124,89,200 // vmulps %ymm8,%ymm0,%ymm9
.byte 196,65,125,91,201 // vcvtps2dq %ymm9,%ymm9
.byte 196,67,125,25,202,1 // vextractf128 $0x1,%ymm9,%xmm10
@@ -21173,7 +21517,7 @@ _sk_store_u16_be_hsw:
.byte 196,65,17,98,200 // vpunpckldq %xmm8,%xmm13,%xmm9
.byte 196,65,17,106,192 // vpunpckhdq %xmm8,%xmm13,%xmm8
.byte 77,133,192 // test %r8,%r8
- .byte 117,31 // jne 4941 <_sk_store_u16_be_hsw+0x10b>
+ .byte 117,31 // jne 4e0d <_sk_store_u16_be_hsw+0x10b>
.byte 196,65,120,17,28,65 // vmovups %xmm11,(%r9,%rax,2)
.byte 196,65,120,17,84,65,16 // vmovups %xmm10,0x10(%r9,%rax,2)
.byte 196,65,120,17,76,65,32 // vmovups %xmm9,0x20(%r9,%rax,2)
@@ -21182,22 +21526,22 @@ _sk_store_u16_be_hsw:
.byte 255,224 // jmpq *%rax
.byte 196,65,121,214,28,65 // vmovq %xmm11,(%r9,%rax,2)
.byte 73,131,248,1 // cmp $0x1,%r8
- .byte 116,240 // je 493d <_sk_store_u16_be_hsw+0x107>
+ .byte 116,240 // je 4e09 <_sk_store_u16_be_hsw+0x107>
.byte 196,65,121,23,92,65,8 // vmovhpd %xmm11,0x8(%r9,%rax,2)
.byte 73,131,248,3 // cmp $0x3,%r8
- .byte 114,227 // jb 493d <_sk_store_u16_be_hsw+0x107>
+ .byte 114,227 // jb 4e09 <_sk_store_u16_be_hsw+0x107>
.byte 196,65,121,214,84,65,16 // vmovq %xmm10,0x10(%r9,%rax,2)
- .byte 116,218 // je 493d <_sk_store_u16_be_hsw+0x107>
+ .byte 116,218 // je 4e09 <_sk_store_u16_be_hsw+0x107>
.byte 196,65,121,23,84,65,24 // vmovhpd %xmm10,0x18(%r9,%rax,2)
.byte 73,131,248,5 // cmp $0x5,%r8
- .byte 114,205 // jb 493d <_sk_store_u16_be_hsw+0x107>
+ .byte 114,205 // jb 4e09 <_sk_store_u16_be_hsw+0x107>
.byte 196,65,121,214,76,65,32 // vmovq %xmm9,0x20(%r9,%rax,2)
- .byte 116,196 // je 493d <_sk_store_u16_be_hsw+0x107>
+ .byte 116,196 // je 4e09 <_sk_store_u16_be_hsw+0x107>
.byte 196,65,121,23,76,65,40 // vmovhpd %xmm9,0x28(%r9,%rax,2)
.byte 73,131,248,7 // cmp $0x7,%r8
- .byte 114,183 // jb 493d <_sk_store_u16_be_hsw+0x107>
+ .byte 114,183 // jb 4e09 <_sk_store_u16_be_hsw+0x107>
.byte 196,65,121,214,68,65,48 // vmovq %xmm8,0x30(%r9,%rax,2)
- .byte 235,174 // jmp 493d <_sk_store_u16_be_hsw+0x107>
+ .byte 235,174 // jmp 4e09 <_sk_store_u16_be_hsw+0x107>
HIDDEN _sk_load_f32_hsw
.globl _sk_load_f32_hsw
@@ -21205,7 +21549,7 @@ FUNCTION(_sk_load_f32_hsw)
_sk_load_f32_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 73,131,248,7 // cmp $0x7,%r8
- .byte 15,135,128,0,0,0 // ja 4a1b <_sk_load_f32_hsw+0x8c>
+ .byte 15,135,128,0,0,0 // ja 4ee7 <_sk_load_f32_hsw+0x8c>
.byte 68,141,20,149,0,0,0,0 // lea 0x0(,%rdx,4),%r10d
.byte 76,99,88,8 // movslq 0x8(%rax),%r11
.byte 76,99,201 // movslq %ecx,%r9
@@ -21213,7 +21557,7 @@ _sk_load_f32_hsw:
.byte 73,193,225,2 // shl $0x2,%r9
.byte 76,3,8 // add (%rax),%r9
.byte 77,99,210 // movslq %r10d,%r10
- .byte 76,141,29,133,0,0,0 // lea 0x85(%rip),%r11 # 4a44 <_sk_load_f32_hsw+0xb5>
+ .byte 76,141,29,133,0,0,0 // lea 0x85(%rip),%r11 # 4f10 <_sk_load_f32_hsw+0xb5>
.byte 75,99,4,131 // movslq (%r11,%r8,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
@@ -21261,7 +21605,7 @@ FUNCTION(_sk_load_f32_dst_hsw)
_sk_load_f32_dst_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 73,131,248,7 // cmp $0x7,%r8
- .byte 15,135,128,0,0,0 // ja 4af0 <_sk_load_f32_dst_hsw+0x8c>
+ .byte 15,135,128,0,0,0 // ja 4fbc <_sk_load_f32_dst_hsw+0x8c>
.byte 68,141,20,149,0,0,0,0 // lea 0x0(,%rdx,4),%r10d
.byte 76,99,88,8 // movslq 0x8(%rax),%r11
.byte 76,99,201 // movslq %ecx,%r9
@@ -21269,7 +21613,7 @@ _sk_load_f32_dst_hsw:
.byte 73,193,225,2 // shl $0x2,%r9
.byte 76,3,8 // add (%rax),%r9
.byte 77,99,210 // movslq %r10d,%r10
- .byte 76,141,29,132,0,0,0 // lea 0x84(%rip),%r11 # 4b18 <_sk_load_f32_dst_hsw+0xb4>
+ .byte 76,141,29,132,0,0,0 // lea 0x84(%rip),%r11 # 4fe4 <_sk_load_f32_dst_hsw+0xb4>
.byte 75,99,4,131 // movslq (%r11,%r8,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
@@ -21333,7 +21677,7 @@ _sk_store_f32_hsw:
.byte 196,65,37,20,196 // vunpcklpd %ymm12,%ymm11,%ymm8
.byte 196,65,37,21,220 // vunpckhpd %ymm12,%ymm11,%ymm11
.byte 77,133,192 // test %r8,%r8
- .byte 117,55 // jne 4bb7 <_sk_store_f32_hsw+0x7f>
+ .byte 117,55 // jne 5083 <_sk_store_f32_hsw+0x7f>
.byte 196,67,45,24,225,1 // vinsertf128 $0x1,%xmm9,%ymm10,%ymm12
.byte 196,67,61,24,235,1 // vinsertf128 $0x1,%xmm11,%ymm8,%ymm13
.byte 196,67,45,6,201,49 // vperm2f128 $0x31,%ymm9,%ymm10,%ymm9
@@ -21346,22 +21690,22 @@ _sk_store_f32_hsw:
.byte 255,224 // jmpq *%rax
.byte 196,65,121,17,20,129 // vmovupd %xmm10,(%r9,%rax,4)
.byte 73,131,248,1 // cmp $0x1,%r8
- .byte 116,240 // je 4bb3 <_sk_store_f32_hsw+0x7b>
+ .byte 116,240 // je 507f <_sk_store_f32_hsw+0x7b>
.byte 196,65,121,17,76,129,16 // vmovupd %xmm9,0x10(%r9,%rax,4)
.byte 73,131,248,3 // cmp $0x3,%r8
- .byte 114,227 // jb 4bb3 <_sk_store_f32_hsw+0x7b>
+ .byte 114,227 // jb 507f <_sk_store_f32_hsw+0x7b>
.byte 196,65,121,17,68,129,32 // vmovupd %xmm8,0x20(%r9,%rax,4)
- .byte 116,218 // je 4bb3 <_sk_store_f32_hsw+0x7b>
+ .byte 116,218 // je 507f <_sk_store_f32_hsw+0x7b>
.byte 196,65,121,17,92,129,48 // vmovupd %xmm11,0x30(%r9,%rax,4)
.byte 73,131,248,5 // cmp $0x5,%r8
- .byte 114,205 // jb 4bb3 <_sk_store_f32_hsw+0x7b>
+ .byte 114,205 // jb 507f <_sk_store_f32_hsw+0x7b>
.byte 196,67,125,25,84,129,64,1 // vextractf128 $0x1,%ymm10,0x40(%r9,%rax,4)
- .byte 116,195 // je 4bb3 <_sk_store_f32_hsw+0x7b>
+ .byte 116,195 // je 507f <_sk_store_f32_hsw+0x7b>
.byte 196,67,125,25,76,129,80,1 // vextractf128 $0x1,%ymm9,0x50(%r9,%rax,4)
.byte 73,131,248,7 // cmp $0x7,%r8
- .byte 114,181 // jb 4bb3 <_sk_store_f32_hsw+0x7b>
+ .byte 114,181 // jb 507f <_sk_store_f32_hsw+0x7b>
.byte 196,67,125,25,68,129,96,1 // vextractf128 $0x1,%ymm8,0x60(%r9,%rax,4)
- .byte 235,171 // jmp 4bb3 <_sk_store_f32_hsw+0x7b>
+ .byte 235,171 // jmp 507f <_sk_store_f32_hsw+0x7b>
HIDDEN _sk_clamp_x_hsw
.globl _sk_clamp_x_hsw
@@ -21438,7 +21782,7 @@ _sk_mirror_x_hsw:
.byte 196,65,124,92,218 // vsubps %ymm10,%ymm0,%ymm11
.byte 196,193,58,88,192 // vaddss %xmm8,%xmm8,%xmm0
.byte 196,98,125,24,192 // vbroadcastss %xmm0,%ymm8
- .byte 197,178,89,5,7,31,0,0 // vmulss 0x1f07(%rip),%xmm9,%xmm0 # 6bec <_sk_clut_4D_hsw+0xc62>
+ .byte 197,178,89,5,7,31,0,0 // vmulss 0x1f07(%rip),%xmm9,%xmm0 # 70b8 <_sk_clut_4D_hsw+0xc62>
.byte 196,226,125,24,192 // vbroadcastss %xmm0,%ymm0
.byte 197,164,89,192 // vmulps %ymm0,%ymm11,%ymm0
.byte 196,227,125,8,192,1 // vroundps $0x1,%ymm0,%ymm0
@@ -21465,7 +21809,7 @@ _sk_mirror_y_hsw:
.byte 196,65,116,92,218 // vsubps %ymm10,%ymm1,%ymm11
.byte 196,193,58,88,200 // vaddss %xmm8,%xmm8,%xmm1
.byte 196,98,125,24,193 // vbroadcastss %xmm1,%ymm8
- .byte 197,178,89,13,167,30,0,0 // vmulss 0x1ea7(%rip),%xmm9,%xmm1 # 6bf0 <_sk_clut_4D_hsw+0xc66>
+ .byte 197,178,89,13,167,30,0,0 // vmulss 0x1ea7(%rip),%xmm9,%xmm1 # 70bc <_sk_clut_4D_hsw+0xc66>
.byte 196,226,125,24,201 // vbroadcastss %xmm1,%ymm1
.byte 197,164,89,201 // vmulps %ymm1,%ymm11,%ymm1
.byte 196,227,125,8,201,1 // vroundps $0x1,%ymm1,%ymm1
@@ -21487,7 +21831,7 @@ FUNCTION(_sk_clamp_x_1_hsw)
_sk_clamp_x_1_hsw:
.byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
.byte 197,188,95,192 // vmaxps %ymm0,%ymm8,%ymm0
- .byte 196,98,125,24,5,92,30,0,0 // vbroadcastss 0x1e5c(%rip),%ymm8 # 6bf4 <_sk_clut_4D_hsw+0xc6a>
+ .byte 196,98,125,24,5,92,30,0,0 // vbroadcastss 0x1e5c(%rip),%ymm8 # 70c0 <_sk_clut_4D_hsw+0xc6a>
.byte 196,193,124,93,192 // vminps %ymm8,%ymm0,%ymm0
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -21505,9 +21849,9 @@ HIDDEN _sk_mirror_x_1_hsw
.globl _sk_mirror_x_1_hsw
FUNCTION(_sk_mirror_x_1_hsw)
_sk_mirror_x_1_hsw:
- .byte 196,98,125,24,5,63,30,0,0 // vbroadcastss 0x1e3f(%rip),%ymm8 # 6bf8 <_sk_clut_4D_hsw+0xc6e>
+ .byte 196,98,125,24,5,63,30,0,0 // vbroadcastss 0x1e3f(%rip),%ymm8 # 70c4 <_sk_clut_4D_hsw+0xc6e>
.byte 196,193,124,88,192 // vaddps %ymm8,%ymm0,%ymm0
- .byte 196,98,125,24,13,53,30,0,0 // vbroadcastss 0x1e35(%rip),%ymm9 # 6bfc <_sk_clut_4D_hsw+0xc72>
+ .byte 196,98,125,24,13,53,30,0,0 // vbroadcastss 0x1e35(%rip),%ymm9 # 70c8 <_sk_clut_4D_hsw+0xc72>
.byte 196,65,124,89,201 // vmulps %ymm9,%ymm0,%ymm9
.byte 196,67,125,8,201,1 // vroundps $0x1,%ymm9,%ymm9
.byte 196,65,52,88,201 // vaddps %ymm9,%ymm9,%ymm9
@@ -21523,11 +21867,11 @@ HIDDEN _sk_luminance_to_alpha_hsw
.globl _sk_luminance_to_alpha_hsw
FUNCTION(_sk_luminance_to_alpha_hsw)
_sk_luminance_to_alpha_hsw:
- .byte 196,226,125,24,29,5,30,0,0 // vbroadcastss 0x1e05(%rip),%ymm3 # 6c00 <_sk_clut_4D_hsw+0xc76>
- .byte 196,98,125,24,5,0,30,0,0 // vbroadcastss 0x1e00(%rip),%ymm8 # 6c04 <_sk_clut_4D_hsw+0xc7a>
+ .byte 196,226,125,24,29,5,30,0,0 // vbroadcastss 0x1e05(%rip),%ymm3 # 70cc <_sk_clut_4D_hsw+0xc76>
+ .byte 196,98,125,24,5,0,30,0,0 // vbroadcastss 0x1e00(%rip),%ymm8 # 70d0 <_sk_clut_4D_hsw+0xc7a>
.byte 196,193,116,89,200 // vmulps %ymm8,%ymm1,%ymm1
.byte 196,226,125,184,203 // vfmadd231ps %ymm3,%ymm0,%ymm1
- .byte 196,226,125,24,29,241,29,0,0 // vbroadcastss 0x1df1(%rip),%ymm3 # 6c08 <_sk_clut_4D_hsw+0xc7e>
+ .byte 196,226,125,24,29,241,29,0,0 // vbroadcastss 0x1df1(%rip),%ymm3 # 70d4 <_sk_clut_4D_hsw+0xc7e>
.byte 196,226,109,168,217 // vfmadd213ps %ymm1,%ymm2,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 197,252,87,192 // vxorps %ymm0,%ymm0,%ymm0
@@ -21727,9 +22071,9 @@ _sk_evenly_spaced_gradient_hsw:
.byte 76,139,72,8 // mov 0x8(%rax),%r9
.byte 77,137,211 // mov %r10,%r11
.byte 73,255,203 // dec %r11
- .byte 120,7 // js 5110 <_sk_evenly_spaced_gradient_hsw+0x19>
+ .byte 120,7 // js 55dc <_sk_evenly_spaced_gradient_hsw+0x19>
.byte 196,193,242,42,203 // vcvtsi2ss %r11,%xmm1,%xmm1
- .byte 235,22 // jmp 5126 <_sk_evenly_spaced_gradient_hsw+0x2f>
+ .byte 235,22 // jmp 55f2 <_sk_evenly_spaced_gradient_hsw+0x2f>
.byte 76,137,219 // mov %r11,%rbx
.byte 72,209,235 // shr %rbx
.byte 65,131,227,1 // and $0x1,%r11d
@@ -21740,7 +22084,7 @@ _sk_evenly_spaced_gradient_hsw:
.byte 197,244,89,200 // vmulps %ymm0,%ymm1,%ymm1
.byte 197,126,91,217 // vcvttps2dq %ymm1,%ymm11
.byte 73,131,250,8 // cmp $0x8,%r10
- .byte 119,70 // ja 517f <_sk_evenly_spaced_gradient_hsw+0x88>
+ .byte 119,70 // ja 564b <_sk_evenly_spaced_gradient_hsw+0x88>
.byte 196,66,37,22,1 // vpermps (%r9),%ymm11,%ymm8
.byte 72,139,88,40 // mov 0x28(%rax),%rbx
.byte 196,98,37,22,11 // vpermps (%rbx),%ymm11,%ymm9
@@ -21756,7 +22100,7 @@ _sk_evenly_spaced_gradient_hsw:
.byte 196,226,37,22,27 // vpermps (%rbx),%ymm11,%ymm3
.byte 72,139,64,64 // mov 0x40(%rax),%rax
.byte 196,98,37,22,40 // vpermps (%rax),%ymm11,%ymm13
- .byte 235,110 // jmp 51ed <_sk_evenly_spaced_gradient_hsw+0xf6>
+ .byte 235,110 // jmp 56b9 <_sk_evenly_spaced_gradient_hsw+0xf6>
.byte 196,65,13,118,246 // vpcmpeqd %ymm14,%ymm14,%ymm14
.byte 197,245,118,201 // vpcmpeqd %ymm1,%ymm1,%ymm1
.byte 196,2,117,146,4,153 // vgatherdps %ymm1,(%r9,%ymm11,4),%ymm8
@@ -21793,14 +22137,14 @@ HIDDEN _sk_gauss_a_to_rgba_hsw
.globl _sk_gauss_a_to_rgba_hsw
FUNCTION(_sk_gauss_a_to_rgba_hsw)
_sk_gauss_a_to_rgba_hsw:
- .byte 196,226,125,24,5,249,25,0,0 // vbroadcastss 0x19f9(%rip),%ymm0 # 6c0c <_sk_clut_4D_hsw+0xc82>
- .byte 196,226,125,24,13,244,25,0,0 // vbroadcastss 0x19f4(%rip),%ymm1 # 6c10 <_sk_clut_4D_hsw+0xc86>
+ .byte 196,226,125,24,5,249,25,0,0 // vbroadcastss 0x19f9(%rip),%ymm0 # 70d8 <_sk_clut_4D_hsw+0xc82>
+ .byte 196,226,125,24,13,244,25,0,0 // vbroadcastss 0x19f4(%rip),%ymm1 # 70dc <_sk_clut_4D_hsw+0xc86>
.byte 196,226,101,168,200 // vfmadd213ps %ymm0,%ymm3,%ymm1
- .byte 196,226,125,24,5,234,25,0,0 // vbroadcastss 0x19ea(%rip),%ymm0 # 6c14 <_sk_clut_4D_hsw+0xc8a>
+ .byte 196,226,125,24,5,234,25,0,0 // vbroadcastss 0x19ea(%rip),%ymm0 # 70e0 <_sk_clut_4D_hsw+0xc8a>
.byte 196,226,101,184,193 // vfmadd231ps %ymm1,%ymm3,%ymm0
- .byte 196,226,125,24,13,224,25,0,0 // vbroadcastss 0x19e0(%rip),%ymm1 # 6c18 <_sk_clut_4D_hsw+0xc8e>
+ .byte 196,226,125,24,13,224,25,0,0 // vbroadcastss 0x19e0(%rip),%ymm1 # 70e4 <_sk_clut_4D_hsw+0xc8e>
.byte 196,226,101,184,200 // vfmadd231ps %ymm0,%ymm3,%ymm1
- .byte 196,226,125,24,5,214,25,0,0 // vbroadcastss 0x19d6(%rip),%ymm0 # 6c1c <_sk_clut_4D_hsw+0xc92>
+ .byte 196,226,125,24,5,214,25,0,0 // vbroadcastss 0x19d6(%rip),%ymm0 # 70e8 <_sk_clut_4D_hsw+0xc92>
.byte 196,226,101,184,193 // vfmadd231ps %ymm1,%ymm3,%ymm0
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 197,252,40,200 // vmovaps %ymm0,%ymm1
@@ -21815,11 +22159,11 @@ _sk_gradient_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,8 // mov (%rax),%r9
.byte 73,131,249,1 // cmp $0x1,%r9
- .byte 15,134,180,0,0,0 // jbe 531e <_sk_gradient_hsw+0xc3>
+ .byte 15,134,180,0,0,0 // jbe 57ea <_sk_gradient_hsw+0xc3>
.byte 76,139,80,72 // mov 0x48(%rax),%r10
.byte 197,244,87,201 // vxorps %ymm1,%ymm1,%ymm1
.byte 65,187,1,0,0,0 // mov $0x1,%r11d
- .byte 196,226,125,24,21,159,25,0,0 // vbroadcastss 0x199f(%rip),%ymm2 # 6c20 <_sk_clut_4D_hsw+0xc96>
+ .byte 196,226,125,24,21,159,25,0,0 // vbroadcastss 0x199f(%rip),%ymm2 # 70ec <_sk_clut_4D_hsw+0xc96>
.byte 196,65,53,239,201 // vpxor %ymm9,%ymm9,%ymm9
.byte 196,130,125,24,28,154 // vbroadcastss (%r10,%r11,4),%ymm3
.byte 197,228,194,216,2 // vcmpleps %ymm0,%ymm3,%ymm3
@@ -21827,10 +22171,10 @@ _sk_gradient_hsw:
.byte 196,65,101,254,201 // vpaddd %ymm9,%ymm3,%ymm9
.byte 73,255,195 // inc %r11
.byte 77,57,217 // cmp %r11,%r9
- .byte 117,226 // jne 5286 <_sk_gradient_hsw+0x2b>
+ .byte 117,226 // jne 5752 <_sk_gradient_hsw+0x2b>
.byte 76,139,80,8 // mov 0x8(%rax),%r10
.byte 73,131,249,8 // cmp $0x8,%r9
- .byte 118,121 // jbe 5327 <_sk_gradient_hsw+0xcc>
+ .byte 118,121 // jbe 57f3 <_sk_gradient_hsw+0xcc>
.byte 196,65,13,118,246 // vpcmpeqd %ymm14,%ymm14,%ymm14
.byte 197,245,118,201 // vpcmpeqd %ymm1,%ymm1,%ymm1
.byte 196,2,117,146,4,138 // vgatherdps %ymm1,(%r10,%ymm9,4),%ymm8
@@ -21854,7 +22198,7 @@ _sk_gradient_hsw:
.byte 196,130,21,146,28,137 // vgatherdps %ymm13,(%r9,%ymm9,4),%ymm3
.byte 72,139,64,64 // mov 0x40(%rax),%rax
.byte 196,34,13,146,44,136 // vgatherdps %ymm14,(%rax,%ymm9,4),%ymm13
- .byte 235,77 // jmp 536b <_sk_gradient_hsw+0x110>
+ .byte 235,77 // jmp 5837 <_sk_gradient_hsw+0x110>
.byte 76,139,80,8 // mov 0x8(%rax),%r10
.byte 196,65,52,87,201 // vxorps %ymm9,%ymm9,%ymm9
.byte 196,66,53,22,2 // vpermps (%r10),%ymm9,%ymm8
@@ -21914,24 +22258,24 @@ _sk_xy_to_unit_angle_hsw:
.byte 196,65,52,95,226 // vmaxps %ymm10,%ymm9,%ymm12
.byte 196,65,36,94,220 // vdivps %ymm12,%ymm11,%ymm11
.byte 196,65,36,89,227 // vmulps %ymm11,%ymm11,%ymm12
- .byte 196,98,125,24,45,30,24,0,0 // vbroadcastss 0x181e(%rip),%ymm13 # 6c24 <_sk_clut_4D_hsw+0xc9a>
- .byte 196,98,125,24,53,25,24,0,0 // vbroadcastss 0x1819(%rip),%ymm14 # 6c28 <_sk_clut_4D_hsw+0xc9e>
+ .byte 196,98,125,24,45,30,24,0,0 // vbroadcastss 0x181e(%rip),%ymm13 # 70f0 <_sk_clut_4D_hsw+0xc9a>
+ .byte 196,98,125,24,53,25,24,0,0 // vbroadcastss 0x1819(%rip),%ymm14 # 70f4 <_sk_clut_4D_hsw+0xc9e>
.byte 196,66,29,184,245 // vfmadd231ps %ymm13,%ymm12,%ymm14
- .byte 196,98,125,24,45,15,24,0,0 // vbroadcastss 0x180f(%rip),%ymm13 # 6c2c <_sk_clut_4D_hsw+0xca2>
+ .byte 196,98,125,24,45,15,24,0,0 // vbroadcastss 0x180f(%rip),%ymm13 # 70f8 <_sk_clut_4D_hsw+0xca2>
.byte 196,66,29,184,238 // vfmadd231ps %ymm14,%ymm12,%ymm13
- .byte 196,98,125,24,53,5,24,0,0 // vbroadcastss 0x1805(%rip),%ymm14 # 6c30 <_sk_clut_4D_hsw+0xca6>
+ .byte 196,98,125,24,53,5,24,0,0 // vbroadcastss 0x1805(%rip),%ymm14 # 70fc <_sk_clut_4D_hsw+0xca6>
.byte 196,66,29,184,245 // vfmadd231ps %ymm13,%ymm12,%ymm14
.byte 196,65,36,89,222 // vmulps %ymm14,%ymm11,%ymm11
.byte 196,65,52,194,202,1 // vcmpltps %ymm10,%ymm9,%ymm9
- .byte 196,98,125,24,21,240,23,0,0 // vbroadcastss 0x17f0(%rip),%ymm10 # 6c34 <_sk_clut_4D_hsw+0xcaa>
+ .byte 196,98,125,24,21,240,23,0,0 // vbroadcastss 0x17f0(%rip),%ymm10 # 7100 <_sk_clut_4D_hsw+0xcaa>
.byte 196,65,44,92,211 // vsubps %ymm11,%ymm10,%ymm10
.byte 196,67,37,74,202,144 // vblendvps %ymm9,%ymm10,%ymm11,%ymm9
.byte 196,193,124,194,192,1 // vcmpltps %ymm8,%ymm0,%ymm0
- .byte 196,98,125,24,21,218,23,0,0 // vbroadcastss 0x17da(%rip),%ymm10 # 6c38 <_sk_clut_4D_hsw+0xcae>
+ .byte 196,98,125,24,21,218,23,0,0 // vbroadcastss 0x17da(%rip),%ymm10 # 7104 <_sk_clut_4D_hsw+0xcae>
.byte 196,65,44,92,209 // vsubps %ymm9,%ymm10,%ymm10
.byte 196,195,53,74,194,0 // vblendvps %ymm0,%ymm10,%ymm9,%ymm0
.byte 196,65,116,194,200,1 // vcmpltps %ymm8,%ymm1,%ymm9
- .byte 196,98,125,24,21,196,23,0,0 // vbroadcastss 0x17c4(%rip),%ymm10 # 6c3c <_sk_clut_4D_hsw+0xcb2>
+ .byte 196,98,125,24,21,196,23,0,0 // vbroadcastss 0x17c4(%rip),%ymm10 # 7108 <_sk_clut_4D_hsw+0xcb2>
.byte 197,44,92,208 // vsubps %ymm0,%ymm10,%ymm10
.byte 196,195,125,74,194,144 // vblendvps %ymm9,%ymm10,%ymm0,%ymm0
.byte 196,65,124,194,200,3 // vcmpunordps %ymm8,%ymm0,%ymm9
@@ -21959,23 +22303,23 @@ _sk_xy_to_2pt_conical_quadratic_max_hsw:
.byte 197,50,89,80,44 // vmulss 0x2c(%rax),%xmm9,%xmm10
.byte 196,66,125,24,210 // vbroadcastss %xmm10,%ymm10
.byte 197,44,88,208 // vaddps %ymm0,%ymm10,%ymm10
- .byte 196,98,125,24,29,120,23,0,0 // vbroadcastss 0x1778(%rip),%ymm11 # 6c40 <_sk_clut_4D_hsw+0xcb6>
+ .byte 196,98,125,24,29,120,23,0,0 // vbroadcastss 0x1778(%rip),%ymm11 # 710c <_sk_clut_4D_hsw+0xcb6>
.byte 196,65,44,89,211 // vmulps %ymm11,%ymm10,%ymm10
.byte 197,116,89,217 // vmulps %ymm1,%ymm1,%ymm11
.byte 196,98,125,184,216 // vfmadd231ps %ymm0,%ymm0,%ymm11
.byte 196,193,50,89,193 // vmulss %xmm9,%xmm9,%xmm0
.byte 196,226,125,24,192 // vbroadcastss %xmm0,%ymm0
.byte 197,164,92,192 // vsubps %ymm0,%ymm11,%ymm0
- .byte 196,98,125,24,13,87,23,0,0 // vbroadcastss 0x1757(%rip),%ymm9 # 6c44 <_sk_clut_4D_hsw+0xcba>
+ .byte 196,98,125,24,13,87,23,0,0 // vbroadcastss 0x1757(%rip),%ymm9 # 7110 <_sk_clut_4D_hsw+0xcba>
.byte 196,65,60,89,193 // vmulps %ymm9,%ymm8,%ymm8
.byte 197,188,89,192 // vmulps %ymm0,%ymm8,%ymm0
.byte 196,194,45,184,194 // vfmadd231ps %ymm10,%ymm10,%ymm0
.byte 197,252,81,192 // vsqrtps %ymm0,%ymm0
.byte 196,98,125,24,64,36 // vbroadcastss 0x24(%rax),%ymm8
- .byte 196,98,125,24,13,58,23,0,0 // vbroadcastss 0x173a(%rip),%ymm9 # 6c48 <_sk_clut_4D_hsw+0xcbe>
+ .byte 196,98,125,24,13,58,23,0,0 // vbroadcastss 0x173a(%rip),%ymm9 # 7114 <_sk_clut_4D_hsw+0xcbe>
.byte 196,65,44,87,201 // vxorps %ymm9,%ymm10,%ymm9
.byte 196,65,124,92,210 // vsubps %ymm10,%ymm0,%ymm10
- .byte 196,98,125,24,29,43,23,0,0 // vbroadcastss 0x172b(%rip),%ymm11 # 6c4c <_sk_clut_4D_hsw+0xcc2>
+ .byte 196,98,125,24,29,43,23,0,0 // vbroadcastss 0x172b(%rip),%ymm11 # 7118 <_sk_clut_4D_hsw+0xcc2>
.byte 196,65,60,89,195 // vmulps %ymm11,%ymm8,%ymm8
.byte 196,65,44,89,208 // vmulps %ymm8,%ymm10,%ymm10
.byte 197,180,92,192 // vsubps %ymm0,%ymm9,%ymm0
@@ -21994,23 +22338,23 @@ _sk_xy_to_2pt_conical_quadratic_min_hsw:
.byte 197,50,89,80,44 // vmulss 0x2c(%rax),%xmm9,%xmm10
.byte 196,66,125,24,210 // vbroadcastss %xmm10,%ymm10
.byte 197,44,88,208 // vaddps %ymm0,%ymm10,%ymm10
- .byte 196,98,125,24,29,240,22,0,0 // vbroadcastss 0x16f0(%rip),%ymm11 # 6c50 <_sk_clut_4D_hsw+0xcc6>
+ .byte 196,98,125,24,29,240,22,0,0 // vbroadcastss 0x16f0(%rip),%ymm11 # 711c <_sk_clut_4D_hsw+0xcc6>
.byte 196,65,44,89,211 // vmulps %ymm11,%ymm10,%ymm10
.byte 197,116,89,217 // vmulps %ymm1,%ymm1,%ymm11
.byte 196,98,125,184,216 // vfmadd231ps %ymm0,%ymm0,%ymm11
.byte 196,193,50,89,193 // vmulss %xmm9,%xmm9,%xmm0
.byte 196,226,125,24,192 // vbroadcastss %xmm0,%ymm0
.byte 197,164,92,192 // vsubps %ymm0,%ymm11,%ymm0
- .byte 196,98,125,24,13,207,22,0,0 // vbroadcastss 0x16cf(%rip),%ymm9 # 6c54 <_sk_clut_4D_hsw+0xcca>
+ .byte 196,98,125,24,13,207,22,0,0 // vbroadcastss 0x16cf(%rip),%ymm9 # 7120 <_sk_clut_4D_hsw+0xcca>
.byte 196,65,60,89,193 // vmulps %ymm9,%ymm8,%ymm8
.byte 197,188,89,192 // vmulps %ymm0,%ymm8,%ymm0
.byte 196,194,45,184,194 // vfmadd231ps %ymm10,%ymm10,%ymm0
.byte 197,252,81,192 // vsqrtps %ymm0,%ymm0
.byte 196,98,125,24,64,36 // vbroadcastss 0x24(%rax),%ymm8
- .byte 196,98,125,24,13,178,22,0,0 // vbroadcastss 0x16b2(%rip),%ymm9 # 6c58 <_sk_clut_4D_hsw+0xcce>
+ .byte 196,98,125,24,13,178,22,0,0 // vbroadcastss 0x16b2(%rip),%ymm9 # 7124 <_sk_clut_4D_hsw+0xcce>
.byte 196,65,44,87,201 // vxorps %ymm9,%ymm10,%ymm9
.byte 196,65,124,92,210 // vsubps %ymm10,%ymm0,%ymm10
- .byte 196,98,125,24,29,163,22,0,0 // vbroadcastss 0x16a3(%rip),%ymm11 # 6c5c <_sk_clut_4D_hsw+0xcd2>
+ .byte 196,98,125,24,29,163,22,0,0 // vbroadcastss 0x16a3(%rip),%ymm11 # 7128 <_sk_clut_4D_hsw+0xcd2>
.byte 196,65,60,89,195 // vmulps %ymm11,%ymm8,%ymm8
.byte 196,65,44,89,208 // vmulps %ymm8,%ymm10,%ymm10
.byte 197,180,92,192 // vsubps %ymm0,%ymm9,%ymm0
@@ -22028,14 +22372,14 @@ _sk_xy_to_2pt_conical_linear_hsw:
.byte 197,58,89,72,44 // vmulss 0x2c(%rax),%xmm8,%xmm9
.byte 196,66,125,24,201 // vbroadcastss %xmm9,%ymm9
.byte 197,52,88,200 // vaddps %ymm0,%ymm9,%ymm9
- .byte 196,98,125,24,21,110,22,0,0 // vbroadcastss 0x166e(%rip),%ymm10 # 6c60 <_sk_clut_4D_hsw+0xcd6>
+ .byte 196,98,125,24,21,110,22,0,0 // vbroadcastss 0x166e(%rip),%ymm10 # 712c <_sk_clut_4D_hsw+0xcd6>
.byte 196,65,52,89,202 // vmulps %ymm10,%ymm9,%ymm9
.byte 197,116,89,209 // vmulps %ymm1,%ymm1,%ymm10
.byte 196,98,125,184,208 // vfmadd231ps %ymm0,%ymm0,%ymm10
.byte 196,193,58,89,192 // vmulss %xmm8,%xmm8,%xmm0
.byte 196,226,125,24,192 // vbroadcastss %xmm0,%ymm0
.byte 197,172,92,192 // vsubps %ymm0,%ymm10,%ymm0
- .byte 196,98,125,24,5,77,22,0,0 // vbroadcastss 0x164d(%rip),%ymm8 # 6c64 <_sk_clut_4D_hsw+0xcda>
+ .byte 196,98,125,24,5,77,22,0,0 // vbroadcastss 0x164d(%rip),%ymm8 # 7130 <_sk_clut_4D_hsw+0xcda>
.byte 196,193,124,87,192 // vxorps %ymm8,%ymm0,%ymm0
.byte 196,193,124,94,193 // vdivps %ymm9,%ymm0,%ymm0
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -22075,7 +22419,7 @@ HIDDEN _sk_save_xy_hsw
FUNCTION(_sk_save_xy_hsw)
_sk_save_xy_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,98,125,24,5,237,21,0,0 // vbroadcastss 0x15ed(%rip),%ymm8 # 6c68 <_sk_clut_4D_hsw+0xcde>
+ .byte 196,98,125,24,5,237,21,0,0 // vbroadcastss 0x15ed(%rip),%ymm8 # 7134 <_sk_clut_4D_hsw+0xcde>
.byte 196,65,124,88,200 // vaddps %ymm8,%ymm0,%ymm9
.byte 196,67,125,8,209,1 // vroundps $0x1,%ymm9,%ymm10
.byte 196,65,52,92,202 // vsubps %ymm10,%ymm9,%ymm9
@@ -22109,9 +22453,9 @@ HIDDEN _sk_bilinear_nx_hsw
FUNCTION(_sk_bilinear_nx_hsw)
_sk_bilinear_nx_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,5,129,21,0,0 // vbroadcastss 0x1581(%rip),%ymm0 # 6c6c <_sk_clut_4D_hsw+0xce2>
+ .byte 196,226,125,24,5,129,21,0,0 // vbroadcastss 0x1581(%rip),%ymm0 # 7138 <_sk_clut_4D_hsw+0xce2>
.byte 197,252,88,0 // vaddps (%rax),%ymm0,%ymm0
- .byte 196,98,125,24,5,120,21,0,0 // vbroadcastss 0x1578(%rip),%ymm8 # 6c70 <_sk_clut_4D_hsw+0xce6>
+ .byte 196,98,125,24,5,120,21,0,0 // vbroadcastss 0x1578(%rip),%ymm8 # 713c <_sk_clut_4D_hsw+0xce6>
.byte 197,60,92,64,64 // vsubps 0x40(%rax),%ymm8,%ymm8
.byte 197,124,17,128,128,0,0,0 // vmovups %ymm8,0x80(%rax)
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -22122,7 +22466,7 @@ HIDDEN _sk_bilinear_px_hsw
FUNCTION(_sk_bilinear_px_hsw)
_sk_bilinear_px_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,5,96,21,0,0 // vbroadcastss 0x1560(%rip),%ymm0 # 6c74 <_sk_clut_4D_hsw+0xcea>
+ .byte 196,226,125,24,5,96,21,0,0 // vbroadcastss 0x1560(%rip),%ymm0 # 7140 <_sk_clut_4D_hsw+0xcea>
.byte 197,252,88,0 // vaddps (%rax),%ymm0,%ymm0
.byte 197,124,16,64,64 // vmovups 0x40(%rax),%ymm8
.byte 197,124,17,128,128,0,0,0 // vmovups %ymm8,0x80(%rax)
@@ -22134,9 +22478,9 @@ HIDDEN _sk_bilinear_ny_hsw
FUNCTION(_sk_bilinear_ny_hsw)
_sk_bilinear_ny_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,13,68,21,0,0 // vbroadcastss 0x1544(%rip),%ymm1 # 6c78 <_sk_clut_4D_hsw+0xcee>
+ .byte 196,226,125,24,13,68,21,0,0 // vbroadcastss 0x1544(%rip),%ymm1 # 7144 <_sk_clut_4D_hsw+0xcee>
.byte 197,244,88,72,32 // vaddps 0x20(%rax),%ymm1,%ymm1
- .byte 196,98,125,24,5,58,21,0,0 // vbroadcastss 0x153a(%rip),%ymm8 # 6c7c <_sk_clut_4D_hsw+0xcf2>
+ .byte 196,98,125,24,5,58,21,0,0 // vbroadcastss 0x153a(%rip),%ymm8 # 7148 <_sk_clut_4D_hsw+0xcf2>
.byte 197,60,92,64,96 // vsubps 0x60(%rax),%ymm8,%ymm8
.byte 197,124,17,128,160,0,0,0 // vmovups %ymm8,0xa0(%rax)
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -22147,7 +22491,7 @@ HIDDEN _sk_bilinear_py_hsw
FUNCTION(_sk_bilinear_py_hsw)
_sk_bilinear_py_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,13,34,21,0,0 // vbroadcastss 0x1522(%rip),%ymm1 # 6c80 <_sk_clut_4D_hsw+0xcf6>
+ .byte 196,226,125,24,13,34,21,0,0 // vbroadcastss 0x1522(%rip),%ymm1 # 714c <_sk_clut_4D_hsw+0xcf6>
.byte 197,244,88,72,32 // vaddps 0x20(%rax),%ymm1,%ymm1
.byte 197,124,16,64,96 // vmovups 0x60(%rax),%ymm8
.byte 197,124,17,128,160,0,0,0 // vmovups %ymm8,0xa0(%rax)
@@ -22159,13 +22503,13 @@ HIDDEN _sk_bicubic_n3x_hsw
FUNCTION(_sk_bicubic_n3x_hsw)
_sk_bicubic_n3x_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,5,5,21,0,0 // vbroadcastss 0x1505(%rip),%ymm0 # 6c84 <_sk_clut_4D_hsw+0xcfa>
+ .byte 196,226,125,24,5,5,21,0,0 // vbroadcastss 0x1505(%rip),%ymm0 # 7150 <_sk_clut_4D_hsw+0xcfa>
.byte 197,252,88,0 // vaddps (%rax),%ymm0,%ymm0
- .byte 196,98,125,24,5,252,20,0,0 // vbroadcastss 0x14fc(%rip),%ymm8 # 6c88 <_sk_clut_4D_hsw+0xcfe>
+ .byte 196,98,125,24,5,252,20,0,0 // vbroadcastss 0x14fc(%rip),%ymm8 # 7154 <_sk_clut_4D_hsw+0xcfe>
.byte 197,60,92,64,64 // vsubps 0x40(%rax),%ymm8,%ymm8
.byte 196,65,60,89,200 // vmulps %ymm8,%ymm8,%ymm9
- .byte 196,98,125,24,21,237,20,0,0 // vbroadcastss 0x14ed(%rip),%ymm10 # 6c8c <_sk_clut_4D_hsw+0xd02>
- .byte 196,98,125,24,29,232,20,0,0 // vbroadcastss 0x14e8(%rip),%ymm11 # 6c90 <_sk_clut_4D_hsw+0xd06>
+ .byte 196,98,125,24,21,237,20,0,0 // vbroadcastss 0x14ed(%rip),%ymm10 # 7158 <_sk_clut_4D_hsw+0xd02>
+ .byte 196,98,125,24,29,232,20,0,0 // vbroadcastss 0x14e8(%rip),%ymm11 # 715c <_sk_clut_4D_hsw+0xd06>
.byte 196,66,61,168,218 // vfmadd213ps %ymm10,%ymm8,%ymm11
.byte 196,65,36,89,193 // vmulps %ymm9,%ymm11,%ymm8
.byte 197,124,17,128,128,0,0,0 // vmovups %ymm8,0x80(%rax)
@@ -22177,16 +22521,16 @@ HIDDEN _sk_bicubic_n1x_hsw
FUNCTION(_sk_bicubic_n1x_hsw)
_sk_bicubic_n1x_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,5,203,20,0,0 // vbroadcastss 0x14cb(%rip),%ymm0 # 6c94 <_sk_clut_4D_hsw+0xd0a>
+ .byte 196,226,125,24,5,203,20,0,0 // vbroadcastss 0x14cb(%rip),%ymm0 # 7160 <_sk_clut_4D_hsw+0xd0a>
.byte 197,252,88,0 // vaddps (%rax),%ymm0,%ymm0
- .byte 196,98,125,24,5,194,20,0,0 // vbroadcastss 0x14c2(%rip),%ymm8 # 6c98 <_sk_clut_4D_hsw+0xd0e>
+ .byte 196,98,125,24,5,194,20,0,0 // vbroadcastss 0x14c2(%rip),%ymm8 # 7164 <_sk_clut_4D_hsw+0xd0e>
.byte 197,60,92,64,64 // vsubps 0x40(%rax),%ymm8,%ymm8
- .byte 196,98,125,24,13,184,20,0,0 // vbroadcastss 0x14b8(%rip),%ymm9 # 6c9c <_sk_clut_4D_hsw+0xd12>
- .byte 196,98,125,24,21,179,20,0,0 // vbroadcastss 0x14b3(%rip),%ymm10 # 6ca0 <_sk_clut_4D_hsw+0xd16>
+ .byte 196,98,125,24,13,184,20,0,0 // vbroadcastss 0x14b8(%rip),%ymm9 # 7168 <_sk_clut_4D_hsw+0xd12>
+ .byte 196,98,125,24,21,179,20,0,0 // vbroadcastss 0x14b3(%rip),%ymm10 # 716c <_sk_clut_4D_hsw+0xd16>
.byte 196,66,61,168,209 // vfmadd213ps %ymm9,%ymm8,%ymm10
- .byte 196,98,125,24,13,169,20,0,0 // vbroadcastss 0x14a9(%rip),%ymm9 # 6ca4 <_sk_clut_4D_hsw+0xd1a>
+ .byte 196,98,125,24,13,169,20,0,0 // vbroadcastss 0x14a9(%rip),%ymm9 # 7170 <_sk_clut_4D_hsw+0xd1a>
.byte 196,66,61,184,202 // vfmadd231ps %ymm10,%ymm8,%ymm9
- .byte 196,98,125,24,21,159,20,0,0 // vbroadcastss 0x149f(%rip),%ymm10 # 6ca8 <_sk_clut_4D_hsw+0xd1e>
+ .byte 196,98,125,24,21,159,20,0,0 // vbroadcastss 0x149f(%rip),%ymm10 # 7174 <_sk_clut_4D_hsw+0xd1e>
.byte 196,66,61,184,209 // vfmadd231ps %ymm9,%ymm8,%ymm10
.byte 197,124,17,144,128,0,0,0 // vmovups %ymm10,0x80(%rax)
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -22197,14 +22541,14 @@ HIDDEN _sk_bicubic_p1x_hsw
FUNCTION(_sk_bicubic_p1x_hsw)
_sk_bicubic_p1x_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,98,125,24,5,135,20,0,0 // vbroadcastss 0x1487(%rip),%ymm8 # 6cac <_sk_clut_4D_hsw+0xd22>
+ .byte 196,98,125,24,5,135,20,0,0 // vbroadcastss 0x1487(%rip),%ymm8 # 7178 <_sk_clut_4D_hsw+0xd22>
.byte 197,188,88,0 // vaddps (%rax),%ymm8,%ymm0
.byte 197,124,16,72,64 // vmovups 0x40(%rax),%ymm9
- .byte 196,98,125,24,21,121,20,0,0 // vbroadcastss 0x1479(%rip),%ymm10 # 6cb0 <_sk_clut_4D_hsw+0xd26>
- .byte 196,98,125,24,29,116,20,0,0 // vbroadcastss 0x1474(%rip),%ymm11 # 6cb4 <_sk_clut_4D_hsw+0xd2a>
+ .byte 196,98,125,24,21,121,20,0,0 // vbroadcastss 0x1479(%rip),%ymm10 # 717c <_sk_clut_4D_hsw+0xd26>
+ .byte 196,98,125,24,29,116,20,0,0 // vbroadcastss 0x1474(%rip),%ymm11 # 7180 <_sk_clut_4D_hsw+0xd2a>
.byte 196,66,53,168,218 // vfmadd213ps %ymm10,%ymm9,%ymm11
.byte 196,66,53,168,216 // vfmadd213ps %ymm8,%ymm9,%ymm11
- .byte 196,98,125,24,5,101,20,0,0 // vbroadcastss 0x1465(%rip),%ymm8 # 6cb8 <_sk_clut_4D_hsw+0xd2e>
+ .byte 196,98,125,24,5,101,20,0,0 // vbroadcastss 0x1465(%rip),%ymm8 # 7184 <_sk_clut_4D_hsw+0xd2e>
.byte 196,66,53,184,195 // vfmadd231ps %ymm11,%ymm9,%ymm8
.byte 197,124,17,128,128,0,0,0 // vmovups %ymm8,0x80(%rax)
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -22215,12 +22559,12 @@ HIDDEN _sk_bicubic_p3x_hsw
FUNCTION(_sk_bicubic_p3x_hsw)
_sk_bicubic_p3x_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,5,77,20,0,0 // vbroadcastss 0x144d(%rip),%ymm0 # 6cbc <_sk_clut_4D_hsw+0xd32>
+ .byte 196,226,125,24,5,77,20,0,0 // vbroadcastss 0x144d(%rip),%ymm0 # 7188 <_sk_clut_4D_hsw+0xd32>
.byte 197,252,88,0 // vaddps (%rax),%ymm0,%ymm0
.byte 197,124,16,64,64 // vmovups 0x40(%rax),%ymm8
.byte 196,65,60,89,200 // vmulps %ymm8,%ymm8,%ymm9
- .byte 196,98,125,24,21,58,20,0,0 // vbroadcastss 0x143a(%rip),%ymm10 # 6cc0 <_sk_clut_4D_hsw+0xd36>
- .byte 196,98,125,24,29,53,20,0,0 // vbroadcastss 0x1435(%rip),%ymm11 # 6cc4 <_sk_clut_4D_hsw+0xd3a>
+ .byte 196,98,125,24,21,58,20,0,0 // vbroadcastss 0x143a(%rip),%ymm10 # 718c <_sk_clut_4D_hsw+0xd36>
+ .byte 196,98,125,24,29,53,20,0,0 // vbroadcastss 0x1435(%rip),%ymm11 # 7190 <_sk_clut_4D_hsw+0xd3a>
.byte 196,66,61,168,218 // vfmadd213ps %ymm10,%ymm8,%ymm11
.byte 196,65,52,89,195 // vmulps %ymm11,%ymm9,%ymm8
.byte 197,124,17,128,128,0,0,0 // vmovups %ymm8,0x80(%rax)
@@ -22232,13 +22576,13 @@ HIDDEN _sk_bicubic_n3y_hsw
FUNCTION(_sk_bicubic_n3y_hsw)
_sk_bicubic_n3y_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,13,24,20,0,0 // vbroadcastss 0x1418(%rip),%ymm1 # 6cc8 <_sk_clut_4D_hsw+0xd3e>
+ .byte 196,226,125,24,13,24,20,0,0 // vbroadcastss 0x1418(%rip),%ymm1 # 7194 <_sk_clut_4D_hsw+0xd3e>
.byte 197,244,88,72,32 // vaddps 0x20(%rax),%ymm1,%ymm1
- .byte 196,98,125,24,5,14,20,0,0 // vbroadcastss 0x140e(%rip),%ymm8 # 6ccc <_sk_clut_4D_hsw+0xd42>
+ .byte 196,98,125,24,5,14,20,0,0 // vbroadcastss 0x140e(%rip),%ymm8 # 7198 <_sk_clut_4D_hsw+0xd42>
.byte 197,60,92,64,96 // vsubps 0x60(%rax),%ymm8,%ymm8
.byte 196,65,60,89,200 // vmulps %ymm8,%ymm8,%ymm9
- .byte 196,98,125,24,21,255,19,0,0 // vbroadcastss 0x13ff(%rip),%ymm10 # 6cd0 <_sk_clut_4D_hsw+0xd46>
- .byte 196,98,125,24,29,250,19,0,0 // vbroadcastss 0x13fa(%rip),%ymm11 # 6cd4 <_sk_clut_4D_hsw+0xd4a>
+ .byte 196,98,125,24,21,255,19,0,0 // vbroadcastss 0x13ff(%rip),%ymm10 # 719c <_sk_clut_4D_hsw+0xd46>
+ .byte 196,98,125,24,29,250,19,0,0 // vbroadcastss 0x13fa(%rip),%ymm11 # 71a0 <_sk_clut_4D_hsw+0xd4a>
.byte 196,66,61,168,218 // vfmadd213ps %ymm10,%ymm8,%ymm11
.byte 196,65,36,89,193 // vmulps %ymm9,%ymm11,%ymm8
.byte 197,124,17,128,160,0,0,0 // vmovups %ymm8,0xa0(%rax)
@@ -22250,16 +22594,16 @@ HIDDEN _sk_bicubic_n1y_hsw
FUNCTION(_sk_bicubic_n1y_hsw)
_sk_bicubic_n1y_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,13,221,19,0,0 // vbroadcastss 0x13dd(%rip),%ymm1 # 6cd8 <_sk_clut_4D_hsw+0xd4e>
+ .byte 196,226,125,24,13,221,19,0,0 // vbroadcastss 0x13dd(%rip),%ymm1 # 71a4 <_sk_clut_4D_hsw+0xd4e>
.byte 197,244,88,72,32 // vaddps 0x20(%rax),%ymm1,%ymm1
- .byte 196,98,125,24,5,211,19,0,0 // vbroadcastss 0x13d3(%rip),%ymm8 # 6cdc <_sk_clut_4D_hsw+0xd52>
+ .byte 196,98,125,24,5,211,19,0,0 // vbroadcastss 0x13d3(%rip),%ymm8 # 71a8 <_sk_clut_4D_hsw+0xd52>
.byte 197,60,92,64,96 // vsubps 0x60(%rax),%ymm8,%ymm8
- .byte 196,98,125,24,13,201,19,0,0 // vbroadcastss 0x13c9(%rip),%ymm9 # 6ce0 <_sk_clut_4D_hsw+0xd56>
- .byte 196,98,125,24,21,196,19,0,0 // vbroadcastss 0x13c4(%rip),%ymm10 # 6ce4 <_sk_clut_4D_hsw+0xd5a>
+ .byte 196,98,125,24,13,201,19,0,0 // vbroadcastss 0x13c9(%rip),%ymm9 # 71ac <_sk_clut_4D_hsw+0xd56>
+ .byte 196,98,125,24,21,196,19,0,0 // vbroadcastss 0x13c4(%rip),%ymm10 # 71b0 <_sk_clut_4D_hsw+0xd5a>
.byte 196,66,61,168,209 // vfmadd213ps %ymm9,%ymm8,%ymm10
- .byte 196,98,125,24,13,186,19,0,0 // vbroadcastss 0x13ba(%rip),%ymm9 # 6ce8 <_sk_clut_4D_hsw+0xd5e>
+ .byte 196,98,125,24,13,186,19,0,0 // vbroadcastss 0x13ba(%rip),%ymm9 # 71b4 <_sk_clut_4D_hsw+0xd5e>
.byte 196,66,61,184,202 // vfmadd231ps %ymm10,%ymm8,%ymm9
- .byte 196,98,125,24,21,176,19,0,0 // vbroadcastss 0x13b0(%rip),%ymm10 # 6cec <_sk_clut_4D_hsw+0xd62>
+ .byte 196,98,125,24,21,176,19,0,0 // vbroadcastss 0x13b0(%rip),%ymm10 # 71b8 <_sk_clut_4D_hsw+0xd62>
.byte 196,66,61,184,209 // vfmadd231ps %ymm9,%ymm8,%ymm10
.byte 197,124,17,144,160,0,0,0 // vmovups %ymm10,0xa0(%rax)
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -22270,14 +22614,14 @@ HIDDEN _sk_bicubic_p1y_hsw
FUNCTION(_sk_bicubic_p1y_hsw)
_sk_bicubic_p1y_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,98,125,24,5,152,19,0,0 // vbroadcastss 0x1398(%rip),%ymm8 # 6cf0 <_sk_clut_4D_hsw+0xd66>
+ .byte 196,98,125,24,5,152,19,0,0 // vbroadcastss 0x1398(%rip),%ymm8 # 71bc <_sk_clut_4D_hsw+0xd66>
.byte 197,188,88,72,32 // vaddps 0x20(%rax),%ymm8,%ymm1
.byte 197,124,16,72,96 // vmovups 0x60(%rax),%ymm9
- .byte 196,98,125,24,21,137,19,0,0 // vbroadcastss 0x1389(%rip),%ymm10 # 6cf4 <_sk_clut_4D_hsw+0xd6a>
- .byte 196,98,125,24,29,132,19,0,0 // vbroadcastss 0x1384(%rip),%ymm11 # 6cf8 <_sk_clut_4D_hsw+0xd6e>
+ .byte 196,98,125,24,21,137,19,0,0 // vbroadcastss 0x1389(%rip),%ymm10 # 71c0 <_sk_clut_4D_hsw+0xd6a>
+ .byte 196,98,125,24,29,132,19,0,0 // vbroadcastss 0x1384(%rip),%ymm11 # 71c4 <_sk_clut_4D_hsw+0xd6e>
.byte 196,66,53,168,218 // vfmadd213ps %ymm10,%ymm9,%ymm11
.byte 196,66,53,168,216 // vfmadd213ps %ymm8,%ymm9,%ymm11
- .byte 196,98,125,24,5,117,19,0,0 // vbroadcastss 0x1375(%rip),%ymm8 # 6cfc <_sk_clut_4D_hsw+0xd72>
+ .byte 196,98,125,24,5,117,19,0,0 // vbroadcastss 0x1375(%rip),%ymm8 # 71c8 <_sk_clut_4D_hsw+0xd72>
.byte 196,66,53,184,195 // vfmadd231ps %ymm11,%ymm9,%ymm8
.byte 197,124,17,128,160,0,0,0 // vmovups %ymm8,0xa0(%rax)
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -22288,12 +22632,12 @@ HIDDEN _sk_bicubic_p3y_hsw
FUNCTION(_sk_bicubic_p3y_hsw)
_sk_bicubic_p3y_hsw:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,13,93,19,0,0 // vbroadcastss 0x135d(%rip),%ymm1 # 6d00 <_sk_clut_4D_hsw+0xd76>
+ .byte 196,226,125,24,13,93,19,0,0 // vbroadcastss 0x135d(%rip),%ymm1 # 71cc <_sk_clut_4D_hsw+0xd76>
.byte 197,244,88,72,32 // vaddps 0x20(%rax),%ymm1,%ymm1
.byte 197,124,16,64,96 // vmovups 0x60(%rax),%ymm8
.byte 196,65,60,89,200 // vmulps %ymm8,%ymm8,%ymm9
- .byte 196,98,125,24,21,73,19,0,0 // vbroadcastss 0x1349(%rip),%ymm10 # 6d04 <_sk_clut_4D_hsw+0xd7a>
- .byte 196,98,125,24,29,68,19,0,0 // vbroadcastss 0x1344(%rip),%ymm11 # 6d08 <_sk_clut_4D_hsw+0xd7e>
+ .byte 196,98,125,24,21,73,19,0,0 // vbroadcastss 0x1349(%rip),%ymm10 # 71d0 <_sk_clut_4D_hsw+0xd7a>
+ .byte 196,98,125,24,29,68,19,0,0 // vbroadcastss 0x1344(%rip),%ymm11 # 71d4 <_sk_clut_4D_hsw+0xd7e>
.byte 196,66,61,168,218 // vfmadd213ps %ymm10,%ymm8,%ymm11
.byte 196,65,52,89,195 // vmulps %ymm11,%ymm9,%ymm8
.byte 197,124,17,128,160,0,0,0 // vmovups %ymm8,0xa0(%rax)
@@ -22433,19 +22777,19 @@ _sk_clut_3D_hsw:
.byte 197,254,127,76,36,160 // vmovdqu %ymm1,-0x60(%rsp)
.byte 72,139,0 // mov (%rax),%rax
.byte 196,193,117,254,204 // vpaddd %ymm12,%ymm1,%ymm1
- .byte 196,226,125,88,61,245,16,0,0 // vpbroadcastd 0x10f5(%rip),%ymm7 # 6d10 <_sk_clut_4D_hsw+0xd86>
+ .byte 196,226,125,88,61,245,16,0,0 // vpbroadcastd 0x10f5(%rip),%ymm7 # 71dc <_sk_clut_4D_hsw+0xd86>
.byte 196,226,117,64,207 // vpmulld %ymm7,%ymm1,%ymm1
.byte 197,205,118,246 // vpcmpeqd %ymm6,%ymm6,%ymm6
.byte 196,98,77,146,4,136 // vgatherdps %ymm6,(%rax,%ymm1,4),%ymm8
- .byte 196,98,125,88,21,225,16,0,0 // vpbroadcastd 0x10e1(%rip),%ymm10 # 6d14 <_sk_clut_4D_hsw+0xd8a>
+ .byte 196,98,125,88,21,225,16,0,0 // vpbroadcastd 0x10e1(%rip),%ymm10 # 71e0 <_sk_clut_4D_hsw+0xd8a>
.byte 196,193,117,254,242 // vpaddd %ymm10,%ymm1,%ymm6
.byte 196,65,37,118,219 // vpcmpeqd %ymm11,%ymm11,%ymm11
.byte 196,226,37,146,28,176 // vgatherdps %ymm11,(%rax,%ymm6,4),%ymm3
- .byte 196,98,125,88,29,204,16,0,0 // vpbroadcastd 0x10cc(%rip),%ymm11 # 6d18 <_sk_clut_4D_hsw+0xd8e>
+ .byte 196,98,125,88,29,204,16,0,0 // vpbroadcastd 0x10cc(%rip),%ymm11 # 71e4 <_sk_clut_4D_hsw+0xd8e>
.byte 196,193,117,254,203 // vpaddd %ymm11,%ymm1,%ymm1
.byte 197,205,118,246 // vpcmpeqd %ymm6,%ymm6,%ymm6
.byte 196,226,77,146,36,136 // vgatherdps %ymm6,(%rax,%ymm1,4),%ymm4
- .byte 196,98,125,24,45,168,16,0,0 // vbroadcastss 0x10a8(%rip),%ymm13 # 6d0c <_sk_clut_4D_hsw+0xd82>
+ .byte 196,98,125,24,45,168,16,0,0 // vbroadcastss 0x10a8(%rip),%ymm13 # 71d8 <_sk_clut_4D_hsw+0xd82>
.byte 196,193,124,88,245 // vaddps %ymm13,%ymm0,%ymm6
.byte 197,254,91,246 // vcvttps2dq %ymm6,%ymm6
.byte 196,226,77,64,205 // vpmulld %ymm5,%ymm6,%ymm1
@@ -22665,19 +23009,19 @@ _sk_clut_4D_hsw:
.byte 197,126,91,245 // vcvttps2dq %ymm5,%ymm14
.byte 196,98,13,64,233 // vpmulld %ymm1,%ymm14,%ymm13
.byte 197,149,254,198 // vpaddd %ymm6,%ymm13,%ymm0
- .byte 196,98,125,88,21,78,12,0,0 // vpbroadcastd 0xc4e(%rip),%ymm10 # 6d20 <_sk_clut_4D_hsw+0xd96>
+ .byte 196,98,125,88,21,78,12,0,0 // vpbroadcastd 0xc4e(%rip),%ymm10 # 71ec <_sk_clut_4D_hsw+0xd96>
.byte 196,194,125,64,194 // vpmulld %ymm10,%ymm0,%ymm0
.byte 196,65,61,118,192 // vpcmpeqd %ymm8,%ymm8,%ymm8
.byte 196,98,61,146,60,128 // vgatherdps %ymm8,(%rax,%ymm0,4),%ymm15
- .byte 196,98,125,88,29,57,12,0,0 // vpbroadcastd 0xc39(%rip),%ymm11 # 6d24 <_sk_clut_4D_hsw+0xd9a>
+ .byte 196,98,125,88,29,57,12,0,0 // vpbroadcastd 0xc39(%rip),%ymm11 # 71f0 <_sk_clut_4D_hsw+0xd9a>
.byte 196,65,125,254,195 // vpaddd %ymm11,%ymm0,%ymm8
.byte 196,65,53,118,201 // vpcmpeqd %ymm9,%ymm9,%ymm9
.byte 196,162,53,146,20,128 // vgatherdps %ymm9,(%rax,%ymm8,4),%ymm2
- .byte 196,98,125,88,37,36,12,0,0 // vpbroadcastd 0xc24(%rip),%ymm12 # 6d28 <_sk_clut_4D_hsw+0xd9e>
+ .byte 196,98,125,88,37,36,12,0,0 // vpbroadcastd 0xc24(%rip),%ymm12 # 71f4 <_sk_clut_4D_hsw+0xd9e>
.byte 196,193,125,254,196 // vpaddd %ymm12,%ymm0,%ymm0
.byte 196,65,61,118,192 // vpcmpeqd %ymm8,%ymm8,%ymm8
.byte 196,226,61,146,60,128 // vgatherdps %ymm8,(%rax,%ymm0,4),%ymm7
- .byte 196,98,125,24,13,255,11,0,0 // vbroadcastss 0xbff(%rip),%ymm9 # 6d1c <_sk_clut_4D_hsw+0xd92>
+ .byte 196,98,125,24,13,255,11,0,0 // vbroadcastss 0xbff(%rip),%ymm9 # 71e8 <_sk_clut_4D_hsw+0xd92>
.byte 196,65,84,88,193 // vaddps %ymm9,%ymm5,%ymm8
.byte 196,65,126,91,192 // vcvttps2dq %ymm8,%ymm8
.byte 196,226,61,64,193 // vpmulld %ymm1,%ymm8,%ymm0
@@ -23002,7 +23346,7 @@ _sk_clut_4D_hsw:
.byte 197,196,92,214 // vsubps %ymm6,%ymm7,%ymm2
.byte 196,226,61,168,214 // vfmadd213ps %ymm6,%ymm8,%ymm2
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,29,49,5,0,0 // vbroadcastss 0x531(%rip),%ymm3 # 6d2c <_sk_clut_4D_hsw+0xda2>
+ .byte 196,226,125,24,29,49,5,0,0 // vbroadcastss 0x531(%rip),%ymm3 # 71f8 <_sk_clut_4D_hsw+0xda2>
.byte 197,252,16,164,36,32,3,0,0 // vmovups 0x320(%rsp),%ymm4
.byte 197,252,16,172,36,64,3,0,0 // vmovups 0x340(%rsp),%ymm5
.byte 197,252,16,180,36,96,3,0,0 // vmovups 0x360(%rsp),%ymm6
@@ -23053,25 +23397,25 @@ BALIGN4
.byte 153 // cltd
.byte 153 // cltd
.byte 62,61,10,23,63,174 // ds cmp $0xae3f170a,%eax
- .byte 71,225,61 // rex.RXB loope 68e1 <.literal4+0xb9>
+ .byte 71,225,61 // rex.RXB loope 6dad <.literal4+0xb9>
.byte 0,0 // add %al,(%rax)
.byte 128,63,154 // cmpb $0x9a,(%rdi)
.byte 153 // cltd
.byte 153 // cltd
.byte 62,61,10,23,63,174 // ds cmp $0xae3f170a,%eax
- .byte 71,225,61 // rex.RXB loope 68f1 <.literal4+0xc9>
+ .byte 71,225,61 // rex.RXB loope 6dbd <.literal4+0xc9>
.byte 0,0 // add %al,(%rax)
.byte 128,63,154 // cmpb $0x9a,(%rdi)
.byte 153 // cltd
.byte 153 // cltd
.byte 62,61,10,23,63,174 // ds cmp $0xae3f170a,%eax
- .byte 71,225,61 // rex.RXB loope 6901 <.literal4+0xd9>
+ .byte 71,225,61 // rex.RXB loope 6dcd <.literal4+0xd9>
.byte 0,0 // add %al,(%rax)
.byte 128,63,154 // cmpb $0x9a,(%rdi)
.byte 153 // cltd
.byte 153 // cltd
.byte 62,61,10,23,63,174 // ds cmp $0xae3f170a,%eax
- .byte 71,225,61 // rex.RXB loope 6911 <.literal4+0xe9>
+ .byte 71,225,61 // rex.RXB loope 6ddd <.literal4+0xe9>
.byte 0,0 // add %al,(%rax)
.byte 128,63,0 // cmpb $0x0,(%rdi)
.byte 0,128,63,0,0,127 // add %al,0x7f00003f(%rax)
@@ -23134,7 +23478,7 @@ BALIGN4
.byte 190,129,128,128,59 // mov $0x3b808081,%esi
.byte 129,128,128,59,0,248,0,0,8,33 // addl $0x21080000,-0x7ffc480(%rax)
.byte 132,55 // test %dh,(%rdi)
- .byte 224,7 // loopne 6985 <.literal4+0x15d>
+ .byte 224,7 // loopne 6e51 <.literal4+0x15d>
.byte 0,0 // add %al,(%rax)
.byte 33,8 // and %ecx,(%rax)
.byte 2,58 // add (%rdx),%bh
@@ -23150,10 +23494,10 @@ BALIGN4
.byte 129,128,128,59,129,128,128,59,0,0 // addl $0x3b80,-0x7f7ec480(%rax)
.byte 0,52,255 // add %dh,(%rdi,%rdi,8)
.byte 255 // (bad)
- .byte 127,0 // jg 69ac <.literal4+0x184>
+ .byte 127,0 // jg 6e78 <.literal4+0x184>
.byte 0,0 // add %al,(%rax)
.byte 0,63 // add %bh,(%rdi)
- .byte 119,115 // ja 6a25 <.literal4+0x1fd>
+ .byte 119,115 // ja 6ef1 <.literal4+0x1fd>
.byte 248 // clc
.byte 194,117,191 // retq $0xbf75
.byte 191,63,249,68,180 // mov $0xb444f93f,%edi
@@ -23167,10 +23511,10 @@ BALIGN4
.byte 0,128,63,0,0,0 // add %al,0x3f(%rax)
.byte 52,255 // xor $0xff,%al
.byte 255 // (bad)
- .byte 127,0 // jg 69e0 <.literal4+0x1b8>
+ .byte 127,0 // jg 6eac <.literal4+0x1b8>
.byte 0,0 // add %al,(%rax)
.byte 0,63 // add %bh,(%rdi)
- .byte 119,115 // ja 6a59 <.literal4+0x231>
+ .byte 119,115 // ja 6f25 <.literal4+0x231>
.byte 248 // clc
.byte 194,117,191 // retq $0xbf75
.byte 191,63,249,68,180 // mov $0xb444f93f,%edi
@@ -23184,10 +23528,10 @@ BALIGN4
.byte 0,128,63,0,0,0 // add %al,0x3f(%rax)
.byte 52,255 // xor $0xff,%al
.byte 255 // (bad)
- .byte 127,0 // jg 6a14 <.literal4+0x1ec>
+ .byte 127,0 // jg 6ee0 <.literal4+0x1ec>
.byte 0,0 // add %al,(%rax)
.byte 0,63 // add %bh,(%rdi)
- .byte 119,115 // ja 6a8d <.literal4+0x265>
+ .byte 119,115 // ja 6f59 <.literal4+0x265>
.byte 248 // clc
.byte 194,117,191 // retq $0xbf75
.byte 191,63,249,68,180 // mov $0xb444f93f,%edi
@@ -23201,10 +23545,10 @@ BALIGN4
.byte 0,128,63,0,0,0 // add %al,0x3f(%rax)
.byte 52,255 // xor $0xff,%al
.byte 255 // (bad)
- .byte 127,0 // jg 6a48 <.literal4+0x220>
+ .byte 127,0 // jg 6f14 <.literal4+0x220>
.byte 0,0 // add %al,(%rax)
.byte 0,63 // add %bh,(%rdi)
- .byte 119,115 // ja 6ac1 <.literal4+0x299>
+ .byte 119,115 // ja 6f8d <.literal4+0x299>
.byte 248 // clc
.byte 194,117,191 // retq $0xbf75
.byte 191,63,249,68,180 // mov $0xb444f93f,%edi
@@ -23218,10 +23562,10 @@ BALIGN4
.byte 0,128,63,0,0,0 // add %al,0x3f(%rax)
.byte 52,255 // xor $0xff,%al
.byte 255 // (bad)
- .byte 127,0 // jg 6a7c <.literal4+0x254>
+ .byte 127,0 // jg 6f48 <.literal4+0x254>
.byte 0,0 // add %al,(%rax)
.byte 0,63 // add %bh,(%rdi)
- .byte 119,115 // ja 6af5 <.literal4+0x2cd>
+ .byte 119,115 // ja 6fc1 <.literal4+0x2cd>
.byte 248 // clc
.byte 194,117,191 // retq $0xbf75
.byte 191,63,249,68,180 // mov $0xb444f93f,%edi
@@ -23234,7 +23578,7 @@ BALIGN4
.byte 0,75,0 // add %cl,0x0(%rbx)
.byte 0,200 // add %cl,%al
.byte 66,0,0 // rex.X add %al,(%rax)
- .byte 127,67 // jg 6aef <.literal4+0x2c7>
+ .byte 127,67 // jg 6fbb <.literal4+0x2c7>
.byte 0,0 // add %al,(%rax)
.byte 0,195 // add %al,%bl
.byte 0,0 // add %al,(%rax)
@@ -23246,7 +23590,7 @@ BALIGN4
.byte 190,80,128,3,62 // mov $0x3e038050,%esi
.byte 31 // (bad)
.byte 215 // xlat %ds:(%rbx)
- .byte 118,63 // jbe 6b0f <.literal4+0x2e7>
+ .byte 118,63 // jbe 6fdb <.literal4+0x2e7>
.byte 246,64,83,63 // testb $0x3f,0x53(%rax)
.byte 129,128,128,59,129,128,128,59,129,128// addl $0x80813b80,-0x7f7ec480(%rax)
.byte 128,59,0 // cmpb $0x0,(%rbx)
@@ -23268,7 +23612,7 @@ BALIGN4
.byte 0,0 // add %al,(%rax)
.byte 8,33 // or %ah,(%rcx)
.byte 132,55 // test %dh,(%rdi)
- .byte 224,7 // loopne 6b29 <.literal4+0x301>
+ .byte 224,7 // loopne 6ff5 <.literal4+0x301>
.byte 0,0 // add %al,(%rax)
.byte 33,8 // and %ecx,(%rax)
.byte 2,58 // add (%rdx),%bh
@@ -23280,7 +23624,7 @@ BALIGN4
.byte 0,0 // add %al,(%rax)
.byte 8,33 // or %ah,(%rcx)
.byte 132,55 // test %dh,(%rdi)
- .byte 224,7 // loopne 6b45 <.literal4+0x31d>
+ .byte 224,7 // loopne 7011 <.literal4+0x31d>
.byte 0,0 // add %al,(%rax)
.byte 33,8 // and %ecx,(%rax)
.byte 2,58 // add (%rdx),%bh
@@ -23291,7 +23635,7 @@ BALIGN4
.byte 0,0 // add %al,(%rax)
.byte 248 // clc
.byte 65,0,0 // add %al,(%r8)
- .byte 124,66 // jl 6b9a <.literal4+0x372>
+ .byte 124,66 // jl 7066 <.literal4+0x372>
.byte 0,240 // add %dh,%al
.byte 0,0 // add %al,(%rax)
.byte 137,136,136,55,0,15 // mov %ecx,0xf003788(%rax)
@@ -23317,7 +23661,7 @@ BALIGN4
.byte 137,136,136,59,15,0 // mov %ecx,0xf3b88(%rax)
.byte 0,0 // add %al,(%rax)
.byte 137,136,136,61,0,0 // mov %ecx,0x3d88(%rax)
- .byte 112,65 // jo 6bfd <.literal4+0x3d5>
+ .byte 112,65 // jo 70c9 <.literal4+0x3d5>
.byte 129,128,128,59,129,128,128,59,129,128// addl $0x80813b80,-0x7f7ec480(%rax)
.byte 128,59,0 // cmpb $0x0,(%rbx)
.byte 0,127,67 // add %bh,0x43(%rdi)
@@ -23331,7 +23675,7 @@ BALIGN4
.byte 0,0 // add %al,(%rax)
.byte 128,63,0 // cmpb $0x0,(%rdi)
.byte 255 // (bad)
- .byte 127,71 // jg 6c33 <.literal4+0x40b>
+ .byte 127,71 // jg 70ff <.literal4+0x40b>
.byte 0,0 // add %al,(%rax)
.byte 0,63 // add %bh,(%rdi)
.byte 0,0 // add %al,(%rax)
@@ -23438,16 +23782,16 @@ BALIGN4
.byte 170 // stos %al,%es:(%rdi)
.byte 170 // stos %al,%es:(%rdi)
.byte 190,114,28,199,62 // mov $0x3ec71c72,%esi
- .byte 114,249 // jb 6d07 <.literal4+0x4df>
- .byte 127,63 // jg 6d4f <_sk_clut_4D_hsw+0xdc5>
+ .byte 114,249 // jb 71d3 <.literal4+0x4df>
+ .byte 127,63 // jg 721b <_sk_clut_4D_hsw+0xdc5>
.byte 3,0 // add (%rax),%eax
.byte 0,0 // add %al,(%rax)
.byte 1,0 // add %eax,(%rax)
.byte 0,0 // add %al,(%rax)
.byte 2,0 // add (%rax),%al
.byte 0,0 // add %al,(%rax)
- .byte 114,249 // jb 6d17 <.literal4+0x4ef>
- .byte 127,63 // jg 6d5f <_sk_clut_4D_hsw+0xdd5>
+ .byte 114,249 // jb 71e3 <.literal4+0x4ef>
+ .byte 127,63 // jg 722b <_sk_clut_4D_hsw+0xdd5>
.byte 3,0 // add (%rax),%eax
.byte 0,0 // add %al,(%rax)
.byte 1,0 // add %eax,(%rax)
@@ -23477,16 +23821,16 @@ BALIGN32
.byte 0,0 // add %al,(%rax)
.byte 1,255 // add %edi,%edi
.byte 255 // (bad)
- .byte 255,5,255,255,255,9 // incl 0x9ffffff(%rip) # a006d68 <_sk_clut_4D_hsw+0xa000dde>
+ .byte 255,5,255,255,255,9 // incl 0x9ffffff(%rip) # a007228 <_sk_clut_4D_hsw+0xa000dd2>
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,13,255,255,255,17 // decl 0x11ffffff(%rip) # 12006d70 <_sk_clut_4D_hsw+0x12000de6>
+ .byte 255,13,255,255,255,17 // decl 0x11ffffff(%rip) # 12007230 <_sk_clut_4D_hsw+0x12000dda>
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,21,255,255,255,25 // callq *0x19ffffff(%rip) # 1a006d78 <_sk_clut_4D_hsw+0x1a000dee>
+ .byte 255,21,255,255,255,25 // callq *0x19ffffff(%rip) # 1a007238 <_sk_clut_4D_hsw+0x1a000de2>
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,29,255,255,255,2 // lcall *0x2ffffff(%rip) # 3006d80 <_sk_clut_4D_hsw+0x3000df6>
+ .byte 255,29,255,255,255,2 // lcall *0x2ffffff(%rip) # 3007240 <_sk_clut_4D_hsw+0x3000dea>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,6 // incl (%rsi)
@@ -23529,16 +23873,16 @@ BALIGN32
.byte 0,0 // add %al,(%rax)
.byte 1,255 // add %edi,%edi
.byte 255 // (bad)
- .byte 255,5,255,255,255,9 // incl 0x9ffffff(%rip) # a006dc8 <_sk_clut_4D_hsw+0xa000e3e>
+ .byte 255,5,255,255,255,9 // incl 0x9ffffff(%rip) # a007288 <_sk_clut_4D_hsw+0xa000e32>
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,13,255,255,255,17 // decl 0x11ffffff(%rip) # 12006dd0 <_sk_clut_4D_hsw+0x12000e46>
+ .byte 255,13,255,255,255,17 // decl 0x11ffffff(%rip) # 12007290 <_sk_clut_4D_hsw+0x12000e3a>
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,21,255,255,255,25 // callq *0x19ffffff(%rip) # 1a006dd8 <_sk_clut_4D_hsw+0x1a000e4e>
+ .byte 255,21,255,255,255,25 // callq *0x19ffffff(%rip) # 1a007298 <_sk_clut_4D_hsw+0x1a000e42>
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,29,255,255,255,2 // lcall *0x2ffffff(%rip) # 3006de0 <_sk_clut_4D_hsw+0x3000e56>
+ .byte 255,29,255,255,255,2 // lcall *0x2ffffff(%rip) # 30072a0 <_sk_clut_4D_hsw+0x3000e4a>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,6 // incl (%rsi)
@@ -23581,16 +23925,16 @@ BALIGN32
.byte 0,0 // add %al,(%rax)
.byte 1,255 // add %edi,%edi
.byte 255 // (bad)
- .byte 255,5,255,255,255,9 // incl 0x9ffffff(%rip) # a006e28 <_sk_clut_4D_hsw+0xa000e9e>
+ .byte 255,5,255,255,255,9 // incl 0x9ffffff(%rip) # a0072e8 <_sk_clut_4D_hsw+0xa000e92>
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,13,255,255,255,17 // decl 0x11ffffff(%rip) # 12006e30 <_sk_clut_4D_hsw+0x12000ea6>
+ .byte 255,13,255,255,255,17 // decl 0x11ffffff(%rip) # 120072f0 <_sk_clut_4D_hsw+0x12000e9a>
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,21,255,255,255,25 // callq *0x19ffffff(%rip) # 1a006e38 <_sk_clut_4D_hsw+0x1a000eae>
+ .byte 255,21,255,255,255,25 // callq *0x19ffffff(%rip) # 1a0072f8 <_sk_clut_4D_hsw+0x1a000ea2>
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,29,255,255,255,2 // lcall *0x2ffffff(%rip) # 3006e40 <_sk_clut_4D_hsw+0x3000eb6>
+ .byte 255,29,255,255,255,2 // lcall *0x2ffffff(%rip) # 3007300 <_sk_clut_4D_hsw+0x3000eaa>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,6 // incl (%rsi)
@@ -23633,16 +23977,16 @@ BALIGN32
.byte 0,0 // add %al,(%rax)
.byte 1,255 // add %edi,%edi
.byte 255 // (bad)
- .byte 255,5,255,255,255,9 // incl 0x9ffffff(%rip) # a006e88 <_sk_clut_4D_hsw+0xa000efe>
+ .byte 255,5,255,255,255,9 // incl 0x9ffffff(%rip) # a007348 <_sk_clut_4D_hsw+0xa000ef2>
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,13,255,255,255,17 // decl 0x11ffffff(%rip) # 12006e90 <_sk_clut_4D_hsw+0x12000f06>
+ .byte 255,13,255,255,255,17 // decl 0x11ffffff(%rip) # 12007350 <_sk_clut_4D_hsw+0x12000efa>
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,21,255,255,255,25 // callq *0x19ffffff(%rip) # 1a006e98 <_sk_clut_4D_hsw+0x1a000f0e>
+ .byte 255,21,255,255,255,25 // callq *0x19ffffff(%rip) # 1a007358 <_sk_clut_4D_hsw+0x1a000f02>
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,29,255,255,255,2 // lcall *0x2ffffff(%rip) # 3006ea0 <_sk_clut_4D_hsw+0x3000f16>
+ .byte 255,29,255,255,255,2 // lcall *0x2ffffff(%rip) # 3007360 <_sk_clut_4D_hsw+0x3000f0a>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,6 // incl (%rsi)
@@ -23685,16 +24029,16 @@ BALIGN32
.byte 0,0 // add %al,(%rax)
.byte 1,255 // add %edi,%edi
.byte 255 // (bad)
- .byte 255,5,255,255,255,9 // incl 0x9ffffff(%rip) # a006ee8 <_sk_clut_4D_hsw+0xa000f5e>
+ .byte 255,5,255,255,255,9 // incl 0x9ffffff(%rip) # a0073a8 <_sk_clut_4D_hsw+0xa000f52>
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,13,255,255,255,17 // decl 0x11ffffff(%rip) # 12006ef0 <_sk_clut_4D_hsw+0x12000f66>
+ .byte 255,13,255,255,255,17 // decl 0x11ffffff(%rip) # 120073b0 <_sk_clut_4D_hsw+0x12000f5a>
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,21,255,255,255,25 // callq *0x19ffffff(%rip) # 1a006ef8 <_sk_clut_4D_hsw+0x1a000f6e>
+ .byte 255,21,255,255,255,25 // callq *0x19ffffff(%rip) # 1a0073b8 <_sk_clut_4D_hsw+0x1a000f62>
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,29,255,255,255,2 // lcall *0x2ffffff(%rip) # 3006f00 <_sk_clut_4D_hsw+0x3000f76>
+ .byte 255,29,255,255,255,2 // lcall *0x2ffffff(%rip) # 30073c0 <_sk_clut_4D_hsw+0x3000f6a>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,6 // incl (%rsi)
@@ -23737,16 +24081,16 @@ BALIGN32
.byte 0,0 // add %al,(%rax)
.byte 1,255 // add %edi,%edi
.byte 255 // (bad)
- .byte 255,5,255,255,255,9 // incl 0x9ffffff(%rip) # a006f48 <_sk_clut_4D_hsw+0xa000fbe>
+ .byte 255,5,255,255,255,9 // incl 0x9ffffff(%rip) # a007408 <_sk_clut_4D_hsw+0xa000fb2>
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,13,255,255,255,17 // decl 0x11ffffff(%rip) # 12006f50 <_sk_clut_4D_hsw+0x12000fc6>
+ .byte 255,13,255,255,255,17 // decl 0x11ffffff(%rip) # 12007410 <_sk_clut_4D_hsw+0x12000fba>
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,21,255,255,255,25 // callq *0x19ffffff(%rip) # 1a006f58 <_sk_clut_4D_hsw+0x1a000fce>
+ .byte 255,21,255,255,255,25 // callq *0x19ffffff(%rip) # 1a007418 <_sk_clut_4D_hsw+0x1a000fc2>
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,29,255,255,255,2 // lcall *0x2ffffff(%rip) # 3006f60 <_sk_clut_4D_hsw+0x3000fd6>
+ .byte 255,29,255,255,255,2 // lcall *0x2ffffff(%rip) # 3007420 <_sk_clut_4D_hsw+0x3000fca>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,6 // incl (%rsi)
@@ -23789,16 +24133,16 @@ BALIGN32
.byte 0,0 // add %al,(%rax)
.byte 1,255 // add %edi,%edi
.byte 255 // (bad)
- .byte 255,5,255,255,255,9 // incl 0x9ffffff(%rip) # a006fa8 <_sk_clut_4D_hsw+0xa00101e>
+ .byte 255,5,255,255,255,9 // incl 0x9ffffff(%rip) # a007468 <_sk_clut_4D_hsw+0xa001012>
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,13,255,255,255,17 // decl 0x11ffffff(%rip) # 12006fb0 <_sk_clut_4D_hsw+0x12001026>
+ .byte 255,13,255,255,255,17 // decl 0x11ffffff(%rip) # 12007470 <_sk_clut_4D_hsw+0x1200101a>
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,21,255,255,255,25 // callq *0x19ffffff(%rip) # 1a006fb8 <_sk_clut_4D_hsw+0x1a00102e>
+ .byte 255,21,255,255,255,25 // callq *0x19ffffff(%rip) # 1a007478 <_sk_clut_4D_hsw+0x1a001022>
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,29,255,255,255,2 // lcall *0x2ffffff(%rip) # 3006fc0 <_sk_clut_4D_hsw+0x3001036>
+ .byte 255,29,255,255,255,2 // lcall *0x2ffffff(%rip) # 3007480 <_sk_clut_4D_hsw+0x300102a>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,6 // incl (%rsi)
@@ -23841,16 +24185,16 @@ BALIGN32
.byte 0,0 // add %al,(%rax)
.byte 1,255 // add %edi,%edi
.byte 255 // (bad)
- .byte 255,5,255,255,255,9 // incl 0x9ffffff(%rip) # a007008 <_sk_clut_4D_hsw+0xa00107e>
+ .byte 255,5,255,255,255,9 // incl 0x9ffffff(%rip) # a0074c8 <_sk_clut_4D_hsw+0xa001072>
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,13,255,255,255,17 // decl 0x11ffffff(%rip) # 12007010 <_sk_clut_4D_hsw+0x12001086>
+ .byte 255,13,255,255,255,17 // decl 0x11ffffff(%rip) # 120074d0 <_sk_clut_4D_hsw+0x1200107a>
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,21,255,255,255,25 // callq *0x19ffffff(%rip) # 1a007018 <_sk_clut_4D_hsw+0x1a00108e>
+ .byte 255,21,255,255,255,25 // callq *0x19ffffff(%rip) # 1a0074d8 <_sk_clut_4D_hsw+0x1a001082>
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,29,255,255,255,2 // lcall *0x2ffffff(%rip) # 3007020 <_sk_clut_4D_hsw+0x3001096>
+ .byte 255,29,255,255,255,2 // lcall *0x2ffffff(%rip) # 30074e0 <_sk_clut_4D_hsw+0x300108a>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,6 // incl (%rsi)
@@ -24040,7 +24384,7 @@ _sk_seed_shader_avx:
.byte 197,249,112,192,0 // vpshufd $0x0,%xmm0,%xmm0
.byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,226,125,24,13,236,172,0,0 // vbroadcastss 0xacec(%rip),%ymm1 # adbc <_sk_clut_4D_avx+0x2125>
+ .byte 196,226,125,24,13,184,176,0,0 // vbroadcastss 0xb0b8(%rip),%ymm1 # b188 <_sk_clut_4D_avx+0x2125>
.byte 197,252,88,193 // vaddps %ymm1,%ymm0,%ymm0
.byte 197,252,88,7 // vaddps (%rdi),%ymm0,%ymm0
.byte 197,249,110,209 // vmovd %ecx,%xmm2
@@ -24049,7 +24393,7 @@ _sk_seed_shader_avx:
.byte 197,252,91,210 // vcvtdq2ps %ymm2,%ymm2
.byte 197,236,88,201 // vaddps %ymm1,%ymm2,%ymm1
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,21,198,172,0,0 // vbroadcastss 0xacc6(%rip),%ymm2 # adc0 <_sk_clut_4D_avx+0x2129>
+ .byte 196,226,125,24,21,146,176,0,0 // vbroadcastss 0xb092(%rip),%ymm2 # b18c <_sk_clut_4D_avx+0x2129>
.byte 197,228,87,219 // vxorps %ymm3,%ymm3,%ymm3
.byte 197,220,87,228 // vxorps %ymm4,%ymm4,%ymm4
.byte 197,212,87,237 // vxorps %ymm5,%ymm5,%ymm5
@@ -24073,7 +24417,7 @@ _sk_dither_avx:
.byte 196,65,121,112,201,0 // vpshufd $0x0,%xmm9,%xmm9
.byte 196,67,53,24,201,1 // vinsertf128 $0x1,%xmm9,%ymm9,%ymm9
.byte 196,65,52,87,208 // vxorps %ymm8,%ymm9,%ymm10
- .byte 196,98,125,24,29,111,172,0,0 // vbroadcastss 0xac6f(%rip),%ymm11 # adc4 <_sk_clut_4D_avx+0x212d>
+ .byte 196,98,125,24,29,59,176,0,0 // vbroadcastss 0xb03b(%rip),%ymm11 # b190 <_sk_clut_4D_avx+0x212d>
.byte 196,65,44,84,203 // vandps %ymm11,%ymm10,%ymm9
.byte 196,193,25,114,241,5 // vpslld $0x5,%xmm9,%xmm12
.byte 196,67,125,25,201,1 // vextractf128 $0x1,%ymm9,%xmm9
@@ -24084,8 +24428,8 @@ _sk_dither_avx:
.byte 196,67,125,25,219,1 // vextractf128 $0x1,%ymm11,%xmm11
.byte 196,193,33,114,243,4 // vpslld $0x4,%xmm11,%xmm11
.byte 196,67,29,24,219,1 // vinsertf128 $0x1,%xmm11,%ymm12,%ymm11
- .byte 196,98,125,24,37,48,172,0,0 // vbroadcastss 0xac30(%rip),%ymm12 # adc8 <_sk_clut_4D_avx+0x2131>
- .byte 196,98,125,24,45,43,172,0,0 // vbroadcastss 0xac2b(%rip),%ymm13 # adcc <_sk_clut_4D_avx+0x2135>
+ .byte 196,98,125,24,37,252,175,0,0 // vbroadcastss 0xaffc(%rip),%ymm12 # b194 <_sk_clut_4D_avx+0x2131>
+ .byte 196,98,125,24,45,247,175,0,0 // vbroadcastss 0xaff7(%rip),%ymm13 # b198 <_sk_clut_4D_avx+0x2135>
.byte 196,65,44,84,245 // vandps %ymm13,%ymm10,%ymm14
.byte 196,193,1,114,246,2 // vpslld $0x2,%xmm14,%xmm15
.byte 196,67,125,25,246,1 // vextractf128 $0x1,%ymm14,%xmm14
@@ -24112,9 +24456,9 @@ _sk_dither_avx:
.byte 196,65,60,86,193 // vorps %ymm9,%ymm8,%ymm8
.byte 196,65,60,86,194 // vorps %ymm10,%ymm8,%ymm8
.byte 196,65,124,91,192 // vcvtdq2ps %ymm8,%ymm8
- .byte 196,98,125,24,13,150,171,0,0 // vbroadcastss 0xab96(%rip),%ymm9 # add0 <_sk_clut_4D_avx+0x2139>
+ .byte 196,98,125,24,13,98,175,0,0 // vbroadcastss 0xaf62(%rip),%ymm9 # b19c <_sk_clut_4D_avx+0x2139>
.byte 196,65,60,89,193 // vmulps %ymm9,%ymm8,%ymm8
- .byte 196,98,125,24,13,140,171,0,0 // vbroadcastss 0xab8c(%rip),%ymm9 # add4 <_sk_clut_4D_avx+0x213d>
+ .byte 196,98,125,24,13,88,175,0,0 // vbroadcastss 0xaf58(%rip),%ymm9 # b1a0 <_sk_clut_4D_avx+0x213d>
.byte 196,65,60,88,193 // vaddps %ymm9,%ymm8,%ymm8
.byte 196,98,125,24,8 // vbroadcastss (%rax),%ymm9
.byte 196,65,52,89,192 // vmulps %ymm8,%ymm9,%ymm8
@@ -24148,7 +24492,7 @@ HIDDEN _sk_black_color_avx
FUNCTION(_sk_black_color_avx)
_sk_black_color_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,29,44,171,0,0 // vbroadcastss 0xab2c(%rip),%ymm3 # add8 <_sk_clut_4D_avx+0x2141>
+ .byte 196,226,125,24,29,248,174,0,0 // vbroadcastss 0xaef8(%rip),%ymm3 # b1a4 <_sk_clut_4D_avx+0x2141>
.byte 197,252,87,192 // vxorps %ymm0,%ymm0,%ymm0
.byte 197,244,87,201 // vxorps %ymm1,%ymm1,%ymm1
.byte 197,236,87,210 // vxorps %ymm2,%ymm2,%ymm2
@@ -24159,7 +24503,7 @@ HIDDEN _sk_white_color_avx
FUNCTION(_sk_white_color_avx)
_sk_white_color_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,5,23,171,0,0 // vbroadcastss 0xab17(%rip),%ymm0 # addc <_sk_clut_4D_avx+0x2145>
+ .byte 196,226,125,24,5,227,174,0,0 // vbroadcastss 0xaee3(%rip),%ymm0 # b1a8 <_sk_clut_4D_avx+0x2145>
.byte 197,252,40,200 // vmovaps %ymm0,%ymm1
.byte 197,252,40,208 // vmovaps %ymm0,%ymm2
.byte 197,252,40,216 // vmovaps %ymm0,%ymm3
@@ -24205,7 +24549,7 @@ HIDDEN _sk_srcatop_avx
FUNCTION(_sk_srcatop_avx)
_sk_srcatop_avx:
.byte 197,252,89,199 // vmulps %ymm7,%ymm0,%ymm0
- .byte 196,98,125,24,5,186,170,0,0 // vbroadcastss 0xaaba(%rip),%ymm8 # ade0 <_sk_clut_4D_avx+0x2149>
+ .byte 196,98,125,24,5,134,174,0,0 // vbroadcastss 0xae86(%rip),%ymm8 # b1ac <_sk_clut_4D_avx+0x2149>
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
.byte 197,60,89,204 // vmulps %ymm4,%ymm8,%ymm9
.byte 197,180,88,192 // vaddps %ymm0,%ymm9,%ymm0
@@ -24226,7 +24570,7 @@ HIDDEN _sk_dstatop_avx
FUNCTION(_sk_dstatop_avx)
_sk_dstatop_avx:
.byte 197,100,89,196 // vmulps %ymm4,%ymm3,%ymm8
- .byte 196,98,125,24,13,124,170,0,0 // vbroadcastss 0xaa7c(%rip),%ymm9 # ade4 <_sk_clut_4D_avx+0x214d>
+ .byte 196,98,125,24,13,72,174,0,0 // vbroadcastss 0xae48(%rip),%ymm9 # b1b0 <_sk_clut_4D_avx+0x214d>
.byte 197,52,92,207 // vsubps %ymm7,%ymm9,%ymm9
.byte 197,180,89,192 // vmulps %ymm0,%ymm9,%ymm0
.byte 197,188,88,192 // vaddps %ymm0,%ymm8,%ymm0
@@ -24268,7 +24612,7 @@ HIDDEN _sk_srcout_avx
.globl _sk_srcout_avx
FUNCTION(_sk_srcout_avx)
_sk_srcout_avx:
- .byte 196,98,125,24,5,27,170,0,0 // vbroadcastss 0xaa1b(%rip),%ymm8 # ade8 <_sk_clut_4D_avx+0x2151>
+ .byte 196,98,125,24,5,231,173,0,0 // vbroadcastss 0xade7(%rip),%ymm8 # b1b4 <_sk_clut_4D_avx+0x2151>
.byte 197,60,92,199 // vsubps %ymm7,%ymm8,%ymm8
.byte 197,188,89,192 // vmulps %ymm0,%ymm8,%ymm0
.byte 197,188,89,201 // vmulps %ymm1,%ymm8,%ymm1
@@ -24281,7 +24625,7 @@ HIDDEN _sk_dstout_avx
.globl _sk_dstout_avx
FUNCTION(_sk_dstout_avx)
_sk_dstout_avx:
- .byte 196,226,125,24,5,254,169,0,0 // vbroadcastss 0xa9fe(%rip),%ymm0 # adec <_sk_clut_4D_avx+0x2155>
+ .byte 196,226,125,24,5,202,173,0,0 // vbroadcastss 0xadca(%rip),%ymm0 # b1b8 <_sk_clut_4D_avx+0x2155>
.byte 197,252,92,219 // vsubps %ymm3,%ymm0,%ymm3
.byte 197,228,89,196 // vmulps %ymm4,%ymm3,%ymm0
.byte 197,228,89,205 // vmulps %ymm5,%ymm3,%ymm1
@@ -24294,7 +24638,7 @@ HIDDEN _sk_srcover_avx
.globl _sk_srcover_avx
FUNCTION(_sk_srcover_avx)
_sk_srcover_avx:
- .byte 196,98,125,24,5,225,169,0,0 // vbroadcastss 0xa9e1(%rip),%ymm8 # adf0 <_sk_clut_4D_avx+0x2159>
+ .byte 196,98,125,24,5,173,173,0,0 // vbroadcastss 0xadad(%rip),%ymm8 # b1bc <_sk_clut_4D_avx+0x2159>
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
.byte 197,60,89,204 // vmulps %ymm4,%ymm8,%ymm9
.byte 197,180,88,192 // vaddps %ymm0,%ymm9,%ymm0
@@ -24311,7 +24655,7 @@ HIDDEN _sk_dstover_avx
.globl _sk_dstover_avx
FUNCTION(_sk_dstover_avx)
_sk_dstover_avx:
- .byte 196,98,125,24,5,180,169,0,0 // vbroadcastss 0xa9b4(%rip),%ymm8 # adf4 <_sk_clut_4D_avx+0x215d>
+ .byte 196,98,125,24,5,128,173,0,0 // vbroadcastss 0xad80(%rip),%ymm8 # b1c0 <_sk_clut_4D_avx+0x215d>
.byte 197,60,92,199 // vsubps %ymm7,%ymm8,%ymm8
.byte 197,188,89,192 // vmulps %ymm0,%ymm8,%ymm0
.byte 197,252,88,196 // vaddps %ymm4,%ymm0,%ymm0
@@ -24339,7 +24683,7 @@ HIDDEN _sk_multiply_avx
.globl _sk_multiply_avx
FUNCTION(_sk_multiply_avx)
_sk_multiply_avx:
- .byte 196,98,125,24,5,115,169,0,0 // vbroadcastss 0xa973(%rip),%ymm8 # adf8 <_sk_clut_4D_avx+0x2161>
+ .byte 196,98,125,24,5,63,173,0,0 // vbroadcastss 0xad3f(%rip),%ymm8 # b1c4 <_sk_clut_4D_avx+0x2161>
.byte 197,60,92,207 // vsubps %ymm7,%ymm8,%ymm9
.byte 197,52,89,208 // vmulps %ymm0,%ymm9,%ymm10
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
@@ -24399,7 +24743,7 @@ HIDDEN _sk_xor__avx
.globl _sk_xor__avx
FUNCTION(_sk_xor__avx)
_sk_xor__avx:
- .byte 196,98,125,24,5,194,168,0,0 // vbroadcastss 0xa8c2(%rip),%ymm8 # adfc <_sk_clut_4D_avx+0x2165>
+ .byte 196,98,125,24,5,142,172,0,0 // vbroadcastss 0xac8e(%rip),%ymm8 # b1c8 <_sk_clut_4D_avx+0x2165>
.byte 197,60,92,207 // vsubps %ymm7,%ymm8,%ymm9
.byte 197,180,89,192 // vmulps %ymm0,%ymm9,%ymm0
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
@@ -24436,7 +24780,7 @@ _sk_darken_avx:
.byte 197,100,89,206 // vmulps %ymm6,%ymm3,%ymm9
.byte 196,193,108,95,209 // vmaxps %ymm9,%ymm2,%ymm2
.byte 197,188,92,210 // vsubps %ymm2,%ymm8,%ymm2
- .byte 196,98,125,24,5,66,168,0,0 // vbroadcastss 0xa842(%rip),%ymm8 # ae00 <_sk_clut_4D_avx+0x2169>
+ .byte 196,98,125,24,5,14,172,0,0 // vbroadcastss 0xac0e(%rip),%ymm8 # b1cc <_sk_clut_4D_avx+0x2169>
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
.byte 197,60,89,199 // vmulps %ymm7,%ymm8,%ymm8
.byte 197,188,88,219 // vaddps %ymm3,%ymm8,%ymm3
@@ -24462,7 +24806,7 @@ _sk_lighten_avx:
.byte 197,100,89,206 // vmulps %ymm6,%ymm3,%ymm9
.byte 196,193,108,93,209 // vminps %ymm9,%ymm2,%ymm2
.byte 197,188,92,210 // vsubps %ymm2,%ymm8,%ymm2
- .byte 196,98,125,24,5,238,167,0,0 // vbroadcastss 0xa7ee(%rip),%ymm8 # ae04 <_sk_clut_4D_avx+0x216d>
+ .byte 196,98,125,24,5,186,171,0,0 // vbroadcastss 0xabba(%rip),%ymm8 # b1d0 <_sk_clut_4D_avx+0x216d>
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
.byte 197,60,89,199 // vmulps %ymm7,%ymm8,%ymm8
.byte 197,188,88,219 // vaddps %ymm3,%ymm8,%ymm3
@@ -24491,7 +24835,7 @@ _sk_difference_avx:
.byte 196,193,108,93,209 // vminps %ymm9,%ymm2,%ymm2
.byte 197,236,88,210 // vaddps %ymm2,%ymm2,%ymm2
.byte 197,188,92,210 // vsubps %ymm2,%ymm8,%ymm2
- .byte 196,98,125,24,5,142,167,0,0 // vbroadcastss 0xa78e(%rip),%ymm8 # ae08 <_sk_clut_4D_avx+0x2171>
+ .byte 196,98,125,24,5,90,171,0,0 // vbroadcastss 0xab5a(%rip),%ymm8 # b1d4 <_sk_clut_4D_avx+0x2171>
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
.byte 197,60,89,199 // vmulps %ymm7,%ymm8,%ymm8
.byte 197,188,88,219 // vaddps %ymm3,%ymm8,%ymm3
@@ -24514,7 +24858,7 @@ _sk_exclusion_avx:
.byte 197,236,89,214 // vmulps %ymm6,%ymm2,%ymm2
.byte 197,236,88,210 // vaddps %ymm2,%ymm2,%ymm2
.byte 197,188,92,210 // vsubps %ymm2,%ymm8,%ymm2
- .byte 196,98,125,24,5,73,167,0,0 // vbroadcastss 0xa749(%rip),%ymm8 # ae0c <_sk_clut_4D_avx+0x2175>
+ .byte 196,98,125,24,5,21,171,0,0 // vbroadcastss 0xab15(%rip),%ymm8 # b1d8 <_sk_clut_4D_avx+0x2175>
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
.byte 197,60,89,199 // vmulps %ymm7,%ymm8,%ymm8
.byte 197,188,88,219 // vaddps %ymm3,%ymm8,%ymm3
@@ -24525,7 +24869,7 @@ HIDDEN _sk_colorburn_avx
.globl _sk_colorburn_avx
FUNCTION(_sk_colorburn_avx)
_sk_colorburn_avx:
- .byte 196,98,125,24,5,52,167,0,0 // vbroadcastss 0xa734(%rip),%ymm8 # ae10 <_sk_clut_4D_avx+0x2179>
+ .byte 196,98,125,24,5,0,171,0,0 // vbroadcastss 0xab00(%rip),%ymm8 # b1dc <_sk_clut_4D_avx+0x2179>
.byte 197,60,92,207 // vsubps %ymm7,%ymm8,%ymm9
.byte 197,52,89,216 // vmulps %ymm0,%ymm9,%ymm11
.byte 196,65,44,87,210 // vxorps %ymm10,%ymm10,%ymm10
@@ -24587,7 +24931,7 @@ HIDDEN _sk_colordodge_avx
FUNCTION(_sk_colordodge_avx)
_sk_colordodge_avx:
.byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
- .byte 196,98,125,24,13,48,166,0,0 // vbroadcastss 0xa630(%rip),%ymm9 # ae14 <_sk_clut_4D_avx+0x217d>
+ .byte 196,98,125,24,13,252,169,0,0 // vbroadcastss 0xa9fc(%rip),%ymm9 # b1e0 <_sk_clut_4D_avx+0x217d>
.byte 197,52,92,215 // vsubps %ymm7,%ymm9,%ymm10
.byte 197,44,89,216 // vmulps %ymm0,%ymm10,%ymm11
.byte 197,52,92,203 // vsubps %ymm3,%ymm9,%ymm9
@@ -24644,7 +24988,7 @@ HIDDEN _sk_hardlight_avx
.globl _sk_hardlight_avx
FUNCTION(_sk_hardlight_avx)
_sk_hardlight_avx:
- .byte 196,98,125,24,5,66,165,0,0 // vbroadcastss 0xa542(%rip),%ymm8 # ae18 <_sk_clut_4D_avx+0x2181>
+ .byte 196,98,125,24,5,14,169,0,0 // vbroadcastss 0xa90e(%rip),%ymm8 # b1e4 <_sk_clut_4D_avx+0x2181>
.byte 197,60,92,215 // vsubps %ymm7,%ymm8,%ymm10
.byte 197,44,89,200 // vmulps %ymm0,%ymm10,%ymm9
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
@@ -24699,7 +25043,7 @@ HIDDEN _sk_overlay_avx
.globl _sk_overlay_avx
FUNCTION(_sk_overlay_avx)
_sk_overlay_avx:
- .byte 196,98,125,24,5,107,164,0,0 // vbroadcastss 0xa46b(%rip),%ymm8 # ae1c <_sk_clut_4D_avx+0x2185>
+ .byte 196,98,125,24,5,55,168,0,0 // vbroadcastss 0xa837(%rip),%ymm8 # b1e8 <_sk_clut_4D_avx+0x2185>
.byte 197,60,92,215 // vsubps %ymm7,%ymm8,%ymm10
.byte 197,44,89,200 // vmulps %ymm0,%ymm10,%ymm9
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
@@ -24765,10 +25109,10 @@ _sk_softlight_avx:
.byte 196,65,60,88,192 // vaddps %ymm8,%ymm8,%ymm8
.byte 196,65,60,89,216 // vmulps %ymm8,%ymm8,%ymm11
.byte 196,65,60,88,195 // vaddps %ymm11,%ymm8,%ymm8
- .byte 196,98,125,24,29,98,163,0,0 // vbroadcastss 0xa362(%rip),%ymm11 # ae24 <_sk_clut_4D_avx+0x218d>
+ .byte 196,98,125,24,29,46,167,0,0 // vbroadcastss 0xa72e(%rip),%ymm11 # b1f0 <_sk_clut_4D_avx+0x218d>
.byte 196,65,28,88,235 // vaddps %ymm11,%ymm12,%ymm13
.byte 196,65,20,89,192 // vmulps %ymm8,%ymm13,%ymm8
- .byte 196,98,125,24,45,83,163,0,0 // vbroadcastss 0xa353(%rip),%ymm13 # ae28 <_sk_clut_4D_avx+0x2191>
+ .byte 196,98,125,24,45,31,167,0,0 // vbroadcastss 0xa71f(%rip),%ymm13 # b1f4 <_sk_clut_4D_avx+0x2191>
.byte 196,65,28,89,245 // vmulps %ymm13,%ymm12,%ymm14
.byte 196,65,12,88,192 // vaddps %ymm8,%ymm14,%ymm8
.byte 196,65,124,82,244 // vrsqrtps %ymm12,%ymm14
@@ -24779,7 +25123,7 @@ _sk_softlight_avx:
.byte 197,4,194,255,2 // vcmpleps %ymm7,%ymm15,%ymm15
.byte 196,67,13,74,240,240 // vblendvps %ymm15,%ymm8,%ymm14,%ymm14
.byte 197,116,88,249 // vaddps %ymm1,%ymm1,%ymm15
- .byte 196,98,125,24,5,17,163,0,0 // vbroadcastss 0xa311(%rip),%ymm8 # ae20 <_sk_clut_4D_avx+0x2189>
+ .byte 196,98,125,24,5,221,166,0,0 // vbroadcastss 0xa6dd(%rip),%ymm8 # b1ec <_sk_clut_4D_avx+0x2189>
.byte 196,65,60,92,228 // vsubps %ymm12,%ymm8,%ymm12
.byte 197,132,92,195 // vsubps %ymm3,%ymm15,%ymm0
.byte 196,65,124,89,228 // vmulps %ymm12,%ymm0,%ymm12
@@ -24906,12 +25250,12 @@ _sk_hue_avx:
.byte 196,65,28,89,219 // vmulps %ymm11,%ymm12,%ymm11
.byte 196,65,36,94,222 // vdivps %ymm14,%ymm11,%ymm11
.byte 196,67,37,74,224,240 // vblendvps %ymm15,%ymm8,%ymm11,%ymm12
- .byte 196,98,125,24,53,224,160,0,0 // vbroadcastss 0xa0e0(%rip),%ymm14 # ae2c <_sk_clut_4D_avx+0x2195>
+ .byte 196,98,125,24,53,172,164,0,0 // vbroadcastss 0xa4ac(%rip),%ymm14 # b1f8 <_sk_clut_4D_avx+0x2195>
.byte 196,65,92,89,222 // vmulps %ymm14,%ymm4,%ymm11
- .byte 196,98,125,24,61,214,160,0,0 // vbroadcastss 0xa0d6(%rip),%ymm15 # ae30 <_sk_clut_4D_avx+0x2199>
+ .byte 196,98,125,24,61,162,164,0,0 // vbroadcastss 0xa4a2(%rip),%ymm15 # b1fc <_sk_clut_4D_avx+0x2199>
.byte 196,65,84,89,239 // vmulps %ymm15,%ymm5,%ymm13
.byte 196,65,36,88,221 // vaddps %ymm13,%ymm11,%ymm11
- .byte 196,226,125,24,5,199,160,0,0 // vbroadcastss 0xa0c7(%rip),%ymm0 # ae34 <_sk_clut_4D_avx+0x219d>
+ .byte 196,226,125,24,5,147,164,0,0 // vbroadcastss 0xa493(%rip),%ymm0 # b200 <_sk_clut_4D_avx+0x219d>
.byte 197,76,89,232 // vmulps %ymm0,%ymm6,%ymm13
.byte 196,65,36,88,221 // vaddps %ymm13,%ymm11,%ymm11
.byte 196,65,52,89,238 // vmulps %ymm14,%ymm9,%ymm13
@@ -24972,7 +25316,7 @@ _sk_hue_avx:
.byte 196,65,36,95,208 // vmaxps %ymm8,%ymm11,%ymm10
.byte 196,195,109,74,209,240 // vblendvps %ymm15,%ymm9,%ymm2,%ymm2
.byte 196,193,108,95,208 // vmaxps %ymm8,%ymm2,%ymm2
- .byte 196,98,125,24,5,160,159,0,0 // vbroadcastss 0x9fa0(%rip),%ymm8 # ae38 <_sk_clut_4D_avx+0x21a1>
+ .byte 196,98,125,24,5,108,163,0,0 // vbroadcastss 0xa36c(%rip),%ymm8 # b204 <_sk_clut_4D_avx+0x21a1>
.byte 197,60,92,207 // vsubps %ymm7,%ymm8,%ymm9
.byte 197,180,89,201 // vmulps %ymm1,%ymm9,%ymm1
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
@@ -25029,12 +25373,12 @@ _sk_saturation_avx:
.byte 196,65,28,89,219 // vmulps %ymm11,%ymm12,%ymm11
.byte 196,65,36,94,222 // vdivps %ymm14,%ymm11,%ymm11
.byte 196,67,37,74,224,240 // vblendvps %ymm15,%ymm8,%ymm11,%ymm12
- .byte 196,98,125,24,53,174,158,0,0 // vbroadcastss 0x9eae(%rip),%ymm14 # ae3c <_sk_clut_4D_avx+0x21a5>
+ .byte 196,98,125,24,53,122,162,0,0 // vbroadcastss 0xa27a(%rip),%ymm14 # b208 <_sk_clut_4D_avx+0x21a5>
.byte 196,65,92,89,222 // vmulps %ymm14,%ymm4,%ymm11
- .byte 196,98,125,24,61,164,158,0,0 // vbroadcastss 0x9ea4(%rip),%ymm15 # ae40 <_sk_clut_4D_avx+0x21a9>
+ .byte 196,98,125,24,61,112,162,0,0 // vbroadcastss 0xa270(%rip),%ymm15 # b20c <_sk_clut_4D_avx+0x21a9>
.byte 196,65,84,89,239 // vmulps %ymm15,%ymm5,%ymm13
.byte 196,65,36,88,221 // vaddps %ymm13,%ymm11,%ymm11
- .byte 196,226,125,24,5,149,158,0,0 // vbroadcastss 0x9e95(%rip),%ymm0 # ae44 <_sk_clut_4D_avx+0x21ad>
+ .byte 196,226,125,24,5,97,162,0,0 // vbroadcastss 0xa261(%rip),%ymm0 # b210 <_sk_clut_4D_avx+0x21ad>
.byte 197,76,89,232 // vmulps %ymm0,%ymm6,%ymm13
.byte 196,65,36,88,221 // vaddps %ymm13,%ymm11,%ymm11
.byte 196,65,52,89,238 // vmulps %ymm14,%ymm9,%ymm13
@@ -25095,7 +25439,7 @@ _sk_saturation_avx:
.byte 196,65,36,95,208 // vmaxps %ymm8,%ymm11,%ymm10
.byte 196,195,109,74,209,240 // vblendvps %ymm15,%ymm9,%ymm2,%ymm2
.byte 196,193,108,95,208 // vmaxps %ymm8,%ymm2,%ymm2
- .byte 196,98,125,24,5,110,157,0,0 // vbroadcastss 0x9d6e(%rip),%ymm8 # ae48 <_sk_clut_4D_avx+0x21b1>
+ .byte 196,98,125,24,5,58,161,0,0 // vbroadcastss 0xa13a(%rip),%ymm8 # b214 <_sk_clut_4D_avx+0x21b1>
.byte 197,60,92,207 // vsubps %ymm7,%ymm8,%ymm9
.byte 197,180,89,201 // vmulps %ymm1,%ymm9,%ymm1
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
@@ -25124,12 +25468,12 @@ _sk_color_avx:
.byte 197,252,17,68,36,168 // vmovups %ymm0,-0x58(%rsp)
.byte 197,124,89,199 // vmulps %ymm7,%ymm0,%ymm8
.byte 197,116,89,207 // vmulps %ymm7,%ymm1,%ymm9
- .byte 196,98,125,24,45,4,157,0,0 // vbroadcastss 0x9d04(%rip),%ymm13 # ae4c <_sk_clut_4D_avx+0x21b5>
+ .byte 196,98,125,24,45,208,160,0,0 // vbroadcastss 0xa0d0(%rip),%ymm13 # b218 <_sk_clut_4D_avx+0x21b5>
.byte 196,65,92,89,213 // vmulps %ymm13,%ymm4,%ymm10
- .byte 196,98,125,24,53,250,156,0,0 // vbroadcastss 0x9cfa(%rip),%ymm14 # ae50 <_sk_clut_4D_avx+0x21b9>
+ .byte 196,98,125,24,53,198,160,0,0 // vbroadcastss 0xa0c6(%rip),%ymm14 # b21c <_sk_clut_4D_avx+0x21b9>
.byte 196,65,84,89,222 // vmulps %ymm14,%ymm5,%ymm11
.byte 196,65,44,88,211 // vaddps %ymm11,%ymm10,%ymm10
- .byte 196,98,125,24,61,235,156,0,0 // vbroadcastss 0x9ceb(%rip),%ymm15 # ae54 <_sk_clut_4D_avx+0x21bd>
+ .byte 196,98,125,24,61,183,160,0,0 // vbroadcastss 0xa0b7(%rip),%ymm15 # b220 <_sk_clut_4D_avx+0x21bd>
.byte 196,65,76,89,223 // vmulps %ymm15,%ymm6,%ymm11
.byte 196,193,44,88,195 // vaddps %ymm11,%ymm10,%ymm0
.byte 196,65,60,89,221 // vmulps %ymm13,%ymm8,%ymm11
@@ -25192,7 +25536,7 @@ _sk_color_avx:
.byte 196,65,44,95,207 // vmaxps %ymm15,%ymm10,%ymm9
.byte 196,195,37,74,192,0 // vblendvps %ymm0,%ymm8,%ymm11,%ymm0
.byte 196,65,124,95,199 // vmaxps %ymm15,%ymm0,%ymm8
- .byte 196,226,125,24,5,178,155,0,0 // vbroadcastss 0x9bb2(%rip),%ymm0 # ae58 <_sk_clut_4D_avx+0x21c1>
+ .byte 196,226,125,24,5,126,159,0,0 // vbroadcastss 0x9f7e(%rip),%ymm0 # b224 <_sk_clut_4D_avx+0x21c1>
.byte 197,124,92,215 // vsubps %ymm7,%ymm0,%ymm10
.byte 197,172,89,84,36,168 // vmulps -0x58(%rsp),%ymm10,%ymm2
.byte 197,124,92,219 // vsubps %ymm3,%ymm0,%ymm11
@@ -25222,12 +25566,12 @@ _sk_luminosity_avx:
.byte 197,252,40,208 // vmovaps %ymm0,%ymm2
.byte 197,100,89,196 // vmulps %ymm4,%ymm3,%ymm8
.byte 197,100,89,205 // vmulps %ymm5,%ymm3,%ymm9
- .byte 196,98,125,24,45,68,155,0,0 // vbroadcastss 0x9b44(%rip),%ymm13 # ae5c <_sk_clut_4D_avx+0x21c5>
+ .byte 196,98,125,24,45,16,159,0,0 // vbroadcastss 0x9f10(%rip),%ymm13 # b228 <_sk_clut_4D_avx+0x21c5>
.byte 196,65,108,89,213 // vmulps %ymm13,%ymm2,%ymm10
- .byte 196,98,125,24,53,58,155,0,0 // vbroadcastss 0x9b3a(%rip),%ymm14 # ae60 <_sk_clut_4D_avx+0x21c9>
+ .byte 196,98,125,24,53,6,159,0,0 // vbroadcastss 0x9f06(%rip),%ymm14 # b22c <_sk_clut_4D_avx+0x21c9>
.byte 196,65,116,89,222 // vmulps %ymm14,%ymm1,%ymm11
.byte 196,65,44,88,211 // vaddps %ymm11,%ymm10,%ymm10
- .byte 196,98,125,24,61,43,155,0,0 // vbroadcastss 0x9b2b(%rip),%ymm15 # ae64 <_sk_clut_4D_avx+0x21cd>
+ .byte 196,98,125,24,61,247,158,0,0 // vbroadcastss 0x9ef7(%rip),%ymm15 # b230 <_sk_clut_4D_avx+0x21cd>
.byte 196,65,28,89,223 // vmulps %ymm15,%ymm12,%ymm11
.byte 196,193,44,88,195 // vaddps %ymm11,%ymm10,%ymm0
.byte 196,65,60,89,221 // vmulps %ymm13,%ymm8,%ymm11
@@ -25290,7 +25634,7 @@ _sk_luminosity_avx:
.byte 196,65,44,95,207 // vmaxps %ymm15,%ymm10,%ymm9
.byte 196,195,37,74,192,0 // vblendvps %ymm0,%ymm8,%ymm11,%ymm0
.byte 196,65,124,95,199 // vmaxps %ymm15,%ymm0,%ymm8
- .byte 196,226,125,24,5,242,153,0,0 // vbroadcastss 0x99f2(%rip),%ymm0 # ae68 <_sk_clut_4D_avx+0x21d1>
+ .byte 196,226,125,24,5,190,157,0,0 // vbroadcastss 0x9dbe(%rip),%ymm0 # b234 <_sk_clut_4D_avx+0x21d1>
.byte 197,124,92,215 // vsubps %ymm7,%ymm0,%ymm10
.byte 197,172,89,210 // vmulps %ymm2,%ymm10,%ymm2
.byte 197,124,92,219 // vsubps %ymm3,%ymm0,%ymm11
@@ -25314,39 +25658,38 @@ HIDDEN _sk_srcover_rgba_8888_avx
.globl _sk_srcover_rgba_8888_avx
FUNCTION(_sk_srcover_rgba_8888_avx)
_sk_srcover_rgba_8888_avx:
- .byte 73,137,201 // mov %rcx,%r9
+ .byte 83 // push %rbx
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,99,80,8 // movslq 0x8(%rax),%r10
- .byte 73,99,201 // movslq %r9d,%rcx
- .byte 73,15,175,202 // imul %r10,%rcx
- .byte 72,193,225,2 // shl $0x2,%rcx
- .byte 72,3,8 // add (%rax),%rcx
- .byte 72,99,194 // movslq %edx,%rax
- .byte 72,141,4,129 // lea (%rcx,%rax,4),%rax
+ .byte 76,99,201 // movslq %ecx,%r9
+ .byte 77,15,175,202 // imul %r10,%r9
+ .byte 73,193,225,2 // shl $0x2,%r9
+ .byte 76,3,8 // add (%rax),%r9
+ .byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 15,133,41,1,0,0 // jne 1611 <_sk_srcover_rgba_8888_avx+0x150>
- .byte 197,252,16,56 // vmovups (%rax),%ymm7
- .byte 197,124,40,13,172,159,0,0 // vmovaps 0x9fac(%rip),%ymm9 # b4a0 <_sk_clut_4D_avx+0x2809>
- .byte 196,193,68,84,225 // vandps %ymm9,%ymm7,%ymm4
+ .byte 15,133,43,1,0,0 // jne 160d <_sk_srcover_rgba_8888_avx+0x14c>
+ .byte 196,65,125,16,4,153 // vmovupd (%r9,%rbx,4),%ymm8
+ .byte 197,125,40,13,240,161,0,0 // vmovapd 0xa1f0(%rip),%ymm9 # b6e0 <_sk_clut_4D_avx+0x267d>
+ .byte 196,193,61,84,225 // vandpd %ymm9,%ymm8,%ymm4
.byte 197,252,91,228 // vcvtdq2ps %ymm4,%ymm4
- .byte 197,209,114,215,8 // vpsrld $0x8,%xmm7,%xmm5
- .byte 196,195,125,25,248,1 // vextractf128 $0x1,%ymm7,%xmm8
- .byte 196,193,73,114,208,8 // vpsrld $0x8,%xmm8,%xmm6
+ .byte 196,193,81,114,208,8 // vpsrld $0x8,%xmm8,%xmm5
+ .byte 196,99,125,25,199,1 // vextractf128 $0x1,%ymm8,%xmm7
+ .byte 197,201,114,215,8 // vpsrld $0x8,%xmm7,%xmm6
.byte 196,227,85,24,238,1 // vinsertf128 $0x1,%xmm6,%ymm5,%ymm5
- .byte 196,193,84,84,233 // vandps %ymm9,%ymm5,%ymm5
+ .byte 196,193,85,84,233 // vandpd %ymm9,%ymm5,%ymm5
.byte 197,252,91,237 // vcvtdq2ps %ymm5,%ymm5
- .byte 197,169,114,215,16 // vpsrld $0x10,%xmm7,%xmm10
- .byte 196,193,73,114,208,16 // vpsrld $0x10,%xmm8,%xmm6
+ .byte 196,193,41,114,208,16 // vpsrld $0x10,%xmm8,%xmm10
+ .byte 197,201,114,215,16 // vpsrld $0x10,%xmm7,%xmm6
.byte 196,227,45,24,246,1 // vinsertf128 $0x1,%xmm6,%ymm10,%ymm6
- .byte 196,193,76,84,241 // vandps %ymm9,%ymm6,%ymm6
+ .byte 196,193,77,84,241 // vandpd %ymm9,%ymm6,%ymm6
.byte 197,252,91,246 // vcvtdq2ps %ymm6,%ymm6
- .byte 197,177,114,215,24 // vpsrld $0x18,%xmm7,%xmm9
- .byte 196,193,65,114,208,24 // vpsrld $0x18,%xmm8,%xmm7
- .byte 196,227,53,24,255,1 // vinsertf128 $0x1,%xmm7,%ymm9,%ymm7
+ .byte 196,193,57,114,208,24 // vpsrld $0x18,%xmm8,%xmm8
+ .byte 197,193,114,215,24 // vpsrld $0x18,%xmm7,%xmm7
+ .byte 196,227,61,24,255,1 // vinsertf128 $0x1,%xmm7,%ymm8,%ymm7
.byte 197,252,91,255 // vcvtdq2ps %ymm7,%ymm7
- .byte 196,98,125,24,5,23,153,0,0 // vbroadcastss 0x9917(%rip),%ymm8 # ae6c <_sk_clut_4D_avx+0x21d5>
+ .byte 196,98,125,24,5,231,156,0,0 // vbroadcastss 0x9ce7(%rip),%ymm8 # b238 <_sk_clut_4D_avx+0x21d5>
.byte 197,60,92,195 // vsubps %ymm3,%ymm8,%ymm8
- .byte 196,98,125,24,13,14,153,0,0 // vbroadcastss 0x990e(%rip),%ymm9 # ae70 <_sk_clut_4D_avx+0x21d9>
+ .byte 196,98,125,24,13,222,156,0,0 // vbroadcastss 0x9cde(%rip),%ymm9 # b23c <_sk_clut_4D_avx+0x21d9>
.byte 196,193,124,89,193 // vmulps %ymm9,%ymm0,%ymm0
.byte 197,60,89,212 // vmulps %ymm4,%ymm8,%ymm10
.byte 196,193,124,88,194 // vaddps %ymm10,%ymm0,%ymm0
@@ -25379,39 +25722,97 @@ _sk_srcover_rgba_8888_avx:
.byte 196,65,53,86,202 // vorpd %ymm10,%ymm9,%ymm9
.byte 196,65,61,86,193 // vorpd %ymm9,%ymm8,%ymm8
.byte 77,133,192 // test %r8,%r8
- .byte 117,86 // jne 165c <_sk_srcover_rgba_8888_avx+0x19b>
- .byte 197,124,17,0 // vmovups %ymm8,(%rax)
+ .byte 117,67 // jne 1645 <_sk_srcover_rgba_8888_avx+0x184>
+ .byte 196,65,124,17,4,153 // vmovups %ymm8,(%r9,%rbx,4)
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 76,137,201 // mov %r9,%rcx
- .byte 255,224 // jmpq *%rax
- .byte 185,8,0,0,0 // mov $0x8,%ecx
- .byte 68,41,193 // sub %r8d,%ecx
- .byte 192,225,3 // shl $0x3,%cl
- .byte 73,199,194,255,255,255,255 // mov $0xffffffffffffffff,%r10
- .byte 73,211,234 // shr %cl,%r10
- .byte 196,193,249,110,226 // vmovq %r10,%xmm4
- .byte 196,226,121,48,228 // vpmovzxbw %xmm4,%xmm4
- .byte 196,226,89,0,45,199,156,0,0 // vpshufb 0x9cc7(%rip),%xmm4,%xmm5 # b300 <_sk_clut_4D_avx+0x2669>
- .byte 196,226,121,33,237 // vpmovsxbd %xmm5,%xmm5
- .byte 196,226,89,0,37,201,156,0,0 // vpshufb 0x9cc9(%rip),%xmm4,%xmm4 # b310 <_sk_clut_4D_avx+0x2679>
- .byte 196,226,121,33,228 // vpmovsxbd %xmm4,%xmm4
- .byte 196,227,85,24,228,1 // vinsertf128 $0x1,%xmm4,%ymm5,%ymm4
- .byte 196,226,93,44,56 // vmaskmovps (%rax),%ymm4,%ymm7
- .byte 233,144,254,255,255 // jmpq 14ec <_sk_srcover_rgba_8888_avx+0x2b>
- .byte 185,8,0,0,0 // mov $0x8,%ecx
- .byte 68,41,193 // sub %r8d,%ecx
- .byte 192,225,3 // shl $0x3,%cl
- .byte 73,199,194,255,255,255,255 // mov $0xffffffffffffffff,%r10
- .byte 73,211,234 // shr %cl,%r10
- .byte 196,65,249,110,202 // vmovq %r10,%xmm9
- .byte 196,66,121,48,201 // vpmovzxbw %xmm9,%xmm9
- .byte 196,98,49,0,21,124,156,0,0 // vpshufb 0x9c7c(%rip),%xmm9,%xmm10 # b300 <_sk_clut_4D_avx+0x2669>
- .byte 196,66,121,33,210 // vpmovsxbd %xmm10,%xmm10
- .byte 196,98,49,0,13,126,156,0,0 // vpshufb 0x9c7e(%rip),%xmm9,%xmm9 # b310 <_sk_clut_4D_avx+0x2679>
- .byte 196,66,121,33,201 // vpmovsxbd %xmm9,%xmm9
- .byte 196,67,45,24,201,1 // vinsertf128 $0x1,%xmm9,%ymm10,%ymm9
- .byte 196,98,53,46,0 // vmaskmovps %ymm8,%ymm9,(%rax)
- .byte 233,99,255,255,255 // jmpq 160a <_sk_srcover_rgba_8888_avx+0x149>
+ .byte 91 // pop %rbx
+ .byte 255,224 // jmpq *%rax
+ .byte 69,137,194 // mov %r8d,%r10d
+ .byte 65,128,226,7 // and $0x7,%r10b
+ .byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
+ .byte 65,254,202 // dec %r10b
+ .byte 65,128,250,6 // cmp $0x6,%r10b
+ .byte 15,135,194,254,255,255 // ja 14e8 <_sk_srcover_rgba_8888_avx+0x27>
+ .byte 69,15,182,210 // movzbl %r10b,%r10d
+ .byte 76,141,29,3,1,0,0 // lea 0x103(%rip),%r11 # 1734 <_sk_srcover_rgba_8888_avx+0x273>
+ .byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
+ .byte 76,1,216 // add %r11,%rax
+ .byte 255,224 // jmpq *%rax
+ .byte 196,65,122,16,4,153 // vmovss (%r9,%rbx,4),%xmm8
+ .byte 233,163,254,255,255 // jmpq 14e8 <_sk_srcover_rgba_8888_avx+0x27>
+ .byte 69,137,194 // mov %r8d,%r10d
+ .byte 65,128,226,7 // and $0x7,%r10b
+ .byte 65,254,202 // dec %r10b
+ .byte 65,128,250,6 // cmp $0x6,%r10b
+ .byte 119,179 // ja 1608 <_sk_srcover_rgba_8888_avx+0x147>
+ .byte 65,15,182,194 // movzbl %r10b,%eax
+ .byte 76,141,21,240,0,0,0 // lea 0xf0(%rip),%r10 # 1750 <_sk_srcover_rgba_8888_avx+0x28f>
+ .byte 73,99,4,130 // movslq (%r10,%rax,4),%rax
+ .byte 76,1,208 // add %r10,%rax
+ .byte 255,224 // jmpq *%rax
+ .byte 196,65,121,126,4,153 // vmovd %xmm8,(%r9,%rbx,4)
+ .byte 235,151 // jmp 1608 <_sk_srcover_rgba_8888_avx+0x147>
+ .byte 196,193,121,110,100,153,8 // vmovd 0x8(%r9,%rbx,4),%xmm4
+ .byte 197,249,112,228,68 // vpshufd $0x44,%xmm4,%xmm4
+ .byte 197,212,87,237 // vxorps %ymm5,%ymm5,%ymm5
+ .byte 196,99,85,12,196,4 // vblendps $0x4,%ymm4,%ymm5,%ymm8
+ .byte 196,193,123,16,36,153 // vmovsd (%r9,%rbx,4),%xmm4
+ .byte 196,99,61,13,196,1 // vblendpd $0x1,%ymm4,%ymm8,%ymm8
+ .byte 233,80,254,255,255 // jmpq 14e8 <_sk_srcover_rgba_8888_avx+0x27>
+ .byte 196,193,121,110,100,153,24 // vmovd 0x18(%r9,%rbx,4),%xmm4
+ .byte 197,249,112,228,68 // vpshufd $0x44,%xmm4,%xmm4
+ .byte 196,227,125,24,228,1 // vinsertf128 $0x1,%xmm4,%ymm0,%ymm4
+ .byte 197,212,87,237 // vxorps %ymm5,%ymm5,%ymm5
+ .byte 196,99,85,12,196,64 // vblendps $0x40,%ymm4,%ymm5,%ymm8
+ .byte 196,99,125,25,196,1 // vextractf128 $0x1,%ymm8,%xmm4
+ .byte 196,195,89,34,100,153,20,1 // vpinsrd $0x1,0x14(%r9,%rbx,4),%xmm4,%xmm4
+ .byte 196,99,61,24,196,1 // vinsertf128 $0x1,%xmm4,%ymm8,%ymm8
+ .byte 196,99,125,25,196,1 // vextractf128 $0x1,%ymm8,%xmm4
+ .byte 196,195,89,34,100,153,16,0 // vpinsrd $0x0,0x10(%r9,%rbx,4),%xmm4,%xmm4
+ .byte 196,99,61,24,196,1 // vinsertf128 $0x1,%xmm4,%ymm8,%ymm8
+ .byte 196,193,121,16,36,153 // vmovupd (%r9,%rbx,4),%xmm4
+ .byte 196,67,93,13,192,12 // vblendpd $0xc,%ymm8,%ymm4,%ymm8
+ .byte 233,251,253,255,255 // jmpq 14e8 <_sk_srcover_rgba_8888_avx+0x27>
+ .byte 196,67,121,22,68,153,8,2 // vpextrd $0x2,%xmm8,0x8(%r9,%rbx,4)
+ .byte 196,65,121,214,4,153 // vmovq %xmm8,(%r9,%rbx,4)
+ .byte 233,8,255,255,255 // jmpq 1608 <_sk_srcover_rgba_8888_avx+0x147>
+ .byte 196,67,125,25,193,1 // vextractf128 $0x1,%ymm8,%xmm9
+ .byte 196,67,121,22,76,153,24,2 // vpextrd $0x2,%xmm9,0x18(%r9,%rbx,4)
+ .byte 196,67,125,25,193,1 // vextractf128 $0x1,%ymm8,%xmm9
+ .byte 196,67,121,22,76,153,20,1 // vpextrd $0x1,%xmm9,0x14(%r9,%rbx,4)
+ .byte 196,67,125,25,193,1 // vextractf128 $0x1,%ymm8,%xmm9
+ .byte 196,65,122,17,76,153,16 // vmovss %xmm9,0x10(%r9,%rbx,4)
+ .byte 196,65,121,17,4,153 // vmovupd %xmm8,(%r9,%rbx,4)
+ .byte 233,212,254,255,255 // jmpq 1608 <_sk_srcover_rgba_8888_avx+0x147>
+ .byte 6 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,83,255 // callq *-0x1(%rbx)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 61,255,255,255,168 // cmp $0xa8ffffff,%eax
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,148,255,255,255,128,255 // callq *-0x7f0001(%rdi,%rdi,8)
+ .byte 255 // (bad)
+ .byte 255,100,255,255 // jmpq *-0x1(%rdi,%rdi,8)
+ .byte 255,25 // lcall *(%rcx)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,165,255,255,255,157 // jmpq *-0x62000001(%rbp)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 217,255 // fcos
+ .byte 255 // (bad)
+ .byte 255,204 // dec %esp
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 190,255,255,255,176 // mov $0xb0ffffff,%esi
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // .byte 0xff
HIDDEN _sk_clamp_0_avx
.globl _sk_clamp_0_avx
@@ -25429,7 +25830,7 @@ HIDDEN _sk_clamp_1_avx
.globl _sk_clamp_1_avx
FUNCTION(_sk_clamp_1_avx)
_sk_clamp_1_avx:
- .byte 196,98,125,24,5,167,151,0,0 // vbroadcastss 0x97a7(%rip),%ymm8 # ae74 <_sk_clut_4D_avx+0x21dd>
+ .byte 196,98,125,24,5,174,154,0,0 // vbroadcastss 0x9aae(%rip),%ymm8 # b240 <_sk_clut_4D_avx+0x21dd>
.byte 196,193,124,93,192 // vminps %ymm8,%ymm0,%ymm0
.byte 196,193,116,93,200 // vminps %ymm8,%ymm1,%ymm1
.byte 196,193,108,93,208 // vminps %ymm8,%ymm2,%ymm2
@@ -25441,7 +25842,7 @@ HIDDEN _sk_clamp_a_avx
.globl _sk_clamp_a_avx
FUNCTION(_sk_clamp_a_avx)
_sk_clamp_a_avx:
- .byte 196,98,125,24,5,138,151,0,0 // vbroadcastss 0x978a(%rip),%ymm8 # ae78 <_sk_clut_4D_avx+0x21e1>
+ .byte 196,98,125,24,5,145,154,0,0 // vbroadcastss 0x9a91(%rip),%ymm8 # b244 <_sk_clut_4D_avx+0x21e1>
.byte 196,193,100,93,216 // vminps %ymm8,%ymm3,%ymm3
.byte 197,252,93,195 // vminps %ymm3,%ymm0,%ymm0
.byte 197,244,93,203 // vminps %ymm3,%ymm1,%ymm1
@@ -25453,7 +25854,7 @@ HIDDEN _sk_clamp_a_dst_avx
.globl _sk_clamp_a_dst_avx
FUNCTION(_sk_clamp_a_dst_avx)
_sk_clamp_a_dst_avx:
- .byte 196,98,125,24,5,112,151,0,0 // vbroadcastss 0x9770(%rip),%ymm8 # ae7c <_sk_clut_4D_avx+0x21e5>
+ .byte 196,98,125,24,5,119,154,0,0 // vbroadcastss 0x9a77(%rip),%ymm8 # b248 <_sk_clut_4D_avx+0x21e5>
.byte 196,193,68,93,248 // vminps %ymm8,%ymm7,%ymm7
.byte 197,220,93,231 // vminps %ymm7,%ymm4,%ymm4
.byte 197,212,93,239 // vminps %ymm7,%ymm5,%ymm5
@@ -25486,7 +25887,7 @@ HIDDEN _sk_invert_avx
.globl _sk_invert_avx
FUNCTION(_sk_invert_avx)
_sk_invert_avx:
- .byte 196,98,125,24,5,47,151,0,0 // vbroadcastss 0x972f(%rip),%ymm8 # ae80 <_sk_clut_4D_avx+0x21e9>
+ .byte 196,98,125,24,5,54,154,0,0 // vbroadcastss 0x9a36(%rip),%ymm8 # b24c <_sk_clut_4D_avx+0x21e9>
.byte 197,188,92,192 // vsubps %ymm0,%ymm8,%ymm0
.byte 197,188,92,201 // vsubps %ymm1,%ymm8,%ymm1
.byte 197,188,92,210 // vsubps %ymm2,%ymm8,%ymm2
@@ -25542,7 +25943,7 @@ FUNCTION(_sk_unpremul_avx)
_sk_unpremul_avx:
.byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
.byte 196,65,100,194,200,0 // vcmpeqps %ymm8,%ymm3,%ymm9
- .byte 196,98,125,24,21,195,150,0,0 // vbroadcastss 0x96c3(%rip),%ymm10 # ae84 <_sk_clut_4D_avx+0x21ed>
+ .byte 196,98,125,24,21,202,153,0,0 // vbroadcastss 0x99ca(%rip),%ymm10 # b250 <_sk_clut_4D_avx+0x21ed>
.byte 197,44,94,211 // vdivps %ymm3,%ymm10,%ymm10
.byte 196,67,45,74,192,144 // vblendvps %ymm9,%ymm8,%ymm10,%ymm8
.byte 197,188,89,192 // vmulps %ymm0,%ymm8,%ymm0
@@ -25555,17 +25956,17 @@ HIDDEN _sk_from_srgb_avx
.globl _sk_from_srgb_avx
FUNCTION(_sk_from_srgb_avx)
_sk_from_srgb_avx:
- .byte 196,98,125,24,5,164,150,0,0 // vbroadcastss 0x96a4(%rip),%ymm8 # ae88 <_sk_clut_4D_avx+0x21f1>
+ .byte 196,98,125,24,5,171,153,0,0 // vbroadcastss 0x99ab(%rip),%ymm8 # b254 <_sk_clut_4D_avx+0x21f1>
.byte 196,65,124,89,200 // vmulps %ymm8,%ymm0,%ymm9
.byte 197,124,89,208 // vmulps %ymm0,%ymm0,%ymm10
- .byte 196,98,125,24,29,150,150,0,0 // vbroadcastss 0x9696(%rip),%ymm11 # ae8c <_sk_clut_4D_avx+0x21f5>
+ .byte 196,98,125,24,29,157,153,0,0 // vbroadcastss 0x999d(%rip),%ymm11 # b258 <_sk_clut_4D_avx+0x21f5>
.byte 196,65,124,89,227 // vmulps %ymm11,%ymm0,%ymm12
- .byte 196,98,125,24,45,140,150,0,0 // vbroadcastss 0x968c(%rip),%ymm13 # ae90 <_sk_clut_4D_avx+0x21f9>
+ .byte 196,98,125,24,45,147,153,0,0 // vbroadcastss 0x9993(%rip),%ymm13 # b25c <_sk_clut_4D_avx+0x21f9>
.byte 196,65,28,88,229 // vaddps %ymm13,%ymm12,%ymm12
.byte 196,65,44,89,212 // vmulps %ymm12,%ymm10,%ymm10
- .byte 196,98,125,24,37,125,150,0,0 // vbroadcastss 0x967d(%rip),%ymm12 # ae94 <_sk_clut_4D_avx+0x21fd>
+ .byte 196,98,125,24,37,132,153,0,0 // vbroadcastss 0x9984(%rip),%ymm12 # b260 <_sk_clut_4D_avx+0x21fd>
.byte 196,65,44,88,212 // vaddps %ymm12,%ymm10,%ymm10
- .byte 196,98,125,24,53,115,150,0,0 // vbroadcastss 0x9673(%rip),%ymm14 # ae98 <_sk_clut_4D_avx+0x2201>
+ .byte 196,98,125,24,53,122,153,0,0 // vbroadcastss 0x997a(%rip),%ymm14 # b264 <_sk_clut_4D_avx+0x2201>
.byte 196,193,124,194,198,1 // vcmpltps %ymm14,%ymm0,%ymm0
.byte 196,195,45,74,193,0 // vblendvps %ymm0,%ymm9,%ymm10,%ymm0
.byte 196,65,116,89,200 // vmulps %ymm8,%ymm1,%ymm9
@@ -25591,17 +25992,17 @@ HIDDEN _sk_from_srgb_dst_avx
.globl _sk_from_srgb_dst_avx
FUNCTION(_sk_from_srgb_dst_avx)
_sk_from_srgb_dst_avx:
- .byte 196,98,125,24,5,12,150,0,0 // vbroadcastss 0x960c(%rip),%ymm8 # ae9c <_sk_clut_4D_avx+0x2205>
+ .byte 196,98,125,24,5,19,153,0,0 // vbroadcastss 0x9913(%rip),%ymm8 # b268 <_sk_clut_4D_avx+0x2205>
.byte 196,65,92,89,200 // vmulps %ymm8,%ymm4,%ymm9
.byte 197,92,89,212 // vmulps %ymm4,%ymm4,%ymm10
- .byte 196,98,125,24,29,254,149,0,0 // vbroadcastss 0x95fe(%rip),%ymm11 # aea0 <_sk_clut_4D_avx+0x2209>
+ .byte 196,98,125,24,29,5,153,0,0 // vbroadcastss 0x9905(%rip),%ymm11 # b26c <_sk_clut_4D_avx+0x2209>
.byte 196,65,92,89,227 // vmulps %ymm11,%ymm4,%ymm12
- .byte 196,98,125,24,45,244,149,0,0 // vbroadcastss 0x95f4(%rip),%ymm13 # aea4 <_sk_clut_4D_avx+0x220d>
+ .byte 196,98,125,24,45,251,152,0,0 // vbroadcastss 0x98fb(%rip),%ymm13 # b270 <_sk_clut_4D_avx+0x220d>
.byte 196,65,28,88,229 // vaddps %ymm13,%ymm12,%ymm12
.byte 196,65,44,89,212 // vmulps %ymm12,%ymm10,%ymm10
- .byte 196,98,125,24,37,229,149,0,0 // vbroadcastss 0x95e5(%rip),%ymm12 # aea8 <_sk_clut_4D_avx+0x2211>
+ .byte 196,98,125,24,37,236,152,0,0 // vbroadcastss 0x98ec(%rip),%ymm12 # b274 <_sk_clut_4D_avx+0x2211>
.byte 196,65,44,88,212 // vaddps %ymm12,%ymm10,%ymm10
- .byte 196,98,125,24,53,219,149,0,0 // vbroadcastss 0x95db(%rip),%ymm14 # aeac <_sk_clut_4D_avx+0x2215>
+ .byte 196,98,125,24,53,226,152,0,0 // vbroadcastss 0x98e2(%rip),%ymm14 # b278 <_sk_clut_4D_avx+0x2215>
.byte 196,193,92,194,230,1 // vcmpltps %ymm14,%ymm4,%ymm4
.byte 196,195,45,74,225,64 // vblendvps %ymm4,%ymm9,%ymm10,%ymm4
.byte 196,65,84,89,200 // vmulps %ymm8,%ymm5,%ymm9
@@ -25628,20 +26029,20 @@ HIDDEN _sk_to_srgb_avx
FUNCTION(_sk_to_srgb_avx)
_sk_to_srgb_avx:
.byte 197,124,82,200 // vrsqrtps %ymm0,%ymm9
- .byte 196,98,125,24,5,112,149,0,0 // vbroadcastss 0x9570(%rip),%ymm8 # aeb0 <_sk_clut_4D_avx+0x2219>
+ .byte 196,98,125,24,5,119,152,0,0 // vbroadcastss 0x9877(%rip),%ymm8 # b27c <_sk_clut_4D_avx+0x2219>
.byte 196,65,124,89,208 // vmulps %ymm8,%ymm0,%ymm10
- .byte 196,98,125,24,29,102,149,0,0 // vbroadcastss 0x9566(%rip),%ymm11 # aeb4 <_sk_clut_4D_avx+0x221d>
+ .byte 196,98,125,24,29,109,152,0,0 // vbroadcastss 0x986d(%rip),%ymm11 # b280 <_sk_clut_4D_avx+0x221d>
.byte 196,65,52,89,227 // vmulps %ymm11,%ymm9,%ymm12
- .byte 196,98,125,24,45,92,149,0,0 // vbroadcastss 0x955c(%rip),%ymm13 # aeb8 <_sk_clut_4D_avx+0x2221>
+ .byte 196,98,125,24,45,99,152,0,0 // vbroadcastss 0x9863(%rip),%ymm13 # b284 <_sk_clut_4D_avx+0x2221>
.byte 196,65,28,88,229 // vaddps %ymm13,%ymm12,%ymm12
.byte 196,65,52,89,228 // vmulps %ymm12,%ymm9,%ymm12
- .byte 196,98,125,24,53,77,149,0,0 // vbroadcastss 0x954d(%rip),%ymm14 # aebc <_sk_clut_4D_avx+0x2225>
+ .byte 196,98,125,24,53,84,152,0,0 // vbroadcastss 0x9854(%rip),%ymm14 # b288 <_sk_clut_4D_avx+0x2225>
.byte 196,65,28,88,230 // vaddps %ymm14,%ymm12,%ymm12
- .byte 196,98,125,24,61,67,149,0,0 // vbroadcastss 0x9543(%rip),%ymm15 # aec0 <_sk_clut_4D_avx+0x2229>
+ .byte 196,98,125,24,61,74,152,0,0 // vbroadcastss 0x984a(%rip),%ymm15 # b28c <_sk_clut_4D_avx+0x2229>
.byte 196,65,52,88,207 // vaddps %ymm15,%ymm9,%ymm9
.byte 196,65,124,83,201 // vrcpps %ymm9,%ymm9
.byte 196,65,52,89,204 // vmulps %ymm12,%ymm9,%ymm9
- .byte 196,98,125,24,37,47,149,0,0 // vbroadcastss 0x952f(%rip),%ymm12 # aec4 <_sk_clut_4D_avx+0x222d>
+ .byte 196,98,125,24,37,54,152,0,0 // vbroadcastss 0x9836(%rip),%ymm12 # b290 <_sk_clut_4D_avx+0x222d>
.byte 196,193,124,194,196,1 // vcmpltps %ymm12,%ymm0,%ymm0
.byte 196,195,53,74,194,0 // vblendvps %ymm0,%ymm10,%ymm9,%ymm0
.byte 197,124,82,201 // vrsqrtps %ymm1,%ymm9
@@ -25678,7 +26079,7 @@ _sk_rgb_to_hsl_avx:
.byte 197,124,93,201 // vminps %ymm1,%ymm0,%ymm9
.byte 197,52,93,202 // vminps %ymm2,%ymm9,%ymm9
.byte 196,65,60,92,209 // vsubps %ymm9,%ymm8,%ymm10
- .byte 196,98,125,24,29,149,148,0,0 // vbroadcastss 0x9495(%rip),%ymm11 # aec8 <_sk_clut_4D_avx+0x2231>
+ .byte 196,98,125,24,29,156,151,0,0 // vbroadcastss 0x979c(%rip),%ymm11 # b294 <_sk_clut_4D_avx+0x2231>
.byte 196,65,36,94,218 // vdivps %ymm10,%ymm11,%ymm11
.byte 197,116,92,226 // vsubps %ymm2,%ymm1,%ymm12
.byte 196,65,28,89,227 // vmulps %ymm11,%ymm12,%ymm12
@@ -25688,19 +26089,19 @@ _sk_rgb_to_hsl_avx:
.byte 196,193,108,89,211 // vmulps %ymm11,%ymm2,%ymm2
.byte 197,252,92,201 // vsubps %ymm1,%ymm0,%ymm1
.byte 196,193,116,89,203 // vmulps %ymm11,%ymm1,%ymm1
- .byte 196,98,125,24,29,110,148,0,0 // vbroadcastss 0x946e(%rip),%ymm11 # aed4 <_sk_clut_4D_avx+0x223d>
+ .byte 196,98,125,24,29,117,151,0,0 // vbroadcastss 0x9775(%rip),%ymm11 # b2a0 <_sk_clut_4D_avx+0x223d>
.byte 196,193,116,88,203 // vaddps %ymm11,%ymm1,%ymm1
- .byte 196,98,125,24,29,92,148,0,0 // vbroadcastss 0x945c(%rip),%ymm11 # aed0 <_sk_clut_4D_avx+0x2239>
+ .byte 196,98,125,24,29,99,151,0,0 // vbroadcastss 0x9763(%rip),%ymm11 # b29c <_sk_clut_4D_avx+0x2239>
.byte 196,193,108,88,211 // vaddps %ymm11,%ymm2,%ymm2
.byte 196,227,117,74,202,224 // vblendvps %ymm14,%ymm2,%ymm1,%ymm1
- .byte 196,226,125,24,21,68,148,0,0 // vbroadcastss 0x9444(%rip),%ymm2 # aecc <_sk_clut_4D_avx+0x2235>
+ .byte 196,226,125,24,21,75,151,0,0 // vbroadcastss 0x974b(%rip),%ymm2 # b298 <_sk_clut_4D_avx+0x2235>
.byte 196,65,12,87,246 // vxorps %ymm14,%ymm14,%ymm14
.byte 196,227,13,74,210,208 // vblendvps %ymm13,%ymm2,%ymm14,%ymm2
.byte 197,188,194,192,0 // vcmpeqps %ymm0,%ymm8,%ymm0
.byte 196,193,108,88,212 // vaddps %ymm12,%ymm2,%ymm2
.byte 196,227,117,74,194,0 // vblendvps %ymm0,%ymm2,%ymm1,%ymm0
.byte 196,193,60,88,201 // vaddps %ymm9,%ymm8,%ymm1
- .byte 196,98,125,24,37,43,148,0,0 // vbroadcastss 0x942b(%rip),%ymm12 # aedc <_sk_clut_4D_avx+0x2245>
+ .byte 196,98,125,24,37,50,151,0,0 // vbroadcastss 0x9732(%rip),%ymm12 # b2a8 <_sk_clut_4D_avx+0x2245>
.byte 196,193,116,89,212 // vmulps %ymm12,%ymm1,%ymm2
.byte 197,28,194,226,1 // vcmpltps %ymm2,%ymm12,%ymm12
.byte 196,65,36,92,216 // vsubps %ymm8,%ymm11,%ymm11
@@ -25710,7 +26111,7 @@ _sk_rgb_to_hsl_avx:
.byte 197,172,94,201 // vdivps %ymm1,%ymm10,%ymm1
.byte 196,195,125,74,198,128 // vblendvps %ymm8,%ymm14,%ymm0,%ymm0
.byte 196,195,117,74,206,128 // vblendvps %ymm8,%ymm14,%ymm1,%ymm1
- .byte 196,98,125,24,5,238,147,0,0 // vbroadcastss 0x93ee(%rip),%ymm8 # aed8 <_sk_clut_4D_avx+0x2241>
+ .byte 196,98,125,24,5,245,150,0,0 // vbroadcastss 0x96f5(%rip),%ymm8 # b2a4 <_sk_clut_4D_avx+0x2241>
.byte 196,193,124,89,192 // vmulps %ymm8,%ymm0,%ymm0
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -25727,7 +26128,7 @@ _sk_hsl_to_rgb_avx:
.byte 197,252,17,92,36,128 // vmovups %ymm3,-0x80(%rsp)
.byte 197,252,40,225 // vmovaps %ymm1,%ymm4
.byte 197,252,40,216 // vmovaps %ymm0,%ymm3
- .byte 196,98,125,24,5,187,147,0,0 // vbroadcastss 0x93bb(%rip),%ymm8 # aee0 <_sk_clut_4D_avx+0x2249>
+ .byte 196,98,125,24,5,194,150,0,0 // vbroadcastss 0x96c2(%rip),%ymm8 # b2ac <_sk_clut_4D_avx+0x2249>
.byte 197,60,194,202,2 // vcmpleps %ymm2,%ymm8,%ymm9
.byte 197,92,89,210 // vmulps %ymm2,%ymm4,%ymm10
.byte 196,65,92,92,218 // vsubps %ymm10,%ymm4,%ymm11
@@ -25735,23 +26136,23 @@ _sk_hsl_to_rgb_avx:
.byte 197,52,88,210 // vaddps %ymm2,%ymm9,%ymm10
.byte 197,108,88,202 // vaddps %ymm2,%ymm2,%ymm9
.byte 196,65,52,92,202 // vsubps %ymm10,%ymm9,%ymm9
- .byte 196,98,125,24,29,149,147,0,0 // vbroadcastss 0x9395(%rip),%ymm11 # aee4 <_sk_clut_4D_avx+0x224d>
+ .byte 196,98,125,24,29,156,150,0,0 // vbroadcastss 0x969c(%rip),%ymm11 # b2b0 <_sk_clut_4D_avx+0x224d>
.byte 196,65,100,88,219 // vaddps %ymm11,%ymm3,%ymm11
.byte 196,67,125,8,227,1 // vroundps $0x1,%ymm11,%ymm12
.byte 196,65,36,92,252 // vsubps %ymm12,%ymm11,%ymm15
.byte 196,65,44,92,217 // vsubps %ymm9,%ymm10,%ymm11
- .byte 196,98,125,24,37,127,147,0,0 // vbroadcastss 0x937f(%rip),%ymm12 # aeec <_sk_clut_4D_avx+0x2255>
+ .byte 196,98,125,24,37,134,150,0,0 // vbroadcastss 0x9686(%rip),%ymm12 # b2b8 <_sk_clut_4D_avx+0x2255>
.byte 196,193,4,89,196 // vmulps %ymm12,%ymm15,%ymm0
- .byte 196,98,125,24,45,117,147,0,0 // vbroadcastss 0x9375(%rip),%ymm13 # aef0 <_sk_clut_4D_avx+0x2259>
+ .byte 196,98,125,24,45,124,150,0,0 // vbroadcastss 0x967c(%rip),%ymm13 # b2bc <_sk_clut_4D_avx+0x2259>
.byte 197,20,92,240 // vsubps %ymm0,%ymm13,%ymm14
.byte 196,65,36,89,246 // vmulps %ymm14,%ymm11,%ymm14
.byte 196,65,52,88,246 // vaddps %ymm14,%ymm9,%ymm14
- .byte 196,226,125,24,13,86,147,0,0 // vbroadcastss 0x9356(%rip),%ymm1 # aee8 <_sk_clut_4D_avx+0x2251>
+ .byte 196,226,125,24,13,93,150,0,0 // vbroadcastss 0x965d(%rip),%ymm1 # b2b4 <_sk_clut_4D_avx+0x2251>
.byte 196,193,116,194,255,2 // vcmpleps %ymm15,%ymm1,%ymm7
.byte 196,195,13,74,249,112 // vblendvps %ymm7,%ymm9,%ymm14,%ymm7
.byte 196,65,60,194,247,2 // vcmpleps %ymm15,%ymm8,%ymm14
.byte 196,227,45,74,255,224 // vblendvps %ymm14,%ymm7,%ymm10,%ymm7
- .byte 196,98,125,24,53,65,147,0,0 // vbroadcastss 0x9341(%rip),%ymm14 # aef4 <_sk_clut_4D_avx+0x225d>
+ .byte 196,98,125,24,53,72,150,0,0 // vbroadcastss 0x9648(%rip),%ymm14 # b2c0 <_sk_clut_4D_avx+0x225d>
.byte 196,65,12,194,255,2 // vcmpleps %ymm15,%ymm14,%ymm15
.byte 196,193,124,89,195 // vmulps %ymm11,%ymm0,%ymm0
.byte 197,180,88,192 // vaddps %ymm0,%ymm9,%ymm0
@@ -25770,7 +26171,7 @@ _sk_hsl_to_rgb_avx:
.byte 197,164,89,247 // vmulps %ymm7,%ymm11,%ymm6
.byte 197,180,88,246 // vaddps %ymm6,%ymm9,%ymm6
.byte 196,227,77,74,237,0 // vblendvps %ymm0,%ymm5,%ymm6,%ymm5
- .byte 196,226,125,24,5,227,146,0,0 // vbroadcastss 0x92e3(%rip),%ymm0 # aef8 <_sk_clut_4D_avx+0x2261>
+ .byte 196,226,125,24,5,234,149,0,0 // vbroadcastss 0x95ea(%rip),%ymm0 # b2c4 <_sk_clut_4D_avx+0x2261>
.byte 197,228,88,192 // vaddps %ymm0,%ymm3,%ymm0
.byte 196,227,125,8,216,1 // vroundps $0x1,%ymm0,%ymm3
.byte 197,252,92,195 // vsubps %ymm3,%ymm0,%ymm0
@@ -25825,15 +26226,15 @@ _sk_scale_u8_avx:
.byte 76,3,8 // add (%rax),%r9
.byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 117,75 // jne 1d1c <_sk_scale_u8_avx+0x64>
+ .byte 117,75 // jne 1de1 <_sk_scale_u8_avx+0x64>
.byte 196,66,121,48,4,25 // vpmovzxbw (%r9,%rbx,1),%xmm8
- .byte 197,57,219,5,65,150,0,0 // vpand 0x9641(%rip),%xmm8,%xmm8 # b320 <_sk_clut_4D_avx+0x2689>
+ .byte 197,57,219,5,60,154,0,0 // vpand 0x9a3c(%rip),%xmm8,%xmm8 # b7e0 <_sk_clut_4D_avx+0x277d>
.byte 196,65,49,239,201 // vpxor %xmm9,%xmm9,%xmm9
.byte 196,65,57,105,201 // vpunpckhwd %xmm9,%xmm8,%xmm9
.byte 196,66,121,51,192 // vpmovzxwd %xmm8,%xmm8
.byte 196,67,61,24,193,1 // vinsertf128 $0x1,%xmm9,%ymm8,%ymm8
.byte 196,65,124,91,192 // vcvtdq2ps %ymm8,%ymm8
- .byte 196,98,125,24,13,250,145,0,0 // vbroadcastss 0x91fa(%rip),%ymm9 # aefc <_sk_clut_4D_avx+0x2265>
+ .byte 196,98,125,24,13,1,149,0,0 // vbroadcastss 0x9501(%rip),%ymm9 # b2c8 <_sk_clut_4D_avx+0x2265>
.byte 196,65,60,89,193 // vmulps %ymm9,%ymm8,%ymm8
.byte 197,188,89,192 // vmulps %ymm0,%ymm8,%ymm0
.byte 197,188,89,201 // vmulps %ymm1,%ymm8,%ymm1
@@ -25847,15 +26248,15 @@ _sk_scale_u8_avx:
.byte 196,65,57,239,192 // vpxor %xmm8,%xmm8,%xmm8
.byte 65,254,202 // dec %r10b
.byte 65,128,250,6 // cmp $0x6,%r10b
- .byte 119,166 // ja 1cd7 <_sk_scale_u8_avx+0x1f>
+ .byte 119,166 // ja 1d9c <_sk_scale_u8_avx+0x1f>
.byte 69,15,182,210 // movzbl %r10b,%r10d
- .byte 76,141,29,124,0,0,0 // lea 0x7c(%rip),%r11 # 1db8 <_sk_scale_u8_avx+0x100>
+ .byte 76,141,29,123,0,0,0 // lea 0x7b(%rip),%r11 # 1e7c <_sk_scale_u8_avx+0xff>
.byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
.byte 65,15,182,4,25 // movzbl (%r9,%rbx,1),%eax
.byte 197,121,110,192 // vmovd %eax,%xmm8
- .byte 235,135 // jmp 1cd7 <_sk_scale_u8_avx+0x1f>
+ .byte 235,135 // jmp 1d9c <_sk_scale_u8_avx+0x1f>
.byte 65,15,182,68,25,2 // movzbl 0x2(%r9,%rbx,1),%eax
.byte 196,65,57,239,192 // vpxor %xmm8,%xmm8,%xmm8
.byte 197,57,196,192,2 // vpinsrw $0x2,%eax,%xmm8,%xmm8
@@ -25863,7 +26264,7 @@ _sk_scale_u8_avx:
.byte 197,121,110,200 // vmovd %eax,%xmm9
.byte 196,66,121,48,201 // vpmovzxbw %xmm9,%xmm9
.byte 196,67,57,14,193,3 // vpblendw $0x3,%xmm9,%xmm8,%xmm8
- .byte 233,94,255,255,255 // jmpq 1cd7 <_sk_scale_u8_avx+0x1f>
+ .byte 233,94,255,255,255 // jmpq 1d9c <_sk_scale_u8_avx+0x1f>
.byte 65,15,182,68,25,6 // movzbl 0x6(%r9,%rbx,1),%eax
.byte 196,65,57,239,192 // vpxor %xmm8,%xmm8,%xmm8
.byte 197,57,196,192,6 // vpinsrw $0x6,%eax,%xmm8,%xmm8
@@ -25874,24 +26275,21 @@ _sk_scale_u8_avx:
.byte 196,65,121,110,12,25 // vmovd (%r9,%rbx,1),%xmm9
.byte 196,66,121,48,201 // vpmovzxbw %xmm9,%xmm9
.byte 196,67,49,14,192,240 // vpblendw $0xf0,%xmm8,%xmm9,%xmm8
- .byte 233,34,255,255,255 // jmpq 1cd7 <_sk_scale_u8_avx+0x1f>
- .byte 15,31,0 // nopl (%rax)
- .byte 141 // (bad)
- .byte 255 // (bad)
- .byte 255 // (bad)
- .byte 255,168,255,255,255,152 // ljmp *-0x67000001(%rax)
+ .byte 233,34,255,255,255 // jmpq 1d9c <_sk_scale_u8_avx+0x1f>
+ .byte 102,144 // xchg %ax,%ax
+ .byte 142,255 // mov %edi,%?
.byte 255 // (bad)
+ .byte 255,169,255,255,255,153 // ljmp *-0x66000001(%rcx)
.byte 255 // (bad)
- .byte 255,231 // jmpq *%rdi
.byte 255 // (bad)
.byte 255 // (bad)
+ .byte 232,255,255,255,221 // callq ffffffffde001e8c <_sk_clut_4D_avx+0xffffffffddff8e29>
.byte 255 // (bad)
- .byte 220,255 // fdivr %st,%st(7)
.byte 255 // (bad)
- .byte 255,209 // callq *%rcx
+ .byte 255,210 // callq *%rdx
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,193 // inc %ecx
+ .byte 255,194 // inc %edx
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // .byte 0xff
@@ -25929,15 +26327,15 @@ _sk_lerp_u8_avx:
.byte 76,3,8 // add (%rax),%r9
.byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 117,111 // jne 1e9b <_sk_lerp_u8_avx+0x88>
+ .byte 117,111 // jne 1f5f <_sk_lerp_u8_avx+0x88>
.byte 196,66,121,48,4,25 // vpmovzxbw (%r9,%rbx,1),%xmm8
- .byte 197,57,219,5,246,148,0,0 // vpand 0x94f6(%rip),%xmm8,%xmm8 # b330 <_sk_clut_4D_avx+0x2699>
+ .byte 197,57,219,5,242,152,0,0 // vpand 0x98f2(%rip),%xmm8,%xmm8 # b7f0 <_sk_clut_4D_avx+0x278d>
.byte 196,65,49,239,201 // vpxor %xmm9,%xmm9,%xmm9
.byte 196,65,57,105,201 // vpunpckhwd %xmm9,%xmm8,%xmm9
.byte 196,66,121,51,192 // vpmovzxwd %xmm8,%xmm8
.byte 196,67,61,24,193,1 // vinsertf128 $0x1,%xmm9,%ymm8,%ymm8
.byte 196,65,124,91,192 // vcvtdq2ps %ymm8,%ymm8
- .byte 196,98,125,24,13,163,144,0,0 // vbroadcastss 0x90a3(%rip),%ymm9 # af00 <_sk_clut_4D_avx+0x2269>
+ .byte 196,98,125,24,13,171,147,0,0 // vbroadcastss 0x93ab(%rip),%ymm9 # b2cc <_sk_clut_4D_avx+0x2269>
.byte 196,65,60,89,193 // vmulps %ymm9,%ymm8,%ymm8
.byte 197,252,92,196 // vsubps %ymm4,%ymm0,%ymm0
.byte 196,193,124,89,192 // vmulps %ymm8,%ymm0,%ymm0
@@ -25959,15 +26357,15 @@ _sk_lerp_u8_avx:
.byte 196,65,57,239,192 // vpxor %xmm8,%xmm8,%xmm8
.byte 65,254,202 // dec %r10b
.byte 65,128,250,6 // cmp $0x6,%r10b
- .byte 119,130 // ja 1e32 <_sk_lerp_u8_avx+0x1f>
+ .byte 119,130 // ja 1ef6 <_sk_lerp_u8_avx+0x1f>
.byte 69,15,182,210 // movzbl %r10b,%r10d
- .byte 76,141,29,125,0,0,0 // lea 0x7d(%rip),%r11 # 1f38 <_sk_lerp_u8_avx+0x125>
+ .byte 76,141,29,125,0,0,0 // lea 0x7d(%rip),%r11 # 1ffc <_sk_lerp_u8_avx+0x125>
.byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
.byte 65,15,182,4,25 // movzbl (%r9,%rbx,1),%eax
.byte 197,121,110,192 // vmovd %eax,%xmm8
- .byte 233,96,255,255,255 // jmpq 1e32 <_sk_lerp_u8_avx+0x1f>
+ .byte 233,96,255,255,255 // jmpq 1ef6 <_sk_lerp_u8_avx+0x1f>
.byte 65,15,182,68,25,2 // movzbl 0x2(%r9,%rbx,1),%eax
.byte 196,65,57,239,192 // vpxor %xmm8,%xmm8,%xmm8
.byte 197,57,196,192,2 // vpinsrw $0x2,%eax,%xmm8,%xmm8
@@ -25975,7 +26373,7 @@ _sk_lerp_u8_avx:
.byte 197,121,110,200 // vmovd %eax,%xmm9
.byte 196,66,121,48,201 // vpmovzxbw %xmm9,%xmm9
.byte 196,67,57,14,193,3 // vpblendw $0x3,%xmm9,%xmm8,%xmm8
- .byte 233,55,255,255,255 // jmpq 1e32 <_sk_lerp_u8_avx+0x1f>
+ .byte 233,55,255,255,255 // jmpq 1ef6 <_sk_lerp_u8_avx+0x1f>
.byte 65,15,182,68,25,6 // movzbl 0x6(%r9,%rbx,1),%eax
.byte 196,65,57,239,192 // vpxor %xmm8,%xmm8,%xmm8
.byte 197,57,196,192,6 // vpinsrw $0x6,%eax,%xmm8,%xmm8
@@ -25986,7 +26384,7 @@ _sk_lerp_u8_avx:
.byte 196,65,121,110,12,25 // vmovd (%r9,%rbx,1),%xmm9
.byte 196,66,121,48,201 // vpmovzxbw %xmm9,%xmm9
.byte 196,67,49,14,192,240 // vpblendw $0xf0,%xmm8,%xmm9,%xmm8
- .byte 233,251,254,255,255 // jmpq 1e32 <_sk_lerp_u8_avx+0x1f>
+ .byte 233,251,254,255,255 // jmpq 1ef6 <_sk_lerp_u8_avx+0x1f>
.byte 144 // nop
.byte 140,255 // mov %?,%edi
.byte 255 // (bad)
@@ -25994,7 +26392,7 @@ _sk_lerp_u8_avx:
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 233,255,255,255,222 // jmpq ffffffffdf001f48 <_sk_clut_4D_avx+0xffffffffdeff92b1>
+ .byte 233,255,255,255,222 // jmpq ffffffffdf00200c <_sk_clut_4D_avx+0xffffffffdeff8fa9>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,211 // callq *%rbx
@@ -26018,26 +26416,26 @@ _sk_lerp_565_avx:
.byte 76,3,8 // add (%rax),%r9
.byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 15,133,209,0,0,0 // jne 2045 <_sk_lerp_565_avx+0xf1>
+ .byte 15,133,209,0,0,0 // jne 2109 <_sk_lerp_565_avx+0xf1>
.byte 196,65,122,111,4,89 // vmovdqu (%r9,%rbx,2),%xmm8
.byte 196,65,49,239,201 // vpxor %xmm9,%xmm9,%xmm9
.byte 196,65,57,105,201 // vpunpckhwd %xmm9,%xmm8,%xmm9
.byte 196,66,121,51,192 // vpmovzxwd %xmm8,%xmm8
.byte 196,67,61,24,193,1 // vinsertf128 $0x1,%xmm9,%ymm8,%ymm8
- .byte 196,98,125,24,13,108,143,0,0 // vbroadcastss 0x8f6c(%rip),%ymm9 # af04 <_sk_clut_4D_avx+0x226d>
+ .byte 196,98,125,24,13,116,146,0,0 // vbroadcastss 0x9274(%rip),%ymm9 # b2d0 <_sk_clut_4D_avx+0x226d>
.byte 196,65,60,84,201 // vandps %ymm9,%ymm8,%ymm9
.byte 196,65,124,91,201 // vcvtdq2ps %ymm9,%ymm9
- .byte 196,98,125,24,21,93,143,0,0 // vbroadcastss 0x8f5d(%rip),%ymm10 # af08 <_sk_clut_4D_avx+0x2271>
+ .byte 196,98,125,24,21,101,146,0,0 // vbroadcastss 0x9265(%rip),%ymm10 # b2d4 <_sk_clut_4D_avx+0x2271>
.byte 196,65,52,89,202 // vmulps %ymm10,%ymm9,%ymm9
- .byte 196,98,125,24,21,83,143,0,0 // vbroadcastss 0x8f53(%rip),%ymm10 # af0c <_sk_clut_4D_avx+0x2275>
+ .byte 196,98,125,24,21,91,146,0,0 // vbroadcastss 0x925b(%rip),%ymm10 # b2d8 <_sk_clut_4D_avx+0x2275>
.byte 196,65,60,84,210 // vandps %ymm10,%ymm8,%ymm10
.byte 196,65,124,91,210 // vcvtdq2ps %ymm10,%ymm10
- .byte 196,98,125,24,29,68,143,0,0 // vbroadcastss 0x8f44(%rip),%ymm11 # af10 <_sk_clut_4D_avx+0x2279>
+ .byte 196,98,125,24,29,76,146,0,0 // vbroadcastss 0x924c(%rip),%ymm11 # b2dc <_sk_clut_4D_avx+0x2279>
.byte 196,65,44,89,211 // vmulps %ymm11,%ymm10,%ymm10
- .byte 196,98,125,24,29,58,143,0,0 // vbroadcastss 0x8f3a(%rip),%ymm11 # af14 <_sk_clut_4D_avx+0x227d>
+ .byte 196,98,125,24,29,66,146,0,0 // vbroadcastss 0x9242(%rip),%ymm11 # b2e0 <_sk_clut_4D_avx+0x227d>
.byte 196,65,60,84,195 // vandps %ymm11,%ymm8,%ymm8
.byte 196,65,124,91,192 // vcvtdq2ps %ymm8,%ymm8
- .byte 196,98,125,24,29,43,143,0,0 // vbroadcastss 0x8f2b(%rip),%ymm11 # af18 <_sk_clut_4D_avx+0x2281>
+ .byte 196,98,125,24,29,51,146,0,0 // vbroadcastss 0x9233(%rip),%ymm11 # b2e4 <_sk_clut_4D_avx+0x2281>
.byte 196,65,60,89,195 // vmulps %ymm11,%ymm8,%ymm8
.byte 197,252,92,196 // vsubps %ymm4,%ymm0,%ymm0
.byte 196,193,124,89,193 // vmulps %ymm9,%ymm0,%ymm0
@@ -26065,27 +26463,27 @@ _sk_lerp_565_avx:
.byte 196,65,57,239,192 // vpxor %xmm8,%xmm8,%xmm8
.byte 65,254,202 // dec %r10b
.byte 65,128,250,6 // cmp $0x6,%r10b
- .byte 15,135,28,255,255,255 // ja 1f7a <_sk_lerp_565_avx+0x26>
+ .byte 15,135,28,255,255,255 // ja 203e <_sk_lerp_565_avx+0x26>
.byte 69,15,182,210 // movzbl %r10b,%r10d
- .byte 76,141,29,99,0,0,0 // lea 0x63(%rip),%r11 # 20cc <_sk_lerp_565_avx+0x178>
+ .byte 76,141,29,99,0,0,0 // lea 0x63(%rip),%r11 # 2190 <_sk_lerp_565_avx+0x178>
.byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
.byte 65,15,183,4,89 // movzwl (%r9,%rbx,2),%eax
.byte 197,121,110,192 // vmovd %eax,%xmm8
- .byte 233,250,254,255,255 // jmpq 1f7a <_sk_lerp_565_avx+0x26>
+ .byte 233,250,254,255,255 // jmpq 203e <_sk_lerp_565_avx+0x26>
.byte 196,65,57,239,192 // vpxor %xmm8,%xmm8,%xmm8
.byte 196,65,57,196,68,89,4,2 // vpinsrw $0x2,0x4(%r9,%rbx,2),%xmm8,%xmm8
.byte 196,65,121,110,12,89 // vmovd (%r9,%rbx,2),%xmm9
.byte 196,67,57,14,193,3 // vpblendw $0x3,%xmm9,%xmm8,%xmm8
- .byte 233,220,254,255,255 // jmpq 1f7a <_sk_lerp_565_avx+0x26>
+ .byte 233,220,254,255,255 // jmpq 203e <_sk_lerp_565_avx+0x26>
.byte 196,65,57,239,192 // vpxor %xmm8,%xmm8,%xmm8
.byte 196,65,57,196,68,89,12,6 // vpinsrw $0x6,0xc(%r9,%rbx,2),%xmm8,%xmm8
.byte 196,65,57,196,68,89,10,5 // vpinsrw $0x5,0xa(%r9,%rbx,2),%xmm8,%xmm8
.byte 196,65,57,196,68,89,8,4 // vpinsrw $0x4,0x8(%r9,%rbx,2),%xmm8,%xmm8
.byte 196,65,122,126,12,89 // vmovq (%r9,%rbx,2),%xmm9
.byte 196,67,49,14,192,240 // vpblendw $0xf0,%xmm8,%xmm9,%xmm8
- .byte 233,174,254,255,255 // jmpq 1f7a <_sk_lerp_565_avx+0x26>
+ .byte 233,174,254,255,255 // jmpq 203e <_sk_lerp_565_avx+0x26>
.byte 166 // cmpsb %es:(%rdi),%ds:(%rsi)
.byte 255 // (bad)
.byte 255 // (bad)
@@ -26109,128 +26507,163 @@ HIDDEN _sk_load_tables_avx
.globl _sk_load_tables_avx
FUNCTION(_sk_load_tables_avx)
_sk_load_tables_avx:
- .byte 83 // push %rbx
- .byte 197,252,17,124,36,208 // vmovups %ymm7,-0x30(%rsp)
- .byte 73,137,201 // mov %rcx,%r9
+ .byte 197,252,17,124,36,200 // vmovups %ymm7,-0x38(%rsp)
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 76,141,20,149,0,0,0,0 // lea 0x0(,%rdx,4),%r10
- .byte 76,3,16 // add (%rax),%r10
+ .byte 76,139,8 // mov (%rax),%r9
.byte 77,133,192 // test %r8,%r8
- .byte 15,133,248,1,0,0 // jne 2300 <_sk_load_tables_avx+0x218>
- .byte 196,65,124,16,18 // vmovups (%r10),%ymm10
- .byte 197,124,40,13,171,147,0,0 // vmovaps 0x93ab(%rip),%ymm9 # b4c0 <_sk_clut_4D_avx+0x2829>
- .byte 196,193,44,84,201 // vandps %ymm9,%ymm10,%ymm1
+ .byte 15,133,4,2,0,0 // jne 23c4 <_sk_load_tables_avx+0x218>
+ .byte 196,65,125,16,20,145 // vmovupd (%r9,%rdx,4),%ymm10
+ .byte 197,125,40,13,50,149,0,0 // vmovapd 0x9532(%rip),%ymm9 # b700 <_sk_clut_4D_avx+0x269d>
+ .byte 196,193,45,84,201 // vandpd %ymm9,%ymm10,%ymm1
.byte 196,227,125,25,200,1 // vextractf128 $0x1,%ymm1,%xmm0
- .byte 196,193,249,126,195 // vmovq %xmm0,%r11
- .byte 69,137,218 // mov %r11d,%r10d
- .byte 72,139,88,8 // mov 0x8(%rax),%rbx
- .byte 196,161,122,16,20,147 // vmovss (%rbx,%r10,4),%xmm2
+ .byte 196,193,249,126,193 // vmovq %xmm0,%r9
+ .byte 69,137,202 // mov %r9d,%r10d
+ .byte 76,139,88,8 // mov 0x8(%rax),%r11
+ .byte 196,129,122,16,20,147 // vmovss (%r11,%r10,4),%xmm2
.byte 196,195,249,22,194,1 // vpextrq $0x1,%xmm0,%r10
- .byte 73,193,235,32 // shr $0x20,%r11
- .byte 196,163,105,33,4,155,16 // vinsertps $0x10,(%rbx,%r11,4),%xmm2,%xmm0
- .byte 68,137,209 // mov %r10d,%ecx
- .byte 196,227,121,33,4,139,32 // vinsertps $0x20,(%rbx,%rcx,4),%xmm0,%xmm0
- .byte 196,193,249,126,203 // vmovq %xmm1,%r11
+ .byte 73,193,233,32 // shr $0x20,%r9
+ .byte 196,131,105,33,4,139,16 // vinsertps $0x10,(%r11,%r9,4),%xmm2,%xmm0
+ .byte 69,137,209 // mov %r10d,%r9d
+ .byte 196,131,121,33,4,139,32 // vinsertps $0x20,(%r11,%r9,4),%xmm0,%xmm0
+ .byte 196,193,249,126,201 // vmovq %xmm1,%r9
.byte 73,193,234,32 // shr $0x20,%r10
- .byte 196,35,121,33,44,147,48 // vinsertps $0x30,(%rbx,%r10,4),%xmm0,%xmm13
- .byte 68,137,217 // mov %r11d,%ecx
- .byte 197,250,16,20,139 // vmovss (%rbx,%rcx,4),%xmm2
- .byte 196,227,249,22,201,1 // vpextrq $0x1,%xmm1,%rcx
- .byte 73,193,235,32 // shr $0x20,%r11
- .byte 196,163,105,33,12,155,16 // vinsertps $0x10,(%rbx,%r11,4),%xmm2,%xmm1
- .byte 65,137,202 // mov %ecx,%r10d
- .byte 72,193,233,32 // shr $0x20,%rcx
- .byte 196,163,113,33,12,147,32 // vinsertps $0x20,(%rbx,%r10,4),%xmm1,%xmm1
- .byte 76,139,80,16 // mov 0x10(%rax),%r10
- .byte 196,99,113,33,36,139,48 // vinsertps $0x30,(%rbx,%rcx,4),%xmm1,%xmm12
+ .byte 196,3,121,33,4,147,48 // vinsertps $0x30,(%r11,%r10,4),%xmm0,%xmm8
+ .byte 69,137,202 // mov %r9d,%r10d
+ .byte 196,129,122,16,20,147 // vmovss (%r11,%r10,4),%xmm2
+ .byte 196,195,249,22,202,1 // vpextrq $0x1,%xmm1,%r10
+ .byte 73,193,233,32 // shr $0x20,%r9
+ .byte 196,131,105,33,12,139,16 // vinsertps $0x10,(%r11,%r9,4),%xmm2,%xmm1
+ .byte 69,137,209 // mov %r10d,%r9d
+ .byte 73,193,234,32 // shr $0x20,%r10
+ .byte 196,131,113,33,12,139,32 // vinsertps $0x20,(%r11,%r9,4),%xmm1,%xmm1
+ .byte 76,139,72,16 // mov 0x10(%rax),%r9
+ .byte 196,3,113,33,36,147,48 // vinsertps $0x30,(%r11,%r10,4),%xmm1,%xmm12
.byte 196,193,105,114,210,8 // vpsrld $0x8,%xmm10,%xmm2
- .byte 196,67,125,25,208,1 // vextractf128 $0x1,%ymm10,%xmm8
- .byte 196,193,121,114,208,8 // vpsrld $0x8,%xmm8,%xmm0
+ .byte 196,67,125,25,213,1 // vextractf128 $0x1,%ymm10,%xmm13
+ .byte 196,193,121,114,213,8 // vpsrld $0x8,%xmm13,%xmm0
.byte 196,227,109,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm2,%ymm0
- .byte 196,193,124,84,209 // vandps %ymm9,%ymm0,%ymm2
+ .byte 196,193,125,84,209 // vandpd %ymm9,%ymm0,%ymm2
.byte 196,227,125,25,208,1 // vextractf128 $0x1,%ymm2,%xmm0
- .byte 196,225,249,126,193 // vmovq %xmm0,%rcx
- .byte 137,203 // mov %ecx,%ebx
- .byte 196,193,122,16,12,154 // vmovss (%r10,%rbx,4),%xmm1
- .byte 196,227,249,22,195,1 // vpextrq $0x1,%xmm0,%rbx
- .byte 72,193,233,32 // shr $0x20,%rcx
- .byte 196,67,113,33,52,138,16 // vinsertps $0x10,(%r10,%rcx,4),%xmm1,%xmm14
- .byte 137,217 // mov %ebx,%ecx
- .byte 196,193,122,16,28,138 // vmovss (%r10,%rcx,4),%xmm3
- .byte 196,225,249,126,209 // vmovq %xmm2,%rcx
- .byte 72,193,235,32 // shr $0x20,%rbx
- .byte 196,193,122,16,12,154 // vmovss (%r10,%rbx,4),%xmm1
- .byte 137,203 // mov %ecx,%ebx
- .byte 196,193,122,16,4,154 // vmovss (%r10,%rbx,4),%xmm0
- .byte 196,227,249,22,211,1 // vpextrq $0x1,%xmm2,%rbx
- .byte 72,193,233,32 // shr $0x20,%rcx
- .byte 196,67,121,33,28,138,16 // vinsertps $0x10,(%r10,%rcx,4),%xmm0,%xmm11
- .byte 137,217 // mov %ebx,%ecx
- .byte 196,65,122,16,60,138 // vmovss (%r10,%rcx,4),%xmm15
- .byte 196,195,29,24,197,1 // vinsertf128 $0x1,%xmm13,%ymm12,%ymm0
- .byte 72,193,235,32 // shr $0x20,%rbx
+ .byte 196,193,249,126,194 // vmovq %xmm0,%r10
+ .byte 69,137,211 // mov %r10d,%r11d
+ .byte 196,129,122,16,12,153 // vmovss (%r9,%r11,4),%xmm1
+ .byte 196,195,249,22,195,1 // vpextrq $0x1,%xmm0,%r11
+ .byte 73,193,234,32 // shr $0x20,%r10
+ .byte 196,3,113,33,52,145,16 // vinsertps $0x10,(%r9,%r10,4),%xmm1,%xmm14
+ .byte 69,137,218 // mov %r11d,%r10d
+ .byte 196,129,122,16,28,145 // vmovss (%r9,%r10,4),%xmm3
+ .byte 196,193,249,126,210 // vmovq %xmm2,%r10
+ .byte 73,193,235,32 // shr $0x20,%r11
+ .byte 196,129,122,16,12,153 // vmovss (%r9,%r11,4),%xmm1
+ .byte 69,137,211 // mov %r10d,%r11d
+ .byte 196,129,122,16,4,153 // vmovss (%r9,%r11,4),%xmm0
+ .byte 196,195,249,22,211,1 // vpextrq $0x1,%xmm2,%r11
+ .byte 73,193,234,32 // shr $0x20,%r10
+ .byte 196,3,121,33,28,145,16 // vinsertps $0x10,(%r9,%r10,4),%xmm0,%xmm11
+ .byte 69,137,218 // mov %r11d,%r10d
+ .byte 196,1,122,16,60,145 // vmovss (%r9,%r10,4),%xmm15
+ .byte 196,195,29,24,192,1 // vinsertf128 $0x1,%xmm8,%ymm12,%ymm0
+ .byte 73,193,235,32 // shr $0x20,%r11
.byte 196,227,9,33,219,32 // vinsertps $0x20,%xmm3,%xmm14,%xmm3
.byte 196,227,97,33,249,48 // vinsertps $0x30,%xmm1,%xmm3,%xmm7
- .byte 196,65,122,16,52,154 // vmovss (%r10,%rbx,4),%xmm14
- .byte 72,139,64,24 // mov 0x18(%rax),%rax
+ .byte 196,1,122,16,52,153 // vmovss (%r9,%r11,4),%xmm14
+ .byte 76,139,80,24 // mov 0x18(%rax),%r10
.byte 196,193,97,114,210,16 // vpsrld $0x10,%xmm10,%xmm3
- .byte 196,193,105,114,208,16 // vpsrld $0x10,%xmm8,%xmm2
+ .byte 196,193,105,114,213,16 // vpsrld $0x10,%xmm13,%xmm2
.byte 196,227,101,24,210,1 // vinsertf128 $0x1,%xmm2,%ymm3,%ymm2
- .byte 196,65,108,84,201 // vandps %ymm9,%ymm2,%ymm9
+ .byte 196,65,109,84,201 // vandpd %ymm9,%ymm2,%ymm9
.byte 196,99,125,25,202,1 // vextractf128 $0x1,%ymm9,%xmm2
- .byte 196,225,249,126,209 // vmovq %xmm2,%rcx
- .byte 137,203 // mov %ecx,%ebx
- .byte 197,250,16,28,152 // vmovss (%rax,%rbx,4),%xmm3
- .byte 196,227,249,22,211,1 // vpextrq $0x1,%xmm2,%rbx
- .byte 72,193,233,32 // shr $0x20,%rcx
- .byte 196,99,97,33,36,136,16 // vinsertps $0x10,(%rax,%rcx,4),%xmm3,%xmm12
- .byte 137,217 // mov %ebx,%ecx
- .byte 197,250,16,28,136 // vmovss (%rax,%rcx,4),%xmm3
- .byte 196,97,249,126,201 // vmovq %xmm9,%rcx
- .byte 72,193,235,32 // shr $0x20,%rbx
- .byte 197,250,16,20,152 // vmovss (%rax,%rbx,4),%xmm2
- .byte 137,203 // mov %ecx,%ebx
- .byte 197,250,16,12,152 // vmovss (%rax,%rbx,4),%xmm1
- .byte 196,99,249,22,203,1 // vpextrq $0x1,%xmm9,%rbx
- .byte 72,193,233,32 // shr $0x20,%rcx
- .byte 196,99,113,33,12,136,16 // vinsertps $0x10,(%rax,%rcx,4),%xmm1,%xmm9
- .byte 137,217 // mov %ebx,%ecx
- .byte 197,122,16,44,136 // vmovss (%rax,%rcx,4),%xmm13
+ .byte 196,193,249,126,209 // vmovq %xmm2,%r9
+ .byte 68,137,200 // mov %r9d,%eax
+ .byte 196,193,122,16,28,130 // vmovss (%r10,%rax,4),%xmm3
+ .byte 196,195,249,22,211,1 // vpextrq $0x1,%xmm2,%r11
+ .byte 73,193,233,32 // shr $0x20,%r9
+ .byte 196,3,97,33,36,138,16 // vinsertps $0x10,(%r10,%r9,4),%xmm3,%xmm12
+ .byte 68,137,216 // mov %r11d,%eax
+ .byte 196,193,122,16,28,130 // vmovss (%r10,%rax,4),%xmm3
+ .byte 196,65,249,126,201 // vmovq %xmm9,%r9
+ .byte 73,193,235,32 // shr $0x20,%r11
+ .byte 196,129,122,16,20,154 // vmovss (%r10,%r11,4),%xmm2
+ .byte 68,137,200 // mov %r9d,%eax
+ .byte 196,193,122,16,12,130 // vmovss (%r10,%rax,4),%xmm1
+ .byte 196,67,249,22,203,1 // vpextrq $0x1,%xmm9,%r11
+ .byte 73,193,233,32 // shr $0x20,%r9
+ .byte 196,3,113,33,12,138,16 // vinsertps $0x10,(%r10,%r9,4),%xmm1,%xmm9
+ .byte 68,137,216 // mov %r11d,%eax
+ .byte 196,65,122,16,4,130 // vmovss (%r10,%rax,4),%xmm8
.byte 196,195,33,33,207,32 // vinsertps $0x20,%xmm15,%xmm11,%xmm1
- .byte 72,193,235,32 // shr $0x20,%rbx
- .byte 197,122,16,28,152 // vmovss (%rax,%rbx,4),%xmm11
+ .byte 73,193,235,32 // shr $0x20,%r11
+ .byte 196,1,122,16,28,154 // vmovss (%r10,%r11,4),%xmm11
.byte 196,195,113,33,206,48 // vinsertps $0x30,%xmm14,%xmm1,%xmm1
.byte 196,227,117,24,207,1 // vinsertf128 $0x1,%xmm7,%ymm1,%ymm1
.byte 196,227,25,33,219,32 // vinsertps $0x20,%xmm3,%xmm12,%xmm3
.byte 196,227,97,33,210,48 // vinsertps $0x30,%xmm2,%xmm3,%xmm2
- .byte 196,195,49,33,221,32 // vinsertps $0x20,%xmm13,%xmm9,%xmm3
+ .byte 196,195,49,33,216,32 // vinsertps $0x20,%xmm8,%xmm9,%xmm3
.byte 196,195,97,33,219,48 // vinsertps $0x30,%xmm11,%xmm3,%xmm3
.byte 196,227,101,24,210,1 // vinsertf128 $0x1,%xmm2,%ymm3,%ymm2
.byte 196,193,97,114,210,24 // vpsrld $0x18,%xmm10,%xmm3
- .byte 196,193,65,114,208,24 // vpsrld $0x18,%xmm8,%xmm7
+ .byte 196,193,65,114,213,24 // vpsrld $0x18,%xmm13,%xmm7
.byte 196,227,101,24,223,1 // vinsertf128 $0x1,%xmm7,%ymm3,%ymm3
.byte 197,252,91,219 // vcvtdq2ps %ymm3,%ymm3
- .byte 196,226,125,24,61,46,140,0,0 // vbroadcastss 0x8c2e(%rip),%ymm7 # af1c <_sk_clut_4D_avx+0x2285>
+ .byte 196,226,125,24,61,50,143,0,0 // vbroadcastss 0x8f32(%rip),%ymm7 # b2e8 <_sk_clut_4D_avx+0x2285>
.byte 197,228,89,223 // vmulps %ymm7,%ymm3,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 76,137,201 // mov %r9,%rcx
- .byte 197,252,16,124,36,208 // vmovups -0x30(%rsp),%ymm7
- .byte 91 // pop %rbx
+ .byte 197,252,16,124,36,200 // vmovups -0x38(%rsp),%ymm7
.byte 255,224 // jmpq *%rax
- .byte 185,8,0,0,0 // mov $0x8,%ecx
- .byte 68,41,193 // sub %r8d,%ecx
- .byte 192,225,3 // shl $0x3,%cl
- .byte 73,199,195,255,255,255,255 // mov $0xffffffffffffffff,%r11
- .byte 73,211,235 // shr %cl,%r11
- .byte 196,193,249,110,195 // vmovq %r11,%xmm0
- .byte 196,226,121,48,192 // vpmovzxbw %xmm0,%xmm0
- .byte 196,226,121,0,13,24,144,0,0 // vpshufb 0x9018(%rip),%xmm0,%xmm1 # b340 <_sk_clut_4D_avx+0x26a9>
- .byte 196,226,121,33,201 // vpmovsxbd %xmm1,%xmm1
- .byte 196,226,121,0,5,26,144,0,0 // vpshufb 0x901a(%rip),%xmm0,%xmm0 # b350 <_sk_clut_4D_avx+0x26b9>
- .byte 196,226,121,33,192 // vpmovsxbd %xmm0,%xmm0
- .byte 196,227,117,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
- .byte 196,66,125,44,18 // vmaskmovps (%r10),%ymm0,%ymm10
- .byte 233,194,253,255,255 // jmpq 210d <_sk_load_tables_avx+0x25>
+ .byte 69,137,194 // mov %r8d,%r10d
+ .byte 65,128,226,7 // and $0x7,%r10b
+ .byte 196,65,44,87,210 // vxorps %ymm10,%ymm10,%ymm10
+ .byte 65,254,202 // dec %r10b
+ .byte 65,128,250,6 // cmp $0x6,%r10b
+ .byte 15,135,233,253,255,255 // ja 21c6 <_sk_load_tables_avx+0x1a>
+ .byte 69,15,182,210 // movzbl %r10b,%r10d
+ .byte 76,141,29,148,0,0,0 // lea 0x94(%rip),%r11 # 247c <_sk_load_tables_avx+0x2d0>
+ .byte 79,99,20,147 // movslq (%r11,%r10,4),%r10
+ .byte 77,1,218 // add %r11,%r10
+ .byte 65,255,226 // jmpq *%r10
+ .byte 196,65,122,16,20,145 // vmovss (%r9,%rdx,4),%xmm10
+ .byte 233,201,253,255,255 // jmpq 21c6 <_sk_load_tables_avx+0x1a>
+ .byte 196,193,121,110,68,145,8 // vmovd 0x8(%r9,%rdx,4),%xmm0
+ .byte 197,249,112,192,68 // vpshufd $0x44,%xmm0,%xmm0
+ .byte 197,244,87,201 // vxorps %ymm1,%ymm1,%ymm1
+ .byte 196,99,117,12,208,4 // vblendps $0x4,%ymm0,%ymm1,%ymm10
+ .byte 196,193,123,16,4,145 // vmovsd (%r9,%rdx,4),%xmm0
+ .byte 196,99,45,13,208,1 // vblendpd $0x1,%ymm0,%ymm10,%ymm10
+ .byte 233,162,253,255,255 // jmpq 21c6 <_sk_load_tables_avx+0x1a>
+ .byte 196,193,121,110,68,145,24 // vmovd 0x18(%r9,%rdx,4),%xmm0
+ .byte 197,249,112,192,68 // vpshufd $0x44,%xmm0,%xmm0
+ .byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ .byte 197,244,87,201 // vxorps %ymm1,%ymm1,%ymm1
+ .byte 196,99,117,12,208,64 // vblendps $0x40,%ymm0,%ymm1,%ymm10
+ .byte 196,99,125,25,208,1 // vextractf128 $0x1,%ymm10,%xmm0
+ .byte 196,195,121,34,68,145,20,1 // vpinsrd $0x1,0x14(%r9,%rdx,4),%xmm0,%xmm0
+ .byte 196,99,45,24,208,1 // vinsertf128 $0x1,%xmm0,%ymm10,%ymm10
+ .byte 196,99,125,25,208,1 // vextractf128 $0x1,%ymm10,%xmm0
+ .byte 196,195,121,34,68,145,16,0 // vpinsrd $0x0,0x10(%r9,%rdx,4),%xmm0,%xmm0
+ .byte 196,99,45,24,208,1 // vinsertf128 $0x1,%xmm0,%ymm10,%ymm10
+ .byte 196,193,121,16,4,145 // vmovupd (%r9,%rdx,4),%xmm0
+ .byte 196,67,125,13,210,12 // vblendpd $0xc,%ymm10,%ymm0,%ymm10
+ .byte 233,77,253,255,255 // jmpq 21c6 <_sk_load_tables_avx+0x1a>
+ .byte 15,31,0 // nopl (%rax)
+ .byte 118,255 // jbe 247d <_sk_load_tables_avx+0x2d1>
+ .byte 255 // (bad)
+ .byte 255,151,255,255,255,129 // callq *-0x7e000001(%rdi)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 236 // in (%dx),%al
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 216,255 // fdivr %st(7),%st
+ .byte 255 // (bad)
+ .byte 255,196 // inc %esp
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // .byte 0xff
+ .byte 168,255 // test $0xff,%al
+ .byte 255 // (bad)
+ .byte 255 // .byte 0xff
HIDDEN _sk_load_tables_u16_be_avx
.globl _sk_load_tables_u16_be_avx
@@ -26241,7 +26674,7 @@ _sk_load_tables_u16_be_avx:
.byte 76,141,20,149,0,0,0,0 // lea 0x0(,%rdx,4),%r10
.byte 77,133,192 // test %r8,%r8
.byte 197,252,17,124,36,200 // vmovups %ymm7,-0x38(%rsp)
- .byte 15,133,84,2,0,0 // jne 25bb <_sk_load_tables_u16_be_avx+0x270>
+ .byte 15,133,84,2,0,0 // jne 2708 <_sk_load_tables_u16_be_avx+0x270>
.byte 196,1,121,16,4,81 // vmovupd (%r9,%r10,2),%xmm8
.byte 196,129,121,16,84,81,16 // vmovupd 0x10(%r9,%r10,2),%xmm2
.byte 196,129,121,16,92,81,32 // vmovupd 0x20(%r9,%r10,2),%xmm3
@@ -26256,7 +26689,7 @@ _sk_load_tables_u16_be_avx:
.byte 197,113,105,219 // vpunpckhwd %xmm3,%xmm1,%xmm11
.byte 197,177,108,200 // vpunpcklqdq %xmm0,%xmm9,%xmm1
.byte 197,49,109,224 // vpunpckhqdq %xmm0,%xmm9,%xmm12
- .byte 197,121,111,21,172,143,0,0 // vmovdqa 0x8fac(%rip),%xmm10 # b360 <_sk_clut_4D_avx+0x26c9>
+ .byte 197,121,111,21,255,146,0,0 // vmovdqa 0x92ff(%rip),%xmm10 # b800 <_sk_clut_4D_avx+0x279d>
.byte 196,193,113,219,202 // vpand %xmm10,%xmm1,%xmm1
.byte 196,65,49,239,201 // vpxor %xmm9,%xmm9,%xmm9
.byte 196,193,113,105,209 // vpunpckhwd %xmm9,%xmm1,%xmm2
@@ -26354,7 +26787,7 @@ _sk_load_tables_u16_be_avx:
.byte 196,226,121,51,219 // vpmovzxwd %xmm3,%xmm3
.byte 196,227,101,24,223,1 // vinsertf128 $0x1,%xmm7,%ymm3,%ymm3
.byte 197,252,91,219 // vcvtdq2ps %ymm3,%ymm3
- .byte 196,226,125,24,61,115,137,0,0 // vbroadcastss 0x8973(%rip),%ymm7 # af20 <_sk_clut_4D_avx+0x2289>
+ .byte 196,226,125,24,61,242,139,0,0 // vbroadcastss 0x8bf2(%rip),%ymm7 # b2ec <_sk_clut_4D_avx+0x2289>
.byte 197,228,89,223 // vmulps %ymm7,%ymm3,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 197,252,16,124,36,200 // vmovups -0x38(%rsp),%ymm7
@@ -26362,29 +26795,29 @@ _sk_load_tables_u16_be_avx:
.byte 196,1,123,16,4,81 // vmovsd (%r9,%r10,2),%xmm8
.byte 196,65,49,239,201 // vpxor %xmm9,%xmm9,%xmm9
.byte 73,131,248,1 // cmp $0x1,%r8
- .byte 116,85 // je 2621 <_sk_load_tables_u16_be_avx+0x2d6>
+ .byte 116,85 // je 276e <_sk_load_tables_u16_be_avx+0x2d6>
.byte 196,1,57,22,68,81,8 // vmovhpd 0x8(%r9,%r10,2),%xmm8,%xmm8
.byte 73,131,248,3 // cmp $0x3,%r8
- .byte 114,72 // jb 2621 <_sk_load_tables_u16_be_avx+0x2d6>
+ .byte 114,72 // jb 276e <_sk_load_tables_u16_be_avx+0x2d6>
.byte 196,129,123,16,84,81,16 // vmovsd 0x10(%r9,%r10,2),%xmm2
.byte 73,131,248,3 // cmp $0x3,%r8
- .byte 116,72 // je 262e <_sk_load_tables_u16_be_avx+0x2e3>
+ .byte 116,72 // je 277b <_sk_load_tables_u16_be_avx+0x2e3>
.byte 196,129,105,22,84,81,24 // vmovhpd 0x18(%r9,%r10,2),%xmm2,%xmm2
.byte 73,131,248,5 // cmp $0x5,%r8
- .byte 114,59 // jb 262e <_sk_load_tables_u16_be_avx+0x2e3>
+ .byte 114,59 // jb 277b <_sk_load_tables_u16_be_avx+0x2e3>
.byte 196,129,123,16,92,81,32 // vmovsd 0x20(%r9,%r10,2),%xmm3
.byte 73,131,248,5 // cmp $0x5,%r8
- .byte 15,132,126,253,255,255 // je 2382 <_sk_load_tables_u16_be_avx+0x37>
+ .byte 15,132,126,253,255,255 // je 24cf <_sk_load_tables_u16_be_avx+0x37>
.byte 196,129,97,22,92,81,40 // vmovhpd 0x28(%r9,%r10,2),%xmm3,%xmm3
.byte 73,131,248,7 // cmp $0x7,%r8
- .byte 15,130,109,253,255,255 // jb 2382 <_sk_load_tables_u16_be_avx+0x37>
+ .byte 15,130,109,253,255,255 // jb 24cf <_sk_load_tables_u16_be_avx+0x37>
.byte 196,1,122,126,76,81,48 // vmovq 0x30(%r9,%r10,2),%xmm9
- .byte 233,97,253,255,255 // jmpq 2382 <_sk_load_tables_u16_be_avx+0x37>
+ .byte 233,97,253,255,255 // jmpq 24cf <_sk_load_tables_u16_be_avx+0x37>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
.byte 197,233,87,210 // vxorpd %xmm2,%xmm2,%xmm2
- .byte 233,84,253,255,255 // jmpq 2382 <_sk_load_tables_u16_be_avx+0x37>
+ .byte 233,84,253,255,255 // jmpq 24cf <_sk_load_tables_u16_be_avx+0x37>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
- .byte 233,75,253,255,255 // jmpq 2382 <_sk_load_tables_u16_be_avx+0x37>
+ .byte 233,75,253,255,255 // jmpq 24cf <_sk_load_tables_u16_be_avx+0x37>
HIDDEN _sk_load_tables_rgb_u16_be_avx
.globl _sk_load_tables_rgb_u16_be_avx
@@ -26396,7 +26829,7 @@ _sk_load_tables_rgb_u16_be_avx:
.byte 77,133,192 // test %r8,%r8
.byte 197,252,17,124,36,200 // vmovups %ymm7,-0x38(%rsp)
.byte 197,252,17,116,36,168 // vmovups %ymm6,-0x58(%rsp)
- .byte 15,133,71,2,0,0 // jne 289c <_sk_load_tables_rgb_u16_be_avx+0x265>
+ .byte 15,133,71,2,0,0 // jne 29e9 <_sk_load_tables_rgb_u16_be_avx+0x265>
.byte 196,129,122,111,4,81 // vmovdqu (%r9,%r10,2),%xmm0
.byte 196,129,122,111,84,81,12 // vmovdqu 0xc(%r9,%r10,2),%xmm2
.byte 196,129,122,111,76,81,24 // vmovdqu 0x18(%r9,%r10,2),%xmm1
@@ -26417,7 +26850,7 @@ _sk_load_tables_rgb_u16_be_avx:
.byte 197,185,108,218 // vpunpcklqdq %xmm2,%xmm8,%xmm3
.byte 197,57,109,218 // vpunpckhqdq %xmm2,%xmm8,%xmm11
.byte 197,121,108,193 // vpunpcklqdq %xmm1,%xmm0,%xmm8
- .byte 197,121,111,13,177,140,0,0 // vmovdqa 0x8cb1(%rip),%xmm9 # b370 <_sk_clut_4D_avx+0x26d9>
+ .byte 197,121,111,13,4,144,0,0 // vmovdqa 0x9004(%rip),%xmm9 # b810 <_sk_clut_4D_avx+0x27ad>
.byte 196,193,97,219,193 // vpand %xmm9,%xmm3,%xmm0
.byte 196,65,41,239,210 // vpxor %xmm10,%xmm10,%xmm10
.byte 196,193,121,105,202 // vpunpckhwd %xmm10,%xmm0,%xmm1
@@ -26507,50 +26940,50 @@ _sk_load_tables_rgb_u16_be_avx:
.byte 196,195,105,33,211,48 // vinsertps $0x30,%xmm11,%xmm2,%xmm2
.byte 196,227,109,24,211,1 // vinsertf128 $0x1,%xmm3,%ymm2,%ymm2
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,29,150,134,0,0 // vbroadcastss 0x8696(%rip),%ymm3 # af24 <_sk_clut_4D_avx+0x228d>
+ .byte 196,226,125,24,29,21,137,0,0 // vbroadcastss 0x8915(%rip),%ymm3 # b2f0 <_sk_clut_4D_avx+0x228d>
.byte 197,252,16,116,36,168 // vmovups -0x58(%rsp),%ymm6
.byte 197,252,16,124,36,200 // vmovups -0x38(%rsp),%ymm7
.byte 255,224 // jmpq *%rax
.byte 196,129,121,110,4,81 // vmovd (%r9,%r10,2),%xmm0
.byte 196,129,121,196,68,81,4,2 // vpinsrw $0x2,0x4(%r9,%r10,2),%xmm0,%xmm0
.byte 73,131,248,1 // cmp $0x1,%r8
- .byte 117,5 // jne 28b5 <_sk_load_tables_rgb_u16_be_avx+0x27e>
- .byte 233,212,253,255,255 // jmpq 2689 <_sk_load_tables_rgb_u16_be_avx+0x52>
+ .byte 117,5 // jne 2a02 <_sk_load_tables_rgb_u16_be_avx+0x27e>
+ .byte 233,212,253,255,255 // jmpq 27d6 <_sk_load_tables_rgb_u16_be_avx+0x52>
.byte 196,129,121,110,76,81,6 // vmovd 0x6(%r9,%r10,2),%xmm1
.byte 196,1,113,196,68,81,10,2 // vpinsrw $0x2,0xa(%r9,%r10,2),%xmm1,%xmm8
.byte 73,131,248,3 // cmp $0x3,%r8
- .byte 114,26 // jb 28e4 <_sk_load_tables_rgb_u16_be_avx+0x2ad>
+ .byte 114,26 // jb 2a31 <_sk_load_tables_rgb_u16_be_avx+0x2ad>
.byte 196,129,121,110,76,81,12 // vmovd 0xc(%r9,%r10,2),%xmm1
.byte 196,129,113,196,84,81,16,2 // vpinsrw $0x2,0x10(%r9,%r10,2),%xmm1,%xmm2
.byte 73,131,248,3 // cmp $0x3,%r8
- .byte 117,10 // jne 28e9 <_sk_load_tables_rgb_u16_be_avx+0x2b2>
- .byte 233,165,253,255,255 // jmpq 2689 <_sk_load_tables_rgb_u16_be_avx+0x52>
- .byte 233,160,253,255,255 // jmpq 2689 <_sk_load_tables_rgb_u16_be_avx+0x52>
+ .byte 117,10 // jne 2a36 <_sk_load_tables_rgb_u16_be_avx+0x2b2>
+ .byte 233,165,253,255,255 // jmpq 27d6 <_sk_load_tables_rgb_u16_be_avx+0x52>
+ .byte 233,160,253,255,255 // jmpq 27d6 <_sk_load_tables_rgb_u16_be_avx+0x52>
.byte 196,129,121,110,76,81,18 // vmovd 0x12(%r9,%r10,2),%xmm1
.byte 196,1,113,196,76,81,22,2 // vpinsrw $0x2,0x16(%r9,%r10,2),%xmm1,%xmm9
.byte 73,131,248,5 // cmp $0x5,%r8
- .byte 114,26 // jb 2918 <_sk_load_tables_rgb_u16_be_avx+0x2e1>
+ .byte 114,26 // jb 2a65 <_sk_load_tables_rgb_u16_be_avx+0x2e1>
.byte 196,129,121,110,76,81,24 // vmovd 0x18(%r9,%r10,2),%xmm1
.byte 196,129,113,196,76,81,28,2 // vpinsrw $0x2,0x1c(%r9,%r10,2),%xmm1,%xmm1
.byte 73,131,248,5 // cmp $0x5,%r8
- .byte 117,10 // jne 291d <_sk_load_tables_rgb_u16_be_avx+0x2e6>
- .byte 233,113,253,255,255 // jmpq 2689 <_sk_load_tables_rgb_u16_be_avx+0x52>
- .byte 233,108,253,255,255 // jmpq 2689 <_sk_load_tables_rgb_u16_be_avx+0x52>
+ .byte 117,10 // jne 2a6a <_sk_load_tables_rgb_u16_be_avx+0x2e6>
+ .byte 233,113,253,255,255 // jmpq 27d6 <_sk_load_tables_rgb_u16_be_avx+0x52>
+ .byte 233,108,253,255,255 // jmpq 27d6 <_sk_load_tables_rgb_u16_be_avx+0x52>
.byte 196,129,121,110,92,81,30 // vmovd 0x1e(%r9,%r10,2),%xmm3
.byte 196,1,97,196,92,81,34,2 // vpinsrw $0x2,0x22(%r9,%r10,2),%xmm3,%xmm11
.byte 73,131,248,7 // cmp $0x7,%r8
- .byte 114,20 // jb 2946 <_sk_load_tables_rgb_u16_be_avx+0x30f>
+ .byte 114,20 // jb 2a93 <_sk_load_tables_rgb_u16_be_avx+0x30f>
.byte 196,129,121,110,92,81,36 // vmovd 0x24(%r9,%r10,2),%xmm3
.byte 196,129,97,196,92,81,40,2 // vpinsrw $0x2,0x28(%r9,%r10,2),%xmm3,%xmm3
- .byte 233,67,253,255,255 // jmpq 2689 <_sk_load_tables_rgb_u16_be_avx+0x52>
- .byte 233,62,253,255,255 // jmpq 2689 <_sk_load_tables_rgb_u16_be_avx+0x52>
+ .byte 233,67,253,255,255 // jmpq 27d6 <_sk_load_tables_rgb_u16_be_avx+0x52>
+ .byte 233,62,253,255,255 // jmpq 27d6 <_sk_load_tables_rgb_u16_be_avx+0x52>
HIDDEN _sk_byte_tables_avx
.globl _sk_byte_tables_avx
FUNCTION(_sk_byte_tables_avx)
_sk_byte_tables_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,98,125,24,5,210,133,0,0 // vbroadcastss 0x85d2(%rip),%ymm8 # af28 <_sk_clut_4D_avx+0x2291>
+ .byte 196,98,125,24,5,81,136,0,0 // vbroadcastss 0x8851(%rip),%ymm8 # b2f4 <_sk_clut_4D_avx+0x2291>
.byte 196,193,124,89,192 // vmulps %ymm8,%ymm0,%ymm0
.byte 197,125,91,200 // vcvtps2dq %ymm0,%ymm9
.byte 196,65,249,126,201 // vmovq %xmm9,%r9
@@ -26669,7 +27102,7 @@ _sk_byte_tables_avx:
.byte 196,194,121,49,204 // vpmovzxbd %xmm12,%xmm1
.byte 196,194,121,49,213 // vpmovzxbd %xmm13,%xmm2
.byte 196,227,117,24,202,1 // vinsertf128 $0x1,%xmm2,%ymm1,%ymm1
- .byte 196,98,125,24,13,124,131,0,0 // vbroadcastss 0x837c(%rip),%ymm9 # af2c <_sk_clut_4D_avx+0x2295>
+ .byte 196,98,125,24,13,251,133,0,0 // vbroadcastss 0x85fb(%rip),%ymm9 # b2f8 <_sk_clut_4D_avx+0x2295>
.byte 196,193,124,89,193 // vmulps %ymm9,%ymm0,%ymm0
.byte 197,252,91,201 // vcvtdq2ps %ymm1,%ymm1
.byte 196,193,116,89,201 // vmulps %ymm9,%ymm1,%ymm1
@@ -26785,7 +27218,7 @@ _sk_byte_tables_rgb_avx:
.byte 196,194,121,49,203 // vpmovzxbd %xmm11,%xmm1
.byte 196,227,125,24,193,1 // vinsertf128 $0x1,%xmm1,%ymm0,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,98,125,24,13,93,129,0,0 // vbroadcastss 0x815d(%rip),%ymm9 # af30 <_sk_clut_4D_avx+0x2299>
+ .byte 196,98,125,24,13,220,131,0,0 // vbroadcastss 0x83dc(%rip),%ymm9 # b2fc <_sk_clut_4D_avx+0x2299>
.byte 196,193,124,89,193 // vmulps %ymm9,%ymm0,%ymm0
.byte 196,194,121,49,202 // vpmovzxbd %xmm10,%xmm1
.byte 196,194,121,49,212 // vpmovzxbd %xmm12,%xmm2
@@ -26990,36 +27423,36 @@ _sk_parametric_r_avx:
.byte 196,193,124,88,195 // vaddps %ymm11,%ymm0,%ymm0
.byte 196,98,125,24,16 // vbroadcastss (%rax),%ymm10
.byte 197,124,91,216 // vcvtdq2ps %ymm0,%ymm11
- .byte 196,98,125,24,37,10,126,0,0 // vbroadcastss 0x7e0a(%rip),%ymm12 # af34 <_sk_clut_4D_avx+0x229d>
+ .byte 196,98,125,24,37,137,128,0,0 // vbroadcastss 0x8089(%rip),%ymm12 # b300 <_sk_clut_4D_avx+0x229d>
.byte 196,65,36,89,220 // vmulps %ymm12,%ymm11,%ymm11
- .byte 196,98,125,24,37,0,126,0,0 // vbroadcastss 0x7e00(%rip),%ymm12 # af38 <_sk_clut_4D_avx+0x22a1>
+ .byte 196,98,125,24,37,127,128,0,0 // vbroadcastss 0x807f(%rip),%ymm12 # b304 <_sk_clut_4D_avx+0x22a1>
.byte 196,193,124,84,196 // vandps %ymm12,%ymm0,%ymm0
- .byte 196,98,125,24,37,246,125,0,0 // vbroadcastss 0x7df6(%rip),%ymm12 # af3c <_sk_clut_4D_avx+0x22a5>
+ .byte 196,98,125,24,37,117,128,0,0 // vbroadcastss 0x8075(%rip),%ymm12 # b308 <_sk_clut_4D_avx+0x22a5>
.byte 196,193,124,86,196 // vorps %ymm12,%ymm0,%ymm0
- .byte 196,98,125,24,37,236,125,0,0 // vbroadcastss 0x7dec(%rip),%ymm12 # af40 <_sk_clut_4D_avx+0x22a9>
+ .byte 196,98,125,24,37,107,128,0,0 // vbroadcastss 0x806b(%rip),%ymm12 # b30c <_sk_clut_4D_avx+0x22a9>
.byte 196,65,36,88,220 // vaddps %ymm12,%ymm11,%ymm11
- .byte 196,98,125,24,37,226,125,0,0 // vbroadcastss 0x7de2(%rip),%ymm12 # af44 <_sk_clut_4D_avx+0x22ad>
+ .byte 196,98,125,24,37,97,128,0,0 // vbroadcastss 0x8061(%rip),%ymm12 # b310 <_sk_clut_4D_avx+0x22ad>
.byte 196,65,124,89,228 // vmulps %ymm12,%ymm0,%ymm12
.byte 196,65,36,92,220 // vsubps %ymm12,%ymm11,%ymm11
- .byte 196,98,125,24,37,211,125,0,0 // vbroadcastss 0x7dd3(%rip),%ymm12 # af48 <_sk_clut_4D_avx+0x22b1>
+ .byte 196,98,125,24,37,82,128,0,0 // vbroadcastss 0x8052(%rip),%ymm12 # b314 <_sk_clut_4D_avx+0x22b1>
.byte 196,193,124,88,196 // vaddps %ymm12,%ymm0,%ymm0
- .byte 196,98,125,24,37,201,125,0,0 // vbroadcastss 0x7dc9(%rip),%ymm12 # af4c <_sk_clut_4D_avx+0x22b5>
+ .byte 196,98,125,24,37,72,128,0,0 // vbroadcastss 0x8048(%rip),%ymm12 # b318 <_sk_clut_4D_avx+0x22b5>
.byte 197,156,94,192 // vdivps %ymm0,%ymm12,%ymm0
.byte 197,164,92,192 // vsubps %ymm0,%ymm11,%ymm0
.byte 197,172,89,192 // vmulps %ymm0,%ymm10,%ymm0
.byte 196,99,125,8,208,1 // vroundps $0x1,%ymm0,%ymm10
.byte 196,65,124,92,210 // vsubps %ymm10,%ymm0,%ymm10
- .byte 196,98,125,24,29,173,125,0,0 // vbroadcastss 0x7dad(%rip),%ymm11 # af50 <_sk_clut_4D_avx+0x22b9>
+ .byte 196,98,125,24,29,44,128,0,0 // vbroadcastss 0x802c(%rip),%ymm11 # b31c <_sk_clut_4D_avx+0x22b9>
.byte 196,193,124,88,195 // vaddps %ymm11,%ymm0,%ymm0
- .byte 196,98,125,24,29,163,125,0,0 // vbroadcastss 0x7da3(%rip),%ymm11 # af54 <_sk_clut_4D_avx+0x22bd>
+ .byte 196,98,125,24,29,34,128,0,0 // vbroadcastss 0x8022(%rip),%ymm11 # b320 <_sk_clut_4D_avx+0x22bd>
.byte 196,65,44,89,219 // vmulps %ymm11,%ymm10,%ymm11
.byte 196,193,124,92,195 // vsubps %ymm11,%ymm0,%ymm0
- .byte 196,98,125,24,29,148,125,0,0 // vbroadcastss 0x7d94(%rip),%ymm11 # af58 <_sk_clut_4D_avx+0x22c1>
+ .byte 196,98,125,24,29,19,128,0,0 // vbroadcastss 0x8013(%rip),%ymm11 # b324 <_sk_clut_4D_avx+0x22c1>
.byte 196,65,36,92,210 // vsubps %ymm10,%ymm11,%ymm10
- .byte 196,98,125,24,29,138,125,0,0 // vbroadcastss 0x7d8a(%rip),%ymm11 # af5c <_sk_clut_4D_avx+0x22c5>
+ .byte 196,98,125,24,29,9,128,0,0 // vbroadcastss 0x8009(%rip),%ymm11 # b328 <_sk_clut_4D_avx+0x22c5>
.byte 196,65,36,94,210 // vdivps %ymm10,%ymm11,%ymm10
.byte 196,193,124,88,194 // vaddps %ymm10,%ymm0,%ymm0
- .byte 196,98,125,24,21,123,125,0,0 // vbroadcastss 0x7d7b(%rip),%ymm10 # af60 <_sk_clut_4D_avx+0x22c9>
+ .byte 196,98,125,24,21,250,127,0,0 // vbroadcastss 0x7ffa(%rip),%ymm10 # b32c <_sk_clut_4D_avx+0x22c9>
.byte 196,193,124,89,194 // vmulps %ymm10,%ymm0,%ymm0
.byte 197,253,91,192 // vcvtps2dq %ymm0,%ymm0
.byte 196,98,125,24,80,20 // vbroadcastss 0x14(%rax),%ymm10
@@ -27027,7 +27460,7 @@ _sk_parametric_r_avx:
.byte 196,195,125,74,193,128 // vblendvps %ymm8,%ymm9,%ymm0,%ymm0
.byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
.byte 196,193,124,95,192 // vmaxps %ymm8,%ymm0,%ymm0
- .byte 196,98,125,24,5,82,125,0,0 // vbroadcastss 0x7d52(%rip),%ymm8 # af64 <_sk_clut_4D_avx+0x22cd>
+ .byte 196,98,125,24,5,209,127,0,0 // vbroadcastss 0x7fd1(%rip),%ymm8 # b330 <_sk_clut_4D_avx+0x22cd>
.byte 196,193,124,93,192 // vminps %ymm8,%ymm0,%ymm0
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -27049,36 +27482,36 @@ _sk_parametric_g_avx:
.byte 196,193,116,88,203 // vaddps %ymm11,%ymm1,%ymm1
.byte 196,98,125,24,16 // vbroadcastss (%rax),%ymm10
.byte 197,124,91,217 // vcvtdq2ps %ymm1,%ymm11
- .byte 196,98,125,24,37,3,125,0,0 // vbroadcastss 0x7d03(%rip),%ymm12 # af68 <_sk_clut_4D_avx+0x22d1>
+ .byte 196,98,125,24,37,130,127,0,0 // vbroadcastss 0x7f82(%rip),%ymm12 # b334 <_sk_clut_4D_avx+0x22d1>
.byte 196,65,36,89,220 // vmulps %ymm12,%ymm11,%ymm11
- .byte 196,98,125,24,37,249,124,0,0 // vbroadcastss 0x7cf9(%rip),%ymm12 # af6c <_sk_clut_4D_avx+0x22d5>
+ .byte 196,98,125,24,37,120,127,0,0 // vbroadcastss 0x7f78(%rip),%ymm12 # b338 <_sk_clut_4D_avx+0x22d5>
.byte 196,193,116,84,204 // vandps %ymm12,%ymm1,%ymm1
- .byte 196,98,125,24,37,239,124,0,0 // vbroadcastss 0x7cef(%rip),%ymm12 # af70 <_sk_clut_4D_avx+0x22d9>
+ .byte 196,98,125,24,37,110,127,0,0 // vbroadcastss 0x7f6e(%rip),%ymm12 # b33c <_sk_clut_4D_avx+0x22d9>
.byte 196,193,116,86,204 // vorps %ymm12,%ymm1,%ymm1
- .byte 196,98,125,24,37,229,124,0,0 // vbroadcastss 0x7ce5(%rip),%ymm12 # af74 <_sk_clut_4D_avx+0x22dd>
+ .byte 196,98,125,24,37,100,127,0,0 // vbroadcastss 0x7f64(%rip),%ymm12 # b340 <_sk_clut_4D_avx+0x22dd>
.byte 196,65,36,88,220 // vaddps %ymm12,%ymm11,%ymm11
- .byte 196,98,125,24,37,219,124,0,0 // vbroadcastss 0x7cdb(%rip),%ymm12 # af78 <_sk_clut_4D_avx+0x22e1>
+ .byte 196,98,125,24,37,90,127,0,0 // vbroadcastss 0x7f5a(%rip),%ymm12 # b344 <_sk_clut_4D_avx+0x22e1>
.byte 196,65,116,89,228 // vmulps %ymm12,%ymm1,%ymm12
.byte 196,65,36,92,220 // vsubps %ymm12,%ymm11,%ymm11
- .byte 196,98,125,24,37,204,124,0,0 // vbroadcastss 0x7ccc(%rip),%ymm12 # af7c <_sk_clut_4D_avx+0x22e5>
+ .byte 196,98,125,24,37,75,127,0,0 // vbroadcastss 0x7f4b(%rip),%ymm12 # b348 <_sk_clut_4D_avx+0x22e5>
.byte 196,193,116,88,204 // vaddps %ymm12,%ymm1,%ymm1
- .byte 196,98,125,24,37,194,124,0,0 // vbroadcastss 0x7cc2(%rip),%ymm12 # af80 <_sk_clut_4D_avx+0x22e9>
+ .byte 196,98,125,24,37,65,127,0,0 // vbroadcastss 0x7f41(%rip),%ymm12 # b34c <_sk_clut_4D_avx+0x22e9>
.byte 197,156,94,201 // vdivps %ymm1,%ymm12,%ymm1
.byte 197,164,92,201 // vsubps %ymm1,%ymm11,%ymm1
.byte 197,172,89,201 // vmulps %ymm1,%ymm10,%ymm1
.byte 196,99,125,8,209,1 // vroundps $0x1,%ymm1,%ymm10
.byte 196,65,116,92,210 // vsubps %ymm10,%ymm1,%ymm10
- .byte 196,98,125,24,29,166,124,0,0 // vbroadcastss 0x7ca6(%rip),%ymm11 # af84 <_sk_clut_4D_avx+0x22ed>
+ .byte 196,98,125,24,29,37,127,0,0 // vbroadcastss 0x7f25(%rip),%ymm11 # b350 <_sk_clut_4D_avx+0x22ed>
.byte 196,193,116,88,203 // vaddps %ymm11,%ymm1,%ymm1
- .byte 196,98,125,24,29,156,124,0,0 // vbroadcastss 0x7c9c(%rip),%ymm11 # af88 <_sk_clut_4D_avx+0x22f1>
+ .byte 196,98,125,24,29,27,127,0,0 // vbroadcastss 0x7f1b(%rip),%ymm11 # b354 <_sk_clut_4D_avx+0x22f1>
.byte 196,65,44,89,219 // vmulps %ymm11,%ymm10,%ymm11
.byte 196,193,116,92,203 // vsubps %ymm11,%ymm1,%ymm1
- .byte 196,98,125,24,29,141,124,0,0 // vbroadcastss 0x7c8d(%rip),%ymm11 # af8c <_sk_clut_4D_avx+0x22f5>
+ .byte 196,98,125,24,29,12,127,0,0 // vbroadcastss 0x7f0c(%rip),%ymm11 # b358 <_sk_clut_4D_avx+0x22f5>
.byte 196,65,36,92,210 // vsubps %ymm10,%ymm11,%ymm10
- .byte 196,98,125,24,29,131,124,0,0 // vbroadcastss 0x7c83(%rip),%ymm11 # af90 <_sk_clut_4D_avx+0x22f9>
+ .byte 196,98,125,24,29,2,127,0,0 // vbroadcastss 0x7f02(%rip),%ymm11 # b35c <_sk_clut_4D_avx+0x22f9>
.byte 196,65,36,94,210 // vdivps %ymm10,%ymm11,%ymm10
.byte 196,193,116,88,202 // vaddps %ymm10,%ymm1,%ymm1
- .byte 196,98,125,24,21,116,124,0,0 // vbroadcastss 0x7c74(%rip),%ymm10 # af94 <_sk_clut_4D_avx+0x22fd>
+ .byte 196,98,125,24,21,243,126,0,0 // vbroadcastss 0x7ef3(%rip),%ymm10 # b360 <_sk_clut_4D_avx+0x22fd>
.byte 196,193,116,89,202 // vmulps %ymm10,%ymm1,%ymm1
.byte 197,253,91,201 // vcvtps2dq %ymm1,%ymm1
.byte 196,98,125,24,80,20 // vbroadcastss 0x14(%rax),%ymm10
@@ -27086,7 +27519,7 @@ _sk_parametric_g_avx:
.byte 196,195,117,74,201,128 // vblendvps %ymm8,%ymm9,%ymm1,%ymm1
.byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
.byte 196,193,116,95,200 // vmaxps %ymm8,%ymm1,%ymm1
- .byte 196,98,125,24,5,75,124,0,0 // vbroadcastss 0x7c4b(%rip),%ymm8 # af98 <_sk_clut_4D_avx+0x2301>
+ .byte 196,98,125,24,5,202,126,0,0 // vbroadcastss 0x7eca(%rip),%ymm8 # b364 <_sk_clut_4D_avx+0x2301>
.byte 196,193,116,93,200 // vminps %ymm8,%ymm1,%ymm1
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -27108,36 +27541,36 @@ _sk_parametric_b_avx:
.byte 196,193,108,88,211 // vaddps %ymm11,%ymm2,%ymm2
.byte 196,98,125,24,16 // vbroadcastss (%rax),%ymm10
.byte 197,124,91,218 // vcvtdq2ps %ymm2,%ymm11
- .byte 196,98,125,24,37,252,123,0,0 // vbroadcastss 0x7bfc(%rip),%ymm12 # af9c <_sk_clut_4D_avx+0x2305>
+ .byte 196,98,125,24,37,123,126,0,0 // vbroadcastss 0x7e7b(%rip),%ymm12 # b368 <_sk_clut_4D_avx+0x2305>
.byte 196,65,36,89,220 // vmulps %ymm12,%ymm11,%ymm11
- .byte 196,98,125,24,37,242,123,0,0 // vbroadcastss 0x7bf2(%rip),%ymm12 # afa0 <_sk_clut_4D_avx+0x2309>
+ .byte 196,98,125,24,37,113,126,0,0 // vbroadcastss 0x7e71(%rip),%ymm12 # b36c <_sk_clut_4D_avx+0x2309>
.byte 196,193,108,84,212 // vandps %ymm12,%ymm2,%ymm2
- .byte 196,98,125,24,37,232,123,0,0 // vbroadcastss 0x7be8(%rip),%ymm12 # afa4 <_sk_clut_4D_avx+0x230d>
+ .byte 196,98,125,24,37,103,126,0,0 // vbroadcastss 0x7e67(%rip),%ymm12 # b370 <_sk_clut_4D_avx+0x230d>
.byte 196,193,108,86,212 // vorps %ymm12,%ymm2,%ymm2
- .byte 196,98,125,24,37,222,123,0,0 // vbroadcastss 0x7bde(%rip),%ymm12 # afa8 <_sk_clut_4D_avx+0x2311>
+ .byte 196,98,125,24,37,93,126,0,0 // vbroadcastss 0x7e5d(%rip),%ymm12 # b374 <_sk_clut_4D_avx+0x2311>
.byte 196,65,36,88,220 // vaddps %ymm12,%ymm11,%ymm11
- .byte 196,98,125,24,37,212,123,0,0 // vbroadcastss 0x7bd4(%rip),%ymm12 # afac <_sk_clut_4D_avx+0x2315>
+ .byte 196,98,125,24,37,83,126,0,0 // vbroadcastss 0x7e53(%rip),%ymm12 # b378 <_sk_clut_4D_avx+0x2315>
.byte 196,65,108,89,228 // vmulps %ymm12,%ymm2,%ymm12
.byte 196,65,36,92,220 // vsubps %ymm12,%ymm11,%ymm11
- .byte 196,98,125,24,37,197,123,0,0 // vbroadcastss 0x7bc5(%rip),%ymm12 # afb0 <_sk_clut_4D_avx+0x2319>
+ .byte 196,98,125,24,37,68,126,0,0 // vbroadcastss 0x7e44(%rip),%ymm12 # b37c <_sk_clut_4D_avx+0x2319>
.byte 196,193,108,88,212 // vaddps %ymm12,%ymm2,%ymm2
- .byte 196,98,125,24,37,187,123,0,0 // vbroadcastss 0x7bbb(%rip),%ymm12 # afb4 <_sk_clut_4D_avx+0x231d>
+ .byte 196,98,125,24,37,58,126,0,0 // vbroadcastss 0x7e3a(%rip),%ymm12 # b380 <_sk_clut_4D_avx+0x231d>
.byte 197,156,94,210 // vdivps %ymm2,%ymm12,%ymm2
.byte 197,164,92,210 // vsubps %ymm2,%ymm11,%ymm2
.byte 197,172,89,210 // vmulps %ymm2,%ymm10,%ymm2
.byte 196,99,125,8,210,1 // vroundps $0x1,%ymm2,%ymm10
.byte 196,65,108,92,210 // vsubps %ymm10,%ymm2,%ymm10
- .byte 196,98,125,24,29,159,123,0,0 // vbroadcastss 0x7b9f(%rip),%ymm11 # afb8 <_sk_clut_4D_avx+0x2321>
+ .byte 196,98,125,24,29,30,126,0,0 // vbroadcastss 0x7e1e(%rip),%ymm11 # b384 <_sk_clut_4D_avx+0x2321>
.byte 196,193,108,88,211 // vaddps %ymm11,%ymm2,%ymm2
- .byte 196,98,125,24,29,149,123,0,0 // vbroadcastss 0x7b95(%rip),%ymm11 # afbc <_sk_clut_4D_avx+0x2325>
+ .byte 196,98,125,24,29,20,126,0,0 // vbroadcastss 0x7e14(%rip),%ymm11 # b388 <_sk_clut_4D_avx+0x2325>
.byte 196,65,44,89,219 // vmulps %ymm11,%ymm10,%ymm11
.byte 196,193,108,92,211 // vsubps %ymm11,%ymm2,%ymm2
- .byte 196,98,125,24,29,134,123,0,0 // vbroadcastss 0x7b86(%rip),%ymm11 # afc0 <_sk_clut_4D_avx+0x2329>
+ .byte 196,98,125,24,29,5,126,0,0 // vbroadcastss 0x7e05(%rip),%ymm11 # b38c <_sk_clut_4D_avx+0x2329>
.byte 196,65,36,92,210 // vsubps %ymm10,%ymm11,%ymm10
- .byte 196,98,125,24,29,124,123,0,0 // vbroadcastss 0x7b7c(%rip),%ymm11 # afc4 <_sk_clut_4D_avx+0x232d>
+ .byte 196,98,125,24,29,251,125,0,0 // vbroadcastss 0x7dfb(%rip),%ymm11 # b390 <_sk_clut_4D_avx+0x232d>
.byte 196,65,36,94,210 // vdivps %ymm10,%ymm11,%ymm10
.byte 196,193,108,88,210 // vaddps %ymm10,%ymm2,%ymm2
- .byte 196,98,125,24,21,109,123,0,0 // vbroadcastss 0x7b6d(%rip),%ymm10 # afc8 <_sk_clut_4D_avx+0x2331>
+ .byte 196,98,125,24,21,236,125,0,0 // vbroadcastss 0x7dec(%rip),%ymm10 # b394 <_sk_clut_4D_avx+0x2331>
.byte 196,193,108,89,210 // vmulps %ymm10,%ymm2,%ymm2
.byte 197,253,91,210 // vcvtps2dq %ymm2,%ymm2
.byte 196,98,125,24,80,20 // vbroadcastss 0x14(%rax),%ymm10
@@ -27145,7 +27578,7 @@ _sk_parametric_b_avx:
.byte 196,195,109,74,209,128 // vblendvps %ymm8,%ymm9,%ymm2,%ymm2
.byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
.byte 196,193,108,95,208 // vmaxps %ymm8,%ymm2,%ymm2
- .byte 196,98,125,24,5,68,123,0,0 // vbroadcastss 0x7b44(%rip),%ymm8 # afcc <_sk_clut_4D_avx+0x2335>
+ .byte 196,98,125,24,5,195,125,0,0 // vbroadcastss 0x7dc3(%rip),%ymm8 # b398 <_sk_clut_4D_avx+0x2335>
.byte 196,193,108,93,208 // vminps %ymm8,%ymm2,%ymm2
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -27167,36 +27600,36 @@ _sk_parametric_a_avx:
.byte 196,193,100,88,219 // vaddps %ymm11,%ymm3,%ymm3
.byte 196,98,125,24,16 // vbroadcastss (%rax),%ymm10
.byte 197,124,91,219 // vcvtdq2ps %ymm3,%ymm11
- .byte 196,98,125,24,37,245,122,0,0 // vbroadcastss 0x7af5(%rip),%ymm12 # afd0 <_sk_clut_4D_avx+0x2339>
+ .byte 196,98,125,24,37,116,125,0,0 // vbroadcastss 0x7d74(%rip),%ymm12 # b39c <_sk_clut_4D_avx+0x2339>
.byte 196,65,36,89,220 // vmulps %ymm12,%ymm11,%ymm11
- .byte 196,98,125,24,37,235,122,0,0 // vbroadcastss 0x7aeb(%rip),%ymm12 # afd4 <_sk_clut_4D_avx+0x233d>
+ .byte 196,98,125,24,37,106,125,0,0 // vbroadcastss 0x7d6a(%rip),%ymm12 # b3a0 <_sk_clut_4D_avx+0x233d>
.byte 196,193,100,84,220 // vandps %ymm12,%ymm3,%ymm3
- .byte 196,98,125,24,37,225,122,0,0 // vbroadcastss 0x7ae1(%rip),%ymm12 # afd8 <_sk_clut_4D_avx+0x2341>
+ .byte 196,98,125,24,37,96,125,0,0 // vbroadcastss 0x7d60(%rip),%ymm12 # b3a4 <_sk_clut_4D_avx+0x2341>
.byte 196,193,100,86,220 // vorps %ymm12,%ymm3,%ymm3
- .byte 196,98,125,24,37,215,122,0,0 // vbroadcastss 0x7ad7(%rip),%ymm12 # afdc <_sk_clut_4D_avx+0x2345>
+ .byte 196,98,125,24,37,86,125,0,0 // vbroadcastss 0x7d56(%rip),%ymm12 # b3a8 <_sk_clut_4D_avx+0x2345>
.byte 196,65,36,88,220 // vaddps %ymm12,%ymm11,%ymm11
- .byte 196,98,125,24,37,205,122,0,0 // vbroadcastss 0x7acd(%rip),%ymm12 # afe0 <_sk_clut_4D_avx+0x2349>
+ .byte 196,98,125,24,37,76,125,0,0 // vbroadcastss 0x7d4c(%rip),%ymm12 # b3ac <_sk_clut_4D_avx+0x2349>
.byte 196,65,100,89,228 // vmulps %ymm12,%ymm3,%ymm12
.byte 196,65,36,92,220 // vsubps %ymm12,%ymm11,%ymm11
- .byte 196,98,125,24,37,190,122,0,0 // vbroadcastss 0x7abe(%rip),%ymm12 # afe4 <_sk_clut_4D_avx+0x234d>
+ .byte 196,98,125,24,37,61,125,0,0 // vbroadcastss 0x7d3d(%rip),%ymm12 # b3b0 <_sk_clut_4D_avx+0x234d>
.byte 196,193,100,88,220 // vaddps %ymm12,%ymm3,%ymm3
- .byte 196,98,125,24,37,180,122,0,0 // vbroadcastss 0x7ab4(%rip),%ymm12 # afe8 <_sk_clut_4D_avx+0x2351>
+ .byte 196,98,125,24,37,51,125,0,0 // vbroadcastss 0x7d33(%rip),%ymm12 # b3b4 <_sk_clut_4D_avx+0x2351>
.byte 197,156,94,219 // vdivps %ymm3,%ymm12,%ymm3
.byte 197,164,92,219 // vsubps %ymm3,%ymm11,%ymm3
.byte 197,172,89,219 // vmulps %ymm3,%ymm10,%ymm3
.byte 196,99,125,8,211,1 // vroundps $0x1,%ymm3,%ymm10
.byte 196,65,100,92,210 // vsubps %ymm10,%ymm3,%ymm10
- .byte 196,98,125,24,29,152,122,0,0 // vbroadcastss 0x7a98(%rip),%ymm11 # afec <_sk_clut_4D_avx+0x2355>
+ .byte 196,98,125,24,29,23,125,0,0 // vbroadcastss 0x7d17(%rip),%ymm11 # b3b8 <_sk_clut_4D_avx+0x2355>
.byte 196,193,100,88,219 // vaddps %ymm11,%ymm3,%ymm3
- .byte 196,98,125,24,29,142,122,0,0 // vbroadcastss 0x7a8e(%rip),%ymm11 # aff0 <_sk_clut_4D_avx+0x2359>
+ .byte 196,98,125,24,29,13,125,0,0 // vbroadcastss 0x7d0d(%rip),%ymm11 # b3bc <_sk_clut_4D_avx+0x2359>
.byte 196,65,44,89,219 // vmulps %ymm11,%ymm10,%ymm11
.byte 196,193,100,92,219 // vsubps %ymm11,%ymm3,%ymm3
- .byte 196,98,125,24,29,127,122,0,0 // vbroadcastss 0x7a7f(%rip),%ymm11 # aff4 <_sk_clut_4D_avx+0x235d>
+ .byte 196,98,125,24,29,254,124,0,0 // vbroadcastss 0x7cfe(%rip),%ymm11 # b3c0 <_sk_clut_4D_avx+0x235d>
.byte 196,65,36,92,210 // vsubps %ymm10,%ymm11,%ymm10
- .byte 196,98,125,24,29,117,122,0,0 // vbroadcastss 0x7a75(%rip),%ymm11 # aff8 <_sk_clut_4D_avx+0x2361>
+ .byte 196,98,125,24,29,244,124,0,0 // vbroadcastss 0x7cf4(%rip),%ymm11 # b3c4 <_sk_clut_4D_avx+0x2361>
.byte 196,65,36,94,210 // vdivps %ymm10,%ymm11,%ymm10
.byte 196,193,100,88,218 // vaddps %ymm10,%ymm3,%ymm3
- .byte 196,98,125,24,21,102,122,0,0 // vbroadcastss 0x7a66(%rip),%ymm10 # affc <_sk_clut_4D_avx+0x2365>
+ .byte 196,98,125,24,21,229,124,0,0 // vbroadcastss 0x7ce5(%rip),%ymm10 # b3c8 <_sk_clut_4D_avx+0x2365>
.byte 196,193,100,89,218 // vmulps %ymm10,%ymm3,%ymm3
.byte 197,253,91,219 // vcvtps2dq %ymm3,%ymm3
.byte 196,98,125,24,80,20 // vbroadcastss 0x14(%rax),%ymm10
@@ -27204,7 +27637,7 @@ _sk_parametric_a_avx:
.byte 196,195,101,74,217,128 // vblendvps %ymm8,%ymm9,%ymm3,%ymm3
.byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
.byte 196,193,100,95,216 // vmaxps %ymm8,%ymm3,%ymm3
- .byte 196,98,125,24,5,61,122,0,0 // vbroadcastss 0x7a3d(%rip),%ymm8 # b000 <_sk_clut_4D_avx+0x2369>
+ .byte 196,98,125,24,5,188,124,0,0 // vbroadcastss 0x7cbc(%rip),%ymm8 # b3cc <_sk_clut_4D_avx+0x2369>
.byte 196,193,100,93,216 // vminps %ymm8,%ymm3,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -27223,34 +27656,34 @@ _sk_gamma_avx:
.byte 197,252,40,233 // vmovaps %ymm1,%ymm5
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 197,252,91,200 // vcvtdq2ps %ymm0,%ymm1
- .byte 196,98,125,24,5,0,122,0,0 // vbroadcastss 0x7a00(%rip),%ymm8 # b004 <_sk_clut_4D_avx+0x236d>
+ .byte 196,98,125,24,5,127,124,0,0 // vbroadcastss 0x7c7f(%rip),%ymm8 # b3d0 <_sk_clut_4D_avx+0x236d>
.byte 196,193,116,89,200 // vmulps %ymm8,%ymm1,%ymm1
- .byte 196,98,125,24,13,246,121,0,0 // vbroadcastss 0x79f6(%rip),%ymm9 # b008 <_sk_clut_4D_avx+0x2371>
+ .byte 196,98,125,24,13,117,124,0,0 // vbroadcastss 0x7c75(%rip),%ymm9 # b3d4 <_sk_clut_4D_avx+0x2371>
.byte 196,193,124,84,193 // vandps %ymm9,%ymm0,%ymm0
- .byte 196,226,125,24,37,236,121,0,0 // vbroadcastss 0x79ec(%rip),%ymm4 # b00c <_sk_clut_4D_avx+0x2375>
+ .byte 196,226,125,24,37,107,124,0,0 // vbroadcastss 0x7c6b(%rip),%ymm4 # b3d8 <_sk_clut_4D_avx+0x2375>
.byte 197,252,86,196 // vorps %ymm4,%ymm0,%ymm0
- .byte 196,98,125,24,21,227,121,0,0 // vbroadcastss 0x79e3(%rip),%ymm10 # b010 <_sk_clut_4D_avx+0x2379>
+ .byte 196,98,125,24,21,98,124,0,0 // vbroadcastss 0x7c62(%rip),%ymm10 # b3dc <_sk_clut_4D_avx+0x2379>
.byte 196,193,116,88,202 // vaddps %ymm10,%ymm1,%ymm1
- .byte 196,98,125,24,29,217,121,0,0 // vbroadcastss 0x79d9(%rip),%ymm11 # b014 <_sk_clut_4D_avx+0x237d>
+ .byte 196,98,125,24,29,88,124,0,0 // vbroadcastss 0x7c58(%rip),%ymm11 # b3e0 <_sk_clut_4D_avx+0x237d>
.byte 196,193,124,89,211 // vmulps %ymm11,%ymm0,%ymm2
.byte 197,244,92,202 // vsubps %ymm2,%ymm1,%ymm1
- .byte 196,98,125,24,37,203,121,0,0 // vbroadcastss 0x79cb(%rip),%ymm12 # b018 <_sk_clut_4D_avx+0x2381>
+ .byte 196,98,125,24,37,74,124,0,0 // vbroadcastss 0x7c4a(%rip),%ymm12 # b3e4 <_sk_clut_4D_avx+0x2381>
.byte 196,193,124,88,196 // vaddps %ymm12,%ymm0,%ymm0
- .byte 196,98,125,24,45,193,121,0,0 // vbroadcastss 0x79c1(%rip),%ymm13 # b01c <_sk_clut_4D_avx+0x2385>
+ .byte 196,98,125,24,45,64,124,0,0 // vbroadcastss 0x7c40(%rip),%ymm13 # b3e8 <_sk_clut_4D_avx+0x2385>
.byte 197,148,94,192 // vdivps %ymm0,%ymm13,%ymm0
.byte 197,244,92,192 // vsubps %ymm0,%ymm1,%ymm0
.byte 196,98,125,24,48 // vbroadcastss (%rax),%ymm14
.byte 196,193,124,89,198 // vmulps %ymm14,%ymm0,%ymm0
.byte 196,227,125,8,200,1 // vroundps $0x1,%ymm0,%ymm1
.byte 197,252,92,241 // vsubps %ymm1,%ymm0,%ymm6
- .byte 196,98,125,24,61,160,121,0,0 // vbroadcastss 0x79a0(%rip),%ymm15 # b020 <_sk_clut_4D_avx+0x2389>
+ .byte 196,98,125,24,61,31,124,0,0 // vbroadcastss 0x7c1f(%rip),%ymm15 # b3ec <_sk_clut_4D_avx+0x2389>
.byte 196,193,124,88,199 // vaddps %ymm15,%ymm0,%ymm0
- .byte 196,226,125,24,29,150,121,0,0 // vbroadcastss 0x7996(%rip),%ymm3 # b024 <_sk_clut_4D_avx+0x238d>
+ .byte 196,226,125,24,29,21,124,0,0 // vbroadcastss 0x7c15(%rip),%ymm3 # b3f0 <_sk_clut_4D_avx+0x238d>
.byte 197,204,89,203 // vmulps %ymm3,%ymm6,%ymm1
.byte 197,252,92,201 // vsubps %ymm1,%ymm0,%ymm1
- .byte 196,226,125,24,21,137,121,0,0 // vbroadcastss 0x7989(%rip),%ymm2 # b028 <_sk_clut_4D_avx+0x2391>
+ .byte 196,226,125,24,21,8,124,0,0 // vbroadcastss 0x7c08(%rip),%ymm2 # b3f4 <_sk_clut_4D_avx+0x2391>
.byte 197,236,92,198 // vsubps %ymm6,%ymm2,%ymm0
- .byte 196,226,125,24,53,128,121,0,0 // vbroadcastss 0x7980(%rip),%ymm6 # b02c <_sk_clut_4D_avx+0x2395>
+ .byte 196,226,125,24,53,255,123,0,0 // vbroadcastss 0x7bff(%rip),%ymm6 # b3f8 <_sk_clut_4D_avx+0x2395>
.byte 197,204,94,192 // vdivps %ymm0,%ymm6,%ymm0
.byte 197,244,88,192 // vaddps %ymm0,%ymm1,%ymm0
.byte 197,252,17,68,36,128 // vmovups %ymm0,-0x80(%rsp)
@@ -27292,7 +27725,7 @@ _sk_gamma_avx:
.byte 197,236,92,212 // vsubps %ymm4,%ymm2,%ymm2
.byte 197,204,94,210 // vdivps %ymm2,%ymm6,%ymm2
.byte 197,244,88,202 // vaddps %ymm2,%ymm1,%ymm1
- .byte 196,226,125,24,21,195,120,0,0 // vbroadcastss 0x78c3(%rip),%ymm2 # b030 <_sk_clut_4D_avx+0x2399>
+ .byte 196,226,125,24,21,66,123,0,0 // vbroadcastss 0x7b42(%rip),%ymm2 # b3fc <_sk_clut_4D_avx+0x2399>
.byte 197,236,89,92,36,128 // vmulps -0x80(%rsp),%ymm2,%ymm3
.byte 197,252,89,226 // vmulps %ymm2,%ymm0,%ymm4
.byte 197,244,89,210 // vmulps %ymm2,%ymm1,%ymm2
@@ -27312,31 +27745,31 @@ HIDDEN _sk_lab_to_xyz_avx
.globl _sk_lab_to_xyz_avx
FUNCTION(_sk_lab_to_xyz_avx)
_sk_lab_to_xyz_avx:
- .byte 196,98,125,24,5,127,120,0,0 // vbroadcastss 0x787f(%rip),%ymm8 # b034 <_sk_clut_4D_avx+0x239d>
+ .byte 196,98,125,24,5,254,122,0,0 // vbroadcastss 0x7afe(%rip),%ymm8 # b400 <_sk_clut_4D_avx+0x239d>
.byte 196,193,124,89,192 // vmulps %ymm8,%ymm0,%ymm0
- .byte 196,98,125,24,5,117,120,0,0 // vbroadcastss 0x7875(%rip),%ymm8 # b038 <_sk_clut_4D_avx+0x23a1>
+ .byte 196,98,125,24,5,244,122,0,0 // vbroadcastss 0x7af4(%rip),%ymm8 # b404 <_sk_clut_4D_avx+0x23a1>
.byte 196,193,116,89,200 // vmulps %ymm8,%ymm1,%ymm1
- .byte 196,98,125,24,13,107,120,0,0 // vbroadcastss 0x786b(%rip),%ymm9 # b03c <_sk_clut_4D_avx+0x23a5>
+ .byte 196,98,125,24,13,234,122,0,0 // vbroadcastss 0x7aea(%rip),%ymm9 # b408 <_sk_clut_4D_avx+0x23a5>
.byte 196,193,116,88,201 // vaddps %ymm9,%ymm1,%ymm1
.byte 196,193,108,89,208 // vmulps %ymm8,%ymm2,%ymm2
.byte 196,193,108,88,209 // vaddps %ymm9,%ymm2,%ymm2
- .byte 196,98,125,24,5,87,120,0,0 // vbroadcastss 0x7857(%rip),%ymm8 # b040 <_sk_clut_4D_avx+0x23a9>
+ .byte 196,98,125,24,5,214,122,0,0 // vbroadcastss 0x7ad6(%rip),%ymm8 # b40c <_sk_clut_4D_avx+0x23a9>
.byte 196,193,124,88,192 // vaddps %ymm8,%ymm0,%ymm0
- .byte 196,98,125,24,5,77,120,0,0 // vbroadcastss 0x784d(%rip),%ymm8 # b044 <_sk_clut_4D_avx+0x23ad>
+ .byte 196,98,125,24,5,204,122,0,0 // vbroadcastss 0x7acc(%rip),%ymm8 # b410 <_sk_clut_4D_avx+0x23ad>
.byte 196,193,124,89,192 // vmulps %ymm8,%ymm0,%ymm0
- .byte 196,98,125,24,5,67,120,0,0 // vbroadcastss 0x7843(%rip),%ymm8 # b048 <_sk_clut_4D_avx+0x23b1>
+ .byte 196,98,125,24,5,194,122,0,0 // vbroadcastss 0x7ac2(%rip),%ymm8 # b414 <_sk_clut_4D_avx+0x23b1>
.byte 196,193,116,89,200 // vmulps %ymm8,%ymm1,%ymm1
.byte 197,252,88,201 // vaddps %ymm1,%ymm0,%ymm1
- .byte 196,98,125,24,5,53,120,0,0 // vbroadcastss 0x7835(%rip),%ymm8 # b04c <_sk_clut_4D_avx+0x23b5>
+ .byte 196,98,125,24,5,180,122,0,0 // vbroadcastss 0x7ab4(%rip),%ymm8 # b418 <_sk_clut_4D_avx+0x23b5>
.byte 196,193,108,89,208 // vmulps %ymm8,%ymm2,%ymm2
.byte 197,252,92,210 // vsubps %ymm2,%ymm0,%ymm2
.byte 197,116,89,193 // vmulps %ymm1,%ymm1,%ymm8
.byte 196,65,116,89,192 // vmulps %ymm8,%ymm1,%ymm8
- .byte 196,98,125,24,13,30,120,0,0 // vbroadcastss 0x781e(%rip),%ymm9 # b050 <_sk_clut_4D_avx+0x23b9>
+ .byte 196,98,125,24,13,157,122,0,0 // vbroadcastss 0x7a9d(%rip),%ymm9 # b41c <_sk_clut_4D_avx+0x23b9>
.byte 196,65,52,194,208,1 // vcmpltps %ymm8,%ymm9,%ymm10
- .byte 196,98,125,24,29,19,120,0,0 // vbroadcastss 0x7813(%rip),%ymm11 # b054 <_sk_clut_4D_avx+0x23bd>
+ .byte 196,98,125,24,29,146,122,0,0 // vbroadcastss 0x7a92(%rip),%ymm11 # b420 <_sk_clut_4D_avx+0x23bd>
.byte 196,193,116,88,203 // vaddps %ymm11,%ymm1,%ymm1
- .byte 196,98,125,24,37,9,120,0,0 // vbroadcastss 0x7809(%rip),%ymm12 # b058 <_sk_clut_4D_avx+0x23c1>
+ .byte 196,98,125,24,37,136,122,0,0 // vbroadcastss 0x7a88(%rip),%ymm12 # b424 <_sk_clut_4D_avx+0x23c1>
.byte 196,193,116,89,204 // vmulps %ymm12,%ymm1,%ymm1
.byte 196,67,117,74,192,160 // vblendvps %ymm10,%ymm8,%ymm1,%ymm8
.byte 197,252,89,200 // vmulps %ymm0,%ymm0,%ymm1
@@ -27351,9 +27784,9 @@ _sk_lab_to_xyz_avx:
.byte 196,193,108,88,211 // vaddps %ymm11,%ymm2,%ymm2
.byte 196,193,108,89,212 // vmulps %ymm12,%ymm2,%ymm2
.byte 196,227,109,74,208,144 // vblendvps %ymm9,%ymm0,%ymm2,%ymm2
- .byte 196,226,125,24,5,191,119,0,0 // vbroadcastss 0x77bf(%rip),%ymm0 # b05c <_sk_clut_4D_avx+0x23c5>
+ .byte 196,226,125,24,5,62,122,0,0 // vbroadcastss 0x7a3e(%rip),%ymm0 # b428 <_sk_clut_4D_avx+0x23c5>
.byte 197,188,89,192 // vmulps %ymm0,%ymm8,%ymm0
- .byte 196,98,125,24,5,182,119,0,0 // vbroadcastss 0x77b6(%rip),%ymm8 # b060 <_sk_clut_4D_avx+0x23c9>
+ .byte 196,98,125,24,5,53,122,0,0 // vbroadcastss 0x7a35(%rip),%ymm8 # b42c <_sk_clut_4D_avx+0x23c9>
.byte 196,193,108,89,208 // vmulps %ymm8,%ymm2,%ymm2
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -27370,15 +27803,15 @@ _sk_load_a8_avx:
.byte 76,3,8 // add (%rax),%r9
.byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 117,67 // jne 390f <_sk_load_a8_avx+0x5c>
+ .byte 117,67 // jne 3a5c <_sk_load_a8_avx+0x5c>
.byte 196,194,121,48,4,25 // vpmovzxbw (%r9,%rbx,1),%xmm0
- .byte 197,249,219,5,166,122,0,0 // vpand 0x7aa6(%rip),%xmm0,%xmm0 # b380 <_sk_clut_4D_avx+0x26e9>
+ .byte 197,249,219,5,249,125,0,0 // vpand 0x7df9(%rip),%xmm0,%xmm0 # b820 <_sk_clut_4D_avx+0x27bd>
.byte 197,241,239,201 // vpxor %xmm1,%xmm1,%xmm1
.byte 197,249,105,201 // vpunpckhwd %xmm1,%xmm0,%xmm1
.byte 196,226,121,51,192 // vpmovzxwd %xmm0,%xmm0
.byte 196,227,125,24,193,1 // vinsertf128 $0x1,%xmm1,%ymm0,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,226,125,24,13,106,119,0,0 // vbroadcastss 0x776a(%rip),%ymm1 # b064 <_sk_clut_4D_avx+0x23cd>
+ .byte 196,226,125,24,13,233,121,0,0 // vbroadcastss 0x79e9(%rip),%ymm1 # b430 <_sk_clut_4D_avx+0x23cd>
.byte 197,252,89,217 // vmulps %ymm1,%ymm0,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 197,252,87,192 // vxorps %ymm0,%ymm0,%ymm0
@@ -27391,15 +27824,15 @@ _sk_load_a8_avx:
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 65,254,202 // dec %r10b
.byte 65,128,250,6 // cmp $0x6,%r10b
- .byte 119,175 // ja 38d2 <_sk_load_a8_avx+0x1f>
+ .byte 119,175 // ja 3a1f <_sk_load_a8_avx+0x1f>
.byte 69,15,182,210 // movzbl %r10b,%r10d
- .byte 76,141,29,122,0,0,0 // lea 0x7a(%rip),%r11 # 39a8 <_sk_load_a8_avx+0xf5>
+ .byte 76,141,29,121,0,0,0 // lea 0x79(%rip),%r11 # 3af4 <_sk_load_a8_avx+0xf4>
.byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
.byte 65,15,182,4,25 // movzbl (%r9,%rbx,1),%eax
.byte 197,249,110,192 // vmovd %eax,%xmm0
- .byte 235,144 // jmp 38d2 <_sk_load_a8_avx+0x1f>
+ .byte 235,144 // jmp 3a1f <_sk_load_a8_avx+0x1f>
.byte 65,15,182,68,25,2 // movzbl 0x2(%r9,%rbx,1),%eax
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 197,249,196,192,2 // vpinsrw $0x2,%eax,%xmm0,%xmm0
@@ -27407,7 +27840,7 @@ _sk_load_a8_avx:
.byte 197,249,110,200 // vmovd %eax,%xmm1
.byte 196,226,121,48,201 // vpmovzxbw %xmm1,%xmm1
.byte 196,227,121,14,193,3 // vpblendw $0x3,%xmm1,%xmm0,%xmm0
- .byte 233,104,255,255,255 // jmpq 38d2 <_sk_load_a8_avx+0x1f>
+ .byte 233,104,255,255,255 // jmpq 3a1f <_sk_load_a8_avx+0x1f>
.byte 65,15,182,68,25,6 // movzbl 0x6(%r9,%rbx,1),%eax
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 197,249,196,192,6 // vpinsrw $0x6,%eax,%xmm0,%xmm0
@@ -27418,24 +27851,22 @@ _sk_load_a8_avx:
.byte 196,193,121,110,12,25 // vmovd (%r9,%rbx,1),%xmm1
.byte 196,226,121,48,201 // vpmovzxbw %xmm1,%xmm1
.byte 196,227,113,14,192,240 // vpblendw $0xf0,%xmm0,%xmm1,%xmm0
- .byte 233,45,255,255,255 // jmpq 38d2 <_sk_load_a8_avx+0x1f>
- .byte 15,31,0 // nopl (%rax)
- .byte 143 // (bad)
+ .byte 233,45,255,255,255 // jmpq 3a1f <_sk_load_a8_avx+0x1f>
+ .byte 102,144 // xchg %ax,%ax
+ .byte 144 // nop
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,169,255,255,255,154 // ljmp *-0x65000001(%rcx)
+ .byte 255,170,255,255,255,155 // ljmp *-0x64000001(%rdx)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,231 // jmpq *%rdi
.byte 255 // (bad)
+ .byte 232,255,255,255,221 // callq ffffffffde003b04 <_sk_clut_4D_avx+0xffffffffddffaaa1>
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 220,255 // fdivr %st,%st(7)
- .byte 255 // (bad)
- .byte 255,209 // callq *%rcx
+ .byte 255,210 // callq *%rdx
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,194 // inc %edx
+ .byte 255,195 // inc %ebx
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // .byte 0xff
@@ -27452,15 +27883,15 @@ _sk_load_a8_dst_avx:
.byte 76,3,8 // add (%rax),%r9
.byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 117,67 // jne 3a20 <_sk_load_a8_dst_avx+0x5c>
+ .byte 117,67 // jne 3b6c <_sk_load_a8_dst_avx+0x5c>
.byte 196,194,121,48,36,25 // vpmovzxbw (%r9,%rbx,1),%xmm4
- .byte 197,217,219,37,165,121,0,0 // vpand 0x79a5(%rip),%xmm4,%xmm4 # b390 <_sk_clut_4D_avx+0x26f9>
+ .byte 197,217,219,37,249,124,0,0 // vpand 0x7cf9(%rip),%xmm4,%xmm4 # b830 <_sk_clut_4D_avx+0x27cd>
.byte 197,209,239,237 // vpxor %xmm5,%xmm5,%xmm5
.byte 197,217,105,237 // vpunpckhwd %xmm5,%xmm4,%xmm5
.byte 196,226,121,51,228 // vpmovzxwd %xmm4,%xmm4
.byte 196,227,93,24,229,1 // vinsertf128 $0x1,%xmm5,%ymm4,%ymm4
.byte 197,252,91,228 // vcvtdq2ps %ymm4,%ymm4
- .byte 196,226,125,24,45,93,118,0,0 // vbroadcastss 0x765d(%rip),%ymm5 # b068 <_sk_clut_4D_avx+0x23d1>
+ .byte 196,226,125,24,45,221,120,0,0 // vbroadcastss 0x78dd(%rip),%ymm5 # b434 <_sk_clut_4D_avx+0x23d1>
.byte 197,220,89,253 // vmulps %ymm5,%ymm4,%ymm7
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 197,220,87,228 // vxorps %ymm4,%ymm4,%ymm4
@@ -27473,15 +27904,15 @@ _sk_load_a8_dst_avx:
.byte 197,217,239,228 // vpxor %xmm4,%xmm4,%xmm4
.byte 65,254,202 // dec %r10b
.byte 65,128,250,6 // cmp $0x6,%r10b
- .byte 119,175 // ja 39e3 <_sk_load_a8_dst_avx+0x1f>
+ .byte 119,175 // ja 3b2f <_sk_load_a8_dst_avx+0x1f>
.byte 69,15,182,210 // movzbl %r10b,%r10d
- .byte 76,141,29,121,0,0,0 // lea 0x79(%rip),%r11 # 3ab8 <_sk_load_a8_dst_avx+0xf4>
+ .byte 76,141,29,121,0,0,0 // lea 0x79(%rip),%r11 # 3c04 <_sk_load_a8_dst_avx+0xf4>
.byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
.byte 65,15,182,4,25 // movzbl (%r9,%rbx,1),%eax
.byte 197,249,110,224 // vmovd %eax,%xmm4
- .byte 235,144 // jmp 39e3 <_sk_load_a8_dst_avx+0x1f>
+ .byte 235,144 // jmp 3b2f <_sk_load_a8_dst_avx+0x1f>
.byte 65,15,182,68,25,2 // movzbl 0x2(%r9,%rbx,1),%eax
.byte 197,217,239,228 // vpxor %xmm4,%xmm4,%xmm4
.byte 197,217,196,224,2 // vpinsrw $0x2,%eax,%xmm4,%xmm4
@@ -27489,7 +27920,7 @@ _sk_load_a8_dst_avx:
.byte 197,249,110,232 // vmovd %eax,%xmm5
.byte 196,226,121,48,237 // vpmovzxbw %xmm5,%xmm5
.byte 196,227,89,14,229,3 // vpblendw $0x3,%xmm5,%xmm4,%xmm4
- .byte 233,104,255,255,255 // jmpq 39e3 <_sk_load_a8_dst_avx+0x1f>
+ .byte 233,104,255,255,255 // jmpq 3b2f <_sk_load_a8_dst_avx+0x1f>
.byte 65,15,182,68,25,6 // movzbl 0x6(%r9,%rbx,1),%eax
.byte 197,217,239,228 // vpxor %xmm4,%xmm4,%xmm4
.byte 197,217,196,224,6 // vpinsrw $0x6,%eax,%xmm4,%xmm4
@@ -27500,7 +27931,7 @@ _sk_load_a8_dst_avx:
.byte 196,193,121,110,44,25 // vmovd (%r9,%rbx,1),%xmm5
.byte 196,226,121,48,237 // vpmovzxbw %xmm5,%xmm5
.byte 196,227,81,14,228,240 // vpblendw $0xf0,%xmm4,%xmm5,%xmm4
- .byte 233,45,255,255,255 // jmpq 39e3 <_sk_load_a8_dst_avx+0x1f>
+ .byte 233,45,255,255,255 // jmpq 3b2f <_sk_load_a8_dst_avx+0x1f>
.byte 102,144 // xchg %ax,%ax
.byte 144 // nop
.byte 255 // (bad)
@@ -27509,7 +27940,7 @@ _sk_load_a8_dst_avx:
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 232,255,255,255,221 // callq ffffffffde003ac8 <_sk_clut_4D_avx+0xffffffffddffae31>
+ .byte 232,255,255,255,221 // callq ffffffffde003c14 <_sk_clut_4D_avx+0xffffffffddffabb1>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,210 // callq *%rdx
@@ -27564,7 +27995,7 @@ _sk_gather_a8_avx:
.byte 196,226,121,49,192 // vpmovzxbd %xmm0,%xmm0
.byte 196,227,117,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,226,125,24,13,204,116,0,0 // vbroadcastss 0x74cc(%rip),%ymm1 # b06c <_sk_clut_4D_avx+0x23d5>
+ .byte 196,226,125,24,13,76,119,0,0 // vbroadcastss 0x774c(%rip),%ymm1 # b438 <_sk_clut_4D_avx+0x23d5>
.byte 197,252,89,217 // vmulps %ymm1,%ymm0,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 197,252,87,192 // vxorps %ymm0,%ymm0,%ymm0
@@ -27583,14 +28014,14 @@ _sk_store_a8_avx:
.byte 77,15,175,202 // imul %r10,%r9
.byte 76,3,8 // add (%rax),%r9
.byte 72,99,218 // movslq %edx,%rbx
- .byte 196,98,125,24,5,159,116,0,0 // vbroadcastss 0x749f(%rip),%ymm8 # b070 <_sk_clut_4D_avx+0x23d9>
+ .byte 196,98,125,24,5,31,119,0,0 // vbroadcastss 0x771f(%rip),%ymm8 # b43c <_sk_clut_4D_avx+0x23d9>
.byte 196,65,100,89,192 // vmulps %ymm8,%ymm3,%ymm8
.byte 196,65,125,91,192 // vcvtps2dq %ymm8,%ymm8
.byte 196,67,125,25,193,1 // vextractf128 $0x1,%ymm8,%xmm9
.byte 196,66,57,43,193 // vpackusdw %xmm9,%xmm8,%xmm8
.byte 196,65,57,103,192 // vpackuswb %xmm8,%xmm8,%xmm8
.byte 77,133,192 // test %r8,%r8
- .byte 117,11 // jne 3bfb <_sk_store_a8_avx+0x47>
+ .byte 117,11 // jne 3d47 <_sk_store_a8_avx+0x47>
.byte 196,65,123,17,4,25 // vmovsd %xmm8,(%r9,%rbx,1)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 91 // pop %rbx
@@ -27599,25 +28030,25 @@ _sk_store_a8_avx:
.byte 65,128,226,7 // and $0x7,%r10b
.byte 65,254,202 // dec %r10b
.byte 65,128,250,6 // cmp $0x6,%r10b
- .byte 119,235 // ja 3bf6 <_sk_store_a8_avx+0x42>
+ .byte 119,235 // ja 3d42 <_sk_store_a8_avx+0x42>
.byte 196,66,121,48,192 // vpmovzxbw %xmm8,%xmm8
.byte 69,15,182,210 // movzbl %r10b,%r10d
- .byte 76,141,29,85,0,0,0 // lea 0x55(%rip),%r11 # 3c70 <_sk_store_a8_avx+0xbc>
+ .byte 76,141,29,85,0,0,0 // lea 0x55(%rip),%r11 # 3dbc <_sk_store_a8_avx+0xbc>
.byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
.byte 196,67,121,20,4,25,0 // vpextrb $0x0,%xmm8,(%r9,%rbx,1)
- .byte 235,201 // jmp 3bf6 <_sk_store_a8_avx+0x42>
+ .byte 235,201 // jmp 3d42 <_sk_store_a8_avx+0x42>
.byte 196,67,121,20,68,25,2,4 // vpextrb $0x4,%xmm8,0x2(%r9,%rbx,1)
- .byte 196,98,57,0,5,98,119,0,0 // vpshufb 0x7762(%rip),%xmm8,%xmm8 # b3a0 <_sk_clut_4D_avx+0x2709>
+ .byte 196,98,57,0,5,182,122,0,0 // vpshufb 0x7ab6(%rip),%xmm8,%xmm8 # b840 <_sk_clut_4D_avx+0x27dd>
.byte 196,67,121,21,4,25,0 // vpextrw $0x0,%xmm8,(%r9,%rbx,1)
- .byte 235,175 // jmp 3bf6 <_sk_store_a8_avx+0x42>
+ .byte 235,175 // jmp 3d42 <_sk_store_a8_avx+0x42>
.byte 196,67,121,20,68,25,6,12 // vpextrb $0xc,%xmm8,0x6(%r9,%rbx,1)
.byte 196,67,121,20,68,25,5,10 // vpextrb $0xa,%xmm8,0x5(%r9,%rbx,1)
.byte 196,67,121,20,68,25,4,8 // vpextrb $0x8,%xmm8,0x4(%r9,%rbx,1)
- .byte 196,98,57,0,5,72,119,0,0 // vpshufb 0x7748(%rip),%xmm8,%xmm8 # b3b0 <_sk_clut_4D_avx+0x2719>
+ .byte 196,98,57,0,5,156,122,0,0 // vpshufb 0x7a9c(%rip),%xmm8,%xmm8 # b850 <_sk_clut_4D_avx+0x27ed>
.byte 196,65,121,126,4,25 // vmovd %xmm8,(%r9,%rbx,1)
- .byte 235,134 // jmp 3bf6 <_sk_store_a8_avx+0x42>
+ .byte 235,134 // jmp 3d42 <_sk_store_a8_avx+0x42>
.byte 180,255 // mov $0xff,%ah
.byte 255 // (bad)
.byte 255,197 // inc %ebp
@@ -27650,18 +28081,18 @@ _sk_load_g8_avx:
.byte 76,3,8 // add (%rax),%r9
.byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 117,72 // jne 3ced <_sk_load_g8_avx+0x61>
+ .byte 117,72 // jne 3e39 <_sk_load_g8_avx+0x61>
.byte 196,194,121,48,4,25 // vpmovzxbw (%r9,%rbx,1),%xmm0
- .byte 197,249,219,5,13,119,0,0 // vpand 0x770d(%rip),%xmm0,%xmm0 # b3c0 <_sk_clut_4D_avx+0x2729>
+ .byte 197,249,219,5,97,122,0,0 // vpand 0x7a61(%rip),%xmm0,%xmm0 # b860 <_sk_clut_4D_avx+0x27fd>
.byte 197,241,239,201 // vpxor %xmm1,%xmm1,%xmm1
.byte 197,249,105,201 // vpunpckhwd %xmm1,%xmm0,%xmm1
.byte 196,226,121,51,192 // vpmovzxwd %xmm0,%xmm0
.byte 196,227,125,24,193,1 // vinsertf128 $0x1,%xmm1,%ymm0,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,226,125,24,13,161,115,0,0 // vbroadcastss 0x73a1(%rip),%ymm1 # b074 <_sk_clut_4D_avx+0x23dd>
+ .byte 196,226,125,24,13,33,118,0,0 // vbroadcastss 0x7621(%rip),%ymm1 # b440 <_sk_clut_4D_avx+0x23dd>
.byte 197,252,89,193 // vmulps %ymm1,%ymm0,%ymm0
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,29,150,115,0,0 // vbroadcastss 0x7396(%rip),%ymm3 # b078 <_sk_clut_4D_avx+0x23e1>
+ .byte 196,226,125,24,29,22,118,0,0 // vbroadcastss 0x7616(%rip),%ymm3 # b444 <_sk_clut_4D_avx+0x23e1>
.byte 197,252,40,200 // vmovaps %ymm0,%ymm1
.byte 197,252,40,208 // vmovaps %ymm0,%ymm2
.byte 91 // pop %rbx
@@ -27671,15 +28102,15 @@ _sk_load_g8_avx:
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 65,254,202 // dec %r10b
.byte 65,128,250,6 // cmp $0x6,%r10b
- .byte 119,170 // ja 3cab <_sk_load_g8_avx+0x1f>
+ .byte 119,170 // ja 3df7 <_sk_load_g8_avx+0x1f>
.byte 69,15,182,210 // movzbl %r10b,%r10d
- .byte 76,141,29,120,0,0,0 // lea 0x78(%rip),%r11 # 3d84 <_sk_load_g8_avx+0xf8>
+ .byte 76,141,29,120,0,0,0 // lea 0x78(%rip),%r11 # 3ed0 <_sk_load_g8_avx+0xf8>
.byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
.byte 65,15,182,4,25 // movzbl (%r9,%rbx,1),%eax
.byte 197,249,110,192 // vmovd %eax,%xmm0
- .byte 235,139 // jmp 3cab <_sk_load_g8_avx+0x1f>
+ .byte 235,139 // jmp 3df7 <_sk_load_g8_avx+0x1f>
.byte 65,15,182,68,25,2 // movzbl 0x2(%r9,%rbx,1),%eax
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 197,249,196,192,2 // vpinsrw $0x2,%eax,%xmm0,%xmm0
@@ -27687,7 +28118,7 @@ _sk_load_g8_avx:
.byte 197,249,110,200 // vmovd %eax,%xmm1
.byte 196,226,121,48,201 // vpmovzxbw %xmm1,%xmm1
.byte 196,227,121,14,193,3 // vpblendw $0x3,%xmm1,%xmm0,%xmm0
- .byte 233,99,255,255,255 // jmpq 3cab <_sk_load_g8_avx+0x1f>
+ .byte 233,99,255,255,255 // jmpq 3df7 <_sk_load_g8_avx+0x1f>
.byte 65,15,182,68,25,6 // movzbl 0x6(%r9,%rbx,1),%eax
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 197,249,196,192,6 // vpinsrw $0x6,%eax,%xmm0,%xmm0
@@ -27698,7 +28129,7 @@ _sk_load_g8_avx:
.byte 196,193,121,110,12,25 // vmovd (%r9,%rbx,1),%xmm1
.byte 196,226,121,48,201 // vpmovzxbw %xmm1,%xmm1
.byte 196,227,113,14,192,240 // vpblendw $0xf0,%xmm0,%xmm1,%xmm0
- .byte 233,40,255,255,255 // jmpq 3cab <_sk_load_g8_avx+0x1f>
+ .byte 233,40,255,255,255 // jmpq 3df7 <_sk_load_g8_avx+0x1f>
.byte 144 // nop
.byte 145 // xchg %eax,%ecx
.byte 255 // (bad)
@@ -27707,7 +28138,7 @@ _sk_load_g8_avx:
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 233,255,255,255,222 // jmpq ffffffffdf003d94 <_sk_clut_4D_avx+0xffffffffdeffb0fd>
+ .byte 233,255,255,255,222 // jmpq ffffffffdf003ee0 <_sk_clut_4D_avx+0xffffffffdeffae7d>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,211 // callq *%rbx
@@ -27730,18 +28161,18 @@ _sk_load_g8_dst_avx:
.byte 76,3,8 // add (%rax),%r9
.byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 117,72 // jne 3e01 <_sk_load_g8_dst_avx+0x61>
+ .byte 117,72 // jne 3f4d <_sk_load_g8_dst_avx+0x61>
.byte 196,194,121,48,36,25 // vpmovzxbw (%r9,%rbx,1),%xmm4
- .byte 197,217,219,37,9,118,0,0 // vpand 0x7609(%rip),%xmm4,%xmm4 # b3d0 <_sk_clut_4D_avx+0x2739>
+ .byte 197,217,219,37,93,121,0,0 // vpand 0x795d(%rip),%xmm4,%xmm4 # b870 <_sk_clut_4D_avx+0x280d>
.byte 197,209,239,237 // vpxor %xmm5,%xmm5,%xmm5
.byte 197,217,105,237 // vpunpckhwd %xmm5,%xmm4,%xmm5
.byte 196,226,121,51,228 // vpmovzxwd %xmm4,%xmm4
.byte 196,227,93,24,229,1 // vinsertf128 $0x1,%xmm5,%ymm4,%ymm4
.byte 197,252,91,228 // vcvtdq2ps %ymm4,%ymm4
- .byte 196,226,125,24,45,149,114,0,0 // vbroadcastss 0x7295(%rip),%ymm5 # b07c <_sk_clut_4D_avx+0x23e5>
+ .byte 196,226,125,24,45,21,117,0,0 // vbroadcastss 0x7515(%rip),%ymm5 # b448 <_sk_clut_4D_avx+0x23e5>
.byte 197,220,89,229 // vmulps %ymm5,%ymm4,%ymm4
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,61,138,114,0,0 // vbroadcastss 0x728a(%rip),%ymm7 # b080 <_sk_clut_4D_avx+0x23e9>
+ .byte 196,226,125,24,61,10,117,0,0 // vbroadcastss 0x750a(%rip),%ymm7 # b44c <_sk_clut_4D_avx+0x23e9>
.byte 197,252,40,236 // vmovaps %ymm4,%ymm5
.byte 197,252,40,244 // vmovaps %ymm4,%ymm6
.byte 91 // pop %rbx
@@ -27751,15 +28182,15 @@ _sk_load_g8_dst_avx:
.byte 197,217,239,228 // vpxor %xmm4,%xmm4,%xmm4
.byte 65,254,202 // dec %r10b
.byte 65,128,250,6 // cmp $0x6,%r10b
- .byte 119,170 // ja 3dbf <_sk_load_g8_dst_avx+0x1f>
+ .byte 119,170 // ja 3f0b <_sk_load_g8_dst_avx+0x1f>
.byte 69,15,182,210 // movzbl %r10b,%r10d
- .byte 76,141,29,120,0,0,0 // lea 0x78(%rip),%r11 # 3e98 <_sk_load_g8_dst_avx+0xf8>
+ .byte 76,141,29,120,0,0,0 // lea 0x78(%rip),%r11 # 3fe4 <_sk_load_g8_dst_avx+0xf8>
.byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
.byte 65,15,182,4,25 // movzbl (%r9,%rbx,1),%eax
.byte 197,249,110,224 // vmovd %eax,%xmm4
- .byte 235,139 // jmp 3dbf <_sk_load_g8_dst_avx+0x1f>
+ .byte 235,139 // jmp 3f0b <_sk_load_g8_dst_avx+0x1f>
.byte 65,15,182,68,25,2 // movzbl 0x2(%r9,%rbx,1),%eax
.byte 197,217,239,228 // vpxor %xmm4,%xmm4,%xmm4
.byte 197,217,196,224,2 // vpinsrw $0x2,%eax,%xmm4,%xmm4
@@ -27767,7 +28198,7 @@ _sk_load_g8_dst_avx:
.byte 197,249,110,232 // vmovd %eax,%xmm5
.byte 196,226,121,48,237 // vpmovzxbw %xmm5,%xmm5
.byte 196,227,89,14,229,3 // vpblendw $0x3,%xmm5,%xmm4,%xmm4
- .byte 233,99,255,255,255 // jmpq 3dbf <_sk_load_g8_dst_avx+0x1f>
+ .byte 233,99,255,255,255 // jmpq 3f0b <_sk_load_g8_dst_avx+0x1f>
.byte 65,15,182,68,25,6 // movzbl 0x6(%r9,%rbx,1),%eax
.byte 197,217,239,228 // vpxor %xmm4,%xmm4,%xmm4
.byte 197,217,196,224,6 // vpinsrw $0x6,%eax,%xmm4,%xmm4
@@ -27778,7 +28209,7 @@ _sk_load_g8_dst_avx:
.byte 196,193,121,110,44,25 // vmovd (%r9,%rbx,1),%xmm5
.byte 196,226,121,48,237 // vpmovzxbw %xmm5,%xmm5
.byte 196,227,81,14,228,240 // vpblendw $0xf0,%xmm4,%xmm5,%xmm4
- .byte 233,40,255,255,255 // jmpq 3dbf <_sk_load_g8_dst_avx+0x1f>
+ .byte 233,40,255,255,255 // jmpq 3f0b <_sk_load_g8_dst_avx+0x1f>
.byte 144 // nop
.byte 145 // xchg %eax,%ecx
.byte 255 // (bad)
@@ -27787,7 +28218,7 @@ _sk_load_g8_dst_avx:
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 233,255,255,255,222 // jmpq ffffffffdf003ea8 <_sk_clut_4D_avx+0xffffffffdeffb211>
+ .byte 233,255,255,255,222 // jmpq ffffffffdf003ff4 <_sk_clut_4D_avx+0xffffffffdeffaf91>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,211 // callq *%rbx
@@ -27842,10 +28273,10 @@ _sk_gather_g8_avx:
.byte 196,226,121,49,192 // vpmovzxbd %xmm0,%xmm0
.byte 196,227,117,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,226,125,24,13,4,113,0,0 // vbroadcastss 0x7104(%rip),%ymm1 # b084 <_sk_clut_4D_avx+0x23ed>
+ .byte 196,226,125,24,13,132,115,0,0 // vbroadcastss 0x7384(%rip),%ymm1 # b450 <_sk_clut_4D_avx+0x23ed>
.byte 197,252,89,193 // vmulps %ymm1,%ymm0,%ymm0
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,29,249,112,0,0 // vbroadcastss 0x70f9(%rip),%ymm3 # b088 <_sk_clut_4D_avx+0x23f1>
+ .byte 196,226,125,24,29,121,115,0,0 // vbroadcastss 0x7379(%rip),%ymm3 # b454 <_sk_clut_4D_avx+0x23f1>
.byte 197,252,40,200 // vmovaps %ymm0,%ymm1
.byte 197,252,40,208 // vmovaps %ymm0,%ymm2
.byte 255,224 // jmpq *%rax
@@ -27863,29 +28294,29 @@ _sk_load_565_avx:
.byte 76,3,8 // add (%rax),%r9
.byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 15,133,129,0,0,0 // jne 403a <_sk_load_565_avx+0xa1>
+ .byte 15,133,129,0,0,0 // jne 4186 <_sk_load_565_avx+0xa1>
.byte 196,193,122,111,4,89 // vmovdqu (%r9,%rbx,2),%xmm0
.byte 197,241,239,201 // vpxor %xmm1,%xmm1,%xmm1
.byte 197,249,105,201 // vpunpckhwd %xmm1,%xmm0,%xmm1
.byte 196,226,121,51,192 // vpmovzxwd %xmm0,%xmm0
.byte 196,227,125,24,209,1 // vinsertf128 $0x1,%xmm1,%ymm0,%ymm2
- .byte 196,226,125,24,5,177,112,0,0 // vbroadcastss 0x70b1(%rip),%ymm0 # b08c <_sk_clut_4D_avx+0x23f5>
+ .byte 196,226,125,24,5,49,115,0,0 // vbroadcastss 0x7331(%rip),%ymm0 # b458 <_sk_clut_4D_avx+0x23f5>
.byte 197,236,84,192 // vandps %ymm0,%ymm2,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,226,125,24,13,164,112,0,0 // vbroadcastss 0x70a4(%rip),%ymm1 # b090 <_sk_clut_4D_avx+0x23f9>
+ .byte 196,226,125,24,13,36,115,0,0 // vbroadcastss 0x7324(%rip),%ymm1 # b45c <_sk_clut_4D_avx+0x23f9>
.byte 197,252,89,193 // vmulps %ymm1,%ymm0,%ymm0
- .byte 196,226,125,24,13,155,112,0,0 // vbroadcastss 0x709b(%rip),%ymm1 # b094 <_sk_clut_4D_avx+0x23fd>
+ .byte 196,226,125,24,13,27,115,0,0 // vbroadcastss 0x731b(%rip),%ymm1 # b460 <_sk_clut_4D_avx+0x23fd>
.byte 197,236,84,201 // vandps %ymm1,%ymm2,%ymm1
.byte 197,252,91,201 // vcvtdq2ps %ymm1,%ymm1
- .byte 196,226,125,24,29,142,112,0,0 // vbroadcastss 0x708e(%rip),%ymm3 # b098 <_sk_clut_4D_avx+0x2401>
+ .byte 196,226,125,24,29,14,115,0,0 // vbroadcastss 0x730e(%rip),%ymm3 # b464 <_sk_clut_4D_avx+0x2401>
.byte 197,244,89,203 // vmulps %ymm3,%ymm1,%ymm1
- .byte 196,226,125,24,29,133,112,0,0 // vbroadcastss 0x7085(%rip),%ymm3 # b09c <_sk_clut_4D_avx+0x2405>
+ .byte 196,226,125,24,29,5,115,0,0 // vbroadcastss 0x7305(%rip),%ymm3 # b468 <_sk_clut_4D_avx+0x2405>
.byte 197,236,84,211 // vandps %ymm3,%ymm2,%ymm2
.byte 197,252,91,210 // vcvtdq2ps %ymm2,%ymm2
- .byte 196,226,125,24,29,120,112,0,0 // vbroadcastss 0x7078(%rip),%ymm3 # b0a0 <_sk_clut_4D_avx+0x2409>
+ .byte 196,226,125,24,29,248,114,0,0 // vbroadcastss 0x72f8(%rip),%ymm3 # b46c <_sk_clut_4D_avx+0x2409>
.byte 197,236,89,211 // vmulps %ymm3,%ymm2,%ymm2
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,29,109,112,0,0 // vbroadcastss 0x706d(%rip),%ymm3 # b0a4 <_sk_clut_4D_avx+0x240d>
+ .byte 196,226,125,24,29,237,114,0,0 // vbroadcastss 0x72ed(%rip),%ymm3 # b470 <_sk_clut_4D_avx+0x240d>
.byte 91 // pop %rbx
.byte 255,224 // jmpq *%rax
.byte 69,137,194 // mov %r8d,%r10d
@@ -27893,27 +28324,27 @@ _sk_load_565_avx:
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 65,254,202 // dec %r10b
.byte 65,128,250,6 // cmp $0x6,%r10b
- .byte 15,135,109,255,255,255 // ja 3fbf <_sk_load_565_avx+0x26>
+ .byte 15,135,109,255,255,255 // ja 410b <_sk_load_565_avx+0x26>
.byte 69,15,182,210 // movzbl %r10b,%r10d
- .byte 76,141,29,99,0,0,0 // lea 0x63(%rip),%r11 # 40c0 <_sk_load_565_avx+0x127>
+ .byte 76,141,29,99,0,0,0 // lea 0x63(%rip),%r11 # 420c <_sk_load_565_avx+0x127>
.byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
.byte 65,15,183,4,89 // movzwl (%r9,%rbx,2),%eax
.byte 197,249,110,192 // vmovd %eax,%xmm0
- .byte 233,75,255,255,255 // jmpq 3fbf <_sk_load_565_avx+0x26>
+ .byte 233,75,255,255,255 // jmpq 410b <_sk_load_565_avx+0x26>
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 196,193,121,196,68,89,4,2 // vpinsrw $0x2,0x4(%r9,%rbx,2),%xmm0,%xmm0
.byte 196,193,121,110,12,89 // vmovd (%r9,%rbx,2),%xmm1
.byte 196,227,121,14,193,3 // vpblendw $0x3,%xmm1,%xmm0,%xmm0
- .byte 233,46,255,255,255 // jmpq 3fbf <_sk_load_565_avx+0x26>
+ .byte 233,46,255,255,255 // jmpq 410b <_sk_load_565_avx+0x26>
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 196,193,121,196,68,89,12,6 // vpinsrw $0x6,0xc(%r9,%rbx,2),%xmm0,%xmm0
.byte 196,193,121,196,68,89,10,5 // vpinsrw $0x5,0xa(%r9,%rbx,2),%xmm0,%xmm0
.byte 196,193,121,196,68,89,8,4 // vpinsrw $0x4,0x8(%r9,%rbx,2),%xmm0,%xmm0
.byte 196,193,122,126,12,89 // vmovq (%r9,%rbx,2),%xmm1
.byte 196,227,113,14,192,240 // vpblendw $0xf0,%xmm0,%xmm1,%xmm0
- .byte 233,1,255,255,255 // jmpq 3fbf <_sk_load_565_avx+0x26>
+ .byte 233,1,255,255,255 // jmpq 410b <_sk_load_565_avx+0x26>
.byte 102,144 // xchg %ax,%ax
.byte 166 // cmpsb %es:(%rdi),%ds:(%rsi)
.byte 255 // (bad)
@@ -27947,29 +28378,29 @@ _sk_load_565_dst_avx:
.byte 76,3,8 // add (%rax),%r9
.byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 15,133,129,0,0,0 // jne 417d <_sk_load_565_dst_avx+0xa1>
+ .byte 15,133,129,0,0,0 // jne 42c9 <_sk_load_565_dst_avx+0xa1>
.byte 196,193,122,111,36,89 // vmovdqu (%r9,%rbx,2),%xmm4
.byte 197,209,239,237 // vpxor %xmm5,%xmm5,%xmm5
.byte 197,217,105,237 // vpunpckhwd %xmm5,%xmm4,%xmm5
.byte 196,226,121,51,228 // vpmovzxwd %xmm4,%xmm4
.byte 196,227,93,24,245,1 // vinsertf128 $0x1,%xmm5,%ymm4,%ymm6
- .byte 196,226,125,24,37,138,111,0,0 // vbroadcastss 0x6f8a(%rip),%ymm4 # b0a8 <_sk_clut_4D_avx+0x2411>
+ .byte 196,226,125,24,37,10,114,0,0 // vbroadcastss 0x720a(%rip),%ymm4 # b474 <_sk_clut_4D_avx+0x2411>
.byte 197,204,84,228 // vandps %ymm4,%ymm6,%ymm4
.byte 197,252,91,228 // vcvtdq2ps %ymm4,%ymm4
- .byte 196,226,125,24,45,125,111,0,0 // vbroadcastss 0x6f7d(%rip),%ymm5 # b0ac <_sk_clut_4D_avx+0x2415>
+ .byte 196,226,125,24,45,253,113,0,0 // vbroadcastss 0x71fd(%rip),%ymm5 # b478 <_sk_clut_4D_avx+0x2415>
.byte 197,220,89,229 // vmulps %ymm5,%ymm4,%ymm4
- .byte 196,226,125,24,45,116,111,0,0 // vbroadcastss 0x6f74(%rip),%ymm5 # b0b0 <_sk_clut_4D_avx+0x2419>
+ .byte 196,226,125,24,45,244,113,0,0 // vbroadcastss 0x71f4(%rip),%ymm5 # b47c <_sk_clut_4D_avx+0x2419>
.byte 197,204,84,237 // vandps %ymm5,%ymm6,%ymm5
.byte 197,252,91,237 // vcvtdq2ps %ymm5,%ymm5
- .byte 196,226,125,24,61,103,111,0,0 // vbroadcastss 0x6f67(%rip),%ymm7 # b0b4 <_sk_clut_4D_avx+0x241d>
+ .byte 196,226,125,24,61,231,113,0,0 // vbroadcastss 0x71e7(%rip),%ymm7 # b480 <_sk_clut_4D_avx+0x241d>
.byte 197,212,89,239 // vmulps %ymm7,%ymm5,%ymm5
- .byte 196,226,125,24,61,94,111,0,0 // vbroadcastss 0x6f5e(%rip),%ymm7 # b0b8 <_sk_clut_4D_avx+0x2421>
+ .byte 196,226,125,24,61,222,113,0,0 // vbroadcastss 0x71de(%rip),%ymm7 # b484 <_sk_clut_4D_avx+0x2421>
.byte 197,204,84,247 // vandps %ymm7,%ymm6,%ymm6
.byte 197,252,91,246 // vcvtdq2ps %ymm6,%ymm6
- .byte 196,226,125,24,61,81,111,0,0 // vbroadcastss 0x6f51(%rip),%ymm7 # b0bc <_sk_clut_4D_avx+0x2425>
+ .byte 196,226,125,24,61,209,113,0,0 // vbroadcastss 0x71d1(%rip),%ymm7 # b488 <_sk_clut_4D_avx+0x2425>
.byte 197,204,89,247 // vmulps %ymm7,%ymm6,%ymm6
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,61,70,111,0,0 // vbroadcastss 0x6f46(%rip),%ymm7 # b0c0 <_sk_clut_4D_avx+0x2429>
+ .byte 196,226,125,24,61,198,113,0,0 // vbroadcastss 0x71c6(%rip),%ymm7 # b48c <_sk_clut_4D_avx+0x2429>
.byte 91 // pop %rbx
.byte 255,224 // jmpq *%rax
.byte 69,137,194 // mov %r8d,%r10d
@@ -27977,27 +28408,27 @@ _sk_load_565_dst_avx:
.byte 197,217,239,228 // vpxor %xmm4,%xmm4,%xmm4
.byte 65,254,202 // dec %r10b
.byte 65,128,250,6 // cmp $0x6,%r10b
- .byte 15,135,109,255,255,255 // ja 4102 <_sk_load_565_dst_avx+0x26>
+ .byte 15,135,109,255,255,255 // ja 424e <_sk_load_565_dst_avx+0x26>
.byte 69,15,182,210 // movzbl %r10b,%r10d
- .byte 76,141,29,100,0,0,0 // lea 0x64(%rip),%r11 # 4204 <_sk_load_565_dst_avx+0x128>
+ .byte 76,141,29,100,0,0,0 // lea 0x64(%rip),%r11 # 4350 <_sk_load_565_dst_avx+0x128>
.byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
.byte 65,15,183,4,89 // movzwl (%r9,%rbx,2),%eax
.byte 197,249,110,224 // vmovd %eax,%xmm4
- .byte 233,75,255,255,255 // jmpq 4102 <_sk_load_565_dst_avx+0x26>
+ .byte 233,75,255,255,255 // jmpq 424e <_sk_load_565_dst_avx+0x26>
.byte 197,217,239,228 // vpxor %xmm4,%xmm4,%xmm4
.byte 196,193,89,196,100,89,4,2 // vpinsrw $0x2,0x4(%r9,%rbx,2),%xmm4,%xmm4
.byte 196,193,121,110,44,89 // vmovd (%r9,%rbx,2),%xmm5
.byte 196,227,89,14,229,3 // vpblendw $0x3,%xmm5,%xmm4,%xmm4
- .byte 233,46,255,255,255 // jmpq 4102 <_sk_load_565_dst_avx+0x26>
+ .byte 233,46,255,255,255 // jmpq 424e <_sk_load_565_dst_avx+0x26>
.byte 197,217,239,228 // vpxor %xmm4,%xmm4,%xmm4
.byte 196,193,89,196,100,89,12,6 // vpinsrw $0x6,0xc(%r9,%rbx,2),%xmm4,%xmm4
.byte 196,193,89,196,100,89,10,5 // vpinsrw $0x5,0xa(%r9,%rbx,2),%xmm4,%xmm4
.byte 196,193,89,196,100,89,8,4 // vpinsrw $0x4,0x8(%r9,%rbx,2),%xmm4,%xmm4
.byte 196,193,122,126,44,89 // vmovq (%r9,%rbx,2),%xmm5
.byte 196,227,81,14,228,240 // vpblendw $0xf0,%xmm4,%xmm5,%xmm4
- .byte 233,1,255,255,255 // jmpq 4102 <_sk_load_565_dst_avx+0x26>
+ .byte 233,1,255,255,255 // jmpq 424e <_sk_load_565_dst_avx+0x26>
.byte 15,31,0 // nopl (%rax)
.byte 165 // movsl %ds:(%rsi),%es:(%rdi)
.byte 255 // (bad)
@@ -28069,23 +28500,23 @@ _sk_gather_565_avx:
.byte 197,249,105,201 // vpunpckhwd %xmm1,%xmm0,%xmm1
.byte 196,226,121,51,192 // vpmovzxwd %xmm0,%xmm0
.byte 196,227,125,24,209,1 // vinsertf128 $0x1,%xmm1,%ymm0,%ymm2
- .byte 196,226,125,24,5,210,109,0,0 // vbroadcastss 0x6dd2(%rip),%ymm0 # b0c4 <_sk_clut_4D_avx+0x242d>
+ .byte 196,226,125,24,5,82,112,0,0 // vbroadcastss 0x7052(%rip),%ymm0 # b490 <_sk_clut_4D_avx+0x242d>
.byte 197,236,84,192 // vandps %ymm0,%ymm2,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,226,125,24,13,197,109,0,0 // vbroadcastss 0x6dc5(%rip),%ymm1 # b0c8 <_sk_clut_4D_avx+0x2431>
+ .byte 196,226,125,24,13,69,112,0,0 // vbroadcastss 0x7045(%rip),%ymm1 # b494 <_sk_clut_4D_avx+0x2431>
.byte 197,252,89,193 // vmulps %ymm1,%ymm0,%ymm0
- .byte 196,226,125,24,13,188,109,0,0 // vbroadcastss 0x6dbc(%rip),%ymm1 # b0cc <_sk_clut_4D_avx+0x2435>
+ .byte 196,226,125,24,13,60,112,0,0 // vbroadcastss 0x703c(%rip),%ymm1 # b498 <_sk_clut_4D_avx+0x2435>
.byte 197,236,84,201 // vandps %ymm1,%ymm2,%ymm1
.byte 197,252,91,201 // vcvtdq2ps %ymm1,%ymm1
- .byte 196,226,125,24,29,175,109,0,0 // vbroadcastss 0x6daf(%rip),%ymm3 # b0d0 <_sk_clut_4D_avx+0x2439>
+ .byte 196,226,125,24,29,47,112,0,0 // vbroadcastss 0x702f(%rip),%ymm3 # b49c <_sk_clut_4D_avx+0x2439>
.byte 197,244,89,203 // vmulps %ymm3,%ymm1,%ymm1
- .byte 196,226,125,24,29,166,109,0,0 // vbroadcastss 0x6da6(%rip),%ymm3 # b0d4 <_sk_clut_4D_avx+0x243d>
+ .byte 196,226,125,24,29,38,112,0,0 // vbroadcastss 0x7026(%rip),%ymm3 # b4a0 <_sk_clut_4D_avx+0x243d>
.byte 197,236,84,211 // vandps %ymm3,%ymm2,%ymm2
.byte 197,252,91,210 // vcvtdq2ps %ymm2,%ymm2
- .byte 196,226,125,24,29,153,109,0,0 // vbroadcastss 0x6d99(%rip),%ymm3 # b0d8 <_sk_clut_4D_avx+0x2441>
+ .byte 196,226,125,24,29,25,112,0,0 // vbroadcastss 0x7019(%rip),%ymm3 # b4a4 <_sk_clut_4D_avx+0x2441>
.byte 197,236,89,211 // vmulps %ymm3,%ymm2,%ymm2
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,29,142,109,0,0 // vbroadcastss 0x6d8e(%rip),%ymm3 # b0dc <_sk_clut_4D_avx+0x2445>
+ .byte 196,226,125,24,29,14,112,0,0 // vbroadcastss 0x700e(%rip),%ymm3 # b4a8 <_sk_clut_4D_avx+0x2445>
.byte 255,224 // jmpq *%rax
HIDDEN _sk_store_565_avx
@@ -28100,14 +28531,14 @@ _sk_store_565_avx:
.byte 77,1,201 // add %r9,%r9
.byte 76,3,8 // add (%rax),%r9
.byte 72,99,218 // movslq %edx,%rbx
- .byte 196,98,125,24,5,112,109,0,0 // vbroadcastss 0x6d70(%rip),%ymm8 # b0e0 <_sk_clut_4D_avx+0x2449>
+ .byte 196,98,125,24,5,240,111,0,0 // vbroadcastss 0x6ff0(%rip),%ymm8 # b4ac <_sk_clut_4D_avx+0x2449>
.byte 196,65,124,89,200 // vmulps %ymm8,%ymm0,%ymm9
.byte 196,65,125,91,201 // vcvtps2dq %ymm9,%ymm9
.byte 196,193,41,114,241,11 // vpslld $0xb,%xmm9,%xmm10
.byte 196,67,125,25,201,1 // vextractf128 $0x1,%ymm9,%xmm9
.byte 196,193,49,114,241,11 // vpslld $0xb,%xmm9,%xmm9
.byte 196,67,45,24,201,1 // vinsertf128 $0x1,%xmm9,%ymm10,%ymm9
- .byte 196,98,125,24,21,73,109,0,0 // vbroadcastss 0x6d49(%rip),%ymm10 # b0e4 <_sk_clut_4D_avx+0x244d>
+ .byte 196,98,125,24,21,201,111,0,0 // vbroadcastss 0x6fc9(%rip),%ymm10 # b4b0 <_sk_clut_4D_avx+0x244d>
.byte 196,65,116,89,210 // vmulps %ymm10,%ymm1,%ymm10
.byte 196,65,125,91,210 // vcvtps2dq %ymm10,%ymm10
.byte 196,193,33,114,242,5 // vpslld $0x5,%xmm10,%xmm11
@@ -28121,7 +28552,7 @@ _sk_store_565_avx:
.byte 196,67,125,25,193,1 // vextractf128 $0x1,%ymm8,%xmm9
.byte 196,66,57,43,193 // vpackusdw %xmm9,%xmm8,%xmm8
.byte 77,133,192 // test %r8,%r8
- .byte 117,11 // jne 43ec <_sk_store_565_avx+0x9c>
+ .byte 117,11 // jne 4538 <_sk_store_565_avx+0x9c>
.byte 196,65,122,127,4,89 // vmovdqu %xmm8,(%r9,%rbx,2)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 91 // pop %rbx
@@ -28130,22 +28561,22 @@ _sk_store_565_avx:
.byte 65,128,226,7 // and $0x7,%r10b
.byte 65,254,202 // dec %r10b
.byte 65,128,250,6 // cmp $0x6,%r10b
- .byte 119,235 // ja 43e7 <_sk_store_565_avx+0x97>
+ .byte 119,235 // ja 4533 <_sk_store_565_avx+0x97>
.byte 69,15,182,210 // movzbl %r10b,%r10d
- .byte 76,141,29,69,0,0,0 // lea 0x45(%rip),%r11 # 444c <_sk_store_565_avx+0xfc>
+ .byte 76,141,29,69,0,0,0 // lea 0x45(%rip),%r11 # 4598 <_sk_store_565_avx+0xfc>
.byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
.byte 196,67,121,21,4,89,0 // vpextrw $0x0,%xmm8,(%r9,%rbx,2)
- .byte 235,206 // jmp 43e7 <_sk_store_565_avx+0x97>
+ .byte 235,206 // jmp 4533 <_sk_store_565_avx+0x97>
.byte 196,67,121,21,68,89,4,2 // vpextrw $0x2,%xmm8,0x4(%r9,%rbx,2)
.byte 196,65,121,126,4,89 // vmovd %xmm8,(%r9,%rbx,2)
- .byte 235,190 // jmp 43e7 <_sk_store_565_avx+0x97>
+ .byte 235,190 // jmp 4533 <_sk_store_565_avx+0x97>
.byte 196,67,121,21,68,89,12,6 // vpextrw $0x6,%xmm8,0xc(%r9,%rbx,2)
.byte 196,67,121,21,68,89,10,5 // vpextrw $0x5,%xmm8,0xa(%r9,%rbx,2)
.byte 196,67,121,21,68,89,8,4 // vpextrw $0x4,%xmm8,0x8(%r9,%rbx,2)
.byte 196,65,121,214,4,89 // vmovq %xmm8,(%r9,%rbx,2)
- .byte 235,158 // jmp 43e7 <_sk_store_565_avx+0x97>
+ .byte 235,158 // jmp 4533 <_sk_store_565_avx+0x97>
.byte 15,31,0 // nopl (%rax)
.byte 196 // (bad)
.byte 255 // (bad)
@@ -28184,31 +28615,31 @@ _sk_load_4444_avx:
.byte 76,3,8 // add (%rax),%r9
.byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 15,133,153,0,0,0 // jne 4521 <_sk_load_4444_avx+0xb9>
+ .byte 15,133,153,0,0,0 // jne 466d <_sk_load_4444_avx+0xb9>
.byte 196,193,122,111,4,89 // vmovdqu (%r9,%rbx,2),%xmm0
.byte 197,241,239,201 // vpxor %xmm1,%xmm1,%xmm1
.byte 197,249,105,201 // vpunpckhwd %xmm1,%xmm0,%xmm1
.byte 196,226,121,51,192 // vpmovzxwd %xmm0,%xmm0
.byte 196,227,125,24,217,1 // vinsertf128 $0x1,%xmm1,%ymm0,%ymm3
- .byte 196,226,125,24,5,62,108,0,0 // vbroadcastss 0x6c3e(%rip),%ymm0 # b0e8 <_sk_clut_4D_avx+0x2451>
+ .byte 196,226,125,24,5,190,110,0,0 // vbroadcastss 0x6ebe(%rip),%ymm0 # b4b4 <_sk_clut_4D_avx+0x2451>
.byte 197,228,84,192 // vandps %ymm0,%ymm3,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,226,125,24,13,49,108,0,0 // vbroadcastss 0x6c31(%rip),%ymm1 # b0ec <_sk_clut_4D_avx+0x2455>
+ .byte 196,226,125,24,13,177,110,0,0 // vbroadcastss 0x6eb1(%rip),%ymm1 # b4b8 <_sk_clut_4D_avx+0x2455>
.byte 197,252,89,193 // vmulps %ymm1,%ymm0,%ymm0
- .byte 196,226,125,24,13,40,108,0,0 // vbroadcastss 0x6c28(%rip),%ymm1 # b0f0 <_sk_clut_4D_avx+0x2459>
+ .byte 196,226,125,24,13,168,110,0,0 // vbroadcastss 0x6ea8(%rip),%ymm1 # b4bc <_sk_clut_4D_avx+0x2459>
.byte 197,228,84,201 // vandps %ymm1,%ymm3,%ymm1
.byte 197,252,91,201 // vcvtdq2ps %ymm1,%ymm1
- .byte 196,226,125,24,21,27,108,0,0 // vbroadcastss 0x6c1b(%rip),%ymm2 # b0f4 <_sk_clut_4D_avx+0x245d>
+ .byte 196,226,125,24,21,155,110,0,0 // vbroadcastss 0x6e9b(%rip),%ymm2 # b4c0 <_sk_clut_4D_avx+0x245d>
.byte 197,244,89,202 // vmulps %ymm2,%ymm1,%ymm1
- .byte 196,226,125,24,21,18,108,0,0 // vbroadcastss 0x6c12(%rip),%ymm2 # b0f8 <_sk_clut_4D_avx+0x2461>
+ .byte 196,226,125,24,21,146,110,0,0 // vbroadcastss 0x6e92(%rip),%ymm2 # b4c4 <_sk_clut_4D_avx+0x2461>
.byte 197,228,84,210 // vandps %ymm2,%ymm3,%ymm2
.byte 197,252,91,210 // vcvtdq2ps %ymm2,%ymm2
- .byte 196,98,125,24,5,5,108,0,0 // vbroadcastss 0x6c05(%rip),%ymm8 # b0fc <_sk_clut_4D_avx+0x2465>
+ .byte 196,98,125,24,5,133,110,0,0 // vbroadcastss 0x6e85(%rip),%ymm8 # b4c8 <_sk_clut_4D_avx+0x2465>
.byte 196,193,108,89,208 // vmulps %ymm8,%ymm2,%ymm2
- .byte 196,98,125,24,5,251,107,0,0 // vbroadcastss 0x6bfb(%rip),%ymm8 # b100 <_sk_clut_4D_avx+0x2469>
+ .byte 196,98,125,24,5,123,110,0,0 // vbroadcastss 0x6e7b(%rip),%ymm8 # b4cc <_sk_clut_4D_avx+0x2469>
.byte 196,193,100,84,216 // vandps %ymm8,%ymm3,%ymm3
.byte 197,252,91,219 // vcvtdq2ps %ymm3,%ymm3
- .byte 196,98,125,24,5,237,107,0,0 // vbroadcastss 0x6bed(%rip),%ymm8 # b104 <_sk_clut_4D_avx+0x246d>
+ .byte 196,98,125,24,5,109,110,0,0 // vbroadcastss 0x6e6d(%rip),%ymm8 # b4d0 <_sk_clut_4D_avx+0x246d>
.byte 196,193,100,89,216 // vmulps %ymm8,%ymm3,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 91 // pop %rbx
@@ -28218,27 +28649,27 @@ _sk_load_4444_avx:
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 65,254,202 // dec %r10b
.byte 65,128,250,6 // cmp $0x6,%r10b
- .byte 15,135,85,255,255,255 // ja 448e <_sk_load_4444_avx+0x26>
+ .byte 15,135,85,255,255,255 // ja 45da <_sk_load_4444_avx+0x26>
.byte 69,15,182,210 // movzbl %r10b,%r10d
- .byte 76,141,29,100,0,0,0 // lea 0x64(%rip),%r11 # 45a8 <_sk_load_4444_avx+0x140>
+ .byte 76,141,29,100,0,0,0 // lea 0x64(%rip),%r11 # 46f4 <_sk_load_4444_avx+0x140>
.byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
.byte 65,15,183,4,89 // movzwl (%r9,%rbx,2),%eax
.byte 197,249,110,192 // vmovd %eax,%xmm0
- .byte 233,51,255,255,255 // jmpq 448e <_sk_load_4444_avx+0x26>
+ .byte 233,51,255,255,255 // jmpq 45da <_sk_load_4444_avx+0x26>
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 196,193,121,196,68,89,4,2 // vpinsrw $0x2,0x4(%r9,%rbx,2),%xmm0,%xmm0
.byte 196,193,121,110,12,89 // vmovd (%r9,%rbx,2),%xmm1
.byte 196,227,121,14,193,3 // vpblendw $0x3,%xmm1,%xmm0,%xmm0
- .byte 233,22,255,255,255 // jmpq 448e <_sk_load_4444_avx+0x26>
+ .byte 233,22,255,255,255 // jmpq 45da <_sk_load_4444_avx+0x26>
.byte 197,249,239,192 // vpxor %xmm0,%xmm0,%xmm0
.byte 196,193,121,196,68,89,12,6 // vpinsrw $0x6,0xc(%r9,%rbx,2),%xmm0,%xmm0
.byte 196,193,121,196,68,89,10,5 // vpinsrw $0x5,0xa(%r9,%rbx,2),%xmm0,%xmm0
.byte 196,193,121,196,68,89,8,4 // vpinsrw $0x4,0x8(%r9,%rbx,2),%xmm0,%xmm0
.byte 196,193,122,126,12,89 // vmovq (%r9,%rbx,2),%xmm1
.byte 196,227,113,14,192,240 // vpblendw $0xf0,%xmm0,%xmm1,%xmm0
- .byte 233,233,254,255,255 // jmpq 448e <_sk_load_4444_avx+0x26>
+ .byte 233,233,254,255,255 // jmpq 45da <_sk_load_4444_avx+0x26>
.byte 15,31,0 // nopl (%rax)
.byte 165 // movsl %ds:(%rsi),%es:(%rdi)
.byte 255 // (bad)
@@ -28275,31 +28706,31 @@ _sk_load_4444_dst_avx:
.byte 76,3,8 // add (%rax),%r9
.byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 15,133,153,0,0,0 // jne 467d <_sk_load_4444_dst_avx+0xb9>
+ .byte 15,133,153,0,0,0 // jne 47c9 <_sk_load_4444_dst_avx+0xb9>
.byte 196,193,122,111,36,89 // vmovdqu (%r9,%rbx,2),%xmm4
.byte 197,209,239,237 // vpxor %xmm5,%xmm5,%xmm5
.byte 197,217,105,237 // vpunpckhwd %xmm5,%xmm4,%xmm5
.byte 196,226,121,51,228 // vpmovzxwd %xmm4,%xmm4
.byte 196,227,93,24,253,1 // vinsertf128 $0x1,%xmm5,%ymm4,%ymm7
- .byte 196,226,125,24,37,2,107,0,0 // vbroadcastss 0x6b02(%rip),%ymm4 # b108 <_sk_clut_4D_avx+0x2471>
+ .byte 196,226,125,24,37,130,109,0,0 // vbroadcastss 0x6d82(%rip),%ymm4 # b4d4 <_sk_clut_4D_avx+0x2471>
.byte 197,196,84,228 // vandps %ymm4,%ymm7,%ymm4
.byte 197,252,91,228 // vcvtdq2ps %ymm4,%ymm4
- .byte 196,226,125,24,45,245,106,0,0 // vbroadcastss 0x6af5(%rip),%ymm5 # b10c <_sk_clut_4D_avx+0x2475>
+ .byte 196,226,125,24,45,117,109,0,0 // vbroadcastss 0x6d75(%rip),%ymm5 # b4d8 <_sk_clut_4D_avx+0x2475>
.byte 197,220,89,229 // vmulps %ymm5,%ymm4,%ymm4
- .byte 196,226,125,24,45,236,106,0,0 // vbroadcastss 0x6aec(%rip),%ymm5 # b110 <_sk_clut_4D_avx+0x2479>
+ .byte 196,226,125,24,45,108,109,0,0 // vbroadcastss 0x6d6c(%rip),%ymm5 # b4dc <_sk_clut_4D_avx+0x2479>
.byte 197,196,84,237 // vandps %ymm5,%ymm7,%ymm5
.byte 197,252,91,237 // vcvtdq2ps %ymm5,%ymm5
- .byte 196,226,125,24,53,223,106,0,0 // vbroadcastss 0x6adf(%rip),%ymm6 # b114 <_sk_clut_4D_avx+0x247d>
+ .byte 196,226,125,24,53,95,109,0,0 // vbroadcastss 0x6d5f(%rip),%ymm6 # b4e0 <_sk_clut_4D_avx+0x247d>
.byte 197,212,89,238 // vmulps %ymm6,%ymm5,%ymm5
- .byte 196,226,125,24,53,214,106,0,0 // vbroadcastss 0x6ad6(%rip),%ymm6 # b118 <_sk_clut_4D_avx+0x2481>
+ .byte 196,226,125,24,53,86,109,0,0 // vbroadcastss 0x6d56(%rip),%ymm6 # b4e4 <_sk_clut_4D_avx+0x2481>
.byte 197,196,84,246 // vandps %ymm6,%ymm7,%ymm6
.byte 197,252,91,246 // vcvtdq2ps %ymm6,%ymm6
- .byte 196,98,125,24,5,201,106,0,0 // vbroadcastss 0x6ac9(%rip),%ymm8 # b11c <_sk_clut_4D_avx+0x2485>
+ .byte 196,98,125,24,5,73,109,0,0 // vbroadcastss 0x6d49(%rip),%ymm8 # b4e8 <_sk_clut_4D_avx+0x2485>
.byte 196,193,76,89,240 // vmulps %ymm8,%ymm6,%ymm6
- .byte 196,98,125,24,5,191,106,0,0 // vbroadcastss 0x6abf(%rip),%ymm8 # b120 <_sk_clut_4D_avx+0x2489>
+ .byte 196,98,125,24,5,63,109,0,0 // vbroadcastss 0x6d3f(%rip),%ymm8 # b4ec <_sk_clut_4D_avx+0x2489>
.byte 196,193,68,84,248 // vandps %ymm8,%ymm7,%ymm7
.byte 197,252,91,255 // vcvtdq2ps %ymm7,%ymm7
- .byte 196,98,125,24,5,177,106,0,0 // vbroadcastss 0x6ab1(%rip),%ymm8 # b124 <_sk_clut_4D_avx+0x248d>
+ .byte 196,98,125,24,5,49,109,0,0 // vbroadcastss 0x6d31(%rip),%ymm8 # b4f0 <_sk_clut_4D_avx+0x248d>
.byte 196,193,68,89,248 // vmulps %ymm8,%ymm7,%ymm7
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 91 // pop %rbx
@@ -28309,27 +28740,27 @@ _sk_load_4444_dst_avx:
.byte 197,217,239,228 // vpxor %xmm4,%xmm4,%xmm4
.byte 65,254,202 // dec %r10b
.byte 65,128,250,6 // cmp $0x6,%r10b
- .byte 15,135,85,255,255,255 // ja 45ea <_sk_load_4444_dst_avx+0x26>
+ .byte 15,135,85,255,255,255 // ja 4736 <_sk_load_4444_dst_avx+0x26>
.byte 69,15,182,210 // movzbl %r10b,%r10d
- .byte 76,141,29,100,0,0,0 // lea 0x64(%rip),%r11 # 4704 <_sk_load_4444_dst_avx+0x140>
+ .byte 76,141,29,100,0,0,0 // lea 0x64(%rip),%r11 # 4850 <_sk_load_4444_dst_avx+0x140>
.byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
.byte 65,15,183,4,89 // movzwl (%r9,%rbx,2),%eax
.byte 197,249,110,224 // vmovd %eax,%xmm4
- .byte 233,51,255,255,255 // jmpq 45ea <_sk_load_4444_dst_avx+0x26>
+ .byte 233,51,255,255,255 // jmpq 4736 <_sk_load_4444_dst_avx+0x26>
.byte 197,217,239,228 // vpxor %xmm4,%xmm4,%xmm4
.byte 196,193,89,196,100,89,4,2 // vpinsrw $0x2,0x4(%r9,%rbx,2),%xmm4,%xmm4
.byte 196,193,121,110,44,89 // vmovd (%r9,%rbx,2),%xmm5
.byte 196,227,89,14,229,3 // vpblendw $0x3,%xmm5,%xmm4,%xmm4
- .byte 233,22,255,255,255 // jmpq 45ea <_sk_load_4444_dst_avx+0x26>
+ .byte 233,22,255,255,255 // jmpq 4736 <_sk_load_4444_dst_avx+0x26>
.byte 197,217,239,228 // vpxor %xmm4,%xmm4,%xmm4
.byte 196,193,89,196,100,89,12,6 // vpinsrw $0x6,0xc(%r9,%rbx,2),%xmm4,%xmm4
.byte 196,193,89,196,100,89,10,5 // vpinsrw $0x5,0xa(%r9,%rbx,2),%xmm4,%xmm4
.byte 196,193,89,196,100,89,8,4 // vpinsrw $0x4,0x8(%r9,%rbx,2),%xmm4,%xmm4
.byte 196,193,122,126,44,89 // vmovq (%r9,%rbx,2),%xmm5
.byte 196,227,81,14,228,240 // vpblendw $0xf0,%xmm4,%xmm5,%xmm4
- .byte 233,233,254,255,255 // jmpq 45ea <_sk_load_4444_dst_avx+0x26>
+ .byte 233,233,254,255,255 // jmpq 4736 <_sk_load_4444_dst_avx+0x26>
.byte 15,31,0 // nopl (%rax)
.byte 165 // movsl %ds:(%rsi),%es:(%rdi)
.byte 255 // (bad)
@@ -28401,25 +28832,25 @@ _sk_gather_4444_avx:
.byte 197,249,105,201 // vpunpckhwd %xmm1,%xmm0,%xmm1
.byte 196,226,121,51,192 // vpmovzxwd %xmm0,%xmm0
.byte 196,227,125,24,217,1 // vinsertf128 $0x1,%xmm1,%ymm0,%ymm3
- .byte 196,226,125,24,5,54,105,0,0 // vbroadcastss 0x6936(%rip),%ymm0 # b128 <_sk_clut_4D_avx+0x2491>
+ .byte 196,226,125,24,5,182,107,0,0 // vbroadcastss 0x6bb6(%rip),%ymm0 # b4f4 <_sk_clut_4D_avx+0x2491>
.byte 197,228,84,192 // vandps %ymm0,%ymm3,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,226,125,24,13,41,105,0,0 // vbroadcastss 0x6929(%rip),%ymm1 # b12c <_sk_clut_4D_avx+0x2495>
+ .byte 196,226,125,24,13,169,107,0,0 // vbroadcastss 0x6ba9(%rip),%ymm1 # b4f8 <_sk_clut_4D_avx+0x2495>
.byte 197,252,89,193 // vmulps %ymm1,%ymm0,%ymm0
- .byte 196,226,125,24,13,32,105,0,0 // vbroadcastss 0x6920(%rip),%ymm1 # b130 <_sk_clut_4D_avx+0x2499>
+ .byte 196,226,125,24,13,160,107,0,0 // vbroadcastss 0x6ba0(%rip),%ymm1 # b4fc <_sk_clut_4D_avx+0x2499>
.byte 197,228,84,201 // vandps %ymm1,%ymm3,%ymm1
.byte 197,252,91,201 // vcvtdq2ps %ymm1,%ymm1
- .byte 196,226,125,24,21,19,105,0,0 // vbroadcastss 0x6913(%rip),%ymm2 # b134 <_sk_clut_4D_avx+0x249d>
+ .byte 196,226,125,24,21,147,107,0,0 // vbroadcastss 0x6b93(%rip),%ymm2 # b500 <_sk_clut_4D_avx+0x249d>
.byte 197,244,89,202 // vmulps %ymm2,%ymm1,%ymm1
- .byte 196,226,125,24,21,10,105,0,0 // vbroadcastss 0x690a(%rip),%ymm2 # b138 <_sk_clut_4D_avx+0x24a1>
+ .byte 196,226,125,24,21,138,107,0,0 // vbroadcastss 0x6b8a(%rip),%ymm2 # b504 <_sk_clut_4D_avx+0x24a1>
.byte 197,228,84,210 // vandps %ymm2,%ymm3,%ymm2
.byte 197,252,91,210 // vcvtdq2ps %ymm2,%ymm2
- .byte 196,98,125,24,5,253,104,0,0 // vbroadcastss 0x68fd(%rip),%ymm8 # b13c <_sk_clut_4D_avx+0x24a5>
+ .byte 196,98,125,24,5,125,107,0,0 // vbroadcastss 0x6b7d(%rip),%ymm8 # b508 <_sk_clut_4D_avx+0x24a5>
.byte 196,193,108,89,208 // vmulps %ymm8,%ymm2,%ymm2
- .byte 196,98,125,24,5,243,104,0,0 // vbroadcastss 0x68f3(%rip),%ymm8 # b140 <_sk_clut_4D_avx+0x24a9>
+ .byte 196,98,125,24,5,115,107,0,0 // vbroadcastss 0x6b73(%rip),%ymm8 # b50c <_sk_clut_4D_avx+0x24a9>
.byte 196,193,100,84,216 // vandps %ymm8,%ymm3,%ymm3
.byte 197,252,91,219 // vcvtdq2ps %ymm3,%ymm3
- .byte 196,98,125,24,5,229,104,0,0 // vbroadcastss 0x68e5(%rip),%ymm8 # b144 <_sk_clut_4D_avx+0x24ad>
+ .byte 196,98,125,24,5,101,107,0,0 // vbroadcastss 0x6b65(%rip),%ymm8 # b510 <_sk_clut_4D_avx+0x24ad>
.byte 196,193,100,89,216 // vmulps %ymm8,%ymm3,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -28436,7 +28867,7 @@ _sk_store_4444_avx:
.byte 77,1,201 // add %r9,%r9
.byte 76,3,8 // add (%rax),%r9
.byte 72,99,218 // movslq %edx,%rbx
- .byte 196,98,125,24,5,192,104,0,0 // vbroadcastss 0x68c0(%rip),%ymm8 # b148 <_sk_clut_4D_avx+0x24b1>
+ .byte 196,98,125,24,5,64,107,0,0 // vbroadcastss 0x6b40(%rip),%ymm8 # b514 <_sk_clut_4D_avx+0x24b1>
.byte 196,65,124,89,200 // vmulps %ymm8,%ymm0,%ymm9
.byte 196,65,125,91,201 // vcvtps2dq %ymm9,%ymm9
.byte 196,193,41,114,241,12 // vpslld $0xc,%xmm9,%xmm10
@@ -28463,7 +28894,7 @@ _sk_store_4444_avx:
.byte 196,67,125,25,193,1 // vextractf128 $0x1,%ymm8,%xmm9
.byte 196,66,57,43,193 // vpackusdw %xmm9,%xmm8,%xmm8
.byte 77,133,192 // test %r8,%r8
- .byte 117,11 // jne 4922 <_sk_store_4444_avx+0xba>
+ .byte 117,11 // jne 4a6e <_sk_store_4444_avx+0xba>
.byte 196,65,122,127,4,89 // vmovdqu %xmm8,(%r9,%rbx,2)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 91 // pop %rbx
@@ -28472,22 +28903,22 @@ _sk_store_4444_avx:
.byte 65,128,226,7 // and $0x7,%r10b
.byte 65,254,202 // dec %r10b
.byte 65,128,250,6 // cmp $0x6,%r10b
- .byte 119,235 // ja 491d <_sk_store_4444_avx+0xb5>
+ .byte 119,235 // ja 4a69 <_sk_store_4444_avx+0xb5>
.byte 69,15,182,210 // movzbl %r10b,%r10d
- .byte 76,141,29,67,0,0,0 // lea 0x43(%rip),%r11 # 4980 <_sk_store_4444_avx+0x118>
+ .byte 76,141,29,67,0,0,0 // lea 0x43(%rip),%r11 # 4acc <_sk_store_4444_avx+0x118>
.byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
.byte 196,67,121,21,4,89,0 // vpextrw $0x0,%xmm8,(%r9,%rbx,2)
- .byte 235,206 // jmp 491d <_sk_store_4444_avx+0xb5>
+ .byte 235,206 // jmp 4a69 <_sk_store_4444_avx+0xb5>
.byte 196,67,121,21,68,89,4,2 // vpextrw $0x2,%xmm8,0x4(%r9,%rbx,2)
.byte 196,65,121,126,4,89 // vmovd %xmm8,(%r9,%rbx,2)
- .byte 235,190 // jmp 491d <_sk_store_4444_avx+0xb5>
+ .byte 235,190 // jmp 4a69 <_sk_store_4444_avx+0xb5>
.byte 196,67,121,21,68,89,12,6 // vpextrw $0x6,%xmm8,0xc(%r9,%rbx,2)
.byte 196,67,121,21,68,89,10,5 // vpextrw $0x5,%xmm8,0xa(%r9,%rbx,2)
.byte 196,67,121,21,68,89,8,4 // vpextrw $0x4,%xmm8,0x8(%r9,%rbx,2)
.byte 196,65,121,214,4,89 // vmovq %xmm8,(%r9,%rbx,2)
- .byte 235,158 // jmp 491d <_sk_store_4444_avx+0xb5>
+ .byte 235,158 // jmp 4a69 <_sk_store_4444_avx+0xb5>
.byte 144 // nop
.byte 198 // (bad)
.byte 255 // (bad)
@@ -28517,115 +28948,193 @@ HIDDEN _sk_load_8888_avx
.globl _sk_load_8888_avx
FUNCTION(_sk_load_8888_avx)
_sk_load_8888_avx:
- .byte 73,137,201 // mov %rcx,%r9
+ .byte 83 // push %rbx
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,99,80,8 // movslq 0x8(%rax),%r10
- .byte 73,99,201 // movslq %r9d,%rcx
- .byte 73,15,175,202 // imul %r10,%rcx
- .byte 72,193,225,2 // shl $0x2,%rcx
- .byte 72,3,8 // add (%rax),%rcx
- .byte 72,99,194 // movslq %edx,%rax
- .byte 72,141,4,129 // lea (%rcx,%rax,4),%rax
+ .byte 76,99,201 // movslq %ecx,%r9
+ .byte 77,15,175,202 // imul %r10,%r9
+ .byte 73,193,225,2 // shl $0x2,%r9
+ .byte 76,3,8 // add (%rax),%r9
+ .byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 15,133,136,0,0,0 // jne 4a4b <_sk_load_8888_avx+0xaf>
- .byte 197,252,16,24 // vmovups (%rax),%ymm3
- .byte 197,124,40,21,17,107,0,0 // vmovaps 0x6b11(%rip),%ymm10 # b4e0 <_sk_clut_4D_avx+0x2849>
- .byte 196,193,100,84,194 // vandps %ymm10,%ymm3,%ymm0
+ .byte 15,133,136,0,0,0 // jne 4b91 <_sk_load_8888_avx+0xa9>
+ .byte 196,65,125,16,12,153 // vmovupd (%r9,%rbx,4),%ymm9
+ .byte 197,125,40,21,9,108,0,0 // vmovapd 0x6c09(%rip),%ymm10 # b720 <_sk_clut_4D_avx+0x26bd>
+ .byte 196,193,53,84,194 // vandpd %ymm10,%ymm9,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,98,125,24,5,107,103,0,0 // vbroadcastss 0x676b(%rip),%ymm8 # b14c <_sk_clut_4D_avx+0x24b5>
+ .byte 196,98,125,24,5,239,105,0,0 // vbroadcastss 0x69ef(%rip),%ymm8 # b518 <_sk_clut_4D_avx+0x24b5>
.byte 196,193,124,89,192 // vmulps %ymm8,%ymm0,%ymm0
- .byte 197,241,114,211,8 // vpsrld $0x8,%xmm3,%xmm1
- .byte 196,195,125,25,217,1 // vextractf128 $0x1,%ymm3,%xmm9
- .byte 196,193,105,114,209,8 // vpsrld $0x8,%xmm9,%xmm2
+ .byte 196,193,113,114,209,8 // vpsrld $0x8,%xmm9,%xmm1
+ .byte 196,99,125,25,203,1 // vextractf128 $0x1,%ymm9,%xmm3
+ .byte 197,233,114,211,8 // vpsrld $0x8,%xmm3,%xmm2
.byte 196,227,117,24,202,1 // vinsertf128 $0x1,%xmm2,%ymm1,%ymm1
- .byte 196,193,116,84,202 // vandps %ymm10,%ymm1,%ymm1
+ .byte 196,193,117,84,202 // vandpd %ymm10,%ymm1,%ymm1
.byte 197,252,91,201 // vcvtdq2ps %ymm1,%ymm1
.byte 196,193,116,89,200 // vmulps %ymm8,%ymm1,%ymm1
- .byte 197,161,114,211,16 // vpsrld $0x10,%xmm3,%xmm11
- .byte 196,193,105,114,209,16 // vpsrld $0x10,%xmm9,%xmm2
+ .byte 196,193,33,114,209,16 // vpsrld $0x10,%xmm9,%xmm11
+ .byte 197,233,114,211,16 // vpsrld $0x10,%xmm3,%xmm2
.byte 196,227,37,24,210,1 // vinsertf128 $0x1,%xmm2,%ymm11,%ymm2
- .byte 196,193,108,84,210 // vandps %ymm10,%ymm2,%ymm2
+ .byte 196,193,109,84,210 // vandpd %ymm10,%ymm2,%ymm2
.byte 197,252,91,210 // vcvtdq2ps %ymm2,%ymm2
.byte 196,193,108,89,208 // vmulps %ymm8,%ymm2,%ymm2
- .byte 197,169,114,211,24 // vpsrld $0x18,%xmm3,%xmm10
- .byte 196,193,97,114,209,24 // vpsrld $0x18,%xmm9,%xmm3
- .byte 196,227,45,24,219,1 // vinsertf128 $0x1,%xmm3,%ymm10,%ymm3
+ .byte 196,193,49,114,209,24 // vpsrld $0x18,%xmm9,%xmm9
+ .byte 197,225,114,211,24 // vpsrld $0x18,%xmm3,%xmm3
+ .byte 196,227,53,24,219,1 // vinsertf128 $0x1,%xmm3,%ymm9,%ymm3
.byte 197,252,91,219 // vcvtdq2ps %ymm3,%ymm3
.byte 196,193,100,89,216 // vmulps %ymm8,%ymm3,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 76,137,201 // mov %r9,%rcx
- .byte 255,224 // jmpq *%rax
- .byte 185,8,0,0,0 // mov $0x8,%ecx
- .byte 68,41,193 // sub %r8d,%ecx
- .byte 192,225,3 // shl $0x3,%cl
- .byte 73,199,194,255,255,255,255 // mov $0xffffffffffffffff,%r10
- .byte 73,211,234 // shr %cl,%r10
- .byte 196,193,249,110,194 // vmovq %r10,%xmm0
- .byte 196,226,121,48,192 // vpmovzxbw %xmm0,%xmm0
- .byte 196,226,121,0,13,109,105,0,0 // vpshufb 0x696d(%rip),%xmm0,%xmm1 # b3e0 <_sk_clut_4D_avx+0x2749>
- .byte 196,226,121,33,201 // vpmovsxbd %xmm1,%xmm1
- .byte 196,226,121,0,5,111,105,0,0 // vpshufb 0x696f(%rip),%xmm0,%xmm0 # b3f0 <_sk_clut_4D_avx+0x2759>
- .byte 196,226,121,33,192 // vpmovsxbd %xmm0,%xmm0
- .byte 196,227,117,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
- .byte 196,226,125,44,24 // vmaskmovps (%rax),%ymm0,%ymm3
- .byte 233,49,255,255,255 // jmpq 49c7 <_sk_load_8888_avx+0x2b>
+ .byte 91 // pop %rbx
+ .byte 255,224 // jmpq *%rax
+ .byte 69,137,194 // mov %r8d,%r10d
+ .byte 65,128,226,7 // and $0x7,%r10b
+ .byte 196,65,52,87,201 // vxorps %ymm9,%ymm9,%ymm9
+ .byte 65,254,202 // dec %r10b
+ .byte 65,128,250,6 // cmp $0x6,%r10b
+ .byte 15,135,101,255,255,255 // ja 4b0f <_sk_load_8888_avx+0x27>
+ .byte 69,15,182,210 // movzbl %r10b,%r10d
+ .byte 76,141,29,147,0,0,0 // lea 0x93(%rip),%r11 # 4c48 <_sk_load_8888_avx+0x160>
+ .byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
+ .byte 76,1,216 // add %r11,%rax
+ .byte 255,224 // jmpq *%rax
+ .byte 196,65,122,16,12,153 // vmovss (%r9,%rbx,4),%xmm9
+ .byte 233,70,255,255,255 // jmpq 4b0f <_sk_load_8888_avx+0x27>
+ .byte 196,193,121,110,68,153,8 // vmovd 0x8(%r9,%rbx,4),%xmm0
+ .byte 197,249,112,192,68 // vpshufd $0x44,%xmm0,%xmm0
+ .byte 197,244,87,201 // vxorps %ymm1,%ymm1,%ymm1
+ .byte 196,99,117,12,200,4 // vblendps $0x4,%ymm0,%ymm1,%ymm9
+ .byte 196,193,123,16,4,153 // vmovsd (%r9,%rbx,4),%xmm0
+ .byte 196,99,53,13,200,1 // vblendpd $0x1,%ymm0,%ymm9,%ymm9
+ .byte 233,31,255,255,255 // jmpq 4b0f <_sk_load_8888_avx+0x27>
+ .byte 196,193,121,110,68,153,24 // vmovd 0x18(%r9,%rbx,4),%xmm0
+ .byte 197,249,112,192,68 // vpshufd $0x44,%xmm0,%xmm0
+ .byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ .byte 197,244,87,201 // vxorps %ymm1,%ymm1,%ymm1
+ .byte 196,99,117,12,200,64 // vblendps $0x40,%ymm0,%ymm1,%ymm9
+ .byte 196,99,125,25,200,1 // vextractf128 $0x1,%ymm9,%xmm0
+ .byte 196,195,121,34,68,153,20,1 // vpinsrd $0x1,0x14(%r9,%rbx,4),%xmm0,%xmm0
+ .byte 196,99,53,24,200,1 // vinsertf128 $0x1,%xmm0,%ymm9,%ymm9
+ .byte 196,99,125,25,200,1 // vextractf128 $0x1,%ymm9,%xmm0
+ .byte 196,195,121,34,68,153,16,0 // vpinsrd $0x0,0x10(%r9,%rbx,4),%xmm0,%xmm0
+ .byte 196,99,53,24,200,1 // vinsertf128 $0x1,%xmm0,%ymm9,%ymm9
+ .byte 196,193,121,16,4,153 // vmovupd (%r9,%rbx,4),%xmm0
+ .byte 196,67,125,13,201,12 // vblendpd $0xc,%ymm9,%ymm0,%ymm9
+ .byte 233,202,254,255,255 // jmpq 4b0f <_sk_load_8888_avx+0x27>
+ .byte 15,31,0 // nopl (%rax)
+ .byte 118,255 // jbe 4c49 <_sk_load_8888_avx+0x161>
+ .byte 255 // (bad)
+ .byte 255,151,255,255,255,129 // callq *-0x7e000001(%rdi)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 236 // in (%dx),%al
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 216,255 // fdivr %st(7),%st
+ .byte 255 // (bad)
+ .byte 255,196 // inc %esp
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // .byte 0xff
+ .byte 168,255 // test $0xff,%al
+ .byte 255 // (bad)
+ .byte 255 // .byte 0xff
HIDDEN _sk_load_8888_dst_avx
.globl _sk_load_8888_dst_avx
FUNCTION(_sk_load_8888_dst_avx)
_sk_load_8888_dst_avx:
- .byte 73,137,201 // mov %rcx,%r9
+ .byte 83 // push %rbx
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,99,80,8 // movslq 0x8(%rax),%r10
- .byte 73,99,201 // movslq %r9d,%rcx
- .byte 73,15,175,202 // imul %r10,%rcx
- .byte 72,193,225,2 // shl $0x2,%rcx
- .byte 72,3,8 // add (%rax),%rcx
- .byte 72,99,194 // movslq %edx,%rax
- .byte 72,141,4,129 // lea (%rcx,%rax,4),%rax
+ .byte 76,99,201 // movslq %ecx,%r9
+ .byte 77,15,175,202 // imul %r10,%r9
+ .byte 73,193,225,2 // shl $0x2,%r9
+ .byte 76,3,8 // add (%rax),%r9
+ .byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 15,133,136,0,0,0 // jne 4b45 <_sk_load_8888_dst_avx+0xaf>
- .byte 197,252,16,56 // vmovups (%rax),%ymm7
- .byte 197,124,40,21,55,106,0,0 // vmovaps 0x6a37(%rip),%ymm10 # b500 <_sk_clut_4D_avx+0x2869>
- .byte 196,193,68,84,226 // vandps %ymm10,%ymm7,%ymm4
+ .byte 15,133,136,0,0,0 // jne 4d0d <_sk_load_8888_dst_avx+0xa9>
+ .byte 196,65,125,16,12,153 // vmovupd (%r9,%rbx,4),%ymm9
+ .byte 197,125,40,21,173,106,0,0 // vmovapd 0x6aad(%rip),%ymm10 # b740 <_sk_clut_4D_avx+0x26dd>
+ .byte 196,193,53,84,226 // vandpd %ymm10,%ymm9,%ymm4
.byte 197,252,91,228 // vcvtdq2ps %ymm4,%ymm4
- .byte 196,98,125,24,5,117,102,0,0 // vbroadcastss 0x6675(%rip),%ymm8 # b150 <_sk_clut_4D_avx+0x24b9>
+ .byte 196,98,125,24,5,119,104,0,0 // vbroadcastss 0x6877(%rip),%ymm8 # b51c <_sk_clut_4D_avx+0x24b9>
.byte 196,193,92,89,224 // vmulps %ymm8,%ymm4,%ymm4
- .byte 197,209,114,215,8 // vpsrld $0x8,%xmm7,%xmm5
- .byte 196,195,125,25,249,1 // vextractf128 $0x1,%ymm7,%xmm9
- .byte 196,193,73,114,209,8 // vpsrld $0x8,%xmm9,%xmm6
+ .byte 196,193,81,114,209,8 // vpsrld $0x8,%xmm9,%xmm5
+ .byte 196,99,125,25,207,1 // vextractf128 $0x1,%ymm9,%xmm7
+ .byte 197,201,114,215,8 // vpsrld $0x8,%xmm7,%xmm6
.byte 196,227,85,24,238,1 // vinsertf128 $0x1,%xmm6,%ymm5,%ymm5
- .byte 196,193,84,84,234 // vandps %ymm10,%ymm5,%ymm5
+ .byte 196,193,85,84,234 // vandpd %ymm10,%ymm5,%ymm5
.byte 197,252,91,237 // vcvtdq2ps %ymm5,%ymm5
.byte 196,193,84,89,232 // vmulps %ymm8,%ymm5,%ymm5
- .byte 197,161,114,215,16 // vpsrld $0x10,%xmm7,%xmm11
- .byte 196,193,73,114,209,16 // vpsrld $0x10,%xmm9,%xmm6
+ .byte 196,193,33,114,209,16 // vpsrld $0x10,%xmm9,%xmm11
+ .byte 197,201,114,215,16 // vpsrld $0x10,%xmm7,%xmm6
.byte 196,227,37,24,246,1 // vinsertf128 $0x1,%xmm6,%ymm11,%ymm6
- .byte 196,193,76,84,242 // vandps %ymm10,%ymm6,%ymm6
+ .byte 196,193,77,84,242 // vandpd %ymm10,%ymm6,%ymm6
.byte 197,252,91,246 // vcvtdq2ps %ymm6,%ymm6
.byte 196,193,76,89,240 // vmulps %ymm8,%ymm6,%ymm6
- .byte 197,169,114,215,24 // vpsrld $0x18,%xmm7,%xmm10
- .byte 196,193,65,114,209,24 // vpsrld $0x18,%xmm9,%xmm7
- .byte 196,227,45,24,255,1 // vinsertf128 $0x1,%xmm7,%ymm10,%ymm7
+ .byte 196,193,49,114,209,24 // vpsrld $0x18,%xmm9,%xmm9
+ .byte 197,193,114,215,24 // vpsrld $0x18,%xmm7,%xmm7
+ .byte 196,227,53,24,255,1 // vinsertf128 $0x1,%xmm7,%ymm9,%ymm7
.byte 197,252,91,255 // vcvtdq2ps %ymm7,%ymm7
.byte 196,193,68,89,248 // vmulps %ymm8,%ymm7,%ymm7
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 76,137,201 // mov %r9,%rcx
- .byte 255,224 // jmpq *%rax
- .byte 185,8,0,0,0 // mov $0x8,%ecx
- .byte 68,41,193 // sub %r8d,%ecx
- .byte 192,225,3 // shl $0x3,%cl
- .byte 73,199,194,255,255,255,255 // mov $0xffffffffffffffff,%r10
- .byte 73,211,234 // shr %cl,%r10
- .byte 196,193,249,110,226 // vmovq %r10,%xmm4
- .byte 196,226,121,48,228 // vpmovzxbw %xmm4,%xmm4
- .byte 196,226,89,0,45,147,104,0,0 // vpshufb 0x6893(%rip),%xmm4,%xmm5 # b400 <_sk_clut_4D_avx+0x2769>
- .byte 196,226,121,33,237 // vpmovsxbd %xmm5,%xmm5
- .byte 196,226,89,0,37,149,104,0,0 // vpshufb 0x6895(%rip),%xmm4,%xmm4 # b410 <_sk_clut_4D_avx+0x2779>
- .byte 196,226,121,33,228 // vpmovsxbd %xmm4,%xmm4
- .byte 196,227,85,24,228,1 // vinsertf128 $0x1,%xmm4,%ymm5,%ymm4
- .byte 196,226,93,44,56 // vmaskmovps (%rax),%ymm4,%ymm7
- .byte 233,49,255,255,255 // jmpq 4ac1 <_sk_load_8888_dst_avx+0x2b>
+ .byte 91 // pop %rbx
+ .byte 255,224 // jmpq *%rax
+ .byte 69,137,194 // mov %r8d,%r10d
+ .byte 65,128,226,7 // and $0x7,%r10b
+ .byte 196,65,52,87,201 // vxorps %ymm9,%ymm9,%ymm9
+ .byte 65,254,202 // dec %r10b
+ .byte 65,128,250,6 // cmp $0x6,%r10b
+ .byte 15,135,101,255,255,255 // ja 4c8b <_sk_load_8888_dst_avx+0x27>
+ .byte 69,15,182,210 // movzbl %r10b,%r10d
+ .byte 76,141,29,147,0,0,0 // lea 0x93(%rip),%r11 # 4dc4 <_sk_load_8888_dst_avx+0x160>
+ .byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
+ .byte 76,1,216 // add %r11,%rax
+ .byte 255,224 // jmpq *%rax
+ .byte 196,65,122,16,12,153 // vmovss (%r9,%rbx,4),%xmm9
+ .byte 233,70,255,255,255 // jmpq 4c8b <_sk_load_8888_dst_avx+0x27>
+ .byte 196,193,121,110,100,153,8 // vmovd 0x8(%r9,%rbx,4),%xmm4
+ .byte 197,249,112,228,68 // vpshufd $0x44,%xmm4,%xmm4
+ .byte 197,212,87,237 // vxorps %ymm5,%ymm5,%ymm5
+ .byte 196,99,85,12,204,4 // vblendps $0x4,%ymm4,%ymm5,%ymm9
+ .byte 196,193,123,16,36,153 // vmovsd (%r9,%rbx,4),%xmm4
+ .byte 196,99,53,13,204,1 // vblendpd $0x1,%ymm4,%ymm9,%ymm9
+ .byte 233,31,255,255,255 // jmpq 4c8b <_sk_load_8888_dst_avx+0x27>
+ .byte 196,193,121,110,100,153,24 // vmovd 0x18(%r9,%rbx,4),%xmm4
+ .byte 197,249,112,228,68 // vpshufd $0x44,%xmm4,%xmm4
+ .byte 196,227,125,24,228,1 // vinsertf128 $0x1,%xmm4,%ymm0,%ymm4
+ .byte 197,212,87,237 // vxorps %ymm5,%ymm5,%ymm5
+ .byte 196,99,85,12,204,64 // vblendps $0x40,%ymm4,%ymm5,%ymm9
+ .byte 196,99,125,25,204,1 // vextractf128 $0x1,%ymm9,%xmm4
+ .byte 196,195,89,34,100,153,20,1 // vpinsrd $0x1,0x14(%r9,%rbx,4),%xmm4,%xmm4
+ .byte 196,99,53,24,204,1 // vinsertf128 $0x1,%xmm4,%ymm9,%ymm9
+ .byte 196,99,125,25,204,1 // vextractf128 $0x1,%ymm9,%xmm4
+ .byte 196,195,89,34,100,153,16,0 // vpinsrd $0x0,0x10(%r9,%rbx,4),%xmm4,%xmm4
+ .byte 196,99,53,24,204,1 // vinsertf128 $0x1,%xmm4,%ymm9,%ymm9
+ .byte 196,193,121,16,36,153 // vmovupd (%r9,%rbx,4),%xmm4
+ .byte 196,67,93,13,201,12 // vblendpd $0xc,%ymm9,%ymm4,%ymm9
+ .byte 233,202,254,255,255 // jmpq 4c8b <_sk_load_8888_dst_avx+0x27>
+ .byte 15,31,0 // nopl (%rax)
+ .byte 118,255 // jbe 4dc5 <_sk_load_8888_dst_avx+0x161>
+ .byte 255 // (bad)
+ .byte 255,151,255,255,255,129 // callq *-0x7e000001(%rdi)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 236 // in (%dx),%al
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 216,255 // fdivr %st(7),%st
+ .byte 255 // (bad)
+ .byte 255,196 // inc %esp
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // .byte 0xff
+ .byte 168,255 // test $0xff,%al
+ .byte 255 // (bad)
+ .byte 255 // .byte 0xff
HIDDEN _sk_gather_8888_avx
.globl _sk_gather_8888_avx
@@ -28664,10 +29173,10 @@ _sk_gather_8888_avx:
.byte 73,193,234,32 // shr $0x20,%r10
.byte 196,131,121,34,28,145,3 // vpinsrd $0x3,(%r9,%r10,4),%xmm0,%xmm3
.byte 196,227,61,24,195,1 // vinsertf128 $0x1,%xmm3,%ymm8,%ymm0
- .byte 197,124,40,21,229,104,0,0 // vmovaps 0x68e5(%rip),%ymm10 # b520 <_sk_clut_4D_avx+0x2889>
+ .byte 197,124,40,21,213,104,0,0 // vmovaps 0x68d5(%rip),%ymm10 # b760 <_sk_clut_4D_avx+0x26fd>
.byte 196,193,124,84,194 // vandps %ymm10,%ymm0,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,98,125,24,13,7,101,0,0 // vbroadcastss 0x6507(%rip),%ymm9 # b154 <_sk_clut_4D_avx+0x24bd>
+ .byte 196,98,125,24,13,131,102,0,0 // vbroadcastss 0x6683(%rip),%ymm9 # b520 <_sk_clut_4D_avx+0x24bd>
.byte 196,193,124,89,193 // vmulps %ymm9,%ymm0,%ymm0
.byte 196,193,113,114,208,8 // vpsrld $0x8,%xmm8,%xmm1
.byte 197,233,114,211,8 // vpsrld $0x8,%xmm3,%xmm2
@@ -28693,16 +29202,15 @@ HIDDEN _sk_store_8888_avx
.globl _sk_store_8888_avx
FUNCTION(_sk_store_8888_avx)
_sk_store_8888_avx:
- .byte 73,137,201 // mov %rcx,%r9
+ .byte 83 // push %rbx
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,99,80,8 // movslq 0x8(%rax),%r10
- .byte 73,99,201 // movslq %r9d,%rcx
- .byte 73,15,175,202 // imul %r10,%rcx
- .byte 72,193,225,2 // shl $0x2,%rcx
- .byte 72,3,8 // add (%rax),%rcx
- .byte 72,99,194 // movslq %edx,%rax
- .byte 72,141,4,129 // lea (%rcx,%rax,4),%rax
- .byte 196,98,125,24,5,131,100,0,0 // vbroadcastss 0x6483(%rip),%ymm8 # b158 <_sk_clut_4D_avx+0x24c1>
+ .byte 76,99,201 // movslq %ecx,%r9
+ .byte 77,15,175,202 // imul %r10,%r9
+ .byte 73,193,225,2 // shl $0x2,%r9
+ .byte 76,3,8 // add (%rax),%r9
+ .byte 72,99,218 // movslq %edx,%rbx
+ .byte 196,98,125,24,5,5,102,0,0 // vbroadcastss 0x6605(%rip),%ymm8 # b524 <_sk_clut_4D_avx+0x24c1>
.byte 196,65,124,89,200 // vmulps %ymm8,%ymm0,%ymm9
.byte 196,65,125,91,201 // vcvtps2dq %ymm9,%ymm9
.byte 196,65,116,89,208 // vmulps %ymm8,%ymm1,%ymm10
@@ -28727,139 +29235,247 @@ _sk_store_8888_avx:
.byte 196,65,45,86,192 // vorpd %ymm8,%ymm10,%ymm8
.byte 196,65,53,86,192 // vorpd %ymm8,%ymm9,%ymm8
.byte 77,133,192 // test %r8,%r8
- .byte 117,11 // jne 4d64 <_sk_store_8888_avx+0xb6>
- .byte 197,124,17,0 // vmovups %ymm8,(%rax)
+ .byte 117,11 // jne 4fae <_sk_store_8888_avx+0xb0>
+ .byte 196,65,124,17,4,153 // vmovups %ymm8,(%r9,%rbx,4)
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 76,137,201 // mov %r9,%rcx
+ .byte 91 // pop %rbx
.byte 255,224 // jmpq *%rax
- .byte 185,8,0,0,0 // mov $0x8,%ecx
- .byte 68,41,193 // sub %r8d,%ecx
- .byte 192,225,3 // shl $0x3,%cl
- .byte 73,199,194,255,255,255,255 // mov $0xffffffffffffffff,%r10
- .byte 73,211,234 // shr %cl,%r10
- .byte 196,65,249,110,202 // vmovq %r10,%xmm9
- .byte 196,66,121,48,201 // vpmovzxbw %xmm9,%xmm9
- .byte 196,98,49,0,21,148,102,0,0 // vpshufb 0x6694(%rip),%xmm9,%xmm10 # b420 <_sk_clut_4D_avx+0x2789>
- .byte 196,66,121,33,210 // vpmovsxbd %xmm10,%xmm10
- .byte 196,98,49,0,13,150,102,0,0 // vpshufb 0x6696(%rip),%xmm9,%xmm9 # b430 <_sk_clut_4D_avx+0x2799>
- .byte 196,66,121,33,201 // vpmovsxbd %xmm9,%xmm9
- .byte 196,67,45,24,201,1 // vinsertf128 $0x1,%xmm9,%ymm10,%ymm9
- .byte 196,98,53,46,0 // vmaskmovps %ymm8,%ymm9,(%rax)
- .byte 235,177 // jmp 4d5d <_sk_store_8888_avx+0xaf>
+ .byte 69,137,194 // mov %r8d,%r10d
+ .byte 65,128,226,7 // and $0x7,%r10b
+ .byte 65,254,202 // dec %r10b
+ .byte 65,128,250,6 // cmp $0x6,%r10b
+ .byte 119,235 // ja 4fa9 <_sk_store_8888_avx+0xab>
+ .byte 69,15,182,210 // movzbl %r10b,%r10d
+ .byte 76,141,29,83,0,0,0 // lea 0x53(%rip),%r11 # 501c <_sk_store_8888_avx+0x11e>
+ .byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
+ .byte 76,1,216 // add %r11,%rax
+ .byte 255,224 // jmpq *%rax
+ .byte 196,65,121,126,4,153 // vmovd %xmm8,(%r9,%rbx,4)
+ .byte 235,207 // jmp 4fa9 <_sk_store_8888_avx+0xab>
+ .byte 196,67,121,22,68,153,8,2 // vpextrd $0x2,%xmm8,0x8(%r9,%rbx,4)
+ .byte 196,65,121,214,4,153 // vmovq %xmm8,(%r9,%rbx,4)
+ .byte 235,191 // jmp 4fa9 <_sk_store_8888_avx+0xab>
+ .byte 196,67,125,25,193,1 // vextractf128 $0x1,%ymm8,%xmm9
+ .byte 196,67,121,22,76,153,24,2 // vpextrd $0x2,%xmm9,0x18(%r9,%rbx,4)
+ .byte 196,67,125,25,193,1 // vextractf128 $0x1,%ymm8,%xmm9
+ .byte 196,67,121,22,76,153,20,1 // vpextrd $0x1,%xmm9,0x14(%r9,%rbx,4)
+ .byte 196,67,125,25,193,1 // vextractf128 $0x1,%ymm8,%xmm9
+ .byte 196,65,122,17,76,153,16 // vmovss %xmm9,0x10(%r9,%rbx,4)
+ .byte 196,65,121,17,4,153 // vmovupd %xmm8,(%r9,%rbx,4)
+ .byte 235,142 // jmp 4fa9 <_sk_store_8888_avx+0xab>
+ .byte 144 // nop
+ .byte 182,255 // mov $0xff,%dh
+ .byte 255 // (bad)
+ .byte 255,198 // inc %esi
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 190,255,255,255,247 // mov $0xf7ffffff,%esi
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 234 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 220,255 // fdivr %st,%st(7)
+ .byte 255 // (bad)
+ .byte 255,206 // dec %esi
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // .byte 0xff
HIDDEN _sk_load_bgra_avx
.globl _sk_load_bgra_avx
FUNCTION(_sk_load_bgra_avx)
_sk_load_bgra_avx:
- .byte 73,137,201 // mov %rcx,%r9
+ .byte 83 // push %rbx
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,99,80,8 // movslq 0x8(%rax),%r10
- .byte 73,99,201 // movslq %r9d,%rcx
- .byte 73,15,175,202 // imul %r10,%rcx
- .byte 72,193,225,2 // shl $0x2,%rcx
- .byte 72,3,8 // add (%rax),%rcx
- .byte 72,99,194 // movslq %edx,%rax
- .byte 72,141,4,129 // lea (%rcx,%rax,4),%rax
+ .byte 76,99,201 // movslq %ecx,%r9
+ .byte 77,15,175,202 // imul %r10,%r9
+ .byte 73,193,225,2 // shl $0x2,%r9
+ .byte 76,3,8 // add (%rax),%r9
+ .byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 15,133,136,0,0,0 // jne 4e5b <_sk_load_bgra_avx+0xaf>
- .byte 197,252,16,24 // vmovups (%rax),%ymm3
- .byte 197,124,40,21,97,103,0,0 // vmovaps 0x6761(%rip),%ymm10 # b540 <_sk_clut_4D_avx+0x28a9>
- .byte 196,193,100,84,202 // vandps %ymm10,%ymm3,%ymm1
+ .byte 15,133,136,0,0,0 // jne 50e1 <_sk_load_bgra_avx+0xa9>
+ .byte 196,65,125,16,12,153 // vmovupd (%r9,%rbx,4),%ymm9
+ .byte 197,125,40,21,25,103,0,0 // vmovapd 0x6719(%rip),%ymm10 # b780 <_sk_clut_4D_avx+0x271d>
+ .byte 196,193,53,84,202 // vandpd %ymm10,%ymm9,%ymm1
.byte 197,252,91,201 // vcvtdq2ps %ymm1,%ymm1
- .byte 196,98,125,24,5,107,99,0,0 // vbroadcastss 0x636b(%rip),%ymm8 # b15c <_sk_clut_4D_avx+0x24c5>
+ .byte 196,98,125,24,5,175,100,0,0 // vbroadcastss 0x64af(%rip),%ymm8 # b528 <_sk_clut_4D_avx+0x24c5>
.byte 196,193,116,89,208 // vmulps %ymm8,%ymm1,%ymm2
- .byte 197,241,114,211,8 // vpsrld $0x8,%xmm3,%xmm1
- .byte 196,195,125,25,217,1 // vextractf128 $0x1,%ymm3,%xmm9
- .byte 196,193,121,114,209,8 // vpsrld $0x8,%xmm9,%xmm0
+ .byte 196,193,113,114,209,8 // vpsrld $0x8,%xmm9,%xmm1
+ .byte 196,99,125,25,203,1 // vextractf128 $0x1,%ymm9,%xmm3
+ .byte 197,249,114,211,8 // vpsrld $0x8,%xmm3,%xmm0
.byte 196,227,117,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
- .byte 196,193,124,84,194 // vandps %ymm10,%ymm0,%ymm0
+ .byte 196,193,125,84,194 // vandpd %ymm10,%ymm0,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
.byte 196,193,124,89,200 // vmulps %ymm8,%ymm0,%ymm1
- .byte 197,161,114,211,16 // vpsrld $0x10,%xmm3,%xmm11
- .byte 196,193,121,114,209,16 // vpsrld $0x10,%xmm9,%xmm0
+ .byte 196,193,33,114,209,16 // vpsrld $0x10,%xmm9,%xmm11
+ .byte 197,249,114,211,16 // vpsrld $0x10,%xmm3,%xmm0
.byte 196,227,37,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm11,%ymm0
- .byte 196,193,124,84,194 // vandps %ymm10,%ymm0,%ymm0
+ .byte 196,193,125,84,194 // vandpd %ymm10,%ymm0,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
.byte 196,193,124,89,192 // vmulps %ymm8,%ymm0,%ymm0
- .byte 197,169,114,211,24 // vpsrld $0x18,%xmm3,%xmm10
- .byte 196,193,97,114,209,24 // vpsrld $0x18,%xmm9,%xmm3
- .byte 196,227,45,24,219,1 // vinsertf128 $0x1,%xmm3,%ymm10,%ymm3
+ .byte 196,193,49,114,209,24 // vpsrld $0x18,%xmm9,%xmm9
+ .byte 197,225,114,211,24 // vpsrld $0x18,%xmm3,%xmm3
+ .byte 196,227,53,24,219,1 // vinsertf128 $0x1,%xmm3,%ymm9,%ymm3
.byte 197,252,91,219 // vcvtdq2ps %ymm3,%ymm3
.byte 196,193,100,89,216 // vmulps %ymm8,%ymm3,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 76,137,201 // mov %r9,%rcx
- .byte 255,224 // jmpq *%rax
- .byte 185,8,0,0,0 // mov $0x8,%ecx
- .byte 68,41,193 // sub %r8d,%ecx
- .byte 192,225,3 // shl $0x3,%cl
- .byte 73,199,194,255,255,255,255 // mov $0xffffffffffffffff,%r10
- .byte 73,211,234 // shr %cl,%r10
- .byte 196,193,249,110,194 // vmovq %r10,%xmm0
- .byte 196,226,121,48,192 // vpmovzxbw %xmm0,%xmm0
- .byte 196,226,121,0,13,189,101,0,0 // vpshufb 0x65bd(%rip),%xmm0,%xmm1 # b440 <_sk_clut_4D_avx+0x27a9>
- .byte 196,226,121,33,201 // vpmovsxbd %xmm1,%xmm1
- .byte 196,226,121,0,5,191,101,0,0 // vpshufb 0x65bf(%rip),%xmm0,%xmm0 # b450 <_sk_clut_4D_avx+0x27b9>
- .byte 196,226,121,33,192 // vpmovsxbd %xmm0,%xmm0
- .byte 196,227,117,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
- .byte 196,226,125,44,24 // vmaskmovps (%rax),%ymm0,%ymm3
- .byte 233,49,255,255,255 // jmpq 4dd7 <_sk_load_bgra_avx+0x2b>
+ .byte 91 // pop %rbx
+ .byte 255,224 // jmpq *%rax
+ .byte 69,137,194 // mov %r8d,%r10d
+ .byte 65,128,226,7 // and $0x7,%r10b
+ .byte 196,65,52,87,201 // vxorps %ymm9,%ymm9,%ymm9
+ .byte 65,254,202 // dec %r10b
+ .byte 65,128,250,6 // cmp $0x6,%r10b
+ .byte 15,135,101,255,255,255 // ja 505f <_sk_load_bgra_avx+0x27>
+ .byte 69,15,182,210 // movzbl %r10b,%r10d
+ .byte 76,141,29,147,0,0,0 // lea 0x93(%rip),%r11 # 5198 <_sk_load_bgra_avx+0x160>
+ .byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
+ .byte 76,1,216 // add %r11,%rax
+ .byte 255,224 // jmpq *%rax
+ .byte 196,65,122,16,12,153 // vmovss (%r9,%rbx,4),%xmm9
+ .byte 233,70,255,255,255 // jmpq 505f <_sk_load_bgra_avx+0x27>
+ .byte 196,193,121,110,68,153,8 // vmovd 0x8(%r9,%rbx,4),%xmm0
+ .byte 197,249,112,192,68 // vpshufd $0x44,%xmm0,%xmm0
+ .byte 197,244,87,201 // vxorps %ymm1,%ymm1,%ymm1
+ .byte 196,99,117,12,200,4 // vblendps $0x4,%ymm0,%ymm1,%ymm9
+ .byte 196,193,123,16,4,153 // vmovsd (%r9,%rbx,4),%xmm0
+ .byte 196,99,53,13,200,1 // vblendpd $0x1,%ymm0,%ymm9,%ymm9
+ .byte 233,31,255,255,255 // jmpq 505f <_sk_load_bgra_avx+0x27>
+ .byte 196,193,121,110,68,153,24 // vmovd 0x18(%r9,%rbx,4),%xmm0
+ .byte 197,249,112,192,68 // vpshufd $0x44,%xmm0,%xmm0
+ .byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ .byte 197,244,87,201 // vxorps %ymm1,%ymm1,%ymm1
+ .byte 196,99,117,12,200,64 // vblendps $0x40,%ymm0,%ymm1,%ymm9
+ .byte 196,99,125,25,200,1 // vextractf128 $0x1,%ymm9,%xmm0
+ .byte 196,195,121,34,68,153,20,1 // vpinsrd $0x1,0x14(%r9,%rbx,4),%xmm0,%xmm0
+ .byte 196,99,53,24,200,1 // vinsertf128 $0x1,%xmm0,%ymm9,%ymm9
+ .byte 196,99,125,25,200,1 // vextractf128 $0x1,%ymm9,%xmm0
+ .byte 196,195,121,34,68,153,16,0 // vpinsrd $0x0,0x10(%r9,%rbx,4),%xmm0,%xmm0
+ .byte 196,99,53,24,200,1 // vinsertf128 $0x1,%xmm0,%ymm9,%ymm9
+ .byte 196,193,121,16,4,153 // vmovupd (%r9,%rbx,4),%xmm0
+ .byte 196,67,125,13,201,12 // vblendpd $0xc,%ymm9,%ymm0,%ymm9
+ .byte 233,202,254,255,255 // jmpq 505f <_sk_load_bgra_avx+0x27>
+ .byte 15,31,0 // nopl (%rax)
+ .byte 118,255 // jbe 5199 <_sk_load_bgra_avx+0x161>
+ .byte 255 // (bad)
+ .byte 255,151,255,255,255,129 // callq *-0x7e000001(%rdi)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 236 // in (%dx),%al
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 216,255 // fdivr %st(7),%st
+ .byte 255 // (bad)
+ .byte 255,196 // inc %esp
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // .byte 0xff
+ .byte 168,255 // test $0xff,%al
+ .byte 255 // (bad)
+ .byte 255 // .byte 0xff
HIDDEN _sk_load_bgra_dst_avx
.globl _sk_load_bgra_dst_avx
FUNCTION(_sk_load_bgra_dst_avx)
_sk_load_bgra_dst_avx:
- .byte 73,137,201 // mov %rcx,%r9
+ .byte 83 // push %rbx
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,99,80,8 // movslq 0x8(%rax),%r10
- .byte 73,99,201 // movslq %r9d,%rcx
- .byte 73,15,175,202 // imul %r10,%rcx
- .byte 72,193,225,2 // shl $0x2,%rcx
- .byte 72,3,8 // add (%rax),%rcx
- .byte 72,99,194 // movslq %edx,%rax
- .byte 72,141,4,129 // lea (%rcx,%rax,4),%rax
+ .byte 76,99,201 // movslq %ecx,%r9
+ .byte 77,15,175,202 // imul %r10,%r9
+ .byte 73,193,225,2 // shl $0x2,%r9
+ .byte 76,3,8 // add (%rax),%r9
+ .byte 72,99,218 // movslq %edx,%rbx
.byte 77,133,192 // test %r8,%r8
- .byte 15,133,136,0,0,0 // jne 4f55 <_sk_load_bgra_dst_avx+0xaf>
- .byte 197,252,16,56 // vmovups (%rax),%ymm7
- .byte 197,124,40,21,135,102,0,0 // vmovaps 0x6687(%rip),%ymm10 # b560 <_sk_clut_4D_avx+0x28c9>
- .byte 196,193,68,84,234 // vandps %ymm10,%ymm7,%ymm5
+ .byte 15,133,136,0,0,0 // jne 525d <_sk_load_bgra_dst_avx+0xa9>
+ .byte 196,65,125,16,12,153 // vmovupd (%r9,%rbx,4),%ymm9
+ .byte 197,125,40,21,189,101,0,0 // vmovapd 0x65bd(%rip),%ymm10 # b7a0 <_sk_clut_4D_avx+0x273d>
+ .byte 196,193,53,84,234 // vandpd %ymm10,%ymm9,%ymm5
.byte 197,252,91,237 // vcvtdq2ps %ymm5,%ymm5
- .byte 196,98,125,24,5,117,98,0,0 // vbroadcastss 0x6275(%rip),%ymm8 # b160 <_sk_clut_4D_avx+0x24c9>
+ .byte 196,98,125,24,5,55,99,0,0 // vbroadcastss 0x6337(%rip),%ymm8 # b52c <_sk_clut_4D_avx+0x24c9>
.byte 196,193,84,89,240 // vmulps %ymm8,%ymm5,%ymm6
- .byte 197,209,114,215,8 // vpsrld $0x8,%xmm7,%xmm5
- .byte 196,195,125,25,249,1 // vextractf128 $0x1,%ymm7,%xmm9
- .byte 196,193,89,114,209,8 // vpsrld $0x8,%xmm9,%xmm4
+ .byte 196,193,81,114,209,8 // vpsrld $0x8,%xmm9,%xmm5
+ .byte 196,99,125,25,207,1 // vextractf128 $0x1,%ymm9,%xmm7
+ .byte 197,217,114,215,8 // vpsrld $0x8,%xmm7,%xmm4
.byte 196,227,85,24,228,1 // vinsertf128 $0x1,%xmm4,%ymm5,%ymm4
- .byte 196,193,92,84,226 // vandps %ymm10,%ymm4,%ymm4
+ .byte 196,193,93,84,226 // vandpd %ymm10,%ymm4,%ymm4
.byte 197,252,91,228 // vcvtdq2ps %ymm4,%ymm4
.byte 196,193,92,89,232 // vmulps %ymm8,%ymm4,%ymm5
- .byte 197,161,114,215,16 // vpsrld $0x10,%xmm7,%xmm11
- .byte 196,193,89,114,209,16 // vpsrld $0x10,%xmm9,%xmm4
+ .byte 196,193,33,114,209,16 // vpsrld $0x10,%xmm9,%xmm11
+ .byte 197,217,114,215,16 // vpsrld $0x10,%xmm7,%xmm4
.byte 196,227,37,24,228,1 // vinsertf128 $0x1,%xmm4,%ymm11,%ymm4
- .byte 196,193,92,84,226 // vandps %ymm10,%ymm4,%ymm4
+ .byte 196,193,93,84,226 // vandpd %ymm10,%ymm4,%ymm4
.byte 197,252,91,228 // vcvtdq2ps %ymm4,%ymm4
.byte 196,193,92,89,224 // vmulps %ymm8,%ymm4,%ymm4
- .byte 197,169,114,215,24 // vpsrld $0x18,%xmm7,%xmm10
- .byte 196,193,65,114,209,24 // vpsrld $0x18,%xmm9,%xmm7
- .byte 196,227,45,24,255,1 // vinsertf128 $0x1,%xmm7,%ymm10,%ymm7
+ .byte 196,193,49,114,209,24 // vpsrld $0x18,%xmm9,%xmm9
+ .byte 197,193,114,215,24 // vpsrld $0x18,%xmm7,%xmm7
+ .byte 196,227,53,24,255,1 // vinsertf128 $0x1,%xmm7,%ymm9,%ymm7
.byte 197,252,91,255 // vcvtdq2ps %ymm7,%ymm7
.byte 196,193,68,89,248 // vmulps %ymm8,%ymm7,%ymm7
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 76,137,201 // mov %r9,%rcx
- .byte 255,224 // jmpq *%rax
- .byte 185,8,0,0,0 // mov $0x8,%ecx
- .byte 68,41,193 // sub %r8d,%ecx
- .byte 192,225,3 // shl $0x3,%cl
- .byte 73,199,194,255,255,255,255 // mov $0xffffffffffffffff,%r10
- .byte 73,211,234 // shr %cl,%r10
- .byte 196,193,249,110,226 // vmovq %r10,%xmm4
- .byte 196,226,121,48,228 // vpmovzxbw %xmm4,%xmm4
- .byte 196,226,89,0,45,227,100,0,0 // vpshufb 0x64e3(%rip),%xmm4,%xmm5 # b460 <_sk_clut_4D_avx+0x27c9>
- .byte 196,226,121,33,237 // vpmovsxbd %xmm5,%xmm5
- .byte 196,226,89,0,37,229,100,0,0 // vpshufb 0x64e5(%rip),%xmm4,%xmm4 # b470 <_sk_clut_4D_avx+0x27d9>
- .byte 196,226,121,33,228 // vpmovsxbd %xmm4,%xmm4
- .byte 196,227,85,24,228,1 // vinsertf128 $0x1,%xmm4,%ymm5,%ymm4
- .byte 196,226,93,44,56 // vmaskmovps (%rax),%ymm4,%ymm7
- .byte 233,49,255,255,255 // jmpq 4ed1 <_sk_load_bgra_dst_avx+0x2b>
+ .byte 91 // pop %rbx
+ .byte 255,224 // jmpq *%rax
+ .byte 69,137,194 // mov %r8d,%r10d
+ .byte 65,128,226,7 // and $0x7,%r10b
+ .byte 196,65,52,87,201 // vxorps %ymm9,%ymm9,%ymm9
+ .byte 65,254,202 // dec %r10b
+ .byte 65,128,250,6 // cmp $0x6,%r10b
+ .byte 15,135,101,255,255,255 // ja 51db <_sk_load_bgra_dst_avx+0x27>
+ .byte 69,15,182,210 // movzbl %r10b,%r10d
+ .byte 76,141,29,147,0,0,0 // lea 0x93(%rip),%r11 # 5314 <_sk_load_bgra_dst_avx+0x160>
+ .byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
+ .byte 76,1,216 // add %r11,%rax
+ .byte 255,224 // jmpq *%rax
+ .byte 196,65,122,16,12,153 // vmovss (%r9,%rbx,4),%xmm9
+ .byte 233,70,255,255,255 // jmpq 51db <_sk_load_bgra_dst_avx+0x27>
+ .byte 196,193,121,110,100,153,8 // vmovd 0x8(%r9,%rbx,4),%xmm4
+ .byte 197,249,112,228,68 // vpshufd $0x44,%xmm4,%xmm4
+ .byte 197,212,87,237 // vxorps %ymm5,%ymm5,%ymm5
+ .byte 196,99,85,12,204,4 // vblendps $0x4,%ymm4,%ymm5,%ymm9
+ .byte 196,193,123,16,36,153 // vmovsd (%r9,%rbx,4),%xmm4
+ .byte 196,99,53,13,204,1 // vblendpd $0x1,%ymm4,%ymm9,%ymm9
+ .byte 233,31,255,255,255 // jmpq 51db <_sk_load_bgra_dst_avx+0x27>
+ .byte 196,193,121,110,100,153,24 // vmovd 0x18(%r9,%rbx,4),%xmm4
+ .byte 197,249,112,228,68 // vpshufd $0x44,%xmm4,%xmm4
+ .byte 196,227,125,24,228,1 // vinsertf128 $0x1,%xmm4,%ymm0,%ymm4
+ .byte 197,212,87,237 // vxorps %ymm5,%ymm5,%ymm5
+ .byte 196,99,85,12,204,64 // vblendps $0x40,%ymm4,%ymm5,%ymm9
+ .byte 196,99,125,25,204,1 // vextractf128 $0x1,%ymm9,%xmm4
+ .byte 196,195,89,34,100,153,20,1 // vpinsrd $0x1,0x14(%r9,%rbx,4),%xmm4,%xmm4
+ .byte 196,99,53,24,204,1 // vinsertf128 $0x1,%xmm4,%ymm9,%ymm9
+ .byte 196,99,125,25,204,1 // vextractf128 $0x1,%ymm9,%xmm4
+ .byte 196,195,89,34,100,153,16,0 // vpinsrd $0x0,0x10(%r9,%rbx,4),%xmm4,%xmm4
+ .byte 196,99,53,24,204,1 // vinsertf128 $0x1,%xmm4,%ymm9,%ymm9
+ .byte 196,193,121,16,36,153 // vmovupd (%r9,%rbx,4),%xmm4
+ .byte 196,67,93,13,201,12 // vblendpd $0xc,%ymm9,%ymm4,%ymm9
+ .byte 233,202,254,255,255 // jmpq 51db <_sk_load_bgra_dst_avx+0x27>
+ .byte 15,31,0 // nopl (%rax)
+ .byte 118,255 // jbe 5315 <_sk_load_bgra_dst_avx+0x161>
+ .byte 255 // (bad)
+ .byte 255,151,255,255,255,129 // callq *-0x7e000001(%rdi)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 236 // in (%dx),%al
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 216,255 // fdivr %st(7),%st
+ .byte 255 // (bad)
+ .byte 255,196 // inc %esp
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // .byte 0xff
+ .byte 168,255 // test $0xff,%al
+ .byte 255 // (bad)
+ .byte 255 // .byte 0xff
HIDDEN _sk_gather_bgra_avx
.globl _sk_gather_bgra_avx
@@ -28898,10 +29514,10 @@ _sk_gather_bgra_avx:
.byte 73,193,234,32 // shr $0x20,%r10
.byte 196,131,121,34,28,145,3 // vpinsrd $0x3,(%r9,%r10,4),%xmm0,%xmm3
.byte 196,227,61,24,195,1 // vinsertf128 $0x1,%xmm3,%ymm8,%ymm0
- .byte 197,124,40,13,53,101,0,0 // vmovaps 0x6535(%rip),%ymm9 # b580 <_sk_clut_4D_avx+0x28e9>
+ .byte 197,124,40,13,229,99,0,0 // vmovaps 0x63e5(%rip),%ymm9 # b7c0 <_sk_clut_4D_avx+0x275d>
.byte 196,193,124,84,193 // vandps %ymm9,%ymm0,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,98,125,24,21,7,97,0,0 // vbroadcastss 0x6107(%rip),%ymm10 # b164 <_sk_clut_4D_avx+0x24cd>
+ .byte 196,98,125,24,21,67,97,0,0 // vbroadcastss 0x6143(%rip),%ymm10 # b530 <_sk_clut_4D_avx+0x24cd>
.byte 196,193,124,89,210 // vmulps %ymm10,%ymm0,%ymm2
.byte 196,193,121,114,208,8 // vpsrld $0x8,%xmm8,%xmm0
.byte 197,241,114,211,8 // vpsrld $0x8,%xmm3,%xmm1
@@ -28927,16 +29543,15 @@ HIDDEN _sk_store_bgra_avx
.globl _sk_store_bgra_avx
FUNCTION(_sk_store_bgra_avx)
_sk_store_bgra_avx:
- .byte 73,137,201 // mov %rcx,%r9
+ .byte 83 // push %rbx
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,99,80,8 // movslq 0x8(%rax),%r10
- .byte 73,99,201 // movslq %r9d,%rcx
- .byte 73,15,175,202 // imul %r10,%rcx
- .byte 72,193,225,2 // shl $0x2,%rcx
- .byte 72,3,8 // add (%rax),%rcx
- .byte 72,99,194 // movslq %edx,%rax
- .byte 72,141,4,129 // lea (%rcx,%rax,4),%rax
- .byte 196,98,125,24,5,131,96,0,0 // vbroadcastss 0x6083(%rip),%ymm8 # b168 <_sk_clut_4D_avx+0x24d1>
+ .byte 76,99,201 // movslq %ecx,%r9
+ .byte 77,15,175,202 // imul %r10,%r9
+ .byte 73,193,225,2 // shl $0x2,%r9
+ .byte 76,3,8 // add (%rax),%r9
+ .byte 72,99,218 // movslq %edx,%rbx
+ .byte 196,98,125,24,5,197,96,0,0 // vbroadcastss 0x60c5(%rip),%ymm8 # b534 <_sk_clut_4D_avx+0x24d1>
.byte 196,65,108,89,200 // vmulps %ymm8,%ymm2,%ymm9
.byte 196,65,125,91,201 // vcvtps2dq %ymm9,%ymm9
.byte 196,65,116,89,208 // vmulps %ymm8,%ymm1,%ymm10
@@ -28961,25 +29576,55 @@ _sk_store_bgra_avx:
.byte 196,65,45,86,192 // vorpd %ymm8,%ymm10,%ymm8
.byte 196,65,53,86,192 // vorpd %ymm8,%ymm9,%ymm8
.byte 77,133,192 // test %r8,%r8
- .byte 117,11 // jne 5174 <_sk_store_bgra_avx+0xb6>
- .byte 197,124,17,0 // vmovups %ymm8,(%rax)
+ .byte 117,11 // jne 54fe <_sk_store_bgra_avx+0xb0>
+ .byte 196,65,124,17,4,153 // vmovups %ymm8,(%r9,%rbx,4)
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 76,137,201 // mov %r9,%rcx
+ .byte 91 // pop %rbx
.byte 255,224 // jmpq *%rax
- .byte 185,8,0,0,0 // mov $0x8,%ecx
- .byte 68,41,193 // sub %r8d,%ecx
- .byte 192,225,3 // shl $0x3,%cl
- .byte 73,199,194,255,255,255,255 // mov $0xffffffffffffffff,%r10
- .byte 73,211,234 // shr %cl,%r10
- .byte 196,65,249,110,202 // vmovq %r10,%xmm9
- .byte 196,66,121,48,201 // vpmovzxbw %xmm9,%xmm9
- .byte 196,98,49,0,21,228,98,0,0 // vpshufb 0x62e4(%rip),%xmm9,%xmm10 # b480 <_sk_clut_4D_avx+0x27e9>
- .byte 196,66,121,33,210 // vpmovsxbd %xmm10,%xmm10
- .byte 196,98,49,0,13,230,98,0,0 // vpshufb 0x62e6(%rip),%xmm9,%xmm9 # b490 <_sk_clut_4D_avx+0x27f9>
- .byte 196,66,121,33,201 // vpmovsxbd %xmm9,%xmm9
- .byte 196,67,45,24,201,1 // vinsertf128 $0x1,%xmm9,%ymm10,%ymm9
- .byte 196,98,53,46,0 // vmaskmovps %ymm8,%ymm9,(%rax)
- .byte 235,177 // jmp 516d <_sk_store_bgra_avx+0xaf>
+ .byte 69,137,194 // mov %r8d,%r10d
+ .byte 65,128,226,7 // and $0x7,%r10b
+ .byte 65,254,202 // dec %r10b
+ .byte 65,128,250,6 // cmp $0x6,%r10b
+ .byte 119,235 // ja 54f9 <_sk_store_bgra_avx+0xab>
+ .byte 69,15,182,210 // movzbl %r10b,%r10d
+ .byte 76,141,29,83,0,0,0 // lea 0x53(%rip),%r11 # 556c <_sk_store_bgra_avx+0x11e>
+ .byte 75,99,4,147 // movslq (%r11,%r10,4),%rax
+ .byte 76,1,216 // add %r11,%rax
+ .byte 255,224 // jmpq *%rax
+ .byte 196,65,121,126,4,153 // vmovd %xmm8,(%r9,%rbx,4)
+ .byte 235,207 // jmp 54f9 <_sk_store_bgra_avx+0xab>
+ .byte 196,67,121,22,68,153,8,2 // vpextrd $0x2,%xmm8,0x8(%r9,%rbx,4)
+ .byte 196,65,121,214,4,153 // vmovq %xmm8,(%r9,%rbx,4)
+ .byte 235,191 // jmp 54f9 <_sk_store_bgra_avx+0xab>
+ .byte 196,67,125,25,193,1 // vextractf128 $0x1,%ymm8,%xmm9
+ .byte 196,67,121,22,76,153,24,2 // vpextrd $0x2,%xmm9,0x18(%r9,%rbx,4)
+ .byte 196,67,125,25,193,1 // vextractf128 $0x1,%ymm8,%xmm9
+ .byte 196,67,121,22,76,153,20,1 // vpextrd $0x1,%xmm9,0x14(%r9,%rbx,4)
+ .byte 196,67,125,25,193,1 // vextractf128 $0x1,%ymm8,%xmm9
+ .byte 196,65,122,17,76,153,16 // vmovss %xmm9,0x10(%r9,%rbx,4)
+ .byte 196,65,121,17,4,153 // vmovupd %xmm8,(%r9,%rbx,4)
+ .byte 235,142 // jmp 54f9 <_sk_store_bgra_avx+0xab>
+ .byte 144 // nop
+ .byte 182,255 // mov $0xff,%dh
+ .byte 255 // (bad)
+ .byte 255,198 // inc %esi
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 190,255,255,255,247 // mov $0xf7ffffff,%esi
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 234 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 220,255 // fdivr %st,%st(7)
+ .byte 255 // (bad)
+ .byte 255,206 // dec %esi
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255 // .byte 0xff
HIDDEN _sk_load_f16_avx
.globl _sk_load_f16_avx
@@ -28998,7 +29643,7 @@ _sk_load_f16_avx:
.byte 197,252,17,116,36,192 // vmovups %ymm6,-0x40(%rsp)
.byte 197,252,17,108,36,160 // vmovups %ymm5,-0x60(%rsp)
.byte 197,254,127,100,36,128 // vmovdqu %ymm4,-0x80(%rsp)
- .byte 15,133,145,2,0,0 // jne 5489 <_sk_load_f16_avx+0x2cd>
+ .byte 15,133,145,2,0,0 // jne 5855 <_sk_load_f16_avx+0x2cd>
.byte 196,65,121,16,4,193 // vmovupd (%r9,%rax,8),%xmm8
.byte 196,193,121,16,84,193,16 // vmovupd 0x10(%r9,%rax,8),%xmm2
.byte 196,193,121,16,76,193,32 // vmovupd 0x20(%r9,%rax,8),%xmm1
@@ -29016,13 +29661,13 @@ _sk_load_f16_avx:
.byte 197,249,105,201 // vpunpckhwd %xmm1,%xmm0,%xmm1
.byte 196,226,121,51,192 // vpmovzxwd %xmm0,%xmm0
.byte 196,227,125,24,193,1 // vinsertf128 $0x1,%xmm1,%ymm0,%ymm0
- .byte 196,98,125,24,37,23,95,0,0 // vbroadcastss 0x5f17(%rip),%ymm12 # b16c <_sk_clut_4D_avx+0x24d5>
+ .byte 196,98,125,24,37,23,95,0,0 // vbroadcastss 0x5f17(%rip),%ymm12 # b538 <_sk_clut_4D_avx+0x24d5>
.byte 196,193,124,84,204 // vandps %ymm12,%ymm0,%ymm1
.byte 197,252,87,193 // vxorps %ymm1,%ymm0,%ymm0
.byte 196,195,125,25,198,1 // vextractf128 $0x1,%ymm0,%xmm14
- .byte 196,98,121,24,29,3,95,0,0 // vbroadcastss 0x5f03(%rip),%xmm11 # b170 <_sk_clut_4D_avx+0x24d9>
+ .byte 196,98,121,24,29,3,95,0,0 // vbroadcastss 0x5f03(%rip),%xmm11 # b53c <_sk_clut_4D_avx+0x24d9>
.byte 196,193,8,87,219 // vxorps %xmm11,%xmm14,%xmm3
- .byte 196,98,121,24,45,249,94,0,0 // vbroadcastss 0x5ef9(%rip),%xmm13 # b174 <_sk_clut_4D_avx+0x24dd>
+ .byte 196,98,121,24,45,249,94,0,0 // vbroadcastss 0x5ef9(%rip),%xmm13 # b540 <_sk_clut_4D_avx+0x24dd>
.byte 197,145,102,219 // vpcmpgtd %xmm3,%xmm13,%xmm3
.byte 196,65,120,87,211 // vxorps %xmm11,%xmm0,%xmm10
.byte 196,65,17,102,210 // vpcmpgtd %xmm10,%xmm13,%xmm10
@@ -29036,7 +29681,7 @@ _sk_load_f16_avx:
.byte 196,227,125,24,195,1 // vinsertf128 $0x1,%xmm3,%ymm0,%ymm0
.byte 197,252,86,193 // vorps %ymm1,%ymm0,%ymm0
.byte 196,227,125,25,193,1 // vextractf128 $0x1,%ymm0,%xmm1
- .byte 196,226,121,24,29,175,94,0,0 // vbroadcastss 0x5eaf(%rip),%xmm3 # b178 <_sk_clut_4D_avx+0x24e1>
+ .byte 196,226,121,24,29,175,94,0,0 // vbroadcastss 0x5eaf(%rip),%xmm3 # b544 <_sk_clut_4D_avx+0x24e1>
.byte 197,241,254,203 // vpaddd %xmm3,%xmm1,%xmm1
.byte 197,249,254,195 // vpaddd %xmm3,%xmm0,%xmm0
.byte 196,227,125,24,193,1 // vinsertf128 $0x1,%xmm1,%ymm0,%ymm0
@@ -29129,29 +29774,29 @@ _sk_load_f16_avx:
.byte 196,65,123,16,4,193 // vmovsd (%r9,%rax,8),%xmm8
.byte 196,65,49,239,201 // vpxor %xmm9,%xmm9,%xmm9
.byte 73,131,248,1 // cmp $0x1,%r8
- .byte 116,85 // je 54ef <_sk_load_f16_avx+0x333>
+ .byte 116,85 // je 58bb <_sk_load_f16_avx+0x333>
.byte 196,65,57,22,68,193,8 // vmovhpd 0x8(%r9,%rax,8),%xmm8,%xmm8
.byte 73,131,248,3 // cmp $0x3,%r8
- .byte 114,72 // jb 54ef <_sk_load_f16_avx+0x333>
+ .byte 114,72 // jb 58bb <_sk_load_f16_avx+0x333>
.byte 196,193,123,16,84,193,16 // vmovsd 0x10(%r9,%rax,8),%xmm2
.byte 73,131,248,3 // cmp $0x3,%r8
- .byte 116,72 // je 54fc <_sk_load_f16_avx+0x340>
+ .byte 116,72 // je 58c8 <_sk_load_f16_avx+0x340>
.byte 196,193,105,22,84,193,24 // vmovhpd 0x18(%r9,%rax,8),%xmm2,%xmm2
.byte 73,131,248,5 // cmp $0x5,%r8
- .byte 114,59 // jb 54fc <_sk_load_f16_avx+0x340>
+ .byte 114,59 // jb 58c8 <_sk_load_f16_avx+0x340>
.byte 196,193,123,16,76,193,32 // vmovsd 0x20(%r9,%rax,8),%xmm1
.byte 73,131,248,5 // cmp $0x5,%r8
- .byte 15,132,65,253,255,255 // je 5213 <_sk_load_f16_avx+0x57>
+ .byte 15,132,65,253,255,255 // je 55df <_sk_load_f16_avx+0x57>
.byte 196,193,113,22,76,193,40 // vmovhpd 0x28(%r9,%rax,8),%xmm1,%xmm1
.byte 73,131,248,7 // cmp $0x7,%r8
- .byte 15,130,48,253,255,255 // jb 5213 <_sk_load_f16_avx+0x57>
+ .byte 15,130,48,253,255,255 // jb 55df <_sk_load_f16_avx+0x57>
.byte 196,65,122,126,76,193,48 // vmovq 0x30(%r9,%rax,8),%xmm9
- .byte 233,36,253,255,255 // jmpq 5213 <_sk_load_f16_avx+0x57>
+ .byte 233,36,253,255,255 // jmpq 55df <_sk_load_f16_avx+0x57>
.byte 197,241,87,201 // vxorpd %xmm1,%xmm1,%xmm1
.byte 197,233,87,210 // vxorpd %xmm2,%xmm2,%xmm2
- .byte 233,23,253,255,255 // jmpq 5213 <_sk_load_f16_avx+0x57>
+ .byte 233,23,253,255,255 // jmpq 55df <_sk_load_f16_avx+0x57>
.byte 197,241,87,201 // vxorpd %xmm1,%xmm1,%xmm1
- .byte 233,14,253,255,255 // jmpq 5213 <_sk_load_f16_avx+0x57>
+ .byte 233,14,253,255,255 // jmpq 55df <_sk_load_f16_avx+0x57>
HIDDEN _sk_load_f16_dst_avx
.globl _sk_load_f16_dst_avx
@@ -29170,7 +29815,7 @@ _sk_load_f16_dst_avx:
.byte 197,252,17,84,36,192 // vmovups %ymm2,-0x40(%rsp)
.byte 197,252,17,76,36,160 // vmovups %ymm1,-0x60(%rsp)
.byte 197,254,127,68,36,128 // vmovdqu %ymm0,-0x80(%rsp)
- .byte 15,133,145,2,0,0 // jne 57d2 <_sk_load_f16_dst_avx+0x2cd>
+ .byte 15,133,145,2,0,0 // jne 5b9e <_sk_load_f16_dst_avx+0x2cd>
.byte 196,65,121,16,4,193 // vmovupd (%r9,%rax,8),%xmm8
.byte 196,193,121,16,116,193,16 // vmovupd 0x10(%r9,%rax,8),%xmm6
.byte 196,193,121,16,108,193,32 // vmovupd 0x20(%r9,%rax,8),%xmm5
@@ -29188,13 +29833,13 @@ _sk_load_f16_dst_avx:
.byte 197,217,105,232 // vpunpckhwd %xmm0,%xmm4,%xmm5
.byte 196,226,121,51,228 // vpmovzxwd %xmm4,%xmm4
.byte 196,227,93,24,229,1 // vinsertf128 $0x1,%xmm5,%ymm4,%ymm4
- .byte 196,98,125,24,37,222,91,0,0 // vbroadcastss 0x5bde(%rip),%ymm12 # b17c <_sk_clut_4D_avx+0x24e5>
+ .byte 196,98,125,24,37,222,91,0,0 // vbroadcastss 0x5bde(%rip),%ymm12 # b548 <_sk_clut_4D_avx+0x24e5>
.byte 196,193,92,84,236 // vandps %ymm12,%ymm4,%ymm5
.byte 197,220,87,229 // vxorps %ymm5,%ymm4,%ymm4
.byte 196,195,125,25,230,1 // vextractf128 $0x1,%ymm4,%xmm14
- .byte 196,98,121,24,29,202,91,0,0 // vbroadcastss 0x5bca(%rip),%xmm11 # b180 <_sk_clut_4D_avx+0x24e9>
+ .byte 196,98,121,24,29,202,91,0,0 // vbroadcastss 0x5bca(%rip),%xmm11 # b54c <_sk_clut_4D_avx+0x24e9>
.byte 196,193,8,87,251 // vxorps %xmm11,%xmm14,%xmm7
- .byte 196,98,121,24,45,192,91,0,0 // vbroadcastss 0x5bc0(%rip),%xmm13 # b184 <_sk_clut_4D_avx+0x24ed>
+ .byte 196,98,121,24,45,192,91,0,0 // vbroadcastss 0x5bc0(%rip),%xmm13 # b550 <_sk_clut_4D_avx+0x24ed>
.byte 197,145,102,255 // vpcmpgtd %xmm7,%xmm13,%xmm7
.byte 196,65,88,87,211 // vxorps %xmm11,%xmm4,%xmm10
.byte 196,65,17,102,210 // vpcmpgtd %xmm10,%xmm13,%xmm10
@@ -29208,7 +29853,7 @@ _sk_load_f16_dst_avx:
.byte 196,227,93,24,231,1 // vinsertf128 $0x1,%xmm7,%ymm4,%ymm4
.byte 197,220,86,229 // vorps %ymm5,%ymm4,%ymm4
.byte 196,227,125,25,229,1 // vextractf128 $0x1,%ymm4,%xmm5
- .byte 196,226,121,24,61,118,91,0,0 // vbroadcastss 0x5b76(%rip),%xmm7 # b188 <_sk_clut_4D_avx+0x24f1>
+ .byte 196,226,121,24,61,118,91,0,0 // vbroadcastss 0x5b76(%rip),%xmm7 # b554 <_sk_clut_4D_avx+0x24f1>
.byte 197,209,254,239 // vpaddd %xmm7,%xmm5,%xmm5
.byte 197,217,254,231 // vpaddd %xmm7,%xmm4,%xmm4
.byte 196,227,93,24,229,1 // vinsertf128 $0x1,%xmm5,%ymm4,%ymm4
@@ -29301,29 +29946,29 @@ _sk_load_f16_dst_avx:
.byte 196,65,123,16,4,193 // vmovsd (%r9,%rax,8),%xmm8
.byte 196,65,49,239,201 // vpxor %xmm9,%xmm9,%xmm9
.byte 73,131,248,1 // cmp $0x1,%r8
- .byte 116,85 // je 5838 <_sk_load_f16_dst_avx+0x333>
+ .byte 116,85 // je 5c04 <_sk_load_f16_dst_avx+0x333>
.byte 196,65,57,22,68,193,8 // vmovhpd 0x8(%r9,%rax,8),%xmm8,%xmm8
.byte 73,131,248,3 // cmp $0x3,%r8
- .byte 114,72 // jb 5838 <_sk_load_f16_dst_avx+0x333>
+ .byte 114,72 // jb 5c04 <_sk_load_f16_dst_avx+0x333>
.byte 196,193,123,16,116,193,16 // vmovsd 0x10(%r9,%rax,8),%xmm6
.byte 73,131,248,3 // cmp $0x3,%r8
- .byte 116,72 // je 5845 <_sk_load_f16_dst_avx+0x340>
+ .byte 116,72 // je 5c11 <_sk_load_f16_dst_avx+0x340>
.byte 196,193,73,22,116,193,24 // vmovhpd 0x18(%r9,%rax,8),%xmm6,%xmm6
.byte 73,131,248,5 // cmp $0x5,%r8
- .byte 114,59 // jb 5845 <_sk_load_f16_dst_avx+0x340>
+ .byte 114,59 // jb 5c11 <_sk_load_f16_dst_avx+0x340>
.byte 196,193,123,16,108,193,32 // vmovsd 0x20(%r9,%rax,8),%xmm5
.byte 73,131,248,5 // cmp $0x5,%r8
- .byte 15,132,65,253,255,255 // je 555c <_sk_load_f16_dst_avx+0x57>
+ .byte 15,132,65,253,255,255 // je 5928 <_sk_load_f16_dst_avx+0x57>
.byte 196,193,81,22,108,193,40 // vmovhpd 0x28(%r9,%rax,8),%xmm5,%xmm5
.byte 73,131,248,7 // cmp $0x7,%r8
- .byte 15,130,48,253,255,255 // jb 555c <_sk_load_f16_dst_avx+0x57>
+ .byte 15,130,48,253,255,255 // jb 5928 <_sk_load_f16_dst_avx+0x57>
.byte 196,65,122,126,76,193,48 // vmovq 0x30(%r9,%rax,8),%xmm9
- .byte 233,36,253,255,255 // jmpq 555c <_sk_load_f16_dst_avx+0x57>
+ .byte 233,36,253,255,255 // jmpq 5928 <_sk_load_f16_dst_avx+0x57>
.byte 197,209,87,237 // vxorpd %xmm5,%xmm5,%xmm5
.byte 197,201,87,246 // vxorpd %xmm6,%xmm6,%xmm6
- .byte 233,23,253,255,255 // jmpq 555c <_sk_load_f16_dst_avx+0x57>
+ .byte 233,23,253,255,255 // jmpq 5928 <_sk_load_f16_dst_avx+0x57>
.byte 197,209,87,237 // vxorpd %xmm5,%xmm5,%xmm5
- .byte 233,14,253,255,255 // jmpq 555c <_sk_load_f16_dst_avx+0x57>
+ .byte 233,14,253,255,255 // jmpq 5928 <_sk_load_f16_dst_avx+0x57>
HIDDEN _sk_gather_f16_avx
.globl _sk_gather_f16_avx
@@ -29384,13 +30029,13 @@ _sk_gather_f16_avx:
.byte 197,249,105,201 // vpunpckhwd %xmm1,%xmm0,%xmm1
.byte 196,226,121,51,192 // vpmovzxwd %xmm0,%xmm0
.byte 196,227,125,24,193,1 // vinsertf128 $0x1,%xmm1,%ymm0,%ymm0
- .byte 196,98,125,24,37,52,88,0,0 // vbroadcastss 0x5834(%rip),%ymm12 # b18c <_sk_clut_4D_avx+0x24f5>
+ .byte 196,98,125,24,37,52,88,0,0 // vbroadcastss 0x5834(%rip),%ymm12 # b558 <_sk_clut_4D_avx+0x24f5>
.byte 196,193,124,84,204 // vandps %ymm12,%ymm0,%ymm1
.byte 197,252,87,193 // vxorps %ymm1,%ymm0,%ymm0
.byte 196,195,125,25,198,1 // vextractf128 $0x1,%ymm0,%xmm14
- .byte 196,98,121,24,29,32,88,0,0 // vbroadcastss 0x5820(%rip),%xmm11 # b190 <_sk_clut_4D_avx+0x24f9>
+ .byte 196,98,121,24,29,32,88,0,0 // vbroadcastss 0x5820(%rip),%xmm11 # b55c <_sk_clut_4D_avx+0x24f9>
.byte 196,193,8,87,219 // vxorps %xmm11,%xmm14,%xmm3
- .byte 196,98,121,24,45,22,88,0,0 // vbroadcastss 0x5816(%rip),%xmm13 # b194 <_sk_clut_4D_avx+0x24fd>
+ .byte 196,98,121,24,45,22,88,0,0 // vbroadcastss 0x5816(%rip),%xmm13 # b560 <_sk_clut_4D_avx+0x24fd>
.byte 197,145,102,219 // vpcmpgtd %xmm3,%xmm13,%xmm3
.byte 196,65,120,87,211 // vxorps %xmm11,%xmm0,%xmm10
.byte 196,65,17,102,210 // vpcmpgtd %xmm10,%xmm13,%xmm10
@@ -29404,7 +30049,7 @@ _sk_gather_f16_avx:
.byte 196,227,125,24,195,1 // vinsertf128 $0x1,%xmm3,%ymm0,%ymm0
.byte 197,252,86,193 // vorps %ymm1,%ymm0,%ymm0
.byte 196,227,125,25,193,1 // vextractf128 $0x1,%ymm0,%xmm1
- .byte 196,226,121,24,29,204,87,0,0 // vbroadcastss 0x57cc(%rip),%xmm3 # b198 <_sk_clut_4D_avx+0x2501>
+ .byte 196,226,121,24,29,204,87,0,0 // vbroadcastss 0x57cc(%rip),%xmm3 # b564 <_sk_clut_4D_avx+0x2501>
.byte 197,241,254,203 // vpaddd %xmm3,%xmm1,%xmm1
.byte 197,249,254,195 // vpaddd %xmm3,%xmm0,%xmm0
.byte 196,227,125,24,193,1 // vinsertf128 $0x1,%xmm1,%ymm0,%ymm0
@@ -29504,12 +30149,12 @@ _sk_store_f16_avx:
.byte 197,252,17,52,36 // vmovups %ymm6,(%rsp)
.byte 197,252,17,108,36,224 // vmovups %ymm5,-0x20(%rsp)
.byte 197,252,17,100,36,192 // vmovups %ymm4,-0x40(%rsp)
- .byte 196,98,125,24,13,236,85,0,0 // vbroadcastss 0x55ec(%rip),%ymm9 # b19c <_sk_clut_4D_avx+0x2505>
+ .byte 196,98,125,24,13,236,85,0,0 // vbroadcastss 0x55ec(%rip),%ymm9 # b568 <_sk_clut_4D_avx+0x2505>
.byte 196,65,124,84,209 // vandps %ymm9,%ymm0,%ymm10
.byte 197,252,17,68,36,128 // vmovups %ymm0,-0x80(%rsp)
.byte 196,65,124,87,218 // vxorps %ymm10,%ymm0,%ymm11
.byte 196,67,125,25,220,1 // vextractf128 $0x1,%ymm11,%xmm12
- .byte 196,98,121,24,5,209,85,0,0 // vbroadcastss 0x55d1(%rip),%xmm8 # b1a0 <_sk_clut_4D_avx+0x2509>
+ .byte 196,98,121,24,5,209,85,0,0 // vbroadcastss 0x55d1(%rip),%xmm8 # b56c <_sk_clut_4D_avx+0x2509>
.byte 196,65,57,102,236 // vpcmpgtd %xmm12,%xmm8,%xmm13
.byte 196,65,57,102,243 // vpcmpgtd %xmm11,%xmm8,%xmm14
.byte 196,67,13,24,237,1 // vinsertf128 $0x1,%xmm13,%ymm14,%ymm13
@@ -29519,7 +30164,7 @@ _sk_store_f16_avx:
.byte 196,67,13,24,242,1 // vinsertf128 $0x1,%xmm10,%ymm14,%ymm14
.byte 196,193,33,114,211,13 // vpsrld $0xd,%xmm11,%xmm11
.byte 196,193,25,114,212,13 // vpsrld $0xd,%xmm12,%xmm12
- .byte 196,98,125,24,21,152,85,0,0 // vbroadcastss 0x5598(%rip),%ymm10 # b1a4 <_sk_clut_4D_avx+0x250d>
+ .byte 196,98,125,24,21,152,85,0,0 // vbroadcastss 0x5598(%rip),%ymm10 # b570 <_sk_clut_4D_avx+0x250d>
.byte 196,65,12,86,242 // vorps %ymm10,%ymm14,%ymm14
.byte 196,67,125,25,247,1 // vextractf128 $0x1,%ymm14,%xmm15
.byte 196,65,1,254,228 // vpaddd %xmm12,%xmm15,%xmm12
@@ -29606,7 +30251,7 @@ _sk_store_f16_avx:
.byte 76,3,8 // add (%rax),%r9
.byte 72,99,194 // movslq %edx,%rax
.byte 77,133,192 // test %r8,%r8
- .byte 117,70 // jne 5e00 <_sk_store_f16_avx+0x274>
+ .byte 117,70 // jne 61cc <_sk_store_f16_avx+0x274>
.byte 196,65,120,17,28,193 // vmovups %xmm11,(%r9,%rax,8)
.byte 196,65,120,17,84,193,16 // vmovups %xmm10,0x10(%r9,%rax,8)
.byte 196,65,120,17,76,193,32 // vmovups %xmm9,0x20(%r9,%rax,8)
@@ -29622,22 +30267,22 @@ _sk_store_f16_avx:
.byte 255,224 // jmpq *%rax
.byte 196,65,121,214,28,193 // vmovq %xmm11,(%r9,%rax,8)
.byte 73,131,248,1 // cmp $0x1,%r8
- .byte 116,201 // je 5dd5 <_sk_store_f16_avx+0x249>
+ .byte 116,201 // je 61a1 <_sk_store_f16_avx+0x249>
.byte 196,65,121,23,92,193,8 // vmovhpd %xmm11,0x8(%r9,%rax,8)
.byte 73,131,248,3 // cmp $0x3,%r8
- .byte 114,188 // jb 5dd5 <_sk_store_f16_avx+0x249>
+ .byte 114,188 // jb 61a1 <_sk_store_f16_avx+0x249>
.byte 196,65,121,214,84,193,16 // vmovq %xmm10,0x10(%r9,%rax,8)
- .byte 116,179 // je 5dd5 <_sk_store_f16_avx+0x249>
+ .byte 116,179 // je 61a1 <_sk_store_f16_avx+0x249>
.byte 196,65,121,23,84,193,24 // vmovhpd %xmm10,0x18(%r9,%rax,8)
.byte 73,131,248,5 // cmp $0x5,%r8
- .byte 114,166 // jb 5dd5 <_sk_store_f16_avx+0x249>
+ .byte 114,166 // jb 61a1 <_sk_store_f16_avx+0x249>
.byte 196,65,121,214,76,193,32 // vmovq %xmm9,0x20(%r9,%rax,8)
- .byte 116,157 // je 5dd5 <_sk_store_f16_avx+0x249>
+ .byte 116,157 // je 61a1 <_sk_store_f16_avx+0x249>
.byte 196,65,121,23,76,193,40 // vmovhpd %xmm9,0x28(%r9,%rax,8)
.byte 73,131,248,7 // cmp $0x7,%r8
- .byte 114,144 // jb 5dd5 <_sk_store_f16_avx+0x249>
+ .byte 114,144 // jb 61a1 <_sk_store_f16_avx+0x249>
.byte 196,65,121,214,68,193,48 // vmovq %xmm8,0x30(%r9,%rax,8)
- .byte 235,135 // jmp 5dd5 <_sk_store_f16_avx+0x249>
+ .byte 235,135 // jmp 61a1 <_sk_store_f16_avx+0x249>
HIDDEN _sk_load_u16_be_avx
.globl _sk_load_u16_be_avx
@@ -29652,7 +30297,7 @@ _sk_load_u16_be_avx:
.byte 76,3,8 // add (%rax),%r9
.byte 73,99,194 // movslq %r10d,%rax
.byte 77,133,192 // test %r8,%r8
- .byte 15,133,253,0,0,0 // jne 5f72 <_sk_load_u16_be_avx+0x124>
+ .byte 15,133,253,0,0,0 // jne 633e <_sk_load_u16_be_avx+0x124>
.byte 196,65,121,16,4,65 // vmovupd (%r9,%rax,2),%xmm8
.byte 196,193,121,16,84,65,16 // vmovupd 0x10(%r9,%rax,2),%xmm2
.byte 196,193,121,16,92,65,32 // vmovupd 0x20(%r9,%rax,2),%xmm3
@@ -29674,7 +30319,7 @@ _sk_load_u16_be_avx:
.byte 196,226,121,51,192 // vpmovzxwd %xmm0,%xmm0
.byte 196,227,125,24,193,1 // vinsertf128 $0x1,%xmm1,%ymm0,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,98,125,24,29,194,82,0,0 // vbroadcastss 0x52c2(%rip),%ymm11 # b1a8 <_sk_clut_4D_avx+0x2511>
+ .byte 196,98,125,24,29,194,82,0,0 // vbroadcastss 0x52c2(%rip),%ymm11 # b574 <_sk_clut_4D_avx+0x2511>
.byte 196,193,124,89,195 // vmulps %ymm11,%ymm0,%ymm0
.byte 197,177,109,202 // vpunpckhqdq %xmm2,%xmm9,%xmm1
.byte 197,233,113,241,8 // vpsllw $0x8,%xmm1,%xmm2
@@ -29708,29 +30353,29 @@ _sk_load_u16_be_avx:
.byte 196,65,123,16,4,65 // vmovsd (%r9,%rax,2),%xmm8
.byte 196,65,49,239,201 // vpxor %xmm9,%xmm9,%xmm9
.byte 73,131,248,1 // cmp $0x1,%r8
- .byte 116,85 // je 5fd8 <_sk_load_u16_be_avx+0x18a>
+ .byte 116,85 // je 63a4 <_sk_load_u16_be_avx+0x18a>
.byte 196,65,57,22,68,65,8 // vmovhpd 0x8(%r9,%rax,2),%xmm8,%xmm8
.byte 73,131,248,3 // cmp $0x3,%r8
- .byte 114,72 // jb 5fd8 <_sk_load_u16_be_avx+0x18a>
+ .byte 114,72 // jb 63a4 <_sk_load_u16_be_avx+0x18a>
.byte 196,193,123,16,84,65,16 // vmovsd 0x10(%r9,%rax,2),%xmm2
.byte 73,131,248,3 // cmp $0x3,%r8
- .byte 116,72 // je 5fe5 <_sk_load_u16_be_avx+0x197>
+ .byte 116,72 // je 63b1 <_sk_load_u16_be_avx+0x197>
.byte 196,193,105,22,84,65,24 // vmovhpd 0x18(%r9,%rax,2),%xmm2,%xmm2
.byte 73,131,248,5 // cmp $0x5,%r8
- .byte 114,59 // jb 5fe5 <_sk_load_u16_be_avx+0x197>
+ .byte 114,59 // jb 63b1 <_sk_load_u16_be_avx+0x197>
.byte 196,193,123,16,92,65,32 // vmovsd 0x20(%r9,%rax,2),%xmm3
.byte 73,131,248,5 // cmp $0x5,%r8
- .byte 15,132,213,254,255,255 // je 5e90 <_sk_load_u16_be_avx+0x42>
+ .byte 15,132,213,254,255,255 // je 625c <_sk_load_u16_be_avx+0x42>
.byte 196,193,97,22,92,65,40 // vmovhpd 0x28(%r9,%rax,2),%xmm3,%xmm3
.byte 73,131,248,7 // cmp $0x7,%r8
- .byte 15,130,196,254,255,255 // jb 5e90 <_sk_load_u16_be_avx+0x42>
+ .byte 15,130,196,254,255,255 // jb 625c <_sk_load_u16_be_avx+0x42>
.byte 196,65,122,126,76,65,48 // vmovq 0x30(%r9,%rax,2),%xmm9
- .byte 233,184,254,255,255 // jmpq 5e90 <_sk_load_u16_be_avx+0x42>
+ .byte 233,184,254,255,255 // jmpq 625c <_sk_load_u16_be_avx+0x42>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
.byte 197,233,87,210 // vxorpd %xmm2,%xmm2,%xmm2
- .byte 233,171,254,255,255 // jmpq 5e90 <_sk_load_u16_be_avx+0x42>
+ .byte 233,171,254,255,255 // jmpq 625c <_sk_load_u16_be_avx+0x42>
.byte 197,225,87,219 // vxorpd %xmm3,%xmm3,%xmm3
- .byte 233,162,254,255,255 // jmpq 5e90 <_sk_load_u16_be_avx+0x42>
+ .byte 233,162,254,255,255 // jmpq 625c <_sk_load_u16_be_avx+0x42>
HIDDEN _sk_load_rgb_u16_be_avx
.globl _sk_load_rgb_u16_be_avx
@@ -29747,7 +30392,7 @@ _sk_load_rgb_u16_be_avx:
.byte 72,141,4,64 // lea (%rax,%rax,2),%rax
.byte 72,193,248,32 // sar $0x20,%rax
.byte 77,133,192 // test %r8,%r8
- .byte 15,133,243,0,0,0 // jne 610c <_sk_load_rgb_u16_be_avx+0x11e>
+ .byte 15,133,243,0,0,0 // jne 64d8 <_sk_load_rgb_u16_be_avx+0x11e>
.byte 196,193,122,111,4,65 // vmovdqu (%r9,%rax,2),%xmm0
.byte 196,193,122,111,84,65,12 // vmovdqu 0xc(%r9,%rax,2),%xmm2
.byte 196,193,122,111,76,65,24 // vmovdqu 0x18(%r9,%rax,2),%xmm1
@@ -29774,7 +30419,7 @@ _sk_load_rgb_u16_be_avx:
.byte 196,226,121,51,192 // vpmovzxwd %xmm0,%xmm0
.byte 196,227,125,24,193,1 // vinsertf128 $0x1,%xmm1,%ymm0,%ymm0
.byte 197,252,91,192 // vcvtdq2ps %ymm0,%ymm0
- .byte 196,98,125,24,29,9,81,0,0 // vbroadcastss 0x5109(%rip),%ymm11 # b1ac <_sk_clut_4D_avx+0x2515>
+ .byte 196,98,125,24,29,9,81,0,0 // vbroadcastss 0x5109(%rip),%ymm11 # b578 <_sk_clut_4D_avx+0x2515>
.byte 196,193,124,89,195 // vmulps %ymm11,%ymm0,%ymm0
.byte 197,185,109,202 // vpunpckhqdq %xmm2,%xmm8,%xmm1
.byte 197,233,113,241,8 // vpsllw $0x8,%xmm1,%xmm2
@@ -29795,41 +30440,41 @@ _sk_load_rgb_u16_be_avx:
.byte 197,252,91,210 // vcvtdq2ps %ymm2,%ymm2
.byte 196,193,108,89,211 // vmulps %ymm11,%ymm2,%ymm2
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,29,166,80,0,0 // vbroadcastss 0x50a6(%rip),%ymm3 # b1b0 <_sk_clut_4D_avx+0x2519>
+ .byte 196,226,125,24,29,166,80,0,0 // vbroadcastss 0x50a6(%rip),%ymm3 # b57c <_sk_clut_4D_avx+0x2519>
.byte 255,224 // jmpq *%rax
.byte 196,193,121,110,4,65 // vmovd (%r9,%rax,2),%xmm0
.byte 196,193,121,196,68,65,4,2 // vpinsrw $0x2,0x4(%r9,%rax,2),%xmm0,%xmm0
.byte 73,131,248,1 // cmp $0x1,%r8
- .byte 117,5 // jne 6125 <_sk_load_rgb_u16_be_avx+0x137>
- .byte 233,40,255,255,255 // jmpq 604d <_sk_load_rgb_u16_be_avx+0x5f>
+ .byte 117,5 // jne 64f1 <_sk_load_rgb_u16_be_avx+0x137>
+ .byte 233,40,255,255,255 // jmpq 6419 <_sk_load_rgb_u16_be_avx+0x5f>
.byte 196,193,121,110,76,65,6 // vmovd 0x6(%r9,%rax,2),%xmm1
.byte 196,65,113,196,68,65,10,2 // vpinsrw $0x2,0xa(%r9,%rax,2),%xmm1,%xmm8
.byte 73,131,248,3 // cmp $0x3,%r8
- .byte 114,26 // jb 6154 <_sk_load_rgb_u16_be_avx+0x166>
+ .byte 114,26 // jb 6520 <_sk_load_rgb_u16_be_avx+0x166>
.byte 196,193,121,110,76,65,12 // vmovd 0xc(%r9,%rax,2),%xmm1
.byte 196,193,113,196,84,65,16,2 // vpinsrw $0x2,0x10(%r9,%rax,2),%xmm1,%xmm2
.byte 73,131,248,3 // cmp $0x3,%r8
- .byte 117,10 // jne 6159 <_sk_load_rgb_u16_be_avx+0x16b>
- .byte 233,249,254,255,255 // jmpq 604d <_sk_load_rgb_u16_be_avx+0x5f>
- .byte 233,244,254,255,255 // jmpq 604d <_sk_load_rgb_u16_be_avx+0x5f>
+ .byte 117,10 // jne 6525 <_sk_load_rgb_u16_be_avx+0x16b>
+ .byte 233,249,254,255,255 // jmpq 6419 <_sk_load_rgb_u16_be_avx+0x5f>
+ .byte 233,244,254,255,255 // jmpq 6419 <_sk_load_rgb_u16_be_avx+0x5f>
.byte 196,193,121,110,76,65,18 // vmovd 0x12(%r9,%rax,2),%xmm1
.byte 196,65,113,196,76,65,22,2 // vpinsrw $0x2,0x16(%r9,%rax,2),%xmm1,%xmm9
.byte 73,131,248,5 // cmp $0x5,%r8
- .byte 114,26 // jb 6188 <_sk_load_rgb_u16_be_avx+0x19a>
+ .byte 114,26 // jb 6554 <_sk_load_rgb_u16_be_avx+0x19a>
.byte 196,193,121,110,76,65,24 // vmovd 0x18(%r9,%rax,2),%xmm1
.byte 196,193,113,196,76,65,28,2 // vpinsrw $0x2,0x1c(%r9,%rax,2),%xmm1,%xmm1
.byte 73,131,248,5 // cmp $0x5,%r8
- .byte 117,10 // jne 618d <_sk_load_rgb_u16_be_avx+0x19f>
- .byte 233,197,254,255,255 // jmpq 604d <_sk_load_rgb_u16_be_avx+0x5f>
- .byte 233,192,254,255,255 // jmpq 604d <_sk_load_rgb_u16_be_avx+0x5f>
+ .byte 117,10 // jne 6559 <_sk_load_rgb_u16_be_avx+0x19f>
+ .byte 233,197,254,255,255 // jmpq 6419 <_sk_load_rgb_u16_be_avx+0x5f>
+ .byte 233,192,254,255,255 // jmpq 6419 <_sk_load_rgb_u16_be_avx+0x5f>
.byte 196,193,121,110,92,65,30 // vmovd 0x1e(%r9,%rax,2),%xmm3
.byte 196,65,97,196,92,65,34,2 // vpinsrw $0x2,0x22(%r9,%rax,2),%xmm3,%xmm11
.byte 73,131,248,7 // cmp $0x7,%r8
- .byte 114,20 // jb 61b6 <_sk_load_rgb_u16_be_avx+0x1c8>
+ .byte 114,20 // jb 6582 <_sk_load_rgb_u16_be_avx+0x1c8>
.byte 196,193,121,110,92,65,36 // vmovd 0x24(%r9,%rax,2),%xmm3
.byte 196,193,97,196,92,65,40,2 // vpinsrw $0x2,0x28(%r9,%rax,2),%xmm3,%xmm3
- .byte 233,151,254,255,255 // jmpq 604d <_sk_load_rgb_u16_be_avx+0x5f>
- .byte 233,146,254,255,255 // jmpq 604d <_sk_load_rgb_u16_be_avx+0x5f>
+ .byte 233,151,254,255,255 // jmpq 6419 <_sk_load_rgb_u16_be_avx+0x5f>
+ .byte 233,146,254,255,255 // jmpq 6419 <_sk_load_rgb_u16_be_avx+0x5f>
HIDDEN _sk_store_u16_be_avx
.globl _sk_store_u16_be_avx
@@ -29843,7 +30488,7 @@ _sk_store_u16_be_avx:
.byte 77,1,201 // add %r9,%r9
.byte 76,3,8 // add (%rax),%r9
.byte 73,99,194 // movslq %r10d,%rax
- .byte 196,98,125,24,5,210,79,0,0 // vbroadcastss 0x4fd2(%rip),%ymm8 # b1b4 <_sk_clut_4D_avx+0x251d>
+ .byte 196,98,125,24,5,210,79,0,0 // vbroadcastss 0x4fd2(%rip),%ymm8 # b580 <_sk_clut_4D_avx+0x251d>
.byte 196,65,124,89,200 // vmulps %ymm8,%ymm0,%ymm9
.byte 196,65,125,91,201 // vcvtps2dq %ymm9,%ymm9
.byte 196,67,125,25,202,1 // vextractf128 $0x1,%ymm9,%xmm10
@@ -29881,7 +30526,7 @@ _sk_store_u16_be_avx:
.byte 196,65,17,98,200 // vpunpckldq %xmm8,%xmm13,%xmm9
.byte 196,65,17,106,192 // vpunpckhdq %xmm8,%xmm13,%xmm8
.byte 77,133,192 // test %r8,%r8
- .byte 117,31 // jne 62c6 <_sk_store_u16_be_avx+0x10b>
+ .byte 117,31 // jne 6692 <_sk_store_u16_be_avx+0x10b>
.byte 196,65,120,17,28,65 // vmovups %xmm11,(%r9,%rax,2)
.byte 196,65,120,17,84,65,16 // vmovups %xmm10,0x10(%r9,%rax,2)
.byte 196,65,120,17,76,65,32 // vmovups %xmm9,0x20(%r9,%rax,2)
@@ -29890,22 +30535,22 @@ _sk_store_u16_be_avx:
.byte 255,224 // jmpq *%rax
.byte 196,65,121,214,28,65 // vmovq %xmm11,(%r9,%rax,2)
.byte 73,131,248,1 // cmp $0x1,%r8
- .byte 116,240 // je 62c2 <_sk_store_u16_be_avx+0x107>
+ .byte 116,240 // je 668e <_sk_store_u16_be_avx+0x107>
.byte 196,65,121,23,92,65,8 // vmovhpd %xmm11,0x8(%r9,%rax,2)
.byte 73,131,248,3 // cmp $0x3,%r8
- .byte 114,227 // jb 62c2 <_sk_store_u16_be_avx+0x107>
+ .byte 114,227 // jb 668e <_sk_store_u16_be_avx+0x107>
.byte 196,65,121,214,84,65,16 // vmovq %xmm10,0x10(%r9,%rax,2)
- .byte 116,218 // je 62c2 <_sk_store_u16_be_avx+0x107>
+ .byte 116,218 // je 668e <_sk_store_u16_be_avx+0x107>
.byte 196,65,121,23,84,65,24 // vmovhpd %xmm10,0x18(%r9,%rax,2)
.byte 73,131,248,5 // cmp $0x5,%r8
- .byte 114,205 // jb 62c2 <_sk_store_u16_be_avx+0x107>
+ .byte 114,205 // jb 668e <_sk_store_u16_be_avx+0x107>
.byte 196,65,121,214,76,65,32 // vmovq %xmm9,0x20(%r9,%rax,2)
- .byte 116,196 // je 62c2 <_sk_store_u16_be_avx+0x107>
+ .byte 116,196 // je 668e <_sk_store_u16_be_avx+0x107>
.byte 196,65,121,23,76,65,40 // vmovhpd %xmm9,0x28(%r9,%rax,2)
.byte 73,131,248,7 // cmp $0x7,%r8
- .byte 114,183 // jb 62c2 <_sk_store_u16_be_avx+0x107>
+ .byte 114,183 // jb 668e <_sk_store_u16_be_avx+0x107>
.byte 196,65,121,214,68,65,48 // vmovq %xmm8,0x30(%r9,%rax,2)
- .byte 235,174 // jmp 62c2 <_sk_store_u16_be_avx+0x107>
+ .byte 235,174 // jmp 668e <_sk_store_u16_be_avx+0x107>
HIDDEN _sk_load_f32_avx
.globl _sk_load_f32_avx
@@ -29913,7 +30558,7 @@ FUNCTION(_sk_load_f32_avx)
_sk_load_f32_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 73,131,248,7 // cmp $0x7,%r8
- .byte 15,135,128,0,0,0 // ja 63a0 <_sk_load_f32_avx+0x8c>
+ .byte 15,135,128,0,0,0 // ja 676c <_sk_load_f32_avx+0x8c>
.byte 68,141,20,149,0,0,0,0 // lea 0x0(,%rdx,4),%r10d
.byte 76,99,88,8 // movslq 0x8(%rax),%r11
.byte 76,99,201 // movslq %ecx,%r9
@@ -29921,7 +30566,7 @@ _sk_load_f32_avx:
.byte 73,193,225,2 // shl $0x2,%r9
.byte 76,3,8 // add (%rax),%r9
.byte 77,99,210 // movslq %r10d,%r10
- .byte 76,141,29,132,0,0,0 // lea 0x84(%rip),%r11 # 63c8 <_sk_load_f32_avx+0xb4>
+ .byte 76,141,29,132,0,0,0 // lea 0x84(%rip),%r11 # 6794 <_sk_load_f32_avx+0xb4>
.byte 75,99,4,131 // movslq (%r11,%r8,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
@@ -29970,7 +30615,7 @@ FUNCTION(_sk_load_f32_dst_avx)
_sk_load_f32_dst_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 73,131,248,7 // cmp $0x7,%r8
- .byte 15,135,128,0,0,0 // ja 6474 <_sk_load_f32_dst_avx+0x8c>
+ .byte 15,135,128,0,0,0 // ja 6840 <_sk_load_f32_dst_avx+0x8c>
.byte 68,141,20,149,0,0,0,0 // lea 0x0(,%rdx,4),%r10d
.byte 76,99,88,8 // movslq 0x8(%rax),%r11
.byte 76,99,201 // movslq %ecx,%r9
@@ -29978,7 +30623,7 @@ _sk_load_f32_dst_avx:
.byte 73,193,225,2 // shl $0x2,%r9
.byte 76,3,8 // add (%rax),%r9
.byte 77,99,210 // movslq %r10d,%r10
- .byte 76,141,29,132,0,0,0 // lea 0x84(%rip),%r11 # 649c <_sk_load_f32_dst_avx+0xb4>
+ .byte 76,141,29,132,0,0,0 // lea 0x84(%rip),%r11 # 6868 <_sk_load_f32_dst_avx+0xb4>
.byte 75,99,4,131 // movslq (%r11,%r8,4),%rax
.byte 76,1,216 // add %r11,%rax
.byte 255,224 // jmpq *%rax
@@ -30042,7 +30687,7 @@ _sk_store_f32_avx:
.byte 196,65,37,20,196 // vunpcklpd %ymm12,%ymm11,%ymm8
.byte 196,65,37,21,220 // vunpckhpd %ymm12,%ymm11,%ymm11
.byte 77,133,192 // test %r8,%r8
- .byte 117,55 // jne 653b <_sk_store_f32_avx+0x7f>
+ .byte 117,55 // jne 6907 <_sk_store_f32_avx+0x7f>
.byte 196,67,45,24,225,1 // vinsertf128 $0x1,%xmm9,%ymm10,%ymm12
.byte 196,67,61,24,235,1 // vinsertf128 $0x1,%xmm11,%ymm8,%ymm13
.byte 196,67,45,6,201,49 // vperm2f128 $0x31,%ymm9,%ymm10,%ymm9
@@ -30055,22 +30700,22 @@ _sk_store_f32_avx:
.byte 255,224 // jmpq *%rax
.byte 196,65,121,17,20,129 // vmovupd %xmm10,(%r9,%rax,4)
.byte 73,131,248,1 // cmp $0x1,%r8
- .byte 116,240 // je 6537 <_sk_store_f32_avx+0x7b>
+ .byte 116,240 // je 6903 <_sk_store_f32_avx+0x7b>
.byte 196,65,121,17,76,129,16 // vmovupd %xmm9,0x10(%r9,%rax,4)
.byte 73,131,248,3 // cmp $0x3,%r8
- .byte 114,227 // jb 6537 <_sk_store_f32_avx+0x7b>
+ .byte 114,227 // jb 6903 <_sk_store_f32_avx+0x7b>
.byte 196,65,121,17,68,129,32 // vmovupd %xmm8,0x20(%r9,%rax,4)
- .byte 116,218 // je 6537 <_sk_store_f32_avx+0x7b>
+ .byte 116,218 // je 6903 <_sk_store_f32_avx+0x7b>
.byte 196,65,121,17,92,129,48 // vmovupd %xmm11,0x30(%r9,%rax,4)
.byte 73,131,248,5 // cmp $0x5,%r8
- .byte 114,205 // jb 6537 <_sk_store_f32_avx+0x7b>
+ .byte 114,205 // jb 6903 <_sk_store_f32_avx+0x7b>
.byte 196,67,125,25,84,129,64,1 // vextractf128 $0x1,%ymm10,0x40(%r9,%rax,4)
- .byte 116,195 // je 6537 <_sk_store_f32_avx+0x7b>
+ .byte 116,195 // je 6903 <_sk_store_f32_avx+0x7b>
.byte 196,67,125,25,76,129,80,1 // vextractf128 $0x1,%ymm9,0x50(%r9,%rax,4)
.byte 73,131,248,7 // cmp $0x7,%r8
- .byte 114,181 // jb 6537 <_sk_store_f32_avx+0x7b>
+ .byte 114,181 // jb 6903 <_sk_store_f32_avx+0x7b>
.byte 196,67,125,25,68,129,96,1 // vextractf128 $0x1,%ymm8,0x60(%r9,%rax,4)
- .byte 235,171 // jmp 6537 <_sk_store_f32_avx+0x7b>
+ .byte 235,171 // jmp 6903 <_sk_store_f32_avx+0x7b>
HIDDEN _sk_clamp_x_avx
.globl _sk_clamp_x_avx
@@ -30163,7 +30808,7 @@ _sk_mirror_x_avx:
.byte 196,193,58,88,192 // vaddss %xmm8,%xmm8,%xmm0
.byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
.byte 196,99,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm8
- .byte 197,178,89,5,247,74,0,0 // vmulss 0x4af7(%rip),%xmm9,%xmm0 # b1b8 <_sk_clut_4D_avx+0x2521>
+ .byte 197,178,89,5,247,74,0,0 // vmulss 0x4af7(%rip),%xmm9,%xmm0 # b584 <_sk_clut_4D_avx+0x2521>
.byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
.byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
.byte 197,164,89,192 // vmulps %ymm0,%ymm11,%ymm0
@@ -30197,7 +30842,7 @@ _sk_mirror_y_avx:
.byte 196,193,58,88,200 // vaddss %xmm8,%xmm8,%xmm1
.byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
.byte 196,99,117,24,193,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm8
- .byte 197,178,89,13,110,74,0,0 // vmulss 0x4a6e(%rip),%xmm9,%xmm1 # b1bc <_sk_clut_4D_avx+0x2525>
+ .byte 197,178,89,13,110,74,0,0 // vmulss 0x4a6e(%rip),%xmm9,%xmm1 # b588 <_sk_clut_4D_avx+0x2525>
.byte 196,227,121,4,201,0 // vpermilps $0x0,%xmm1,%xmm1
.byte 196,227,117,24,201,1 // vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
.byte 197,164,89,201 // vmulps %ymm1,%ymm11,%ymm1
@@ -30224,7 +30869,7 @@ FUNCTION(_sk_clamp_x_1_avx)
_sk_clamp_x_1_avx:
.byte 196,65,60,87,192 // vxorps %ymm8,%ymm8,%ymm8
.byte 197,188,95,192 // vmaxps %ymm0,%ymm8,%ymm0
- .byte 196,98,125,24,5,8,74,0,0 // vbroadcastss 0x4a08(%rip),%ymm8 # b1c0 <_sk_clut_4D_avx+0x2529>
+ .byte 196,98,125,24,5,8,74,0,0 // vbroadcastss 0x4a08(%rip),%ymm8 # b58c <_sk_clut_4D_avx+0x2529>
.byte 196,193,124,93,192 // vminps %ymm8,%ymm0,%ymm0
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -30242,9 +30887,9 @@ HIDDEN _sk_mirror_x_1_avx
.globl _sk_mirror_x_1_avx
FUNCTION(_sk_mirror_x_1_avx)
_sk_mirror_x_1_avx:
- .byte 196,98,125,24,5,235,73,0,0 // vbroadcastss 0x49eb(%rip),%ymm8 # b1c4 <_sk_clut_4D_avx+0x252d>
+ .byte 196,98,125,24,5,235,73,0,0 // vbroadcastss 0x49eb(%rip),%ymm8 # b590 <_sk_clut_4D_avx+0x252d>
.byte 196,193,124,88,192 // vaddps %ymm8,%ymm0,%ymm0
- .byte 196,98,125,24,13,225,73,0,0 // vbroadcastss 0x49e1(%rip),%ymm9 # b1c8 <_sk_clut_4D_avx+0x2531>
+ .byte 196,98,125,24,13,225,73,0,0 // vbroadcastss 0x49e1(%rip),%ymm9 # b594 <_sk_clut_4D_avx+0x2531>
.byte 196,65,124,89,201 // vmulps %ymm9,%ymm0,%ymm9
.byte 196,67,125,8,201,1 // vroundps $0x1,%ymm9,%ymm9
.byte 196,65,52,88,201 // vaddps %ymm9,%ymm9,%ymm9
@@ -30260,12 +30905,12 @@ HIDDEN _sk_luminance_to_alpha_avx
.globl _sk_luminance_to_alpha_avx
FUNCTION(_sk_luminance_to_alpha_avx)
_sk_luminance_to_alpha_avx:
- .byte 196,226,125,24,29,177,73,0,0 // vbroadcastss 0x49b1(%rip),%ymm3 # b1cc <_sk_clut_4D_avx+0x2535>
+ .byte 196,226,125,24,29,177,73,0,0 // vbroadcastss 0x49b1(%rip),%ymm3 # b598 <_sk_clut_4D_avx+0x2535>
.byte 197,252,89,195 // vmulps %ymm3,%ymm0,%ymm0
- .byte 196,226,125,24,29,168,73,0,0 // vbroadcastss 0x49a8(%rip),%ymm3 # b1d0 <_sk_clut_4D_avx+0x2539>
+ .byte 196,226,125,24,29,168,73,0,0 // vbroadcastss 0x49a8(%rip),%ymm3 # b59c <_sk_clut_4D_avx+0x2539>
.byte 197,244,89,203 // vmulps %ymm3,%ymm1,%ymm1
.byte 197,252,88,193 // vaddps %ymm1,%ymm0,%ymm0
- .byte 196,226,125,24,13,155,73,0,0 // vbroadcastss 0x499b(%rip),%ymm1 # b1d4 <_sk_clut_4D_avx+0x253d>
+ .byte 196,226,125,24,13,155,73,0,0 // vbroadcastss 0x499b(%rip),%ymm1 # b5a0 <_sk_clut_4D_avx+0x253d>
.byte 197,236,89,201 // vmulps %ymm1,%ymm2,%ymm1
.byte 197,252,88,217 // vaddps %ymm1,%ymm0,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -30513,9 +31158,9 @@ _sk_evenly_spaced_gradient_avx:
.byte 72,139,24 // mov (%rax),%rbx
.byte 72,139,104,8 // mov 0x8(%rax),%rbp
.byte 72,255,203 // dec %rbx
- .byte 120,7 // js 6bdd <_sk_evenly_spaced_gradient_avx+0x25>
+ .byte 120,7 // js 6fa9 <_sk_evenly_spaced_gradient_avx+0x25>
.byte 196,225,242,42,203 // vcvtsi2ss %rbx,%xmm1,%xmm1
- .byte 235,21 // jmp 6bf2 <_sk_evenly_spaced_gradient_avx+0x3a>
+ .byte 235,21 // jmp 6fbe <_sk_evenly_spaced_gradient_avx+0x3a>
.byte 73,137,217 // mov %rbx,%r9
.byte 73,209,233 // shr %r9
.byte 131,227,1 // and $0x1,%ebx
@@ -30673,18 +31318,18 @@ HIDDEN _sk_gauss_a_to_rgba_avx
.globl _sk_gauss_a_to_rgba_avx
FUNCTION(_sk_gauss_a_to_rgba_avx)
_sk_gauss_a_to_rgba_avx:
- .byte 196,226,125,24,5,130,66,0,0 // vbroadcastss 0x4282(%rip),%ymm0 # b1d8 <_sk_clut_4D_avx+0x2541>
+ .byte 196,226,125,24,5,130,66,0,0 // vbroadcastss 0x4282(%rip),%ymm0 # b5a4 <_sk_clut_4D_avx+0x2541>
.byte 197,228,89,192 // vmulps %ymm0,%ymm3,%ymm0
- .byte 196,226,125,24,13,121,66,0,0 // vbroadcastss 0x4279(%rip),%ymm1 # b1dc <_sk_clut_4D_avx+0x2545>
+ .byte 196,226,125,24,13,121,66,0,0 // vbroadcastss 0x4279(%rip),%ymm1 # b5a8 <_sk_clut_4D_avx+0x2545>
.byte 197,252,88,193 // vaddps %ymm1,%ymm0,%ymm0
.byte 197,252,89,195 // vmulps %ymm3,%ymm0,%ymm0
- .byte 196,226,125,24,13,108,66,0,0 // vbroadcastss 0x426c(%rip),%ymm1 # b1e0 <_sk_clut_4D_avx+0x2549>
+ .byte 196,226,125,24,13,108,66,0,0 // vbroadcastss 0x426c(%rip),%ymm1 # b5ac <_sk_clut_4D_avx+0x2549>
.byte 197,252,88,193 // vaddps %ymm1,%ymm0,%ymm0
.byte 197,252,89,195 // vmulps %ymm3,%ymm0,%ymm0
- .byte 196,226,125,24,13,95,66,0,0 // vbroadcastss 0x425f(%rip),%ymm1 # b1e4 <_sk_clut_4D_avx+0x254d>
+ .byte 196,226,125,24,13,95,66,0,0 // vbroadcastss 0x425f(%rip),%ymm1 # b5b0 <_sk_clut_4D_avx+0x254d>
.byte 197,252,88,193 // vaddps %ymm1,%ymm0,%ymm0
.byte 197,252,89,195 // vmulps %ymm3,%ymm0,%ymm0
- .byte 196,226,125,24,13,82,66,0,0 // vbroadcastss 0x4252(%rip),%ymm1 # b1e8 <_sk_clut_4D_avx+0x2551>
+ .byte 196,226,125,24,13,82,66,0,0 // vbroadcastss 0x4252(%rip),%ymm1 # b5b4 <_sk_clut_4D_avx+0x2551>
.byte 197,252,88,193 // vaddps %ymm1,%ymm0,%ymm0
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 197,252,40,200 // vmovaps %ymm0,%ymm1
@@ -30707,12 +31352,12 @@ _sk_gradient_avx:
.byte 76,139,8 // mov (%rax),%r9
.byte 197,244,87,201 // vxorps %ymm1,%ymm1,%ymm1
.byte 73,131,249,2 // cmp $0x2,%r9
- .byte 114,80 // jb 7019 <_sk_gradient_avx+0x6f>
+ .byte 114,80 // jb 73e5 <_sk_gradient_avx+0x6f>
.byte 72,139,88,72 // mov 0x48(%rax),%rbx
.byte 73,255,201 // dec %r9
.byte 72,131,195,4 // add $0x4,%rbx
.byte 196,65,52,87,201 // vxorps %ymm9,%ymm9,%ymm9
- .byte 196,98,125,24,21,10,66,0,0 // vbroadcastss 0x420a(%rip),%ymm10 # b1ec <_sk_clut_4D_avx+0x2555>
+ .byte 196,98,125,24,21,10,66,0,0 // vbroadcastss 0x420a(%rip),%ymm10 # b5b8 <_sk_clut_4D_avx+0x2555>
.byte 197,244,87,201 // vxorps %ymm1,%ymm1,%ymm1
.byte 196,98,125,24,3 // vbroadcastss (%rbx),%ymm8
.byte 197,60,194,192,2 // vcmpleps %ymm0,%ymm8,%ymm8
@@ -30724,7 +31369,7 @@ _sk_gradient_avx:
.byte 196,227,117,24,202,1 // vinsertf128 $0x1,%xmm2,%ymm1,%ymm1
.byte 72,131,195,4 // add $0x4,%rbx
.byte 73,255,201 // dec %r9
- .byte 117,205 // jne 6fe6 <_sk_gradient_avx+0x3c>
+ .byte 117,205 // jne 73b2 <_sk_gradient_avx+0x3c>
.byte 196,195,249,22,201,1 // vpextrq $0x1,%xmm1,%r9
.byte 69,137,202 // mov %r9d,%r10d
.byte 73,193,233,32 // shr $0x20,%r9
@@ -30907,27 +31552,27 @@ _sk_xy_to_unit_angle_avx:
.byte 196,65,52,95,226 // vmaxps %ymm10,%ymm9,%ymm12
.byte 196,65,36,94,220 // vdivps %ymm12,%ymm11,%ymm11
.byte 196,65,36,89,227 // vmulps %ymm11,%ymm11,%ymm12
- .byte 196,98,125,24,45,0,62,0,0 // vbroadcastss 0x3e00(%rip),%ymm13 # b1f0 <_sk_clut_4D_avx+0x2559>
+ .byte 196,98,125,24,45,0,62,0,0 // vbroadcastss 0x3e00(%rip),%ymm13 # b5bc <_sk_clut_4D_avx+0x2559>
.byte 196,65,28,89,237 // vmulps %ymm13,%ymm12,%ymm13
- .byte 196,98,125,24,53,246,61,0,0 // vbroadcastss 0x3df6(%rip),%ymm14 # b1f4 <_sk_clut_4D_avx+0x255d>
+ .byte 196,98,125,24,53,246,61,0,0 // vbroadcastss 0x3df6(%rip),%ymm14 # b5c0 <_sk_clut_4D_avx+0x255d>
.byte 196,65,20,88,238 // vaddps %ymm14,%ymm13,%ymm13
.byte 196,65,28,89,237 // vmulps %ymm13,%ymm12,%ymm13
- .byte 196,98,125,24,53,231,61,0,0 // vbroadcastss 0x3de7(%rip),%ymm14 # b1f8 <_sk_clut_4D_avx+0x2561>
+ .byte 196,98,125,24,53,231,61,0,0 // vbroadcastss 0x3de7(%rip),%ymm14 # b5c4 <_sk_clut_4D_avx+0x2561>
.byte 196,65,20,88,238 // vaddps %ymm14,%ymm13,%ymm13
.byte 196,65,28,89,229 // vmulps %ymm13,%ymm12,%ymm12
- .byte 196,98,125,24,45,216,61,0,0 // vbroadcastss 0x3dd8(%rip),%ymm13 # b1fc <_sk_clut_4D_avx+0x2565>
+ .byte 196,98,125,24,45,216,61,0,0 // vbroadcastss 0x3dd8(%rip),%ymm13 # b5c8 <_sk_clut_4D_avx+0x2565>
.byte 196,65,28,88,229 // vaddps %ymm13,%ymm12,%ymm12
.byte 196,65,36,89,220 // vmulps %ymm12,%ymm11,%ymm11
.byte 196,65,52,194,202,1 // vcmpltps %ymm10,%ymm9,%ymm9
- .byte 196,98,125,24,21,195,61,0,0 // vbroadcastss 0x3dc3(%rip),%ymm10 # b200 <_sk_clut_4D_avx+0x2569>
+ .byte 196,98,125,24,21,195,61,0,0 // vbroadcastss 0x3dc3(%rip),%ymm10 # b5cc <_sk_clut_4D_avx+0x2569>
.byte 196,65,44,92,211 // vsubps %ymm11,%ymm10,%ymm10
.byte 196,67,37,74,202,144 // vblendvps %ymm9,%ymm10,%ymm11,%ymm9
.byte 196,193,124,194,192,1 // vcmpltps %ymm8,%ymm0,%ymm0
- .byte 196,98,125,24,21,173,61,0,0 // vbroadcastss 0x3dad(%rip),%ymm10 # b204 <_sk_clut_4D_avx+0x256d>
+ .byte 196,98,125,24,21,173,61,0,0 // vbroadcastss 0x3dad(%rip),%ymm10 # b5d0 <_sk_clut_4D_avx+0x256d>
.byte 196,65,44,92,209 // vsubps %ymm9,%ymm10,%ymm10
.byte 196,195,53,74,194,0 // vblendvps %ymm0,%ymm10,%ymm9,%ymm0
.byte 196,65,116,194,200,1 // vcmpltps %ymm8,%ymm1,%ymm9
- .byte 196,98,125,24,21,151,61,0,0 // vbroadcastss 0x3d97(%rip),%ymm10 # b208 <_sk_clut_4D_avx+0x2571>
+ .byte 196,98,125,24,21,151,61,0,0 // vbroadcastss 0x3d97(%rip),%ymm10 # b5d4 <_sk_clut_4D_avx+0x2571>
.byte 197,44,92,208 // vsubps %ymm0,%ymm10,%ymm10
.byte 196,195,125,74,194,144 // vblendvps %ymm9,%ymm10,%ymm0,%ymm0
.byte 196,65,124,194,200,3 // vcmpunordps %ymm8,%ymm0,%ymm9
@@ -30957,7 +31602,7 @@ _sk_xy_to_2pt_conical_quadratic_max_avx:
.byte 196,67,121,4,210,0 // vpermilps $0x0,%xmm10,%xmm10
.byte 196,67,45,24,210,1 // vinsertf128 $0x1,%xmm10,%ymm10,%ymm10
.byte 197,44,88,208 // vaddps %ymm0,%ymm10,%ymm10
- .byte 196,98,125,24,29,65,61,0,0 // vbroadcastss 0x3d41(%rip),%ymm11 # b20c <_sk_clut_4D_avx+0x2575>
+ .byte 196,98,125,24,29,65,61,0,0 // vbroadcastss 0x3d41(%rip),%ymm11 # b5d8 <_sk_clut_4D_avx+0x2575>
.byte 196,65,44,89,211 // vmulps %ymm11,%ymm10,%ymm10
.byte 197,252,89,192 // vmulps %ymm0,%ymm0,%ymm0
.byte 197,116,89,217 // vmulps %ymm1,%ymm1,%ymm11
@@ -30966,17 +31611,17 @@ _sk_xy_to_2pt_conical_quadratic_max_avx:
.byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
.byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
.byte 197,164,92,192 // vsubps %ymm0,%ymm11,%ymm0
- .byte 196,98,125,24,13,21,61,0,0 // vbroadcastss 0x3d15(%rip),%ymm9 # b210 <_sk_clut_4D_avx+0x2579>
+ .byte 196,98,125,24,13,21,61,0,0 // vbroadcastss 0x3d15(%rip),%ymm9 # b5dc <_sk_clut_4D_avx+0x2579>
.byte 196,65,60,89,193 // vmulps %ymm9,%ymm8,%ymm8
.byte 197,188,89,192 // vmulps %ymm0,%ymm8,%ymm0
.byte 196,65,44,89,194 // vmulps %ymm10,%ymm10,%ymm8
.byte 196,193,124,88,192 // vaddps %ymm8,%ymm0,%ymm0
.byte 197,252,81,192 // vsqrtps %ymm0,%ymm0
.byte 196,98,125,24,64,36 // vbroadcastss 0x24(%rax),%ymm8
- .byte 196,98,125,24,13,243,60,0,0 // vbroadcastss 0x3cf3(%rip),%ymm9 # b214 <_sk_clut_4D_avx+0x257d>
+ .byte 196,98,125,24,13,243,60,0,0 // vbroadcastss 0x3cf3(%rip),%ymm9 # b5e0 <_sk_clut_4D_avx+0x257d>
.byte 196,65,44,87,201 // vxorps %ymm9,%ymm10,%ymm9
.byte 196,65,124,92,210 // vsubps %ymm10,%ymm0,%ymm10
- .byte 196,98,125,24,29,228,60,0,0 // vbroadcastss 0x3ce4(%rip),%ymm11 # b218 <_sk_clut_4D_avx+0x2581>
+ .byte 196,98,125,24,29,228,60,0,0 // vbroadcastss 0x3ce4(%rip),%ymm11 # b5e4 <_sk_clut_4D_avx+0x2581>
.byte 196,65,60,89,195 // vmulps %ymm11,%ymm8,%ymm8
.byte 196,65,60,89,210 // vmulps %ymm10,%ymm8,%ymm10
.byte 197,180,92,192 // vsubps %ymm0,%ymm9,%ymm0
@@ -30996,7 +31641,7 @@ _sk_xy_to_2pt_conical_quadratic_min_avx:
.byte 196,67,121,4,210,0 // vpermilps $0x0,%xmm10,%xmm10
.byte 196,67,45,24,210,1 // vinsertf128 $0x1,%xmm10,%ymm10,%ymm10
.byte 197,44,88,208 // vaddps %ymm0,%ymm10,%ymm10
- .byte 196,98,125,24,29,163,60,0,0 // vbroadcastss 0x3ca3(%rip),%ymm11 # b21c <_sk_clut_4D_avx+0x2585>
+ .byte 196,98,125,24,29,163,60,0,0 // vbroadcastss 0x3ca3(%rip),%ymm11 # b5e8 <_sk_clut_4D_avx+0x2585>
.byte 196,65,44,89,211 // vmulps %ymm11,%ymm10,%ymm10
.byte 197,252,89,192 // vmulps %ymm0,%ymm0,%ymm0
.byte 197,116,89,217 // vmulps %ymm1,%ymm1,%ymm11
@@ -31005,17 +31650,17 @@ _sk_xy_to_2pt_conical_quadratic_min_avx:
.byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
.byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
.byte 197,164,92,192 // vsubps %ymm0,%ymm11,%ymm0
- .byte 196,98,125,24,13,119,60,0,0 // vbroadcastss 0x3c77(%rip),%ymm9 # b220 <_sk_clut_4D_avx+0x2589>
+ .byte 196,98,125,24,13,119,60,0,0 // vbroadcastss 0x3c77(%rip),%ymm9 # b5ec <_sk_clut_4D_avx+0x2589>
.byte 196,65,60,89,193 // vmulps %ymm9,%ymm8,%ymm8
.byte 197,188,89,192 // vmulps %ymm0,%ymm8,%ymm0
.byte 196,65,44,89,194 // vmulps %ymm10,%ymm10,%ymm8
.byte 196,193,124,88,192 // vaddps %ymm8,%ymm0,%ymm0
.byte 197,252,81,192 // vsqrtps %ymm0,%ymm0
.byte 196,98,125,24,64,36 // vbroadcastss 0x24(%rax),%ymm8
- .byte 196,98,125,24,13,85,60,0,0 // vbroadcastss 0x3c55(%rip),%ymm9 # b224 <_sk_clut_4D_avx+0x258d>
+ .byte 196,98,125,24,13,85,60,0,0 // vbroadcastss 0x3c55(%rip),%ymm9 # b5f0 <_sk_clut_4D_avx+0x258d>
.byte 196,65,44,87,201 // vxorps %ymm9,%ymm10,%ymm9
.byte 196,65,124,92,210 // vsubps %ymm10,%ymm0,%ymm10
- .byte 196,98,125,24,29,70,60,0,0 // vbroadcastss 0x3c46(%rip),%ymm11 # b228 <_sk_clut_4D_avx+0x2591>
+ .byte 196,98,125,24,29,70,60,0,0 // vbroadcastss 0x3c46(%rip),%ymm11 # b5f4 <_sk_clut_4D_avx+0x2591>
.byte 196,65,60,89,195 // vmulps %ymm11,%ymm8,%ymm8
.byte 196,65,60,89,210 // vmulps %ymm10,%ymm8,%ymm10
.byte 197,180,92,192 // vsubps %ymm0,%ymm9,%ymm0
@@ -31034,7 +31679,7 @@ _sk_xy_to_2pt_conical_linear_avx:
.byte 196,67,121,4,201,0 // vpermilps $0x0,%xmm9,%xmm9
.byte 196,67,53,24,201,1 // vinsertf128 $0x1,%xmm9,%ymm9,%ymm9
.byte 197,52,88,200 // vaddps %ymm0,%ymm9,%ymm9
- .byte 196,98,125,24,21,11,60,0,0 // vbroadcastss 0x3c0b(%rip),%ymm10 # b22c <_sk_clut_4D_avx+0x2595>
+ .byte 196,98,125,24,21,11,60,0,0 // vbroadcastss 0x3c0b(%rip),%ymm10 # b5f8 <_sk_clut_4D_avx+0x2595>
.byte 196,65,52,89,202 // vmulps %ymm10,%ymm9,%ymm9
.byte 197,252,89,192 // vmulps %ymm0,%ymm0,%ymm0
.byte 197,116,89,209 // vmulps %ymm1,%ymm1,%ymm10
@@ -31043,7 +31688,7 @@ _sk_xy_to_2pt_conical_linear_avx:
.byte 196,227,121,4,192,0 // vpermilps $0x0,%xmm0,%xmm0
.byte 196,227,125,24,192,1 // vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
.byte 197,172,92,192 // vsubps %ymm0,%ymm10,%ymm0
- .byte 196,98,125,24,5,223,59,0,0 // vbroadcastss 0x3bdf(%rip),%ymm8 # b230 <_sk_clut_4D_avx+0x2599>
+ .byte 196,98,125,24,5,223,59,0,0 // vbroadcastss 0x3bdf(%rip),%ymm8 # b5fc <_sk_clut_4D_avx+0x2599>
.byte 196,193,124,87,192 // vxorps %ymm8,%ymm0,%ymm0
.byte 196,193,124,94,193 // vdivps %ymm9,%ymm0,%ymm0
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -31084,7 +31729,7 @@ HIDDEN _sk_save_xy_avx
FUNCTION(_sk_save_xy_avx)
_sk_save_xy_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,98,125,24,5,123,59,0,0 // vbroadcastss 0x3b7b(%rip),%ymm8 # b234 <_sk_clut_4D_avx+0x259d>
+ .byte 196,98,125,24,5,123,59,0,0 // vbroadcastss 0x3b7b(%rip),%ymm8 # b600 <_sk_clut_4D_avx+0x259d>
.byte 196,65,124,88,200 // vaddps %ymm8,%ymm0,%ymm9
.byte 196,67,125,8,209,1 // vroundps $0x1,%ymm9,%ymm10
.byte 196,65,52,92,202 // vsubps %ymm10,%ymm9,%ymm9
@@ -31121,9 +31766,9 @@ HIDDEN _sk_bilinear_nx_avx
FUNCTION(_sk_bilinear_nx_avx)
_sk_bilinear_nx_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,5,7,59,0,0 // vbroadcastss 0x3b07(%rip),%ymm0 # b238 <_sk_clut_4D_avx+0x25a1>
+ .byte 196,226,125,24,5,7,59,0,0 // vbroadcastss 0x3b07(%rip),%ymm0 # b604 <_sk_clut_4D_avx+0x25a1>
.byte 197,252,88,0 // vaddps (%rax),%ymm0,%ymm0
- .byte 196,98,125,24,5,254,58,0,0 // vbroadcastss 0x3afe(%rip),%ymm8 # b23c <_sk_clut_4D_avx+0x25a5>
+ .byte 196,98,125,24,5,254,58,0,0 // vbroadcastss 0x3afe(%rip),%ymm8 # b608 <_sk_clut_4D_avx+0x25a5>
.byte 197,60,92,64,64 // vsubps 0x40(%rax),%ymm8,%ymm8
.byte 197,124,17,128,128,0,0,0 // vmovups %ymm8,0x80(%rax)
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -31134,7 +31779,7 @@ HIDDEN _sk_bilinear_px_avx
FUNCTION(_sk_bilinear_px_avx)
_sk_bilinear_px_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,5,230,58,0,0 // vbroadcastss 0x3ae6(%rip),%ymm0 # b240 <_sk_clut_4D_avx+0x25a9>
+ .byte 196,226,125,24,5,230,58,0,0 // vbroadcastss 0x3ae6(%rip),%ymm0 # b60c <_sk_clut_4D_avx+0x25a9>
.byte 197,252,88,0 // vaddps (%rax),%ymm0,%ymm0
.byte 197,124,16,64,64 // vmovups 0x40(%rax),%ymm8
.byte 197,124,17,128,128,0,0,0 // vmovups %ymm8,0x80(%rax)
@@ -31146,9 +31791,9 @@ HIDDEN _sk_bilinear_ny_avx
FUNCTION(_sk_bilinear_ny_avx)
_sk_bilinear_ny_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,13,202,58,0,0 // vbroadcastss 0x3aca(%rip),%ymm1 # b244 <_sk_clut_4D_avx+0x25ad>
+ .byte 196,226,125,24,13,202,58,0,0 // vbroadcastss 0x3aca(%rip),%ymm1 # b610 <_sk_clut_4D_avx+0x25ad>
.byte 197,244,88,72,32 // vaddps 0x20(%rax),%ymm1,%ymm1
- .byte 196,98,125,24,5,192,58,0,0 // vbroadcastss 0x3ac0(%rip),%ymm8 # b248 <_sk_clut_4D_avx+0x25b1>
+ .byte 196,98,125,24,5,192,58,0,0 // vbroadcastss 0x3ac0(%rip),%ymm8 # b614 <_sk_clut_4D_avx+0x25b1>
.byte 197,60,92,64,96 // vsubps 0x60(%rax),%ymm8,%ymm8
.byte 197,124,17,128,160,0,0,0 // vmovups %ymm8,0xa0(%rax)
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -31159,7 +31804,7 @@ HIDDEN _sk_bilinear_py_avx
FUNCTION(_sk_bilinear_py_avx)
_sk_bilinear_py_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,13,168,58,0,0 // vbroadcastss 0x3aa8(%rip),%ymm1 # b24c <_sk_clut_4D_avx+0x25b5>
+ .byte 196,226,125,24,13,168,58,0,0 // vbroadcastss 0x3aa8(%rip),%ymm1 # b618 <_sk_clut_4D_avx+0x25b5>
.byte 197,244,88,72,32 // vaddps 0x20(%rax),%ymm1,%ymm1
.byte 197,124,16,64,96 // vmovups 0x60(%rax),%ymm8
.byte 197,124,17,128,160,0,0,0 // vmovups %ymm8,0xa0(%rax)
@@ -31171,14 +31816,14 @@ HIDDEN _sk_bicubic_n3x_avx
FUNCTION(_sk_bicubic_n3x_avx)
_sk_bicubic_n3x_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,5,139,58,0,0 // vbroadcastss 0x3a8b(%rip),%ymm0 # b250 <_sk_clut_4D_avx+0x25b9>
+ .byte 196,226,125,24,5,139,58,0,0 // vbroadcastss 0x3a8b(%rip),%ymm0 # b61c <_sk_clut_4D_avx+0x25b9>
.byte 197,252,88,0 // vaddps (%rax),%ymm0,%ymm0
- .byte 196,98,125,24,5,130,58,0,0 // vbroadcastss 0x3a82(%rip),%ymm8 # b254 <_sk_clut_4D_avx+0x25bd>
+ .byte 196,98,125,24,5,130,58,0,0 // vbroadcastss 0x3a82(%rip),%ymm8 # b620 <_sk_clut_4D_avx+0x25bd>
.byte 197,60,92,64,64 // vsubps 0x40(%rax),%ymm8,%ymm8
.byte 196,65,60,89,200 // vmulps %ymm8,%ymm8,%ymm9
- .byte 196,98,125,24,21,115,58,0,0 // vbroadcastss 0x3a73(%rip),%ymm10 # b258 <_sk_clut_4D_avx+0x25c1>
+ .byte 196,98,125,24,21,115,58,0,0 // vbroadcastss 0x3a73(%rip),%ymm10 # b624 <_sk_clut_4D_avx+0x25c1>
.byte 196,65,60,89,194 // vmulps %ymm10,%ymm8,%ymm8
- .byte 196,98,125,24,21,105,58,0,0 // vbroadcastss 0x3a69(%rip),%ymm10 # b25c <_sk_clut_4D_avx+0x25c5>
+ .byte 196,98,125,24,21,105,58,0,0 // vbroadcastss 0x3a69(%rip),%ymm10 # b628 <_sk_clut_4D_avx+0x25c5>
.byte 196,65,60,88,194 // vaddps %ymm10,%ymm8,%ymm8
.byte 196,65,52,89,192 // vmulps %ymm8,%ymm9,%ymm8
.byte 197,124,17,128,128,0,0,0 // vmovups %ymm8,0x80(%rax)
@@ -31190,19 +31835,19 @@ HIDDEN _sk_bicubic_n1x_avx
FUNCTION(_sk_bicubic_n1x_avx)
_sk_bicubic_n1x_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,5,76,58,0,0 // vbroadcastss 0x3a4c(%rip),%ymm0 # b260 <_sk_clut_4D_avx+0x25c9>
+ .byte 196,226,125,24,5,76,58,0,0 // vbroadcastss 0x3a4c(%rip),%ymm0 # b62c <_sk_clut_4D_avx+0x25c9>
.byte 197,252,88,0 // vaddps (%rax),%ymm0,%ymm0
- .byte 196,98,125,24,5,67,58,0,0 // vbroadcastss 0x3a43(%rip),%ymm8 # b264 <_sk_clut_4D_avx+0x25cd>
+ .byte 196,98,125,24,5,67,58,0,0 // vbroadcastss 0x3a43(%rip),%ymm8 # b630 <_sk_clut_4D_avx+0x25cd>
.byte 197,60,92,64,64 // vsubps 0x40(%rax),%ymm8,%ymm8
- .byte 196,98,125,24,13,57,58,0,0 // vbroadcastss 0x3a39(%rip),%ymm9 # b268 <_sk_clut_4D_avx+0x25d1>
+ .byte 196,98,125,24,13,57,58,0,0 // vbroadcastss 0x3a39(%rip),%ymm9 # b634 <_sk_clut_4D_avx+0x25d1>
.byte 196,65,60,89,201 // vmulps %ymm9,%ymm8,%ymm9
- .byte 196,98,125,24,21,47,58,0,0 // vbroadcastss 0x3a2f(%rip),%ymm10 # b26c <_sk_clut_4D_avx+0x25d5>
+ .byte 196,98,125,24,21,47,58,0,0 // vbroadcastss 0x3a2f(%rip),%ymm10 # b638 <_sk_clut_4D_avx+0x25d5>
.byte 196,65,52,88,202 // vaddps %ymm10,%ymm9,%ymm9
.byte 196,65,60,89,201 // vmulps %ymm9,%ymm8,%ymm9
- .byte 196,98,125,24,21,32,58,0,0 // vbroadcastss 0x3a20(%rip),%ymm10 # b270 <_sk_clut_4D_avx+0x25d9>
+ .byte 196,98,125,24,21,32,58,0,0 // vbroadcastss 0x3a20(%rip),%ymm10 # b63c <_sk_clut_4D_avx+0x25d9>
.byte 196,65,52,88,202 // vaddps %ymm10,%ymm9,%ymm9
.byte 196,65,60,89,193 // vmulps %ymm9,%ymm8,%ymm8
- .byte 196,98,125,24,13,17,58,0,0 // vbroadcastss 0x3a11(%rip),%ymm9 # b274 <_sk_clut_4D_avx+0x25dd>
+ .byte 196,98,125,24,13,17,58,0,0 // vbroadcastss 0x3a11(%rip),%ymm9 # b640 <_sk_clut_4D_avx+0x25dd>
.byte 196,65,60,88,193 // vaddps %ymm9,%ymm8,%ymm8
.byte 197,124,17,128,128,0,0,0 // vmovups %ymm8,0x80(%rax)
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -31213,17 +31858,17 @@ HIDDEN _sk_bicubic_p1x_avx
FUNCTION(_sk_bicubic_p1x_avx)
_sk_bicubic_p1x_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,98,125,24,5,249,57,0,0 // vbroadcastss 0x39f9(%rip),%ymm8 # b278 <_sk_clut_4D_avx+0x25e1>
+ .byte 196,98,125,24,5,249,57,0,0 // vbroadcastss 0x39f9(%rip),%ymm8 # b644 <_sk_clut_4D_avx+0x25e1>
.byte 197,188,88,0 // vaddps (%rax),%ymm8,%ymm0
.byte 197,124,16,72,64 // vmovups 0x40(%rax),%ymm9
- .byte 196,98,125,24,21,235,57,0,0 // vbroadcastss 0x39eb(%rip),%ymm10 # b27c <_sk_clut_4D_avx+0x25e5>
+ .byte 196,98,125,24,21,235,57,0,0 // vbroadcastss 0x39eb(%rip),%ymm10 # b648 <_sk_clut_4D_avx+0x25e5>
.byte 196,65,52,89,210 // vmulps %ymm10,%ymm9,%ymm10
- .byte 196,98,125,24,29,225,57,0,0 // vbroadcastss 0x39e1(%rip),%ymm11 # b280 <_sk_clut_4D_avx+0x25e9>
+ .byte 196,98,125,24,29,225,57,0,0 // vbroadcastss 0x39e1(%rip),%ymm11 # b64c <_sk_clut_4D_avx+0x25e9>
.byte 196,65,44,88,211 // vaddps %ymm11,%ymm10,%ymm10
.byte 196,65,52,89,210 // vmulps %ymm10,%ymm9,%ymm10
.byte 196,65,44,88,192 // vaddps %ymm8,%ymm10,%ymm8
.byte 196,65,52,89,192 // vmulps %ymm8,%ymm9,%ymm8
- .byte 196,98,125,24,13,200,57,0,0 // vbroadcastss 0x39c8(%rip),%ymm9 # b284 <_sk_clut_4D_avx+0x25ed>
+ .byte 196,98,125,24,13,200,57,0,0 // vbroadcastss 0x39c8(%rip),%ymm9 # b650 <_sk_clut_4D_avx+0x25ed>
.byte 196,65,60,88,193 // vaddps %ymm9,%ymm8,%ymm8
.byte 197,124,17,128,128,0,0,0 // vmovups %ymm8,0x80(%rax)
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -31234,13 +31879,13 @@ HIDDEN _sk_bicubic_p3x_avx
FUNCTION(_sk_bicubic_p3x_avx)
_sk_bicubic_p3x_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,5,176,57,0,0 // vbroadcastss 0x39b0(%rip),%ymm0 # b288 <_sk_clut_4D_avx+0x25f1>
+ .byte 196,226,125,24,5,176,57,0,0 // vbroadcastss 0x39b0(%rip),%ymm0 # b654 <_sk_clut_4D_avx+0x25f1>
.byte 197,252,88,0 // vaddps (%rax),%ymm0,%ymm0
.byte 197,124,16,64,64 // vmovups 0x40(%rax),%ymm8
.byte 196,65,60,89,200 // vmulps %ymm8,%ymm8,%ymm9
- .byte 196,98,125,24,21,157,57,0,0 // vbroadcastss 0x399d(%rip),%ymm10 # b28c <_sk_clut_4D_avx+0x25f5>
+ .byte 196,98,125,24,21,157,57,0,0 // vbroadcastss 0x399d(%rip),%ymm10 # b658 <_sk_clut_4D_avx+0x25f5>
.byte 196,65,60,89,194 // vmulps %ymm10,%ymm8,%ymm8
- .byte 196,98,125,24,21,147,57,0,0 // vbroadcastss 0x3993(%rip),%ymm10 # b290 <_sk_clut_4D_avx+0x25f9>
+ .byte 196,98,125,24,21,147,57,0,0 // vbroadcastss 0x3993(%rip),%ymm10 # b65c <_sk_clut_4D_avx+0x25f9>
.byte 196,65,60,88,194 // vaddps %ymm10,%ymm8,%ymm8
.byte 196,65,52,89,192 // vmulps %ymm8,%ymm9,%ymm8
.byte 197,124,17,128,128,0,0,0 // vmovups %ymm8,0x80(%rax)
@@ -31252,14 +31897,14 @@ HIDDEN _sk_bicubic_n3y_avx
FUNCTION(_sk_bicubic_n3y_avx)
_sk_bicubic_n3y_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,13,118,57,0,0 // vbroadcastss 0x3976(%rip),%ymm1 # b294 <_sk_clut_4D_avx+0x25fd>
+ .byte 196,226,125,24,13,118,57,0,0 // vbroadcastss 0x3976(%rip),%ymm1 # b660 <_sk_clut_4D_avx+0x25fd>
.byte 197,244,88,72,32 // vaddps 0x20(%rax),%ymm1,%ymm1
- .byte 196,98,125,24,5,108,57,0,0 // vbroadcastss 0x396c(%rip),%ymm8 # b298 <_sk_clut_4D_avx+0x2601>
+ .byte 196,98,125,24,5,108,57,0,0 // vbroadcastss 0x396c(%rip),%ymm8 # b664 <_sk_clut_4D_avx+0x2601>
.byte 197,60,92,64,96 // vsubps 0x60(%rax),%ymm8,%ymm8
.byte 196,65,60,89,200 // vmulps %ymm8,%ymm8,%ymm9
- .byte 196,98,125,24,21,93,57,0,0 // vbroadcastss 0x395d(%rip),%ymm10 # b29c <_sk_clut_4D_avx+0x2605>
+ .byte 196,98,125,24,21,93,57,0,0 // vbroadcastss 0x395d(%rip),%ymm10 # b668 <_sk_clut_4D_avx+0x2605>
.byte 196,65,60,89,194 // vmulps %ymm10,%ymm8,%ymm8
- .byte 196,98,125,24,21,83,57,0,0 // vbroadcastss 0x3953(%rip),%ymm10 # b2a0 <_sk_clut_4D_avx+0x2609>
+ .byte 196,98,125,24,21,83,57,0,0 // vbroadcastss 0x3953(%rip),%ymm10 # b66c <_sk_clut_4D_avx+0x2609>
.byte 196,65,60,88,194 // vaddps %ymm10,%ymm8,%ymm8
.byte 196,65,52,89,192 // vmulps %ymm8,%ymm9,%ymm8
.byte 197,124,17,128,160,0,0,0 // vmovups %ymm8,0xa0(%rax)
@@ -31271,19 +31916,19 @@ HIDDEN _sk_bicubic_n1y_avx
FUNCTION(_sk_bicubic_n1y_avx)
_sk_bicubic_n1y_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,13,54,57,0,0 // vbroadcastss 0x3936(%rip),%ymm1 # b2a4 <_sk_clut_4D_avx+0x260d>
+ .byte 196,226,125,24,13,54,57,0,0 // vbroadcastss 0x3936(%rip),%ymm1 # b670 <_sk_clut_4D_avx+0x260d>
.byte 197,244,88,72,32 // vaddps 0x20(%rax),%ymm1,%ymm1
- .byte 196,98,125,24,5,44,57,0,0 // vbroadcastss 0x392c(%rip),%ymm8 # b2a8 <_sk_clut_4D_avx+0x2611>
+ .byte 196,98,125,24,5,44,57,0,0 // vbroadcastss 0x392c(%rip),%ymm8 # b674 <_sk_clut_4D_avx+0x2611>
.byte 197,60,92,64,96 // vsubps 0x60(%rax),%ymm8,%ymm8
- .byte 196,98,125,24,13,34,57,0,0 // vbroadcastss 0x3922(%rip),%ymm9 # b2ac <_sk_clut_4D_avx+0x2615>
+ .byte 196,98,125,24,13,34,57,0,0 // vbroadcastss 0x3922(%rip),%ymm9 # b678 <_sk_clut_4D_avx+0x2615>
.byte 196,65,60,89,201 // vmulps %ymm9,%ymm8,%ymm9
- .byte 196,98,125,24,21,24,57,0,0 // vbroadcastss 0x3918(%rip),%ymm10 # b2b0 <_sk_clut_4D_avx+0x2619>
+ .byte 196,98,125,24,21,24,57,0,0 // vbroadcastss 0x3918(%rip),%ymm10 # b67c <_sk_clut_4D_avx+0x2619>
.byte 196,65,52,88,202 // vaddps %ymm10,%ymm9,%ymm9
.byte 196,65,60,89,201 // vmulps %ymm9,%ymm8,%ymm9
- .byte 196,98,125,24,21,9,57,0,0 // vbroadcastss 0x3909(%rip),%ymm10 # b2b4 <_sk_clut_4D_avx+0x261d>
+ .byte 196,98,125,24,21,9,57,0,0 // vbroadcastss 0x3909(%rip),%ymm10 # b680 <_sk_clut_4D_avx+0x261d>
.byte 196,65,52,88,202 // vaddps %ymm10,%ymm9,%ymm9
.byte 196,65,60,89,193 // vmulps %ymm9,%ymm8,%ymm8
- .byte 196,98,125,24,13,250,56,0,0 // vbroadcastss 0x38fa(%rip),%ymm9 # b2b8 <_sk_clut_4D_avx+0x2621>
+ .byte 196,98,125,24,13,250,56,0,0 // vbroadcastss 0x38fa(%rip),%ymm9 # b684 <_sk_clut_4D_avx+0x2621>
.byte 196,65,60,88,193 // vaddps %ymm9,%ymm8,%ymm8
.byte 197,124,17,128,160,0,0,0 // vmovups %ymm8,0xa0(%rax)
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -31294,17 +31939,17 @@ HIDDEN _sk_bicubic_p1y_avx
FUNCTION(_sk_bicubic_p1y_avx)
_sk_bicubic_p1y_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,98,125,24,5,226,56,0,0 // vbroadcastss 0x38e2(%rip),%ymm8 # b2bc <_sk_clut_4D_avx+0x2625>
+ .byte 196,98,125,24,5,226,56,0,0 // vbroadcastss 0x38e2(%rip),%ymm8 # b688 <_sk_clut_4D_avx+0x2625>
.byte 197,188,88,72,32 // vaddps 0x20(%rax),%ymm8,%ymm1
.byte 197,124,16,72,96 // vmovups 0x60(%rax),%ymm9
- .byte 196,98,125,24,21,211,56,0,0 // vbroadcastss 0x38d3(%rip),%ymm10 # b2c0 <_sk_clut_4D_avx+0x2629>
+ .byte 196,98,125,24,21,211,56,0,0 // vbroadcastss 0x38d3(%rip),%ymm10 # b68c <_sk_clut_4D_avx+0x2629>
.byte 196,65,52,89,210 // vmulps %ymm10,%ymm9,%ymm10
- .byte 196,98,125,24,29,201,56,0,0 // vbroadcastss 0x38c9(%rip),%ymm11 # b2c4 <_sk_clut_4D_avx+0x262d>
+ .byte 196,98,125,24,29,201,56,0,0 // vbroadcastss 0x38c9(%rip),%ymm11 # b690 <_sk_clut_4D_avx+0x262d>
.byte 196,65,44,88,211 // vaddps %ymm11,%ymm10,%ymm10
.byte 196,65,52,89,210 // vmulps %ymm10,%ymm9,%ymm10
.byte 196,65,44,88,192 // vaddps %ymm8,%ymm10,%ymm8
.byte 196,65,52,89,192 // vmulps %ymm8,%ymm9,%ymm8
- .byte 196,98,125,24,13,176,56,0,0 // vbroadcastss 0x38b0(%rip),%ymm9 # b2c8 <_sk_clut_4D_avx+0x2631>
+ .byte 196,98,125,24,13,176,56,0,0 // vbroadcastss 0x38b0(%rip),%ymm9 # b694 <_sk_clut_4D_avx+0x2631>
.byte 196,65,60,88,193 // vaddps %ymm9,%ymm8,%ymm8
.byte 197,124,17,128,160,0,0,0 // vmovups %ymm8,0xa0(%rax)
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -31315,13 +31960,13 @@ HIDDEN _sk_bicubic_p3y_avx
FUNCTION(_sk_bicubic_p3y_avx)
_sk_bicubic_p3y_avx:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,13,152,56,0,0 // vbroadcastss 0x3898(%rip),%ymm1 # b2cc <_sk_clut_4D_avx+0x2635>
+ .byte 196,226,125,24,13,152,56,0,0 // vbroadcastss 0x3898(%rip),%ymm1 # b698 <_sk_clut_4D_avx+0x2635>
.byte 197,244,88,72,32 // vaddps 0x20(%rax),%ymm1,%ymm1
.byte 197,124,16,64,96 // vmovups 0x60(%rax),%ymm8
.byte 196,65,60,89,200 // vmulps %ymm8,%ymm8,%ymm9
- .byte 196,98,125,24,21,132,56,0,0 // vbroadcastss 0x3884(%rip),%ymm10 # b2d0 <_sk_clut_4D_avx+0x2639>
+ .byte 196,98,125,24,21,132,56,0,0 // vbroadcastss 0x3884(%rip),%ymm10 # b69c <_sk_clut_4D_avx+0x2639>
.byte 196,65,60,89,194 // vmulps %ymm10,%ymm8,%ymm8
- .byte 196,98,125,24,21,122,56,0,0 // vbroadcastss 0x387a(%rip),%ymm10 # b2d4 <_sk_clut_4D_avx+0x263d>
+ .byte 196,98,125,24,21,122,56,0,0 // vbroadcastss 0x387a(%rip),%ymm10 # b6a0 <_sk_clut_4D_avx+0x263d>
.byte 196,65,60,88,194 // vaddps %ymm10,%ymm8,%ymm8
.byte 196,65,52,89,192 // vmulps %ymm8,%ymm9,%ymm8
.byte 197,124,17,128,160,0,0,0 // vmovups %ymm8,0xa0(%rax)
@@ -31467,7 +32112,7 @@ _sk_clut_3D_avx:
.byte 197,249,127,68,36,224 // vmovdqa %xmm0,-0x20(%rsp)
.byte 72,139,0 // mov (%rax),%rax
.byte 196,193,121,254,194 // vpaddd %xmm10,%xmm0,%xmm0
- .byte 196,98,121,24,29,18,54,0,0 // vbroadcastss 0x3612(%rip),%xmm11 # b2dc <_sk_clut_4D_avx+0x2645>
+ .byte 196,98,121,24,29,18,54,0,0 // vbroadcastss 0x3612(%rip),%xmm11 # b6a8 <_sk_clut_4D_avx+0x2645>
.byte 196,194,121,64,203 // vpmulld %xmm11,%xmm0,%xmm1
.byte 196,193,249,126,201 // vmovq %xmm1,%r9
.byte 69,137,202 // mov %r9d,%r10d
@@ -31501,7 +32146,7 @@ _sk_clut_3D_avx:
.byte 196,163,65,33,60,152,48 // vinsertps $0x30,(%rax,%r11,4),%xmm7,%xmm7
.byte 196,227,93,24,223,1 // vinsertf128 $0x1,%xmm7,%ymm4,%ymm3
.byte 197,252,17,92,36,64 // vmovups %ymm3,0x40(%rsp)
- .byte 196,98,121,24,13,85,53,0,0 // vbroadcastss 0x3555(%rip),%xmm9 # b2e0 <_sk_clut_4D_avx+0x2649>
+ .byte 196,98,121,24,13,85,53,0,0 // vbroadcastss 0x3555(%rip),%xmm9 # b6ac <_sk_clut_4D_avx+0x2649>
.byte 196,193,105,254,249 // vpaddd %xmm9,%xmm2,%xmm7
.byte 196,195,249,22,249,1 // vpextrq $0x1,%xmm7,%r9
.byte 196,193,249,126,250 // vmovq %xmm7,%r10
@@ -31529,7 +32174,7 @@ _sk_clut_3D_avx:
.byte 196,161,122,16,60,152 // vmovss (%rax,%r11,4),%xmm7
.byte 196,227,81,33,239,48 // vinsertps $0x30,%xmm7,%xmm5,%xmm5
.byte 196,99,85,24,254,1 // vinsertf128 $0x1,%xmm6,%ymm5,%ymm15
- .byte 196,98,121,24,37,196,52,0,0 // vbroadcastss 0x34c4(%rip),%xmm12 # b2e4 <_sk_clut_4D_avx+0x264d>
+ .byte 196,98,121,24,37,196,52,0,0 // vbroadcastss 0x34c4(%rip),%xmm12 # b6b0 <_sk_clut_4D_avx+0x264d>
.byte 196,193,105,254,212 // vpaddd %xmm12,%xmm2,%xmm2
.byte 196,195,249,22,209,1 // vpextrq $0x1,%xmm2,%r9
.byte 196,193,249,126,210 // vmovq %xmm2,%r10
@@ -31557,7 +32202,7 @@ _sk_clut_3D_avx:
.byte 197,250,16,44,152 // vmovss (%rax,%rbx,4),%xmm5
.byte 196,227,105,33,213,48 // vinsertps $0x30,%xmm5,%xmm2,%xmm2
.byte 196,227,109,24,225,1 // vinsertf128 $0x1,%xmm1,%ymm2,%ymm4
- .byte 196,226,125,24,13,36,52,0,0 // vbroadcastss 0x3424(%rip),%ymm1 # b2d8 <_sk_clut_4D_avx+0x2641>
+ .byte 196,226,125,24,13,36,52,0,0 // vbroadcastss 0x3424(%rip),%ymm1 # b6a4 <_sk_clut_4D_avx+0x2641>
.byte 197,188,88,209 // vaddps %ymm1,%ymm8,%ymm2
.byte 197,252,17,76,36,32 // vmovups %ymm1,0x20(%rsp)
.byte 197,254,91,210 // vcvttps2dq %ymm2,%ymm2
@@ -32312,7 +32957,7 @@ _sk_clut_4D_avx:
.byte 196,226,25,64,200 // vpmulld %xmm0,%xmm12,%xmm1
.byte 197,249,127,76,36,128 // vmovdqa %xmm1,-0x80(%rsp)
.byte 197,241,254,202 // vpaddd %xmm2,%xmm1,%xmm1
- .byte 196,98,121,24,61,226,36,0,0 // vbroadcastss 0x24e2(%rip),%xmm15 # b2ec <_sk_clut_4D_avx+0x2655>
+ .byte 196,98,121,24,61,226,36,0,0 // vbroadcastss 0x24e2(%rip),%xmm15 # b6b8 <_sk_clut_4D_avx+0x2655>
.byte 196,194,113,64,247 // vpmulld %xmm15,%xmm1,%xmm6
.byte 196,193,249,126,241 // vmovq %xmm6,%r9
.byte 69,137,202 // mov %r9d,%r10d
@@ -32350,7 +32995,7 @@ _sk_clut_4D_avx:
.byte 196,163,65,33,60,144,32 // vinsertps $0x20,(%rax,%r10,4),%xmm7,%xmm7
.byte 196,163,65,33,60,152,48 // vinsertps $0x30,(%rax,%r11,4),%xmm7,%xmm7
.byte 196,99,53,24,247,1 // vinsertf128 $0x1,%xmm7,%ymm9,%ymm14
- .byte 196,98,121,24,21,13,36,0,0 // vbroadcastss 0x240d(%rip),%xmm10 # b2f0 <_sk_clut_4D_avx+0x2659>
+ .byte 196,98,121,24,21,13,36,0,0 // vbroadcastss 0x240d(%rip),%xmm10 # b6bc <_sk_clut_4D_avx+0x2659>
.byte 196,65,97,254,202 // vpaddd %xmm10,%xmm3,%xmm9
.byte 196,67,249,22,201,1 // vpextrq $0x1,%xmm9,%r9
.byte 196,65,249,126,202 // vmovq %xmm9,%r10
@@ -32378,7 +33023,7 @@ _sk_clut_4D_avx:
.byte 196,161,122,16,60,152 // vmovss (%rax,%r11,4),%xmm7
.byte 196,227,81,33,239,48 // vinsertps $0x30,%xmm7,%xmm5,%xmm5
.byte 196,99,85,24,220,1 // vinsertf128 $0x1,%xmm4,%ymm5,%ymm11
- .byte 196,98,121,24,13,124,35,0,0 // vbroadcastss 0x237c(%rip),%xmm9 # b2f4 <_sk_clut_4D_avx+0x265d>
+ .byte 196,98,121,24,13,124,35,0,0 // vbroadcastss 0x237c(%rip),%xmm9 # b6c0 <_sk_clut_4D_avx+0x265d>
.byte 196,193,97,254,217 // vpaddd %xmm9,%xmm3,%xmm3
.byte 196,195,249,22,217,1 // vpextrq $0x1,%xmm3,%r9
.byte 196,193,249,126,218 // vmovq %xmm3,%r10
@@ -32406,7 +33051,7 @@ _sk_clut_4D_avx:
.byte 197,250,16,44,152 // vmovss (%rax,%rbx,4),%xmm5
.byte 196,227,89,33,229,48 // vinsertps $0x30,%xmm5,%xmm4,%xmm4
.byte 196,227,93,24,243,1 // vinsertf128 $0x1,%xmm3,%ymm4,%ymm6
- .byte 196,226,125,24,37,220,34,0,0 // vbroadcastss 0x22dc(%rip),%ymm4 # b2e8 <_sk_clut_4D_avx+0x2651>
+ .byte 196,226,125,24,37,220,34,0,0 // vbroadcastss 0x22dc(%rip),%ymm4 # b6b4 <_sk_clut_4D_avx+0x2651>
.byte 197,148,88,220 // vaddps %ymm4,%ymm13,%ymm3
.byte 197,252,40,252 // vmovaps %ymm4,%ymm7
.byte 197,252,17,124,36,32 // vmovups %ymm7,0x20(%rsp)
@@ -33836,7 +34481,7 @@ _sk_clut_4D_avx:
.byte 197,220,89,210 // vmulps %ymm2,%ymm4,%ymm2
.byte 197,212,88,210 // vaddps %ymm2,%ymm5,%ymm2
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,29,112,5,0,0 // vbroadcastss 0x570(%rip),%ymm3 # b2f8 <_sk_clut_4D_avx+0x2661>
+ .byte 196,226,125,24,29,112,5,0,0 // vbroadcastss 0x570(%rip),%ymm3 # b6c4 <_sk_clut_4D_avx+0x2661>
.byte 197,252,16,164,36,32,2,0,0 // vmovups 0x220(%rsp),%ymm4
.byte 197,252,16,172,36,64,2,0,0 // vmovups 0x240(%rsp),%ymm5
.byte 197,252,16,180,36,96,2,0,0 // vmovups 0x260(%rsp),%ymm6
@@ -33890,25 +34535,25 @@ BALIGN4
.byte 153 // cltd
.byte 153 // cltd
.byte 62,61,10,23,63,174 // ds cmp $0xae3f170a,%eax
- .byte 71,225,61 // rex.RXB loope ae75 <.literal4+0xb9>
+ .byte 71,225,61 // rex.RXB loope b241 <.literal4+0xb9>
.byte 0,0 // add %al,(%rax)
.byte 128,63,154 // cmpb $0x9a,(%rdi)
.byte 153 // cltd
.byte 153 // cltd
.byte 62,61,10,23,63,174 // ds cmp $0xae3f170a,%eax
- .byte 71,225,61 // rex.RXB loope ae85 <.literal4+0xc9>
+ .byte 71,225,61 // rex.RXB loope b251 <.literal4+0xc9>
.byte 0,0 // add %al,(%rax)
.byte 128,63,154 // cmpb $0x9a,(%rdi)
.byte 153 // cltd
.byte 153 // cltd
.byte 62,61,10,23,63,174 // ds cmp $0xae3f170a,%eax
- .byte 71,225,61 // rex.RXB loope ae95 <.literal4+0xd9>
+ .byte 71,225,61 // rex.RXB loope b261 <.literal4+0xd9>
.byte 0,0 // add %al,(%rax)
.byte 128,63,154 // cmpb $0x9a,(%rdi)
.byte 153 // cltd
.byte 153 // cltd
.byte 62,61,10,23,63,174 // ds cmp $0xae3f170a,%eax
- .byte 71,225,61 // rex.RXB loope aea5 <.literal4+0xe9>
+ .byte 71,225,61 // rex.RXB loope b271 <.literal4+0xe9>
.byte 0,0 // add %al,(%rax)
.byte 128,63,0 // cmpb $0x0,(%rdi)
.byte 0,128,63,0,0,127 // add %al,0x7f00003f(%rax)
@@ -33973,7 +34618,7 @@ BALIGN4
.byte 190,129,128,128,59 // mov $0x3b808081,%esi
.byte 129,128,128,59,0,248,0,0,8,33 // addl $0x21080000,-0x7ffc480(%rax)
.byte 132,55 // test %dh,(%rdi)
- .byte 224,7 // loopne af15 <.literal4+0x159>
+ .byte 224,7 // loopne b2e1 <.literal4+0x159>
.byte 0,0 // add %al,(%rax)
.byte 33,8 // and %ecx,(%rax)
.byte 2,58 // add (%rdx),%bh
@@ -33989,10 +34634,10 @@ BALIGN4
.byte 129,128,128,59,129,128,128,59,0,0 // addl $0x3b80,-0x7f7ec480(%rax)
.byte 0,52,255 // add %dh,(%rdi,%rdi,8)
.byte 255 // (bad)
- .byte 127,0 // jg af3c <.literal4+0x180>
+ .byte 127,0 // jg b308 <.literal4+0x180>
.byte 0,0 // add %al,(%rax)
.byte 0,63 // add %bh,(%rdi)
- .byte 119,115 // ja afb5 <.literal4+0x1f9>
+ .byte 119,115 // ja b381 <.literal4+0x1f9>
.byte 248 // clc
.byte 194,117,191 // retq $0xbf75
.byte 191,63,249,68,180 // mov $0xb444f93f,%edi
@@ -34006,10 +34651,10 @@ BALIGN4
.byte 0,128,63,0,0,0 // add %al,0x3f(%rax)
.byte 52,255 // xor $0xff,%al
.byte 255 // (bad)
- .byte 127,0 // jg af70 <.literal4+0x1b4>
+ .byte 127,0 // jg b33c <.literal4+0x1b4>
.byte 0,0 // add %al,(%rax)
.byte 0,63 // add %bh,(%rdi)
- .byte 119,115 // ja afe9 <.literal4+0x22d>
+ .byte 119,115 // ja b3b5 <.literal4+0x22d>
.byte 248 // clc
.byte 194,117,191 // retq $0xbf75
.byte 191,63,249,68,180 // mov $0xb444f93f,%edi
@@ -34023,10 +34668,10 @@ BALIGN4
.byte 0,128,63,0,0,0 // add %al,0x3f(%rax)
.byte 52,255 // xor $0xff,%al
.byte 255 // (bad)
- .byte 127,0 // jg afa4 <.literal4+0x1e8>
+ .byte 127,0 // jg b370 <.literal4+0x1e8>
.byte 0,0 // add %al,(%rax)
.byte 0,63 // add %bh,(%rdi)
- .byte 119,115 // ja b01d <.literal4+0x261>
+ .byte 119,115 // ja b3e9 <.literal4+0x261>
.byte 248 // clc
.byte 194,117,191 // retq $0xbf75
.byte 191,63,249,68,180 // mov $0xb444f93f,%edi
@@ -34040,10 +34685,10 @@ BALIGN4
.byte 0,128,63,0,0,0 // add %al,0x3f(%rax)
.byte 52,255 // xor $0xff,%al
.byte 255 // (bad)
- .byte 127,0 // jg afd8 <.literal4+0x21c>
+ .byte 127,0 // jg b3a4 <.literal4+0x21c>
.byte 0,0 // add %al,(%rax)
.byte 0,63 // add %bh,(%rdi)
- .byte 119,115 // ja b051 <.literal4+0x295>
+ .byte 119,115 // ja b41d <.literal4+0x295>
.byte 248 // clc
.byte 194,117,191 // retq $0xbf75
.byte 191,63,249,68,180 // mov $0xb444f93f,%edi
@@ -34057,10 +34702,10 @@ BALIGN4
.byte 0,128,63,0,0,0 // add %al,0x3f(%rax)
.byte 52,255 // xor $0xff,%al
.byte 255 // (bad)
- .byte 127,0 // jg b00c <.literal4+0x250>
+ .byte 127,0 // jg b3d8 <.literal4+0x250>
.byte 0,0 // add %al,(%rax)
.byte 0,63 // add %bh,(%rdi)
- .byte 119,115 // ja b085 <.literal4+0x2c9>
+ .byte 119,115 // ja b451 <.literal4+0x2c9>
.byte 248 // clc
.byte 194,117,191 // retq $0xbf75
.byte 191,63,249,68,180 // mov $0xb444f93f,%edi
@@ -34073,7 +34718,7 @@ BALIGN4
.byte 0,75,0 // add %cl,0x0(%rbx)
.byte 0,200 // add %cl,%al
.byte 66,0,0 // rex.X add %al,(%rax)
- .byte 127,67 // jg b07f <.literal4+0x2c3>
+ .byte 127,67 // jg b44b <.literal4+0x2c3>
.byte 0,0 // add %al,(%rax)
.byte 0,195 // add %al,%bl
.byte 0,0 // add %al,(%rax)
@@ -34085,7 +34730,7 @@ BALIGN4
.byte 190,80,128,3,62 // mov $0x3e038050,%esi
.byte 31 // (bad)
.byte 215 // xlat %ds:(%rbx)
- .byte 118,63 // jbe b09f <.literal4+0x2e3>
+ .byte 118,63 // jbe b46b <.literal4+0x2e3>
.byte 246,64,83,63 // testb $0x3f,0x53(%rax)
.byte 129,128,128,59,129,128,128,59,129,128// addl $0x80813b80,-0x7f7ec480(%rax)
.byte 128,59,0 // cmpb $0x0,(%rbx)
@@ -34107,7 +34752,7 @@ BALIGN4
.byte 0,0 // add %al,(%rax)
.byte 8,33 // or %ah,(%rcx)
.byte 132,55 // test %dh,(%rdi)
- .byte 224,7 // loopne b0b9 <.literal4+0x2fd>
+ .byte 224,7 // loopne b485 <.literal4+0x2fd>
.byte 0,0 // add %al,(%rax)
.byte 33,8 // and %ecx,(%rax)
.byte 2,58 // add (%rdx),%bh
@@ -34119,7 +34764,7 @@ BALIGN4
.byte 0,0 // add %al,(%rax)
.byte 8,33 // or %ah,(%rcx)
.byte 132,55 // test %dh,(%rdi)
- .byte 224,7 // loopne b0d5 <.literal4+0x319>
+ .byte 224,7 // loopne b4a1 <.literal4+0x319>
.byte 0,0 // add %al,(%rax)
.byte 33,8 // and %ecx,(%rax)
.byte 2,58 // add (%rdx),%bh
@@ -34130,7 +34775,7 @@ BALIGN4
.byte 0,0 // add %al,(%rax)
.byte 248 // clc
.byte 65,0,0 // add %al,(%r8)
- .byte 124,66 // jl b12a <.literal4+0x36e>
+ .byte 124,66 // jl b4f6 <.literal4+0x36e>
.byte 0,240 // add %dh,%al
.byte 0,0 // add %al,(%rax)
.byte 137,136,136,55,0,15 // mov %ecx,0xf003788(%rax)
@@ -34156,7 +34801,7 @@ BALIGN4
.byte 137,136,136,59,15,0 // mov %ecx,0xf3b88(%rax)
.byte 0,0 // add %al,(%rax)
.byte 137,136,136,61,0,0 // mov %ecx,0x3d88(%rax)
- .byte 112,65 // jo b18d <.literal4+0x3d1>
+ .byte 112,65 // jo b559 <.literal4+0x3d1>
.byte 129,128,128,59,129,128,128,59,129,128// addl $0x80813b80,-0x7f7ec480(%rax)
.byte 128,59,0 // cmpb $0x0,(%rbx)
.byte 0,127,67 // add %bh,0x43(%rdi)
@@ -34182,7 +34827,7 @@ BALIGN4
.byte 0,128,55,0,0,128 // add %al,-0x7fffffc9(%rax)
.byte 63 // (bad)
.byte 0,255 // add %bh,%bh
- .byte 127,71 // jg b1ff <.literal4+0x443>
+ .byte 127,71 // jg b5cb <.literal4+0x443>
.byte 0,0 // add %al,(%rax)
.byte 0,63 // add %bh,(%rdi)
.byte 0,0 // add %al,(%rax)
@@ -34302,8 +34947,8 @@ BALIGN4
.byte 0,0 // add %al,(%rax)
.byte 2,0 // add (%rax),%al
.byte 0,0 // add %al,(%rax)
- .byte 114,249 // jb b2e3 <.literal4+0x527>
- .byte 127,63 // jg b32b <_sk_clut_4D_avx+0x2694>
+ .byte 114,249 // jb b6af <.literal4+0x527>
+ .byte 127,63 // jg b6f7 <_sk_clut_4D_avx+0x2694>
.byte 3,0 // add (%rax),%eax
.byte 0,0 // add %al,(%rax)
.byte 1,0 // add %eax,(%rax)
@@ -34314,103 +34959,137 @@ BALIGN4
.byte 128 // .byte 0x80
.byte 63 // (bad)
-BALIGN16
- .byte 0,2 // add %al,(%rdx)
- .byte 4,6 // add $0x6,%al
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 8,10 // or %cl,(%rdx)
- .byte 12,14 // or $0xe,%al
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 255,0 // incl (%rax)
- .byte 255,0 // incl (%rax)
- .byte 255,0 // incl (%rax)
- .byte 255,0 // incl (%rax)
- .byte 255,0 // incl (%rax)
- .byte 255,0 // incl (%rax)
- .byte 255,0 // incl (%rax)
+BALIGN32
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,2 // add %al,(%rdx)
- .byte 4,6 // add $0x6,%al
.byte 0,0 // add %al,(%rax)
+ .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
+ .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
+ .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
+ .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
+ .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
- .byte 8,10 // or %cl,(%rdx)
- .byte 12,14 // or $0xe,%al
+ .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
+ .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
+ .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
+ .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
+ .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
+ .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,2 // add %al,(%rdx)
.byte 0,0 // add %al,(%rax)
+ .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
+ .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
+ .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
+ .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
+ .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
+ .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
- .byte 0,2 // add %al,(%rdx)
- .byte 4,6 // add $0x6,%al
+ .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
+ .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
+ .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
+ .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
+ .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
+ .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
+
+BALIGN16
.byte 255,0 // incl (%rax)
.byte 255,0 // incl (%rax)
.byte 255,0 // incl (%rax)
@@ -34427,232 +35106,70 @@ BALIGN16
.byte 255,0 // incl (%rax)
.byte 255,0 // incl (%rax)
.byte 255,0 // incl (%rax)
- .byte 0,2 // add %al,(%rdx)
- .byte 4,6 // add $0x6,%al
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 8,10 // or %cl,(%rdx)
- .byte 12,14 // or $0xe,%al
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,2 // add %al,(%rdx)
- .byte 4,6 // add $0x6,%al
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 8,10 // or %cl,(%rdx)
- .byte 12,14 // or $0xe,%al
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,2 // add %al,(%rdx)
- .byte 4,6 // add $0x6,%al
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 8,10 // or %cl,(%rdx)
- .byte 12,14 // or $0xe,%al
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,2 // add %al,(%rdx)
- .byte 4,6 // add $0x6,%al
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 8,10 // or %cl,(%rdx)
- .byte 12,14 // or $0xe,%al
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,2 // add %al,(%rdx)
- .byte 4,6 // add $0x6,%al
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 8,10 // or %cl,(%rdx)
- .byte 12,14 // or $0xe,%al
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,2 // add %al,(%rdx)
- .byte 4,6 // add $0x6,%al
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 8,10 // or %cl,(%rdx)
- .byte 12,14 // or $0xe,%al
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 0,0 // add %al,(%rax)
-
-BALIGN32
- .byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
- .byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
+ .byte 0,2 // add %al,(%rdx)
.byte 0,0 // add %al,(%rax)
- .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
- .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
- .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
- .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
- .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
- .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
- .byte 255,0 // incl (%rax)
+ .byte 0,2 // add %al,(%rdx)
+ .byte 4,6 // add $0x6,%al
.byte 0,0 // add %al,(%rax)
- .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
- .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
- .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
- .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
- .byte 255,0 // incl (%rax)
.byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
.byte 255,0 // incl (%rax)
- .byte 0,0 // add %al,(%rax)
BALIGN32
HIDDEN _sk_start_pipeline_sse41
diff --git a/src/jumper/SkJumper_generated_win.S b/src/jumper/SkJumper_generated_win.S
index ef9b9c94dd..afa37bc78f 100644
--- a/src/jumper/SkJumper_generated_win.S
+++ b/src/jumper/SkJumper_generated_win.S
@@ -101,7 +101,7 @@ _sk_seed_shader_hsw LABEL PROC
DB 197,249,110,194 ; vmovd %edx,%xmm0
DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,226,125,24,13,189,103,0,0 ; vbroadcastss 0x67bd(%rip),%ymm1 # 691c <_sk_clut_4D_hsw+0x8c8>
+ DB 196,226,125,24,13,137,108,0,0 ; vbroadcastss 0x6c89(%rip),%ymm1 # 6de8 <_sk_clut_4D_hsw+0x8c8>
DB 197,252,88,193 ; vaddps %ymm1,%ymm0,%ymm0
DB 197,252,88,7 ; vaddps (%rdi),%ymm0,%ymm0
DB 197,249,110,209 ; vmovd %ecx,%xmm2
@@ -109,7 +109,7 @@ _sk_seed_shader_hsw LABEL PROC
DB 197,252,91,210 ; vcvtdq2ps %ymm2,%ymm2
DB 197,236,88,201 ; vaddps %ymm1,%ymm2,%ymm1
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,21,157,103,0,0 ; vbroadcastss 0x679d(%rip),%ymm2 # 6920 <_sk_clut_4D_hsw+0x8cc>
+ DB 196,226,125,24,21,105,108,0,0 ; vbroadcastss 0x6c69(%rip),%ymm2 # 6dec <_sk_clut_4D_hsw+0x8cc>
DB 197,228,87,219 ; vxorps %ymm3,%ymm3,%ymm3
DB 197,220,87,228 ; vxorps %ymm4,%ymm4,%ymm4
DB 197,212,87,237 ; vxorps %ymm5,%ymm5,%ymm5
@@ -126,13 +126,13 @@ _sk_dither_hsw LABEL PROC
DB 197,121,110,201 ; vmovd %ecx,%xmm9
DB 196,66,125,88,201 ; vpbroadcastd %xmm9,%ymm9
DB 196,65,53,239,200 ; vpxor %ymm8,%ymm9,%ymm9
- DB 196,98,125,88,21,100,103,0,0 ; vpbroadcastd 0x6764(%rip),%ymm10 # 6924 <_sk_clut_4D_hsw+0x8d0>
+ DB 196,98,125,88,21,48,108,0,0 ; vpbroadcastd 0x6c30(%rip),%ymm10 # 6df0 <_sk_clut_4D_hsw+0x8d0>
DB 196,65,53,219,218 ; vpand %ymm10,%ymm9,%ymm11
DB 196,193,37,114,243,5 ; vpslld $0x5,%ymm11,%ymm11
DB 196,65,61,219,210 ; vpand %ymm10,%ymm8,%ymm10
DB 196,193,45,114,242,4 ; vpslld $0x4,%ymm10,%ymm10
- DB 196,98,125,88,37,73,103,0,0 ; vpbroadcastd 0x6749(%rip),%ymm12 # 6928 <_sk_clut_4D_hsw+0x8d4>
- DB 196,98,125,88,45,68,103,0,0 ; vpbroadcastd 0x6744(%rip),%ymm13 # 692c <_sk_clut_4D_hsw+0x8d8>
+ DB 196,98,125,88,37,21,108,0,0 ; vpbroadcastd 0x6c15(%rip),%ymm12 # 6df4 <_sk_clut_4D_hsw+0x8d4>
+ DB 196,98,125,88,45,16,108,0,0 ; vpbroadcastd 0x6c10(%rip),%ymm13 # 6df8 <_sk_clut_4D_hsw+0x8d8>
DB 196,65,53,219,245 ; vpand %ymm13,%ymm9,%ymm14
DB 196,193,13,114,246,2 ; vpslld $0x2,%ymm14,%ymm14
DB 196,65,61,219,237 ; vpand %ymm13,%ymm8,%ymm13
@@ -147,8 +147,8 @@ _sk_dither_hsw LABEL PROC
DB 196,65,61,235,194 ; vpor %ymm10,%ymm8,%ymm8
DB 196,65,61,235,193 ; vpor %ymm9,%ymm8,%ymm8
DB 196,65,124,91,192 ; vcvtdq2ps %ymm8,%ymm8
- DB 196,98,125,24,13,246,102,0,0 ; vbroadcastss 0x66f6(%rip),%ymm9 # 6930 <_sk_clut_4D_hsw+0x8dc>
- DB 196,98,125,24,21,241,102,0,0 ; vbroadcastss 0x66f1(%rip),%ymm10 # 6934 <_sk_clut_4D_hsw+0x8e0>
+ DB 196,98,125,24,13,194,107,0,0 ; vbroadcastss 0x6bc2(%rip),%ymm9 # 6dfc <_sk_clut_4D_hsw+0x8dc>
+ DB 196,98,125,24,21,189,107,0,0 ; vbroadcastss 0x6bbd(%rip),%ymm10 # 6e00 <_sk_clut_4D_hsw+0x8e0>
DB 196,66,61,184,209 ; vfmadd231ps %ymm9,%ymm8,%ymm10
DB 196,98,125,24,0 ; vbroadcastss (%rax),%ymm8
DB 196,65,60,89,194 ; vmulps %ymm10,%ymm8,%ymm8
@@ -178,7 +178,7 @@ _sk_uniform_color_hsw LABEL PROC
PUBLIC _sk_black_color_hsw
_sk_black_color_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,29,145,102,0,0 ; vbroadcastss 0x6691(%rip),%ymm3 # 6938 <_sk_clut_4D_hsw+0x8e4>
+ DB 196,226,125,24,29,93,107,0,0 ; vbroadcastss 0x6b5d(%rip),%ymm3 # 6e04 <_sk_clut_4D_hsw+0x8e4>
DB 197,252,87,192 ; vxorps %ymm0,%ymm0,%ymm0
DB 197,244,87,201 ; vxorps %ymm1,%ymm1,%ymm1
DB 197,236,87,210 ; vxorps %ymm2,%ymm2,%ymm2
@@ -187,7 +187,7 @@ _sk_black_color_hsw LABEL PROC
PUBLIC _sk_white_color_hsw
_sk_white_color_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,5,124,102,0,0 ; vbroadcastss 0x667c(%rip),%ymm0 # 693c <_sk_clut_4D_hsw+0x8e8>
+ DB 196,226,125,24,5,72,107,0,0 ; vbroadcastss 0x6b48(%rip),%ymm0 # 6e08 <_sk_clut_4D_hsw+0x8e8>
DB 197,252,40,200 ; vmovaps %ymm0,%ymm1
DB 197,252,40,208 ; vmovaps %ymm0,%ymm2
DB 197,252,40,216 ; vmovaps %ymm0,%ymm3
@@ -225,7 +225,7 @@ _sk_clear_hsw LABEL PROC
PUBLIC _sk_srcatop_hsw
_sk_srcatop_hsw LABEL PROC
DB 197,252,89,199 ; vmulps %ymm7,%ymm0,%ymm0
- DB 196,98,125,24,5,31,102,0,0 ; vbroadcastss 0x661f(%rip),%ymm8 # 6940 <_sk_clut_4D_hsw+0x8ec>
+ DB 196,98,125,24,5,235,106,0,0 ; vbroadcastss 0x6aeb(%rip),%ymm8 # 6e0c <_sk_clut_4D_hsw+0x8ec>
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
DB 196,226,61,184,196 ; vfmadd231ps %ymm4,%ymm8,%ymm0
DB 197,244,89,207 ; vmulps %ymm7,%ymm1,%ymm1
@@ -239,7 +239,7 @@ _sk_srcatop_hsw LABEL PROC
PUBLIC _sk_dstatop_hsw
_sk_dstatop_hsw LABEL PROC
- DB 196,98,125,24,5,242,101,0,0 ; vbroadcastss 0x65f2(%rip),%ymm8 # 6944 <_sk_clut_4D_hsw+0x8f0>
+ DB 196,98,125,24,5,190,106,0,0 ; vbroadcastss 0x6abe(%rip),%ymm8 # 6e10 <_sk_clut_4D_hsw+0x8f0>
DB 197,60,92,199 ; vsubps %ymm7,%ymm8,%ymm8
DB 197,188,89,192 ; vmulps %ymm0,%ymm8,%ymm0
DB 196,226,101,184,196 ; vfmadd231ps %ymm4,%ymm3,%ymm0
@@ -272,7 +272,7 @@ _sk_dstin_hsw LABEL PROC
PUBLIC _sk_srcout_hsw
_sk_srcout_hsw LABEL PROC
- DB 196,98,125,24,5,153,101,0,0 ; vbroadcastss 0x6599(%rip),%ymm8 # 6948 <_sk_clut_4D_hsw+0x8f4>
+ DB 196,98,125,24,5,101,106,0,0 ; vbroadcastss 0x6a65(%rip),%ymm8 # 6e14 <_sk_clut_4D_hsw+0x8f4>
DB 197,60,92,199 ; vsubps %ymm7,%ymm8,%ymm8
DB 197,188,89,192 ; vmulps %ymm0,%ymm8,%ymm0
DB 197,188,89,201 ; vmulps %ymm1,%ymm8,%ymm1
@@ -283,7 +283,7 @@ _sk_srcout_hsw LABEL PROC
PUBLIC _sk_dstout_hsw
_sk_dstout_hsw LABEL PROC
- DB 196,226,125,24,5,124,101,0,0 ; vbroadcastss 0x657c(%rip),%ymm0 # 694c <_sk_clut_4D_hsw+0x8f8>
+ DB 196,226,125,24,5,72,106,0,0 ; vbroadcastss 0x6a48(%rip),%ymm0 # 6e18 <_sk_clut_4D_hsw+0x8f8>
DB 197,252,92,219 ; vsubps %ymm3,%ymm0,%ymm3
DB 197,228,89,196 ; vmulps %ymm4,%ymm3,%ymm0
DB 197,228,89,205 ; vmulps %ymm5,%ymm3,%ymm1
@@ -294,7 +294,7 @@ _sk_dstout_hsw LABEL PROC
PUBLIC _sk_srcover_hsw
_sk_srcover_hsw LABEL PROC
- DB 196,98,125,24,5,95,101,0,0 ; vbroadcastss 0x655f(%rip),%ymm8 # 6950 <_sk_clut_4D_hsw+0x8fc>
+ DB 196,98,125,24,5,43,106,0,0 ; vbroadcastss 0x6a2b(%rip),%ymm8 # 6e1c <_sk_clut_4D_hsw+0x8fc>
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
DB 196,194,93,184,192 ; vfmadd231ps %ymm8,%ymm4,%ymm0
DB 196,194,85,184,200 ; vfmadd231ps %ymm8,%ymm5,%ymm1
@@ -305,7 +305,7 @@ _sk_srcover_hsw LABEL PROC
PUBLIC _sk_dstover_hsw
_sk_dstover_hsw LABEL PROC
- DB 196,98,125,24,5,62,101,0,0 ; vbroadcastss 0x653e(%rip),%ymm8 # 6954 <_sk_clut_4D_hsw+0x900>
+ DB 196,98,125,24,5,10,106,0,0 ; vbroadcastss 0x6a0a(%rip),%ymm8 # 6e20 <_sk_clut_4D_hsw+0x900>
DB 197,60,92,199 ; vsubps %ymm7,%ymm8,%ymm8
DB 196,226,61,168,196 ; vfmadd213ps %ymm4,%ymm8,%ymm0
DB 196,226,61,168,205 ; vfmadd213ps %ymm5,%ymm8,%ymm1
@@ -325,7 +325,7 @@ _sk_modulate_hsw LABEL PROC
PUBLIC _sk_multiply_hsw
_sk_multiply_hsw LABEL PROC
- DB 196,98,125,24,5,9,101,0,0 ; vbroadcastss 0x6509(%rip),%ymm8 # 6958 <_sk_clut_4D_hsw+0x904>
+ DB 196,98,125,24,5,213,105,0,0 ; vbroadcastss 0x69d5(%rip),%ymm8 # 6e24 <_sk_clut_4D_hsw+0x904>
DB 197,60,92,207 ; vsubps %ymm7,%ymm8,%ymm9
DB 197,52,89,208 ; vmulps %ymm0,%ymm9,%ymm10
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
@@ -367,7 +367,7 @@ _sk_screen_hsw LABEL PROC
PUBLIC _sk_xor__hsw
_sk_xor__hsw LABEL PROC
- DB 196,98,125,24,5,132,100,0,0 ; vbroadcastss 0x6484(%rip),%ymm8 # 695c <_sk_clut_4D_hsw+0x908>
+ DB 196,98,125,24,5,80,105,0,0 ; vbroadcastss 0x6950(%rip),%ymm8 # 6e28 <_sk_clut_4D_hsw+0x908>
DB 197,60,92,207 ; vsubps %ymm7,%ymm8,%ymm9
DB 197,180,89,192 ; vmulps %ymm0,%ymm9,%ymm0
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
@@ -399,7 +399,7 @@ _sk_darken_hsw LABEL PROC
DB 197,100,89,206 ; vmulps %ymm6,%ymm3,%ymm9
DB 196,193,108,95,209 ; vmaxps %ymm9,%ymm2,%ymm2
DB 197,188,92,210 ; vsubps %ymm2,%ymm8,%ymm2
- DB 196,98,125,24,5,12,100,0,0 ; vbroadcastss 0x640c(%rip),%ymm8 # 6960 <_sk_clut_4D_hsw+0x90c>
+ DB 196,98,125,24,5,216,104,0,0 ; vbroadcastss 0x68d8(%rip),%ymm8 # 6e2c <_sk_clut_4D_hsw+0x90c>
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
DB 196,194,69,184,216 ; vfmadd231ps %ymm8,%ymm7,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -422,7 +422,7 @@ _sk_lighten_hsw LABEL PROC
DB 197,100,89,206 ; vmulps %ymm6,%ymm3,%ymm9
DB 196,193,108,93,209 ; vminps %ymm9,%ymm2,%ymm2
DB 197,188,92,210 ; vsubps %ymm2,%ymm8,%ymm2
- DB 196,98,125,24,5,187,99,0,0 ; vbroadcastss 0x63bb(%rip),%ymm8 # 6964 <_sk_clut_4D_hsw+0x910>
+ DB 196,98,125,24,5,135,104,0,0 ; vbroadcastss 0x6887(%rip),%ymm8 # 6e30 <_sk_clut_4D_hsw+0x910>
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
DB 196,194,69,184,216 ; vfmadd231ps %ymm8,%ymm7,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -448,7 +448,7 @@ _sk_difference_hsw LABEL PROC
DB 196,193,108,93,209 ; vminps %ymm9,%ymm2,%ymm2
DB 197,236,88,210 ; vaddps %ymm2,%ymm2,%ymm2
DB 197,188,92,210 ; vsubps %ymm2,%ymm8,%ymm2
- DB 196,98,125,24,5,94,99,0,0 ; vbroadcastss 0x635e(%rip),%ymm8 # 6968 <_sk_clut_4D_hsw+0x914>
+ DB 196,98,125,24,5,42,104,0,0 ; vbroadcastss 0x682a(%rip),%ymm8 # 6e34 <_sk_clut_4D_hsw+0x914>
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
DB 196,194,69,184,216 ; vfmadd231ps %ymm8,%ymm7,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -468,7 +468,7 @@ _sk_exclusion_hsw LABEL PROC
DB 197,236,89,214 ; vmulps %ymm6,%ymm2,%ymm2
DB 197,236,88,210 ; vaddps %ymm2,%ymm2,%ymm2
DB 197,188,92,210 ; vsubps %ymm2,%ymm8,%ymm2
- DB 196,98,125,24,5,28,99,0,0 ; vbroadcastss 0x631c(%rip),%ymm8 # 696c <_sk_clut_4D_hsw+0x918>
+ DB 196,98,125,24,5,232,103,0,0 ; vbroadcastss 0x67e8(%rip),%ymm8 # 6e38 <_sk_clut_4D_hsw+0x918>
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
DB 196,194,69,184,216 ; vfmadd231ps %ymm8,%ymm7,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -476,7 +476,7 @@ _sk_exclusion_hsw LABEL PROC
PUBLIC _sk_colorburn_hsw
_sk_colorburn_hsw LABEL PROC
- DB 196,98,125,24,5,10,99,0,0 ; vbroadcastss 0x630a(%rip),%ymm8 # 6970 <_sk_clut_4D_hsw+0x91c>
+ DB 196,98,125,24,5,214,103,0,0 ; vbroadcastss 0x67d6(%rip),%ymm8 # 6e3c <_sk_clut_4D_hsw+0x91c>
DB 197,60,92,207 ; vsubps %ymm7,%ymm8,%ymm9
DB 197,52,89,216 ; vmulps %ymm0,%ymm9,%ymm11
DB 196,65,44,87,210 ; vxorps %ymm10,%ymm10,%ymm10
@@ -532,7 +532,7 @@ _sk_colorburn_hsw LABEL PROC
PUBLIC _sk_colordodge_hsw
_sk_colordodge_hsw LABEL PROC
DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
- DB 196,98,125,24,13,21,98,0,0 ; vbroadcastss 0x6215(%rip),%ymm9 # 6974 <_sk_clut_4D_hsw+0x920>
+ DB 196,98,125,24,13,225,102,0,0 ; vbroadcastss 0x66e1(%rip),%ymm9 # 6e40 <_sk_clut_4D_hsw+0x920>
DB 197,52,92,215 ; vsubps %ymm7,%ymm9,%ymm10
DB 197,44,89,216 ; vmulps %ymm0,%ymm10,%ymm11
DB 197,52,92,203 ; vsubps %ymm3,%ymm9,%ymm9
@@ -583,7 +583,7 @@ _sk_colordodge_hsw LABEL PROC
PUBLIC _sk_hardlight_hsw
_sk_hardlight_hsw LABEL PROC
- DB 196,98,125,24,5,54,97,0,0 ; vbroadcastss 0x6136(%rip),%ymm8 # 6978 <_sk_clut_4D_hsw+0x924>
+ DB 196,98,125,24,5,2,102,0,0 ; vbroadcastss 0x6602(%rip),%ymm8 # 6e44 <_sk_clut_4D_hsw+0x924>
DB 197,60,92,215 ; vsubps %ymm7,%ymm8,%ymm10
DB 197,44,89,216 ; vmulps %ymm0,%ymm10,%ymm11
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
@@ -632,7 +632,7 @@ _sk_hardlight_hsw LABEL PROC
PUBLIC _sk_overlay_hsw
_sk_overlay_hsw LABEL PROC
- DB 196,98,125,24,5,110,96,0,0 ; vbroadcastss 0x606e(%rip),%ymm8 # 697c <_sk_clut_4D_hsw+0x928>
+ DB 196,98,125,24,5,58,101,0,0 ; vbroadcastss 0x653a(%rip),%ymm8 # 6e48 <_sk_clut_4D_hsw+0x928>
DB 197,60,92,215 ; vsubps %ymm7,%ymm8,%ymm10
DB 197,44,89,216 ; vmulps %ymm0,%ymm10,%ymm11
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
@@ -692,10 +692,10 @@ _sk_softlight_hsw LABEL PROC
DB 196,65,20,88,197 ; vaddps %ymm13,%ymm13,%ymm8
DB 196,65,60,88,192 ; vaddps %ymm8,%ymm8,%ymm8
DB 196,66,61,168,192 ; vfmadd213ps %ymm8,%ymm8,%ymm8
- DB 196,98,125,24,29,117,95,0,0 ; vbroadcastss 0x5f75(%rip),%ymm11 # 6984 <_sk_clut_4D_hsw+0x930>
+ DB 196,98,125,24,29,65,100,0,0 ; vbroadcastss 0x6441(%rip),%ymm11 # 6e50 <_sk_clut_4D_hsw+0x930>
DB 196,65,20,88,227 ; vaddps %ymm11,%ymm13,%ymm12
DB 196,65,28,89,192 ; vmulps %ymm8,%ymm12,%ymm8
- DB 196,98,125,24,37,102,95,0,0 ; vbroadcastss 0x5f66(%rip),%ymm12 # 6988 <_sk_clut_4D_hsw+0x934>
+ DB 196,98,125,24,37,50,100,0,0 ; vbroadcastss 0x6432(%rip),%ymm12 # 6e54 <_sk_clut_4D_hsw+0x934>
DB 196,66,21,184,196 ; vfmadd231ps %ymm12,%ymm13,%ymm8
DB 196,65,124,82,245 ; vrsqrtps %ymm13,%ymm14
DB 196,65,124,83,246 ; vrcpps %ymm14,%ymm14
@@ -705,7 +705,7 @@ _sk_softlight_hsw LABEL PROC
DB 197,4,194,255,2 ; vcmpleps %ymm7,%ymm15,%ymm15
DB 196,67,13,74,240,240 ; vblendvps %ymm15,%ymm8,%ymm14,%ymm14
DB 197,116,88,249 ; vaddps %ymm1,%ymm1,%ymm15
- DB 196,98,125,24,5,41,95,0,0 ; vbroadcastss 0x5f29(%rip),%ymm8 # 6980 <_sk_clut_4D_hsw+0x92c>
+ DB 196,98,125,24,5,245,99,0,0 ; vbroadcastss 0x63f5(%rip),%ymm8 # 6e4c <_sk_clut_4D_hsw+0x92c>
DB 196,65,60,92,237 ; vsubps %ymm13,%ymm8,%ymm13
DB 197,132,92,195 ; vsubps %ymm3,%ymm15,%ymm0
DB 196,98,125,168,235 ; vfmadd213ps %ymm3,%ymm0,%ymm13
@@ -818,11 +818,11 @@ _sk_hue_hsw LABEL PROC
DB 196,65,28,89,210 ; vmulps %ymm10,%ymm12,%ymm10
DB 196,65,44,94,214 ; vdivps %ymm14,%ymm10,%ymm10
DB 196,67,45,74,224,240 ; vblendvps %ymm15,%ymm8,%ymm10,%ymm12
- DB 196,98,125,24,53,40,93,0,0 ; vbroadcastss 0x5d28(%rip),%ymm14 # 698c <_sk_clut_4D_hsw+0x938>
- DB 196,98,125,24,61,35,93,0,0 ; vbroadcastss 0x5d23(%rip),%ymm15 # 6990 <_sk_clut_4D_hsw+0x93c>
+ DB 196,98,125,24,53,244,97,0,0 ; vbroadcastss 0x61f4(%rip),%ymm14 # 6e58 <_sk_clut_4D_hsw+0x938>
+ DB 196,98,125,24,61,239,97,0,0 ; vbroadcastss 0x61ef(%rip),%ymm15 # 6e5c <_sk_clut_4D_hsw+0x93c>
DB 196,65,84,89,239 ; vmulps %ymm15,%ymm5,%ymm13
DB 196,66,93,184,238 ; vfmadd231ps %ymm14,%ymm4,%ymm13
- DB 196,226,125,24,5,20,93,0,0 ; vbroadcastss 0x5d14(%rip),%ymm0 # 6994 <_sk_clut_4D_hsw+0x940>
+ DB 196,226,125,24,5,224,97,0,0 ; vbroadcastss 0x61e0(%rip),%ymm0 # 6e60 <_sk_clut_4D_hsw+0x940>
DB 196,98,77,184,232 ; vfmadd231ps %ymm0,%ymm6,%ymm13
DB 196,65,116,89,215 ; vmulps %ymm15,%ymm1,%ymm10
DB 196,66,53,184,214 ; vfmadd231ps %ymm14,%ymm9,%ymm10
@@ -877,7 +877,7 @@ _sk_hue_hsw LABEL PROC
DB 196,193,124,95,192 ; vmaxps %ymm8,%ymm0,%ymm0
DB 196,65,36,95,200 ; vmaxps %ymm8,%ymm11,%ymm9
DB 196,65,116,95,192 ; vmaxps %ymm8,%ymm1,%ymm8
- DB 196,226,125,24,13,1,92,0,0 ; vbroadcastss 0x5c01(%rip),%ymm1 # 6998 <_sk_clut_4D_hsw+0x944>
+ DB 196,226,125,24,13,205,96,0,0 ; vbroadcastss 0x60cd(%rip),%ymm1 # 6e64 <_sk_clut_4D_hsw+0x944>
DB 197,116,92,215 ; vsubps %ymm7,%ymm1,%ymm10
DB 197,172,89,210 ; vmulps %ymm2,%ymm10,%ymm2
DB 197,116,92,219 ; vsubps %ymm3,%ymm1,%ymm11
@@ -931,11 +931,11 @@ _sk_saturation_hsw LABEL PROC
DB 196,65,28,89,210 ; vmulps %ymm10,%ymm12,%ymm10
DB 196,65,44,94,214 ; vdivps %ymm14,%ymm10,%ymm10
DB 196,67,45,74,224,240 ; vblendvps %ymm15,%ymm8,%ymm10,%ymm12
- DB 196,98,125,24,53,18,91,0,0 ; vbroadcastss 0x5b12(%rip),%ymm14 # 699c <_sk_clut_4D_hsw+0x948>
- DB 196,98,125,24,61,13,91,0,0 ; vbroadcastss 0x5b0d(%rip),%ymm15 # 69a0 <_sk_clut_4D_hsw+0x94c>
+ DB 196,98,125,24,53,222,95,0,0 ; vbroadcastss 0x5fde(%rip),%ymm14 # 6e68 <_sk_clut_4D_hsw+0x948>
+ DB 196,98,125,24,61,217,95,0,0 ; vbroadcastss 0x5fd9(%rip),%ymm15 # 6e6c <_sk_clut_4D_hsw+0x94c>
DB 196,65,84,89,239 ; vmulps %ymm15,%ymm5,%ymm13
DB 196,66,93,184,238 ; vfmadd231ps %ymm14,%ymm4,%ymm13
- DB 196,226,125,24,5,254,90,0,0 ; vbroadcastss 0x5afe(%rip),%ymm0 # 69a4 <_sk_clut_4D_hsw+0x950>
+ DB 196,226,125,24,5,202,95,0,0 ; vbroadcastss 0x5fca(%rip),%ymm0 # 6e70 <_sk_clut_4D_hsw+0x950>
DB 196,98,77,184,232 ; vfmadd231ps %ymm0,%ymm6,%ymm13
DB 196,65,116,89,215 ; vmulps %ymm15,%ymm1,%ymm10
DB 196,66,53,184,214 ; vfmadd231ps %ymm14,%ymm9,%ymm10
@@ -990,7 +990,7 @@ _sk_saturation_hsw LABEL PROC
DB 196,193,124,95,192 ; vmaxps %ymm8,%ymm0,%ymm0
DB 196,65,36,95,200 ; vmaxps %ymm8,%ymm11,%ymm9
DB 196,65,116,95,192 ; vmaxps %ymm8,%ymm1,%ymm8
- DB 196,226,125,24,13,235,89,0,0 ; vbroadcastss 0x59eb(%rip),%ymm1 # 69a8 <_sk_clut_4D_hsw+0x954>
+ DB 196,226,125,24,13,183,94,0,0 ; vbroadcastss 0x5eb7(%rip),%ymm1 # 6e74 <_sk_clut_4D_hsw+0x954>
DB 197,116,92,215 ; vsubps %ymm7,%ymm1,%ymm10
DB 197,172,89,210 ; vmulps %ymm2,%ymm10,%ymm2
DB 197,116,92,219 ; vsubps %ymm3,%ymm1,%ymm11
@@ -1018,11 +1018,11 @@ _sk_color_hsw LABEL PROC
DB 197,108,89,199 ; vmulps %ymm7,%ymm2,%ymm8
DB 197,116,89,215 ; vmulps %ymm7,%ymm1,%ymm10
DB 197,52,89,223 ; vmulps %ymm7,%ymm9,%ymm11
- DB 196,98,125,24,45,126,89,0,0 ; vbroadcastss 0x597e(%rip),%ymm13 # 69ac <_sk_clut_4D_hsw+0x958>
- DB 196,98,125,24,53,121,89,0,0 ; vbroadcastss 0x5979(%rip),%ymm14 # 69b0 <_sk_clut_4D_hsw+0x95c>
+ DB 196,98,125,24,45,74,94,0,0 ; vbroadcastss 0x5e4a(%rip),%ymm13 # 6e78 <_sk_clut_4D_hsw+0x958>
+ DB 196,98,125,24,53,69,94,0,0 ; vbroadcastss 0x5e45(%rip),%ymm14 # 6e7c <_sk_clut_4D_hsw+0x95c>
DB 196,65,84,89,230 ; vmulps %ymm14,%ymm5,%ymm12
DB 196,66,93,184,229 ; vfmadd231ps %ymm13,%ymm4,%ymm12
- DB 196,98,125,24,61,106,89,0,0 ; vbroadcastss 0x596a(%rip),%ymm15 # 69b4 <_sk_clut_4D_hsw+0x960>
+ DB 196,98,125,24,61,54,94,0,0 ; vbroadcastss 0x5e36(%rip),%ymm15 # 6e80 <_sk_clut_4D_hsw+0x960>
DB 196,66,77,184,231 ; vfmadd231ps %ymm15,%ymm6,%ymm12
DB 196,65,44,89,206 ; vmulps %ymm14,%ymm10,%ymm9
DB 196,66,61,184,205 ; vfmadd231ps %ymm13,%ymm8,%ymm9
@@ -1078,7 +1078,7 @@ _sk_color_hsw LABEL PROC
DB 196,193,116,95,206 ; vmaxps %ymm14,%ymm1,%ymm1
DB 196,65,44,95,198 ; vmaxps %ymm14,%ymm10,%ymm8
DB 196,65,124,95,206 ; vmaxps %ymm14,%ymm0,%ymm9
- DB 196,226,125,24,5,76,88,0,0 ; vbroadcastss 0x584c(%rip),%ymm0 # 69b8 <_sk_clut_4D_hsw+0x964>
+ DB 196,226,125,24,5,24,93,0,0 ; vbroadcastss 0x5d18(%rip),%ymm0 # 6e84 <_sk_clut_4D_hsw+0x964>
DB 197,124,92,215 ; vsubps %ymm7,%ymm0,%ymm10
DB 197,172,89,210 ; vmulps %ymm2,%ymm10,%ymm2
DB 197,124,92,219 ; vsubps %ymm3,%ymm0,%ymm11
@@ -1106,11 +1106,11 @@ _sk_luminosity_hsw LABEL PROC
DB 197,100,89,196 ; vmulps %ymm4,%ymm3,%ymm8
DB 197,100,89,213 ; vmulps %ymm5,%ymm3,%ymm10
DB 197,100,89,222 ; vmulps %ymm6,%ymm3,%ymm11
- DB 196,98,125,24,45,223,87,0,0 ; vbroadcastss 0x57df(%rip),%ymm13 # 69bc <_sk_clut_4D_hsw+0x968>
- DB 196,98,125,24,53,218,87,0,0 ; vbroadcastss 0x57da(%rip),%ymm14 # 69c0 <_sk_clut_4D_hsw+0x96c>
+ DB 196,98,125,24,45,171,92,0,0 ; vbroadcastss 0x5cab(%rip),%ymm13 # 6e88 <_sk_clut_4D_hsw+0x968>
+ DB 196,98,125,24,53,166,92,0,0 ; vbroadcastss 0x5ca6(%rip),%ymm14 # 6e8c <_sk_clut_4D_hsw+0x96c>
DB 196,65,116,89,230 ; vmulps %ymm14,%ymm1,%ymm12
DB 196,66,109,184,229 ; vfmadd231ps %ymm13,%ymm2,%ymm12
- DB 196,98,125,24,61,203,87,0,0 ; vbroadcastss 0x57cb(%rip),%ymm15 # 69c4 <_sk_clut_4D_hsw+0x970>
+ DB 196,98,125,24,61,151,92,0,0 ; vbroadcastss 0x5c97(%rip),%ymm15 # 6e90 <_sk_clut_4D_hsw+0x970>
DB 196,66,53,184,231 ; vfmadd231ps %ymm15,%ymm9,%ymm12
DB 196,65,44,89,206 ; vmulps %ymm14,%ymm10,%ymm9
DB 196,66,61,184,205 ; vfmadd231ps %ymm13,%ymm8,%ymm9
@@ -1166,7 +1166,7 @@ _sk_luminosity_hsw LABEL PROC
DB 196,193,116,95,206 ; vmaxps %ymm14,%ymm1,%ymm1
DB 196,65,44,95,198 ; vmaxps %ymm14,%ymm10,%ymm8
DB 196,65,124,95,206 ; vmaxps %ymm14,%ymm0,%ymm9
- DB 196,226,125,24,5,173,86,0,0 ; vbroadcastss 0x56ad(%rip),%ymm0 # 69c8 <_sk_clut_4D_hsw+0x974>
+ DB 196,226,125,24,5,121,91,0,0 ; vbroadcastss 0x5b79(%rip),%ymm0 # 6e94 <_sk_clut_4D_hsw+0x974>
DB 197,124,92,215 ; vsubps %ymm7,%ymm0,%ymm10
DB 197,172,89,210 ; vmulps %ymm2,%ymm10,%ymm2
DB 197,124,92,219 ; vsubps %ymm3,%ymm0,%ymm11
@@ -1186,29 +1186,28 @@ _sk_luminosity_hsw LABEL PROC
PUBLIC _sk_srcover_rgba_8888_hsw
_sk_srcover_rgba_8888_hsw LABEL PROC
- DB 73,137,201 ; mov %rcx,%r9
+ DB 83 ; push %rbx
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,99,80,8 ; movslq 0x8(%rax),%r10
- DB 73,99,201 ; movslq %r9d,%rcx
- DB 73,15,175,202 ; imul %r10,%rcx
- DB 72,193,225,2 ; shl $0x2,%rcx
- DB 72,3,8 ; add (%rax),%rcx
- DB 72,99,194 ; movslq %edx,%rax
- DB 72,141,4,129 ; lea (%rcx,%rax,4),%rax
+ DB 76,99,201 ; movslq %ecx,%r9
+ DB 77,15,175,202 ; imul %r10,%r9
+ DB 73,193,225,2 ; shl $0x2,%r9
+ DB 76,3,8 ; add (%rax),%r9
+ DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 15,133,178,0,0,0 ; jne 1439 <_sk_srcover_rgba_8888_hsw+0xd9>
- DB 197,252,16,56 ; vmovups (%rax),%ymm7
- DB 197,196,84,37,173,90,0,0 ; vandps 0x5aad(%rip),%ymm7,%ymm4 # 6e40 <_sk_clut_4D_hsw+0xdec>
+ DB 15,133,180,0,0,0 ; jne 1435 <_sk_srcover_rgba_8888_hsw+0xd5>
+ DB 196,193,126,111,60,153 ; vmovdqu (%r9,%rbx,4),%ymm7
+ DB 197,197,219,37,113,95,0,0 ; vpand 0x5f71(%rip),%ymm7,%ymm4 # 7300 <_sk_clut_4D_hsw+0xde0>
DB 197,252,91,228 ; vcvtdq2ps %ymm4,%ymm4
- DB 196,226,69,0,45,192,90,0,0 ; vpshufb 0x5ac0(%rip),%ymm7,%ymm5 # 6e60 <_sk_clut_4D_hsw+0xe0c>
+ DB 196,226,69,0,45,132,95,0,0 ; vpshufb 0x5f84(%rip),%ymm7,%ymm5 # 7320 <_sk_clut_4D_hsw+0xe00>
DB 197,252,91,237 ; vcvtdq2ps %ymm5,%ymm5
- DB 196,226,69,0,53,211,90,0,0 ; vpshufb 0x5ad3(%rip),%ymm7,%ymm6 # 6e80 <_sk_clut_4D_hsw+0xe2c>
+ DB 196,226,69,0,53,151,95,0,0 ; vpshufb 0x5f97(%rip),%ymm7,%ymm6 # 7340 <_sk_clut_4D_hsw+0xe20>
DB 197,252,91,246 ; vcvtdq2ps %ymm6,%ymm6
DB 197,197,114,215,24 ; vpsrld $0x18,%ymm7,%ymm7
DB 197,252,91,255 ; vcvtdq2ps %ymm7,%ymm7
- DB 196,98,125,24,5,9,86,0,0 ; vbroadcastss 0x5609(%rip),%ymm8 # 69cc <_sk_clut_4D_hsw+0x978>
+ DB 196,98,125,24,5,217,90,0,0 ; vbroadcastss 0x5ad9(%rip),%ymm8 # 6e98 <_sk_clut_4D_hsw+0x978>
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
- DB 196,98,125,24,13,0,86,0,0 ; vbroadcastss 0x5600(%rip),%ymm9 # 69d0 <_sk_clut_4D_hsw+0x97c>
+ DB 196,98,125,24,13,208,90,0,0 ; vbroadcastss 0x5ad0(%rip),%ymm9 # 6e9c <_sk_clut_4D_hsw+0x97c>
DB 196,193,124,89,193 ; vmulps %ymm9,%ymm0,%ymm0
DB 196,194,93,184,192 ; vfmadd231ps %ymm8,%ymm4,%ymm0
DB 196,193,116,89,201 ; vmulps %ymm9,%ymm1,%ymm1
@@ -1228,29 +1227,98 @@ _sk_srcover_rgba_8888_hsw LABEL PROC
DB 196,65,53,235,202 ; vpor %ymm10,%ymm9,%ymm9
DB 196,65,61,235,193 ; vpor %ymm9,%ymm8,%ymm8
DB 77,133,192 ; test %r8,%r8
- DB 117,52 ; jne 1462 <_sk_srcover_rgba_8888_hsw+0x102>
- DB 197,124,17,0 ; vmovups %ymm8,(%rax)
+ DB 117,66 ; jne 146c <_sk_srcover_rgba_8888_hsw+0x10c>
+ DB 196,65,126,127,4,153 ; vmovdqu %ymm8,(%r9,%rbx,4)
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 76,137,201 ; mov %r9,%rcx
- DB 255,224 ; jmpq *%rax
- DB 185,8,0,0,0 ; mov $0x8,%ecx
- DB 68,41,193 ; sub %r8d,%ecx
- DB 192,225,3 ; shl $0x3,%cl
- DB 73,199,194,255,255,255,255 ; mov $0xffffffffffffffff,%r10
- DB 73,211,234 ; shr %cl,%r10
- DB 196,193,249,110,226 ; vmovq %r10,%xmm4
- DB 196,226,125,33,228 ; vpmovsxbd %xmm4,%ymm4
- DB 196,226,93,44,56 ; vmaskmovps (%rax),%ymm4,%ymm7
- DB 233,41,255,255,255 ; jmpq 138b <_sk_srcover_rgba_8888_hsw+0x2b>
- DB 185,8,0,0,0 ; mov $0x8,%ecx
- DB 68,41,193 ; sub %r8d,%ecx
- DB 192,225,3 ; shl $0x3,%cl
- DB 73,199,194,255,255,255,255 ; mov $0xffffffffffffffff,%r10
- DB 73,211,234 ; shr %cl,%r10
- DB 196,65,249,110,202 ; vmovq %r10,%xmm9
- DB 196,66,125,33,201 ; vpmovsxbd %xmm9,%ymm9
- DB 196,98,53,46,0 ; vmaskmovps %ymm8,%ymm9,(%rax)
- DB 235,170 ; jmp 1432 <_sk_srcover_rgba_8888_hsw+0xd2>
+ DB 91 ; pop %rbx
+ DB 255,224 ; jmpq *%rax
+ DB 69,137,194 ; mov %r8d,%r10d
+ DB 65,128,226,7 ; and $0x7,%r10b
+ DB 197,197,239,255 ; vpxor %ymm7,%ymm7,%ymm7
+ DB 65,254,202 ; dec %r10b
+ DB 65,128,250,6 ; cmp $0x6,%r10b
+ DB 15,135,58,255,255,255 ; ja 1387 <_sk_srcover_rgba_8888_hsw+0x27>
+ DB 69,15,182,210 ; movzbl %r10b,%r10d
+ DB 76,141,29,0,1,0,0 ; lea 0x100(%rip),%r11 # 1558 <_sk_srcover_rgba_8888_hsw+0x1f8>
+ DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
+ DB 76,1,216 ; add %r11,%rax
+ DB 255,224 ; jmpq *%rax
+ DB 196,193,121,110,60,153 ; vmovd (%r9,%rbx,4),%xmm7
+ DB 233,27,255,255,255 ; jmpq 1387 <_sk_srcover_rgba_8888_hsw+0x27>
+ DB 69,137,194 ; mov %r8d,%r10d
+ DB 65,128,226,7 ; and $0x7,%r10b
+ DB 65,254,202 ; dec %r10b
+ DB 65,128,250,6 ; cmp $0x6,%r10b
+ DB 119,180 ; ja 1430 <_sk_srcover_rgba_8888_hsw+0xd0>
+ DB 65,15,182,194 ; movzbl %r10b,%eax
+ DB 76,141,21,237,0,0,0 ; lea 0xed(%rip),%r10 # 1574 <_sk_srcover_rgba_8888_hsw+0x214>
+ DB 73,99,4,130 ; movslq (%r10,%rax,4),%rax
+ DB 76,1,208 ; add %r10,%rax
+ DB 255,224 ; jmpq *%rax
+ DB 196,65,121,126,4,153 ; vmovd %xmm8,(%r9,%rbx,4)
+ DB 235,152 ; jmp 1430 <_sk_srcover_rgba_8888_hsw+0xd0>
+ DB 196,193,121,110,100,153,8 ; vmovd 0x8(%r9,%rbx,4),%xmm4
+ DB 196,226,121,89,228 ; vpbroadcastq %xmm4,%xmm4
+ DB 197,213,239,237 ; vpxor %ymm5,%ymm5,%ymm5
+ DB 196,227,85,2,252,4 ; vpblendd $0x4,%ymm4,%ymm5,%ymm7
+ DB 196,193,122,126,36,153 ; vmovq (%r9,%rbx,4),%xmm4
+ DB 196,227,69,2,252,3 ; vpblendd $0x3,%ymm4,%ymm7,%ymm7
+ DB 233,200,254,255,255 ; jmpq 1387 <_sk_srcover_rgba_8888_hsw+0x27>
+ DB 196,193,121,110,100,153,24 ; vmovd 0x18(%r9,%rbx,4),%xmm4
+ DB 196,226,125,89,228 ; vpbroadcastq %xmm4,%ymm4
+ DB 197,213,239,237 ; vpxor %ymm5,%ymm5,%ymm5
+ DB 196,227,85,2,252,64 ; vpblendd $0x40,%ymm4,%ymm5,%ymm7
+ DB 196,227,125,57,252,1 ; vextracti128 $0x1,%ymm7,%xmm4
+ DB 196,195,89,34,100,153,20,1 ; vpinsrd $0x1,0x14(%r9,%rbx,4),%xmm4,%xmm4
+ DB 196,227,69,56,252,1 ; vinserti128 $0x1,%xmm4,%ymm7,%ymm7
+ DB 196,227,125,57,252,1 ; vextracti128 $0x1,%ymm7,%xmm4
+ DB 196,195,89,34,100,153,16,0 ; vpinsrd $0x0,0x10(%r9,%rbx,4),%xmm4,%xmm4
+ DB 196,227,69,56,252,1 ; vinserti128 $0x1,%xmm4,%ymm7,%ymm7
+ DB 196,193,122,111,36,153 ; vmovdqu (%r9,%rbx,4),%xmm4
+ DB 196,227,93,2,255,240 ; vpblendd $0xf0,%ymm7,%ymm4,%ymm7
+ DB 233,121,254,255,255 ; jmpq 1387 <_sk_srcover_rgba_8888_hsw+0x27>
+ DB 196,67,121,22,68,153,8,2 ; vpextrd $0x2,%xmm8,0x8(%r9,%rbx,4)
+ DB 196,65,121,214,4,153 ; vmovq %xmm8,(%r9,%rbx,4)
+ DB 233,15,255,255,255 ; jmpq 1430 <_sk_srcover_rgba_8888_hsw+0xd0>
+ DB 196,67,125,57,193,1 ; vextracti128 $0x1,%ymm8,%xmm9
+ DB 196,67,121,22,76,153,24,2 ; vpextrd $0x2,%xmm9,0x18(%r9,%rbx,4)
+ DB 196,67,125,57,193,1 ; vextracti128 $0x1,%ymm8,%xmm9
+ DB 196,67,121,22,76,153,20,1 ; vpextrd $0x1,%xmm9,0x14(%r9,%rbx,4)
+ DB 196,67,125,57,193,1 ; vextracti128 $0x1,%ymm8,%xmm9
+ DB 196,65,121,126,76,153,16 ; vmovd %xmm9,0x10(%r9,%rbx,4)
+ DB 196,65,122,127,4,153 ; vmovdqu %xmm8,(%r9,%rbx,4)
+ DB 233,219,254,255,255 ; jmpq 1430 <_sk_srcover_rgba_8888_hsw+0xd0>
+ DB 15,31,0 ; nopl (%rax)
+ DB 9,255 ; or %edi,%edi
+ DB 255 ; (bad)
+ DB 255,86,255 ; callq *-0x1(%rsi)
+ DB 255 ; (bad)
+ DB 255,64,255 ; incl -0x1(%rax)
+ DB 255 ; (bad)
+ DB 255,165,255,255,255,145 ; jmpq *-0x6e000001(%rbp)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 125,255 ; jge 156d <_sk_srcover_rgba_8888_hsw+0x20d>
+ DB 255 ; (bad)
+ DB 255,103,255 ; jmpq *-0x1(%rdi)
+ DB 255 ; (bad)
+ DB 255,28,255 ; lcall *(%rdi,%rdi,8)
+ DB 255 ; (bad)
+ DB 255,162,255,255,255,154 ; jmpq *-0x65000001(%rdx)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,214 ; callq *%rsi
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,201 ; dec %ecx
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 187,255,255,255,173 ; mov $0xadffffff,%ebx
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; .byte 0xff
PUBLIC _sk_clamp_0_hsw
_sk_clamp_0_hsw LABEL PROC
@@ -1264,7 +1332,7 @@ _sk_clamp_0_hsw LABEL PROC
PUBLIC _sk_clamp_1_hsw
_sk_clamp_1_hsw LABEL PROC
- DB 196,98,125,24,5,38,85,0,0 ; vbroadcastss 0x5526(%rip),%ymm8 # 69d4 <_sk_clut_4D_hsw+0x980>
+ DB 196,98,125,24,5,234,88,0,0 ; vbroadcastss 0x58ea(%rip),%ymm8 # 6ea0 <_sk_clut_4D_hsw+0x980>
DB 196,193,124,93,192 ; vminps %ymm8,%ymm0,%ymm0
DB 196,193,116,93,200 ; vminps %ymm8,%ymm1,%ymm1
DB 196,193,108,93,208 ; vminps %ymm8,%ymm2,%ymm2
@@ -1274,7 +1342,7 @@ _sk_clamp_1_hsw LABEL PROC
PUBLIC _sk_clamp_a_hsw
_sk_clamp_a_hsw LABEL PROC
- DB 196,98,125,24,5,9,85,0,0 ; vbroadcastss 0x5509(%rip),%ymm8 # 69d8 <_sk_clut_4D_hsw+0x984>
+ DB 196,98,125,24,5,205,88,0,0 ; vbroadcastss 0x58cd(%rip),%ymm8 # 6ea4 <_sk_clut_4D_hsw+0x984>
DB 196,193,100,93,216 ; vminps %ymm8,%ymm3,%ymm3
DB 197,252,93,195 ; vminps %ymm3,%ymm0,%ymm0
DB 197,244,93,203 ; vminps %ymm3,%ymm1,%ymm1
@@ -1284,7 +1352,7 @@ _sk_clamp_a_hsw LABEL PROC
PUBLIC _sk_clamp_a_dst_hsw
_sk_clamp_a_dst_hsw LABEL PROC
- DB 196,98,125,24,5,239,84,0,0 ; vbroadcastss 0x54ef(%rip),%ymm8 # 69dc <_sk_clut_4D_hsw+0x988>
+ DB 196,98,125,24,5,179,88,0,0 ; vbroadcastss 0x58b3(%rip),%ymm8 # 6ea8 <_sk_clut_4D_hsw+0x988>
DB 196,193,68,93,248 ; vminps %ymm8,%ymm7,%ymm7
DB 197,220,93,231 ; vminps %ymm7,%ymm4,%ymm4
DB 197,212,93,239 ; vminps %ymm7,%ymm5,%ymm5
@@ -1311,7 +1379,7 @@ _sk_swap_rb_hsw LABEL PROC
PUBLIC _sk_invert_hsw
_sk_invert_hsw LABEL PROC
- DB 196,98,125,24,5,174,84,0,0 ; vbroadcastss 0x54ae(%rip),%ymm8 # 69e0 <_sk_clut_4D_hsw+0x98c>
+ DB 196,98,125,24,5,114,88,0,0 ; vbroadcastss 0x5872(%rip),%ymm8 # 6eac <_sk_clut_4D_hsw+0x98c>
DB 197,188,92,192 ; vsubps %ymm0,%ymm8,%ymm0
DB 197,188,92,201 ; vsubps %ymm1,%ymm8,%ymm1
DB 197,188,92,210 ; vsubps %ymm2,%ymm8,%ymm2
@@ -1357,7 +1425,7 @@ PUBLIC _sk_unpremul_hsw
_sk_unpremul_hsw LABEL PROC
DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
DB 196,65,100,194,200,0 ; vcmpeqps %ymm8,%ymm3,%ymm9
- DB 196,98,125,24,21,66,84,0,0 ; vbroadcastss 0x5442(%rip),%ymm10 # 69e4 <_sk_clut_4D_hsw+0x990>
+ DB 196,98,125,24,21,6,88,0,0 ; vbroadcastss 0x5806(%rip),%ymm10 # 6eb0 <_sk_clut_4D_hsw+0x990>
DB 197,44,94,211 ; vdivps %ymm3,%ymm10,%ymm10
DB 196,67,45,74,192,144 ; vblendvps %ymm9,%ymm8,%ymm10,%ymm8
DB 197,188,89,192 ; vmulps %ymm0,%ymm8,%ymm0
@@ -1368,16 +1436,16 @@ _sk_unpremul_hsw LABEL PROC
PUBLIC _sk_from_srgb_hsw
_sk_from_srgb_hsw LABEL PROC
- DB 196,98,125,24,5,35,84,0,0 ; vbroadcastss 0x5423(%rip),%ymm8 # 69e8 <_sk_clut_4D_hsw+0x994>
+ DB 196,98,125,24,5,231,87,0,0 ; vbroadcastss 0x57e7(%rip),%ymm8 # 6eb4 <_sk_clut_4D_hsw+0x994>
DB 196,65,124,89,200 ; vmulps %ymm8,%ymm0,%ymm9
DB 197,124,89,208 ; vmulps %ymm0,%ymm0,%ymm10
- DB 196,98,125,24,29,21,84,0,0 ; vbroadcastss 0x5415(%rip),%ymm11 # 69ec <_sk_clut_4D_hsw+0x998>
- DB 196,98,125,24,37,16,84,0,0 ; vbroadcastss 0x5410(%rip),%ymm12 # 69f0 <_sk_clut_4D_hsw+0x99c>
+ DB 196,98,125,24,29,217,87,0,0 ; vbroadcastss 0x57d9(%rip),%ymm11 # 6eb8 <_sk_clut_4D_hsw+0x998>
+ DB 196,98,125,24,37,212,87,0,0 ; vbroadcastss 0x57d4(%rip),%ymm12 # 6ebc <_sk_clut_4D_hsw+0x99c>
DB 196,65,124,40,236 ; vmovaps %ymm12,%ymm13
DB 196,66,125,168,235 ; vfmadd213ps %ymm11,%ymm0,%ymm13
- DB 196,98,125,24,53,1,84,0,0 ; vbroadcastss 0x5401(%rip),%ymm14 # 69f4 <_sk_clut_4D_hsw+0x9a0>
+ DB 196,98,125,24,53,197,87,0,0 ; vbroadcastss 0x57c5(%rip),%ymm14 # 6ec0 <_sk_clut_4D_hsw+0x9a0>
DB 196,66,45,168,238 ; vfmadd213ps %ymm14,%ymm10,%ymm13
- DB 196,98,125,24,21,247,83,0,0 ; vbroadcastss 0x53f7(%rip),%ymm10 # 69f8 <_sk_clut_4D_hsw+0x9a4>
+ DB 196,98,125,24,21,187,87,0,0 ; vbroadcastss 0x57bb(%rip),%ymm10 # 6ec4 <_sk_clut_4D_hsw+0x9a4>
DB 196,193,124,194,194,1 ; vcmpltps %ymm10,%ymm0,%ymm0
DB 196,195,21,74,193,0 ; vblendvps %ymm0,%ymm9,%ymm13,%ymm0
DB 196,65,116,89,200 ; vmulps %ymm8,%ymm1,%ymm9
@@ -1398,16 +1466,16 @@ _sk_from_srgb_hsw LABEL PROC
PUBLIC _sk_from_srgb_dst_hsw
_sk_from_srgb_dst_hsw LABEL PROC
- DB 196,98,125,24,5,159,83,0,0 ; vbroadcastss 0x539f(%rip),%ymm8 # 69fc <_sk_clut_4D_hsw+0x9a8>
+ DB 196,98,125,24,5,99,87,0,0 ; vbroadcastss 0x5763(%rip),%ymm8 # 6ec8 <_sk_clut_4D_hsw+0x9a8>
DB 196,65,92,89,200 ; vmulps %ymm8,%ymm4,%ymm9
DB 197,92,89,212 ; vmulps %ymm4,%ymm4,%ymm10
- DB 196,98,125,24,29,145,83,0,0 ; vbroadcastss 0x5391(%rip),%ymm11 # 6a00 <_sk_clut_4D_hsw+0x9ac>
- DB 196,98,125,24,37,140,83,0,0 ; vbroadcastss 0x538c(%rip),%ymm12 # 6a04 <_sk_clut_4D_hsw+0x9b0>
+ DB 196,98,125,24,29,85,87,0,0 ; vbroadcastss 0x5755(%rip),%ymm11 # 6ecc <_sk_clut_4D_hsw+0x9ac>
+ DB 196,98,125,24,37,80,87,0,0 ; vbroadcastss 0x5750(%rip),%ymm12 # 6ed0 <_sk_clut_4D_hsw+0x9b0>
DB 196,65,124,40,236 ; vmovaps %ymm12,%ymm13
DB 196,66,93,168,235 ; vfmadd213ps %ymm11,%ymm4,%ymm13
- DB 196,98,125,24,53,125,83,0,0 ; vbroadcastss 0x537d(%rip),%ymm14 # 6a08 <_sk_clut_4D_hsw+0x9b4>
+ DB 196,98,125,24,53,65,87,0,0 ; vbroadcastss 0x5741(%rip),%ymm14 # 6ed4 <_sk_clut_4D_hsw+0x9b4>
DB 196,66,45,168,238 ; vfmadd213ps %ymm14,%ymm10,%ymm13
- DB 196,98,125,24,21,115,83,0,0 ; vbroadcastss 0x5373(%rip),%ymm10 # 6a0c <_sk_clut_4D_hsw+0x9b8>
+ DB 196,98,125,24,21,55,87,0,0 ; vbroadcastss 0x5737(%rip),%ymm10 # 6ed8 <_sk_clut_4D_hsw+0x9b8>
DB 196,193,92,194,226,1 ; vcmpltps %ymm10,%ymm4,%ymm4
DB 196,195,21,74,225,64 ; vblendvps %ymm4,%ymm9,%ymm13,%ymm4
DB 196,65,84,89,200 ; vmulps %ymm8,%ymm5,%ymm9
@@ -1429,19 +1497,19 @@ _sk_from_srgb_dst_hsw LABEL PROC
PUBLIC _sk_to_srgb_hsw
_sk_to_srgb_hsw LABEL PROC
DB 197,124,82,200 ; vrsqrtps %ymm0,%ymm9
- DB 196,98,125,24,5,23,83,0,0 ; vbroadcastss 0x5317(%rip),%ymm8 # 6a10 <_sk_clut_4D_hsw+0x9bc>
+ DB 196,98,125,24,5,219,86,0,0 ; vbroadcastss 0x56db(%rip),%ymm8 # 6edc <_sk_clut_4D_hsw+0x9bc>
DB 196,65,124,89,208 ; vmulps %ymm8,%ymm0,%ymm10
- DB 196,98,125,24,29,13,83,0,0 ; vbroadcastss 0x530d(%rip),%ymm11 # 6a14 <_sk_clut_4D_hsw+0x9c0>
- DB 196,98,125,24,37,8,83,0,0 ; vbroadcastss 0x5308(%rip),%ymm12 # 6a18 <_sk_clut_4D_hsw+0x9c4>
+ DB 196,98,125,24,29,209,86,0,0 ; vbroadcastss 0x56d1(%rip),%ymm11 # 6ee0 <_sk_clut_4D_hsw+0x9c0>
+ DB 196,98,125,24,37,204,86,0,0 ; vbroadcastss 0x56cc(%rip),%ymm12 # 6ee4 <_sk_clut_4D_hsw+0x9c4>
DB 196,65,124,40,236 ; vmovaps %ymm12,%ymm13
DB 196,66,53,168,235 ; vfmadd213ps %ymm11,%ymm9,%ymm13
- DB 196,98,125,24,53,249,82,0,0 ; vbroadcastss 0x52f9(%rip),%ymm14 # 6a1c <_sk_clut_4D_hsw+0x9c8>
+ DB 196,98,125,24,53,189,86,0,0 ; vbroadcastss 0x56bd(%rip),%ymm14 # 6ee8 <_sk_clut_4D_hsw+0x9c8>
DB 196,66,53,168,238 ; vfmadd213ps %ymm14,%ymm9,%ymm13
- DB 196,98,125,24,61,239,82,0,0 ; vbroadcastss 0x52ef(%rip),%ymm15 # 6a20 <_sk_clut_4D_hsw+0x9cc>
+ DB 196,98,125,24,61,179,86,0,0 ; vbroadcastss 0x56b3(%rip),%ymm15 # 6eec <_sk_clut_4D_hsw+0x9cc>
DB 196,65,52,88,207 ; vaddps %ymm15,%ymm9,%ymm9
DB 196,65,124,83,201 ; vrcpps %ymm9,%ymm9
DB 196,65,20,89,201 ; vmulps %ymm9,%ymm13,%ymm9
- DB 196,98,125,24,45,219,82,0,0 ; vbroadcastss 0x52db(%rip),%ymm13 # 6a24 <_sk_clut_4D_hsw+0x9d0>
+ DB 196,98,125,24,45,159,86,0,0 ; vbroadcastss 0x569f(%rip),%ymm13 # 6ef0 <_sk_clut_4D_hsw+0x9d0>
DB 196,193,124,194,197,1 ; vcmpltps %ymm13,%ymm0,%ymm0
DB 196,195,53,74,194,0 ; vblendvps %ymm0,%ymm10,%ymm9,%ymm0
DB 197,124,82,201 ; vrsqrtps %ymm1,%ymm9
@@ -1473,26 +1541,26 @@ _sk_rgb_to_hsl_hsw LABEL PROC
DB 197,124,93,201 ; vminps %ymm1,%ymm0,%ymm9
DB 197,52,93,202 ; vminps %ymm2,%ymm9,%ymm9
DB 196,65,60,92,209 ; vsubps %ymm9,%ymm8,%ymm10
- DB 196,98,125,24,29,80,82,0,0 ; vbroadcastss 0x5250(%rip),%ymm11 # 6a28 <_sk_clut_4D_hsw+0x9d4>
+ DB 196,98,125,24,29,20,86,0,0 ; vbroadcastss 0x5614(%rip),%ymm11 # 6ef4 <_sk_clut_4D_hsw+0x9d4>
DB 196,65,36,94,218 ; vdivps %ymm10,%ymm11,%ymm11
DB 197,116,92,226 ; vsubps %ymm2,%ymm1,%ymm12
DB 197,116,194,234,1 ; vcmpltps %ymm2,%ymm1,%ymm13
- DB 196,98,125,24,53,61,82,0,0 ; vbroadcastss 0x523d(%rip),%ymm14 # 6a2c <_sk_clut_4D_hsw+0x9d8>
+ DB 196,98,125,24,53,1,86,0,0 ; vbroadcastss 0x5601(%rip),%ymm14 # 6ef8 <_sk_clut_4D_hsw+0x9d8>
DB 196,65,4,87,255 ; vxorps %ymm15,%ymm15,%ymm15
DB 196,67,5,74,238,208 ; vblendvps %ymm13,%ymm14,%ymm15,%ymm13
DB 196,66,37,168,229 ; vfmadd213ps %ymm13,%ymm11,%ymm12
DB 197,236,92,208 ; vsubps %ymm0,%ymm2,%ymm2
DB 197,124,92,233 ; vsubps %ymm1,%ymm0,%ymm13
- DB 196,98,125,24,53,36,82,0,0 ; vbroadcastss 0x5224(%rip),%ymm14 # 6a34 <_sk_clut_4D_hsw+0x9e0>
+ DB 196,98,125,24,53,232,85,0,0 ; vbroadcastss 0x55e8(%rip),%ymm14 # 6f00 <_sk_clut_4D_hsw+0x9e0>
DB 196,66,37,168,238 ; vfmadd213ps %ymm14,%ymm11,%ymm13
- DB 196,98,125,24,53,18,82,0,0 ; vbroadcastss 0x5212(%rip),%ymm14 # 6a30 <_sk_clut_4D_hsw+0x9dc>
+ DB 196,98,125,24,53,214,85,0,0 ; vbroadcastss 0x55d6(%rip),%ymm14 # 6efc <_sk_clut_4D_hsw+0x9dc>
DB 196,194,37,168,214 ; vfmadd213ps %ymm14,%ymm11,%ymm2
DB 197,188,194,201,0 ; vcmpeqps %ymm1,%ymm8,%ymm1
DB 196,227,21,74,202,16 ; vblendvps %ymm1,%ymm2,%ymm13,%ymm1
DB 197,188,194,192,0 ; vcmpeqps %ymm0,%ymm8,%ymm0
DB 196,195,117,74,196,0 ; vblendvps %ymm0,%ymm12,%ymm1,%ymm0
DB 196,193,60,88,201 ; vaddps %ymm9,%ymm8,%ymm1
- DB 196,98,125,24,29,245,81,0,0 ; vbroadcastss 0x51f5(%rip),%ymm11 # 6a3c <_sk_clut_4D_hsw+0x9e8>
+ DB 196,98,125,24,29,185,85,0,0 ; vbroadcastss 0x55b9(%rip),%ymm11 # 6f08 <_sk_clut_4D_hsw+0x9e8>
DB 196,193,116,89,211 ; vmulps %ymm11,%ymm1,%ymm2
DB 197,36,194,218,1 ; vcmpltps %ymm2,%ymm11,%ymm11
DB 196,65,12,92,224 ; vsubps %ymm8,%ymm14,%ymm12
@@ -1502,7 +1570,7 @@ _sk_rgb_to_hsl_hsw LABEL PROC
DB 197,172,94,201 ; vdivps %ymm1,%ymm10,%ymm1
DB 196,195,125,74,199,128 ; vblendvps %ymm8,%ymm15,%ymm0,%ymm0
DB 196,195,117,74,207,128 ; vblendvps %ymm8,%ymm15,%ymm1,%ymm1
- DB 196,98,125,24,5,184,81,0,0 ; vbroadcastss 0x51b8(%rip),%ymm8 # 6a38 <_sk_clut_4D_hsw+0x9e4>
+ DB 196,98,125,24,5,124,85,0,0 ; vbroadcastss 0x557c(%rip),%ymm8 # 6f04 <_sk_clut_4D_hsw+0x9e4>
DB 196,193,124,89,192 ; vmulps %ymm8,%ymm0,%ymm0
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -1517,30 +1585,30 @@ _sk_hsl_to_rgb_hsw LABEL PROC
DB 197,252,17,28,36 ; vmovups %ymm3,(%rsp)
DB 197,252,40,233 ; vmovaps %ymm1,%ymm5
DB 197,252,40,224 ; vmovaps %ymm0,%ymm4
- DB 196,98,125,24,5,127,81,0,0 ; vbroadcastss 0x517f(%rip),%ymm8 # 6a40 <_sk_clut_4D_hsw+0x9ec>
+ DB 196,98,125,24,5,67,85,0,0 ; vbroadcastss 0x5543(%rip),%ymm8 # 6f0c <_sk_clut_4D_hsw+0x9ec>
DB 197,60,194,202,2 ; vcmpleps %ymm2,%ymm8,%ymm9
DB 197,84,89,210 ; vmulps %ymm2,%ymm5,%ymm10
DB 196,65,84,92,218 ; vsubps %ymm10,%ymm5,%ymm11
DB 196,67,45,74,203,144 ; vblendvps %ymm9,%ymm11,%ymm10,%ymm9
DB 197,52,88,210 ; vaddps %ymm2,%ymm9,%ymm10
- DB 196,98,125,24,13,98,81,0,0 ; vbroadcastss 0x5162(%rip),%ymm9 # 6a44 <_sk_clut_4D_hsw+0x9f0>
+ DB 196,98,125,24,13,38,85,0,0 ; vbroadcastss 0x5526(%rip),%ymm9 # 6f10 <_sk_clut_4D_hsw+0x9f0>
DB 196,66,109,170,202 ; vfmsub213ps %ymm10,%ymm2,%ymm9
- DB 196,98,125,24,29,88,81,0,0 ; vbroadcastss 0x5158(%rip),%ymm11 # 6a48 <_sk_clut_4D_hsw+0x9f4>
+ DB 196,98,125,24,29,28,85,0,0 ; vbroadcastss 0x551c(%rip),%ymm11 # 6f14 <_sk_clut_4D_hsw+0x9f4>
DB 196,65,92,88,219 ; vaddps %ymm11,%ymm4,%ymm11
DB 196,67,125,8,227,1 ; vroundps $0x1,%ymm11,%ymm12
DB 196,65,36,92,252 ; vsubps %ymm12,%ymm11,%ymm15
DB 196,65,44,92,217 ; vsubps %ymm9,%ymm10,%ymm11
- DB 196,98,125,24,45,66,81,0,0 ; vbroadcastss 0x5142(%rip),%ymm13 # 6a50 <_sk_clut_4D_hsw+0x9fc>
+ DB 196,98,125,24,45,6,85,0,0 ; vbroadcastss 0x5506(%rip),%ymm13 # 6f1c <_sk_clut_4D_hsw+0x9fc>
DB 196,193,4,89,197 ; vmulps %ymm13,%ymm15,%ymm0
- DB 196,98,125,24,53,56,81,0,0 ; vbroadcastss 0x5138(%rip),%ymm14 # 6a54 <_sk_clut_4D_hsw+0xa00>
+ DB 196,98,125,24,53,252,84,0,0 ; vbroadcastss 0x54fc(%rip),%ymm14 # 6f20 <_sk_clut_4D_hsw+0xa00>
DB 197,12,92,224 ; vsubps %ymm0,%ymm14,%ymm12
DB 196,66,37,168,225 ; vfmadd213ps %ymm9,%ymm11,%ymm12
- DB 196,226,125,24,29,30,81,0,0 ; vbroadcastss 0x511e(%rip),%ymm3 # 6a4c <_sk_clut_4D_hsw+0x9f8>
+ DB 196,226,125,24,29,226,84,0,0 ; vbroadcastss 0x54e2(%rip),%ymm3 # 6f18 <_sk_clut_4D_hsw+0x9f8>
DB 196,193,100,194,255,2 ; vcmpleps %ymm15,%ymm3,%ymm7
DB 196,195,29,74,249,112 ; vblendvps %ymm7,%ymm9,%ymm12,%ymm7
DB 196,65,60,194,231,2 ; vcmpleps %ymm15,%ymm8,%ymm12
DB 196,227,45,74,255,192 ; vblendvps %ymm12,%ymm7,%ymm10,%ymm7
- DB 196,98,125,24,37,9,81,0,0 ; vbroadcastss 0x5109(%rip),%ymm12 # 6a58 <_sk_clut_4D_hsw+0xa04>
+ DB 196,98,125,24,37,205,84,0,0 ; vbroadcastss 0x54cd(%rip),%ymm12 # 6f24 <_sk_clut_4D_hsw+0xa04>
DB 196,65,28,194,255,2 ; vcmpleps %ymm15,%ymm12,%ymm15
DB 196,194,37,168,193 ; vfmadd213ps %ymm9,%ymm11,%ymm0
DB 196,99,125,74,255,240 ; vblendvps %ymm15,%ymm7,%ymm0,%ymm15
@@ -1556,7 +1624,7 @@ _sk_hsl_to_rgb_hsw LABEL PROC
DB 197,156,194,192,2 ; vcmpleps %ymm0,%ymm12,%ymm0
DB 196,194,37,168,249 ; vfmadd213ps %ymm9,%ymm11,%ymm7
DB 196,227,69,74,201,0 ; vblendvps %ymm0,%ymm1,%ymm7,%ymm1
- DB 196,226,125,24,5,181,80,0,0 ; vbroadcastss 0x50b5(%rip),%ymm0 # 6a5c <_sk_clut_4D_hsw+0xa08>
+ DB 196,226,125,24,5,121,84,0,0 ; vbroadcastss 0x5479(%rip),%ymm0 # 6f28 <_sk_clut_4D_hsw+0xa08>
DB 197,220,88,192 ; vaddps %ymm0,%ymm4,%ymm0
DB 196,227,125,8,224,1 ; vroundps $0x1,%ymm0,%ymm4
DB 197,252,92,196 ; vsubps %ymm4,%ymm0,%ymm0
@@ -1605,12 +1673,12 @@ _sk_scale_u8_hsw LABEL PROC
DB 76,3,8 ; add (%rax),%r9
DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 117,59 ; jne 1a9e <_sk_scale_u8_hsw+0x54>
+ DB 117,59 ; jne 1ba6 <_sk_scale_u8_hsw+0x54>
DB 196,66,121,48,4,25 ; vpmovzxbw (%r9,%rbx,1),%xmm8
- DB 197,57,219,5,207,86,0,0 ; vpand 0x56cf(%rip),%xmm8,%xmm8 # 7140 <_sk_clut_4D_hsw+0x10ec>
+ DB 197,57,219,5,135,90,0,0 ; vpand 0x5a87(%rip),%xmm8,%xmm8 # 7600 <_sk_clut_4D_hsw+0x10e0>
DB 196,66,125,51,192 ; vpmovzxwd %xmm8,%ymm8
DB 196,65,124,91,192 ; vcvtdq2ps %ymm8,%ymm8
- DB 196,98,125,24,13,220,79,0,0 ; vbroadcastss 0x4fdc(%rip),%ymm9 # 6a60 <_sk_clut_4D_hsw+0xa0c>
+ DB 196,98,125,24,13,160,83,0,0 ; vbroadcastss 0x53a0(%rip),%ymm9 # 6f2c <_sk_clut_4D_hsw+0xa0c>
DB 196,65,60,89,193 ; vmulps %ymm9,%ymm8,%ymm8
DB 197,188,89,192 ; vmulps %ymm0,%ymm8,%ymm0
DB 197,188,89,201 ; vmulps %ymm1,%ymm8,%ymm1
@@ -1624,15 +1692,15 @@ _sk_scale_u8_hsw LABEL PROC
DB 196,65,57,239,192 ; vpxor %xmm8,%xmm8,%xmm8
DB 65,254,202 ; dec %r10b
DB 65,128,250,6 ; cmp $0x6,%r10b
- DB 119,182 ; ja 1a69 <_sk_scale_u8_hsw+0x1f>
+ DB 119,182 ; ja 1b71 <_sk_scale_u8_hsw+0x1f>
DB 69,15,182,210 ; movzbl %r10b,%r10d
- DB 76,141,29,122,0,0,0 ; lea 0x7a(%rip),%r11 # 1b38 <_sk_scale_u8_hsw+0xee>
+ DB 76,141,29,122,0,0,0 ; lea 0x7a(%rip),%r11 # 1c40 <_sk_scale_u8_hsw+0xee>
DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
DB 65,15,182,4,25 ; movzbl (%r9,%rbx,1),%eax
DB 197,121,110,192 ; vmovd %eax,%xmm8
- DB 235,151 ; jmp 1a69 <_sk_scale_u8_hsw+0x1f>
+ DB 235,151 ; jmp 1b71 <_sk_scale_u8_hsw+0x1f>
DB 65,15,182,68,25,2 ; movzbl 0x2(%r9,%rbx,1),%eax
DB 196,65,57,239,192 ; vpxor %xmm8,%xmm8,%xmm8
DB 197,57,196,192,2 ; vpinsrw $0x2,%eax,%xmm8,%xmm8
@@ -1640,7 +1708,7 @@ _sk_scale_u8_hsw LABEL PROC
DB 197,121,110,200 ; vmovd %eax,%xmm9
DB 196,66,121,48,201 ; vpmovzxbw %xmm9,%xmm9
DB 196,67,57,2,193,1 ; vpblendd $0x1,%xmm9,%xmm8,%xmm8
- DB 233,110,255,255,255 ; jmpq 1a69 <_sk_scale_u8_hsw+0x1f>
+ DB 233,110,255,255,255 ; jmpq 1b71 <_sk_scale_u8_hsw+0x1f>
DB 65,15,182,68,25,6 ; movzbl 0x6(%r9,%rbx,1),%eax
DB 196,65,57,239,192 ; vpxor %xmm8,%xmm8,%xmm8
DB 197,57,196,192,6 ; vpinsrw $0x6,%eax,%xmm8,%xmm8
@@ -1651,7 +1719,7 @@ _sk_scale_u8_hsw LABEL PROC
DB 196,65,121,110,12,25 ; vmovd (%r9,%rbx,1),%xmm9
DB 196,66,121,48,201 ; vpmovzxbw %xmm9,%xmm9
DB 196,67,49,2,192,12 ; vpblendd $0xc,%xmm8,%xmm9,%xmm8
- DB 233,50,255,255,255 ; jmpq 1a69 <_sk_scale_u8_hsw+0x1f>
+ DB 233,50,255,255,255 ; jmpq 1b71 <_sk_scale_u8_hsw+0x1f>
DB 144 ; nop
DB 143 ; (bad)
DB 255 ; (bad)
@@ -1660,7 +1728,7 @@ _sk_scale_u8_hsw LABEL PROC
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 233,255,255,255,222 ; jmpq ffffffffdf001b48 <_sk_clut_4D_hsw+0xffffffffdeffbaf4>
+ DB 233,255,255,255,222 ; jmpq ffffffffdf001c50 <_sk_clut_4D_hsw+0xffffffffdeffb730>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,211 ; callq *%rbx
@@ -1696,12 +1764,12 @@ _sk_lerp_u8_hsw LABEL PROC
DB 76,3,8 ; add (%rax),%r9
DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 117,79 ; jne 1beb <_sk_lerp_u8_hsw+0x68>
+ DB 117,79 ; jne 1cf3 <_sk_lerp_u8_hsw+0x68>
DB 196,66,121,48,4,25 ; vpmovzxbw (%r9,%rbx,1),%xmm8
- DB 197,57,219,5,166,85,0,0 ; vpand 0x55a6(%rip),%xmm8,%xmm8 # 7150 <_sk_clut_4D_hsw+0x10fc>
+ DB 197,57,219,5,94,89,0,0 ; vpand 0x595e(%rip),%xmm8,%xmm8 # 7610 <_sk_clut_4D_hsw+0x10f0>
DB 196,66,125,51,192 ; vpmovzxwd %xmm8,%ymm8
DB 196,65,124,91,192 ; vcvtdq2ps %ymm8,%ymm8
- DB 196,98,125,24,13,167,78,0,0 ; vbroadcastss 0x4ea7(%rip),%ymm9 # 6a64 <_sk_clut_4D_hsw+0xa10>
+ DB 196,98,125,24,13,107,82,0,0 ; vbroadcastss 0x526b(%rip),%ymm9 # 6f30 <_sk_clut_4D_hsw+0xa10>
DB 196,65,60,89,193 ; vmulps %ymm9,%ymm8,%ymm8
DB 197,252,92,196 ; vsubps %ymm4,%ymm0,%ymm0
DB 196,226,61,168,196 ; vfmadd213ps %ymm4,%ymm8,%ymm0
@@ -1719,15 +1787,15 @@ _sk_lerp_u8_hsw LABEL PROC
DB 196,65,57,239,192 ; vpxor %xmm8,%xmm8,%xmm8
DB 65,254,202 ; dec %r10b
DB 65,128,250,6 ; cmp $0x6,%r10b
- DB 119,162 ; ja 1ba2 <_sk_lerp_u8_hsw+0x1f>
+ DB 119,162 ; ja 1caa <_sk_lerp_u8_hsw+0x1f>
DB 69,15,182,210 ; movzbl %r10b,%r10d
- DB 76,141,29,121,0,0,0 ; lea 0x79(%rip),%r11 # 1c84 <_sk_lerp_u8_hsw+0x101>
+ DB 76,141,29,121,0,0,0 ; lea 0x79(%rip),%r11 # 1d8c <_sk_lerp_u8_hsw+0x101>
DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
DB 65,15,182,4,25 ; movzbl (%r9,%rbx,1),%eax
DB 197,121,110,192 ; vmovd %eax,%xmm8
- DB 235,131 ; jmp 1ba2 <_sk_lerp_u8_hsw+0x1f>
+ DB 235,131 ; jmp 1caa <_sk_lerp_u8_hsw+0x1f>
DB 65,15,182,68,25,2 ; movzbl 0x2(%r9,%rbx,1),%eax
DB 196,65,57,239,192 ; vpxor %xmm8,%xmm8,%xmm8
DB 197,57,196,192,2 ; vpinsrw $0x2,%eax,%xmm8,%xmm8
@@ -1735,7 +1803,7 @@ _sk_lerp_u8_hsw LABEL PROC
DB 197,121,110,200 ; vmovd %eax,%xmm9
DB 196,66,121,48,201 ; vpmovzxbw %xmm9,%xmm9
DB 196,67,57,2,193,1 ; vpblendd $0x1,%xmm9,%xmm8,%xmm8
- DB 233,90,255,255,255 ; jmpq 1ba2 <_sk_lerp_u8_hsw+0x1f>
+ DB 233,90,255,255,255 ; jmpq 1caa <_sk_lerp_u8_hsw+0x1f>
DB 65,15,182,68,25,6 ; movzbl 0x6(%r9,%rbx,1),%eax
DB 196,65,57,239,192 ; vpxor %xmm8,%xmm8,%xmm8
DB 197,57,196,192,6 ; vpinsrw $0x6,%eax,%xmm8,%xmm8
@@ -1746,7 +1814,7 @@ _sk_lerp_u8_hsw LABEL PROC
DB 196,65,121,110,12,25 ; vmovd (%r9,%rbx,1),%xmm9
DB 196,66,121,48,201 ; vpmovzxbw %xmm9,%xmm9
DB 196,67,49,2,192,12 ; vpblendd $0xc,%xmm8,%xmm9,%xmm8
- DB 233,30,255,255,255 ; jmpq 1ba2 <_sk_lerp_u8_hsw+0x1f>
+ DB 233,30,255,255,255 ; jmpq 1caa <_sk_lerp_u8_hsw+0x1f>
DB 144 ; nop
DB 255 ; (bad)
DB 255 ; (bad)
@@ -1779,23 +1847,23 @@ _sk_lerp_565_hsw LABEL PROC
DB 76,3,8 ; add (%rax),%r9
DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 15,133,170,0,0,0 ; jne 1d6a <_sk_lerp_565_hsw+0xca>
+ DB 15,133,170,0,0,0 ; jne 1e72 <_sk_lerp_565_hsw+0xca>
DB 196,65,122,111,4,89 ; vmovdqu (%r9,%rbx,2),%xmm8
DB 196,66,125,51,192 ; vpmovzxwd %xmm8,%ymm8
- DB 196,98,125,88,13,148,77,0,0 ; vpbroadcastd 0x4d94(%rip),%ymm9 # 6a68 <_sk_clut_4D_hsw+0xa14>
+ DB 196,98,125,88,13,88,81,0,0 ; vpbroadcastd 0x5158(%rip),%ymm9 # 6f34 <_sk_clut_4D_hsw+0xa14>
DB 196,65,61,219,201 ; vpand %ymm9,%ymm8,%ymm9
DB 196,65,124,91,201 ; vcvtdq2ps %ymm9,%ymm9
- DB 196,98,125,24,21,133,77,0,0 ; vbroadcastss 0x4d85(%rip),%ymm10 # 6a6c <_sk_clut_4D_hsw+0xa18>
+ DB 196,98,125,24,21,73,81,0,0 ; vbroadcastss 0x5149(%rip),%ymm10 # 6f38 <_sk_clut_4D_hsw+0xa18>
DB 196,65,52,89,202 ; vmulps %ymm10,%ymm9,%ymm9
- DB 196,98,125,88,21,123,77,0,0 ; vpbroadcastd 0x4d7b(%rip),%ymm10 # 6a70 <_sk_clut_4D_hsw+0xa1c>
+ DB 196,98,125,88,21,63,81,0,0 ; vpbroadcastd 0x513f(%rip),%ymm10 # 6f3c <_sk_clut_4D_hsw+0xa1c>
DB 196,65,61,219,210 ; vpand %ymm10,%ymm8,%ymm10
DB 196,65,124,91,210 ; vcvtdq2ps %ymm10,%ymm10
- DB 196,98,125,24,29,108,77,0,0 ; vbroadcastss 0x4d6c(%rip),%ymm11 # 6a74 <_sk_clut_4D_hsw+0xa20>
+ DB 196,98,125,24,29,48,81,0,0 ; vbroadcastss 0x5130(%rip),%ymm11 # 6f40 <_sk_clut_4D_hsw+0xa20>
DB 196,65,44,89,211 ; vmulps %ymm11,%ymm10,%ymm10
- DB 196,98,125,88,29,98,77,0,0 ; vpbroadcastd 0x4d62(%rip),%ymm11 # 6a78 <_sk_clut_4D_hsw+0xa24>
+ DB 196,98,125,88,29,38,81,0,0 ; vpbroadcastd 0x5126(%rip),%ymm11 # 6f44 <_sk_clut_4D_hsw+0xa24>
DB 196,65,61,219,195 ; vpand %ymm11,%ymm8,%ymm8
DB 196,65,124,91,192 ; vcvtdq2ps %ymm8,%ymm8
- DB 196,98,125,24,29,83,77,0,0 ; vbroadcastss 0x4d53(%rip),%ymm11 # 6a7c <_sk_clut_4D_hsw+0xa28>
+ DB 196,98,125,24,29,23,81,0,0 ; vbroadcastss 0x5117(%rip),%ymm11 # 6f48 <_sk_clut_4D_hsw+0xa28>
DB 196,65,60,89,195 ; vmulps %ymm11,%ymm8,%ymm8
DB 197,252,92,196 ; vsubps %ymm4,%ymm0,%ymm0
DB 196,226,53,168,196 ; vfmadd213ps %ymm4,%ymm9,%ymm0
@@ -1817,27 +1885,27 @@ _sk_lerp_565_hsw LABEL PROC
DB 196,65,57,239,192 ; vpxor %xmm8,%xmm8,%xmm8
DB 65,254,202 ; dec %r10b
DB 65,128,250,6 ; cmp $0x6,%r10b
- DB 15,135,67,255,255,255 ; ja 1cc6 <_sk_lerp_565_hsw+0x26>
+ DB 15,135,67,255,255,255 ; ja 1dce <_sk_lerp_565_hsw+0x26>
DB 69,15,182,210 ; movzbl %r10b,%r10d
- DB 76,141,29,102,0,0,0 ; lea 0x66(%rip),%r11 # 1df4 <_sk_lerp_565_hsw+0x154>
+ DB 76,141,29,102,0,0,0 ; lea 0x66(%rip),%r11 # 1efc <_sk_lerp_565_hsw+0x154>
DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
DB 65,15,183,4,89 ; movzwl (%r9,%rbx,2),%eax
DB 197,121,110,192 ; vmovd %eax,%xmm8
- DB 233,33,255,255,255 ; jmpq 1cc6 <_sk_lerp_565_hsw+0x26>
+ DB 233,33,255,255,255 ; jmpq 1dce <_sk_lerp_565_hsw+0x26>
DB 196,65,57,239,192 ; vpxor %xmm8,%xmm8,%xmm8
DB 196,65,57,196,68,89,4,2 ; vpinsrw $0x2,0x4(%r9,%rbx,2),%xmm8,%xmm8
DB 196,65,121,110,12,89 ; vmovd (%r9,%rbx,2),%xmm9
DB 196,67,57,2,193,1 ; vpblendd $0x1,%xmm9,%xmm8,%xmm8
- DB 233,3,255,255,255 ; jmpq 1cc6 <_sk_lerp_565_hsw+0x26>
+ DB 233,3,255,255,255 ; jmpq 1dce <_sk_lerp_565_hsw+0x26>
DB 196,65,57,239,192 ; vpxor %xmm8,%xmm8,%xmm8
DB 196,65,57,196,68,89,12,6 ; vpinsrw $0x6,0xc(%r9,%rbx,2),%xmm8,%xmm8
DB 196,65,57,196,68,89,10,5 ; vpinsrw $0x5,0xa(%r9,%rbx,2),%xmm8,%xmm8
DB 196,65,57,196,68,89,8,4 ; vpinsrw $0x4,0x8(%r9,%rbx,2),%xmm8,%xmm8
DB 196,65,122,126,12,89 ; vmovq (%r9,%rbx,2),%xmm9
DB 196,67,49,2,192,12 ; vpblendd $0xc,%xmm8,%xmm9,%xmm8
- DB 233,213,254,255,255 ; jmpq 1cc6 <_sk_lerp_565_hsw+0x26>
+ DB 233,213,254,255,255 ; jmpq 1dce <_sk_lerp_565_hsw+0x26>
DB 15,31,0 ; nopl (%rax)
DB 163,255,255,255,190,255,255,255,177 ; movabs %eax,0xb1ffffffbeffffff
DB 255 ; (bad)
@@ -1859,41 +1927,82 @@ _sk_lerp_565_hsw LABEL PROC
PUBLIC _sk_load_tables_hsw
_sk_load_tables_hsw LABEL PROC
- DB 73,137,201 ; mov %rcx,%r9
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 76,141,20,149,0,0,0,0 ; lea 0x0(,%rdx,4),%r10
- DB 76,3,16 ; add (%rax),%r10
+ DB 76,139,8 ; mov (%rax),%r9
DB 77,133,192 ; test %r8,%r8
- DB 117,105 ; jne 1e8e <_sk_load_tables_hsw+0x7e>
- DB 196,193,124,16,26 ; vmovups (%r10),%ymm3
- DB 197,228,84,13,110,80,0,0 ; vandps 0x506e(%rip),%ymm3,%ymm1 # 6ea0 <_sk_clut_4D_hsw+0xe4c>
+ DB 117,103 ; jne 1f89 <_sk_load_tables_hsw+0x71>
+ DB 196,193,126,111,28,145 ; vmovdqu (%r9,%rdx,4),%ymm3
+ DB 197,229,219,13,48,84,0,0 ; vpand 0x5430(%rip),%ymm3,%ymm1 # 7360 <_sk_clut_4D_hsw+0xe40>
DB 196,65,61,118,192 ; vpcmpeqd %ymm8,%ymm8,%ymm8
- DB 72,139,72,8 ; mov 0x8(%rax),%rcx
+ DB 76,139,72,8 ; mov 0x8(%rax),%r9
DB 76,139,80,16 ; mov 0x10(%rax),%r10
DB 197,237,118,210 ; vpcmpeqd %ymm2,%ymm2,%ymm2
- DB 196,226,109,146,4,137 ; vgatherdps %ymm2,(%rcx,%ymm1,4),%ymm0
- DB 196,226,101,0,21,110,80,0,0 ; vpshufb 0x506e(%rip),%ymm3,%ymm2 # 6ec0 <_sk_clut_4D_hsw+0xe6c>
+ DB 196,194,109,146,4,137 ; vgatherdps %ymm2,(%r9,%ymm1,4),%ymm0
+ DB 196,226,101,0,21,48,84,0,0 ; vpshufb 0x5430(%rip),%ymm3,%ymm2 # 7380 <_sk_clut_4D_hsw+0xe60>
DB 196,65,53,118,201 ; vpcmpeqd %ymm9,%ymm9,%ymm9
DB 196,194,53,146,12,146 ; vgatherdps %ymm9,(%r10,%ymm2,4),%ymm1
DB 72,139,64,24 ; mov 0x18(%rax),%rax
- DB 196,98,101,0,13,118,80,0,0 ; vpshufb 0x5076(%rip),%ymm3,%ymm9 # 6ee0 <_sk_clut_4D_hsw+0xe8c>
+ DB 196,98,101,0,13,56,84,0,0 ; vpshufb 0x5438(%rip),%ymm3,%ymm9 # 73a0 <_sk_clut_4D_hsw+0xe80>
DB 196,162,61,146,20,136 ; vgatherdps %ymm8,(%rax,%ymm9,4),%ymm2
DB 197,229,114,211,24 ; vpsrld $0x18,%ymm3,%ymm3
DB 197,252,91,219 ; vcvtdq2ps %ymm3,%ymm3
- DB 196,98,125,24,5,254,75,0,0 ; vbroadcastss 0x4bfe(%rip),%ymm8 # 6a80 <_sk_clut_4D_hsw+0xa2c>
+ DB 196,98,125,24,5,204,79,0,0 ; vbroadcastss 0x4fcc(%rip),%ymm8 # 6f4c <_sk_clut_4D_hsw+0xa2c>
DB 196,193,100,89,216 ; vmulps %ymm8,%ymm3,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 76,137,201 ; mov %r9,%rcx
DB 255,224 ; jmpq *%rax
- DB 185,8,0,0,0 ; mov $0x8,%ecx
- DB 68,41,193 ; sub %r8d,%ecx
- DB 192,225,3 ; shl $0x3,%cl
- DB 73,199,195,255,255,255,255 ; mov $0xffffffffffffffff,%r11
- DB 73,211,235 ; shr %cl,%r11
- DB 196,193,249,110,195 ; vmovq %r11,%xmm0
- DB 196,226,125,33,192 ; vpmovsxbd %xmm0,%ymm0
- DB 196,194,125,44,26 ; vmaskmovps (%r10),%ymm0,%ymm3
- DB 233,115,255,255,255 ; jmpq 1e2a <_sk_load_tables_hsw+0x1a>
+ DB 69,137,194 ; mov %r8d,%r10d
+ DB 65,128,226,7 ; and $0x7,%r10b
+ DB 197,229,239,219 ; vpxor %ymm3,%ymm3,%ymm3
+ DB 65,254,202 ; dec %r10b
+ DB 65,128,250,6 ; cmp $0x6,%r10b
+ DB 119,139 ; ja 1f28 <_sk_load_tables_hsw+0x10>
+ DB 69,15,182,210 ; movzbl %r10b,%r10d
+ DB 76,141,29,140,0,0,0 ; lea 0x8c(%rip),%r11 # 2034 <_sk_load_tables_hsw+0x11c>
+ DB 79,99,20,147 ; movslq (%r11,%r10,4),%r10
+ DB 77,1,218 ; add %r11,%r10
+ DB 65,255,226 ; jmpq *%r10
+ DB 196,193,121,110,28,145 ; vmovd (%r9,%rdx,4),%xmm3
+ DB 233,107,255,255,255 ; jmpq 1f28 <_sk_load_tables_hsw+0x10>
+ DB 196,193,121,110,68,145,8 ; vmovd 0x8(%r9,%rdx,4),%xmm0
+ DB 196,226,121,89,192 ; vpbroadcastq %xmm0,%xmm0
+ DB 197,245,239,201 ; vpxor %ymm1,%ymm1,%ymm1
+ DB 196,227,117,2,216,4 ; vpblendd $0x4,%ymm0,%ymm1,%ymm3
+ DB 196,193,122,126,4,145 ; vmovq (%r9,%rdx,4),%xmm0
+ DB 196,227,101,2,216,3 ; vpblendd $0x3,%ymm0,%ymm3,%ymm3
+ DB 233,68,255,255,255 ; jmpq 1f28 <_sk_load_tables_hsw+0x10>
+ DB 196,193,121,110,68,145,24 ; vmovd 0x18(%r9,%rdx,4),%xmm0
+ DB 196,226,125,89,192 ; vpbroadcastq %xmm0,%ymm0
+ DB 197,245,239,201 ; vpxor %ymm1,%ymm1,%ymm1
+ DB 196,227,117,2,216,64 ; vpblendd $0x40,%ymm0,%ymm1,%ymm3
+ DB 196,227,125,57,216,1 ; vextracti128 $0x1,%ymm3,%xmm0
+ DB 196,195,121,34,68,145,20,1 ; vpinsrd $0x1,0x14(%r9,%rdx,4),%xmm0,%xmm0
+ DB 196,227,101,56,216,1 ; vinserti128 $0x1,%xmm0,%ymm3,%ymm3
+ DB 196,227,125,57,216,1 ; vextracti128 $0x1,%ymm3,%xmm0
+ DB 196,195,121,34,68,145,16,0 ; vpinsrd $0x0,0x10(%r9,%rdx,4),%xmm0,%xmm0
+ DB 196,227,101,56,216,1 ; vinserti128 $0x1,%xmm0,%ymm3,%ymm3
+ DB 196,193,122,111,4,145 ; vmovdqu (%r9,%rdx,4),%xmm0
+ DB 196,227,125,2,219,240 ; vpblendd $0xf0,%ymm3,%ymm0,%ymm3
+ DB 233,245,254,255,255 ; jmpq 1f28 <_sk_load_tables_hsw+0x10>
+ DB 144 ; nop
+ DB 126,255 ; jle 2035 <_sk_load_tables_hsw+0x11d>
+ DB 255 ; (bad)
+ DB 255,159,255,255,255,137 ; lcall *-0x76000001(%rdi)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 238 ; out %al,(%dx)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 218,255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,198 ; inc %esi
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; .byte 0xff
+ DB 176,255 ; mov $0xff,%al
+ DB 255 ; (bad)
+ DB 255 ; .byte 0xff
PUBLIC _sk_load_tables_u16_be_hsw
_sk_load_tables_u16_be_hsw LABEL PROC
@@ -1901,7 +2010,7 @@ _sk_load_tables_u16_be_hsw LABEL PROC
DB 76,139,8 ; mov (%rax),%r9
DB 76,141,20,149,0,0,0,0 ; lea 0x0(,%rdx,4),%r10
DB 77,133,192 ; test %r8,%r8
- DB 15,133,201,0,0,0 ; jne 1f96 <_sk_load_tables_u16_be_hsw+0xdf>
+ DB 15,133,201,0,0,0 ; jne 212f <_sk_load_tables_u16_be_hsw+0xdf>
DB 196,1,121,16,4,81 ; vmovupd (%r9,%r10,2),%xmm8
DB 196,129,121,16,84,81,16 ; vmovupd 0x10(%r9,%r10,2),%xmm2
DB 196,129,121,16,92,81,32 ; vmovupd 0x20(%r9,%r10,2),%xmm3
@@ -1917,7 +2026,7 @@ _sk_load_tables_u16_be_hsw LABEL PROC
DB 197,185,108,200 ; vpunpcklqdq %xmm0,%xmm8,%xmm1
DB 197,185,109,208 ; vpunpckhqdq %xmm0,%xmm8,%xmm2
DB 197,49,108,195 ; vpunpcklqdq %xmm3,%xmm9,%xmm8
- DB 197,121,111,21,66,82,0,0 ; vmovdqa 0x5242(%rip),%xmm10 # 7160 <_sk_clut_4D_hsw+0x110c>
+ DB 197,121,111,21,105,85,0,0 ; vmovdqa 0x5569(%rip),%xmm10 # 7620 <_sk_clut_4D_hsw+0x1100>
DB 196,193,113,219,194 ; vpand %xmm10,%xmm1,%xmm0
DB 196,226,125,51,200 ; vpmovzxwd %xmm0,%ymm1
DB 196,65,37,118,219 ; vpcmpeqd %ymm11,%ymm11,%ymm11
@@ -1939,36 +2048,36 @@ _sk_load_tables_u16_be_hsw LABEL PROC
DB 197,185,235,219 ; vpor %xmm3,%xmm8,%xmm3
DB 196,226,125,51,219 ; vpmovzxwd %xmm3,%ymm3
DB 197,252,91,219 ; vcvtdq2ps %ymm3,%ymm3
- DB 196,98,125,24,5,247,74,0,0 ; vbroadcastss 0x4af7(%rip),%ymm8 # 6a84 <_sk_clut_4D_hsw+0xa30>
+ DB 196,98,125,24,5,42,78,0,0 ; vbroadcastss 0x4e2a(%rip),%ymm8 # 6f50 <_sk_clut_4D_hsw+0xa30>
DB 196,193,100,89,216 ; vmulps %ymm8,%ymm3,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
DB 196,1,123,16,4,81 ; vmovsd (%r9,%r10,2),%xmm8
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 73,131,248,1 ; cmp $0x1,%r8
- DB 116,85 ; je 1ffc <_sk_load_tables_u16_be_hsw+0x145>
+ DB 116,85 ; je 2195 <_sk_load_tables_u16_be_hsw+0x145>
DB 196,1,57,22,68,81,8 ; vmovhpd 0x8(%r9,%r10,2),%xmm8,%xmm8
DB 73,131,248,3 ; cmp $0x3,%r8
- DB 114,72 ; jb 1ffc <_sk_load_tables_u16_be_hsw+0x145>
+ DB 114,72 ; jb 2195 <_sk_load_tables_u16_be_hsw+0x145>
DB 196,129,123,16,84,81,16 ; vmovsd 0x10(%r9,%r10,2),%xmm2
DB 73,131,248,3 ; cmp $0x3,%r8
- DB 116,72 ; je 2009 <_sk_load_tables_u16_be_hsw+0x152>
+ DB 116,72 ; je 21a2 <_sk_load_tables_u16_be_hsw+0x152>
DB 196,129,105,22,84,81,24 ; vmovhpd 0x18(%r9,%r10,2),%xmm2,%xmm2
DB 73,131,248,5 ; cmp $0x5,%r8
- DB 114,59 ; jb 2009 <_sk_load_tables_u16_be_hsw+0x152>
+ DB 114,59 ; jb 21a2 <_sk_load_tables_u16_be_hsw+0x152>
DB 196,129,123,16,92,81,32 ; vmovsd 0x20(%r9,%r10,2),%xmm3
DB 73,131,248,5 ; cmp $0x5,%r8
- DB 15,132,9,255,255,255 ; je 1ee8 <_sk_load_tables_u16_be_hsw+0x31>
+ DB 15,132,9,255,255,255 ; je 2081 <_sk_load_tables_u16_be_hsw+0x31>
DB 196,129,97,22,92,81,40 ; vmovhpd 0x28(%r9,%r10,2),%xmm3,%xmm3
DB 73,131,248,7 ; cmp $0x7,%r8
- DB 15,130,248,254,255,255 ; jb 1ee8 <_sk_load_tables_u16_be_hsw+0x31>
+ DB 15,130,248,254,255,255 ; jb 2081 <_sk_load_tables_u16_be_hsw+0x31>
DB 196,1,122,126,76,81,48 ; vmovq 0x30(%r9,%r10,2),%xmm9
- DB 233,236,254,255,255 ; jmpq 1ee8 <_sk_load_tables_u16_be_hsw+0x31>
+ DB 233,236,254,255,255 ; jmpq 2081 <_sk_load_tables_u16_be_hsw+0x31>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2
- DB 233,223,254,255,255 ; jmpq 1ee8 <_sk_load_tables_u16_be_hsw+0x31>
+ DB 233,223,254,255,255 ; jmpq 2081 <_sk_load_tables_u16_be_hsw+0x31>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
- DB 233,214,254,255,255 ; jmpq 1ee8 <_sk_load_tables_u16_be_hsw+0x31>
+ DB 233,214,254,255,255 ; jmpq 2081 <_sk_load_tables_u16_be_hsw+0x31>
PUBLIC _sk_load_tables_rgb_u16_be_hsw
_sk_load_tables_rgb_u16_be_hsw LABEL PROC
@@ -1976,7 +2085,7 @@ _sk_load_tables_rgb_u16_be_hsw LABEL PROC
DB 76,139,8 ; mov (%rax),%r9
DB 76,141,20,82 ; lea (%rdx,%rdx,2),%r10
DB 77,133,192 ; test %r8,%r8
- DB 15,133,193,0,0,0 ; jne 20e5 <_sk_load_tables_rgb_u16_be_hsw+0xd3>
+ DB 15,133,193,0,0,0 ; jne 227e <_sk_load_tables_rgb_u16_be_hsw+0xd3>
DB 196,129,122,111,4,81 ; vmovdqu (%r9,%r10,2),%xmm0
DB 196,129,122,111,84,81,12 ; vmovdqu 0xc(%r9,%r10,2),%xmm2
DB 196,129,122,111,76,81,24 ; vmovdqu 0x18(%r9,%r10,2),%xmm1
@@ -1997,7 +2106,7 @@ _sk_load_tables_rgb_u16_be_hsw LABEL PROC
DB 197,185,108,218 ; vpunpcklqdq %xmm2,%xmm8,%xmm3
DB 197,185,109,210 ; vpunpckhqdq %xmm2,%xmm8,%xmm2
DB 197,121,108,193 ; vpunpcklqdq %xmm1,%xmm0,%xmm8
- DB 197,121,111,13,226,80,0,0 ; vmovdqa 0x50e2(%rip),%xmm9 # 7170 <_sk_clut_4D_hsw+0x111c>
+ DB 197,121,111,13,9,84,0,0 ; vmovdqa 0x5409(%rip),%xmm9 # 7630 <_sk_clut_4D_hsw+0x1110>
DB 196,193,97,219,193 ; vpand %xmm9,%xmm3,%xmm0
DB 196,226,125,51,200 ; vpmovzxwd %xmm0,%ymm1
DB 197,229,118,219 ; vpcmpeqd %ymm3,%ymm3,%ymm3
@@ -2014,46 +2123,46 @@ _sk_load_tables_rgb_u16_be_hsw LABEL PROC
DB 196,98,125,51,194 ; vpmovzxwd %xmm2,%ymm8
DB 196,162,101,146,20,128 ; vgatherdps %ymm3,(%rax,%ymm8,4),%ymm2
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,29,165,73,0,0 ; vbroadcastss 0x49a5(%rip),%ymm3 # 6a88 <_sk_clut_4D_hsw+0xa34>
+ DB 196,226,125,24,29,216,76,0,0 ; vbroadcastss 0x4cd8(%rip),%ymm3 # 6f54 <_sk_clut_4D_hsw+0xa34>
DB 255,224 ; jmpq *%rax
DB 196,129,121,110,4,81 ; vmovd (%r9,%r10,2),%xmm0
DB 196,129,121,196,68,81,4,2 ; vpinsrw $0x2,0x4(%r9,%r10,2),%xmm0,%xmm0
DB 73,131,248,1 ; cmp $0x1,%r8
- DB 117,5 ; jne 20fe <_sk_load_tables_rgb_u16_be_hsw+0xec>
- DB 233,90,255,255,255 ; jmpq 2058 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+ DB 117,5 ; jne 2297 <_sk_load_tables_rgb_u16_be_hsw+0xec>
+ DB 233,90,255,255,255 ; jmpq 21f1 <_sk_load_tables_rgb_u16_be_hsw+0x46>
DB 196,129,121,110,76,81,6 ; vmovd 0x6(%r9,%r10,2),%xmm1
DB 196,1,113,196,68,81,10,2 ; vpinsrw $0x2,0xa(%r9,%r10,2),%xmm1,%xmm8
DB 73,131,248,3 ; cmp $0x3,%r8
- DB 114,26 ; jb 212d <_sk_load_tables_rgb_u16_be_hsw+0x11b>
+ DB 114,26 ; jb 22c6 <_sk_load_tables_rgb_u16_be_hsw+0x11b>
DB 196,129,121,110,76,81,12 ; vmovd 0xc(%r9,%r10,2),%xmm1
DB 196,129,113,196,84,81,16,2 ; vpinsrw $0x2,0x10(%r9,%r10,2),%xmm1,%xmm2
DB 73,131,248,3 ; cmp $0x3,%r8
- DB 117,10 ; jne 2132 <_sk_load_tables_rgb_u16_be_hsw+0x120>
- DB 233,43,255,255,255 ; jmpq 2058 <_sk_load_tables_rgb_u16_be_hsw+0x46>
- DB 233,38,255,255,255 ; jmpq 2058 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+ DB 117,10 ; jne 22cb <_sk_load_tables_rgb_u16_be_hsw+0x120>
+ DB 233,43,255,255,255 ; jmpq 21f1 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+ DB 233,38,255,255,255 ; jmpq 21f1 <_sk_load_tables_rgb_u16_be_hsw+0x46>
DB 196,129,121,110,76,81,18 ; vmovd 0x12(%r9,%r10,2),%xmm1
DB 196,1,113,196,76,81,22,2 ; vpinsrw $0x2,0x16(%r9,%r10,2),%xmm1,%xmm9
DB 73,131,248,5 ; cmp $0x5,%r8
- DB 114,26 ; jb 2161 <_sk_load_tables_rgb_u16_be_hsw+0x14f>
+ DB 114,26 ; jb 22fa <_sk_load_tables_rgb_u16_be_hsw+0x14f>
DB 196,129,121,110,76,81,24 ; vmovd 0x18(%r9,%r10,2),%xmm1
DB 196,129,113,196,76,81,28,2 ; vpinsrw $0x2,0x1c(%r9,%r10,2),%xmm1,%xmm1
DB 73,131,248,5 ; cmp $0x5,%r8
- DB 117,10 ; jne 2166 <_sk_load_tables_rgb_u16_be_hsw+0x154>
- DB 233,247,254,255,255 ; jmpq 2058 <_sk_load_tables_rgb_u16_be_hsw+0x46>
- DB 233,242,254,255,255 ; jmpq 2058 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+ DB 117,10 ; jne 22ff <_sk_load_tables_rgb_u16_be_hsw+0x154>
+ DB 233,247,254,255,255 ; jmpq 21f1 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+ DB 233,242,254,255,255 ; jmpq 21f1 <_sk_load_tables_rgb_u16_be_hsw+0x46>
DB 196,129,121,110,92,81,30 ; vmovd 0x1e(%r9,%r10,2),%xmm3
DB 196,1,97,196,92,81,34,2 ; vpinsrw $0x2,0x22(%r9,%r10,2),%xmm3,%xmm11
DB 73,131,248,7 ; cmp $0x7,%r8
- DB 114,20 ; jb 218f <_sk_load_tables_rgb_u16_be_hsw+0x17d>
+ DB 114,20 ; jb 2328 <_sk_load_tables_rgb_u16_be_hsw+0x17d>
DB 196,129,121,110,92,81,36 ; vmovd 0x24(%r9,%r10,2),%xmm3
DB 196,129,97,196,92,81,40,2 ; vpinsrw $0x2,0x28(%r9,%r10,2),%xmm3,%xmm3
- DB 233,201,254,255,255 ; jmpq 2058 <_sk_load_tables_rgb_u16_be_hsw+0x46>
- DB 233,196,254,255,255 ; jmpq 2058 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+ DB 233,201,254,255,255 ; jmpq 21f1 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+ DB 233,196,254,255,255 ; jmpq 21f1 <_sk_load_tables_rgb_u16_be_hsw+0x46>
PUBLIC _sk_byte_tables_hsw
_sk_byte_tables_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,98,125,24,5,237,72,0,0 ; vbroadcastss 0x48ed(%rip),%ymm8 # 6a8c <_sk_clut_4D_hsw+0xa38>
+ DB 196,98,125,24,5,32,76,0,0 ; vbroadcastss 0x4c20(%rip),%ymm8 # 6f58 <_sk_clut_4D_hsw+0xa38>
DB 196,193,124,89,192 ; vmulps %ymm8,%ymm0,%ymm0
DB 197,125,91,200 ; vcvtps2dq %ymm0,%ymm9
DB 196,65,249,126,201 ; vmovq %xmm9,%r9
@@ -2175,7 +2284,7 @@ _sk_byte_tables_hsw LABEL PROC
DB 67,15,182,4,26 ; movzbl (%r10,%r11,1),%eax
DB 196,194,125,49,193 ; vpmovzxbd %xmm9,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,98,125,24,5,146,70,0,0 ; vbroadcastss 0x4692(%rip),%ymm8 # 6a90 <_sk_clut_4D_hsw+0xa3c>
+ DB 196,98,125,24,5,197,73,0,0 ; vbroadcastss 0x49c5(%rip),%ymm8 # 6f5c <_sk_clut_4D_hsw+0xa3c>
DB 196,193,124,89,192 ; vmulps %ymm8,%ymm0,%ymm0
DB 196,226,125,49,201 ; vpmovzxbd %xmm1,%ymm1
DB 197,252,91,201 ; vcvtdq2ps %ymm1,%ymm1
@@ -2289,7 +2398,7 @@ _sk_byte_tables_rgb_hsw LABEL PROC
DB 67,15,182,4,26 ; movzbl (%r10,%r11,1),%eax
DB 196,194,125,49,193 ; vpmovzxbd %xmm9,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,98,125,24,5,126,68,0,0 ; vbroadcastss 0x447e(%rip),%ymm8 # 6a94 <_sk_clut_4D_hsw+0xa40>
+ DB 196,98,125,24,5,177,71,0,0 ; vbroadcastss 0x47b1(%rip),%ymm8 # 6f60 <_sk_clut_4D_hsw+0xa40>
DB 196,193,124,89,192 ; vmulps %ymm8,%ymm0,%ymm0
DB 196,226,125,49,201 ; vpmovzxbd %xmm1,%ymm1
DB 197,252,91,201 ; vcvtdq2ps %ymm1,%ymm1
@@ -2378,33 +2487,33 @@ _sk_parametric_r_hsw LABEL PROC
DB 196,66,125,168,211 ; vfmadd213ps %ymm11,%ymm0,%ymm10
DB 196,226,125,24,0 ; vbroadcastss (%rax),%ymm0
DB 196,65,124,91,218 ; vcvtdq2ps %ymm10,%ymm11
- DB 196,98,125,24,37,88,67,0,0 ; vbroadcastss 0x4358(%rip),%ymm12 # 6a98 <_sk_clut_4D_hsw+0xa44>
- DB 196,98,125,24,45,83,67,0,0 ; vbroadcastss 0x4353(%rip),%ymm13 # 6a9c <_sk_clut_4D_hsw+0xa48>
+ DB 196,98,125,24,37,139,70,0,0 ; vbroadcastss 0x468b(%rip),%ymm12 # 6f64 <_sk_clut_4D_hsw+0xa44>
+ DB 196,98,125,24,45,134,70,0,0 ; vbroadcastss 0x4686(%rip),%ymm13 # 6f68 <_sk_clut_4D_hsw+0xa48>
DB 196,65,44,84,213 ; vandps %ymm13,%ymm10,%ymm10
- DB 196,98,125,24,45,73,67,0,0 ; vbroadcastss 0x4349(%rip),%ymm13 # 6aa0 <_sk_clut_4D_hsw+0xa4c>
+ DB 196,98,125,24,45,124,70,0,0 ; vbroadcastss 0x467c(%rip),%ymm13 # 6f6c <_sk_clut_4D_hsw+0xa4c>
DB 196,65,44,86,213 ; vorps %ymm13,%ymm10,%ymm10
- DB 196,98,125,24,45,63,67,0,0 ; vbroadcastss 0x433f(%rip),%ymm13 # 6aa4 <_sk_clut_4D_hsw+0xa50>
+ DB 196,98,125,24,45,114,70,0,0 ; vbroadcastss 0x4672(%rip),%ymm13 # 6f70 <_sk_clut_4D_hsw+0xa50>
DB 196,66,37,184,236 ; vfmadd231ps %ymm12,%ymm11,%ymm13
- DB 196,98,125,24,29,53,67,0,0 ; vbroadcastss 0x4335(%rip),%ymm11 # 6aa8 <_sk_clut_4D_hsw+0xa54>
+ DB 196,98,125,24,29,104,70,0,0 ; vbroadcastss 0x4668(%rip),%ymm11 # 6f74 <_sk_clut_4D_hsw+0xa54>
DB 196,66,45,172,221 ; vfnmadd213ps %ymm13,%ymm10,%ymm11
- DB 196,98,125,24,37,43,67,0,0 ; vbroadcastss 0x432b(%rip),%ymm12 # 6aac <_sk_clut_4D_hsw+0xa58>
+ DB 196,98,125,24,37,94,70,0,0 ; vbroadcastss 0x465e(%rip),%ymm12 # 6f78 <_sk_clut_4D_hsw+0xa58>
DB 196,65,44,88,212 ; vaddps %ymm12,%ymm10,%ymm10
- DB 196,98,125,24,37,33,67,0,0 ; vbroadcastss 0x4321(%rip),%ymm12 # 6ab0 <_sk_clut_4D_hsw+0xa5c>
+ DB 196,98,125,24,37,84,70,0,0 ; vbroadcastss 0x4654(%rip),%ymm12 # 6f7c <_sk_clut_4D_hsw+0xa5c>
DB 196,65,28,94,210 ; vdivps %ymm10,%ymm12,%ymm10
DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10
DB 196,193,124,89,194 ; vmulps %ymm10,%ymm0,%ymm0
DB 196,99,125,8,208,1 ; vroundps $0x1,%ymm0,%ymm10
DB 196,65,124,92,210 ; vsubps %ymm10,%ymm0,%ymm10
- DB 196,98,125,24,29,2,67,0,0 ; vbroadcastss 0x4302(%rip),%ymm11 # 6ab4 <_sk_clut_4D_hsw+0xa60>
+ DB 196,98,125,24,29,53,70,0,0 ; vbroadcastss 0x4635(%rip),%ymm11 # 6f80 <_sk_clut_4D_hsw+0xa60>
DB 196,193,124,88,195 ; vaddps %ymm11,%ymm0,%ymm0
- DB 196,98,125,24,29,248,66,0,0 ; vbroadcastss 0x42f8(%rip),%ymm11 # 6ab8 <_sk_clut_4D_hsw+0xa64>
+ DB 196,98,125,24,29,43,70,0,0 ; vbroadcastss 0x462b(%rip),%ymm11 # 6f84 <_sk_clut_4D_hsw+0xa64>
DB 196,98,45,172,216 ; vfnmadd213ps %ymm0,%ymm10,%ymm11
- DB 196,226,125,24,5,238,66,0,0 ; vbroadcastss 0x42ee(%rip),%ymm0 # 6abc <_sk_clut_4D_hsw+0xa68>
+ DB 196,226,125,24,5,33,70,0,0 ; vbroadcastss 0x4621(%rip),%ymm0 # 6f88 <_sk_clut_4D_hsw+0xa68>
DB 196,193,124,92,194 ; vsubps %ymm10,%ymm0,%ymm0
- DB 196,98,125,24,21,228,66,0,0 ; vbroadcastss 0x42e4(%rip),%ymm10 # 6ac0 <_sk_clut_4D_hsw+0xa6c>
+ DB 196,98,125,24,21,23,70,0,0 ; vbroadcastss 0x4617(%rip),%ymm10 # 6f8c <_sk_clut_4D_hsw+0xa6c>
DB 197,172,94,192 ; vdivps %ymm0,%ymm10,%ymm0
DB 197,164,88,192 ; vaddps %ymm0,%ymm11,%ymm0
- DB 196,98,125,24,21,215,66,0,0 ; vbroadcastss 0x42d7(%rip),%ymm10 # 6ac4 <_sk_clut_4D_hsw+0xa70>
+ DB 196,98,125,24,21,10,70,0,0 ; vbroadcastss 0x460a(%rip),%ymm10 # 6f90 <_sk_clut_4D_hsw+0xa70>
DB 196,193,124,89,194 ; vmulps %ymm10,%ymm0,%ymm0
DB 197,253,91,192 ; vcvtps2dq %ymm0,%ymm0
DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
@@ -2412,7 +2521,7 @@ _sk_parametric_r_hsw LABEL PROC
DB 196,195,125,74,193,128 ; vblendvps %ymm8,%ymm9,%ymm0,%ymm0
DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
DB 196,193,124,95,192 ; vmaxps %ymm8,%ymm0,%ymm0
- DB 196,98,125,24,5,174,66,0,0 ; vbroadcastss 0x42ae(%rip),%ymm8 # 6ac8 <_sk_clut_4D_hsw+0xa74>
+ DB 196,98,125,24,5,225,69,0,0 ; vbroadcastss 0x45e1(%rip),%ymm8 # 6f94 <_sk_clut_4D_hsw+0xa74>
DB 196,193,124,93,192 ; vminps %ymm8,%ymm0,%ymm0
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -2430,33 +2539,33 @@ _sk_parametric_g_hsw LABEL PROC
DB 196,66,117,168,211 ; vfmadd213ps %ymm11,%ymm1,%ymm10
DB 196,226,125,24,8 ; vbroadcastss (%rax),%ymm1
DB 196,65,124,91,218 ; vcvtdq2ps %ymm10,%ymm11
- DB 196,98,125,24,37,102,66,0,0 ; vbroadcastss 0x4266(%rip),%ymm12 # 6acc <_sk_clut_4D_hsw+0xa78>
- DB 196,98,125,24,45,97,66,0,0 ; vbroadcastss 0x4261(%rip),%ymm13 # 6ad0 <_sk_clut_4D_hsw+0xa7c>
+ DB 196,98,125,24,37,153,69,0,0 ; vbroadcastss 0x4599(%rip),%ymm12 # 6f98 <_sk_clut_4D_hsw+0xa78>
+ DB 196,98,125,24,45,148,69,0,0 ; vbroadcastss 0x4594(%rip),%ymm13 # 6f9c <_sk_clut_4D_hsw+0xa7c>
DB 196,65,44,84,213 ; vandps %ymm13,%ymm10,%ymm10
- DB 196,98,125,24,45,87,66,0,0 ; vbroadcastss 0x4257(%rip),%ymm13 # 6ad4 <_sk_clut_4D_hsw+0xa80>
+ DB 196,98,125,24,45,138,69,0,0 ; vbroadcastss 0x458a(%rip),%ymm13 # 6fa0 <_sk_clut_4D_hsw+0xa80>
DB 196,65,44,86,213 ; vorps %ymm13,%ymm10,%ymm10
- DB 196,98,125,24,45,77,66,0,0 ; vbroadcastss 0x424d(%rip),%ymm13 # 6ad8 <_sk_clut_4D_hsw+0xa84>
+ DB 196,98,125,24,45,128,69,0,0 ; vbroadcastss 0x4580(%rip),%ymm13 # 6fa4 <_sk_clut_4D_hsw+0xa84>
DB 196,66,37,184,236 ; vfmadd231ps %ymm12,%ymm11,%ymm13
- DB 196,98,125,24,29,67,66,0,0 ; vbroadcastss 0x4243(%rip),%ymm11 # 6adc <_sk_clut_4D_hsw+0xa88>
+ DB 196,98,125,24,29,118,69,0,0 ; vbroadcastss 0x4576(%rip),%ymm11 # 6fa8 <_sk_clut_4D_hsw+0xa88>
DB 196,66,45,172,221 ; vfnmadd213ps %ymm13,%ymm10,%ymm11
- DB 196,98,125,24,37,57,66,0,0 ; vbroadcastss 0x4239(%rip),%ymm12 # 6ae0 <_sk_clut_4D_hsw+0xa8c>
+ DB 196,98,125,24,37,108,69,0,0 ; vbroadcastss 0x456c(%rip),%ymm12 # 6fac <_sk_clut_4D_hsw+0xa8c>
DB 196,65,44,88,212 ; vaddps %ymm12,%ymm10,%ymm10
- DB 196,98,125,24,37,47,66,0,0 ; vbroadcastss 0x422f(%rip),%ymm12 # 6ae4 <_sk_clut_4D_hsw+0xa90>
+ DB 196,98,125,24,37,98,69,0,0 ; vbroadcastss 0x4562(%rip),%ymm12 # 6fb0 <_sk_clut_4D_hsw+0xa90>
DB 196,65,28,94,210 ; vdivps %ymm10,%ymm12,%ymm10
DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10
DB 196,193,116,89,202 ; vmulps %ymm10,%ymm1,%ymm1
DB 196,99,125,8,209,1 ; vroundps $0x1,%ymm1,%ymm10
DB 196,65,116,92,210 ; vsubps %ymm10,%ymm1,%ymm10
- DB 196,98,125,24,29,16,66,0,0 ; vbroadcastss 0x4210(%rip),%ymm11 # 6ae8 <_sk_clut_4D_hsw+0xa94>
+ DB 196,98,125,24,29,67,69,0,0 ; vbroadcastss 0x4543(%rip),%ymm11 # 6fb4 <_sk_clut_4D_hsw+0xa94>
DB 196,193,116,88,203 ; vaddps %ymm11,%ymm1,%ymm1
- DB 196,98,125,24,29,6,66,0,0 ; vbroadcastss 0x4206(%rip),%ymm11 # 6aec <_sk_clut_4D_hsw+0xa98>
+ DB 196,98,125,24,29,57,69,0,0 ; vbroadcastss 0x4539(%rip),%ymm11 # 6fb8 <_sk_clut_4D_hsw+0xa98>
DB 196,98,45,172,217 ; vfnmadd213ps %ymm1,%ymm10,%ymm11
- DB 196,226,125,24,13,252,65,0,0 ; vbroadcastss 0x41fc(%rip),%ymm1 # 6af0 <_sk_clut_4D_hsw+0xa9c>
+ DB 196,226,125,24,13,47,69,0,0 ; vbroadcastss 0x452f(%rip),%ymm1 # 6fbc <_sk_clut_4D_hsw+0xa9c>
DB 196,193,116,92,202 ; vsubps %ymm10,%ymm1,%ymm1
- DB 196,98,125,24,21,242,65,0,0 ; vbroadcastss 0x41f2(%rip),%ymm10 # 6af4 <_sk_clut_4D_hsw+0xaa0>
+ DB 196,98,125,24,21,37,69,0,0 ; vbroadcastss 0x4525(%rip),%ymm10 # 6fc0 <_sk_clut_4D_hsw+0xaa0>
DB 197,172,94,201 ; vdivps %ymm1,%ymm10,%ymm1
DB 197,164,88,201 ; vaddps %ymm1,%ymm11,%ymm1
- DB 196,98,125,24,21,229,65,0,0 ; vbroadcastss 0x41e5(%rip),%ymm10 # 6af8 <_sk_clut_4D_hsw+0xaa4>
+ DB 196,98,125,24,21,24,69,0,0 ; vbroadcastss 0x4518(%rip),%ymm10 # 6fc4 <_sk_clut_4D_hsw+0xaa4>
DB 196,193,116,89,202 ; vmulps %ymm10,%ymm1,%ymm1
DB 197,253,91,201 ; vcvtps2dq %ymm1,%ymm1
DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
@@ -2464,7 +2573,7 @@ _sk_parametric_g_hsw LABEL PROC
DB 196,195,117,74,201,128 ; vblendvps %ymm8,%ymm9,%ymm1,%ymm1
DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
DB 196,193,116,95,200 ; vmaxps %ymm8,%ymm1,%ymm1
- DB 196,98,125,24,5,188,65,0,0 ; vbroadcastss 0x41bc(%rip),%ymm8 # 6afc <_sk_clut_4D_hsw+0xaa8>
+ DB 196,98,125,24,5,239,68,0,0 ; vbroadcastss 0x44ef(%rip),%ymm8 # 6fc8 <_sk_clut_4D_hsw+0xaa8>
DB 196,193,116,93,200 ; vminps %ymm8,%ymm1,%ymm1
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -2482,33 +2591,33 @@ _sk_parametric_b_hsw LABEL PROC
DB 196,66,109,168,211 ; vfmadd213ps %ymm11,%ymm2,%ymm10
DB 196,226,125,24,16 ; vbroadcastss (%rax),%ymm2
DB 196,65,124,91,218 ; vcvtdq2ps %ymm10,%ymm11
- DB 196,98,125,24,37,116,65,0,0 ; vbroadcastss 0x4174(%rip),%ymm12 # 6b00 <_sk_clut_4D_hsw+0xaac>
- DB 196,98,125,24,45,111,65,0,0 ; vbroadcastss 0x416f(%rip),%ymm13 # 6b04 <_sk_clut_4D_hsw+0xab0>
+ DB 196,98,125,24,37,167,68,0,0 ; vbroadcastss 0x44a7(%rip),%ymm12 # 6fcc <_sk_clut_4D_hsw+0xaac>
+ DB 196,98,125,24,45,162,68,0,0 ; vbroadcastss 0x44a2(%rip),%ymm13 # 6fd0 <_sk_clut_4D_hsw+0xab0>
DB 196,65,44,84,213 ; vandps %ymm13,%ymm10,%ymm10
- DB 196,98,125,24,45,101,65,0,0 ; vbroadcastss 0x4165(%rip),%ymm13 # 6b08 <_sk_clut_4D_hsw+0xab4>
+ DB 196,98,125,24,45,152,68,0,0 ; vbroadcastss 0x4498(%rip),%ymm13 # 6fd4 <_sk_clut_4D_hsw+0xab4>
DB 196,65,44,86,213 ; vorps %ymm13,%ymm10,%ymm10
- DB 196,98,125,24,45,91,65,0,0 ; vbroadcastss 0x415b(%rip),%ymm13 # 6b0c <_sk_clut_4D_hsw+0xab8>
+ DB 196,98,125,24,45,142,68,0,0 ; vbroadcastss 0x448e(%rip),%ymm13 # 6fd8 <_sk_clut_4D_hsw+0xab8>
DB 196,66,37,184,236 ; vfmadd231ps %ymm12,%ymm11,%ymm13
- DB 196,98,125,24,29,81,65,0,0 ; vbroadcastss 0x4151(%rip),%ymm11 # 6b10 <_sk_clut_4D_hsw+0xabc>
+ DB 196,98,125,24,29,132,68,0,0 ; vbroadcastss 0x4484(%rip),%ymm11 # 6fdc <_sk_clut_4D_hsw+0xabc>
DB 196,66,45,172,221 ; vfnmadd213ps %ymm13,%ymm10,%ymm11
- DB 196,98,125,24,37,71,65,0,0 ; vbroadcastss 0x4147(%rip),%ymm12 # 6b14 <_sk_clut_4D_hsw+0xac0>
+ DB 196,98,125,24,37,122,68,0,0 ; vbroadcastss 0x447a(%rip),%ymm12 # 6fe0 <_sk_clut_4D_hsw+0xac0>
DB 196,65,44,88,212 ; vaddps %ymm12,%ymm10,%ymm10
- DB 196,98,125,24,37,61,65,0,0 ; vbroadcastss 0x413d(%rip),%ymm12 # 6b18 <_sk_clut_4D_hsw+0xac4>
+ DB 196,98,125,24,37,112,68,0,0 ; vbroadcastss 0x4470(%rip),%ymm12 # 6fe4 <_sk_clut_4D_hsw+0xac4>
DB 196,65,28,94,210 ; vdivps %ymm10,%ymm12,%ymm10
DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10
DB 196,193,108,89,210 ; vmulps %ymm10,%ymm2,%ymm2
DB 196,99,125,8,210,1 ; vroundps $0x1,%ymm2,%ymm10
DB 196,65,108,92,210 ; vsubps %ymm10,%ymm2,%ymm10
- DB 196,98,125,24,29,30,65,0,0 ; vbroadcastss 0x411e(%rip),%ymm11 # 6b1c <_sk_clut_4D_hsw+0xac8>
+ DB 196,98,125,24,29,81,68,0,0 ; vbroadcastss 0x4451(%rip),%ymm11 # 6fe8 <_sk_clut_4D_hsw+0xac8>
DB 196,193,108,88,211 ; vaddps %ymm11,%ymm2,%ymm2
- DB 196,98,125,24,29,20,65,0,0 ; vbroadcastss 0x4114(%rip),%ymm11 # 6b20 <_sk_clut_4D_hsw+0xacc>
+ DB 196,98,125,24,29,71,68,0,0 ; vbroadcastss 0x4447(%rip),%ymm11 # 6fec <_sk_clut_4D_hsw+0xacc>
DB 196,98,45,172,218 ; vfnmadd213ps %ymm2,%ymm10,%ymm11
- DB 196,226,125,24,21,10,65,0,0 ; vbroadcastss 0x410a(%rip),%ymm2 # 6b24 <_sk_clut_4D_hsw+0xad0>
+ DB 196,226,125,24,21,61,68,0,0 ; vbroadcastss 0x443d(%rip),%ymm2 # 6ff0 <_sk_clut_4D_hsw+0xad0>
DB 196,193,108,92,210 ; vsubps %ymm10,%ymm2,%ymm2
- DB 196,98,125,24,21,0,65,0,0 ; vbroadcastss 0x4100(%rip),%ymm10 # 6b28 <_sk_clut_4D_hsw+0xad4>
+ DB 196,98,125,24,21,51,68,0,0 ; vbroadcastss 0x4433(%rip),%ymm10 # 6ff4 <_sk_clut_4D_hsw+0xad4>
DB 197,172,94,210 ; vdivps %ymm2,%ymm10,%ymm2
DB 197,164,88,210 ; vaddps %ymm2,%ymm11,%ymm2
- DB 196,98,125,24,21,243,64,0,0 ; vbroadcastss 0x40f3(%rip),%ymm10 # 6b2c <_sk_clut_4D_hsw+0xad8>
+ DB 196,98,125,24,21,38,68,0,0 ; vbroadcastss 0x4426(%rip),%ymm10 # 6ff8 <_sk_clut_4D_hsw+0xad8>
DB 196,193,108,89,210 ; vmulps %ymm10,%ymm2,%ymm2
DB 197,253,91,210 ; vcvtps2dq %ymm2,%ymm2
DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
@@ -2516,7 +2625,7 @@ _sk_parametric_b_hsw LABEL PROC
DB 196,195,109,74,209,128 ; vblendvps %ymm8,%ymm9,%ymm2,%ymm2
DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
DB 196,193,108,95,208 ; vmaxps %ymm8,%ymm2,%ymm2
- DB 196,98,125,24,5,202,64,0,0 ; vbroadcastss 0x40ca(%rip),%ymm8 # 6b30 <_sk_clut_4D_hsw+0xadc>
+ DB 196,98,125,24,5,253,67,0,0 ; vbroadcastss 0x43fd(%rip),%ymm8 # 6ffc <_sk_clut_4D_hsw+0xadc>
DB 196,193,108,93,208 ; vminps %ymm8,%ymm2,%ymm2
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -2534,33 +2643,33 @@ _sk_parametric_a_hsw LABEL PROC
DB 196,66,101,168,211 ; vfmadd213ps %ymm11,%ymm3,%ymm10
DB 196,226,125,24,24 ; vbroadcastss (%rax),%ymm3
DB 196,65,124,91,218 ; vcvtdq2ps %ymm10,%ymm11
- DB 196,98,125,24,37,130,64,0,0 ; vbroadcastss 0x4082(%rip),%ymm12 # 6b34 <_sk_clut_4D_hsw+0xae0>
- DB 196,98,125,24,45,125,64,0,0 ; vbroadcastss 0x407d(%rip),%ymm13 # 6b38 <_sk_clut_4D_hsw+0xae4>
+ DB 196,98,125,24,37,181,67,0,0 ; vbroadcastss 0x43b5(%rip),%ymm12 # 7000 <_sk_clut_4D_hsw+0xae0>
+ DB 196,98,125,24,45,176,67,0,0 ; vbroadcastss 0x43b0(%rip),%ymm13 # 7004 <_sk_clut_4D_hsw+0xae4>
DB 196,65,44,84,213 ; vandps %ymm13,%ymm10,%ymm10
- DB 196,98,125,24,45,115,64,0,0 ; vbroadcastss 0x4073(%rip),%ymm13 # 6b3c <_sk_clut_4D_hsw+0xae8>
+ DB 196,98,125,24,45,166,67,0,0 ; vbroadcastss 0x43a6(%rip),%ymm13 # 7008 <_sk_clut_4D_hsw+0xae8>
DB 196,65,44,86,213 ; vorps %ymm13,%ymm10,%ymm10
- DB 196,98,125,24,45,105,64,0,0 ; vbroadcastss 0x4069(%rip),%ymm13 # 6b40 <_sk_clut_4D_hsw+0xaec>
+ DB 196,98,125,24,45,156,67,0,0 ; vbroadcastss 0x439c(%rip),%ymm13 # 700c <_sk_clut_4D_hsw+0xaec>
DB 196,66,37,184,236 ; vfmadd231ps %ymm12,%ymm11,%ymm13
- DB 196,98,125,24,29,95,64,0,0 ; vbroadcastss 0x405f(%rip),%ymm11 # 6b44 <_sk_clut_4D_hsw+0xaf0>
+ DB 196,98,125,24,29,146,67,0,0 ; vbroadcastss 0x4392(%rip),%ymm11 # 7010 <_sk_clut_4D_hsw+0xaf0>
DB 196,66,45,172,221 ; vfnmadd213ps %ymm13,%ymm10,%ymm11
- DB 196,98,125,24,37,85,64,0,0 ; vbroadcastss 0x4055(%rip),%ymm12 # 6b48 <_sk_clut_4D_hsw+0xaf4>
+ DB 196,98,125,24,37,136,67,0,0 ; vbroadcastss 0x4388(%rip),%ymm12 # 7014 <_sk_clut_4D_hsw+0xaf4>
DB 196,65,44,88,212 ; vaddps %ymm12,%ymm10,%ymm10
- DB 196,98,125,24,37,75,64,0,0 ; vbroadcastss 0x404b(%rip),%ymm12 # 6b4c <_sk_clut_4D_hsw+0xaf8>
+ DB 196,98,125,24,37,126,67,0,0 ; vbroadcastss 0x437e(%rip),%ymm12 # 7018 <_sk_clut_4D_hsw+0xaf8>
DB 196,65,28,94,210 ; vdivps %ymm10,%ymm12,%ymm10
DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10
DB 196,193,100,89,218 ; vmulps %ymm10,%ymm3,%ymm3
DB 196,99,125,8,211,1 ; vroundps $0x1,%ymm3,%ymm10
DB 196,65,100,92,210 ; vsubps %ymm10,%ymm3,%ymm10
- DB 196,98,125,24,29,44,64,0,0 ; vbroadcastss 0x402c(%rip),%ymm11 # 6b50 <_sk_clut_4D_hsw+0xafc>
+ DB 196,98,125,24,29,95,67,0,0 ; vbroadcastss 0x435f(%rip),%ymm11 # 701c <_sk_clut_4D_hsw+0xafc>
DB 196,193,100,88,219 ; vaddps %ymm11,%ymm3,%ymm3
- DB 196,98,125,24,29,34,64,0,0 ; vbroadcastss 0x4022(%rip),%ymm11 # 6b54 <_sk_clut_4D_hsw+0xb00>
+ DB 196,98,125,24,29,85,67,0,0 ; vbroadcastss 0x4355(%rip),%ymm11 # 7020 <_sk_clut_4D_hsw+0xb00>
DB 196,98,45,172,219 ; vfnmadd213ps %ymm3,%ymm10,%ymm11
- DB 196,226,125,24,29,24,64,0,0 ; vbroadcastss 0x4018(%rip),%ymm3 # 6b58 <_sk_clut_4D_hsw+0xb04>
+ DB 196,226,125,24,29,75,67,0,0 ; vbroadcastss 0x434b(%rip),%ymm3 # 7024 <_sk_clut_4D_hsw+0xb04>
DB 196,193,100,92,218 ; vsubps %ymm10,%ymm3,%ymm3
- DB 196,98,125,24,21,14,64,0,0 ; vbroadcastss 0x400e(%rip),%ymm10 # 6b5c <_sk_clut_4D_hsw+0xb08>
+ DB 196,98,125,24,21,65,67,0,0 ; vbroadcastss 0x4341(%rip),%ymm10 # 7028 <_sk_clut_4D_hsw+0xb08>
DB 197,172,94,219 ; vdivps %ymm3,%ymm10,%ymm3
DB 197,164,88,219 ; vaddps %ymm3,%ymm11,%ymm3
- DB 196,98,125,24,21,1,64,0,0 ; vbroadcastss 0x4001(%rip),%ymm10 # 6b60 <_sk_clut_4D_hsw+0xb0c>
+ DB 196,98,125,24,21,52,67,0,0 ; vbroadcastss 0x4334(%rip),%ymm10 # 702c <_sk_clut_4D_hsw+0xb0c>
DB 196,193,100,89,218 ; vmulps %ymm10,%ymm3,%ymm3
DB 197,253,91,219 ; vcvtps2dq %ymm3,%ymm3
DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
@@ -2568,7 +2677,7 @@ _sk_parametric_a_hsw LABEL PROC
DB 196,195,101,74,217,128 ; vblendvps %ymm8,%ymm9,%ymm3,%ymm3
DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
DB 196,193,100,95,216 ; vmaxps %ymm8,%ymm3,%ymm3
- DB 196,98,125,24,5,216,63,0,0 ; vbroadcastss 0x3fd8(%rip),%ymm8 # 6b64 <_sk_clut_4D_hsw+0xb10>
+ DB 196,98,125,24,5,11,67,0,0 ; vbroadcastss 0x430b(%rip),%ymm8 # 7030 <_sk_clut_4D_hsw+0xb10>
DB 196,193,100,93,216 ; vminps %ymm8,%ymm3,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -2585,35 +2694,35 @@ _sk_gamma_hsw LABEL PROC
DB 197,252,40,241 ; vmovaps %ymm1,%ymm6
DB 72,173 ; lods %ds:(%rsi),%rax
DB 197,124,91,208 ; vcvtdq2ps %ymm0,%ymm10
- DB 196,98,125,24,29,149,63,0,0 ; vbroadcastss 0x3f95(%rip),%ymm11 # 6b68 <_sk_clut_4D_hsw+0xb14>
- DB 196,226,125,24,45,144,63,0,0 ; vbroadcastss 0x3f90(%rip),%ymm5 # 6b6c <_sk_clut_4D_hsw+0xb18>
+ DB 196,98,125,24,29,200,66,0,0 ; vbroadcastss 0x42c8(%rip),%ymm11 # 7034 <_sk_clut_4D_hsw+0xb14>
+ DB 196,226,125,24,45,195,66,0,0 ; vbroadcastss 0x42c3(%rip),%ymm5 # 7038 <_sk_clut_4D_hsw+0xb18>
DB 197,124,84,205 ; vandps %ymm5,%ymm0,%ymm9
- DB 196,226,125,24,37,135,63,0,0 ; vbroadcastss 0x3f87(%rip),%ymm4 # 6b70 <_sk_clut_4D_hsw+0xb1c>
+ DB 196,226,125,24,37,186,66,0,0 ; vbroadcastss 0x42ba(%rip),%ymm4 # 703c <_sk_clut_4D_hsw+0xb1c>
DB 197,52,86,228 ; vorps %ymm4,%ymm9,%ymm12
- DB 196,98,125,24,45,126,63,0,0 ; vbroadcastss 0x3f7e(%rip),%ymm13 # 6b74 <_sk_clut_4D_hsw+0xb20>
+ DB 196,98,125,24,45,177,66,0,0 ; vbroadcastss 0x42b1(%rip),%ymm13 # 7040 <_sk_clut_4D_hsw+0xb20>
DB 196,66,37,168,213 ; vfmadd213ps %ymm13,%ymm11,%ymm10
- DB 196,98,125,24,13,116,63,0,0 ; vbroadcastss 0x3f74(%rip),%ymm9 # 6b78 <_sk_clut_4D_hsw+0xb24>
+ DB 196,98,125,24,13,167,66,0,0 ; vbroadcastss 0x42a7(%rip),%ymm9 # 7044 <_sk_clut_4D_hsw+0xb24>
DB 196,66,29,188,209 ; vfnmadd231ps %ymm9,%ymm12,%ymm10
DB 197,124,91,246 ; vcvtdq2ps %ymm6,%ymm14
DB 196,66,37,168,245 ; vfmadd213ps %ymm13,%ymm11,%ymm14
DB 197,124,91,255 ; vcvtdq2ps %ymm7,%ymm15
DB 196,66,37,168,253 ; vfmadd213ps %ymm13,%ymm11,%ymm15
- DB 196,98,125,24,29,88,63,0,0 ; vbroadcastss 0x3f58(%rip),%ymm11 # 6b7c <_sk_clut_4D_hsw+0xb28>
+ DB 196,98,125,24,29,139,66,0,0 ; vbroadcastss 0x428b(%rip),%ymm11 # 7048 <_sk_clut_4D_hsw+0xb28>
DB 196,65,28,88,227 ; vaddps %ymm11,%ymm12,%ymm12
- DB 196,98,125,24,45,78,63,0,0 ; vbroadcastss 0x3f4e(%rip),%ymm13 # 6b80 <_sk_clut_4D_hsw+0xb2c>
+ DB 196,98,125,24,45,129,66,0,0 ; vbroadcastss 0x4281(%rip),%ymm13 # 704c <_sk_clut_4D_hsw+0xb2c>
DB 196,65,20,94,228 ; vdivps %ymm12,%ymm13,%ymm12
DB 196,65,44,92,212 ; vsubps %ymm12,%ymm10,%ymm10
DB 196,98,125,24,32 ; vbroadcastss (%rax),%ymm12
DB 196,65,44,89,212 ; vmulps %ymm12,%ymm10,%ymm10
DB 196,67,125,8,194,1 ; vroundps $0x1,%ymm10,%ymm8
DB 196,65,44,92,192 ; vsubps %ymm8,%ymm10,%ymm8
- DB 196,226,125,24,21,42,63,0,0 ; vbroadcastss 0x3f2a(%rip),%ymm2 # 6b84 <_sk_clut_4D_hsw+0xb30>
+ DB 196,226,125,24,21,93,66,0,0 ; vbroadcastss 0x425d(%rip),%ymm2 # 7050 <_sk_clut_4D_hsw+0xb30>
DB 197,44,88,210 ; vaddps %ymm2,%ymm10,%ymm10
- DB 196,226,125,24,29,33,63,0,0 ; vbroadcastss 0x3f21(%rip),%ymm3 # 6b88 <_sk_clut_4D_hsw+0xb34>
+ DB 196,226,125,24,29,84,66,0,0 ; vbroadcastss 0x4254(%rip),%ymm3 # 7054 <_sk_clut_4D_hsw+0xb34>
DB 196,98,61,188,211 ; vfnmadd231ps %ymm3,%ymm8,%ymm10
- DB 196,226,125,24,13,23,63,0,0 ; vbroadcastss 0x3f17(%rip),%ymm1 # 6b8c <_sk_clut_4D_hsw+0xb38>
+ DB 196,226,125,24,13,74,66,0,0 ; vbroadcastss 0x424a(%rip),%ymm1 # 7058 <_sk_clut_4D_hsw+0xb38>
DB 196,65,116,92,192 ; vsubps %ymm8,%ymm1,%ymm8
- DB 196,226,125,24,5,13,63,0,0 ; vbroadcastss 0x3f0d(%rip),%ymm0 # 6b90 <_sk_clut_4D_hsw+0xb3c>
+ DB 196,226,125,24,5,64,66,0,0 ; vbroadcastss 0x4240(%rip),%ymm0 # 705c <_sk_clut_4D_hsw+0xb3c>
DB 196,65,124,94,192 ; vdivps %ymm8,%ymm0,%ymm8
DB 196,65,44,88,192 ; vaddps %ymm8,%ymm10,%ymm8
DB 197,204,84,245 ; vandps %ymm5,%ymm6,%ymm6
@@ -2644,7 +2753,7 @@ _sk_gamma_hsw LABEL PROC
DB 197,244,92,205 ; vsubps %ymm5,%ymm1,%ymm1
DB 197,252,94,193 ; vdivps %ymm1,%ymm0,%ymm0
DB 197,236,88,192 ; vaddps %ymm0,%ymm2,%ymm0
- DB 196,226,125,24,13,126,62,0,0 ; vbroadcastss 0x3e7e(%rip),%ymm1 # 6b94 <_sk_clut_4D_hsw+0xb40>
+ DB 196,226,125,24,13,177,65,0,0 ; vbroadcastss 0x41b1(%rip),%ymm1 # 7060 <_sk_clut_4D_hsw+0xb40>
DB 197,188,89,209 ; vmulps %ymm1,%ymm8,%ymm2
DB 197,204,89,217 ; vmulps %ymm1,%ymm6,%ymm3
DB 197,252,89,225 ; vmulps %ymm1,%ymm0,%ymm4
@@ -2662,26 +2771,26 @@ _sk_gamma_hsw LABEL PROC
PUBLIC _sk_lab_to_xyz_hsw
_sk_lab_to_xyz_hsw LABEL PROC
- DB 196,98,125,24,5,54,62,0,0 ; vbroadcastss 0x3e36(%rip),%ymm8 # 6b98 <_sk_clut_4D_hsw+0xb44>
- DB 196,98,125,24,13,49,62,0,0 ; vbroadcastss 0x3e31(%rip),%ymm9 # 6b9c <_sk_clut_4D_hsw+0xb48>
- DB 196,98,125,24,21,44,62,0,0 ; vbroadcastss 0x3e2c(%rip),%ymm10 # 6ba0 <_sk_clut_4D_hsw+0xb4c>
+ DB 196,98,125,24,5,105,65,0,0 ; vbroadcastss 0x4169(%rip),%ymm8 # 7064 <_sk_clut_4D_hsw+0xb44>
+ DB 196,98,125,24,13,100,65,0,0 ; vbroadcastss 0x4164(%rip),%ymm9 # 7068 <_sk_clut_4D_hsw+0xb48>
+ DB 196,98,125,24,21,95,65,0,0 ; vbroadcastss 0x415f(%rip),%ymm10 # 706c <_sk_clut_4D_hsw+0xb4c>
DB 196,194,53,168,202 ; vfmadd213ps %ymm10,%ymm9,%ymm1
DB 196,194,53,168,210 ; vfmadd213ps %ymm10,%ymm9,%ymm2
- DB 196,98,125,24,13,29,62,0,0 ; vbroadcastss 0x3e1d(%rip),%ymm9 # 6ba4 <_sk_clut_4D_hsw+0xb50>
+ DB 196,98,125,24,13,80,65,0,0 ; vbroadcastss 0x4150(%rip),%ymm9 # 7070 <_sk_clut_4D_hsw+0xb50>
DB 196,66,125,184,200 ; vfmadd231ps %ymm8,%ymm0,%ymm9
- DB 196,226,125,24,5,19,62,0,0 ; vbroadcastss 0x3e13(%rip),%ymm0 # 6ba8 <_sk_clut_4D_hsw+0xb54>
+ DB 196,226,125,24,5,70,65,0,0 ; vbroadcastss 0x4146(%rip),%ymm0 # 7074 <_sk_clut_4D_hsw+0xb54>
DB 197,180,89,192 ; vmulps %ymm0,%ymm9,%ymm0
- DB 196,98,125,24,5,10,62,0,0 ; vbroadcastss 0x3e0a(%rip),%ymm8 # 6bac <_sk_clut_4D_hsw+0xb58>
+ DB 196,98,125,24,5,61,65,0,0 ; vbroadcastss 0x413d(%rip),%ymm8 # 7078 <_sk_clut_4D_hsw+0xb58>
DB 196,98,117,168,192 ; vfmadd213ps %ymm0,%ymm1,%ymm8
- DB 196,98,125,24,13,0,62,0,0 ; vbroadcastss 0x3e00(%rip),%ymm9 # 6bb0 <_sk_clut_4D_hsw+0xb5c>
+ DB 196,98,125,24,13,51,65,0,0 ; vbroadcastss 0x4133(%rip),%ymm9 # 707c <_sk_clut_4D_hsw+0xb5c>
DB 196,98,109,172,200 ; vfnmadd213ps %ymm0,%ymm2,%ymm9
DB 196,193,60,89,200 ; vmulps %ymm8,%ymm8,%ymm1
DB 197,188,89,201 ; vmulps %ymm1,%ymm8,%ymm1
- DB 196,226,125,24,21,237,61,0,0 ; vbroadcastss 0x3ded(%rip),%ymm2 # 6bb4 <_sk_clut_4D_hsw+0xb60>
+ DB 196,226,125,24,21,32,65,0,0 ; vbroadcastss 0x4120(%rip),%ymm2 # 7080 <_sk_clut_4D_hsw+0xb60>
DB 197,108,194,209,1 ; vcmpltps %ymm1,%ymm2,%ymm10
- DB 196,98,125,24,29,227,61,0,0 ; vbroadcastss 0x3de3(%rip),%ymm11 # 6bb8 <_sk_clut_4D_hsw+0xb64>
+ DB 196,98,125,24,29,22,65,0,0 ; vbroadcastss 0x4116(%rip),%ymm11 # 7084 <_sk_clut_4D_hsw+0xb64>
DB 196,65,60,88,195 ; vaddps %ymm11,%ymm8,%ymm8
- DB 196,98,125,24,37,217,61,0,0 ; vbroadcastss 0x3dd9(%rip),%ymm12 # 6bbc <_sk_clut_4D_hsw+0xb68>
+ DB 196,98,125,24,37,12,65,0,0 ; vbroadcastss 0x410c(%rip),%ymm12 # 7088 <_sk_clut_4D_hsw+0xb68>
DB 196,65,60,89,196 ; vmulps %ymm12,%ymm8,%ymm8
DB 196,99,61,74,193,160 ; vblendvps %ymm10,%ymm1,%ymm8,%ymm8
DB 197,252,89,200 ; vmulps %ymm0,%ymm0,%ymm1
@@ -2696,9 +2805,9 @@ _sk_lab_to_xyz_hsw LABEL PROC
DB 196,65,52,88,203 ; vaddps %ymm11,%ymm9,%ymm9
DB 196,65,52,89,204 ; vmulps %ymm12,%ymm9,%ymm9
DB 196,227,53,74,208,32 ; vblendvps %ymm2,%ymm0,%ymm9,%ymm2
- DB 196,226,125,24,5,142,61,0,0 ; vbroadcastss 0x3d8e(%rip),%ymm0 # 6bc0 <_sk_clut_4D_hsw+0xb6c>
+ DB 196,226,125,24,5,193,64,0,0 ; vbroadcastss 0x40c1(%rip),%ymm0 # 708c <_sk_clut_4D_hsw+0xb6c>
DB 197,188,89,192 ; vmulps %ymm0,%ymm8,%ymm0
- DB 196,98,125,24,5,133,61,0,0 ; vbroadcastss 0x3d85(%rip),%ymm8 # 6bc4 <_sk_clut_4D_hsw+0xb70>
+ DB 196,98,125,24,5,184,64,0,0 ; vbroadcastss 0x40b8(%rip),%ymm8 # 7090 <_sk_clut_4D_hsw+0xb70>
DB 196,193,108,89,208 ; vmulps %ymm8,%ymm2,%ymm2
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -2713,12 +2822,12 @@ _sk_load_a8_hsw LABEL PROC
DB 76,3,8 ; add (%rax),%r9
DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 117,53 ; jne 2e96 <_sk_load_a8_hsw+0x4e>
+ DB 117,53 ; jne 302f <_sk_load_a8_hsw+0x4e>
DB 196,194,121,48,4,25 ; vpmovzxbw (%r9,%rbx,1),%xmm0
- DB 197,249,219,5,17,67,0,0 ; vpand 0x4311(%rip),%xmm0,%xmm0 # 7180 <_sk_clut_4D_hsw+0x112c>
+ DB 197,249,219,5,56,70,0,0 ; vpand 0x4638(%rip),%xmm0,%xmm0 # 7640 <_sk_clut_4D_hsw+0x1120>
DB 196,226,125,51,192 ; vpmovzxwd %xmm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,226,125,24,13,71,61,0,0 ; vbroadcastss 0x3d47(%rip),%ymm1 # 6bc8 <_sk_clut_4D_hsw+0xb74>
+ DB 196,226,125,24,13,122,64,0,0 ; vbroadcastss 0x407a(%rip),%ymm1 # 7094 <_sk_clut_4D_hsw+0xb74>
DB 197,252,89,217 ; vmulps %ymm1,%ymm0,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
DB 197,252,87,192 ; vxorps %ymm0,%ymm0,%ymm0
@@ -2731,15 +2840,15 @@ _sk_load_a8_hsw LABEL PROC
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 65,254,202 ; dec %r10b
DB 65,128,250,6 ; cmp $0x6,%r10b
- DB 119,189 ; ja 2e67 <_sk_load_a8_hsw+0x1f>
+ DB 119,189 ; ja 3000 <_sk_load_a8_hsw+0x1f>
DB 69,15,182,210 ; movzbl %r10b,%r10d
- DB 76,141,29,119,0,0,0 ; lea 0x77(%rip),%r11 # 2f2c <_sk_load_a8_hsw+0xe4>
+ DB 76,141,29,122,0,0,0 ; lea 0x7a(%rip),%r11 # 30c8 <_sk_load_a8_hsw+0xe7>
DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
DB 65,15,182,4,25 ; movzbl (%r9,%rbx,1),%eax
DB 197,249,110,192 ; vmovd %eax,%xmm0
- DB 235,158 ; jmp 2e67 <_sk_load_a8_hsw+0x1f>
+ DB 235,158 ; jmp 3000 <_sk_load_a8_hsw+0x1f>
DB 65,15,182,68,25,2 ; movzbl 0x2(%r9,%rbx,1),%eax
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 197,249,196,192,2 ; vpinsrw $0x2,%eax,%xmm0,%xmm0
@@ -2747,7 +2856,7 @@ _sk_load_a8_hsw LABEL PROC
DB 197,249,110,200 ; vmovd %eax,%xmm1
DB 196,226,121,48,201 ; vpmovzxbw %xmm1,%xmm1
DB 196,227,121,2,193,1 ; vpblendd $0x1,%xmm1,%xmm0,%xmm0
- DB 233,118,255,255,255 ; jmpq 2e67 <_sk_load_a8_hsw+0x1f>
+ DB 233,118,255,255,255 ; jmpq 3000 <_sk_load_a8_hsw+0x1f>
DB 65,15,182,68,25,6 ; movzbl 0x6(%r9,%rbx,1),%eax
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 197,249,196,192,6 ; vpinsrw $0x6,%eax,%xmm0,%xmm0
@@ -2758,23 +2867,24 @@ _sk_load_a8_hsw LABEL PROC
DB 196,193,121,110,12,25 ; vmovd (%r9,%rbx,1),%xmm1
DB 196,226,121,48,201 ; vpmovzxbw %xmm1,%xmm1
DB 196,227,113,2,192,12 ; vpblendd $0xc,%xmm0,%xmm1,%xmm0
- DB 233,59,255,255,255 ; jmpq 2e67 <_sk_load_a8_hsw+0x1f>
- DB 146 ; xchg %eax,%edx
+ DB 233,59,255,255,255 ; jmpq 3000 <_sk_load_a8_hsw+0x1f>
+ DB 15,31,0 ; nopl (%rax)
+ DB 143 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,172,255,255,255,157,255 ; ljmp *-0x620001(%rdi,%rdi,8)
+ DB 255,169,255,255,255,154 ; ljmp *-0x65000001(%rcx)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 234 ; (bad)
+ DB 255,231 ; jmpq *%rdi
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 223,255 ; (bad)
+ DB 220,255 ; fdivr %st,%st(7)
DB 255 ; (bad)
- DB 255,212 ; callq *%rsp
+ DB 255,209 ; callq *%rcx
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,197 ; inc %ebp
+ DB 255,194 ; inc %edx
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; .byte 0xff
@@ -2789,12 +2899,12 @@ _sk_load_a8_dst_hsw LABEL PROC
DB 76,3,8 ; add (%rax),%r9
DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 117,53 ; jne 2f96 <_sk_load_a8_dst_hsw+0x4e>
+ DB 117,53 ; jne 3132 <_sk_load_a8_dst_hsw+0x4e>
DB 196,194,121,48,36,25 ; vpmovzxbw (%r9,%rbx,1),%xmm4
- DB 197,217,219,37,33,66,0,0 ; vpand 0x4221(%rip),%xmm4,%xmm4 # 7190 <_sk_clut_4D_hsw+0x113c>
+ DB 197,217,219,37,69,69,0,0 ; vpand 0x4545(%rip),%xmm4,%xmm4 # 7650 <_sk_clut_4D_hsw+0x1130>
DB 196,226,125,51,228 ; vpmovzxwd %xmm4,%ymm4
DB 197,252,91,228 ; vcvtdq2ps %ymm4,%ymm4
- DB 196,226,125,24,45,75,60,0,0 ; vbroadcastss 0x3c4b(%rip),%ymm5 # 6bcc <_sk_clut_4D_hsw+0xb78>
+ DB 196,226,125,24,45,123,63,0,0 ; vbroadcastss 0x3f7b(%rip),%ymm5 # 7098 <_sk_clut_4D_hsw+0xb78>
DB 197,220,89,253 ; vmulps %ymm5,%ymm4,%ymm7
DB 72,173 ; lods %ds:(%rsi),%rax
DB 197,220,87,228 ; vxorps %ymm4,%ymm4,%ymm4
@@ -2807,15 +2917,15 @@ _sk_load_a8_dst_hsw LABEL PROC
DB 197,217,239,228 ; vpxor %xmm4,%xmm4,%xmm4
DB 65,254,202 ; dec %r10b
DB 65,128,250,6 ; cmp $0x6,%r10b
- DB 119,189 ; ja 2f67 <_sk_load_a8_dst_hsw+0x1f>
+ DB 119,189 ; ja 3103 <_sk_load_a8_dst_hsw+0x1f>
DB 69,15,182,210 ; movzbl %r10b,%r10d
- DB 76,141,29,119,0,0,0 ; lea 0x77(%rip),%r11 # 302c <_sk_load_a8_dst_hsw+0xe4>
+ DB 76,141,29,119,0,0,0 ; lea 0x77(%rip),%r11 # 31c8 <_sk_load_a8_dst_hsw+0xe4>
DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
DB 65,15,182,4,25 ; movzbl (%r9,%rbx,1),%eax
DB 197,249,110,224 ; vmovd %eax,%xmm4
- DB 235,158 ; jmp 2f67 <_sk_load_a8_dst_hsw+0x1f>
+ DB 235,158 ; jmp 3103 <_sk_load_a8_dst_hsw+0x1f>
DB 65,15,182,68,25,2 ; movzbl 0x2(%r9,%rbx,1),%eax
DB 197,217,239,228 ; vpxor %xmm4,%xmm4,%xmm4
DB 197,217,196,224,2 ; vpinsrw $0x2,%eax,%xmm4,%xmm4
@@ -2823,7 +2933,7 @@ _sk_load_a8_dst_hsw LABEL PROC
DB 197,249,110,232 ; vmovd %eax,%xmm5
DB 196,226,121,48,237 ; vpmovzxbw %xmm5,%xmm5
DB 196,227,89,2,229,1 ; vpblendd $0x1,%xmm5,%xmm4,%xmm4
- DB 233,118,255,255,255 ; jmpq 2f67 <_sk_load_a8_dst_hsw+0x1f>
+ DB 233,118,255,255,255 ; jmpq 3103 <_sk_load_a8_dst_hsw+0x1f>
DB 65,15,182,68,25,6 ; movzbl 0x6(%r9,%rbx,1),%eax
DB 197,217,239,228 ; vpxor %xmm4,%xmm4,%xmm4
DB 197,217,196,224,6 ; vpinsrw $0x6,%eax,%xmm4,%xmm4
@@ -2834,7 +2944,7 @@ _sk_load_a8_dst_hsw LABEL PROC
DB 196,193,121,110,44,25 ; vmovd (%r9,%rbx,1),%xmm5
DB 196,226,121,48,237 ; vpmovzxbw %xmm5,%xmm5
DB 196,227,81,2,228,12 ; vpblendd $0xc,%xmm4,%xmm5,%xmm4
- DB 233,59,255,255,255 ; jmpq 2f67 <_sk_load_a8_dst_hsw+0x1f>
+ DB 233,59,255,255,255 ; jmpq 3103 <_sk_load_a8_dst_hsw+0x1f>
DB 146 ; xchg %eax,%edx
DB 255 ; (bad)
DB 255 ; (bad)
@@ -2893,7 +3003,7 @@ _sk_gather_a8_hsw LABEL PROC
DB 196,227,121,32,192,7 ; vpinsrb $0x7,%eax,%xmm0,%xmm0
DB 196,226,125,49,192 ; vpmovzxbd %xmm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,226,125,24,13,210,58,0,0 ; vbroadcastss 0x3ad2(%rip),%ymm1 # 6bd0 <_sk_clut_4D_hsw+0xb7c>
+ DB 196,226,125,24,13,2,62,0,0 ; vbroadcastss 0x3e02(%rip),%ymm1 # 709c <_sk_clut_4D_hsw+0xb7c>
DB 197,252,89,217 ; vmulps %ymm1,%ymm0,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
DB 197,252,87,192 ; vxorps %ymm0,%ymm0,%ymm0
@@ -2910,14 +3020,14 @@ _sk_store_a8_hsw LABEL PROC
DB 77,15,175,202 ; imul %r10,%r9
DB 76,3,8 ; add (%rax),%r9
DB 72,99,218 ; movslq %edx,%rbx
- DB 196,98,125,24,5,165,58,0,0 ; vbroadcastss 0x3aa5(%rip),%ymm8 # 6bd4 <_sk_clut_4D_hsw+0xb80>
+ DB 196,98,125,24,5,213,61,0,0 ; vbroadcastss 0x3dd5(%rip),%ymm8 # 70a0 <_sk_clut_4D_hsw+0xb80>
DB 196,65,100,89,192 ; vmulps %ymm8,%ymm3,%ymm8
DB 196,65,125,91,192 ; vcvtps2dq %ymm8,%ymm8
DB 196,67,125,25,193,1 ; vextractf128 $0x1,%ymm8,%xmm9
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 196,65,57,103,192 ; vpackuswb %xmm8,%xmm8,%xmm8
DB 77,133,192 ; test %r8,%r8
- DB 117,11 ; jne 3159 <_sk_store_a8_hsw+0x47>
+ DB 117,11 ; jne 32f5 <_sk_store_a8_hsw+0x47>
DB 196,65,123,17,4,25 ; vmovsd %xmm8,(%r9,%rbx,1)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 91 ; pop %rbx
@@ -2926,25 +3036,25 @@ _sk_store_a8_hsw LABEL PROC
DB 65,128,226,7 ; and $0x7,%r10b
DB 65,254,202 ; dec %r10b
DB 65,128,250,6 ; cmp $0x6,%r10b
- DB 119,235 ; ja 3154 <_sk_store_a8_hsw+0x42>
+ DB 119,235 ; ja 32f0 <_sk_store_a8_hsw+0x42>
DB 196,66,121,48,192 ; vpmovzxbw %xmm8,%xmm8
DB 69,15,182,210 ; movzbl %r10b,%r10d
- DB 76,141,29,87,0,0,0 ; lea 0x57(%rip),%r11 # 31d0 <_sk_store_a8_hsw+0xbe>
+ DB 76,141,29,87,0,0,0 ; lea 0x57(%rip),%r11 # 336c <_sk_store_a8_hsw+0xbe>
DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
DB 196,67,121,20,4,25,0 ; vpextrb $0x0,%xmm8,(%r9,%rbx,1)
- DB 235,201 ; jmp 3154 <_sk_store_a8_hsw+0x42>
+ DB 235,201 ; jmp 32f0 <_sk_store_a8_hsw+0x42>
DB 196,67,121,20,68,25,2,4 ; vpextrb $0x4,%xmm8,0x2(%r9,%rbx,1)
- DB 196,98,57,0,5,4,64,0,0 ; vpshufb 0x4004(%rip),%xmm8,%xmm8 # 71a0 <_sk_clut_4D_hsw+0x114c>
+ DB 196,98,57,0,5,40,67,0,0 ; vpshufb 0x4328(%rip),%xmm8,%xmm8 # 7660 <_sk_clut_4D_hsw+0x1140>
DB 196,67,121,21,4,25,0 ; vpextrw $0x0,%xmm8,(%r9,%rbx,1)
- DB 235,175 ; jmp 3154 <_sk_store_a8_hsw+0x42>
+ DB 235,175 ; jmp 32f0 <_sk_store_a8_hsw+0x42>
DB 196,67,121,20,68,25,6,12 ; vpextrb $0xc,%xmm8,0x6(%r9,%rbx,1)
DB 196,67,121,20,68,25,5,10 ; vpextrb $0xa,%xmm8,0x5(%r9,%rbx,1)
DB 196,67,121,20,68,25,4,8 ; vpextrb $0x8,%xmm8,0x4(%r9,%rbx,1)
- DB 196,98,57,0,5,234,63,0,0 ; vpshufb 0x3fea(%rip),%xmm8,%xmm8 # 71b0 <_sk_clut_4D_hsw+0x115c>
+ DB 196,98,57,0,5,14,67,0,0 ; vpshufb 0x430e(%rip),%xmm8,%xmm8 # 7670 <_sk_clut_4D_hsw+0x1150>
DB 196,65,121,126,4,25 ; vmovd %xmm8,(%r9,%rbx,1)
- DB 235,134 ; jmp 3154 <_sk_store_a8_hsw+0x42>
+ DB 235,134 ; jmp 32f0 <_sk_store_a8_hsw+0x42>
DB 102,144 ; xchg %ax,%ax
DB 178,255 ; mov $0xff,%dl
DB 255 ; (bad)
@@ -2976,15 +3086,15 @@ _sk_load_g8_hsw LABEL PROC
DB 76,3,8 ; add (%rax),%r9
DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 117,58 ; jne 323f <_sk_load_g8_hsw+0x53>
+ DB 117,58 ; jne 33db <_sk_load_g8_hsw+0x53>
DB 196,194,121,48,4,25 ; vpmovzxbw (%r9,%rbx,1),%xmm0
- DB 197,249,219,5,173,63,0,0 ; vpand 0x3fad(%rip),%xmm0,%xmm0 # 71c0 <_sk_clut_4D_hsw+0x116c>
+ DB 197,249,219,5,209,66,0,0 ; vpand 0x42d1(%rip),%xmm0,%xmm0 # 7680 <_sk_clut_4D_hsw+0x1160>
DB 196,226,125,51,192 ; vpmovzxwd %xmm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,226,125,24,13,179,57,0,0 ; vbroadcastss 0x39b3(%rip),%ymm1 # 6bd8 <_sk_clut_4D_hsw+0xb84>
+ DB 196,226,125,24,13,227,60,0,0 ; vbroadcastss 0x3ce3(%rip),%ymm1 # 70a4 <_sk_clut_4D_hsw+0xb84>
DB 197,252,89,193 ; vmulps %ymm1,%ymm0,%ymm0
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,29,168,57,0,0 ; vbroadcastss 0x39a8(%rip),%ymm3 # 6bdc <_sk_clut_4D_hsw+0xb88>
+ DB 196,226,125,24,29,216,60,0,0 ; vbroadcastss 0x3cd8(%rip),%ymm3 # 70a8 <_sk_clut_4D_hsw+0xb88>
DB 197,252,40,200 ; vmovaps %ymm0,%ymm1
DB 197,252,40,208 ; vmovaps %ymm0,%ymm2
DB 91 ; pop %rbx
@@ -2994,15 +3104,15 @@ _sk_load_g8_hsw LABEL PROC
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 65,254,202 ; dec %r10b
DB 65,128,250,6 ; cmp $0x6,%r10b
- DB 119,184 ; ja 320b <_sk_load_g8_hsw+0x1f>
+ DB 119,184 ; ja 33a7 <_sk_load_g8_hsw+0x1f>
DB 69,15,182,210 ; movzbl %r10b,%r10d
- DB 76,141,29,122,0,0,0 ; lea 0x7a(%rip),%r11 # 32d8 <_sk_load_g8_hsw+0xec>
+ DB 76,141,29,122,0,0,0 ; lea 0x7a(%rip),%r11 # 3474 <_sk_load_g8_hsw+0xec>
DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
DB 65,15,182,4,25 ; movzbl (%r9,%rbx,1),%eax
DB 197,249,110,192 ; vmovd %eax,%xmm0
- DB 235,153 ; jmp 320b <_sk_load_g8_hsw+0x1f>
+ DB 235,153 ; jmp 33a7 <_sk_load_g8_hsw+0x1f>
DB 65,15,182,68,25,2 ; movzbl 0x2(%r9,%rbx,1),%eax
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 197,249,196,192,2 ; vpinsrw $0x2,%eax,%xmm0,%xmm0
@@ -3010,7 +3120,7 @@ _sk_load_g8_hsw LABEL PROC
DB 197,249,110,200 ; vmovd %eax,%xmm1
DB 196,226,121,48,201 ; vpmovzxbw %xmm1,%xmm1
DB 196,227,121,2,193,1 ; vpblendd $0x1,%xmm1,%xmm0,%xmm0
- DB 233,113,255,255,255 ; jmpq 320b <_sk_load_g8_hsw+0x1f>
+ DB 233,113,255,255,255 ; jmpq 33a7 <_sk_load_g8_hsw+0x1f>
DB 65,15,182,68,25,6 ; movzbl 0x6(%r9,%rbx,1),%eax
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 197,249,196,192,6 ; vpinsrw $0x6,%eax,%xmm0,%xmm0
@@ -3021,7 +3131,7 @@ _sk_load_g8_hsw LABEL PROC
DB 196,193,121,110,12,25 ; vmovd (%r9,%rbx,1),%xmm1
DB 196,226,121,48,201 ; vpmovzxbw %xmm1,%xmm1
DB 196,227,113,2,192,12 ; vpblendd $0xc,%xmm0,%xmm1,%xmm0
- DB 233,54,255,255,255 ; jmpq 320b <_sk_load_g8_hsw+0x1f>
+ DB 233,54,255,255,255 ; jmpq 33a7 <_sk_load_g8_hsw+0x1f>
DB 15,31,0 ; nopl (%rax)
DB 143 ; (bad)
DB 255 ; (bad)
@@ -3053,15 +3163,15 @@ _sk_load_g8_dst_hsw LABEL PROC
DB 76,3,8 ; add (%rax),%r9
DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 117,58 ; jne 3347 <_sk_load_g8_dst_hsw+0x53>
+ DB 117,58 ; jne 34e3 <_sk_load_g8_dst_hsw+0x53>
DB 196,194,121,48,36,25 ; vpmovzxbw (%r9,%rbx,1),%xmm4
- DB 197,217,219,37,181,62,0,0 ; vpand 0x3eb5(%rip),%xmm4,%xmm4 # 71d0 <_sk_clut_4D_hsw+0x117c>
+ DB 197,217,219,37,217,65,0,0 ; vpand 0x41d9(%rip),%xmm4,%xmm4 # 7690 <_sk_clut_4D_hsw+0x1170>
DB 196,226,125,51,228 ; vpmovzxwd %xmm4,%ymm4
DB 197,252,91,228 ; vcvtdq2ps %ymm4,%ymm4
- DB 196,226,125,24,45,179,56,0,0 ; vbroadcastss 0x38b3(%rip),%ymm5 # 6be0 <_sk_clut_4D_hsw+0xb8c>
+ DB 196,226,125,24,45,227,59,0,0 ; vbroadcastss 0x3be3(%rip),%ymm5 # 70ac <_sk_clut_4D_hsw+0xb8c>
DB 197,220,89,229 ; vmulps %ymm5,%ymm4,%ymm4
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,61,168,56,0,0 ; vbroadcastss 0x38a8(%rip),%ymm7 # 6be4 <_sk_clut_4D_hsw+0xb90>
+ DB 196,226,125,24,61,216,59,0,0 ; vbroadcastss 0x3bd8(%rip),%ymm7 # 70b0 <_sk_clut_4D_hsw+0xb90>
DB 197,252,40,236 ; vmovaps %ymm4,%ymm5
DB 197,252,40,244 ; vmovaps %ymm4,%ymm6
DB 91 ; pop %rbx
@@ -3071,15 +3181,15 @@ _sk_load_g8_dst_hsw LABEL PROC
DB 197,217,239,228 ; vpxor %xmm4,%xmm4,%xmm4
DB 65,254,202 ; dec %r10b
DB 65,128,250,6 ; cmp $0x6,%r10b
- DB 119,184 ; ja 3313 <_sk_load_g8_dst_hsw+0x1f>
+ DB 119,184 ; ja 34af <_sk_load_g8_dst_hsw+0x1f>
DB 69,15,182,210 ; movzbl %r10b,%r10d
- DB 76,141,29,122,0,0,0 ; lea 0x7a(%rip),%r11 # 33e0 <_sk_load_g8_dst_hsw+0xec>
+ DB 76,141,29,122,0,0,0 ; lea 0x7a(%rip),%r11 # 357c <_sk_load_g8_dst_hsw+0xec>
DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
DB 65,15,182,4,25 ; movzbl (%r9,%rbx,1),%eax
DB 197,249,110,224 ; vmovd %eax,%xmm4
- DB 235,153 ; jmp 3313 <_sk_load_g8_dst_hsw+0x1f>
+ DB 235,153 ; jmp 34af <_sk_load_g8_dst_hsw+0x1f>
DB 65,15,182,68,25,2 ; movzbl 0x2(%r9,%rbx,1),%eax
DB 197,217,239,228 ; vpxor %xmm4,%xmm4,%xmm4
DB 197,217,196,224,2 ; vpinsrw $0x2,%eax,%xmm4,%xmm4
@@ -3087,7 +3197,7 @@ _sk_load_g8_dst_hsw LABEL PROC
DB 197,249,110,232 ; vmovd %eax,%xmm5
DB 196,226,121,48,237 ; vpmovzxbw %xmm5,%xmm5
DB 196,227,89,2,229,1 ; vpblendd $0x1,%xmm5,%xmm4,%xmm4
- DB 233,113,255,255,255 ; jmpq 3313 <_sk_load_g8_dst_hsw+0x1f>
+ DB 233,113,255,255,255 ; jmpq 34af <_sk_load_g8_dst_hsw+0x1f>
DB 65,15,182,68,25,6 ; movzbl 0x6(%r9,%rbx,1),%eax
DB 197,217,239,228 ; vpxor %xmm4,%xmm4,%xmm4
DB 197,217,196,224,6 ; vpinsrw $0x6,%eax,%xmm4,%xmm4
@@ -3098,7 +3208,7 @@ _sk_load_g8_dst_hsw LABEL PROC
DB 196,193,121,110,44,25 ; vmovd (%r9,%rbx,1),%xmm5
DB 196,226,121,48,237 ; vpmovzxbw %xmm5,%xmm5
DB 196,227,81,2,228,12 ; vpblendd $0xc,%xmm4,%xmm5,%xmm4
- DB 233,54,255,255,255 ; jmpq 3313 <_sk_load_g8_dst_hsw+0x1f>
+ DB 233,54,255,255,255 ; jmpq 34af <_sk_load_g8_dst_hsw+0x1f>
DB 15,31,0 ; nopl (%rax)
DB 143 ; (bad)
DB 255 ; (bad)
@@ -3158,10 +3268,10 @@ _sk_gather_g8_hsw LABEL PROC
DB 196,227,121,32,192,7 ; vpinsrb $0x7,%eax,%xmm0,%xmm0
DB 196,226,125,49,192 ; vpmovzxbd %xmm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,226,125,24,13,54,55,0,0 ; vbroadcastss 0x3736(%rip),%ymm1 # 6be8 <_sk_clut_4D_hsw+0xb94>
+ DB 196,226,125,24,13,102,58,0,0 ; vbroadcastss 0x3a66(%rip),%ymm1 # 70b4 <_sk_clut_4D_hsw+0xb94>
DB 197,252,89,193 ; vmulps %ymm1,%ymm0,%ymm0
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,29,43,55,0,0 ; vbroadcastss 0x372b(%rip),%ymm3 # 6bec <_sk_clut_4D_hsw+0xb98>
+ DB 196,226,125,24,29,91,58,0,0 ; vbroadcastss 0x3a5b(%rip),%ymm3 # 70b8 <_sk_clut_4D_hsw+0xb98>
DB 197,252,40,200 ; vmovaps %ymm0,%ymm1
DB 197,252,40,208 ; vmovaps %ymm0,%ymm2
DB 255,224 ; jmpq *%rax
@@ -3177,26 +3287,26 @@ _sk_load_565_hsw LABEL PROC
DB 76,3,8 ; add (%rax),%r9
DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 117,115 ; jne 355a <_sk_load_565_hsw+0x8f>
+ DB 117,115 ; jne 36f6 <_sk_load_565_hsw+0x8f>
DB 196,193,122,111,4,89 ; vmovdqu (%r9,%rbx,2),%xmm0
DB 196,226,125,51,208 ; vpmovzxwd %xmm0,%ymm2
- DB 196,226,125,88,5,245,54,0,0 ; vpbroadcastd 0x36f5(%rip),%ymm0 # 6bf0 <_sk_clut_4D_hsw+0xb9c>
+ DB 196,226,125,88,5,37,58,0,0 ; vpbroadcastd 0x3a25(%rip),%ymm0 # 70bc <_sk_clut_4D_hsw+0xb9c>
DB 197,237,219,192 ; vpand %ymm0,%ymm2,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,226,125,24,13,232,54,0,0 ; vbroadcastss 0x36e8(%rip),%ymm1 # 6bf4 <_sk_clut_4D_hsw+0xba0>
+ DB 196,226,125,24,13,24,58,0,0 ; vbroadcastss 0x3a18(%rip),%ymm1 # 70c0 <_sk_clut_4D_hsw+0xba0>
DB 197,252,89,193 ; vmulps %ymm1,%ymm0,%ymm0
- DB 196,226,125,88,13,223,54,0,0 ; vpbroadcastd 0x36df(%rip),%ymm1 # 6bf8 <_sk_clut_4D_hsw+0xba4>
+ DB 196,226,125,88,13,15,58,0,0 ; vpbroadcastd 0x3a0f(%rip),%ymm1 # 70c4 <_sk_clut_4D_hsw+0xba4>
DB 197,237,219,201 ; vpand %ymm1,%ymm2,%ymm1
DB 197,252,91,201 ; vcvtdq2ps %ymm1,%ymm1
- DB 196,226,125,24,29,210,54,0,0 ; vbroadcastss 0x36d2(%rip),%ymm3 # 6bfc <_sk_clut_4D_hsw+0xba8>
+ DB 196,226,125,24,29,2,58,0,0 ; vbroadcastss 0x3a02(%rip),%ymm3 # 70c8 <_sk_clut_4D_hsw+0xba8>
DB 197,244,89,203 ; vmulps %ymm3,%ymm1,%ymm1
- DB 196,226,125,88,29,201,54,0,0 ; vpbroadcastd 0x36c9(%rip),%ymm3 # 6c00 <_sk_clut_4D_hsw+0xbac>
+ DB 196,226,125,88,29,249,57,0,0 ; vpbroadcastd 0x39f9(%rip),%ymm3 # 70cc <_sk_clut_4D_hsw+0xbac>
DB 197,237,219,211 ; vpand %ymm3,%ymm2,%ymm2
DB 197,252,91,210 ; vcvtdq2ps %ymm2,%ymm2
- DB 196,226,125,24,29,188,54,0,0 ; vbroadcastss 0x36bc(%rip),%ymm3 # 6c04 <_sk_clut_4D_hsw+0xbb0>
+ DB 196,226,125,24,29,236,57,0,0 ; vbroadcastss 0x39ec(%rip),%ymm3 # 70d0 <_sk_clut_4D_hsw+0xbb0>
DB 197,236,89,211 ; vmulps %ymm3,%ymm2,%ymm2
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,29,177,54,0,0 ; vbroadcastss 0x36b1(%rip),%ymm3 # 6c08 <_sk_clut_4D_hsw+0xbb4>
+ DB 196,226,125,24,29,225,57,0,0 ; vbroadcastss 0x39e1(%rip),%ymm3 # 70d4 <_sk_clut_4D_hsw+0xbb4>
DB 91 ; pop %rbx
DB 255,224 ; jmpq *%rax
DB 69,137,194 ; mov %r8d,%r10d
@@ -3204,27 +3314,27 @@ _sk_load_565_hsw LABEL PROC
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 65,254,202 ; dec %r10b
DB 65,128,250,6 ; cmp $0x6,%r10b
- DB 15,135,123,255,255,255 ; ja 34ed <_sk_load_565_hsw+0x22>
+ DB 15,135,123,255,255,255 ; ja 3689 <_sk_load_565_hsw+0x22>
DB 69,15,182,210 ; movzbl %r10b,%r10d
- DB 76,141,29,99,0,0,0 ; lea 0x63(%rip),%r11 # 35e0 <_sk_load_565_hsw+0x115>
+ DB 76,141,29,99,0,0,0 ; lea 0x63(%rip),%r11 # 377c <_sk_load_565_hsw+0x115>
DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
DB 65,15,183,4,89 ; movzwl (%r9,%rbx,2),%eax
DB 197,249,110,192 ; vmovd %eax,%xmm0
- DB 233,89,255,255,255 ; jmpq 34ed <_sk_load_565_hsw+0x22>
+ DB 233,89,255,255,255 ; jmpq 3689 <_sk_load_565_hsw+0x22>
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 196,193,121,196,68,89,4,2 ; vpinsrw $0x2,0x4(%r9,%rbx,2),%xmm0,%xmm0
DB 196,193,121,110,12,89 ; vmovd (%r9,%rbx,2),%xmm1
DB 196,227,121,2,193,1 ; vpblendd $0x1,%xmm1,%xmm0,%xmm0
- DB 233,60,255,255,255 ; jmpq 34ed <_sk_load_565_hsw+0x22>
+ DB 233,60,255,255,255 ; jmpq 3689 <_sk_load_565_hsw+0x22>
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 196,193,121,196,68,89,12,6 ; vpinsrw $0x6,0xc(%r9,%rbx,2),%xmm0,%xmm0
DB 196,193,121,196,68,89,10,5 ; vpinsrw $0x5,0xa(%r9,%rbx,2),%xmm0,%xmm0
DB 196,193,121,196,68,89,8,4 ; vpinsrw $0x4,0x8(%r9,%rbx,2),%xmm0,%xmm0
DB 196,193,122,126,12,89 ; vmovq (%r9,%rbx,2),%xmm1
DB 196,227,113,2,192,12 ; vpblendd $0xc,%xmm0,%xmm1,%xmm0
- DB 233,15,255,255,255 ; jmpq 34ed <_sk_load_565_hsw+0x22>
+ DB 233,15,255,255,255 ; jmpq 3689 <_sk_load_565_hsw+0x22>
DB 102,144 ; xchg %ax,%ax
DB 166 ; cmpsb %es:(%rdi),%ds:(%rsi)
DB 255 ; (bad)
@@ -3256,26 +3366,26 @@ _sk_load_565_dst_hsw LABEL PROC
DB 76,3,8 ; add (%rax),%r9
DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 117,115 ; jne 368b <_sk_load_565_dst_hsw+0x8f>
+ DB 117,115 ; jne 3827 <_sk_load_565_dst_hsw+0x8f>
DB 196,193,122,111,36,89 ; vmovdqu (%r9,%rbx,2),%xmm4
DB 196,226,125,51,244 ; vpmovzxwd %xmm4,%ymm6
- DB 196,226,125,88,37,224,53,0,0 ; vpbroadcastd 0x35e0(%rip),%ymm4 # 6c0c <_sk_clut_4D_hsw+0xbb8>
+ DB 196,226,125,88,37,16,57,0,0 ; vpbroadcastd 0x3910(%rip),%ymm4 # 70d8 <_sk_clut_4D_hsw+0xbb8>
DB 197,205,219,228 ; vpand %ymm4,%ymm6,%ymm4
DB 197,252,91,228 ; vcvtdq2ps %ymm4,%ymm4
- DB 196,226,125,24,45,211,53,0,0 ; vbroadcastss 0x35d3(%rip),%ymm5 # 6c10 <_sk_clut_4D_hsw+0xbbc>
+ DB 196,226,125,24,45,3,57,0,0 ; vbroadcastss 0x3903(%rip),%ymm5 # 70dc <_sk_clut_4D_hsw+0xbbc>
DB 197,220,89,229 ; vmulps %ymm5,%ymm4,%ymm4
- DB 196,226,125,88,45,202,53,0,0 ; vpbroadcastd 0x35ca(%rip),%ymm5 # 6c14 <_sk_clut_4D_hsw+0xbc0>
+ DB 196,226,125,88,45,250,56,0,0 ; vpbroadcastd 0x38fa(%rip),%ymm5 # 70e0 <_sk_clut_4D_hsw+0xbc0>
DB 197,205,219,237 ; vpand %ymm5,%ymm6,%ymm5
DB 197,252,91,237 ; vcvtdq2ps %ymm5,%ymm5
- DB 196,226,125,24,61,189,53,0,0 ; vbroadcastss 0x35bd(%rip),%ymm7 # 6c18 <_sk_clut_4D_hsw+0xbc4>
+ DB 196,226,125,24,61,237,56,0,0 ; vbroadcastss 0x38ed(%rip),%ymm7 # 70e4 <_sk_clut_4D_hsw+0xbc4>
DB 197,212,89,239 ; vmulps %ymm7,%ymm5,%ymm5
- DB 196,226,125,88,61,180,53,0,0 ; vpbroadcastd 0x35b4(%rip),%ymm7 # 6c1c <_sk_clut_4D_hsw+0xbc8>
+ DB 196,226,125,88,61,228,56,0,0 ; vpbroadcastd 0x38e4(%rip),%ymm7 # 70e8 <_sk_clut_4D_hsw+0xbc8>
DB 197,205,219,247 ; vpand %ymm7,%ymm6,%ymm6
DB 197,252,91,246 ; vcvtdq2ps %ymm6,%ymm6
- DB 196,226,125,24,61,167,53,0,0 ; vbroadcastss 0x35a7(%rip),%ymm7 # 6c20 <_sk_clut_4D_hsw+0xbcc>
+ DB 196,226,125,24,61,215,56,0,0 ; vbroadcastss 0x38d7(%rip),%ymm7 # 70ec <_sk_clut_4D_hsw+0xbcc>
DB 197,204,89,247 ; vmulps %ymm7,%ymm6,%ymm6
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,61,156,53,0,0 ; vbroadcastss 0x359c(%rip),%ymm7 # 6c24 <_sk_clut_4D_hsw+0xbd0>
+ DB 196,226,125,24,61,204,56,0,0 ; vbroadcastss 0x38cc(%rip),%ymm7 # 70f0 <_sk_clut_4D_hsw+0xbd0>
DB 91 ; pop %rbx
DB 255,224 ; jmpq *%rax
DB 69,137,194 ; mov %r8d,%r10d
@@ -3283,27 +3393,27 @@ _sk_load_565_dst_hsw LABEL PROC
DB 197,217,239,228 ; vpxor %xmm4,%xmm4,%xmm4
DB 65,254,202 ; dec %r10b
DB 65,128,250,6 ; cmp $0x6,%r10b
- DB 15,135,123,255,255,255 ; ja 361e <_sk_load_565_dst_hsw+0x22>
+ DB 15,135,123,255,255,255 ; ja 37ba <_sk_load_565_dst_hsw+0x22>
DB 69,15,182,210 ; movzbl %r10b,%r10d
- DB 76,141,29,98,0,0,0 ; lea 0x62(%rip),%r11 # 3710 <_sk_load_565_dst_hsw+0x114>
+ DB 76,141,29,98,0,0,0 ; lea 0x62(%rip),%r11 # 38ac <_sk_load_565_dst_hsw+0x114>
DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
DB 65,15,183,4,89 ; movzwl (%r9,%rbx,2),%eax
DB 197,249,110,224 ; vmovd %eax,%xmm4
- DB 233,89,255,255,255 ; jmpq 361e <_sk_load_565_dst_hsw+0x22>
+ DB 233,89,255,255,255 ; jmpq 37ba <_sk_load_565_dst_hsw+0x22>
DB 197,217,239,228 ; vpxor %xmm4,%xmm4,%xmm4
DB 196,193,89,196,100,89,4,2 ; vpinsrw $0x2,0x4(%r9,%rbx,2),%xmm4,%xmm4
DB 196,193,121,110,44,89 ; vmovd (%r9,%rbx,2),%xmm5
DB 196,227,89,2,229,1 ; vpblendd $0x1,%xmm5,%xmm4,%xmm4
- DB 233,60,255,255,255 ; jmpq 361e <_sk_load_565_dst_hsw+0x22>
+ DB 233,60,255,255,255 ; jmpq 37ba <_sk_load_565_dst_hsw+0x22>
DB 197,217,239,228 ; vpxor %xmm4,%xmm4,%xmm4
DB 196,193,89,196,100,89,12,6 ; vpinsrw $0x6,0xc(%r9,%rbx,2),%xmm4,%xmm4
DB 196,193,89,196,100,89,10,5 ; vpinsrw $0x5,0xa(%r9,%rbx,2),%xmm4,%xmm4
DB 196,193,89,196,100,89,8,4 ; vpinsrw $0x4,0x8(%r9,%rbx,2),%xmm4,%xmm4
DB 196,193,122,126,44,89 ; vmovq (%r9,%rbx,2),%xmm5
DB 196,227,81,2,228,12 ; vpblendd $0xc,%xmm4,%xmm5,%xmm4
- DB 233,15,255,255,255 ; jmpq 361e <_sk_load_565_dst_hsw+0x22>
+ DB 233,15,255,255,255 ; jmpq 37ba <_sk_load_565_dst_hsw+0x22>
DB 144 ; nop
DB 167 ; cmpsl %es:(%rdi),%ds:(%rsi)
DB 255 ; (bad)
@@ -3364,23 +3474,23 @@ _sk_gather_565_hsw LABEL PROC
DB 67,15,183,4,89 ; movzwl (%r9,%r11,2),%eax
DB 197,249,196,192,7 ; vpinsrw $0x7,%eax,%xmm0,%xmm0
DB 196,226,125,51,208 ; vpmovzxwd %xmm0,%ymm2
- DB 196,226,125,88,5,75,52,0,0 ; vpbroadcastd 0x344b(%rip),%ymm0 # 6c28 <_sk_clut_4D_hsw+0xbd4>
+ DB 196,226,125,88,5,123,55,0,0 ; vpbroadcastd 0x377b(%rip),%ymm0 # 70f4 <_sk_clut_4D_hsw+0xbd4>
DB 197,237,219,192 ; vpand %ymm0,%ymm2,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,226,125,24,13,62,52,0,0 ; vbroadcastss 0x343e(%rip),%ymm1 # 6c2c <_sk_clut_4D_hsw+0xbd8>
+ DB 196,226,125,24,13,110,55,0,0 ; vbroadcastss 0x376e(%rip),%ymm1 # 70f8 <_sk_clut_4D_hsw+0xbd8>
DB 197,252,89,193 ; vmulps %ymm1,%ymm0,%ymm0
- DB 196,226,125,88,13,53,52,0,0 ; vpbroadcastd 0x3435(%rip),%ymm1 # 6c30 <_sk_clut_4D_hsw+0xbdc>
+ DB 196,226,125,88,13,101,55,0,0 ; vpbroadcastd 0x3765(%rip),%ymm1 # 70fc <_sk_clut_4D_hsw+0xbdc>
DB 197,237,219,201 ; vpand %ymm1,%ymm2,%ymm1
DB 197,252,91,201 ; vcvtdq2ps %ymm1,%ymm1
- DB 196,226,125,24,29,40,52,0,0 ; vbroadcastss 0x3428(%rip),%ymm3 # 6c34 <_sk_clut_4D_hsw+0xbe0>
+ DB 196,226,125,24,29,88,55,0,0 ; vbroadcastss 0x3758(%rip),%ymm3 # 7100 <_sk_clut_4D_hsw+0xbe0>
DB 197,244,89,203 ; vmulps %ymm3,%ymm1,%ymm1
- DB 196,226,125,88,29,31,52,0,0 ; vpbroadcastd 0x341f(%rip),%ymm3 # 6c38 <_sk_clut_4D_hsw+0xbe4>
+ DB 196,226,125,88,29,79,55,0,0 ; vpbroadcastd 0x374f(%rip),%ymm3 # 7104 <_sk_clut_4D_hsw+0xbe4>
DB 197,237,219,211 ; vpand %ymm3,%ymm2,%ymm2
DB 197,252,91,210 ; vcvtdq2ps %ymm2,%ymm2
- DB 196,226,125,24,29,18,52,0,0 ; vbroadcastss 0x3412(%rip),%ymm3 # 6c3c <_sk_clut_4D_hsw+0xbe8>
+ DB 196,226,125,24,29,66,55,0,0 ; vbroadcastss 0x3742(%rip),%ymm3 # 7108 <_sk_clut_4D_hsw+0xbe8>
DB 197,236,89,211 ; vmulps %ymm3,%ymm2,%ymm2
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,29,7,52,0,0 ; vbroadcastss 0x3407(%rip),%ymm3 # 6c40 <_sk_clut_4D_hsw+0xbec>
+ DB 196,226,125,24,29,55,55,0,0 ; vbroadcastss 0x3737(%rip),%ymm3 # 710c <_sk_clut_4D_hsw+0xbec>
DB 255,224 ; jmpq *%rax
PUBLIC _sk_store_565_hsw
@@ -3393,11 +3503,11 @@ _sk_store_565_hsw LABEL PROC
DB 77,1,201 ; add %r9,%r9
DB 76,3,8 ; add (%rax),%r9
DB 72,99,218 ; movslq %edx,%rbx
- DB 196,98,125,24,5,233,51,0,0 ; vbroadcastss 0x33e9(%rip),%ymm8 # 6c44 <_sk_clut_4D_hsw+0xbf0>
+ DB 196,98,125,24,5,25,55,0,0 ; vbroadcastss 0x3719(%rip),%ymm8 # 7110 <_sk_clut_4D_hsw+0xbf0>
DB 196,65,124,89,200 ; vmulps %ymm8,%ymm0,%ymm9
DB 196,65,125,91,201 ; vcvtps2dq %ymm9,%ymm9
DB 196,193,53,114,241,11 ; vpslld $0xb,%ymm9,%ymm9
- DB 196,98,125,24,21,212,51,0,0 ; vbroadcastss 0x33d4(%rip),%ymm10 # 6c48 <_sk_clut_4D_hsw+0xbf4>
+ DB 196,98,125,24,21,4,55,0,0 ; vbroadcastss 0x3704(%rip),%ymm10 # 7114 <_sk_clut_4D_hsw+0xbf4>
DB 196,65,116,89,210 ; vmulps %ymm10,%ymm1,%ymm10
DB 196,65,125,91,210 ; vcvtps2dq %ymm10,%ymm10
DB 196,193,45,114,242,5 ; vpslld $0x5,%ymm10,%ymm10
@@ -3408,7 +3518,7 @@ _sk_store_565_hsw LABEL PROC
DB 196,67,125,57,193,1 ; vextracti128 $0x1,%ymm8,%xmm9
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 77,133,192 ; test %r8,%r8
- DB 117,11 ; jne 38b3 <_sk_store_565_hsw+0x78>
+ DB 117,11 ; jne 3a4f <_sk_store_565_hsw+0x78>
DB 196,65,122,127,4,89 ; vmovdqu %xmm8,(%r9,%rbx,2)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 91 ; pop %rbx
@@ -3417,22 +3527,22 @@ _sk_store_565_hsw LABEL PROC
DB 65,128,226,7 ; and $0x7,%r10b
DB 65,254,202 ; dec %r10b
DB 65,128,250,6 ; cmp $0x6,%r10b
- DB 119,235 ; ja 38ae <_sk_store_565_hsw+0x73>
+ DB 119,235 ; ja 3a4a <_sk_store_565_hsw+0x73>
DB 69,15,182,210 ; movzbl %r10b,%r10d
- DB 76,141,29,66,0,0,0 ; lea 0x42(%rip),%r11 # 3910 <_sk_store_565_hsw+0xd5>
+ DB 76,141,29,66,0,0,0 ; lea 0x42(%rip),%r11 # 3aac <_sk_store_565_hsw+0xd5>
DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
DB 196,67,121,21,4,89,0 ; vpextrw $0x0,%xmm8,(%r9,%rbx,2)
- DB 235,206 ; jmp 38ae <_sk_store_565_hsw+0x73>
+ DB 235,206 ; jmp 3a4a <_sk_store_565_hsw+0x73>
DB 196,67,121,21,68,89,4,2 ; vpextrw $0x2,%xmm8,0x4(%r9,%rbx,2)
DB 196,65,121,126,4,89 ; vmovd %xmm8,(%r9,%rbx,2)
- DB 235,190 ; jmp 38ae <_sk_store_565_hsw+0x73>
+ DB 235,190 ; jmp 3a4a <_sk_store_565_hsw+0x73>
DB 196,67,121,21,68,89,12,6 ; vpextrw $0x6,%xmm8,0xc(%r9,%rbx,2)
DB 196,67,121,21,68,89,10,5 ; vpextrw $0x5,%xmm8,0xa(%r9,%rbx,2)
DB 196,67,121,21,68,89,8,4 ; vpextrw $0x4,%xmm8,0x8(%r9,%rbx,2)
DB 196,65,121,214,4,89 ; vmovq %xmm8,(%r9,%rbx,2)
- DB 235,158 ; jmp 38ae <_sk_store_565_hsw+0x73>
+ DB 235,158 ; jmp 3a4a <_sk_store_565_hsw+0x73>
DB 199 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
@@ -3450,7 +3560,7 @@ _sk_store_565_hsw LABEL PROC
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 232,255,255,255,224 ; callq ffffffffe1003928 <_sk_clut_4D_hsw+0xffffffffe0ffd8d4>
+ DB 232,255,255,255,224 ; callq ffffffffe1003ac4 <_sk_clut_4D_hsw+0xffffffffe0ffd5a4>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; .byte 0xff
@@ -3466,28 +3576,28 @@ _sk_load_4444_hsw LABEL PROC
DB 76,3,8 ; add (%rax),%r9
DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 15,133,139,0,0,0 ; jne 39d7 <_sk_load_4444_hsw+0xab>
+ DB 15,133,139,0,0,0 ; jne 3b73 <_sk_load_4444_hsw+0xab>
DB 196,193,122,111,4,89 ; vmovdqu (%r9,%rbx,2),%xmm0
DB 196,226,125,51,216 ; vpmovzxwd %xmm0,%ymm3
- DB 196,226,125,88,5,236,50,0,0 ; vpbroadcastd 0x32ec(%rip),%ymm0 # 6c4c <_sk_clut_4D_hsw+0xbf8>
+ DB 196,226,125,88,5,28,54,0,0 ; vpbroadcastd 0x361c(%rip),%ymm0 # 7118 <_sk_clut_4D_hsw+0xbf8>
DB 197,229,219,192 ; vpand %ymm0,%ymm3,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,226,125,24,13,223,50,0,0 ; vbroadcastss 0x32df(%rip),%ymm1 # 6c50 <_sk_clut_4D_hsw+0xbfc>
+ DB 196,226,125,24,13,15,54,0,0 ; vbroadcastss 0x360f(%rip),%ymm1 # 711c <_sk_clut_4D_hsw+0xbfc>
DB 197,252,89,193 ; vmulps %ymm1,%ymm0,%ymm0
- DB 196,226,125,88,13,214,50,0,0 ; vpbroadcastd 0x32d6(%rip),%ymm1 # 6c54 <_sk_clut_4D_hsw+0xc00>
+ DB 196,226,125,88,13,6,54,0,0 ; vpbroadcastd 0x3606(%rip),%ymm1 # 7120 <_sk_clut_4D_hsw+0xc00>
DB 197,229,219,201 ; vpand %ymm1,%ymm3,%ymm1
DB 197,252,91,201 ; vcvtdq2ps %ymm1,%ymm1
- DB 196,226,125,24,21,201,50,0,0 ; vbroadcastss 0x32c9(%rip),%ymm2 # 6c58 <_sk_clut_4D_hsw+0xc04>
+ DB 196,226,125,24,21,249,53,0,0 ; vbroadcastss 0x35f9(%rip),%ymm2 # 7124 <_sk_clut_4D_hsw+0xc04>
DB 197,244,89,202 ; vmulps %ymm2,%ymm1,%ymm1
- DB 196,226,125,88,21,192,50,0,0 ; vpbroadcastd 0x32c0(%rip),%ymm2 # 6c5c <_sk_clut_4D_hsw+0xc08>
+ DB 196,226,125,88,21,240,53,0,0 ; vpbroadcastd 0x35f0(%rip),%ymm2 # 7128 <_sk_clut_4D_hsw+0xc08>
DB 197,229,219,210 ; vpand %ymm2,%ymm3,%ymm2
DB 197,252,91,210 ; vcvtdq2ps %ymm2,%ymm2
- DB 196,98,125,24,5,179,50,0,0 ; vbroadcastss 0x32b3(%rip),%ymm8 # 6c60 <_sk_clut_4D_hsw+0xc0c>
+ DB 196,98,125,24,5,227,53,0,0 ; vbroadcastss 0x35e3(%rip),%ymm8 # 712c <_sk_clut_4D_hsw+0xc0c>
DB 196,193,108,89,208 ; vmulps %ymm8,%ymm2,%ymm2
- DB 196,98,125,88,5,169,50,0,0 ; vpbroadcastd 0x32a9(%rip),%ymm8 # 6c64 <_sk_clut_4D_hsw+0xc10>
+ DB 196,98,125,88,5,217,53,0,0 ; vpbroadcastd 0x35d9(%rip),%ymm8 # 7130 <_sk_clut_4D_hsw+0xc10>
DB 196,193,101,219,216 ; vpand %ymm8,%ymm3,%ymm3
DB 197,252,91,219 ; vcvtdq2ps %ymm3,%ymm3
- DB 196,98,125,24,5,155,50,0,0 ; vbroadcastss 0x329b(%rip),%ymm8 # 6c68 <_sk_clut_4D_hsw+0xc14>
+ DB 196,98,125,24,5,203,53,0,0 ; vbroadcastss 0x35cb(%rip),%ymm8 # 7134 <_sk_clut_4D_hsw+0xc14>
DB 196,193,100,89,216 ; vmulps %ymm8,%ymm3,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
DB 91 ; pop %rbx
@@ -3497,27 +3607,27 @@ _sk_load_4444_hsw LABEL PROC
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 65,254,202 ; dec %r10b
DB 65,128,250,6 ; cmp $0x6,%r10b
- DB 15,135,99,255,255,255 ; ja 3952 <_sk_load_4444_hsw+0x26>
+ DB 15,135,99,255,255,255 ; ja 3aee <_sk_load_4444_hsw+0x26>
DB 69,15,182,210 ; movzbl %r10b,%r10d
- DB 76,141,29,98,0,0,0 ; lea 0x62(%rip),%r11 # 3a5c <_sk_load_4444_hsw+0x130>
+ DB 76,141,29,98,0,0,0 ; lea 0x62(%rip),%r11 # 3bf8 <_sk_load_4444_hsw+0x130>
DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
DB 65,15,183,4,89 ; movzwl (%r9,%rbx,2),%eax
DB 197,249,110,192 ; vmovd %eax,%xmm0
- DB 233,65,255,255,255 ; jmpq 3952 <_sk_load_4444_hsw+0x26>
+ DB 233,65,255,255,255 ; jmpq 3aee <_sk_load_4444_hsw+0x26>
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 196,193,121,196,68,89,4,2 ; vpinsrw $0x2,0x4(%r9,%rbx,2),%xmm0,%xmm0
DB 196,193,121,110,12,89 ; vmovd (%r9,%rbx,2),%xmm1
DB 196,227,121,2,193,1 ; vpblendd $0x1,%xmm1,%xmm0,%xmm0
- DB 233,36,255,255,255 ; jmpq 3952 <_sk_load_4444_hsw+0x26>
+ DB 233,36,255,255,255 ; jmpq 3aee <_sk_load_4444_hsw+0x26>
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 196,193,121,196,68,89,12,6 ; vpinsrw $0x6,0xc(%r9,%rbx,2),%xmm0,%xmm0
DB 196,193,121,196,68,89,10,5 ; vpinsrw $0x5,0xa(%r9,%rbx,2),%xmm0,%xmm0
DB 196,193,121,196,68,89,8,4 ; vpinsrw $0x4,0x8(%r9,%rbx,2),%xmm0,%xmm0
DB 196,193,122,126,12,89 ; vmovq (%r9,%rbx,2),%xmm1
DB 196,227,113,2,192,12 ; vpblendd $0xc,%xmm0,%xmm1,%xmm0
- DB 233,247,254,255,255 ; jmpq 3952 <_sk_load_4444_hsw+0x26>
+ DB 233,247,254,255,255 ; jmpq 3aee <_sk_load_4444_hsw+0x26>
DB 144 ; nop
DB 167 ; cmpsl %es:(%rdi),%ds:(%rsi)
DB 255 ; (bad)
@@ -3550,28 +3660,28 @@ _sk_load_4444_dst_hsw LABEL PROC
DB 76,3,8 ; add (%rax),%r9
DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 15,133,139,0,0,0 ; jne 3b23 <_sk_load_4444_dst_hsw+0xab>
+ DB 15,133,139,0,0,0 ; jne 3cbf <_sk_load_4444_dst_hsw+0xab>
DB 196,193,122,111,36,89 ; vmovdqu (%r9,%rbx,2),%xmm4
DB 196,226,125,51,252 ; vpmovzxwd %xmm4,%ymm7
- DB 196,226,125,88,37,192,49,0,0 ; vpbroadcastd 0x31c0(%rip),%ymm4 # 6c6c <_sk_clut_4D_hsw+0xc18>
+ DB 196,226,125,88,37,240,52,0,0 ; vpbroadcastd 0x34f0(%rip),%ymm4 # 7138 <_sk_clut_4D_hsw+0xc18>
DB 197,197,219,228 ; vpand %ymm4,%ymm7,%ymm4
DB 197,252,91,228 ; vcvtdq2ps %ymm4,%ymm4
- DB 196,226,125,24,45,179,49,0,0 ; vbroadcastss 0x31b3(%rip),%ymm5 # 6c70 <_sk_clut_4D_hsw+0xc1c>
+ DB 196,226,125,24,45,227,52,0,0 ; vbroadcastss 0x34e3(%rip),%ymm5 # 713c <_sk_clut_4D_hsw+0xc1c>
DB 197,220,89,229 ; vmulps %ymm5,%ymm4,%ymm4
- DB 196,226,125,88,45,170,49,0,0 ; vpbroadcastd 0x31aa(%rip),%ymm5 # 6c74 <_sk_clut_4D_hsw+0xc20>
+ DB 196,226,125,88,45,218,52,0,0 ; vpbroadcastd 0x34da(%rip),%ymm5 # 7140 <_sk_clut_4D_hsw+0xc20>
DB 197,197,219,237 ; vpand %ymm5,%ymm7,%ymm5
DB 197,252,91,237 ; vcvtdq2ps %ymm5,%ymm5
- DB 196,226,125,24,53,157,49,0,0 ; vbroadcastss 0x319d(%rip),%ymm6 # 6c78 <_sk_clut_4D_hsw+0xc24>
+ DB 196,226,125,24,53,205,52,0,0 ; vbroadcastss 0x34cd(%rip),%ymm6 # 7144 <_sk_clut_4D_hsw+0xc24>
DB 197,212,89,238 ; vmulps %ymm6,%ymm5,%ymm5
- DB 196,226,125,88,53,148,49,0,0 ; vpbroadcastd 0x3194(%rip),%ymm6 # 6c7c <_sk_clut_4D_hsw+0xc28>
+ DB 196,226,125,88,53,196,52,0,0 ; vpbroadcastd 0x34c4(%rip),%ymm6 # 7148 <_sk_clut_4D_hsw+0xc28>
DB 197,197,219,246 ; vpand %ymm6,%ymm7,%ymm6
DB 197,252,91,246 ; vcvtdq2ps %ymm6,%ymm6
- DB 196,98,125,24,5,135,49,0,0 ; vbroadcastss 0x3187(%rip),%ymm8 # 6c80 <_sk_clut_4D_hsw+0xc2c>
+ DB 196,98,125,24,5,183,52,0,0 ; vbroadcastss 0x34b7(%rip),%ymm8 # 714c <_sk_clut_4D_hsw+0xc2c>
DB 196,193,76,89,240 ; vmulps %ymm8,%ymm6,%ymm6
- DB 196,98,125,88,5,125,49,0,0 ; vpbroadcastd 0x317d(%rip),%ymm8 # 6c84 <_sk_clut_4D_hsw+0xc30>
+ DB 196,98,125,88,5,173,52,0,0 ; vpbroadcastd 0x34ad(%rip),%ymm8 # 7150 <_sk_clut_4D_hsw+0xc30>
DB 196,193,69,219,248 ; vpand %ymm8,%ymm7,%ymm7
DB 197,252,91,255 ; vcvtdq2ps %ymm7,%ymm7
- DB 196,98,125,24,5,111,49,0,0 ; vbroadcastss 0x316f(%rip),%ymm8 # 6c88 <_sk_clut_4D_hsw+0xc34>
+ DB 196,98,125,24,5,159,52,0,0 ; vbroadcastss 0x349f(%rip),%ymm8 # 7154 <_sk_clut_4D_hsw+0xc34>
DB 196,193,68,89,248 ; vmulps %ymm8,%ymm7,%ymm7
DB 72,173 ; lods %ds:(%rsi),%rax
DB 91 ; pop %rbx
@@ -3581,27 +3691,27 @@ _sk_load_4444_dst_hsw LABEL PROC
DB 197,217,239,228 ; vpxor %xmm4,%xmm4,%xmm4
DB 65,254,202 ; dec %r10b
DB 65,128,250,6 ; cmp $0x6,%r10b
- DB 15,135,99,255,255,255 ; ja 3a9e <_sk_load_4444_dst_hsw+0x26>
+ DB 15,135,99,255,255,255 ; ja 3c3a <_sk_load_4444_dst_hsw+0x26>
DB 69,15,182,210 ; movzbl %r10b,%r10d
- DB 76,141,29,98,0,0,0 ; lea 0x62(%rip),%r11 # 3ba8 <_sk_load_4444_dst_hsw+0x130>
+ DB 76,141,29,98,0,0,0 ; lea 0x62(%rip),%r11 # 3d44 <_sk_load_4444_dst_hsw+0x130>
DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
DB 65,15,183,4,89 ; movzwl (%r9,%rbx,2),%eax
DB 197,249,110,224 ; vmovd %eax,%xmm4
- DB 233,65,255,255,255 ; jmpq 3a9e <_sk_load_4444_dst_hsw+0x26>
+ DB 233,65,255,255,255 ; jmpq 3c3a <_sk_load_4444_dst_hsw+0x26>
DB 197,217,239,228 ; vpxor %xmm4,%xmm4,%xmm4
DB 196,193,89,196,100,89,4,2 ; vpinsrw $0x2,0x4(%r9,%rbx,2),%xmm4,%xmm4
DB 196,193,121,110,44,89 ; vmovd (%r9,%rbx,2),%xmm5
DB 196,227,89,2,229,1 ; vpblendd $0x1,%xmm5,%xmm4,%xmm4
- DB 233,36,255,255,255 ; jmpq 3a9e <_sk_load_4444_dst_hsw+0x26>
+ DB 233,36,255,255,255 ; jmpq 3c3a <_sk_load_4444_dst_hsw+0x26>
DB 197,217,239,228 ; vpxor %xmm4,%xmm4,%xmm4
DB 196,193,89,196,100,89,12,6 ; vpinsrw $0x6,0xc(%r9,%rbx,2),%xmm4,%xmm4
DB 196,193,89,196,100,89,10,5 ; vpinsrw $0x5,0xa(%r9,%rbx,2),%xmm4,%xmm4
DB 196,193,89,196,100,89,8,4 ; vpinsrw $0x4,0x8(%r9,%rbx,2),%xmm4,%xmm4
DB 196,193,122,126,44,89 ; vmovq (%r9,%rbx,2),%xmm5
DB 196,227,81,2,228,12 ; vpblendd $0xc,%xmm4,%xmm5,%xmm4
- DB 233,247,254,255,255 ; jmpq 3a9e <_sk_load_4444_dst_hsw+0x26>
+ DB 233,247,254,255,255 ; jmpq 3c3a <_sk_load_4444_dst_hsw+0x26>
DB 144 ; nop
DB 167 ; cmpsl %es:(%rdi),%ds:(%rsi)
DB 255 ; (bad)
@@ -3662,25 +3772,25 @@ _sk_gather_4444_hsw LABEL PROC
DB 67,15,183,4,89 ; movzwl (%r9,%r11,2),%eax
DB 197,249,196,192,7 ; vpinsrw $0x7,%eax,%xmm0,%xmm0
DB 196,226,125,51,216 ; vpmovzxwd %xmm0,%ymm3
- DB 196,226,125,88,5,23,48,0,0 ; vpbroadcastd 0x3017(%rip),%ymm0 # 6c8c <_sk_clut_4D_hsw+0xc38>
+ DB 196,226,125,88,5,71,51,0,0 ; vpbroadcastd 0x3347(%rip),%ymm0 # 7158 <_sk_clut_4D_hsw+0xc38>
DB 197,229,219,192 ; vpand %ymm0,%ymm3,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,226,125,24,13,10,48,0,0 ; vbroadcastss 0x300a(%rip),%ymm1 # 6c90 <_sk_clut_4D_hsw+0xc3c>
+ DB 196,226,125,24,13,58,51,0,0 ; vbroadcastss 0x333a(%rip),%ymm1 # 715c <_sk_clut_4D_hsw+0xc3c>
DB 197,252,89,193 ; vmulps %ymm1,%ymm0,%ymm0
- DB 196,226,125,88,13,1,48,0,0 ; vpbroadcastd 0x3001(%rip),%ymm1 # 6c94 <_sk_clut_4D_hsw+0xc40>
+ DB 196,226,125,88,13,49,51,0,0 ; vpbroadcastd 0x3331(%rip),%ymm1 # 7160 <_sk_clut_4D_hsw+0xc40>
DB 197,229,219,201 ; vpand %ymm1,%ymm3,%ymm1
DB 197,252,91,201 ; vcvtdq2ps %ymm1,%ymm1
- DB 196,226,125,24,21,244,47,0,0 ; vbroadcastss 0x2ff4(%rip),%ymm2 # 6c98 <_sk_clut_4D_hsw+0xc44>
+ DB 196,226,125,24,21,36,51,0,0 ; vbroadcastss 0x3324(%rip),%ymm2 # 7164 <_sk_clut_4D_hsw+0xc44>
DB 197,244,89,202 ; vmulps %ymm2,%ymm1,%ymm1
- DB 196,226,125,88,21,235,47,0,0 ; vpbroadcastd 0x2feb(%rip),%ymm2 # 6c9c <_sk_clut_4D_hsw+0xc48>
+ DB 196,226,125,88,21,27,51,0,0 ; vpbroadcastd 0x331b(%rip),%ymm2 # 7168 <_sk_clut_4D_hsw+0xc48>
DB 197,229,219,210 ; vpand %ymm2,%ymm3,%ymm2
DB 197,252,91,210 ; vcvtdq2ps %ymm2,%ymm2
- DB 196,98,125,24,5,222,47,0,0 ; vbroadcastss 0x2fde(%rip),%ymm8 # 6ca0 <_sk_clut_4D_hsw+0xc4c>
+ DB 196,98,125,24,5,14,51,0,0 ; vbroadcastss 0x330e(%rip),%ymm8 # 716c <_sk_clut_4D_hsw+0xc4c>
DB 196,193,108,89,208 ; vmulps %ymm8,%ymm2,%ymm2
- DB 196,98,125,88,5,212,47,0,0 ; vpbroadcastd 0x2fd4(%rip),%ymm8 # 6ca4 <_sk_clut_4D_hsw+0xc50>
+ DB 196,98,125,88,5,4,51,0,0 ; vpbroadcastd 0x3304(%rip),%ymm8 # 7170 <_sk_clut_4D_hsw+0xc50>
DB 196,193,101,219,216 ; vpand %ymm8,%ymm3,%ymm3
DB 197,252,91,219 ; vcvtdq2ps %ymm3,%ymm3
- DB 196,98,125,24,5,198,47,0,0 ; vbroadcastss 0x2fc6(%rip),%ymm8 # 6ca8 <_sk_clut_4D_hsw+0xc54>
+ DB 196,98,125,24,5,246,50,0,0 ; vbroadcastss 0x32f6(%rip),%ymm8 # 7174 <_sk_clut_4D_hsw+0xc54>
DB 196,193,100,89,216 ; vmulps %ymm8,%ymm3,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -3695,7 +3805,7 @@ _sk_store_4444_hsw LABEL PROC
DB 77,1,201 ; add %r9,%r9
DB 76,3,8 ; add (%rax),%r9
DB 72,99,218 ; movslq %edx,%rbx
- DB 196,98,125,24,5,161,47,0,0 ; vbroadcastss 0x2fa1(%rip),%ymm8 # 6cac <_sk_clut_4D_hsw+0xc58>
+ DB 196,98,125,24,5,209,50,0,0 ; vbroadcastss 0x32d1(%rip),%ymm8 # 7178 <_sk_clut_4D_hsw+0xc58>
DB 196,65,124,89,200 ; vmulps %ymm8,%ymm0,%ymm9
DB 196,65,125,91,201 ; vcvtps2dq %ymm9,%ymm9
DB 196,193,53,114,241,12 ; vpslld $0xc,%ymm9,%ymm9
@@ -3713,7 +3823,7 @@ _sk_store_4444_hsw LABEL PROC
DB 196,67,125,57,193,1 ; vextracti128 $0x1,%ymm8,%xmm9
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 77,133,192 ; test %r8,%r8
- DB 117,11 ; jne 3d6f <_sk_store_4444_hsw+0x84>
+ DB 117,11 ; jne 3f0b <_sk_store_4444_hsw+0x84>
DB 196,65,122,127,4,89 ; vmovdqu %xmm8,(%r9,%rbx,2)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 91 ; pop %rbx
@@ -3722,22 +3832,22 @@ _sk_store_4444_hsw LABEL PROC
DB 65,128,226,7 ; and $0x7,%r10b
DB 65,254,202 ; dec %r10b
DB 65,128,250,6 ; cmp $0x6,%r10b
- DB 119,235 ; ja 3d6a <_sk_store_4444_hsw+0x7f>
+ DB 119,235 ; ja 3f06 <_sk_store_4444_hsw+0x7f>
DB 69,15,182,210 ; movzbl %r10b,%r10d
- DB 76,141,29,66,0,0,0 ; lea 0x42(%rip),%r11 # 3dcc <_sk_store_4444_hsw+0xe1>
+ DB 76,141,29,66,0,0,0 ; lea 0x42(%rip),%r11 # 3f68 <_sk_store_4444_hsw+0xe1>
DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
DB 196,67,121,21,4,89,0 ; vpextrw $0x0,%xmm8,(%r9,%rbx,2)
- DB 235,206 ; jmp 3d6a <_sk_store_4444_hsw+0x7f>
+ DB 235,206 ; jmp 3f06 <_sk_store_4444_hsw+0x7f>
DB 196,67,121,21,68,89,4,2 ; vpextrw $0x2,%xmm8,0x4(%r9,%rbx,2)
DB 196,65,121,126,4,89 ; vmovd %xmm8,(%r9,%rbx,2)
- DB 235,190 ; jmp 3d6a <_sk_store_4444_hsw+0x7f>
+ DB 235,190 ; jmp 3f06 <_sk_store_4444_hsw+0x7f>
DB 196,67,121,21,68,89,12,6 ; vpextrw $0x6,%xmm8,0xc(%r9,%rbx,2)
DB 196,67,121,21,68,89,10,5 ; vpextrw $0x5,%xmm8,0xa(%r9,%rbx,2)
DB 196,67,121,21,68,89,8,4 ; vpextrw $0x4,%xmm8,0x8(%r9,%rbx,2)
DB 196,65,121,214,4,89 ; vmovq %xmm8,(%r9,%rbx,2)
- DB 235,158 ; jmp 3d6a <_sk_store_4444_hsw+0x7f>
+ DB 235,158 ; jmp 3f06 <_sk_store_4444_hsw+0x7f>
DB 199 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
@@ -3755,90 +3865,178 @@ _sk_store_4444_hsw LABEL PROC
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 232,255,255,255,224 ; callq ffffffffe1003de4 <_sk_clut_4D_hsw+0xffffffffe0ffdd90>
+ DB 232,255,255,255,224 ; callq ffffffffe1003f80 <_sk_clut_4D_hsw+0xffffffffe0ffda60>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; .byte 0xff
PUBLIC _sk_load_8888_hsw
_sk_load_8888_hsw LABEL PROC
- DB 73,137,201 ; mov %rcx,%r9
+ DB 83 ; push %rbx
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,99,80,8 ; movslq 0x8(%rax),%r10
- DB 73,99,201 ; movslq %r9d,%rcx
- DB 73,15,175,202 ; imul %r10,%rcx
- DB 72,193,225,2 ; shl $0x2,%rcx
- DB 72,3,8 ; add (%rax),%rcx
- DB 72,99,194 ; movslq %edx,%rax
- DB 72,141,4,129 ; lea (%rcx,%rax,4),%rax
+ DB 76,99,201 ; movslq %ecx,%r9
+ DB 77,15,175,202 ; imul %r10,%r9
+ DB 73,193,225,2 ; shl $0x2,%r9
+ DB 76,3,8 ; add (%rax),%r9
+ DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 117,87 ; jne 3e62 <_sk_load_8888_hsw+0x7a>
- DB 197,252,16,24 ; vmovups (%rax),%ymm3
- DB 197,228,84,5,233,48,0,0 ; vandps 0x30e9(%rip),%ymm3,%ymm0 # 6f00 <_sk_clut_4D_hsw+0xeac>
+ DB 117,87 ; jne 3ff8 <_sk_load_8888_hsw+0x74>
+ DB 196,193,126,111,28,153 ; vmovdqu (%r9,%rbx,4),%ymm3
+ DB 197,229,219,5,17,52,0,0 ; vpand 0x3411(%rip),%ymm3,%ymm0 # 73c0 <_sk_clut_4D_hsw+0xea0>
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,98,125,24,5,140,46,0,0 ; vbroadcastss 0x2e8c(%rip),%ymm8 # 6cb0 <_sk_clut_4D_hsw+0xc5c>
+ DB 196,98,125,24,5,192,49,0,0 ; vbroadcastss 0x31c0(%rip),%ymm8 # 717c <_sk_clut_4D_hsw+0xc5c>
DB 196,193,124,89,192 ; vmulps %ymm8,%ymm0,%ymm0
- DB 196,226,101,0,13,238,48,0,0 ; vpshufb 0x30ee(%rip),%ymm3,%ymm1 # 6f20 <_sk_clut_4D_hsw+0xecc>
+ DB 196,226,101,0,13,22,52,0,0 ; vpshufb 0x3416(%rip),%ymm3,%ymm1 # 73e0 <_sk_clut_4D_hsw+0xec0>
DB 197,252,91,201 ; vcvtdq2ps %ymm1,%ymm1
DB 196,193,116,89,200 ; vmulps %ymm8,%ymm1,%ymm1
- DB 196,226,101,0,21,252,48,0,0 ; vpshufb 0x30fc(%rip),%ymm3,%ymm2 # 6f40 <_sk_clut_4D_hsw+0xeec>
+ DB 196,226,101,0,21,36,52,0,0 ; vpshufb 0x3424(%rip),%ymm3,%ymm2 # 7400 <_sk_clut_4D_hsw+0xee0>
DB 197,252,91,210 ; vcvtdq2ps %ymm2,%ymm2
DB 196,193,108,89,208 ; vmulps %ymm8,%ymm2,%ymm2
DB 197,229,114,211,24 ; vpsrld $0x18,%ymm3,%ymm3
DB 197,252,91,219 ; vcvtdq2ps %ymm3,%ymm3
DB 196,193,100,89,216 ; vmulps %ymm8,%ymm3,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 76,137,201 ; mov %r9,%rcx
+ DB 91 ; pop %rbx
DB 255,224 ; jmpq *%rax
- DB 185,8,0,0,0 ; mov $0x8,%ecx
- DB 68,41,193 ; sub %r8d,%ecx
- DB 192,225,3 ; shl $0x3,%cl
- DB 73,199,194,255,255,255,255 ; mov $0xffffffffffffffff,%r10
- DB 73,211,234 ; shr %cl,%r10
- DB 196,193,249,110,194 ; vmovq %r10,%xmm0
- DB 196,226,125,33,192 ; vpmovsxbd %xmm0,%ymm0
- DB 196,226,125,44,24 ; vmaskmovps (%rax),%ymm0,%ymm3
- DB 235,135 ; jmp 3e0f <_sk_load_8888_hsw+0x27>
+ DB 69,137,194 ; mov %r8d,%r10d
+ DB 65,128,226,7 ; and $0x7,%r10b
+ DB 197,229,239,219 ; vpxor %ymm3,%ymm3,%ymm3
+ DB 65,254,202 ; dec %r10b
+ DB 65,128,250,6 ; cmp $0x6,%r10b
+ DB 119,155 ; ja 3fa7 <_sk_load_8888_hsw+0x23>
+ DB 69,15,182,210 ; movzbl %r10b,%r10d
+ DB 76,141,29,141,0,0,0 ; lea 0x8d(%rip),%r11 # 40a4 <_sk_load_8888_hsw+0x120>
+ DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
+ DB 76,1,216 ; add %r11,%rax
+ DB 255,224 ; jmpq *%rax
+ DB 196,193,121,110,28,153 ; vmovd (%r9,%rbx,4),%xmm3
+ DB 233,124,255,255,255 ; jmpq 3fa7 <_sk_load_8888_hsw+0x23>
+ DB 196,193,121,110,68,153,8 ; vmovd 0x8(%r9,%rbx,4),%xmm0
+ DB 196,226,121,89,192 ; vpbroadcastq %xmm0,%xmm0
+ DB 197,245,239,201 ; vpxor %ymm1,%ymm1,%ymm1
+ DB 196,227,117,2,216,4 ; vpblendd $0x4,%ymm0,%ymm1,%ymm3
+ DB 196,193,122,126,4,153 ; vmovq (%r9,%rbx,4),%xmm0
+ DB 196,227,101,2,216,3 ; vpblendd $0x3,%ymm0,%ymm3,%ymm3
+ DB 233,85,255,255,255 ; jmpq 3fa7 <_sk_load_8888_hsw+0x23>
+ DB 196,193,121,110,68,153,24 ; vmovd 0x18(%r9,%rbx,4),%xmm0
+ DB 196,226,125,89,192 ; vpbroadcastq %xmm0,%ymm0
+ DB 197,245,239,201 ; vpxor %ymm1,%ymm1,%ymm1
+ DB 196,227,117,2,216,64 ; vpblendd $0x40,%ymm0,%ymm1,%ymm3
+ DB 196,227,125,57,216,1 ; vextracti128 $0x1,%ymm3,%xmm0
+ DB 196,195,121,34,68,153,20,1 ; vpinsrd $0x1,0x14(%r9,%rbx,4),%xmm0,%xmm0
+ DB 196,227,101,56,216,1 ; vinserti128 $0x1,%xmm0,%ymm3,%ymm3
+ DB 196,227,125,57,216,1 ; vextracti128 $0x1,%ymm3,%xmm0
+ DB 196,195,121,34,68,153,16,0 ; vpinsrd $0x0,0x10(%r9,%rbx,4),%xmm0,%xmm0
+ DB 196,227,101,56,216,1 ; vinserti128 $0x1,%xmm0,%ymm3,%ymm3
+ DB 196,193,122,111,4,153 ; vmovdqu (%r9,%rbx,4),%xmm0
+ DB 196,227,125,2,219,240 ; vpblendd $0xf0,%ymm3,%ymm0,%ymm3
+ DB 233,6,255,255,255 ; jmpq 3fa7 <_sk_load_8888_hsw+0x23>
+ DB 15,31,0 ; nopl (%rax)
+ DB 124,255 ; jl 40a5 <_sk_load_8888_hsw+0x121>
+ DB 255 ; (bad)
+ DB 255,157,255,255,255,135 ; lcall *-0x78000001(%rbp)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 236 ; in (%dx),%al
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 216,255 ; fdivr %st(7),%st
+ DB 255 ; (bad)
+ DB 255,196 ; inc %esp
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; .byte 0xff
+ DB 174 ; scas %es:(%rdi),%al
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; .byte 0xff
PUBLIC _sk_load_8888_dst_hsw
_sk_load_8888_dst_hsw LABEL PROC
- DB 73,137,201 ; mov %rcx,%r9
+ DB 83 ; push %rbx
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,99,80,8 ; movslq 0x8(%rax),%r10
- DB 73,99,201 ; movslq %r9d,%rcx
- DB 73,15,175,202 ; imul %r10,%rcx
- DB 72,193,225,2 ; shl $0x2,%rcx
- DB 72,3,8 ; add (%rax),%rcx
- DB 72,99,194 ; movslq %edx,%rax
- DB 72,141,4,129 ; lea (%rcx,%rax,4),%rax
+ DB 76,99,201 ; movslq %ecx,%r9
+ DB 77,15,175,202 ; imul %r10,%r9
+ DB 73,193,225,2 ; shl $0x2,%r9
+ DB 76,3,8 ; add (%rax),%r9
+ DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 117,87 ; jne 3f02 <_sk_load_8888_dst_hsw+0x7a>
- DB 197,252,16,56 ; vmovups (%rax),%ymm7
- DB 197,196,84,37,169,48,0,0 ; vandps 0x30a9(%rip),%ymm7,%ymm4 # 6f60 <_sk_clut_4D_hsw+0xf0c>
+ DB 117,87 ; jne 4134 <_sk_load_8888_dst_hsw+0x74>
+ DB 196,193,126,111,60,153 ; vmovdqu (%r9,%rbx,4),%ymm7
+ DB 197,197,219,37,53,51,0,0 ; vpand 0x3335(%rip),%ymm7,%ymm4 # 7420 <_sk_clut_4D_hsw+0xf00>
DB 197,252,91,228 ; vcvtdq2ps %ymm4,%ymm4
- DB 196,98,125,24,5,240,45,0,0 ; vbroadcastss 0x2df0(%rip),%ymm8 # 6cb4 <_sk_clut_4D_hsw+0xc60>
+ DB 196,98,125,24,5,136,48,0,0 ; vbroadcastss 0x3088(%rip),%ymm8 # 7180 <_sk_clut_4D_hsw+0xc60>
DB 196,193,92,89,224 ; vmulps %ymm8,%ymm4,%ymm4
- DB 196,226,69,0,45,174,48,0,0 ; vpshufb 0x30ae(%rip),%ymm7,%ymm5 # 6f80 <_sk_clut_4D_hsw+0xf2c>
+ DB 196,226,69,0,45,58,51,0,0 ; vpshufb 0x333a(%rip),%ymm7,%ymm5 # 7440 <_sk_clut_4D_hsw+0xf20>
DB 197,252,91,237 ; vcvtdq2ps %ymm5,%ymm5
DB 196,193,84,89,232 ; vmulps %ymm8,%ymm5,%ymm5
- DB 196,226,69,0,53,188,48,0,0 ; vpshufb 0x30bc(%rip),%ymm7,%ymm6 # 6fa0 <_sk_clut_4D_hsw+0xf4c>
+ DB 196,226,69,0,53,72,51,0,0 ; vpshufb 0x3348(%rip),%ymm7,%ymm6 # 7460 <_sk_clut_4D_hsw+0xf40>
DB 197,252,91,246 ; vcvtdq2ps %ymm6,%ymm6
DB 196,193,76,89,240 ; vmulps %ymm8,%ymm6,%ymm6
DB 197,197,114,215,24 ; vpsrld $0x18,%ymm7,%ymm7
DB 197,252,91,255 ; vcvtdq2ps %ymm7,%ymm7
DB 196,193,68,89,248 ; vmulps %ymm8,%ymm7,%ymm7
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 76,137,201 ; mov %r9,%rcx
+ DB 91 ; pop %rbx
+ DB 255,224 ; jmpq *%rax
+ DB 69,137,194 ; mov %r8d,%r10d
+ DB 65,128,226,7 ; and $0x7,%r10b
+ DB 197,197,239,255 ; vpxor %ymm7,%ymm7,%ymm7
+ DB 65,254,202 ; dec %r10b
+ DB 65,128,250,6 ; cmp $0x6,%r10b
+ DB 119,155 ; ja 40e3 <_sk_load_8888_dst_hsw+0x23>
+ DB 69,15,182,210 ; movzbl %r10b,%r10d
+ DB 76,141,29,141,0,0,0 ; lea 0x8d(%rip),%r11 # 41e0 <_sk_load_8888_dst_hsw+0x120>
+ DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
+ DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
- DB 185,8,0,0,0 ; mov $0x8,%ecx
- DB 68,41,193 ; sub %r8d,%ecx
- DB 192,225,3 ; shl $0x3,%cl
- DB 73,199,194,255,255,255,255 ; mov $0xffffffffffffffff,%r10
- DB 73,211,234 ; shr %cl,%r10
- DB 196,193,249,110,226 ; vmovq %r10,%xmm4
- DB 196,226,125,33,228 ; vpmovsxbd %xmm4,%ymm4
- DB 196,226,93,44,56 ; vmaskmovps (%rax),%ymm4,%ymm7
- DB 235,135 ; jmp 3eaf <_sk_load_8888_dst_hsw+0x27>
+ DB 196,193,121,110,60,153 ; vmovd (%r9,%rbx,4),%xmm7
+ DB 233,124,255,255,255 ; jmpq 40e3 <_sk_load_8888_dst_hsw+0x23>
+ DB 196,193,121,110,100,153,8 ; vmovd 0x8(%r9,%rbx,4),%xmm4
+ DB 196,226,121,89,228 ; vpbroadcastq %xmm4,%xmm4
+ DB 197,213,239,237 ; vpxor %ymm5,%ymm5,%ymm5
+ DB 196,227,85,2,252,4 ; vpblendd $0x4,%ymm4,%ymm5,%ymm7
+ DB 196,193,122,126,36,153 ; vmovq (%r9,%rbx,4),%xmm4
+ DB 196,227,69,2,252,3 ; vpblendd $0x3,%ymm4,%ymm7,%ymm7
+ DB 233,85,255,255,255 ; jmpq 40e3 <_sk_load_8888_dst_hsw+0x23>
+ DB 196,193,121,110,100,153,24 ; vmovd 0x18(%r9,%rbx,4),%xmm4
+ DB 196,226,125,89,228 ; vpbroadcastq %xmm4,%ymm4
+ DB 197,213,239,237 ; vpxor %ymm5,%ymm5,%ymm5
+ DB 196,227,85,2,252,64 ; vpblendd $0x40,%ymm4,%ymm5,%ymm7
+ DB 196,227,125,57,252,1 ; vextracti128 $0x1,%ymm7,%xmm4
+ DB 196,195,89,34,100,153,20,1 ; vpinsrd $0x1,0x14(%r9,%rbx,4),%xmm4,%xmm4
+ DB 196,227,69,56,252,1 ; vinserti128 $0x1,%xmm4,%ymm7,%ymm7
+ DB 196,227,125,57,252,1 ; vextracti128 $0x1,%ymm7,%xmm4
+ DB 196,195,89,34,100,153,16,0 ; vpinsrd $0x0,0x10(%r9,%rbx,4),%xmm4,%xmm4
+ DB 196,227,69,56,252,1 ; vinserti128 $0x1,%xmm4,%ymm7,%ymm7
+ DB 196,193,122,111,36,153 ; vmovdqu (%r9,%rbx,4),%xmm4
+ DB 196,227,93,2,255,240 ; vpblendd $0xf0,%ymm7,%ymm4,%ymm7
+ DB 233,6,255,255,255 ; jmpq 40e3 <_sk_load_8888_dst_hsw+0x23>
+ DB 15,31,0 ; nopl (%rax)
+ DB 124,255 ; jl 41e1 <_sk_load_8888_dst_hsw+0x121>
+ DB 255 ; (bad)
+ DB 255,157,255,255,255,135 ; lcall *-0x78000001(%rbp)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 236 ; in (%dx),%al
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 216,255 ; fdivr %st(7),%st
+ DB 255 ; (bad)
+ DB 255,196 ; inc %esp
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; .byte 0xff
+ DB 174 ; scas %es:(%rdi),%al
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; .byte 0xff
PUBLIC _sk_gather_8888_hsw
_sk_gather_8888_hsw LABEL PROC
@@ -3851,14 +4049,14 @@ _sk_gather_8888_hsw LABEL PROC
DB 197,245,254,192 ; vpaddd %ymm0,%ymm1,%ymm0
DB 197,245,118,201 ; vpcmpeqd %ymm1,%ymm1,%ymm1
DB 196,194,117,144,28,129 ; vpgatherdd %ymm1,(%r9,%ymm0,4),%ymm3
- DB 197,229,219,5,106,48,0,0 ; vpand 0x306a(%rip),%ymm3,%ymm0 # 6fc0 <_sk_clut_4D_hsw+0xf6c>
+ DB 197,229,219,5,86,50,0,0 ; vpand 0x3256(%rip),%ymm3,%ymm0 # 7480 <_sk_clut_4D_hsw+0xf60>
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,98,125,24,5,85,45,0,0 ; vbroadcastss 0x2d55(%rip),%ymm8 # 6cb8 <_sk_clut_4D_hsw+0xc64>
+ DB 196,98,125,24,5,77,47,0,0 ; vbroadcastss 0x2f4d(%rip),%ymm8 # 7184 <_sk_clut_4D_hsw+0xc64>
DB 196,193,124,89,192 ; vmulps %ymm8,%ymm0,%ymm0
- DB 196,226,101,0,13,111,48,0,0 ; vpshufb 0x306f(%rip),%ymm3,%ymm1 # 6fe0 <_sk_clut_4D_hsw+0xf8c>
+ DB 196,226,101,0,13,91,50,0,0 ; vpshufb 0x325b(%rip),%ymm3,%ymm1 # 74a0 <_sk_clut_4D_hsw+0xf80>
DB 197,252,91,201 ; vcvtdq2ps %ymm1,%ymm1
DB 196,193,116,89,200 ; vmulps %ymm8,%ymm1,%ymm1
- DB 196,226,101,0,21,125,48,0,0 ; vpshufb 0x307d(%rip),%ymm3,%ymm2 # 7000 <_sk_clut_4D_hsw+0xfac>
+ DB 196,226,101,0,21,105,50,0,0 ; vpshufb 0x3269(%rip),%ymm3,%ymm2 # 74c0 <_sk_clut_4D_hsw+0xfa0>
DB 197,252,91,210 ; vcvtdq2ps %ymm2,%ymm2
DB 196,193,108,89,208 ; vmulps %ymm8,%ymm2,%ymm2
DB 197,229,114,211,24 ; vpsrld $0x18,%ymm3,%ymm3
@@ -3869,16 +4067,15 @@ _sk_gather_8888_hsw LABEL PROC
PUBLIC _sk_store_8888_hsw
_sk_store_8888_hsw LABEL PROC
- DB 73,137,201 ; mov %rcx,%r9
+ DB 83 ; push %rbx
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,99,80,8 ; movslq 0x8(%rax),%r10
- DB 73,99,201 ; movslq %r9d,%rcx
- DB 73,15,175,202 ; imul %r10,%rcx
- DB 72,193,225,2 ; shl $0x2,%rcx
- DB 72,3,8 ; add (%rax),%rcx
- DB 72,99,194 ; movslq %edx,%rax
- DB 72,141,4,129 ; lea (%rcx,%rax,4),%rax
- DB 196,98,125,24,5,247,44,0,0 ; vbroadcastss 0x2cf7(%rip),%ymm8 # 6cbc <_sk_clut_4D_hsw+0xc68>
+ DB 76,99,201 ; movslq %ecx,%r9
+ DB 77,15,175,202 ; imul %r10,%r9
+ DB 73,193,225,2 ; shl $0x2,%r9
+ DB 76,3,8 ; add (%rax),%r9
+ DB 72,99,218 ; movslq %edx,%rbx
+ DB 196,98,125,24,5,245,46,0,0 ; vbroadcastss 0x2ef5(%rip),%ymm8 # 7188 <_sk_clut_4D_hsw+0xc68>
DB 196,65,124,89,200 ; vmulps %ymm8,%ymm0,%ymm9
DB 196,65,125,91,201 ; vcvtps2dq %ymm9,%ymm9
DB 196,65,116,89,208 ; vmulps %ymm8,%ymm1,%ymm10
@@ -3894,100 +4091,220 @@ _sk_store_8888_hsw LABEL PROC
DB 196,65,45,235,192 ; vpor %ymm8,%ymm10,%ymm8
DB 196,65,53,235,192 ; vpor %ymm8,%ymm9,%ymm8
DB 77,133,192 ; test %r8,%r8
- DB 117,11 ; jne 401e <_sk_store_8888_hsw+0x80>
- DB 197,124,17,0 ; vmovups %ymm8,(%rax)
+ DB 117,11 ; jne 42ec <_sk_store_8888_hsw+0x7a>
+ DB 196,65,126,127,4,153 ; vmovdqu %ymm8,(%r9,%rbx,4)
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 76,137,201 ; mov %r9,%rcx
+ DB 91 ; pop %rbx
+ DB 255,224 ; jmpq *%rax
+ DB 69,137,194 ; mov %r8d,%r10d
+ DB 65,128,226,7 ; and $0x7,%r10b
+ DB 65,254,202 ; dec %r10b
+ DB 65,128,250,6 ; cmp $0x6,%r10b
+ DB 119,235 ; ja 42e7 <_sk_store_8888_hsw+0x75>
+ DB 69,15,182,210 ; movzbl %r10b,%r10d
+ DB 76,141,29,85,0,0,0 ; lea 0x55(%rip),%r11 # 435c <_sk_store_8888_hsw+0xea>
+ DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
+ DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
- DB 185,8,0,0,0 ; mov $0x8,%ecx
- DB 68,41,193 ; sub %r8d,%ecx
- DB 192,225,3 ; shl $0x3,%cl
- DB 73,199,194,255,255,255,255 ; mov $0xffffffffffffffff,%r10
- DB 73,211,234 ; shr %cl,%r10
- DB 196,65,249,110,202 ; vmovq %r10,%xmm9
- DB 196,66,125,33,201 ; vpmovsxbd %xmm9,%ymm9
- DB 196,98,53,46,0 ; vmaskmovps %ymm8,%ymm9,(%rax)
- DB 235,211 ; jmp 4017 <_sk_store_8888_hsw+0x79>
+ DB 196,65,121,126,4,153 ; vmovd %xmm8,(%r9,%rbx,4)
+ DB 235,207 ; jmp 42e7 <_sk_store_8888_hsw+0x75>
+ DB 196,67,121,22,68,153,8,2 ; vpextrd $0x2,%xmm8,0x8(%r9,%rbx,4)
+ DB 196,65,121,214,4,153 ; vmovq %xmm8,(%r9,%rbx,4)
+ DB 235,191 ; jmp 42e7 <_sk_store_8888_hsw+0x75>
+ DB 196,67,125,57,193,1 ; vextracti128 $0x1,%ymm8,%xmm9
+ DB 196,67,121,22,76,153,24,2 ; vpextrd $0x2,%xmm9,0x18(%r9,%rbx,4)
+ DB 196,67,125,57,193,1 ; vextracti128 $0x1,%ymm8,%xmm9
+ DB 196,67,121,22,76,153,20,1 ; vpextrd $0x1,%xmm9,0x14(%r9,%rbx,4)
+ DB 196,67,125,57,193,1 ; vextracti128 $0x1,%ymm8,%xmm9
+ DB 196,65,121,126,76,153,16 ; vmovd %xmm9,0x10(%r9,%rbx,4)
+ DB 196,65,122,127,4,153 ; vmovdqu %xmm8,(%r9,%rbx,4)
+ DB 235,142 ; jmp 42e7 <_sk_store_8888_hsw+0x75>
+ DB 15,31,0 ; nopl (%rax)
+ DB 180,255 ; mov $0xff,%ah
+ DB 255 ; (bad)
+ DB 255,196 ; inc %esp
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 188,255,255,255,245 ; mov $0xf5ffffff,%esp
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 232,255,255,255,218 ; callq ffffffffdb004370 <_sk_clut_4D_hsw+0xffffffffdaffde50>
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,204 ; dec %esp
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; .byte 0xff
PUBLIC _sk_load_bgra_hsw
_sk_load_bgra_hsw LABEL PROC
- DB 73,137,201 ; mov %rcx,%r9
+ DB 83 ; push %rbx
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,99,80,8 ; movslq 0x8(%rax),%r10
- DB 73,99,201 ; movslq %r9d,%rcx
- DB 73,15,175,202 ; imul %r10,%rcx
- DB 72,193,225,2 ; shl $0x2,%rcx
- DB 72,3,8 ; add (%rax),%rcx
- DB 72,99,194 ; movslq %edx,%rax
- DB 72,141,4,129 ; lea (%rcx,%rax,4),%rax
+ DB 76,99,201 ; movslq %ecx,%r9
+ DB 77,15,175,202 ; imul %r10,%r9
+ DB 73,193,225,2 ; shl $0x2,%r9
+ DB 76,3,8 ; add (%rax),%r9
+ DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 117,87 ; jne 40be <_sk_load_bgra_hsw+0x7a>
- DB 197,252,16,24 ; vmovups (%rax),%ymm3
- DB 197,228,84,5,173,47,0,0 ; vandps 0x2fad(%rip),%ymm3,%ymm0 # 7020 <_sk_clut_4D_hsw+0xfcc>
+ DB 117,87 ; jne 43ec <_sk_load_bgra_hsw+0x74>
+ DB 196,193,126,111,28,153 ; vmovdqu (%r9,%rbx,4),%ymm3
+ DB 197,229,219,5,61,49,0,0 ; vpand 0x313d(%rip),%ymm3,%ymm0 # 74e0 <_sk_clut_4D_hsw+0xfc0>
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,98,125,24,5,64,44,0,0 ; vbroadcastss 0x2c40(%rip),%ymm8 # 6cc0 <_sk_clut_4D_hsw+0xc6c>
+ DB 196,98,125,24,5,220,45,0,0 ; vbroadcastss 0x2ddc(%rip),%ymm8 # 718c <_sk_clut_4D_hsw+0xc6c>
DB 196,193,124,89,208 ; vmulps %ymm8,%ymm0,%ymm2
- DB 196,226,101,0,5,178,47,0,0 ; vpshufb 0x2fb2(%rip),%ymm3,%ymm0 # 7040 <_sk_clut_4D_hsw+0xfec>
+ DB 196,226,101,0,5,66,49,0,0 ; vpshufb 0x3142(%rip),%ymm3,%ymm0 # 7500 <_sk_clut_4D_hsw+0xfe0>
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
DB 196,193,124,89,200 ; vmulps %ymm8,%ymm0,%ymm1
- DB 196,226,101,0,5,192,47,0,0 ; vpshufb 0x2fc0(%rip),%ymm3,%ymm0 # 7060 <_sk_clut_4D_hsw+0x100c>
+ DB 196,226,101,0,5,80,49,0,0 ; vpshufb 0x3150(%rip),%ymm3,%ymm0 # 7520 <_sk_clut_4D_hsw+0x1000>
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
DB 196,193,124,89,192 ; vmulps %ymm8,%ymm0,%ymm0
DB 197,229,114,211,24 ; vpsrld $0x18,%ymm3,%ymm3
DB 197,252,91,219 ; vcvtdq2ps %ymm3,%ymm3
DB 196,193,100,89,216 ; vmulps %ymm8,%ymm3,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 76,137,201 ; mov %r9,%rcx
+ DB 91 ; pop %rbx
+ DB 255,224 ; jmpq *%rax
+ DB 69,137,194 ; mov %r8d,%r10d
+ DB 65,128,226,7 ; and $0x7,%r10b
+ DB 197,229,239,219 ; vpxor %ymm3,%ymm3,%ymm3
+ DB 65,254,202 ; dec %r10b
+ DB 65,128,250,6 ; cmp $0x6,%r10b
+ DB 119,155 ; ja 439b <_sk_load_bgra_hsw+0x23>
+ DB 69,15,182,210 ; movzbl %r10b,%r10d
+ DB 76,141,29,141,0,0,0 ; lea 0x8d(%rip),%r11 # 4498 <_sk_load_bgra_hsw+0x120>
+ DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
+ DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
- DB 185,8,0,0,0 ; mov $0x8,%ecx
- DB 68,41,193 ; sub %r8d,%ecx
- DB 192,225,3 ; shl $0x3,%cl
- DB 73,199,194,255,255,255,255 ; mov $0xffffffffffffffff,%r10
- DB 73,211,234 ; shr %cl,%r10
- DB 196,193,249,110,194 ; vmovq %r10,%xmm0
- DB 196,226,125,33,192 ; vpmovsxbd %xmm0,%ymm0
- DB 196,226,125,44,24 ; vmaskmovps (%rax),%ymm0,%ymm3
- DB 235,135 ; jmp 406b <_sk_load_bgra_hsw+0x27>
+ DB 196,193,121,110,28,153 ; vmovd (%r9,%rbx,4),%xmm3
+ DB 233,124,255,255,255 ; jmpq 439b <_sk_load_bgra_hsw+0x23>
+ DB 196,193,121,110,68,153,8 ; vmovd 0x8(%r9,%rbx,4),%xmm0
+ DB 196,226,121,89,192 ; vpbroadcastq %xmm0,%xmm0
+ DB 197,245,239,201 ; vpxor %ymm1,%ymm1,%ymm1
+ DB 196,227,117,2,216,4 ; vpblendd $0x4,%ymm0,%ymm1,%ymm3
+ DB 196,193,122,126,4,153 ; vmovq (%r9,%rbx,4),%xmm0
+ DB 196,227,101,2,216,3 ; vpblendd $0x3,%ymm0,%ymm3,%ymm3
+ DB 233,85,255,255,255 ; jmpq 439b <_sk_load_bgra_hsw+0x23>
+ DB 196,193,121,110,68,153,24 ; vmovd 0x18(%r9,%rbx,4),%xmm0
+ DB 196,226,125,89,192 ; vpbroadcastq %xmm0,%ymm0
+ DB 197,245,239,201 ; vpxor %ymm1,%ymm1,%ymm1
+ DB 196,227,117,2,216,64 ; vpblendd $0x40,%ymm0,%ymm1,%ymm3
+ DB 196,227,125,57,216,1 ; vextracti128 $0x1,%ymm3,%xmm0
+ DB 196,195,121,34,68,153,20,1 ; vpinsrd $0x1,0x14(%r9,%rbx,4),%xmm0,%xmm0
+ DB 196,227,101,56,216,1 ; vinserti128 $0x1,%xmm0,%ymm3,%ymm3
+ DB 196,227,125,57,216,1 ; vextracti128 $0x1,%ymm3,%xmm0
+ DB 196,195,121,34,68,153,16,0 ; vpinsrd $0x0,0x10(%r9,%rbx,4),%xmm0,%xmm0
+ DB 196,227,101,56,216,1 ; vinserti128 $0x1,%xmm0,%ymm3,%ymm3
+ DB 196,193,122,111,4,153 ; vmovdqu (%r9,%rbx,4),%xmm0
+ DB 196,227,125,2,219,240 ; vpblendd $0xf0,%ymm3,%ymm0,%ymm3
+ DB 233,6,255,255,255 ; jmpq 439b <_sk_load_bgra_hsw+0x23>
+ DB 15,31,0 ; nopl (%rax)
+ DB 124,255 ; jl 4499 <_sk_load_bgra_hsw+0x121>
+ DB 255 ; (bad)
+ DB 255,157,255,255,255,135 ; lcall *-0x78000001(%rbp)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 236 ; in (%dx),%al
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 216,255 ; fdivr %st(7),%st
+ DB 255 ; (bad)
+ DB 255,196 ; inc %esp
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; .byte 0xff
+ DB 174 ; scas %es:(%rdi),%al
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; .byte 0xff
PUBLIC _sk_load_bgra_dst_hsw
_sk_load_bgra_dst_hsw LABEL PROC
- DB 73,137,201 ; mov %rcx,%r9
+ DB 83 ; push %rbx
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,99,80,8 ; movslq 0x8(%rax),%r10
- DB 73,99,201 ; movslq %r9d,%rcx
- DB 73,15,175,202 ; imul %r10,%rcx
- DB 72,193,225,2 ; shl $0x2,%rcx
- DB 72,3,8 ; add (%rax),%rcx
- DB 72,99,194 ; movslq %edx,%rax
- DB 72,141,4,129 ; lea (%rcx,%rax,4),%rax
+ DB 76,99,201 ; movslq %ecx,%r9
+ DB 77,15,175,202 ; imul %r10,%r9
+ DB 73,193,225,2 ; shl $0x2,%r9
+ DB 76,3,8 ; add (%rax),%r9
+ DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 117,87 ; jne 415e <_sk_load_bgra_dst_hsw+0x7a>
- DB 197,252,16,56 ; vmovups (%rax),%ymm7
- DB 197,196,84,37,109,47,0,0 ; vandps 0x2f6d(%rip),%ymm7,%ymm4 # 7080 <_sk_clut_4D_hsw+0x102c>
+ DB 117,87 ; jne 4528 <_sk_load_bgra_dst_hsw+0x74>
+ DB 196,193,126,111,60,153 ; vmovdqu (%r9,%rbx,4),%ymm7
+ DB 197,197,219,37,97,48,0,0 ; vpand 0x3061(%rip),%ymm7,%ymm4 # 7540 <_sk_clut_4D_hsw+0x1020>
DB 197,252,91,228 ; vcvtdq2ps %ymm4,%ymm4
- DB 196,98,125,24,5,164,43,0,0 ; vbroadcastss 0x2ba4(%rip),%ymm8 # 6cc4 <_sk_clut_4D_hsw+0xc70>
+ DB 196,98,125,24,5,164,44,0,0 ; vbroadcastss 0x2ca4(%rip),%ymm8 # 7190 <_sk_clut_4D_hsw+0xc70>
DB 196,193,92,89,240 ; vmulps %ymm8,%ymm4,%ymm6
- DB 196,226,69,0,37,114,47,0,0 ; vpshufb 0x2f72(%rip),%ymm7,%ymm4 # 70a0 <_sk_clut_4D_hsw+0x104c>
+ DB 196,226,69,0,37,102,48,0,0 ; vpshufb 0x3066(%rip),%ymm7,%ymm4 # 7560 <_sk_clut_4D_hsw+0x1040>
DB 197,252,91,228 ; vcvtdq2ps %ymm4,%ymm4
DB 196,193,92,89,232 ; vmulps %ymm8,%ymm4,%ymm5
- DB 196,226,69,0,37,128,47,0,0 ; vpshufb 0x2f80(%rip),%ymm7,%ymm4 # 70c0 <_sk_clut_4D_hsw+0x106c>
+ DB 196,226,69,0,37,116,48,0,0 ; vpshufb 0x3074(%rip),%ymm7,%ymm4 # 7580 <_sk_clut_4D_hsw+0x1060>
DB 197,252,91,228 ; vcvtdq2ps %ymm4,%ymm4
DB 196,193,92,89,224 ; vmulps %ymm8,%ymm4,%ymm4
DB 197,197,114,215,24 ; vpsrld $0x18,%ymm7,%ymm7
DB 197,252,91,255 ; vcvtdq2ps %ymm7,%ymm7
DB 196,193,68,89,248 ; vmulps %ymm8,%ymm7,%ymm7
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 76,137,201 ; mov %r9,%rcx
+ DB 91 ; pop %rbx
DB 255,224 ; jmpq *%rax
- DB 185,8,0,0,0 ; mov $0x8,%ecx
- DB 68,41,193 ; sub %r8d,%ecx
- DB 192,225,3 ; shl $0x3,%cl
- DB 73,199,194,255,255,255,255 ; mov $0xffffffffffffffff,%r10
- DB 73,211,234 ; shr %cl,%r10
- DB 196,193,249,110,226 ; vmovq %r10,%xmm4
- DB 196,226,125,33,228 ; vpmovsxbd %xmm4,%ymm4
- DB 196,226,93,44,56 ; vmaskmovps (%rax),%ymm4,%ymm7
- DB 235,135 ; jmp 410b <_sk_load_bgra_dst_hsw+0x27>
+ DB 69,137,194 ; mov %r8d,%r10d
+ DB 65,128,226,7 ; and $0x7,%r10b
+ DB 197,197,239,255 ; vpxor %ymm7,%ymm7,%ymm7
+ DB 65,254,202 ; dec %r10b
+ DB 65,128,250,6 ; cmp $0x6,%r10b
+ DB 119,155 ; ja 44d7 <_sk_load_bgra_dst_hsw+0x23>
+ DB 69,15,182,210 ; movzbl %r10b,%r10d
+ DB 76,141,29,141,0,0,0 ; lea 0x8d(%rip),%r11 # 45d4 <_sk_load_bgra_dst_hsw+0x120>
+ DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
+ DB 76,1,216 ; add %r11,%rax
+ DB 255,224 ; jmpq *%rax
+ DB 196,193,121,110,60,153 ; vmovd (%r9,%rbx,4),%xmm7
+ DB 233,124,255,255,255 ; jmpq 44d7 <_sk_load_bgra_dst_hsw+0x23>
+ DB 196,193,121,110,100,153,8 ; vmovd 0x8(%r9,%rbx,4),%xmm4
+ DB 196,226,121,89,228 ; vpbroadcastq %xmm4,%xmm4
+ DB 197,213,239,237 ; vpxor %ymm5,%ymm5,%ymm5
+ DB 196,227,85,2,252,4 ; vpblendd $0x4,%ymm4,%ymm5,%ymm7
+ DB 196,193,122,126,36,153 ; vmovq (%r9,%rbx,4),%xmm4
+ DB 196,227,69,2,252,3 ; vpblendd $0x3,%ymm4,%ymm7,%ymm7
+ DB 233,85,255,255,255 ; jmpq 44d7 <_sk_load_bgra_dst_hsw+0x23>
+ DB 196,193,121,110,100,153,24 ; vmovd 0x18(%r9,%rbx,4),%xmm4
+ DB 196,226,125,89,228 ; vpbroadcastq %xmm4,%ymm4
+ DB 197,213,239,237 ; vpxor %ymm5,%ymm5,%ymm5
+ DB 196,227,85,2,252,64 ; vpblendd $0x40,%ymm4,%ymm5,%ymm7
+ DB 196,227,125,57,252,1 ; vextracti128 $0x1,%ymm7,%xmm4
+ DB 196,195,89,34,100,153,20,1 ; vpinsrd $0x1,0x14(%r9,%rbx,4),%xmm4,%xmm4
+ DB 196,227,69,56,252,1 ; vinserti128 $0x1,%xmm4,%ymm7,%ymm7
+ DB 196,227,125,57,252,1 ; vextracti128 $0x1,%ymm7,%xmm4
+ DB 196,195,89,34,100,153,16,0 ; vpinsrd $0x0,0x10(%r9,%rbx,4),%xmm4,%xmm4
+ DB 196,227,69,56,252,1 ; vinserti128 $0x1,%xmm4,%ymm7,%ymm7
+ DB 196,193,122,111,36,153 ; vmovdqu (%r9,%rbx,4),%xmm4
+ DB 196,227,93,2,255,240 ; vpblendd $0xf0,%ymm7,%ymm4,%ymm7
+ DB 233,6,255,255,255 ; jmpq 44d7 <_sk_load_bgra_dst_hsw+0x23>
+ DB 15,31,0 ; nopl (%rax)
+ DB 124,255 ; jl 45d5 <_sk_load_bgra_dst_hsw+0x121>
+ DB 255 ; (bad)
+ DB 255,157,255,255,255,135 ; lcall *-0x78000001(%rbp)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 236 ; in (%dx),%al
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 216,255 ; fdivr %st(7),%st
+ DB 255 ; (bad)
+ DB 255,196 ; inc %esp
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; .byte 0xff
+ DB 174 ; scas %es:(%rdi),%al
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; .byte 0xff
PUBLIC _sk_gather_bgra_hsw
_sk_gather_bgra_hsw LABEL PROC
@@ -4000,14 +4317,14 @@ _sk_gather_bgra_hsw LABEL PROC
DB 197,245,254,192 ; vpaddd %ymm0,%ymm1,%ymm0
DB 197,245,118,201 ; vpcmpeqd %ymm1,%ymm1,%ymm1
DB 196,194,117,144,28,129 ; vpgatherdd %ymm1,(%r9,%ymm0,4),%ymm3
- DB 197,229,219,5,46,47,0,0 ; vpand 0x2f2e(%rip),%ymm3,%ymm0 # 70e0 <_sk_clut_4D_hsw+0x108c>
+ DB 197,229,219,5,130,47,0,0 ; vpand 0x2f82(%rip),%ymm3,%ymm0 # 75a0 <_sk_clut_4D_hsw+0x1080>
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,98,125,24,5,9,43,0,0 ; vbroadcastss 0x2b09(%rip),%ymm8 # 6cc8 <_sk_clut_4D_hsw+0xc74>
+ DB 196,98,125,24,5,105,43,0,0 ; vbroadcastss 0x2b69(%rip),%ymm8 # 7194 <_sk_clut_4D_hsw+0xc74>
DB 196,193,124,89,208 ; vmulps %ymm8,%ymm0,%ymm2
- DB 196,226,101,0,5,51,47,0,0 ; vpshufb 0x2f33(%rip),%ymm3,%ymm0 # 7100 <_sk_clut_4D_hsw+0x10ac>
+ DB 196,226,101,0,5,135,47,0,0 ; vpshufb 0x2f87(%rip),%ymm3,%ymm0 # 75c0 <_sk_clut_4D_hsw+0x10a0>
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
DB 196,193,124,89,200 ; vmulps %ymm8,%ymm0,%ymm1
- DB 196,226,101,0,5,65,47,0,0 ; vpshufb 0x2f41(%rip),%ymm3,%ymm0 # 7120 <_sk_clut_4D_hsw+0x10cc>
+ DB 196,226,101,0,5,149,47,0,0 ; vpshufb 0x2f95(%rip),%ymm3,%ymm0 # 75e0 <_sk_clut_4D_hsw+0x10c0>
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
DB 196,193,124,89,192 ; vmulps %ymm8,%ymm0,%ymm0
DB 197,229,114,211,24 ; vpsrld $0x18,%ymm3,%ymm3
@@ -4018,16 +4335,15 @@ _sk_gather_bgra_hsw LABEL PROC
PUBLIC _sk_store_bgra_hsw
_sk_store_bgra_hsw LABEL PROC
- DB 73,137,201 ; mov %rcx,%r9
+ DB 83 ; push %rbx
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,99,80,8 ; movslq 0x8(%rax),%r10
- DB 73,99,201 ; movslq %r9d,%rcx
- DB 73,15,175,202 ; imul %r10,%rcx
- DB 72,193,225,2 ; shl $0x2,%rcx
- DB 72,3,8 ; add (%rax),%rcx
- DB 72,99,194 ; movslq %edx,%rax
- DB 72,141,4,129 ; lea (%rcx,%rax,4),%rax
- DB 196,98,125,24,5,171,42,0,0 ; vbroadcastss 0x2aab(%rip),%ymm8 # 6ccc <_sk_clut_4D_hsw+0xc78>
+ DB 76,99,201 ; movslq %ecx,%r9
+ DB 77,15,175,202 ; imul %r10,%r9
+ DB 73,193,225,2 ; shl $0x2,%r9
+ DB 76,3,8 ; add (%rax),%r9
+ DB 72,99,218 ; movslq %edx,%rbx
+ DB 196,98,125,24,5,17,43,0,0 ; vbroadcastss 0x2b11(%rip),%ymm8 # 7198 <_sk_clut_4D_hsw+0xc78>
DB 196,65,108,89,200 ; vmulps %ymm8,%ymm2,%ymm9
DB 196,65,125,91,201 ; vcvtps2dq %ymm9,%ymm9
DB 196,65,116,89,208 ; vmulps %ymm8,%ymm1,%ymm10
@@ -4043,20 +4359,52 @@ _sk_store_bgra_hsw LABEL PROC
DB 196,65,45,235,192 ; vpor %ymm8,%ymm10,%ymm8
DB 196,65,53,235,192 ; vpor %ymm8,%ymm9,%ymm8
DB 77,133,192 ; test %r8,%r8
- DB 117,11 ; jne 427a <_sk_store_bgra_hsw+0x80>
- DB 197,124,17,0 ; vmovups %ymm8,(%rax)
+ DB 117,11 ; jne 46e0 <_sk_store_bgra_hsw+0x7a>
+ DB 196,65,126,127,4,153 ; vmovdqu %ymm8,(%r9,%rbx,4)
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 76,137,201 ; mov %r9,%rcx
+ DB 91 ; pop %rbx
DB 255,224 ; jmpq *%rax
- DB 185,8,0,0,0 ; mov $0x8,%ecx
- DB 68,41,193 ; sub %r8d,%ecx
- DB 192,225,3 ; shl $0x3,%cl
- DB 73,199,194,255,255,255,255 ; mov $0xffffffffffffffff,%r10
- DB 73,211,234 ; shr %cl,%r10
- DB 196,65,249,110,202 ; vmovq %r10,%xmm9
- DB 196,66,125,33,201 ; vpmovsxbd %xmm9,%ymm9
- DB 196,98,53,46,0 ; vmaskmovps %ymm8,%ymm9,(%rax)
- DB 235,211 ; jmp 4273 <_sk_store_bgra_hsw+0x79>
+ DB 69,137,194 ; mov %r8d,%r10d
+ DB 65,128,226,7 ; and $0x7,%r10b
+ DB 65,254,202 ; dec %r10b
+ DB 65,128,250,6 ; cmp $0x6,%r10b
+ DB 119,235 ; ja 46db <_sk_store_bgra_hsw+0x75>
+ DB 69,15,182,210 ; movzbl %r10b,%r10d
+ DB 76,141,29,85,0,0,0 ; lea 0x55(%rip),%r11 # 4750 <_sk_store_bgra_hsw+0xea>
+ DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
+ DB 76,1,216 ; add %r11,%rax
+ DB 255,224 ; jmpq *%rax
+ DB 196,65,121,126,4,153 ; vmovd %xmm8,(%r9,%rbx,4)
+ DB 235,207 ; jmp 46db <_sk_store_bgra_hsw+0x75>
+ DB 196,67,121,22,68,153,8,2 ; vpextrd $0x2,%xmm8,0x8(%r9,%rbx,4)
+ DB 196,65,121,214,4,153 ; vmovq %xmm8,(%r9,%rbx,4)
+ DB 235,191 ; jmp 46db <_sk_store_bgra_hsw+0x75>
+ DB 196,67,125,57,193,1 ; vextracti128 $0x1,%ymm8,%xmm9
+ DB 196,67,121,22,76,153,24,2 ; vpextrd $0x2,%xmm9,0x18(%r9,%rbx,4)
+ DB 196,67,125,57,193,1 ; vextracti128 $0x1,%ymm8,%xmm9
+ DB 196,67,121,22,76,153,20,1 ; vpextrd $0x1,%xmm9,0x14(%r9,%rbx,4)
+ DB 196,67,125,57,193,1 ; vextracti128 $0x1,%ymm8,%xmm9
+ DB 196,65,121,126,76,153,16 ; vmovd %xmm9,0x10(%r9,%rbx,4)
+ DB 196,65,122,127,4,153 ; vmovdqu %xmm8,(%r9,%rbx,4)
+ DB 235,142 ; jmp 46db <_sk_store_bgra_hsw+0x75>
+ DB 15,31,0 ; nopl (%rax)
+ DB 180,255 ; mov $0xff,%ah
+ DB 255 ; (bad)
+ DB 255,196 ; inc %esp
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 188,255,255,255,245 ; mov $0xf5ffffff,%esp
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 232,255,255,255,218 ; callq ffffffffdb004764 <_sk_clut_4D_hsw+0xffffffffdaffe244>
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,204 ; dec %esp
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; .byte 0xff
PUBLIC _sk_load_f16_hsw
_sk_load_f16_hsw LABEL PROC
@@ -4068,7 +4416,7 @@ _sk_load_f16_hsw LABEL PROC
DB 76,3,8 ; add (%rax),%r9
DB 72,99,194 ; movslq %edx,%rax
DB 77,133,192 ; test %r8,%r8
- DB 117,101 ; jne 4321 <_sk_load_f16_hsw+0x81>
+ DB 117,101 ; jne 47ed <_sk_load_f16_hsw+0x81>
DB 196,65,121,16,4,193 ; vmovupd (%r9,%rax,8),%xmm8
DB 196,193,121,16,84,193,16 ; vmovupd 0x10(%r9,%rax,8),%xmm2
DB 196,193,121,16,92,193,32 ; vmovupd 0x20(%r9,%rax,8),%xmm3
@@ -4094,29 +4442,29 @@ _sk_load_f16_hsw LABEL PROC
DB 196,65,123,16,4,193 ; vmovsd (%r9,%rax,8),%xmm8
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 73,131,248,1 ; cmp $0x1,%r8
- DB 116,85 ; je 4387 <_sk_load_f16_hsw+0xe7>
+ DB 116,85 ; je 4853 <_sk_load_f16_hsw+0xe7>
DB 196,65,57,22,68,193,8 ; vmovhpd 0x8(%r9,%rax,8),%xmm8,%xmm8
DB 73,131,248,3 ; cmp $0x3,%r8
- DB 114,72 ; jb 4387 <_sk_load_f16_hsw+0xe7>
+ DB 114,72 ; jb 4853 <_sk_load_f16_hsw+0xe7>
DB 196,193,123,16,84,193,16 ; vmovsd 0x10(%r9,%rax,8),%xmm2
DB 73,131,248,3 ; cmp $0x3,%r8
- DB 116,72 ; je 4394 <_sk_load_f16_hsw+0xf4>
+ DB 116,72 ; je 4860 <_sk_load_f16_hsw+0xf4>
DB 196,193,105,22,84,193,24 ; vmovhpd 0x18(%r9,%rax,8),%xmm2,%xmm2
DB 73,131,248,5 ; cmp $0x5,%r8
- DB 114,59 ; jb 4394 <_sk_load_f16_hsw+0xf4>
+ DB 114,59 ; jb 4860 <_sk_load_f16_hsw+0xf4>
DB 196,193,123,16,92,193,32 ; vmovsd 0x20(%r9,%rax,8),%xmm3
DB 73,131,248,5 ; cmp $0x5,%r8
- DB 15,132,109,255,255,255 ; je 42d7 <_sk_load_f16_hsw+0x37>
+ DB 15,132,109,255,255,255 ; je 47a3 <_sk_load_f16_hsw+0x37>
DB 196,193,97,22,92,193,40 ; vmovhpd 0x28(%r9,%rax,8),%xmm3,%xmm3
DB 73,131,248,7 ; cmp $0x7,%r8
- DB 15,130,92,255,255,255 ; jb 42d7 <_sk_load_f16_hsw+0x37>
+ DB 15,130,92,255,255,255 ; jb 47a3 <_sk_load_f16_hsw+0x37>
DB 196,65,122,126,76,193,48 ; vmovq 0x30(%r9,%rax,8),%xmm9
- DB 233,80,255,255,255 ; jmpq 42d7 <_sk_load_f16_hsw+0x37>
+ DB 233,80,255,255,255 ; jmpq 47a3 <_sk_load_f16_hsw+0x37>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2
- DB 233,67,255,255,255 ; jmpq 42d7 <_sk_load_f16_hsw+0x37>
+ DB 233,67,255,255,255 ; jmpq 47a3 <_sk_load_f16_hsw+0x37>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
- DB 233,58,255,255,255 ; jmpq 42d7 <_sk_load_f16_hsw+0x37>
+ DB 233,58,255,255,255 ; jmpq 47a3 <_sk_load_f16_hsw+0x37>
PUBLIC _sk_load_f16_dst_hsw
_sk_load_f16_dst_hsw LABEL PROC
@@ -4128,7 +4476,7 @@ _sk_load_f16_dst_hsw LABEL PROC
DB 76,3,8 ; add (%rax),%r9
DB 72,99,194 ; movslq %edx,%rax
DB 77,133,192 ; test %r8,%r8
- DB 117,101 ; jne 441e <_sk_load_f16_dst_hsw+0x81>
+ DB 117,101 ; jne 48ea <_sk_load_f16_dst_hsw+0x81>
DB 196,65,121,16,4,193 ; vmovupd (%r9,%rax,8),%xmm8
DB 196,193,121,16,116,193,16 ; vmovupd 0x10(%r9,%rax,8),%xmm6
DB 196,193,121,16,124,193,32 ; vmovupd 0x20(%r9,%rax,8),%xmm7
@@ -4154,29 +4502,29 @@ _sk_load_f16_dst_hsw LABEL PROC
DB 196,65,123,16,4,193 ; vmovsd (%r9,%rax,8),%xmm8
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 73,131,248,1 ; cmp $0x1,%r8
- DB 116,85 ; je 4484 <_sk_load_f16_dst_hsw+0xe7>
+ DB 116,85 ; je 4950 <_sk_load_f16_dst_hsw+0xe7>
DB 196,65,57,22,68,193,8 ; vmovhpd 0x8(%r9,%rax,8),%xmm8,%xmm8
DB 73,131,248,3 ; cmp $0x3,%r8
- DB 114,72 ; jb 4484 <_sk_load_f16_dst_hsw+0xe7>
+ DB 114,72 ; jb 4950 <_sk_load_f16_dst_hsw+0xe7>
DB 196,193,123,16,116,193,16 ; vmovsd 0x10(%r9,%rax,8),%xmm6
DB 73,131,248,3 ; cmp $0x3,%r8
- DB 116,72 ; je 4491 <_sk_load_f16_dst_hsw+0xf4>
+ DB 116,72 ; je 495d <_sk_load_f16_dst_hsw+0xf4>
DB 196,193,73,22,116,193,24 ; vmovhpd 0x18(%r9,%rax,8),%xmm6,%xmm6
DB 73,131,248,5 ; cmp $0x5,%r8
- DB 114,59 ; jb 4491 <_sk_load_f16_dst_hsw+0xf4>
+ DB 114,59 ; jb 495d <_sk_load_f16_dst_hsw+0xf4>
DB 196,193,123,16,124,193,32 ; vmovsd 0x20(%r9,%rax,8),%xmm7
DB 73,131,248,5 ; cmp $0x5,%r8
- DB 15,132,109,255,255,255 ; je 43d4 <_sk_load_f16_dst_hsw+0x37>
+ DB 15,132,109,255,255,255 ; je 48a0 <_sk_load_f16_dst_hsw+0x37>
DB 196,193,65,22,124,193,40 ; vmovhpd 0x28(%r9,%rax,8),%xmm7,%xmm7
DB 73,131,248,7 ; cmp $0x7,%r8
- DB 15,130,92,255,255,255 ; jb 43d4 <_sk_load_f16_dst_hsw+0x37>
+ DB 15,130,92,255,255,255 ; jb 48a0 <_sk_load_f16_dst_hsw+0x37>
DB 196,65,122,126,76,193,48 ; vmovq 0x30(%r9,%rax,8),%xmm9
- DB 233,80,255,255,255 ; jmpq 43d4 <_sk_load_f16_dst_hsw+0x37>
+ DB 233,80,255,255,255 ; jmpq 48a0 <_sk_load_f16_dst_hsw+0x37>
DB 197,193,87,255 ; vxorpd %xmm7,%xmm7,%xmm7
DB 197,201,87,246 ; vxorpd %xmm6,%xmm6,%xmm6
- DB 233,67,255,255,255 ; jmpq 43d4 <_sk_load_f16_dst_hsw+0x37>
+ DB 233,67,255,255,255 ; jmpq 48a0 <_sk_load_f16_dst_hsw+0x37>
DB 197,193,87,255 ; vxorpd %xmm7,%xmm7,%xmm7
- DB 233,58,255,255,255 ; jmpq 43d4 <_sk_load_f16_dst_hsw+0x37>
+ DB 233,58,255,255,255 ; jmpq 48a0 <_sk_load_f16_dst_hsw+0x37>
PUBLIC _sk_gather_f16_hsw
_sk_gather_f16_hsw LABEL PROC
@@ -4235,7 +4583,7 @@ _sk_store_f16_hsw LABEL PROC
DB 196,65,57,98,205 ; vpunpckldq %xmm13,%xmm8,%xmm9
DB 196,65,57,106,197 ; vpunpckhdq %xmm13,%xmm8,%xmm8
DB 77,133,192 ; test %r8,%r8
- DB 117,31 ; jne 459f <_sk_store_f16_hsw+0x7b>
+ DB 117,31 ; jne 4a6b <_sk_store_f16_hsw+0x7b>
DB 196,65,120,17,28,193 ; vmovups %xmm11,(%r9,%rax,8)
DB 196,65,120,17,84,193,16 ; vmovups %xmm10,0x10(%r9,%rax,8)
DB 196,65,120,17,76,193,32 ; vmovups %xmm9,0x20(%r9,%rax,8)
@@ -4244,22 +4592,22 @@ _sk_store_f16_hsw LABEL PROC
DB 255,224 ; jmpq *%rax
DB 196,65,121,214,28,193 ; vmovq %xmm11,(%r9,%rax,8)
DB 73,131,248,1 ; cmp $0x1,%r8
- DB 116,240 ; je 459b <_sk_store_f16_hsw+0x77>
+ DB 116,240 ; je 4a67 <_sk_store_f16_hsw+0x77>
DB 196,65,121,23,92,193,8 ; vmovhpd %xmm11,0x8(%r9,%rax,8)
DB 73,131,248,3 ; cmp $0x3,%r8
- DB 114,227 ; jb 459b <_sk_store_f16_hsw+0x77>
+ DB 114,227 ; jb 4a67 <_sk_store_f16_hsw+0x77>
DB 196,65,121,214,84,193,16 ; vmovq %xmm10,0x10(%r9,%rax,8)
- DB 116,218 ; je 459b <_sk_store_f16_hsw+0x77>
+ DB 116,218 ; je 4a67 <_sk_store_f16_hsw+0x77>
DB 196,65,121,23,84,193,24 ; vmovhpd %xmm10,0x18(%r9,%rax,8)
DB 73,131,248,5 ; cmp $0x5,%r8
- DB 114,205 ; jb 459b <_sk_store_f16_hsw+0x77>
+ DB 114,205 ; jb 4a67 <_sk_store_f16_hsw+0x77>
DB 196,65,121,214,76,193,32 ; vmovq %xmm9,0x20(%r9,%rax,8)
- DB 116,196 ; je 459b <_sk_store_f16_hsw+0x77>
+ DB 116,196 ; je 4a67 <_sk_store_f16_hsw+0x77>
DB 196,65,121,23,76,193,40 ; vmovhpd %xmm9,0x28(%r9,%rax,8)
DB 73,131,248,7 ; cmp $0x7,%r8
- DB 114,183 ; jb 459b <_sk_store_f16_hsw+0x77>
+ DB 114,183 ; jb 4a67 <_sk_store_f16_hsw+0x77>
DB 196,65,121,214,68,193,48 ; vmovq %xmm8,0x30(%r9,%rax,8)
- DB 235,174 ; jmp 459b <_sk_store_f16_hsw+0x77>
+ DB 235,174 ; jmp 4a67 <_sk_store_f16_hsw+0x77>
PUBLIC _sk_load_u16_be_hsw
_sk_load_u16_be_hsw LABEL PROC
@@ -4272,7 +4620,7 @@ _sk_load_u16_be_hsw LABEL PROC
DB 76,3,8 ; add (%rax),%r9
DB 73,99,194 ; movslq %r10d,%rax
DB 77,133,192 ; test %r8,%r8
- DB 15,133,204,0,0,0 ; jne 46e0 <_sk_load_u16_be_hsw+0xf3>
+ DB 15,133,204,0,0,0 ; jne 4bac <_sk_load_u16_be_hsw+0xf3>
DB 196,65,121,16,4,65 ; vmovupd (%r9,%rax,2),%xmm8
DB 196,193,121,16,84,65,16 ; vmovupd 0x10(%r9,%rax,2),%xmm2
DB 196,193,121,16,92,65,32 ; vmovupd 0x20(%r9,%rax,2),%xmm3
@@ -4291,7 +4639,7 @@ _sk_load_u16_be_hsw LABEL PROC
DB 197,241,235,192 ; vpor %xmm0,%xmm1,%xmm0
DB 196,226,125,51,192 ; vpmovzxwd %xmm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,98,125,24,21,91,38,0,0 ; vbroadcastss 0x265b(%rip),%ymm10 # 6cd0 <_sk_clut_4D_hsw+0xc7c>
+ DB 196,98,125,24,21,91,38,0,0 ; vbroadcastss 0x265b(%rip),%ymm10 # 719c <_sk_clut_4D_hsw+0xc7c>
DB 196,193,124,89,194 ; vmulps %ymm10,%ymm0,%ymm0
DB 197,185,109,202 ; vpunpckhqdq %xmm2,%xmm8,%xmm1
DB 197,233,113,241,8 ; vpsllw $0x8,%xmm1,%xmm2
@@ -4319,29 +4667,29 @@ _sk_load_u16_be_hsw LABEL PROC
DB 196,65,123,16,4,65 ; vmovsd (%r9,%rax,2),%xmm8
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 73,131,248,1 ; cmp $0x1,%r8
- DB 116,85 ; je 4746 <_sk_load_u16_be_hsw+0x159>
+ DB 116,85 ; je 4c12 <_sk_load_u16_be_hsw+0x159>
DB 196,65,57,22,68,65,8 ; vmovhpd 0x8(%r9,%rax,2),%xmm8,%xmm8
DB 73,131,248,3 ; cmp $0x3,%r8
- DB 114,72 ; jb 4746 <_sk_load_u16_be_hsw+0x159>
+ DB 114,72 ; jb 4c12 <_sk_load_u16_be_hsw+0x159>
DB 196,193,123,16,84,65,16 ; vmovsd 0x10(%r9,%rax,2),%xmm2
DB 73,131,248,3 ; cmp $0x3,%r8
- DB 116,72 ; je 4753 <_sk_load_u16_be_hsw+0x166>
+ DB 116,72 ; je 4c1f <_sk_load_u16_be_hsw+0x166>
DB 196,193,105,22,84,65,24 ; vmovhpd 0x18(%r9,%rax,2),%xmm2,%xmm2
DB 73,131,248,5 ; cmp $0x5,%r8
- DB 114,59 ; jb 4753 <_sk_load_u16_be_hsw+0x166>
+ DB 114,59 ; jb 4c1f <_sk_load_u16_be_hsw+0x166>
DB 196,193,123,16,92,65,32 ; vmovsd 0x20(%r9,%rax,2),%xmm3
DB 73,131,248,5 ; cmp $0x5,%r8
- DB 15,132,6,255,255,255 ; je 462f <_sk_load_u16_be_hsw+0x42>
+ DB 15,132,6,255,255,255 ; je 4afb <_sk_load_u16_be_hsw+0x42>
DB 196,193,97,22,92,65,40 ; vmovhpd 0x28(%r9,%rax,2),%xmm3,%xmm3
DB 73,131,248,7 ; cmp $0x7,%r8
- DB 15,130,245,254,255,255 ; jb 462f <_sk_load_u16_be_hsw+0x42>
+ DB 15,130,245,254,255,255 ; jb 4afb <_sk_load_u16_be_hsw+0x42>
DB 196,65,122,126,76,65,48 ; vmovq 0x30(%r9,%rax,2),%xmm9
- DB 233,233,254,255,255 ; jmpq 462f <_sk_load_u16_be_hsw+0x42>
+ DB 233,233,254,255,255 ; jmpq 4afb <_sk_load_u16_be_hsw+0x42>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2
- DB 233,220,254,255,255 ; jmpq 462f <_sk_load_u16_be_hsw+0x42>
+ DB 233,220,254,255,255 ; jmpq 4afb <_sk_load_u16_be_hsw+0x42>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
- DB 233,211,254,255,255 ; jmpq 462f <_sk_load_u16_be_hsw+0x42>
+ DB 233,211,254,255,255 ; jmpq 4afb <_sk_load_u16_be_hsw+0x42>
PUBLIC _sk_load_rgb_u16_be_hsw
_sk_load_rgb_u16_be_hsw LABEL PROC
@@ -4356,7 +4704,7 @@ _sk_load_rgb_u16_be_hsw LABEL PROC
DB 72,141,4,64 ; lea (%rax,%rax,2),%rax
DB 72,193,248,32 ; sar $0x20,%rax
DB 77,133,192 ; test %r8,%r8
- DB 15,133,204,0,0,0 ; jne 4853 <_sk_load_rgb_u16_be_hsw+0xf7>
+ DB 15,133,204,0,0,0 ; jne 4d1f <_sk_load_rgb_u16_be_hsw+0xf7>
DB 196,193,122,111,4,65 ; vmovdqu (%r9,%rax,2),%xmm0
DB 196,193,122,111,84,65,12 ; vmovdqu 0xc(%r9,%rax,2),%xmm2
DB 196,193,122,111,76,65,24 ; vmovdqu 0x18(%r9,%rax,2),%xmm1
@@ -4380,7 +4728,7 @@ _sk_load_rgb_u16_be_hsw LABEL PROC
DB 197,241,235,192 ; vpor %xmm0,%xmm1,%xmm0
DB 196,226,125,51,192 ; vpmovzxwd %xmm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,98,125,24,21,211,36,0,0 ; vbroadcastss 0x24d3(%rip),%ymm10 # 6cd4 <_sk_clut_4D_hsw+0xc80>
+ DB 196,98,125,24,21,211,36,0,0 ; vbroadcastss 0x24d3(%rip),%ymm10 # 71a0 <_sk_clut_4D_hsw+0xc80>
DB 196,193,124,89,194 ; vmulps %ymm10,%ymm0,%ymm0
DB 197,185,109,202 ; vpunpckhqdq %xmm2,%xmm8,%xmm1
DB 197,233,113,241,8 ; vpsllw $0x8,%xmm1,%xmm2
@@ -4397,41 +4745,41 @@ _sk_load_rgb_u16_be_hsw LABEL PROC
DB 197,252,91,210 ; vcvtdq2ps %ymm2,%ymm2
DB 196,193,108,89,210 ; vmulps %ymm10,%ymm2,%ymm2
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,29,135,36,0,0 ; vbroadcastss 0x2487(%rip),%ymm3 # 6cd8 <_sk_clut_4D_hsw+0xc84>
+ DB 196,226,125,24,29,135,36,0,0 ; vbroadcastss 0x2487(%rip),%ymm3 # 71a4 <_sk_clut_4D_hsw+0xc84>
DB 255,224 ; jmpq *%rax
DB 196,193,121,110,4,65 ; vmovd (%r9,%rax,2),%xmm0
DB 196,193,121,196,68,65,4,2 ; vpinsrw $0x2,0x4(%r9,%rax,2),%xmm0,%xmm0
DB 73,131,248,1 ; cmp $0x1,%r8
- DB 117,5 ; jne 486c <_sk_load_rgb_u16_be_hsw+0x110>
- DB 233,79,255,255,255 ; jmpq 47bb <_sk_load_rgb_u16_be_hsw+0x5f>
+ DB 117,5 ; jne 4d38 <_sk_load_rgb_u16_be_hsw+0x110>
+ DB 233,79,255,255,255 ; jmpq 4c87 <_sk_load_rgb_u16_be_hsw+0x5f>
DB 196,193,121,110,76,65,6 ; vmovd 0x6(%r9,%rax,2),%xmm1
DB 196,65,113,196,68,65,10,2 ; vpinsrw $0x2,0xa(%r9,%rax,2),%xmm1,%xmm8
DB 73,131,248,3 ; cmp $0x3,%r8
- DB 114,26 ; jb 489b <_sk_load_rgb_u16_be_hsw+0x13f>
+ DB 114,26 ; jb 4d67 <_sk_load_rgb_u16_be_hsw+0x13f>
DB 196,193,121,110,76,65,12 ; vmovd 0xc(%r9,%rax,2),%xmm1
DB 196,193,113,196,84,65,16,2 ; vpinsrw $0x2,0x10(%r9,%rax,2),%xmm1,%xmm2
DB 73,131,248,3 ; cmp $0x3,%r8
- DB 117,10 ; jne 48a0 <_sk_load_rgb_u16_be_hsw+0x144>
- DB 233,32,255,255,255 ; jmpq 47bb <_sk_load_rgb_u16_be_hsw+0x5f>
- DB 233,27,255,255,255 ; jmpq 47bb <_sk_load_rgb_u16_be_hsw+0x5f>
+ DB 117,10 ; jne 4d6c <_sk_load_rgb_u16_be_hsw+0x144>
+ DB 233,32,255,255,255 ; jmpq 4c87 <_sk_load_rgb_u16_be_hsw+0x5f>
+ DB 233,27,255,255,255 ; jmpq 4c87 <_sk_load_rgb_u16_be_hsw+0x5f>
DB 196,193,121,110,76,65,18 ; vmovd 0x12(%r9,%rax,2),%xmm1
DB 196,65,113,196,76,65,22,2 ; vpinsrw $0x2,0x16(%r9,%rax,2),%xmm1,%xmm9
DB 73,131,248,5 ; cmp $0x5,%r8
- DB 114,26 ; jb 48cf <_sk_load_rgb_u16_be_hsw+0x173>
+ DB 114,26 ; jb 4d9b <_sk_load_rgb_u16_be_hsw+0x173>
DB 196,193,121,110,76,65,24 ; vmovd 0x18(%r9,%rax,2),%xmm1
DB 196,193,113,196,76,65,28,2 ; vpinsrw $0x2,0x1c(%r9,%rax,2),%xmm1,%xmm1
DB 73,131,248,5 ; cmp $0x5,%r8
- DB 117,10 ; jne 48d4 <_sk_load_rgb_u16_be_hsw+0x178>
- DB 233,236,254,255,255 ; jmpq 47bb <_sk_load_rgb_u16_be_hsw+0x5f>
- DB 233,231,254,255,255 ; jmpq 47bb <_sk_load_rgb_u16_be_hsw+0x5f>
+ DB 117,10 ; jne 4da0 <_sk_load_rgb_u16_be_hsw+0x178>
+ DB 233,236,254,255,255 ; jmpq 4c87 <_sk_load_rgb_u16_be_hsw+0x5f>
+ DB 233,231,254,255,255 ; jmpq 4c87 <_sk_load_rgb_u16_be_hsw+0x5f>
DB 196,193,121,110,92,65,30 ; vmovd 0x1e(%r9,%rax,2),%xmm3
DB 196,65,97,196,92,65,34,2 ; vpinsrw $0x2,0x22(%r9,%rax,2),%xmm3,%xmm11
DB 73,131,248,7 ; cmp $0x7,%r8
- DB 114,20 ; jb 48fd <_sk_load_rgb_u16_be_hsw+0x1a1>
+ DB 114,20 ; jb 4dc9 <_sk_load_rgb_u16_be_hsw+0x1a1>
DB 196,193,121,110,92,65,36 ; vmovd 0x24(%r9,%rax,2),%xmm3
DB 196,193,97,196,92,65,40,2 ; vpinsrw $0x2,0x28(%r9,%rax,2),%xmm3,%xmm3
- DB 233,190,254,255,255 ; jmpq 47bb <_sk_load_rgb_u16_be_hsw+0x5f>
- DB 233,185,254,255,255 ; jmpq 47bb <_sk_load_rgb_u16_be_hsw+0x5f>
+ DB 233,190,254,255,255 ; jmpq 4c87 <_sk_load_rgb_u16_be_hsw+0x5f>
+ DB 233,185,254,255,255 ; jmpq 4c87 <_sk_load_rgb_u16_be_hsw+0x5f>
PUBLIC _sk_store_u16_be_hsw
_sk_store_u16_be_hsw LABEL PROC
@@ -4443,7 +4791,7 @@ _sk_store_u16_be_hsw LABEL PROC
DB 77,1,201 ; add %r9,%r9
DB 76,3,8 ; add (%rax),%r9
DB 73,99,194 ; movslq %r10d,%rax
- DB 196,98,125,24,5,179,35,0,0 ; vbroadcastss 0x23b3(%rip),%ymm8 # 6cdc <_sk_clut_4D_hsw+0xc88>
+ DB 196,98,125,24,5,179,35,0,0 ; vbroadcastss 0x23b3(%rip),%ymm8 # 71a8 <_sk_clut_4D_hsw+0xc88>
DB 196,65,124,89,200 ; vmulps %ymm8,%ymm0,%ymm9
DB 196,65,125,91,201 ; vcvtps2dq %ymm9,%ymm9
DB 196,67,125,25,202,1 ; vextractf128 $0x1,%ymm9,%xmm10
@@ -4481,7 +4829,7 @@ _sk_store_u16_be_hsw LABEL PROC
DB 196,65,17,98,200 ; vpunpckldq %xmm8,%xmm13,%xmm9
DB 196,65,17,106,192 ; vpunpckhdq %xmm8,%xmm13,%xmm8
DB 77,133,192 ; test %r8,%r8
- DB 117,31 ; jne 4a0d <_sk_store_u16_be_hsw+0x10b>
+ DB 117,31 ; jne 4ed9 <_sk_store_u16_be_hsw+0x10b>
DB 196,65,120,17,28,65 ; vmovups %xmm11,(%r9,%rax,2)
DB 196,65,120,17,84,65,16 ; vmovups %xmm10,0x10(%r9,%rax,2)
DB 196,65,120,17,76,65,32 ; vmovups %xmm9,0x20(%r9,%rax,2)
@@ -4490,28 +4838,28 @@ _sk_store_u16_be_hsw LABEL PROC
DB 255,224 ; jmpq *%rax
DB 196,65,121,214,28,65 ; vmovq %xmm11,(%r9,%rax,2)
DB 73,131,248,1 ; cmp $0x1,%r8
- DB 116,240 ; je 4a09 <_sk_store_u16_be_hsw+0x107>
+ DB 116,240 ; je 4ed5 <_sk_store_u16_be_hsw+0x107>
DB 196,65,121,23,92,65,8 ; vmovhpd %xmm11,0x8(%r9,%rax,2)
DB 73,131,248,3 ; cmp $0x3,%r8
- DB 114,227 ; jb 4a09 <_sk_store_u16_be_hsw+0x107>
+ DB 114,227 ; jb 4ed5 <_sk_store_u16_be_hsw+0x107>
DB 196,65,121,214,84,65,16 ; vmovq %xmm10,0x10(%r9,%rax,2)
- DB 116,218 ; je 4a09 <_sk_store_u16_be_hsw+0x107>
+ DB 116,218 ; je 4ed5 <_sk_store_u16_be_hsw+0x107>
DB 196,65,121,23,84,65,24 ; vmovhpd %xmm10,0x18(%r9,%rax,2)
DB 73,131,248,5 ; cmp $0x5,%r8
- DB 114,205 ; jb 4a09 <_sk_store_u16_be_hsw+0x107>
+ DB 114,205 ; jb 4ed5 <_sk_store_u16_be_hsw+0x107>
DB 196,65,121,214,76,65,32 ; vmovq %xmm9,0x20(%r9,%rax,2)
- DB 116,196 ; je 4a09 <_sk_store_u16_be_hsw+0x107>
+ DB 116,196 ; je 4ed5 <_sk_store_u16_be_hsw+0x107>
DB 196,65,121,23,76,65,40 ; vmovhpd %xmm9,0x28(%r9,%rax,2)
DB 73,131,248,7 ; cmp $0x7,%r8
- DB 114,183 ; jb 4a09 <_sk_store_u16_be_hsw+0x107>
+ DB 114,183 ; jb 4ed5 <_sk_store_u16_be_hsw+0x107>
DB 196,65,121,214,68,65,48 ; vmovq %xmm8,0x30(%r9,%rax,2)
- DB 235,174 ; jmp 4a09 <_sk_store_u16_be_hsw+0x107>
+ DB 235,174 ; jmp 4ed5 <_sk_store_u16_be_hsw+0x107>
PUBLIC _sk_load_f32_hsw
_sk_load_f32_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 73,131,248,7 ; cmp $0x7,%r8
- DB 15,135,128,0,0,0 ; ja 4ae7 <_sk_load_f32_hsw+0x8c>
+ DB 15,135,128,0,0,0 ; ja 4fb3 <_sk_load_f32_hsw+0x8c>
DB 68,141,20,149,0,0,0,0 ; lea 0x0(,%rdx,4),%r10d
DB 76,99,88,8 ; movslq 0x8(%rax),%r11
DB 76,99,201 ; movslq %ecx,%r9
@@ -4519,7 +4867,7 @@ _sk_load_f32_hsw LABEL PROC
DB 73,193,225,2 ; shl $0x2,%r9
DB 76,3,8 ; add (%rax),%r9
DB 77,99,210 ; movslq %r10d,%r10
- DB 76,141,29,133,0,0,0 ; lea 0x85(%rip),%r11 # 4b10 <_sk_load_f32_hsw+0xb5>
+ DB 76,141,29,133,0,0,0 ; lea 0x85(%rip),%r11 # 4fdc <_sk_load_f32_hsw+0xb5>
DB 75,99,4,131 ; movslq (%r11,%r8,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
@@ -4565,7 +4913,7 @@ PUBLIC _sk_load_f32_dst_hsw
_sk_load_f32_dst_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 73,131,248,7 ; cmp $0x7,%r8
- DB 15,135,128,0,0,0 ; ja 4bbc <_sk_load_f32_dst_hsw+0x8c>
+ DB 15,135,128,0,0,0 ; ja 5088 <_sk_load_f32_dst_hsw+0x8c>
DB 68,141,20,149,0,0,0,0 ; lea 0x0(,%rdx,4),%r10d
DB 76,99,88,8 ; movslq 0x8(%rax),%r11
DB 76,99,201 ; movslq %ecx,%r9
@@ -4573,7 +4921,7 @@ _sk_load_f32_dst_hsw LABEL PROC
DB 73,193,225,2 ; shl $0x2,%r9
DB 76,3,8 ; add (%rax),%r9
DB 77,99,210 ; movslq %r10d,%r10
- DB 76,141,29,132,0,0,0 ; lea 0x84(%rip),%r11 # 4be4 <_sk_load_f32_dst_hsw+0xb4>
+ DB 76,141,29,132,0,0,0 ; lea 0x84(%rip),%r11 # 50b0 <_sk_load_f32_dst_hsw+0xb4>
DB 75,99,4,131 ; movslq (%r11,%r8,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
@@ -4635,7 +4983,7 @@ _sk_store_f32_hsw LABEL PROC
DB 196,65,37,20,196 ; vunpcklpd %ymm12,%ymm11,%ymm8
DB 196,65,37,21,220 ; vunpckhpd %ymm12,%ymm11,%ymm11
DB 77,133,192 ; test %r8,%r8
- DB 117,55 ; jne 4c83 <_sk_store_f32_hsw+0x7f>
+ DB 117,55 ; jne 514f <_sk_store_f32_hsw+0x7f>
DB 196,67,45,24,225,1 ; vinsertf128 $0x1,%xmm9,%ymm10,%ymm12
DB 196,67,61,24,235,1 ; vinsertf128 $0x1,%xmm11,%ymm8,%ymm13
DB 196,67,45,6,201,49 ; vperm2f128 $0x31,%ymm9,%ymm10,%ymm9
@@ -4648,22 +4996,22 @@ _sk_store_f32_hsw LABEL PROC
DB 255,224 ; jmpq *%rax
DB 196,65,121,17,20,129 ; vmovupd %xmm10,(%r9,%rax,4)
DB 73,131,248,1 ; cmp $0x1,%r8
- DB 116,240 ; je 4c7f <_sk_store_f32_hsw+0x7b>
+ DB 116,240 ; je 514b <_sk_store_f32_hsw+0x7b>
DB 196,65,121,17,76,129,16 ; vmovupd %xmm9,0x10(%r9,%rax,4)
DB 73,131,248,3 ; cmp $0x3,%r8
- DB 114,227 ; jb 4c7f <_sk_store_f32_hsw+0x7b>
+ DB 114,227 ; jb 514b <_sk_store_f32_hsw+0x7b>
DB 196,65,121,17,68,129,32 ; vmovupd %xmm8,0x20(%r9,%rax,4)
- DB 116,218 ; je 4c7f <_sk_store_f32_hsw+0x7b>
+ DB 116,218 ; je 514b <_sk_store_f32_hsw+0x7b>
DB 196,65,121,17,92,129,48 ; vmovupd %xmm11,0x30(%r9,%rax,4)
DB 73,131,248,5 ; cmp $0x5,%r8
- DB 114,205 ; jb 4c7f <_sk_store_f32_hsw+0x7b>
+ DB 114,205 ; jb 514b <_sk_store_f32_hsw+0x7b>
DB 196,67,125,25,84,129,64,1 ; vextractf128 $0x1,%ymm10,0x40(%r9,%rax,4)
- DB 116,195 ; je 4c7f <_sk_store_f32_hsw+0x7b>
+ DB 116,195 ; je 514b <_sk_store_f32_hsw+0x7b>
DB 196,67,125,25,76,129,80,1 ; vextractf128 $0x1,%ymm9,0x50(%r9,%rax,4)
DB 73,131,248,7 ; cmp $0x7,%r8
- DB 114,181 ; jb 4c7f <_sk_store_f32_hsw+0x7b>
+ DB 114,181 ; jb 514b <_sk_store_f32_hsw+0x7b>
DB 196,67,125,25,68,129,96,1 ; vextractf128 $0x1,%ymm8,0x60(%r9,%rax,4)
- DB 235,171 ; jmp 4c7f <_sk_store_f32_hsw+0x7b>
+ DB 235,171 ; jmp 514b <_sk_store_f32_hsw+0x7b>
PUBLIC _sk_clamp_x_hsw
_sk_clamp_x_hsw LABEL PROC
@@ -4730,7 +5078,7 @@ _sk_mirror_x_hsw LABEL PROC
DB 196,65,124,92,218 ; vsubps %ymm10,%ymm0,%ymm11
DB 196,193,58,88,192 ; vaddss %xmm8,%xmm8,%xmm0
DB 196,98,125,24,192 ; vbroadcastss %xmm0,%ymm8
- DB 197,178,89,5,47,31,0,0 ; vmulss 0x1f2f(%rip),%xmm9,%xmm0 # 6ce0 <_sk_clut_4D_hsw+0xc8c>
+ DB 197,178,89,5,47,31,0,0 ; vmulss 0x1f2f(%rip),%xmm9,%xmm0 # 71ac <_sk_clut_4D_hsw+0xc8c>
DB 196,226,125,24,192 ; vbroadcastss %xmm0,%ymm0
DB 197,164,89,192 ; vmulps %ymm0,%ymm11,%ymm0
DB 196,227,125,8,192,1 ; vroundps $0x1,%ymm0,%ymm0
@@ -4755,7 +5103,7 @@ _sk_mirror_y_hsw LABEL PROC
DB 196,65,116,92,218 ; vsubps %ymm10,%ymm1,%ymm11
DB 196,193,58,88,200 ; vaddss %xmm8,%xmm8,%xmm1
DB 196,98,125,24,193 ; vbroadcastss %xmm1,%ymm8
- DB 197,178,89,13,207,30,0,0 ; vmulss 0x1ecf(%rip),%xmm9,%xmm1 # 6ce4 <_sk_clut_4D_hsw+0xc90>
+ DB 197,178,89,13,207,30,0,0 ; vmulss 0x1ecf(%rip),%xmm9,%xmm1 # 71b0 <_sk_clut_4D_hsw+0xc90>
DB 196,226,125,24,201 ; vbroadcastss %xmm1,%ymm1
DB 197,164,89,201 ; vmulps %ymm1,%ymm11,%ymm1
DB 196,227,125,8,201,1 ; vroundps $0x1,%ymm1,%ymm1
@@ -4775,7 +5123,7 @@ PUBLIC _sk_clamp_x_1_hsw
_sk_clamp_x_1_hsw LABEL PROC
DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
DB 197,188,95,192 ; vmaxps %ymm0,%ymm8,%ymm0
- DB 196,98,125,24,5,132,30,0,0 ; vbroadcastss 0x1e84(%rip),%ymm8 # 6ce8 <_sk_clut_4D_hsw+0xc94>
+ DB 196,98,125,24,5,132,30,0,0 ; vbroadcastss 0x1e84(%rip),%ymm8 # 71b4 <_sk_clut_4D_hsw+0xc94>
DB 196,193,124,93,192 ; vminps %ymm8,%ymm0,%ymm0
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -4789,9 +5137,9 @@ _sk_repeat_x_1_hsw LABEL PROC
PUBLIC _sk_mirror_x_1_hsw
_sk_mirror_x_1_hsw LABEL PROC
- DB 196,98,125,24,5,103,30,0,0 ; vbroadcastss 0x1e67(%rip),%ymm8 # 6cec <_sk_clut_4D_hsw+0xc98>
+ DB 196,98,125,24,5,103,30,0,0 ; vbroadcastss 0x1e67(%rip),%ymm8 # 71b8 <_sk_clut_4D_hsw+0xc98>
DB 196,193,124,88,192 ; vaddps %ymm8,%ymm0,%ymm0
- DB 196,98,125,24,13,93,30,0,0 ; vbroadcastss 0x1e5d(%rip),%ymm9 # 6cf0 <_sk_clut_4D_hsw+0xc9c>
+ DB 196,98,125,24,13,93,30,0,0 ; vbroadcastss 0x1e5d(%rip),%ymm9 # 71bc <_sk_clut_4D_hsw+0xc9c>
DB 196,65,124,89,201 ; vmulps %ymm9,%ymm0,%ymm9
DB 196,67,125,8,201,1 ; vroundps $0x1,%ymm9,%ymm9
DB 196,65,52,88,201 ; vaddps %ymm9,%ymm9,%ymm9
@@ -4805,11 +5153,11 @@ _sk_mirror_x_1_hsw LABEL PROC
PUBLIC _sk_luminance_to_alpha_hsw
_sk_luminance_to_alpha_hsw LABEL PROC
- DB 196,226,125,24,29,45,30,0,0 ; vbroadcastss 0x1e2d(%rip),%ymm3 # 6cf4 <_sk_clut_4D_hsw+0xca0>
- DB 196,98,125,24,5,40,30,0,0 ; vbroadcastss 0x1e28(%rip),%ymm8 # 6cf8 <_sk_clut_4D_hsw+0xca4>
+ DB 196,226,125,24,29,45,30,0,0 ; vbroadcastss 0x1e2d(%rip),%ymm3 # 71c0 <_sk_clut_4D_hsw+0xca0>
+ DB 196,98,125,24,5,40,30,0,0 ; vbroadcastss 0x1e28(%rip),%ymm8 # 71c4 <_sk_clut_4D_hsw+0xca4>
DB 196,193,116,89,200 ; vmulps %ymm8,%ymm1,%ymm1
DB 196,226,125,184,203 ; vfmadd231ps %ymm3,%ymm0,%ymm1
- DB 196,226,125,24,29,25,30,0,0 ; vbroadcastss 0x1e19(%rip),%ymm3 # 6cfc <_sk_clut_4D_hsw+0xca8>
+ DB 196,226,125,24,29,25,30,0,0 ; vbroadcastss 0x1e19(%rip),%ymm3 # 71c8 <_sk_clut_4D_hsw+0xca8>
DB 196,226,109,168,217 ; vfmadd213ps %ymm1,%ymm2,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
DB 197,252,87,192 ; vxorps %ymm0,%ymm0,%ymm0
@@ -4993,9 +5341,9 @@ _sk_evenly_spaced_gradient_hsw LABEL PROC
DB 76,139,72,8 ; mov 0x8(%rax),%r9
DB 77,137,211 ; mov %r10,%r11
DB 73,255,203 ; dec %r11
- DB 120,7 ; js 51dc <_sk_evenly_spaced_gradient_hsw+0x19>
+ DB 120,7 ; js 56a8 <_sk_evenly_spaced_gradient_hsw+0x19>
DB 196,193,242,42,203 ; vcvtsi2ss %r11,%xmm1,%xmm1
- DB 235,22 ; jmp 51f2 <_sk_evenly_spaced_gradient_hsw+0x2f>
+ DB 235,22 ; jmp 56be <_sk_evenly_spaced_gradient_hsw+0x2f>
DB 76,137,219 ; mov %r11,%rbx
DB 72,209,235 ; shr %rbx
DB 65,131,227,1 ; and $0x1,%r11d
@@ -5006,7 +5354,7 @@ _sk_evenly_spaced_gradient_hsw LABEL PROC
DB 197,244,89,200 ; vmulps %ymm0,%ymm1,%ymm1
DB 197,126,91,217 ; vcvttps2dq %ymm1,%ymm11
DB 73,131,250,8 ; cmp $0x8,%r10
- DB 119,70 ; ja 524b <_sk_evenly_spaced_gradient_hsw+0x88>
+ DB 119,70 ; ja 5717 <_sk_evenly_spaced_gradient_hsw+0x88>
DB 196,66,37,22,1 ; vpermps (%r9),%ymm11,%ymm8
DB 72,139,88,40 ; mov 0x28(%rax),%rbx
DB 196,98,37,22,11 ; vpermps (%rbx),%ymm11,%ymm9
@@ -5022,7 +5370,7 @@ _sk_evenly_spaced_gradient_hsw LABEL PROC
DB 196,226,37,22,27 ; vpermps (%rbx),%ymm11,%ymm3
DB 72,139,64,64 ; mov 0x40(%rax),%rax
DB 196,98,37,22,40 ; vpermps (%rax),%ymm11,%ymm13
- DB 235,110 ; jmp 52b9 <_sk_evenly_spaced_gradient_hsw+0xf6>
+ DB 235,110 ; jmp 5785 <_sk_evenly_spaced_gradient_hsw+0xf6>
DB 196,65,13,118,246 ; vpcmpeqd %ymm14,%ymm14,%ymm14
DB 197,245,118,201 ; vpcmpeqd %ymm1,%ymm1,%ymm1
DB 196,2,117,146,4,153 ; vgatherdps %ymm1,(%r9,%ymm11,4),%ymm8
@@ -5057,14 +5405,14 @@ _sk_evenly_spaced_gradient_hsw LABEL PROC
PUBLIC _sk_gauss_a_to_rgba_hsw
_sk_gauss_a_to_rgba_hsw LABEL PROC
- DB 196,226,125,24,5,33,26,0,0 ; vbroadcastss 0x1a21(%rip),%ymm0 # 6d00 <_sk_clut_4D_hsw+0xcac>
- DB 196,226,125,24,13,28,26,0,0 ; vbroadcastss 0x1a1c(%rip),%ymm1 # 6d04 <_sk_clut_4D_hsw+0xcb0>
+ DB 196,226,125,24,5,33,26,0,0 ; vbroadcastss 0x1a21(%rip),%ymm0 # 71cc <_sk_clut_4D_hsw+0xcac>
+ DB 196,226,125,24,13,28,26,0,0 ; vbroadcastss 0x1a1c(%rip),%ymm1 # 71d0 <_sk_clut_4D_hsw+0xcb0>
DB 196,226,101,168,200 ; vfmadd213ps %ymm0,%ymm3,%ymm1
- DB 196,226,125,24,5,18,26,0,0 ; vbroadcastss 0x1a12(%rip),%ymm0 # 6d08 <_sk_clut_4D_hsw+0xcb4>
+ DB 196,226,125,24,5,18,26,0,0 ; vbroadcastss 0x1a12(%rip),%ymm0 # 71d4 <_sk_clut_4D_hsw+0xcb4>
DB 196,226,101,184,193 ; vfmadd231ps %ymm1,%ymm3,%ymm0
- DB 196,226,125,24,13,8,26,0,0 ; vbroadcastss 0x1a08(%rip),%ymm1 # 6d0c <_sk_clut_4D_hsw+0xcb8>
+ DB 196,226,125,24,13,8,26,0,0 ; vbroadcastss 0x1a08(%rip),%ymm1 # 71d8 <_sk_clut_4D_hsw+0xcb8>
DB 196,226,101,184,200 ; vfmadd231ps %ymm0,%ymm3,%ymm1
- DB 196,226,125,24,5,254,25,0,0 ; vbroadcastss 0x19fe(%rip),%ymm0 # 6d10 <_sk_clut_4D_hsw+0xcbc>
+ DB 196,226,125,24,5,254,25,0,0 ; vbroadcastss 0x19fe(%rip),%ymm0 # 71dc <_sk_clut_4D_hsw+0xcbc>
DB 196,226,101,184,193 ; vfmadd231ps %ymm1,%ymm3,%ymm0
DB 72,173 ; lods %ds:(%rsi),%rax
DB 197,252,40,200 ; vmovaps %ymm0,%ymm1
@@ -5077,11 +5425,11 @@ _sk_gradient_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,8 ; mov (%rax),%r9
DB 73,131,249,1 ; cmp $0x1,%r9
- DB 15,134,180,0,0,0 ; jbe 53ea <_sk_gradient_hsw+0xc3>
+ DB 15,134,180,0,0,0 ; jbe 58b6 <_sk_gradient_hsw+0xc3>
DB 76,139,80,72 ; mov 0x48(%rax),%r10
DB 197,244,87,201 ; vxorps %ymm1,%ymm1,%ymm1
DB 65,187,1,0,0,0 ; mov $0x1,%r11d
- DB 196,226,125,24,21,199,25,0,0 ; vbroadcastss 0x19c7(%rip),%ymm2 # 6d14 <_sk_clut_4D_hsw+0xcc0>
+ DB 196,226,125,24,21,199,25,0,0 ; vbroadcastss 0x19c7(%rip),%ymm2 # 71e0 <_sk_clut_4D_hsw+0xcc0>
DB 196,65,53,239,201 ; vpxor %ymm9,%ymm9,%ymm9
DB 196,130,125,24,28,154 ; vbroadcastss (%r10,%r11,4),%ymm3
DB 197,228,194,216,2 ; vcmpleps %ymm0,%ymm3,%ymm3
@@ -5089,10 +5437,10 @@ _sk_gradient_hsw LABEL PROC
DB 196,65,101,254,201 ; vpaddd %ymm9,%ymm3,%ymm9
DB 73,255,195 ; inc %r11
DB 77,57,217 ; cmp %r11,%r9
- DB 117,226 ; jne 5352 <_sk_gradient_hsw+0x2b>
+ DB 117,226 ; jne 581e <_sk_gradient_hsw+0x2b>
DB 76,139,80,8 ; mov 0x8(%rax),%r10
DB 73,131,249,8 ; cmp $0x8,%r9
- DB 118,121 ; jbe 53f3 <_sk_gradient_hsw+0xcc>
+ DB 118,121 ; jbe 58bf <_sk_gradient_hsw+0xcc>
DB 196,65,13,118,246 ; vpcmpeqd %ymm14,%ymm14,%ymm14
DB 197,245,118,201 ; vpcmpeqd %ymm1,%ymm1,%ymm1
DB 196,2,117,146,4,138 ; vgatherdps %ymm1,(%r10,%ymm9,4),%ymm8
@@ -5116,7 +5464,7 @@ _sk_gradient_hsw LABEL PROC
DB 196,130,21,146,28,137 ; vgatherdps %ymm13,(%r9,%ymm9,4),%ymm3
DB 72,139,64,64 ; mov 0x40(%rax),%rax
DB 196,34,13,146,44,136 ; vgatherdps %ymm14,(%rax,%ymm9,4),%ymm13
- DB 235,77 ; jmp 5437 <_sk_gradient_hsw+0x110>
+ DB 235,77 ; jmp 5903 <_sk_gradient_hsw+0x110>
DB 76,139,80,8 ; mov 0x8(%rax),%r10
DB 196,65,52,87,201 ; vxorps %ymm9,%ymm9,%ymm9
DB 196,66,53,22,2 ; vpermps (%r10),%ymm9,%ymm8
@@ -5172,24 +5520,24 @@ _sk_xy_to_unit_angle_hsw LABEL PROC
DB 196,65,52,95,226 ; vmaxps %ymm10,%ymm9,%ymm12
DB 196,65,36,94,220 ; vdivps %ymm12,%ymm11,%ymm11
DB 196,65,36,89,227 ; vmulps %ymm11,%ymm11,%ymm12
- DB 196,98,125,24,45,70,24,0,0 ; vbroadcastss 0x1846(%rip),%ymm13 # 6d18 <_sk_clut_4D_hsw+0xcc4>
- DB 196,98,125,24,53,65,24,0,0 ; vbroadcastss 0x1841(%rip),%ymm14 # 6d1c <_sk_clut_4D_hsw+0xcc8>
+ DB 196,98,125,24,45,70,24,0,0 ; vbroadcastss 0x1846(%rip),%ymm13 # 71e4 <_sk_clut_4D_hsw+0xcc4>
+ DB 196,98,125,24,53,65,24,0,0 ; vbroadcastss 0x1841(%rip),%ymm14 # 71e8 <_sk_clut_4D_hsw+0xcc8>
DB 196,66,29,184,245 ; vfmadd231ps %ymm13,%ymm12,%ymm14
- DB 196,98,125,24,45,55,24,0,0 ; vbroadcastss 0x1837(%rip),%ymm13 # 6d20 <_sk_clut_4D_hsw+0xccc>
+ DB 196,98,125,24,45,55,24,0,0 ; vbroadcastss 0x1837(%rip),%ymm13 # 71ec <_sk_clut_4D_hsw+0xccc>
DB 196,66,29,184,238 ; vfmadd231ps %ymm14,%ymm12,%ymm13
- DB 196,98,125,24,53,45,24,0,0 ; vbroadcastss 0x182d(%rip),%ymm14 # 6d24 <_sk_clut_4D_hsw+0xcd0>
+ DB 196,98,125,24,53,45,24,0,0 ; vbroadcastss 0x182d(%rip),%ymm14 # 71f0 <_sk_clut_4D_hsw+0xcd0>
DB 196,66,29,184,245 ; vfmadd231ps %ymm13,%ymm12,%ymm14
DB 196,65,36,89,222 ; vmulps %ymm14,%ymm11,%ymm11
DB 196,65,52,194,202,1 ; vcmpltps %ymm10,%ymm9,%ymm9
- DB 196,98,125,24,21,24,24,0,0 ; vbroadcastss 0x1818(%rip),%ymm10 # 6d28 <_sk_clut_4D_hsw+0xcd4>
+ DB 196,98,125,24,21,24,24,0,0 ; vbroadcastss 0x1818(%rip),%ymm10 # 71f4 <_sk_clut_4D_hsw+0xcd4>
DB 196,65,44,92,211 ; vsubps %ymm11,%ymm10,%ymm10
DB 196,67,37,74,202,144 ; vblendvps %ymm9,%ymm10,%ymm11,%ymm9
DB 196,193,124,194,192,1 ; vcmpltps %ymm8,%ymm0,%ymm0
- DB 196,98,125,24,21,2,24,0,0 ; vbroadcastss 0x1802(%rip),%ymm10 # 6d2c <_sk_clut_4D_hsw+0xcd8>
+ DB 196,98,125,24,21,2,24,0,0 ; vbroadcastss 0x1802(%rip),%ymm10 # 71f8 <_sk_clut_4D_hsw+0xcd8>
DB 196,65,44,92,209 ; vsubps %ymm9,%ymm10,%ymm10
DB 196,195,53,74,194,0 ; vblendvps %ymm0,%ymm10,%ymm9,%ymm0
DB 196,65,116,194,200,1 ; vcmpltps %ymm8,%ymm1,%ymm9
- DB 196,98,125,24,21,236,23,0,0 ; vbroadcastss 0x17ec(%rip),%ymm10 # 6d30 <_sk_clut_4D_hsw+0xcdc>
+ DB 196,98,125,24,21,236,23,0,0 ; vbroadcastss 0x17ec(%rip),%ymm10 # 71fc <_sk_clut_4D_hsw+0xcdc>
DB 197,44,92,208 ; vsubps %ymm0,%ymm10,%ymm10
DB 196,195,125,74,194,144 ; vblendvps %ymm9,%ymm10,%ymm0,%ymm0
DB 196,65,124,194,200,3 ; vcmpunordps %ymm8,%ymm0,%ymm9
@@ -5213,23 +5561,23 @@ _sk_xy_to_2pt_conical_quadratic_max_hsw LABEL PROC
DB 197,50,89,80,44 ; vmulss 0x2c(%rax),%xmm9,%xmm10
DB 196,66,125,24,210 ; vbroadcastss %xmm10,%ymm10
DB 197,44,88,208 ; vaddps %ymm0,%ymm10,%ymm10
- DB 196,98,125,24,29,160,23,0,0 ; vbroadcastss 0x17a0(%rip),%ymm11 # 6d34 <_sk_clut_4D_hsw+0xce0>
+ DB 196,98,125,24,29,160,23,0,0 ; vbroadcastss 0x17a0(%rip),%ymm11 # 7200 <_sk_clut_4D_hsw+0xce0>
DB 196,65,44,89,211 ; vmulps %ymm11,%ymm10,%ymm10
DB 197,116,89,217 ; vmulps %ymm1,%ymm1,%ymm11
DB 196,98,125,184,216 ; vfmadd231ps %ymm0,%ymm0,%ymm11
DB 196,193,50,89,193 ; vmulss %xmm9,%xmm9,%xmm0
DB 196,226,125,24,192 ; vbroadcastss %xmm0,%ymm0
DB 197,164,92,192 ; vsubps %ymm0,%ymm11,%ymm0
- DB 196,98,125,24,13,127,23,0,0 ; vbroadcastss 0x177f(%rip),%ymm9 # 6d38 <_sk_clut_4D_hsw+0xce4>
+ DB 196,98,125,24,13,127,23,0,0 ; vbroadcastss 0x177f(%rip),%ymm9 # 7204 <_sk_clut_4D_hsw+0xce4>
DB 196,65,60,89,193 ; vmulps %ymm9,%ymm8,%ymm8
DB 197,188,89,192 ; vmulps %ymm0,%ymm8,%ymm0
DB 196,194,45,184,194 ; vfmadd231ps %ymm10,%ymm10,%ymm0
DB 197,252,81,192 ; vsqrtps %ymm0,%ymm0
DB 196,98,125,24,64,36 ; vbroadcastss 0x24(%rax),%ymm8
- DB 196,98,125,24,13,98,23,0,0 ; vbroadcastss 0x1762(%rip),%ymm9 # 6d3c <_sk_clut_4D_hsw+0xce8>
+ DB 196,98,125,24,13,98,23,0,0 ; vbroadcastss 0x1762(%rip),%ymm9 # 7208 <_sk_clut_4D_hsw+0xce8>
DB 196,65,44,87,201 ; vxorps %ymm9,%ymm10,%ymm9
DB 196,65,124,92,210 ; vsubps %ymm10,%ymm0,%ymm10
- DB 196,98,125,24,29,83,23,0,0 ; vbroadcastss 0x1753(%rip),%ymm11 # 6d40 <_sk_clut_4D_hsw+0xcec>
+ DB 196,98,125,24,29,83,23,0,0 ; vbroadcastss 0x1753(%rip),%ymm11 # 720c <_sk_clut_4D_hsw+0xcec>
DB 196,65,60,89,195 ; vmulps %ymm11,%ymm8,%ymm8
DB 196,65,44,89,208 ; vmulps %ymm8,%ymm10,%ymm10
DB 197,180,92,192 ; vsubps %ymm0,%ymm9,%ymm0
@@ -5246,23 +5594,23 @@ _sk_xy_to_2pt_conical_quadratic_min_hsw LABEL PROC
DB 197,50,89,80,44 ; vmulss 0x2c(%rax),%xmm9,%xmm10
DB 196,66,125,24,210 ; vbroadcastss %xmm10,%ymm10
DB 197,44,88,208 ; vaddps %ymm0,%ymm10,%ymm10
- DB 196,98,125,24,29,24,23,0,0 ; vbroadcastss 0x1718(%rip),%ymm11 # 6d44 <_sk_clut_4D_hsw+0xcf0>
+ DB 196,98,125,24,29,24,23,0,0 ; vbroadcastss 0x1718(%rip),%ymm11 # 7210 <_sk_clut_4D_hsw+0xcf0>
DB 196,65,44,89,211 ; vmulps %ymm11,%ymm10,%ymm10
DB 197,116,89,217 ; vmulps %ymm1,%ymm1,%ymm11
DB 196,98,125,184,216 ; vfmadd231ps %ymm0,%ymm0,%ymm11
DB 196,193,50,89,193 ; vmulss %xmm9,%xmm9,%xmm0
DB 196,226,125,24,192 ; vbroadcastss %xmm0,%ymm0
DB 197,164,92,192 ; vsubps %ymm0,%ymm11,%ymm0
- DB 196,98,125,24,13,247,22,0,0 ; vbroadcastss 0x16f7(%rip),%ymm9 # 6d48 <_sk_clut_4D_hsw+0xcf4>
+ DB 196,98,125,24,13,247,22,0,0 ; vbroadcastss 0x16f7(%rip),%ymm9 # 7214 <_sk_clut_4D_hsw+0xcf4>
DB 196,65,60,89,193 ; vmulps %ymm9,%ymm8,%ymm8
DB 197,188,89,192 ; vmulps %ymm0,%ymm8,%ymm0
DB 196,194,45,184,194 ; vfmadd231ps %ymm10,%ymm10,%ymm0
DB 197,252,81,192 ; vsqrtps %ymm0,%ymm0
DB 196,98,125,24,64,36 ; vbroadcastss 0x24(%rax),%ymm8
- DB 196,98,125,24,13,218,22,0,0 ; vbroadcastss 0x16da(%rip),%ymm9 # 6d4c <_sk_clut_4D_hsw+0xcf8>
+ DB 196,98,125,24,13,218,22,0,0 ; vbroadcastss 0x16da(%rip),%ymm9 # 7218 <_sk_clut_4D_hsw+0xcf8>
DB 196,65,44,87,201 ; vxorps %ymm9,%ymm10,%ymm9
DB 196,65,124,92,210 ; vsubps %ymm10,%ymm0,%ymm10
- DB 196,98,125,24,29,203,22,0,0 ; vbroadcastss 0x16cb(%rip),%ymm11 # 6d50 <_sk_clut_4D_hsw+0xcfc>
+ DB 196,98,125,24,29,203,22,0,0 ; vbroadcastss 0x16cb(%rip),%ymm11 # 721c <_sk_clut_4D_hsw+0xcfc>
DB 196,65,60,89,195 ; vmulps %ymm11,%ymm8,%ymm8
DB 196,65,44,89,208 ; vmulps %ymm8,%ymm10,%ymm10
DB 197,180,92,192 ; vsubps %ymm0,%ymm9,%ymm0
@@ -5278,14 +5626,14 @@ _sk_xy_to_2pt_conical_linear_hsw LABEL PROC
DB 197,58,89,72,44 ; vmulss 0x2c(%rax),%xmm8,%xmm9
DB 196,66,125,24,201 ; vbroadcastss %xmm9,%ymm9
DB 197,52,88,200 ; vaddps %ymm0,%ymm9,%ymm9
- DB 196,98,125,24,21,150,22,0,0 ; vbroadcastss 0x1696(%rip),%ymm10 # 6d54 <_sk_clut_4D_hsw+0xd00>
+ DB 196,98,125,24,21,150,22,0,0 ; vbroadcastss 0x1696(%rip),%ymm10 # 7220 <_sk_clut_4D_hsw+0xd00>
DB 196,65,52,89,202 ; vmulps %ymm10,%ymm9,%ymm9
DB 197,116,89,209 ; vmulps %ymm1,%ymm1,%ymm10
DB 196,98,125,184,208 ; vfmadd231ps %ymm0,%ymm0,%ymm10
DB 196,193,58,89,192 ; vmulss %xmm8,%xmm8,%xmm0
DB 196,226,125,24,192 ; vbroadcastss %xmm0,%ymm0
DB 197,172,92,192 ; vsubps %ymm0,%ymm10,%ymm0
- DB 196,98,125,24,5,117,22,0,0 ; vbroadcastss 0x1675(%rip),%ymm8 # 6d58 <_sk_clut_4D_hsw+0xd04>
+ DB 196,98,125,24,5,117,22,0,0 ; vbroadcastss 0x1675(%rip),%ymm8 # 7224 <_sk_clut_4D_hsw+0xd04>
DB 196,193,124,87,192 ; vxorps %ymm8,%ymm0,%ymm0
DB 196,193,124,94,193 ; vdivps %ymm9,%ymm0,%ymm0
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -5319,7 +5667,7 @@ _sk_apply_vector_mask_hsw LABEL PROC
PUBLIC _sk_save_xy_hsw
_sk_save_xy_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,98,125,24,5,21,22,0,0 ; vbroadcastss 0x1615(%rip),%ymm8 # 6d5c <_sk_clut_4D_hsw+0xd08>
+ DB 196,98,125,24,5,21,22,0,0 ; vbroadcastss 0x1615(%rip),%ymm8 # 7228 <_sk_clut_4D_hsw+0xd08>
DB 196,65,124,88,200 ; vaddps %ymm8,%ymm0,%ymm9
DB 196,67,125,8,209,1 ; vroundps $0x1,%ymm9,%ymm10
DB 196,65,52,92,202 ; vsubps %ymm10,%ymm9,%ymm9
@@ -5349,9 +5697,9 @@ _sk_accumulate_hsw LABEL PROC
PUBLIC _sk_bilinear_nx_hsw
_sk_bilinear_nx_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,5,169,21,0,0 ; vbroadcastss 0x15a9(%rip),%ymm0 # 6d60 <_sk_clut_4D_hsw+0xd0c>
+ DB 196,226,125,24,5,169,21,0,0 ; vbroadcastss 0x15a9(%rip),%ymm0 # 722c <_sk_clut_4D_hsw+0xd0c>
DB 197,252,88,0 ; vaddps (%rax),%ymm0,%ymm0
- DB 196,98,125,24,5,160,21,0,0 ; vbroadcastss 0x15a0(%rip),%ymm8 # 6d64 <_sk_clut_4D_hsw+0xd10>
+ DB 196,98,125,24,5,160,21,0,0 ; vbroadcastss 0x15a0(%rip),%ymm8 # 7230 <_sk_clut_4D_hsw+0xd10>
DB 197,60,92,64,64 ; vsubps 0x40(%rax),%ymm8,%ymm8
DB 197,124,17,128,128,0,0,0 ; vmovups %ymm8,0x80(%rax)
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -5360,7 +5708,7 @@ _sk_bilinear_nx_hsw LABEL PROC
PUBLIC _sk_bilinear_px_hsw
_sk_bilinear_px_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,5,136,21,0,0 ; vbroadcastss 0x1588(%rip),%ymm0 # 6d68 <_sk_clut_4D_hsw+0xd14>
+ DB 196,226,125,24,5,136,21,0,0 ; vbroadcastss 0x1588(%rip),%ymm0 # 7234 <_sk_clut_4D_hsw+0xd14>
DB 197,252,88,0 ; vaddps (%rax),%ymm0,%ymm0
DB 197,124,16,64,64 ; vmovups 0x40(%rax),%ymm8
DB 197,124,17,128,128,0,0,0 ; vmovups %ymm8,0x80(%rax)
@@ -5370,9 +5718,9 @@ _sk_bilinear_px_hsw LABEL PROC
PUBLIC _sk_bilinear_ny_hsw
_sk_bilinear_ny_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,13,108,21,0,0 ; vbroadcastss 0x156c(%rip),%ymm1 # 6d6c <_sk_clut_4D_hsw+0xd18>
+ DB 196,226,125,24,13,108,21,0,0 ; vbroadcastss 0x156c(%rip),%ymm1 # 7238 <_sk_clut_4D_hsw+0xd18>
DB 197,244,88,72,32 ; vaddps 0x20(%rax),%ymm1,%ymm1
- DB 196,98,125,24,5,98,21,0,0 ; vbroadcastss 0x1562(%rip),%ymm8 # 6d70 <_sk_clut_4D_hsw+0xd1c>
+ DB 196,98,125,24,5,98,21,0,0 ; vbroadcastss 0x1562(%rip),%ymm8 # 723c <_sk_clut_4D_hsw+0xd1c>
DB 197,60,92,64,96 ; vsubps 0x60(%rax),%ymm8,%ymm8
DB 197,124,17,128,160,0,0,0 ; vmovups %ymm8,0xa0(%rax)
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -5381,7 +5729,7 @@ _sk_bilinear_ny_hsw LABEL PROC
PUBLIC _sk_bilinear_py_hsw
_sk_bilinear_py_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,13,74,21,0,0 ; vbroadcastss 0x154a(%rip),%ymm1 # 6d74 <_sk_clut_4D_hsw+0xd20>
+ DB 196,226,125,24,13,74,21,0,0 ; vbroadcastss 0x154a(%rip),%ymm1 # 7240 <_sk_clut_4D_hsw+0xd20>
DB 197,244,88,72,32 ; vaddps 0x20(%rax),%ymm1,%ymm1
DB 197,124,16,64,96 ; vmovups 0x60(%rax),%ymm8
DB 197,124,17,128,160,0,0,0 ; vmovups %ymm8,0xa0(%rax)
@@ -5391,13 +5739,13 @@ _sk_bilinear_py_hsw LABEL PROC
PUBLIC _sk_bicubic_n3x_hsw
_sk_bicubic_n3x_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,5,45,21,0,0 ; vbroadcastss 0x152d(%rip),%ymm0 # 6d78 <_sk_clut_4D_hsw+0xd24>
+ DB 196,226,125,24,5,45,21,0,0 ; vbroadcastss 0x152d(%rip),%ymm0 # 7244 <_sk_clut_4D_hsw+0xd24>
DB 197,252,88,0 ; vaddps (%rax),%ymm0,%ymm0
- DB 196,98,125,24,5,36,21,0,0 ; vbroadcastss 0x1524(%rip),%ymm8 # 6d7c <_sk_clut_4D_hsw+0xd28>
+ DB 196,98,125,24,5,36,21,0,0 ; vbroadcastss 0x1524(%rip),%ymm8 # 7248 <_sk_clut_4D_hsw+0xd28>
DB 197,60,92,64,64 ; vsubps 0x40(%rax),%ymm8,%ymm8
DB 196,65,60,89,200 ; vmulps %ymm8,%ymm8,%ymm9
- DB 196,98,125,24,21,21,21,0,0 ; vbroadcastss 0x1515(%rip),%ymm10 # 6d80 <_sk_clut_4D_hsw+0xd2c>
- DB 196,98,125,24,29,16,21,0,0 ; vbroadcastss 0x1510(%rip),%ymm11 # 6d84 <_sk_clut_4D_hsw+0xd30>
+ DB 196,98,125,24,21,21,21,0,0 ; vbroadcastss 0x1515(%rip),%ymm10 # 724c <_sk_clut_4D_hsw+0xd2c>
+ DB 196,98,125,24,29,16,21,0,0 ; vbroadcastss 0x1510(%rip),%ymm11 # 7250 <_sk_clut_4D_hsw+0xd30>
DB 196,66,61,168,218 ; vfmadd213ps %ymm10,%ymm8,%ymm11
DB 196,65,36,89,193 ; vmulps %ymm9,%ymm11,%ymm8
DB 197,124,17,128,128,0,0,0 ; vmovups %ymm8,0x80(%rax)
@@ -5407,16 +5755,16 @@ _sk_bicubic_n3x_hsw LABEL PROC
PUBLIC _sk_bicubic_n1x_hsw
_sk_bicubic_n1x_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,5,243,20,0,0 ; vbroadcastss 0x14f3(%rip),%ymm0 # 6d88 <_sk_clut_4D_hsw+0xd34>
+ DB 196,226,125,24,5,243,20,0,0 ; vbroadcastss 0x14f3(%rip),%ymm0 # 7254 <_sk_clut_4D_hsw+0xd34>
DB 197,252,88,0 ; vaddps (%rax),%ymm0,%ymm0
- DB 196,98,125,24,5,234,20,0,0 ; vbroadcastss 0x14ea(%rip),%ymm8 # 6d8c <_sk_clut_4D_hsw+0xd38>
+ DB 196,98,125,24,5,234,20,0,0 ; vbroadcastss 0x14ea(%rip),%ymm8 # 7258 <_sk_clut_4D_hsw+0xd38>
DB 197,60,92,64,64 ; vsubps 0x40(%rax),%ymm8,%ymm8
- DB 196,98,125,24,13,224,20,0,0 ; vbroadcastss 0x14e0(%rip),%ymm9 # 6d90 <_sk_clut_4D_hsw+0xd3c>
- DB 196,98,125,24,21,219,20,0,0 ; vbroadcastss 0x14db(%rip),%ymm10 # 6d94 <_sk_clut_4D_hsw+0xd40>
+ DB 196,98,125,24,13,224,20,0,0 ; vbroadcastss 0x14e0(%rip),%ymm9 # 725c <_sk_clut_4D_hsw+0xd3c>
+ DB 196,98,125,24,21,219,20,0,0 ; vbroadcastss 0x14db(%rip),%ymm10 # 7260 <_sk_clut_4D_hsw+0xd40>
DB 196,66,61,168,209 ; vfmadd213ps %ymm9,%ymm8,%ymm10
- DB 196,98,125,24,13,209,20,0,0 ; vbroadcastss 0x14d1(%rip),%ymm9 # 6d98 <_sk_clut_4D_hsw+0xd44>
+ DB 196,98,125,24,13,209,20,0,0 ; vbroadcastss 0x14d1(%rip),%ymm9 # 7264 <_sk_clut_4D_hsw+0xd44>
DB 196,66,61,184,202 ; vfmadd231ps %ymm10,%ymm8,%ymm9
- DB 196,98,125,24,21,199,20,0,0 ; vbroadcastss 0x14c7(%rip),%ymm10 # 6d9c <_sk_clut_4D_hsw+0xd48>
+ DB 196,98,125,24,21,199,20,0,0 ; vbroadcastss 0x14c7(%rip),%ymm10 # 7268 <_sk_clut_4D_hsw+0xd48>
DB 196,66,61,184,209 ; vfmadd231ps %ymm9,%ymm8,%ymm10
DB 197,124,17,144,128,0,0,0 ; vmovups %ymm10,0x80(%rax)
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -5425,14 +5773,14 @@ _sk_bicubic_n1x_hsw LABEL PROC
PUBLIC _sk_bicubic_p1x_hsw
_sk_bicubic_p1x_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,98,125,24,5,175,20,0,0 ; vbroadcastss 0x14af(%rip),%ymm8 # 6da0 <_sk_clut_4D_hsw+0xd4c>
+ DB 196,98,125,24,5,175,20,0,0 ; vbroadcastss 0x14af(%rip),%ymm8 # 726c <_sk_clut_4D_hsw+0xd4c>
DB 197,188,88,0 ; vaddps (%rax),%ymm8,%ymm0
DB 197,124,16,72,64 ; vmovups 0x40(%rax),%ymm9
- DB 196,98,125,24,21,161,20,0,0 ; vbroadcastss 0x14a1(%rip),%ymm10 # 6da4 <_sk_clut_4D_hsw+0xd50>
- DB 196,98,125,24,29,156,20,0,0 ; vbroadcastss 0x149c(%rip),%ymm11 # 6da8 <_sk_clut_4D_hsw+0xd54>
+ DB 196,98,125,24,21,161,20,0,0 ; vbroadcastss 0x14a1(%rip),%ymm10 # 7270 <_sk_clut_4D_hsw+0xd50>
+ DB 196,98,125,24,29,156,20,0,0 ; vbroadcastss 0x149c(%rip),%ymm11 # 7274 <_sk_clut_4D_hsw+0xd54>
DB 196,66,53,168,218 ; vfmadd213ps %ymm10,%ymm9,%ymm11
DB 196,66,53,168,216 ; vfmadd213ps %ymm8,%ymm9,%ymm11
- DB 196,98,125,24,5,141,20,0,0 ; vbroadcastss 0x148d(%rip),%ymm8 # 6dac <_sk_clut_4D_hsw+0xd58>
+ DB 196,98,125,24,5,141,20,0,0 ; vbroadcastss 0x148d(%rip),%ymm8 # 7278 <_sk_clut_4D_hsw+0xd58>
DB 196,66,53,184,195 ; vfmadd231ps %ymm11,%ymm9,%ymm8
DB 197,124,17,128,128,0,0,0 ; vmovups %ymm8,0x80(%rax)
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -5441,12 +5789,12 @@ _sk_bicubic_p1x_hsw LABEL PROC
PUBLIC _sk_bicubic_p3x_hsw
_sk_bicubic_p3x_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,5,117,20,0,0 ; vbroadcastss 0x1475(%rip),%ymm0 # 6db0 <_sk_clut_4D_hsw+0xd5c>
+ DB 196,226,125,24,5,117,20,0,0 ; vbroadcastss 0x1475(%rip),%ymm0 # 727c <_sk_clut_4D_hsw+0xd5c>
DB 197,252,88,0 ; vaddps (%rax),%ymm0,%ymm0
DB 197,124,16,64,64 ; vmovups 0x40(%rax),%ymm8
DB 196,65,60,89,200 ; vmulps %ymm8,%ymm8,%ymm9
- DB 196,98,125,24,21,98,20,0,0 ; vbroadcastss 0x1462(%rip),%ymm10 # 6db4 <_sk_clut_4D_hsw+0xd60>
- DB 196,98,125,24,29,93,20,0,0 ; vbroadcastss 0x145d(%rip),%ymm11 # 6db8 <_sk_clut_4D_hsw+0xd64>
+ DB 196,98,125,24,21,98,20,0,0 ; vbroadcastss 0x1462(%rip),%ymm10 # 7280 <_sk_clut_4D_hsw+0xd60>
+ DB 196,98,125,24,29,93,20,0,0 ; vbroadcastss 0x145d(%rip),%ymm11 # 7284 <_sk_clut_4D_hsw+0xd64>
DB 196,66,61,168,218 ; vfmadd213ps %ymm10,%ymm8,%ymm11
DB 196,65,52,89,195 ; vmulps %ymm11,%ymm9,%ymm8
DB 197,124,17,128,128,0,0,0 ; vmovups %ymm8,0x80(%rax)
@@ -5456,13 +5804,13 @@ _sk_bicubic_p3x_hsw LABEL PROC
PUBLIC _sk_bicubic_n3y_hsw
_sk_bicubic_n3y_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,13,64,20,0,0 ; vbroadcastss 0x1440(%rip),%ymm1 # 6dbc <_sk_clut_4D_hsw+0xd68>
+ DB 196,226,125,24,13,64,20,0,0 ; vbroadcastss 0x1440(%rip),%ymm1 # 7288 <_sk_clut_4D_hsw+0xd68>
DB 197,244,88,72,32 ; vaddps 0x20(%rax),%ymm1,%ymm1
- DB 196,98,125,24,5,54,20,0,0 ; vbroadcastss 0x1436(%rip),%ymm8 # 6dc0 <_sk_clut_4D_hsw+0xd6c>
+ DB 196,98,125,24,5,54,20,0,0 ; vbroadcastss 0x1436(%rip),%ymm8 # 728c <_sk_clut_4D_hsw+0xd6c>
DB 197,60,92,64,96 ; vsubps 0x60(%rax),%ymm8,%ymm8
DB 196,65,60,89,200 ; vmulps %ymm8,%ymm8,%ymm9
- DB 196,98,125,24,21,39,20,0,0 ; vbroadcastss 0x1427(%rip),%ymm10 # 6dc4 <_sk_clut_4D_hsw+0xd70>
- DB 196,98,125,24,29,34,20,0,0 ; vbroadcastss 0x1422(%rip),%ymm11 # 6dc8 <_sk_clut_4D_hsw+0xd74>
+ DB 196,98,125,24,21,39,20,0,0 ; vbroadcastss 0x1427(%rip),%ymm10 # 7290 <_sk_clut_4D_hsw+0xd70>
+ DB 196,98,125,24,29,34,20,0,0 ; vbroadcastss 0x1422(%rip),%ymm11 # 7294 <_sk_clut_4D_hsw+0xd74>
DB 196,66,61,168,218 ; vfmadd213ps %ymm10,%ymm8,%ymm11
DB 196,65,36,89,193 ; vmulps %ymm9,%ymm11,%ymm8
DB 197,124,17,128,160,0,0,0 ; vmovups %ymm8,0xa0(%rax)
@@ -5472,16 +5820,16 @@ _sk_bicubic_n3y_hsw LABEL PROC
PUBLIC _sk_bicubic_n1y_hsw
_sk_bicubic_n1y_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,13,5,20,0,0 ; vbroadcastss 0x1405(%rip),%ymm1 # 6dcc <_sk_clut_4D_hsw+0xd78>
+ DB 196,226,125,24,13,5,20,0,0 ; vbroadcastss 0x1405(%rip),%ymm1 # 7298 <_sk_clut_4D_hsw+0xd78>
DB 197,244,88,72,32 ; vaddps 0x20(%rax),%ymm1,%ymm1
- DB 196,98,125,24,5,251,19,0,0 ; vbroadcastss 0x13fb(%rip),%ymm8 # 6dd0 <_sk_clut_4D_hsw+0xd7c>
+ DB 196,98,125,24,5,251,19,0,0 ; vbroadcastss 0x13fb(%rip),%ymm8 # 729c <_sk_clut_4D_hsw+0xd7c>
DB 197,60,92,64,96 ; vsubps 0x60(%rax),%ymm8,%ymm8
- DB 196,98,125,24,13,241,19,0,0 ; vbroadcastss 0x13f1(%rip),%ymm9 # 6dd4 <_sk_clut_4D_hsw+0xd80>
- DB 196,98,125,24,21,236,19,0,0 ; vbroadcastss 0x13ec(%rip),%ymm10 # 6dd8 <_sk_clut_4D_hsw+0xd84>
+ DB 196,98,125,24,13,241,19,0,0 ; vbroadcastss 0x13f1(%rip),%ymm9 # 72a0 <_sk_clut_4D_hsw+0xd80>
+ DB 196,98,125,24,21,236,19,0,0 ; vbroadcastss 0x13ec(%rip),%ymm10 # 72a4 <_sk_clut_4D_hsw+0xd84>
DB 196,66,61,168,209 ; vfmadd213ps %ymm9,%ymm8,%ymm10
- DB 196,98,125,24,13,226,19,0,0 ; vbroadcastss 0x13e2(%rip),%ymm9 # 6ddc <_sk_clut_4D_hsw+0xd88>
+ DB 196,98,125,24,13,226,19,0,0 ; vbroadcastss 0x13e2(%rip),%ymm9 # 72a8 <_sk_clut_4D_hsw+0xd88>
DB 196,66,61,184,202 ; vfmadd231ps %ymm10,%ymm8,%ymm9
- DB 196,98,125,24,21,216,19,0,0 ; vbroadcastss 0x13d8(%rip),%ymm10 # 6de0 <_sk_clut_4D_hsw+0xd8c>
+ DB 196,98,125,24,21,216,19,0,0 ; vbroadcastss 0x13d8(%rip),%ymm10 # 72ac <_sk_clut_4D_hsw+0xd8c>
DB 196,66,61,184,209 ; vfmadd231ps %ymm9,%ymm8,%ymm10
DB 197,124,17,144,160,0,0,0 ; vmovups %ymm10,0xa0(%rax)
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -5490,14 +5838,14 @@ _sk_bicubic_n1y_hsw LABEL PROC
PUBLIC _sk_bicubic_p1y_hsw
_sk_bicubic_p1y_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,98,125,24,5,192,19,0,0 ; vbroadcastss 0x13c0(%rip),%ymm8 # 6de4 <_sk_clut_4D_hsw+0xd90>
+ DB 196,98,125,24,5,192,19,0,0 ; vbroadcastss 0x13c0(%rip),%ymm8 # 72b0 <_sk_clut_4D_hsw+0xd90>
DB 197,188,88,72,32 ; vaddps 0x20(%rax),%ymm8,%ymm1
DB 197,124,16,72,96 ; vmovups 0x60(%rax),%ymm9
- DB 196,98,125,24,21,177,19,0,0 ; vbroadcastss 0x13b1(%rip),%ymm10 # 6de8 <_sk_clut_4D_hsw+0xd94>
- DB 196,98,125,24,29,172,19,0,0 ; vbroadcastss 0x13ac(%rip),%ymm11 # 6dec <_sk_clut_4D_hsw+0xd98>
+ DB 196,98,125,24,21,177,19,0,0 ; vbroadcastss 0x13b1(%rip),%ymm10 # 72b4 <_sk_clut_4D_hsw+0xd94>
+ DB 196,98,125,24,29,172,19,0,0 ; vbroadcastss 0x13ac(%rip),%ymm11 # 72b8 <_sk_clut_4D_hsw+0xd98>
DB 196,66,53,168,218 ; vfmadd213ps %ymm10,%ymm9,%ymm11
DB 196,66,53,168,216 ; vfmadd213ps %ymm8,%ymm9,%ymm11
- DB 196,98,125,24,5,157,19,0,0 ; vbroadcastss 0x139d(%rip),%ymm8 # 6df0 <_sk_clut_4D_hsw+0xd9c>
+ DB 196,98,125,24,5,157,19,0,0 ; vbroadcastss 0x139d(%rip),%ymm8 # 72bc <_sk_clut_4D_hsw+0xd9c>
DB 196,66,53,184,195 ; vfmadd231ps %ymm11,%ymm9,%ymm8
DB 197,124,17,128,160,0,0,0 ; vmovups %ymm8,0xa0(%rax)
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -5506,12 +5854,12 @@ _sk_bicubic_p1y_hsw LABEL PROC
PUBLIC _sk_bicubic_p3y_hsw
_sk_bicubic_p3y_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,13,133,19,0,0 ; vbroadcastss 0x1385(%rip),%ymm1 # 6df4 <_sk_clut_4D_hsw+0xda0>
+ DB 196,226,125,24,13,133,19,0,0 ; vbroadcastss 0x1385(%rip),%ymm1 # 72c0 <_sk_clut_4D_hsw+0xda0>
DB 197,244,88,72,32 ; vaddps 0x20(%rax),%ymm1,%ymm1
DB 197,124,16,64,96 ; vmovups 0x60(%rax),%ymm8
DB 196,65,60,89,200 ; vmulps %ymm8,%ymm8,%ymm9
- DB 196,98,125,24,21,113,19,0,0 ; vbroadcastss 0x1371(%rip),%ymm10 # 6df8 <_sk_clut_4D_hsw+0xda4>
- DB 196,98,125,24,29,108,19,0,0 ; vbroadcastss 0x136c(%rip),%ymm11 # 6dfc <_sk_clut_4D_hsw+0xda8>
+ DB 196,98,125,24,21,113,19,0,0 ; vbroadcastss 0x1371(%rip),%ymm10 # 72c4 <_sk_clut_4D_hsw+0xda4>
+ DB 196,98,125,24,29,108,19,0,0 ; vbroadcastss 0x136c(%rip),%ymm11 # 72c8 <_sk_clut_4D_hsw+0xda8>
DB 196,66,61,168,218 ; vfmadd213ps %ymm10,%ymm8,%ymm11
DB 196,65,52,89,195 ; vmulps %ymm11,%ymm9,%ymm8
DB 197,124,17,128,160,0,0,0 ; vmovups %ymm8,0xa0(%rax)
@@ -5641,19 +5989,19 @@ _sk_clut_3D_hsw LABEL PROC
DB 197,254,127,76,36,32 ; vmovdqu %ymm1,0x20(%rsp)
DB 72,139,0 ; mov (%rax),%rax
DB 196,193,117,254,204 ; vpaddd %ymm12,%ymm1,%ymm1
- DB 196,226,125,88,61,51,17,0,0 ; vpbroadcastd 0x1133(%rip),%ymm7 # 6e04 <_sk_clut_4D_hsw+0xdb0>
+ DB 196,226,125,88,61,51,17,0,0 ; vpbroadcastd 0x1133(%rip),%ymm7 # 72d0 <_sk_clut_4D_hsw+0xdb0>
DB 196,226,117,64,207 ; vpmulld %ymm7,%ymm1,%ymm1
DB 197,205,118,246 ; vpcmpeqd %ymm6,%ymm6,%ymm6
DB 196,98,77,146,4,136 ; vgatherdps %ymm6,(%rax,%ymm1,4),%ymm8
- DB 196,98,125,88,21,31,17,0,0 ; vpbroadcastd 0x111f(%rip),%ymm10 # 6e08 <_sk_clut_4D_hsw+0xdb4>
+ DB 196,98,125,88,21,31,17,0,0 ; vpbroadcastd 0x111f(%rip),%ymm10 # 72d4 <_sk_clut_4D_hsw+0xdb4>
DB 196,193,117,254,242 ; vpaddd %ymm10,%ymm1,%ymm6
DB 196,65,37,118,219 ; vpcmpeqd %ymm11,%ymm11,%ymm11
DB 196,226,37,146,28,176 ; vgatherdps %ymm11,(%rax,%ymm6,4),%ymm3
- DB 196,98,125,88,29,10,17,0,0 ; vpbroadcastd 0x110a(%rip),%ymm11 # 6e0c <_sk_clut_4D_hsw+0xdb8>
+ DB 196,98,125,88,29,10,17,0,0 ; vpbroadcastd 0x110a(%rip),%ymm11 # 72d8 <_sk_clut_4D_hsw+0xdb8>
DB 196,193,117,254,203 ; vpaddd %ymm11,%ymm1,%ymm1
DB 197,205,118,246 ; vpcmpeqd %ymm6,%ymm6,%ymm6
DB 196,226,77,146,36,136 ; vgatherdps %ymm6,(%rax,%ymm1,4),%ymm4
- DB 196,98,125,24,45,230,16,0,0 ; vbroadcastss 0x10e6(%rip),%ymm13 # 6e00 <_sk_clut_4D_hsw+0xdac>
+ DB 196,98,125,24,45,230,16,0,0 ; vbroadcastss 0x10e6(%rip),%ymm13 # 72cc <_sk_clut_4D_hsw+0xdac>
DB 196,193,124,88,245 ; vaddps %ymm13,%ymm0,%ymm6
DB 197,254,91,246 ; vcvttps2dq %ymm6,%ymm6
DB 196,226,77,64,205 ; vpmulld %ymm5,%ymm6,%ymm1
@@ -5871,19 +6219,19 @@ _sk_clut_4D_hsw LABEL PROC
DB 197,126,91,245 ; vcvttps2dq %ymm5,%ymm14
DB 196,98,13,64,233 ; vpmulld %ymm1,%ymm14,%ymm13
DB 197,149,254,198 ; vpaddd %ymm6,%ymm13,%ymm0
- DB 196,98,125,88,21,111,12,0,0 ; vpbroadcastd 0xc6f(%rip),%ymm10 # 6e14 <_sk_clut_4D_hsw+0xdc0>
+ DB 196,98,125,88,21,111,12,0,0 ; vpbroadcastd 0xc6f(%rip),%ymm10 # 72e0 <_sk_clut_4D_hsw+0xdc0>
DB 196,194,125,64,194 ; vpmulld %ymm10,%ymm0,%ymm0
DB 196,65,61,118,192 ; vpcmpeqd %ymm8,%ymm8,%ymm8
DB 196,98,61,146,60,128 ; vgatherdps %ymm8,(%rax,%ymm0,4),%ymm15
- DB 196,98,125,88,29,90,12,0,0 ; vpbroadcastd 0xc5a(%rip),%ymm11 # 6e18 <_sk_clut_4D_hsw+0xdc4>
+ DB 196,98,125,88,29,90,12,0,0 ; vpbroadcastd 0xc5a(%rip),%ymm11 # 72e4 <_sk_clut_4D_hsw+0xdc4>
DB 196,65,125,254,195 ; vpaddd %ymm11,%ymm0,%ymm8
DB 196,65,53,118,201 ; vpcmpeqd %ymm9,%ymm9,%ymm9
DB 196,162,53,146,20,128 ; vgatherdps %ymm9,(%rax,%ymm8,4),%ymm2
- DB 196,98,125,88,37,69,12,0,0 ; vpbroadcastd 0xc45(%rip),%ymm12 # 6e1c <_sk_clut_4D_hsw+0xdc8>
+ DB 196,98,125,88,37,69,12,0,0 ; vpbroadcastd 0xc45(%rip),%ymm12 # 72e8 <_sk_clut_4D_hsw+0xdc8>
DB 196,193,125,254,196 ; vpaddd %ymm12,%ymm0,%ymm0
DB 196,65,61,118,192 ; vpcmpeqd %ymm8,%ymm8,%ymm8
DB 196,226,61,146,60,128 ; vgatherdps %ymm8,(%rax,%ymm0,4),%ymm7
- DB 196,98,125,24,13,32,12,0,0 ; vbroadcastss 0xc20(%rip),%ymm9 # 6e10 <_sk_clut_4D_hsw+0xdbc>
+ DB 196,98,125,24,13,32,12,0,0 ; vbroadcastss 0xc20(%rip),%ymm9 # 72dc <_sk_clut_4D_hsw+0xdbc>
DB 196,65,84,88,193 ; vaddps %ymm9,%ymm5,%ymm8
DB 196,65,126,91,192 ; vcvttps2dq %ymm8,%ymm8
DB 196,226,61,64,193 ; vpmulld %ymm1,%ymm8,%ymm0
@@ -6208,7 +6556,7 @@ _sk_clut_4D_hsw LABEL PROC
DB 197,196,92,214 ; vsubps %ymm6,%ymm7,%ymm2
DB 196,226,61,168,214 ; vfmadd213ps %ymm6,%ymm8,%ymm2
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,29,51,5,0,0 ; vbroadcastss 0x533(%rip),%ymm3 # 6e20 <_sk_clut_4D_hsw+0xdcc>
+ DB 196,226,125,24,29,51,5,0,0 ; vbroadcastss 0x533(%rip),%ymm3 # 72ec <_sk_clut_4D_hsw+0xdcc>
DB 197,252,16,164,36,160,3,0,0 ; vmovups 0x3a0(%rsp),%ymm4
DB 197,252,16,172,36,192,3,0,0 ; vmovups 0x3c0(%rsp),%ymm5
DB 197,252,16,180,36,224,3,0,0 ; vmovups 0x3e0(%rsp),%ymm6
@@ -6259,25 +6607,25 @@ ALIGN 4
DB 153 ; cltd
DB 153 ; cltd
DB 62,61,10,23,63,174 ; ds cmp $0xae3f170a,%eax
- DB 71,225,61 ; rex.RXB loope 69d5 <.literal4+0xb9>
+ DB 71,225,61 ; rex.RXB loope 6ea1 <.literal4+0xb9>
DB 0,0 ; add %al,(%rax)
DB 128,63,154 ; cmpb $0x9a,(%rdi)
DB 153 ; cltd
DB 153 ; cltd
DB 62,61,10,23,63,174 ; ds cmp $0xae3f170a,%eax
- DB 71,225,61 ; rex.RXB loope 69e5 <.literal4+0xc9>
+ DB 71,225,61 ; rex.RXB loope 6eb1 <.literal4+0xc9>
DB 0,0 ; add %al,(%rax)
DB 128,63,154 ; cmpb $0x9a,(%rdi)
DB 153 ; cltd
DB 153 ; cltd
DB 62,61,10,23,63,174 ; ds cmp $0xae3f170a,%eax
- DB 71,225,61 ; rex.RXB loope 69f5 <.literal4+0xd9>
+ DB 71,225,61 ; rex.RXB loope 6ec1 <.literal4+0xd9>
DB 0,0 ; add %al,(%rax)
DB 128,63,154 ; cmpb $0x9a,(%rdi)
DB 153 ; cltd
DB 153 ; cltd
DB 62,61,10,23,63,174 ; ds cmp $0xae3f170a,%eax
- DB 71,225,61 ; rex.RXB loope 6a05 <.literal4+0xe9>
+ DB 71,225,61 ; rex.RXB loope 6ed1 <.literal4+0xe9>
DB 0,0 ; add %al,(%rax)
DB 128,63,0 ; cmpb $0x0,(%rdi)
DB 0,128,63,0,0,127 ; add %al,0x7f00003f(%rax)
@@ -6340,7 +6688,7 @@ ALIGN 4
DB 190,129,128,128,59 ; mov $0x3b808081,%esi
DB 129,128,128,59,0,248,0,0,8,33 ; addl $0x21080000,-0x7ffc480(%rax)
DB 132,55 ; test %dh,(%rdi)
- DB 224,7 ; loopne 6a79 <.literal4+0x15d>
+ DB 224,7 ; loopne 6f45 <.literal4+0x15d>
DB 0,0 ; add %al,(%rax)
DB 33,8 ; and %ecx,(%rax)
DB 2,58 ; add (%rdx),%bh
@@ -6356,10 +6704,10 @@ ALIGN 4
DB 129,128,128,59,129,128,128,59,0,0 ; addl $0x3b80,-0x7f7ec480(%rax)
DB 0,52,255 ; add %dh,(%rdi,%rdi,8)
DB 255 ; (bad)
- DB 127,0 ; jg 6aa0 <.literal4+0x184>
+ DB 127,0 ; jg 6f6c <.literal4+0x184>
DB 0,0 ; add %al,(%rax)
DB 0,63 ; add %bh,(%rdi)
- DB 119,115 ; ja 6b19 <.literal4+0x1fd>
+ DB 119,115 ; ja 6fe5 <.literal4+0x1fd>
DB 248 ; clc
DB 194,117,191 ; retq $0xbf75
DB 191,63,249,68,180 ; mov $0xb444f93f,%edi
@@ -6373,10 +6721,10 @@ ALIGN 4
DB 0,128,63,0,0,0 ; add %al,0x3f(%rax)
DB 52,255 ; xor $0xff,%al
DB 255 ; (bad)
- DB 127,0 ; jg 6ad4 <.literal4+0x1b8>
+ DB 127,0 ; jg 6fa0 <.literal4+0x1b8>
DB 0,0 ; add %al,(%rax)
DB 0,63 ; add %bh,(%rdi)
- DB 119,115 ; ja 6b4d <.literal4+0x231>
+ DB 119,115 ; ja 7019 <.literal4+0x231>
DB 248 ; clc
DB 194,117,191 ; retq $0xbf75
DB 191,63,249,68,180 ; mov $0xb444f93f,%edi
@@ -6390,10 +6738,10 @@ ALIGN 4
DB 0,128,63,0,0,0 ; add %al,0x3f(%rax)
DB 52,255 ; xor $0xff,%al
DB 255 ; (bad)
- DB 127,0 ; jg 6b08 <.literal4+0x1ec>
+ DB 127,0 ; jg 6fd4 <.literal4+0x1ec>
DB 0,0 ; add %al,(%rax)
DB 0,63 ; add %bh,(%rdi)
- DB 119,115 ; ja 6b81 <.literal4+0x265>
+ DB 119,115 ; ja 704d <.literal4+0x265>
DB 248 ; clc
DB 194,117,191 ; retq $0xbf75
DB 191,63,249,68,180 ; mov $0xb444f93f,%edi
@@ -6407,10 +6755,10 @@ ALIGN 4
DB 0,128,63,0,0,0 ; add %al,0x3f(%rax)
DB 52,255 ; xor $0xff,%al
DB 255 ; (bad)
- DB 127,0 ; jg 6b3c <.literal4+0x220>
+ DB 127,0 ; jg 7008 <.literal4+0x220>
DB 0,0 ; add %al,(%rax)
DB 0,63 ; add %bh,(%rdi)
- DB 119,115 ; ja 6bb5 <.literal4+0x299>
+ DB 119,115 ; ja 7081 <.literal4+0x299>
DB 248 ; clc
DB 194,117,191 ; retq $0xbf75
DB 191,63,249,68,180 ; mov $0xb444f93f,%edi
@@ -6424,10 +6772,10 @@ ALIGN 4
DB 0,128,63,0,0,0 ; add %al,0x3f(%rax)
DB 52,255 ; xor $0xff,%al
DB 255 ; (bad)
- DB 127,0 ; jg 6b70 <.literal4+0x254>
+ DB 127,0 ; jg 703c <.literal4+0x254>
DB 0,0 ; add %al,(%rax)
DB 0,63 ; add %bh,(%rdi)
- DB 119,115 ; ja 6be9 <.literal4+0x2cd>
+ DB 119,115 ; ja 70b5 <.literal4+0x2cd>
DB 248 ; clc
DB 194,117,191 ; retq $0xbf75
DB 191,63,249,68,180 ; mov $0xb444f93f,%edi
@@ -6440,7 +6788,7 @@ ALIGN 4
DB 0,75,0 ; add %cl,0x0(%rbx)
DB 0,200 ; add %cl,%al
DB 66,0,0 ; rex.X add %al,(%rax)
- DB 127,67 ; jg 6be3 <.literal4+0x2c7>
+ DB 127,67 ; jg 70af <.literal4+0x2c7>
DB 0,0 ; add %al,(%rax)
DB 0,195 ; add %al,%bl
DB 0,0 ; add %al,(%rax)
@@ -6452,7 +6800,7 @@ ALIGN 4
DB 190,80,128,3,62 ; mov $0x3e038050,%esi
DB 31 ; (bad)
DB 215 ; xlat %ds:(%rbx)
- DB 118,63 ; jbe 6c03 <.literal4+0x2e7>
+ DB 118,63 ; jbe 70cf <.literal4+0x2e7>
DB 246,64,83,63 ; testb $0x3f,0x53(%rax)
DB 129,128,128,59,129,128,128,59,129,128; addl $0x80813b80,-0x7f7ec480(%rax)
DB 128,59,0 ; cmpb $0x0,(%rbx)
@@ -6474,7 +6822,7 @@ ALIGN 4
DB 0,0 ; add %al,(%rax)
DB 8,33 ; or %ah,(%rcx)
DB 132,55 ; test %dh,(%rdi)
- DB 224,7 ; loopne 6c1d <.literal4+0x301>
+ DB 224,7 ; loopne 70e9 <.literal4+0x301>
DB 0,0 ; add %al,(%rax)
DB 33,8 ; and %ecx,(%rax)
DB 2,58 ; add (%rdx),%bh
@@ -6486,7 +6834,7 @@ ALIGN 4
DB 0,0 ; add %al,(%rax)
DB 8,33 ; or %ah,(%rcx)
DB 132,55 ; test %dh,(%rdi)
- DB 224,7 ; loopne 6c39 <.literal4+0x31d>
+ DB 224,7 ; loopne 7105 <.literal4+0x31d>
DB 0,0 ; add %al,(%rax)
DB 33,8 ; and %ecx,(%rax)
DB 2,58 ; add (%rdx),%bh
@@ -6497,7 +6845,7 @@ ALIGN 4
DB 0,0 ; add %al,(%rax)
DB 248 ; clc
DB 65,0,0 ; add %al,(%r8)
- DB 124,66 ; jl 6c8e <.literal4+0x372>
+ DB 124,66 ; jl 715a <.literal4+0x372>
DB 0,240 ; add %dh,%al
DB 0,0 ; add %al,(%rax)
DB 137,136,136,55,0,15 ; mov %ecx,0xf003788(%rax)
@@ -6523,7 +6871,7 @@ ALIGN 4
DB 137,136,136,59,15,0 ; mov %ecx,0xf3b88(%rax)
DB 0,0 ; add %al,(%rax)
DB 137,136,136,61,0,0 ; mov %ecx,0x3d88(%rax)
- DB 112,65 ; jo 6cf1 <.literal4+0x3d5>
+ DB 112,65 ; jo 71bd <.literal4+0x3d5>
DB 129,128,128,59,129,128,128,59,129,128; addl $0x80813b80,-0x7f7ec480(%rax)
DB 128,59,0 ; cmpb $0x0,(%rbx)
DB 0,127,67 ; add %bh,0x43(%rdi)
@@ -6537,7 +6885,7 @@ ALIGN 4
DB 0,0 ; add %al,(%rax)
DB 128,63,0 ; cmpb $0x0,(%rdi)
DB 255 ; (bad)
- DB 127,71 ; jg 6d27 <.literal4+0x40b>
+ DB 127,71 ; jg 71f3 <.literal4+0x40b>
DB 0,0 ; add %al,(%rax)
DB 0,63 ; add %bh,(%rdi)
DB 0,0 ; add %al,(%rax)
@@ -6644,16 +6992,16 @@ ALIGN 4
DB 170 ; stos %al,%es:(%rdi)
DB 170 ; stos %al,%es:(%rdi)
DB 190,114,28,199,62 ; mov $0x3ec71c72,%esi
- DB 114,249 ; jb 6dfb <.literal4+0x4df>
- DB 127,63 ; jg 6e43 <_sk_clut_4D_hsw+0xdef>
+ DB 114,249 ; jb 72c7 <.literal4+0x4df>
+ DB 127,63 ; jg 730f <_sk_clut_4D_hsw+0xdef>
DB 3,0 ; add (%rax),%eax
DB 0,0 ; add %al,(%rax)
DB 1,0 ; add %eax,(%rax)
DB 0,0 ; add %al,(%rax)
DB 2,0 ; add (%rax),%al
DB 0,0 ; add %al,(%rax)
- DB 114,249 ; jb 6e0b <.literal4+0x4ef>
- DB 127,63 ; jg 6e53 <_sk_clut_4D_hsw+0xdff>
+ DB 114,249 ; jb 72d7 <.literal4+0x4ef>
+ DB 127,63 ; jg 731f <_sk_clut_4D_hsw+0xdff>
DB 3,0 ; add (%rax),%eax
DB 0,0 ; add %al,(%rax)
DB 1,0 ; add %eax,(%rax)
@@ -6683,16 +7031,16 @@ ALIGN 32
DB 0,0 ; add %al,(%rax)
DB 1,255 ; add %edi,%edi
DB 255 ; (bad)
- DB 255,5,255,255,255,9 ; incl 0x9ffffff(%rip) # a006e68 <_sk_clut_4D_hsw+0xa000e14>
+ DB 255,5,255,255,255,9 ; incl 0x9ffffff(%rip) # a007328 <_sk_clut_4D_hsw+0xa000e08>
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,13,255,255,255,17 ; decl 0x11ffffff(%rip) # 12006e70 <_sk_clut_4D_hsw+0x12000e1c>
+ DB 255,13,255,255,255,17 ; decl 0x11ffffff(%rip) # 12007330 <_sk_clut_4D_hsw+0x12000e10>
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,21,255,255,255,25 ; callq *0x19ffffff(%rip) # 1a006e78 <_sk_clut_4D_hsw+0x1a000e24>
+ DB 255,21,255,255,255,25 ; callq *0x19ffffff(%rip) # 1a007338 <_sk_clut_4D_hsw+0x1a000e18>
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,29,255,255,255,2 ; lcall *0x2ffffff(%rip) # 3006e80 <_sk_clut_4D_hsw+0x3000e2c>
+ DB 255,29,255,255,255,2 ; lcall *0x2ffffff(%rip) # 3007340 <_sk_clut_4D_hsw+0x3000e20>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,6 ; incl (%rsi)
@@ -6735,16 +7083,16 @@ ALIGN 32
DB 0,0 ; add %al,(%rax)
DB 1,255 ; add %edi,%edi
DB 255 ; (bad)
- DB 255,5,255,255,255,9 ; incl 0x9ffffff(%rip) # a006ec8 <_sk_clut_4D_hsw+0xa000e74>
+ DB 255,5,255,255,255,9 ; incl 0x9ffffff(%rip) # a007388 <_sk_clut_4D_hsw+0xa000e68>
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,13,255,255,255,17 ; decl 0x11ffffff(%rip) # 12006ed0 <_sk_clut_4D_hsw+0x12000e7c>
+ DB 255,13,255,255,255,17 ; decl 0x11ffffff(%rip) # 12007390 <_sk_clut_4D_hsw+0x12000e70>
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,21,255,255,255,25 ; callq *0x19ffffff(%rip) # 1a006ed8 <_sk_clut_4D_hsw+0x1a000e84>
+ DB 255,21,255,255,255,25 ; callq *0x19ffffff(%rip) # 1a007398 <_sk_clut_4D_hsw+0x1a000e78>
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,29,255,255,255,2 ; lcall *0x2ffffff(%rip) # 3006ee0 <_sk_clut_4D_hsw+0x3000e8c>
+ DB 255,29,255,255,255,2 ; lcall *0x2ffffff(%rip) # 30073a0 <_sk_clut_4D_hsw+0x3000e80>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,6 ; incl (%rsi)
@@ -6787,16 +7135,16 @@ ALIGN 32
DB 0,0 ; add %al,(%rax)
DB 1,255 ; add %edi,%edi
DB 255 ; (bad)
- DB 255,5,255,255,255,9 ; incl 0x9ffffff(%rip) # a006f28 <_sk_clut_4D_hsw+0xa000ed4>
+ DB 255,5,255,255,255,9 ; incl 0x9ffffff(%rip) # a0073e8 <_sk_clut_4D_hsw+0xa000ec8>
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,13,255,255,255,17 ; decl 0x11ffffff(%rip) # 12006f30 <_sk_clut_4D_hsw+0x12000edc>
+ DB 255,13,255,255,255,17 ; decl 0x11ffffff(%rip) # 120073f0 <_sk_clut_4D_hsw+0x12000ed0>
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,21,255,255,255,25 ; callq *0x19ffffff(%rip) # 1a006f38 <_sk_clut_4D_hsw+0x1a000ee4>
+ DB 255,21,255,255,255,25 ; callq *0x19ffffff(%rip) # 1a0073f8 <_sk_clut_4D_hsw+0x1a000ed8>
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,29,255,255,255,2 ; lcall *0x2ffffff(%rip) # 3006f40 <_sk_clut_4D_hsw+0x3000eec>
+ DB 255,29,255,255,255,2 ; lcall *0x2ffffff(%rip) # 3007400 <_sk_clut_4D_hsw+0x3000ee0>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,6 ; incl (%rsi)
@@ -6839,16 +7187,16 @@ ALIGN 32
DB 0,0 ; add %al,(%rax)
DB 1,255 ; add %edi,%edi
DB 255 ; (bad)
- DB 255,5,255,255,255,9 ; incl 0x9ffffff(%rip) # a006f88 <_sk_clut_4D_hsw+0xa000f34>
+ DB 255,5,255,255,255,9 ; incl 0x9ffffff(%rip) # a007448 <_sk_clut_4D_hsw+0xa000f28>
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,13,255,255,255,17 ; decl 0x11ffffff(%rip) # 12006f90 <_sk_clut_4D_hsw+0x12000f3c>
+ DB 255,13,255,255,255,17 ; decl 0x11ffffff(%rip) # 12007450 <_sk_clut_4D_hsw+0x12000f30>
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,21,255,255,255,25 ; callq *0x19ffffff(%rip) # 1a006f98 <_sk_clut_4D_hsw+0x1a000f44>
+ DB 255,21,255,255,255,25 ; callq *0x19ffffff(%rip) # 1a007458 <_sk_clut_4D_hsw+0x1a000f38>
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,29,255,255,255,2 ; lcall *0x2ffffff(%rip) # 3006fa0 <_sk_clut_4D_hsw+0x3000f4c>
+ DB 255,29,255,255,255,2 ; lcall *0x2ffffff(%rip) # 3007460 <_sk_clut_4D_hsw+0x3000f40>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,6 ; incl (%rsi)
@@ -6891,16 +7239,16 @@ ALIGN 32
DB 0,0 ; add %al,(%rax)
DB 1,255 ; add %edi,%edi
DB 255 ; (bad)
- DB 255,5,255,255,255,9 ; incl 0x9ffffff(%rip) # a006fe8 <_sk_clut_4D_hsw+0xa000f94>
+ DB 255,5,255,255,255,9 ; incl 0x9ffffff(%rip) # a0074a8 <_sk_clut_4D_hsw+0xa000f88>
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,13,255,255,255,17 ; decl 0x11ffffff(%rip) # 12006ff0 <_sk_clut_4D_hsw+0x12000f9c>
+ DB 255,13,255,255,255,17 ; decl 0x11ffffff(%rip) # 120074b0 <_sk_clut_4D_hsw+0x12000f90>
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,21,255,255,255,25 ; callq *0x19ffffff(%rip) # 1a006ff8 <_sk_clut_4D_hsw+0x1a000fa4>
+ DB 255,21,255,255,255,25 ; callq *0x19ffffff(%rip) # 1a0074b8 <_sk_clut_4D_hsw+0x1a000f98>
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,29,255,255,255,2 ; lcall *0x2ffffff(%rip) # 3007000 <_sk_clut_4D_hsw+0x3000fac>
+ DB 255,29,255,255,255,2 ; lcall *0x2ffffff(%rip) # 30074c0 <_sk_clut_4D_hsw+0x3000fa0>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,6 ; incl (%rsi)
@@ -6943,16 +7291,16 @@ ALIGN 32
DB 0,0 ; add %al,(%rax)
DB 1,255 ; add %edi,%edi
DB 255 ; (bad)
- DB 255,5,255,255,255,9 ; incl 0x9ffffff(%rip) # a007048 <_sk_clut_4D_hsw+0xa000ff4>
+ DB 255,5,255,255,255,9 ; incl 0x9ffffff(%rip) # a007508 <_sk_clut_4D_hsw+0xa000fe8>
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,13,255,255,255,17 ; decl 0x11ffffff(%rip) # 12007050 <_sk_clut_4D_hsw+0x12000ffc>
+ DB 255,13,255,255,255,17 ; decl 0x11ffffff(%rip) # 12007510 <_sk_clut_4D_hsw+0x12000ff0>
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,21,255,255,255,25 ; callq *0x19ffffff(%rip) # 1a007058 <_sk_clut_4D_hsw+0x1a001004>
+ DB 255,21,255,255,255,25 ; callq *0x19ffffff(%rip) # 1a007518 <_sk_clut_4D_hsw+0x1a000ff8>
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,29,255,255,255,2 ; lcall *0x2ffffff(%rip) # 3007060 <_sk_clut_4D_hsw+0x300100c>
+ DB 255,29,255,255,255,2 ; lcall *0x2ffffff(%rip) # 3007520 <_sk_clut_4D_hsw+0x3001000>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,6 ; incl (%rsi)
@@ -6995,16 +7343,16 @@ ALIGN 32
DB 0,0 ; add %al,(%rax)
DB 1,255 ; add %edi,%edi
DB 255 ; (bad)
- DB 255,5,255,255,255,9 ; incl 0x9ffffff(%rip) # a0070a8 <_sk_clut_4D_hsw+0xa001054>
+ DB 255,5,255,255,255,9 ; incl 0x9ffffff(%rip) # a007568 <_sk_clut_4D_hsw+0xa001048>
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,13,255,255,255,17 ; decl 0x11ffffff(%rip) # 120070b0 <_sk_clut_4D_hsw+0x1200105c>
+ DB 255,13,255,255,255,17 ; decl 0x11ffffff(%rip) # 12007570 <_sk_clut_4D_hsw+0x12001050>
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,21,255,255,255,25 ; callq *0x19ffffff(%rip) # 1a0070b8 <_sk_clut_4D_hsw+0x1a001064>
+ DB 255,21,255,255,255,25 ; callq *0x19ffffff(%rip) # 1a007578 <_sk_clut_4D_hsw+0x1a001058>
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,29,255,255,255,2 ; lcall *0x2ffffff(%rip) # 30070c0 <_sk_clut_4D_hsw+0x300106c>
+ DB 255,29,255,255,255,2 ; lcall *0x2ffffff(%rip) # 3007580 <_sk_clut_4D_hsw+0x3001060>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,6 ; incl (%rsi)
@@ -7047,16 +7395,16 @@ ALIGN 32
DB 0,0 ; add %al,(%rax)
DB 1,255 ; add %edi,%edi
DB 255 ; (bad)
- DB 255,5,255,255,255,9 ; incl 0x9ffffff(%rip) # a007108 <_sk_clut_4D_hsw+0xa0010b4>
+ DB 255,5,255,255,255,9 ; incl 0x9ffffff(%rip) # a0075c8 <_sk_clut_4D_hsw+0xa0010a8>
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,13,255,255,255,17 ; decl 0x11ffffff(%rip) # 12007110 <_sk_clut_4D_hsw+0x120010bc>
+ DB 255,13,255,255,255,17 ; decl 0x11ffffff(%rip) # 120075d0 <_sk_clut_4D_hsw+0x120010b0>
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,21,255,255,255,25 ; callq *0x19ffffff(%rip) # 1a007118 <_sk_clut_4D_hsw+0x1a0010c4>
+ DB 255,21,255,255,255,25 ; callq *0x19ffffff(%rip) # 1a0075d8 <_sk_clut_4D_hsw+0x1a0010b8>
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,29,255,255,255,2 ; lcall *0x2ffffff(%rip) # 3007120 <_sk_clut_4D_hsw+0x30010cc>
+ DB 255,29,255,255,255,2 ; lcall *0x2ffffff(%rip) # 30075e0 <_sk_clut_4D_hsw+0x30010c0>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,6 ; incl (%rsi)
@@ -7257,7 +7605,7 @@ _sk_seed_shader_avx LABEL PROC
DB 197,249,112,192,0 ; vpshufd $0x0,%xmm0,%xmm0
DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,226,125,24,13,231,173,0,0 ; vbroadcastss 0xade7(%rip),%ymm1 # af4c <_sk_clut_4D_avx+0x2173>
+ DB 196,226,125,24,13,155,177,0,0 ; vbroadcastss 0xb19b(%rip),%ymm1 # b300 <_sk_clut_4D_avx+0x2173>
DB 197,252,88,193 ; vaddps %ymm1,%ymm0,%ymm0
DB 197,252,88,7 ; vaddps (%rdi),%ymm0,%ymm0
DB 197,249,110,209 ; vmovd %ecx,%xmm2
@@ -7266,7 +7614,7 @@ _sk_seed_shader_avx LABEL PROC
DB 197,252,91,210 ; vcvtdq2ps %ymm2,%ymm2
DB 197,236,88,201 ; vaddps %ymm1,%ymm2,%ymm1
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,21,193,173,0,0 ; vbroadcastss 0xadc1(%rip),%ymm2 # af50 <_sk_clut_4D_avx+0x2177>
+ DB 196,226,125,24,21,117,177,0,0 ; vbroadcastss 0xb175(%rip),%ymm2 # b304 <_sk_clut_4D_avx+0x2177>
DB 197,228,87,219 ; vxorps %ymm3,%ymm3,%ymm3
DB 197,220,87,228 ; vxorps %ymm4,%ymm4,%ymm4
DB 197,212,87,237 ; vxorps %ymm5,%ymm5,%ymm5
@@ -7288,7 +7636,7 @@ _sk_dither_avx LABEL PROC
DB 196,65,121,112,201,0 ; vpshufd $0x0,%xmm9,%xmm9
DB 196,67,53,24,201,1 ; vinsertf128 $0x1,%xmm9,%ymm9,%ymm9
DB 196,65,52,87,208 ; vxorps %ymm8,%ymm9,%ymm10
- DB 196,98,125,24,29,106,173,0,0 ; vbroadcastss 0xad6a(%rip),%ymm11 # af54 <_sk_clut_4D_avx+0x217b>
+ DB 196,98,125,24,29,30,177,0,0 ; vbroadcastss 0xb11e(%rip),%ymm11 # b308 <_sk_clut_4D_avx+0x217b>
DB 196,65,44,84,203 ; vandps %ymm11,%ymm10,%ymm9
DB 196,193,25,114,241,5 ; vpslld $0x5,%xmm9,%xmm12
DB 196,67,125,25,201,1 ; vextractf128 $0x1,%ymm9,%xmm9
@@ -7299,8 +7647,8 @@ _sk_dither_avx LABEL PROC
DB 196,67,125,25,219,1 ; vextractf128 $0x1,%ymm11,%xmm11
DB 196,193,33,114,243,4 ; vpslld $0x4,%xmm11,%xmm11
DB 196,67,29,24,219,1 ; vinsertf128 $0x1,%xmm11,%ymm12,%ymm11
- DB 196,98,125,24,37,43,173,0,0 ; vbroadcastss 0xad2b(%rip),%ymm12 # af58 <_sk_clut_4D_avx+0x217f>
- DB 196,98,125,24,45,38,173,0,0 ; vbroadcastss 0xad26(%rip),%ymm13 # af5c <_sk_clut_4D_avx+0x2183>
+ DB 196,98,125,24,37,223,176,0,0 ; vbroadcastss 0xb0df(%rip),%ymm12 # b30c <_sk_clut_4D_avx+0x217f>
+ DB 196,98,125,24,45,218,176,0,0 ; vbroadcastss 0xb0da(%rip),%ymm13 # b310 <_sk_clut_4D_avx+0x2183>
DB 196,65,44,84,245 ; vandps %ymm13,%ymm10,%ymm14
DB 196,193,1,114,246,2 ; vpslld $0x2,%xmm14,%xmm15
DB 196,67,125,25,246,1 ; vextractf128 $0x1,%ymm14,%xmm14
@@ -7327,9 +7675,9 @@ _sk_dither_avx LABEL PROC
DB 196,65,60,86,193 ; vorps %ymm9,%ymm8,%ymm8
DB 196,65,60,86,194 ; vorps %ymm10,%ymm8,%ymm8
DB 196,65,124,91,192 ; vcvtdq2ps %ymm8,%ymm8
- DB 196,98,125,24,13,145,172,0,0 ; vbroadcastss 0xac91(%rip),%ymm9 # af60 <_sk_clut_4D_avx+0x2187>
+ DB 196,98,125,24,13,69,176,0,0 ; vbroadcastss 0xb045(%rip),%ymm9 # b314 <_sk_clut_4D_avx+0x2187>
DB 196,65,60,89,193 ; vmulps %ymm9,%ymm8,%ymm8
- DB 196,98,125,24,13,135,172,0,0 ; vbroadcastss 0xac87(%rip),%ymm9 # af64 <_sk_clut_4D_avx+0x218b>
+ DB 196,98,125,24,13,59,176,0,0 ; vbroadcastss 0xb03b(%rip),%ymm9 # b318 <_sk_clut_4D_avx+0x218b>
DB 196,65,60,88,193 ; vaddps %ymm9,%ymm8,%ymm8
DB 196,98,125,24,8 ; vbroadcastss (%rax),%ymm9
DB 196,65,52,89,192 ; vmulps %ymm8,%ymm9,%ymm8
@@ -7359,7 +7707,7 @@ _sk_uniform_color_avx LABEL PROC
PUBLIC _sk_black_color_avx
_sk_black_color_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,29,39,172,0,0 ; vbroadcastss 0xac27(%rip),%ymm3 # af68 <_sk_clut_4D_avx+0x218f>
+ DB 196,226,125,24,29,219,175,0,0 ; vbroadcastss 0xafdb(%rip),%ymm3 # b31c <_sk_clut_4D_avx+0x218f>
DB 197,252,87,192 ; vxorps %ymm0,%ymm0,%ymm0
DB 197,244,87,201 ; vxorps %ymm1,%ymm1,%ymm1
DB 197,236,87,210 ; vxorps %ymm2,%ymm2,%ymm2
@@ -7368,7 +7716,7 @@ _sk_black_color_avx LABEL PROC
PUBLIC _sk_white_color_avx
_sk_white_color_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,5,18,172,0,0 ; vbroadcastss 0xac12(%rip),%ymm0 # af6c <_sk_clut_4D_avx+0x2193>
+ DB 196,226,125,24,5,198,175,0,0 ; vbroadcastss 0xafc6(%rip),%ymm0 # b320 <_sk_clut_4D_avx+0x2193>
DB 197,252,40,200 ; vmovaps %ymm0,%ymm1
DB 197,252,40,208 ; vmovaps %ymm0,%ymm2
DB 197,252,40,216 ; vmovaps %ymm0,%ymm3
@@ -7406,7 +7754,7 @@ _sk_clear_avx LABEL PROC
PUBLIC _sk_srcatop_avx
_sk_srcatop_avx LABEL PROC
DB 197,252,89,199 ; vmulps %ymm7,%ymm0,%ymm0
- DB 196,98,125,24,5,181,171,0,0 ; vbroadcastss 0xabb5(%rip),%ymm8 # af70 <_sk_clut_4D_avx+0x2197>
+ DB 196,98,125,24,5,105,175,0,0 ; vbroadcastss 0xaf69(%rip),%ymm8 # b324 <_sk_clut_4D_avx+0x2197>
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
DB 197,60,89,204 ; vmulps %ymm4,%ymm8,%ymm9
DB 197,180,88,192 ; vaddps %ymm0,%ymm9,%ymm0
@@ -7425,7 +7773,7 @@ _sk_srcatop_avx LABEL PROC
PUBLIC _sk_dstatop_avx
_sk_dstatop_avx LABEL PROC
DB 197,100,89,196 ; vmulps %ymm4,%ymm3,%ymm8
- DB 196,98,125,24,13,119,171,0,0 ; vbroadcastss 0xab77(%rip),%ymm9 # af74 <_sk_clut_4D_avx+0x219b>
+ DB 196,98,125,24,13,43,175,0,0 ; vbroadcastss 0xaf2b(%rip),%ymm9 # b328 <_sk_clut_4D_avx+0x219b>
DB 197,52,92,207 ; vsubps %ymm7,%ymm9,%ymm9
DB 197,180,89,192 ; vmulps %ymm0,%ymm9,%ymm0
DB 197,188,88,192 ; vaddps %ymm0,%ymm8,%ymm0
@@ -7461,7 +7809,7 @@ _sk_dstin_avx LABEL PROC
PUBLIC _sk_srcout_avx
_sk_srcout_avx LABEL PROC
- DB 196,98,125,24,5,22,171,0,0 ; vbroadcastss 0xab16(%rip),%ymm8 # af78 <_sk_clut_4D_avx+0x219f>
+ DB 196,98,125,24,5,202,174,0,0 ; vbroadcastss 0xaeca(%rip),%ymm8 # b32c <_sk_clut_4D_avx+0x219f>
DB 197,60,92,199 ; vsubps %ymm7,%ymm8,%ymm8
DB 197,188,89,192 ; vmulps %ymm0,%ymm8,%ymm0
DB 197,188,89,201 ; vmulps %ymm1,%ymm8,%ymm1
@@ -7472,7 +7820,7 @@ _sk_srcout_avx LABEL PROC
PUBLIC _sk_dstout_avx
_sk_dstout_avx LABEL PROC
- DB 196,226,125,24,5,249,170,0,0 ; vbroadcastss 0xaaf9(%rip),%ymm0 # af7c <_sk_clut_4D_avx+0x21a3>
+ DB 196,226,125,24,5,173,174,0,0 ; vbroadcastss 0xaead(%rip),%ymm0 # b330 <_sk_clut_4D_avx+0x21a3>
DB 197,252,92,219 ; vsubps %ymm3,%ymm0,%ymm3
DB 197,228,89,196 ; vmulps %ymm4,%ymm3,%ymm0
DB 197,228,89,205 ; vmulps %ymm5,%ymm3,%ymm1
@@ -7483,7 +7831,7 @@ _sk_dstout_avx LABEL PROC
PUBLIC _sk_srcover_avx
_sk_srcover_avx LABEL PROC
- DB 196,98,125,24,5,220,170,0,0 ; vbroadcastss 0xaadc(%rip),%ymm8 # af80 <_sk_clut_4D_avx+0x21a7>
+ DB 196,98,125,24,5,144,174,0,0 ; vbroadcastss 0xae90(%rip),%ymm8 # b334 <_sk_clut_4D_avx+0x21a7>
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
DB 197,60,89,204 ; vmulps %ymm4,%ymm8,%ymm9
DB 197,180,88,192 ; vaddps %ymm0,%ymm9,%ymm0
@@ -7498,7 +7846,7 @@ _sk_srcover_avx LABEL PROC
PUBLIC _sk_dstover_avx
_sk_dstover_avx LABEL PROC
- DB 196,98,125,24,5,175,170,0,0 ; vbroadcastss 0xaaaf(%rip),%ymm8 # af84 <_sk_clut_4D_avx+0x21ab>
+ DB 196,98,125,24,5,99,174,0,0 ; vbroadcastss 0xae63(%rip),%ymm8 # b338 <_sk_clut_4D_avx+0x21ab>
DB 197,60,92,199 ; vsubps %ymm7,%ymm8,%ymm8
DB 197,188,89,192 ; vmulps %ymm0,%ymm8,%ymm0
DB 197,252,88,196 ; vaddps %ymm4,%ymm0,%ymm0
@@ -7522,7 +7870,7 @@ _sk_modulate_avx LABEL PROC
PUBLIC _sk_multiply_avx
_sk_multiply_avx LABEL PROC
- DB 196,98,125,24,5,110,170,0,0 ; vbroadcastss 0xaa6e(%rip),%ymm8 # af88 <_sk_clut_4D_avx+0x21af>
+ DB 196,98,125,24,5,34,174,0,0 ; vbroadcastss 0xae22(%rip),%ymm8 # b33c <_sk_clut_4D_avx+0x21af>
DB 197,60,92,207 ; vsubps %ymm7,%ymm8,%ymm9
DB 197,52,89,208 ; vmulps %ymm0,%ymm9,%ymm10
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
@@ -7576,7 +7924,7 @@ _sk_screen_avx LABEL PROC
PUBLIC _sk_xor__avx
_sk_xor__avx LABEL PROC
- DB 196,98,125,24,5,189,169,0,0 ; vbroadcastss 0xa9bd(%rip),%ymm8 # af8c <_sk_clut_4D_avx+0x21b3>
+ DB 196,98,125,24,5,113,173,0,0 ; vbroadcastss 0xad71(%rip),%ymm8 # b340 <_sk_clut_4D_avx+0x21b3>
DB 197,60,92,207 ; vsubps %ymm7,%ymm8,%ymm9
DB 197,180,89,192 ; vmulps %ymm0,%ymm9,%ymm0
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
@@ -7611,7 +7959,7 @@ _sk_darken_avx LABEL PROC
DB 197,100,89,206 ; vmulps %ymm6,%ymm3,%ymm9
DB 196,193,108,95,209 ; vmaxps %ymm9,%ymm2,%ymm2
DB 197,188,92,210 ; vsubps %ymm2,%ymm8,%ymm2
- DB 196,98,125,24,5,61,169,0,0 ; vbroadcastss 0xa93d(%rip),%ymm8 # af90 <_sk_clut_4D_avx+0x21b7>
+ DB 196,98,125,24,5,241,172,0,0 ; vbroadcastss 0xacf1(%rip),%ymm8 # b344 <_sk_clut_4D_avx+0x21b7>
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
DB 197,60,89,199 ; vmulps %ymm7,%ymm8,%ymm8
DB 197,188,88,219 ; vaddps %ymm3,%ymm8,%ymm3
@@ -7635,7 +7983,7 @@ _sk_lighten_avx LABEL PROC
DB 197,100,89,206 ; vmulps %ymm6,%ymm3,%ymm9
DB 196,193,108,93,209 ; vminps %ymm9,%ymm2,%ymm2
DB 197,188,92,210 ; vsubps %ymm2,%ymm8,%ymm2
- DB 196,98,125,24,5,233,168,0,0 ; vbroadcastss 0xa8e9(%rip),%ymm8 # af94 <_sk_clut_4D_avx+0x21bb>
+ DB 196,98,125,24,5,157,172,0,0 ; vbroadcastss 0xac9d(%rip),%ymm8 # b348 <_sk_clut_4D_avx+0x21bb>
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
DB 197,60,89,199 ; vmulps %ymm7,%ymm8,%ymm8
DB 197,188,88,219 ; vaddps %ymm3,%ymm8,%ymm3
@@ -7662,7 +8010,7 @@ _sk_difference_avx LABEL PROC
DB 196,193,108,93,209 ; vminps %ymm9,%ymm2,%ymm2
DB 197,236,88,210 ; vaddps %ymm2,%ymm2,%ymm2
DB 197,188,92,210 ; vsubps %ymm2,%ymm8,%ymm2
- DB 196,98,125,24,5,137,168,0,0 ; vbroadcastss 0xa889(%rip),%ymm8 # af98 <_sk_clut_4D_avx+0x21bf>
+ DB 196,98,125,24,5,61,172,0,0 ; vbroadcastss 0xac3d(%rip),%ymm8 # b34c <_sk_clut_4D_avx+0x21bf>
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
DB 197,60,89,199 ; vmulps %ymm7,%ymm8,%ymm8
DB 197,188,88,219 ; vaddps %ymm3,%ymm8,%ymm3
@@ -7683,7 +8031,7 @@ _sk_exclusion_avx LABEL PROC
DB 197,236,89,214 ; vmulps %ymm6,%ymm2,%ymm2
DB 197,236,88,210 ; vaddps %ymm2,%ymm2,%ymm2
DB 197,188,92,210 ; vsubps %ymm2,%ymm8,%ymm2
- DB 196,98,125,24,5,68,168,0,0 ; vbroadcastss 0xa844(%rip),%ymm8 # af9c <_sk_clut_4D_avx+0x21c3>
+ DB 196,98,125,24,5,248,171,0,0 ; vbroadcastss 0xabf8(%rip),%ymm8 # b350 <_sk_clut_4D_avx+0x21c3>
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
DB 197,60,89,199 ; vmulps %ymm7,%ymm8,%ymm8
DB 197,188,88,219 ; vaddps %ymm3,%ymm8,%ymm3
@@ -7692,7 +8040,7 @@ _sk_exclusion_avx LABEL PROC
PUBLIC _sk_colorburn_avx
_sk_colorburn_avx LABEL PROC
- DB 196,98,125,24,5,47,168,0,0 ; vbroadcastss 0xa82f(%rip),%ymm8 # afa0 <_sk_clut_4D_avx+0x21c7>
+ DB 196,98,125,24,5,227,171,0,0 ; vbroadcastss 0xabe3(%rip),%ymm8 # b354 <_sk_clut_4D_avx+0x21c7>
DB 197,60,92,207 ; vsubps %ymm7,%ymm8,%ymm9
DB 197,52,89,216 ; vmulps %ymm0,%ymm9,%ymm11
DB 196,65,44,87,210 ; vxorps %ymm10,%ymm10,%ymm10
@@ -7752,7 +8100,7 @@ _sk_colorburn_avx LABEL PROC
PUBLIC _sk_colordodge_avx
_sk_colordodge_avx LABEL PROC
DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
- DB 196,98,125,24,13,43,167,0,0 ; vbroadcastss 0xa72b(%rip),%ymm9 # afa4 <_sk_clut_4D_avx+0x21cb>
+ DB 196,98,125,24,13,223,170,0,0 ; vbroadcastss 0xaadf(%rip),%ymm9 # b358 <_sk_clut_4D_avx+0x21cb>
DB 197,52,92,215 ; vsubps %ymm7,%ymm9,%ymm10
DB 197,44,89,216 ; vmulps %ymm0,%ymm10,%ymm11
DB 197,52,92,203 ; vsubps %ymm3,%ymm9,%ymm9
@@ -7807,7 +8155,7 @@ _sk_colordodge_avx LABEL PROC
PUBLIC _sk_hardlight_avx
_sk_hardlight_avx LABEL PROC
- DB 196,98,125,24,5,61,166,0,0 ; vbroadcastss 0xa63d(%rip),%ymm8 # afa8 <_sk_clut_4D_avx+0x21cf>
+ DB 196,98,125,24,5,241,169,0,0 ; vbroadcastss 0xa9f1(%rip),%ymm8 # b35c <_sk_clut_4D_avx+0x21cf>
DB 197,60,92,215 ; vsubps %ymm7,%ymm8,%ymm10
DB 197,44,89,200 ; vmulps %ymm0,%ymm10,%ymm9
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
@@ -7860,7 +8208,7 @@ _sk_hardlight_avx LABEL PROC
PUBLIC _sk_overlay_avx
_sk_overlay_avx LABEL PROC
- DB 196,98,125,24,5,102,165,0,0 ; vbroadcastss 0xa566(%rip),%ymm8 # afac <_sk_clut_4D_avx+0x21d3>
+ DB 196,98,125,24,5,26,169,0,0 ; vbroadcastss 0xa91a(%rip),%ymm8 # b360 <_sk_clut_4D_avx+0x21d3>
DB 197,60,92,215 ; vsubps %ymm7,%ymm8,%ymm10
DB 197,44,89,200 ; vmulps %ymm0,%ymm10,%ymm9
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
@@ -7925,10 +8273,10 @@ _sk_softlight_avx LABEL PROC
DB 196,65,60,88,192 ; vaddps %ymm8,%ymm8,%ymm8
DB 196,65,60,89,216 ; vmulps %ymm8,%ymm8,%ymm11
DB 196,65,60,88,195 ; vaddps %ymm11,%ymm8,%ymm8
- DB 196,98,125,24,29,89,164,0,0 ; vbroadcastss 0xa459(%rip),%ymm11 # afb4 <_sk_clut_4D_avx+0x21db>
+ DB 196,98,125,24,29,13,168,0,0 ; vbroadcastss 0xa80d(%rip),%ymm11 # b368 <_sk_clut_4D_avx+0x21db>
DB 196,65,28,88,235 ; vaddps %ymm11,%ymm12,%ymm13
DB 196,65,20,89,192 ; vmulps %ymm8,%ymm13,%ymm8
- DB 196,98,125,24,45,74,164,0,0 ; vbroadcastss 0xa44a(%rip),%ymm13 # afb8 <_sk_clut_4D_avx+0x21df>
+ DB 196,98,125,24,45,254,167,0,0 ; vbroadcastss 0xa7fe(%rip),%ymm13 # b36c <_sk_clut_4D_avx+0x21df>
DB 196,65,28,89,245 ; vmulps %ymm13,%ymm12,%ymm14
DB 196,65,12,88,192 ; vaddps %ymm8,%ymm14,%ymm8
DB 196,65,124,82,244 ; vrsqrtps %ymm12,%ymm14
@@ -7939,7 +8287,7 @@ _sk_softlight_avx LABEL PROC
DB 197,4,194,255,2 ; vcmpleps %ymm7,%ymm15,%ymm15
DB 196,67,13,74,240,240 ; vblendvps %ymm15,%ymm8,%ymm14,%ymm14
DB 197,116,88,249 ; vaddps %ymm1,%ymm1,%ymm15
- DB 196,98,125,24,5,8,164,0,0 ; vbroadcastss 0xa408(%rip),%ymm8 # afb0 <_sk_clut_4D_avx+0x21d7>
+ DB 196,98,125,24,5,188,167,0,0 ; vbroadcastss 0xa7bc(%rip),%ymm8 # b364 <_sk_clut_4D_avx+0x21d7>
DB 196,65,60,92,228 ; vsubps %ymm12,%ymm8,%ymm12
DB 197,132,92,195 ; vsubps %ymm3,%ymm15,%ymm0
DB 196,65,124,89,228 ; vmulps %ymm12,%ymm0,%ymm12
@@ -8066,12 +8414,12 @@ _sk_hue_avx LABEL PROC
DB 196,65,28,89,219 ; vmulps %ymm11,%ymm12,%ymm11
DB 196,65,36,94,222 ; vdivps %ymm14,%ymm11,%ymm11
DB 196,67,37,74,224,240 ; vblendvps %ymm15,%ymm8,%ymm11,%ymm12
- DB 196,98,125,24,53,210,161,0,0 ; vbroadcastss 0xa1d2(%rip),%ymm14 # afbc <_sk_clut_4D_avx+0x21e3>
+ DB 196,98,125,24,53,134,165,0,0 ; vbroadcastss 0xa586(%rip),%ymm14 # b370 <_sk_clut_4D_avx+0x21e3>
DB 196,65,92,89,222 ; vmulps %ymm14,%ymm4,%ymm11
- DB 196,98,125,24,61,200,161,0,0 ; vbroadcastss 0xa1c8(%rip),%ymm15 # afc0 <_sk_clut_4D_avx+0x21e7>
+ DB 196,98,125,24,61,124,165,0,0 ; vbroadcastss 0xa57c(%rip),%ymm15 # b374 <_sk_clut_4D_avx+0x21e7>
DB 196,65,84,89,239 ; vmulps %ymm15,%ymm5,%ymm13
DB 196,65,36,88,221 ; vaddps %ymm13,%ymm11,%ymm11
- DB 196,226,125,24,5,185,161,0,0 ; vbroadcastss 0xa1b9(%rip),%ymm0 # afc4 <_sk_clut_4D_avx+0x21eb>
+ DB 196,226,125,24,5,109,165,0,0 ; vbroadcastss 0xa56d(%rip),%ymm0 # b378 <_sk_clut_4D_avx+0x21eb>
DB 197,76,89,232 ; vmulps %ymm0,%ymm6,%ymm13
DB 196,65,36,88,221 ; vaddps %ymm13,%ymm11,%ymm11
DB 196,65,52,89,238 ; vmulps %ymm14,%ymm9,%ymm13
@@ -8132,7 +8480,7 @@ _sk_hue_avx LABEL PROC
DB 196,65,36,95,208 ; vmaxps %ymm8,%ymm11,%ymm10
DB 196,195,109,74,209,240 ; vblendvps %ymm15,%ymm9,%ymm2,%ymm2
DB 196,193,108,95,208 ; vmaxps %ymm8,%ymm2,%ymm2
- DB 196,98,125,24,5,146,160,0,0 ; vbroadcastss 0xa092(%rip),%ymm8 # afc8 <_sk_clut_4D_avx+0x21ef>
+ DB 196,98,125,24,5,70,164,0,0 ; vbroadcastss 0xa446(%rip),%ymm8 # b37c <_sk_clut_4D_avx+0x21ef>
DB 197,60,92,207 ; vsubps %ymm7,%ymm8,%ymm9
DB 197,180,89,201 ; vmulps %ymm1,%ymm9,%ymm1
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
@@ -8189,12 +8537,12 @@ _sk_saturation_avx LABEL PROC
DB 196,65,28,89,219 ; vmulps %ymm11,%ymm12,%ymm11
DB 196,65,36,94,222 ; vdivps %ymm14,%ymm11,%ymm11
DB 196,67,37,74,224,240 ; vblendvps %ymm15,%ymm8,%ymm11,%ymm12
- DB 196,98,125,24,53,154,159,0,0 ; vbroadcastss 0x9f9a(%rip),%ymm14 # afcc <_sk_clut_4D_avx+0x21f3>
+ DB 196,98,125,24,53,78,163,0,0 ; vbroadcastss 0xa34e(%rip),%ymm14 # b380 <_sk_clut_4D_avx+0x21f3>
DB 196,65,92,89,222 ; vmulps %ymm14,%ymm4,%ymm11
- DB 196,98,125,24,61,144,159,0,0 ; vbroadcastss 0x9f90(%rip),%ymm15 # afd0 <_sk_clut_4D_avx+0x21f7>
+ DB 196,98,125,24,61,68,163,0,0 ; vbroadcastss 0xa344(%rip),%ymm15 # b384 <_sk_clut_4D_avx+0x21f7>
DB 196,65,84,89,239 ; vmulps %ymm15,%ymm5,%ymm13
DB 196,65,36,88,221 ; vaddps %ymm13,%ymm11,%ymm11
- DB 196,226,125,24,5,129,159,0,0 ; vbroadcastss 0x9f81(%rip),%ymm0 # afd4 <_sk_clut_4D_avx+0x21fb>
+ DB 196,226,125,24,5,53,163,0,0 ; vbroadcastss 0xa335(%rip),%ymm0 # b388 <_sk_clut_4D_avx+0x21fb>
DB 197,76,89,232 ; vmulps %ymm0,%ymm6,%ymm13
DB 196,65,36,88,221 ; vaddps %ymm13,%ymm11,%ymm11
DB 196,65,52,89,238 ; vmulps %ymm14,%ymm9,%ymm13
@@ -8255,7 +8603,7 @@ _sk_saturation_avx LABEL PROC
DB 196,65,36,95,208 ; vmaxps %ymm8,%ymm11,%ymm10
DB 196,195,109,74,209,240 ; vblendvps %ymm15,%ymm9,%ymm2,%ymm2
DB 196,193,108,95,208 ; vmaxps %ymm8,%ymm2,%ymm2
- DB 196,98,125,24,5,90,158,0,0 ; vbroadcastss 0x9e5a(%rip),%ymm8 # afd8 <_sk_clut_4D_avx+0x21ff>
+ DB 196,98,125,24,5,14,162,0,0 ; vbroadcastss 0xa20e(%rip),%ymm8 # b38c <_sk_clut_4D_avx+0x21ff>
DB 197,60,92,207 ; vsubps %ymm7,%ymm8,%ymm9
DB 197,180,89,201 ; vmulps %ymm1,%ymm9,%ymm1
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
@@ -8284,12 +8632,12 @@ _sk_color_avx LABEL PROC
DB 197,252,17,68,36,32 ; vmovups %ymm0,0x20(%rsp)
DB 197,124,89,199 ; vmulps %ymm7,%ymm0,%ymm8
DB 197,116,89,207 ; vmulps %ymm7,%ymm1,%ymm9
- DB 196,98,125,24,45,234,157,0,0 ; vbroadcastss 0x9dea(%rip),%ymm13 # afdc <_sk_clut_4D_avx+0x2203>
+ DB 196,98,125,24,45,158,161,0,0 ; vbroadcastss 0xa19e(%rip),%ymm13 # b390 <_sk_clut_4D_avx+0x2203>
DB 196,65,92,89,213 ; vmulps %ymm13,%ymm4,%ymm10
- DB 196,98,125,24,53,224,157,0,0 ; vbroadcastss 0x9de0(%rip),%ymm14 # afe0 <_sk_clut_4D_avx+0x2207>
+ DB 196,98,125,24,53,148,161,0,0 ; vbroadcastss 0xa194(%rip),%ymm14 # b394 <_sk_clut_4D_avx+0x2207>
DB 196,65,84,89,222 ; vmulps %ymm14,%ymm5,%ymm11
DB 196,65,44,88,211 ; vaddps %ymm11,%ymm10,%ymm10
- DB 196,98,125,24,61,209,157,0,0 ; vbroadcastss 0x9dd1(%rip),%ymm15 # afe4 <_sk_clut_4D_avx+0x220b>
+ DB 196,98,125,24,61,133,161,0,0 ; vbroadcastss 0xa185(%rip),%ymm15 # b398 <_sk_clut_4D_avx+0x220b>
DB 196,65,76,89,223 ; vmulps %ymm15,%ymm6,%ymm11
DB 196,193,44,88,195 ; vaddps %ymm11,%ymm10,%ymm0
DB 196,65,60,89,221 ; vmulps %ymm13,%ymm8,%ymm11
@@ -8352,7 +8700,7 @@ _sk_color_avx LABEL PROC
DB 196,65,44,95,207 ; vmaxps %ymm15,%ymm10,%ymm9
DB 196,195,37,74,192,0 ; vblendvps %ymm0,%ymm8,%ymm11,%ymm0
DB 196,65,124,95,199 ; vmaxps %ymm15,%ymm0,%ymm8
- DB 196,226,125,24,5,152,156,0,0 ; vbroadcastss 0x9c98(%rip),%ymm0 # afe8 <_sk_clut_4D_avx+0x220f>
+ DB 196,226,125,24,5,76,160,0,0 ; vbroadcastss 0xa04c(%rip),%ymm0 # b39c <_sk_clut_4D_avx+0x220f>
DB 197,124,92,215 ; vsubps %ymm7,%ymm0,%ymm10
DB 197,172,89,84,36,32 ; vmulps 0x20(%rsp),%ymm10,%ymm2
DB 197,124,92,219 ; vsubps %ymm3,%ymm0,%ymm11
@@ -8382,12 +8730,12 @@ _sk_luminosity_avx LABEL PROC
DB 197,252,40,208 ; vmovaps %ymm0,%ymm2
DB 197,100,89,196 ; vmulps %ymm4,%ymm3,%ymm8
DB 197,100,89,205 ; vmulps %ymm5,%ymm3,%ymm9
- DB 196,98,125,24,45,36,156,0,0 ; vbroadcastss 0x9c24(%rip),%ymm13 # afec <_sk_clut_4D_avx+0x2213>
+ DB 196,98,125,24,45,216,159,0,0 ; vbroadcastss 0x9fd8(%rip),%ymm13 # b3a0 <_sk_clut_4D_avx+0x2213>
DB 196,65,108,89,213 ; vmulps %ymm13,%ymm2,%ymm10
- DB 196,98,125,24,53,26,156,0,0 ; vbroadcastss 0x9c1a(%rip),%ymm14 # aff0 <_sk_clut_4D_avx+0x2217>
+ DB 196,98,125,24,53,206,159,0,0 ; vbroadcastss 0x9fce(%rip),%ymm14 # b3a4 <_sk_clut_4D_avx+0x2217>
DB 196,65,116,89,222 ; vmulps %ymm14,%ymm1,%ymm11
DB 196,65,44,88,211 ; vaddps %ymm11,%ymm10,%ymm10
- DB 196,98,125,24,61,11,156,0,0 ; vbroadcastss 0x9c0b(%rip),%ymm15 # aff4 <_sk_clut_4D_avx+0x221b>
+ DB 196,98,125,24,61,191,159,0,0 ; vbroadcastss 0x9fbf(%rip),%ymm15 # b3a8 <_sk_clut_4D_avx+0x221b>
DB 196,65,28,89,223 ; vmulps %ymm15,%ymm12,%ymm11
DB 196,193,44,88,195 ; vaddps %ymm11,%ymm10,%ymm0
DB 196,65,60,89,221 ; vmulps %ymm13,%ymm8,%ymm11
@@ -8450,7 +8798,7 @@ _sk_luminosity_avx LABEL PROC
DB 196,65,44,95,207 ; vmaxps %ymm15,%ymm10,%ymm9
DB 196,195,37,74,192,0 ; vblendvps %ymm0,%ymm8,%ymm11,%ymm0
DB 196,65,124,95,199 ; vmaxps %ymm15,%ymm0,%ymm8
- DB 196,226,125,24,5,210,154,0,0 ; vbroadcastss 0x9ad2(%rip),%ymm0 # aff8 <_sk_clut_4D_avx+0x221f>
+ DB 196,226,125,24,5,134,158,0,0 ; vbroadcastss 0x9e86(%rip),%ymm0 # b3ac <_sk_clut_4D_avx+0x221f>
DB 197,124,92,215 ; vsubps %ymm7,%ymm0,%ymm10
DB 197,172,89,210 ; vmulps %ymm2,%ymm10,%ymm2
DB 197,124,92,219 ; vsubps %ymm3,%ymm0,%ymm11
@@ -8473,40 +8821,38 @@ _sk_luminosity_avx LABEL PROC
PUBLIC _sk_srcover_rgba_8888_avx
_sk_srcover_rgba_8888_avx LABEL PROC
- DB 72,131,236,16 ; sub $0x10,%rsp
- DB 73,137,201 ; mov %rcx,%r9
+ DB 83 ; push %rbx
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,99,80,8 ; movslq 0x8(%rax),%r10
- DB 73,99,201 ; movslq %r9d,%rcx
- DB 73,15,175,202 ; imul %r10,%rcx
- DB 72,193,225,2 ; shl $0x2,%rcx
- DB 72,3,8 ; add (%rax),%rcx
- DB 72,99,194 ; movslq %edx,%rax
- DB 72,141,4,129 ; lea (%rcx,%rax,4),%rax
+ DB 76,99,201 ; movslq %ecx,%r9
+ DB 77,15,175,202 ; imul %r10,%r9
+ DB 73,193,225,2 ; shl $0x2,%r9
+ DB 76,3,8 ; add (%rax),%r9
+ DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 15,133,45,1,0,0 ; jne 16cc <_sk_srcover_rgba_8888_avx+0x158>
- DB 197,252,16,56 ; vmovups (%rax),%ymm7
- DB 197,124,40,13,149,160,0,0 ; vmovaps 0xa095(%rip),%ymm9 # b640 <_sk_clut_4D_avx+0x2867>
- DB 196,193,68,84,225 ; vandps %ymm9,%ymm7,%ymm4
+ DB 15,133,43,1,0,0 ; jne 16c0 <_sk_srcover_rgba_8888_avx+0x14c>
+ DB 196,65,125,16,4,153 ; vmovupd (%r9,%rbx,4),%ymm8
+ DB 197,125,40,13,157,162,0,0 ; vmovapd 0xa29d(%rip),%ymm9 # b840 <_sk_clut_4D_avx+0x26b3>
+ DB 196,193,61,84,225 ; vandpd %ymm9,%ymm8,%ymm4
DB 197,252,91,228 ; vcvtdq2ps %ymm4,%ymm4
- DB 197,209,114,215,8 ; vpsrld $0x8,%xmm7,%xmm5
- DB 196,195,125,25,248,1 ; vextractf128 $0x1,%ymm7,%xmm8
- DB 196,193,73,114,208,8 ; vpsrld $0x8,%xmm8,%xmm6
+ DB 196,193,81,114,208,8 ; vpsrld $0x8,%xmm8,%xmm5
+ DB 196,99,125,25,199,1 ; vextractf128 $0x1,%ymm8,%xmm7
+ DB 197,201,114,215,8 ; vpsrld $0x8,%xmm7,%xmm6
DB 196,227,85,24,238,1 ; vinsertf128 $0x1,%xmm6,%ymm5,%ymm5
- DB 196,193,84,84,233 ; vandps %ymm9,%ymm5,%ymm5
+ DB 196,193,85,84,233 ; vandpd %ymm9,%ymm5,%ymm5
DB 197,252,91,237 ; vcvtdq2ps %ymm5,%ymm5
- DB 197,169,114,215,16 ; vpsrld $0x10,%xmm7,%xmm10
- DB 196,193,73,114,208,16 ; vpsrld $0x10,%xmm8,%xmm6
+ DB 196,193,41,114,208,16 ; vpsrld $0x10,%xmm8,%xmm10
+ DB 197,201,114,215,16 ; vpsrld $0x10,%xmm7,%xmm6
DB 196,227,45,24,246,1 ; vinsertf128 $0x1,%xmm6,%ymm10,%ymm6
- DB 196,193,76,84,241 ; vandps %ymm9,%ymm6,%ymm6
+ DB 196,193,77,84,241 ; vandpd %ymm9,%ymm6,%ymm6
DB 197,252,91,246 ; vcvtdq2ps %ymm6,%ymm6
- DB 197,177,114,215,24 ; vpsrld $0x18,%xmm7,%xmm9
- DB 196,193,65,114,208,24 ; vpsrld $0x18,%xmm8,%xmm7
- DB 196,227,53,24,255,1 ; vinsertf128 $0x1,%xmm7,%ymm9,%ymm7
+ DB 196,193,57,114,208,24 ; vpsrld $0x18,%xmm8,%xmm8
+ DB 197,193,114,215,24 ; vpsrld $0x18,%xmm7,%xmm7
+ DB 196,227,61,24,255,1 ; vinsertf128 $0x1,%xmm7,%ymm8,%ymm7
DB 197,252,91,255 ; vcvtdq2ps %ymm7,%ymm7
- DB 196,98,125,24,5,240,153,0,0 ; vbroadcastss 0x99f0(%rip),%ymm8 # affc <_sk_clut_4D_avx+0x2223>
+ DB 196,98,125,24,5,172,157,0,0 ; vbroadcastss 0x9dac(%rip),%ymm8 # b3b0 <_sk_clut_4D_avx+0x2223>
DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8
- DB 196,98,125,24,13,231,153,0,0 ; vbroadcastss 0x99e7(%rip),%ymm9 # b000 <_sk_clut_4D_avx+0x2227>
+ DB 196,98,125,24,13,163,157,0,0 ; vbroadcastss 0x9da3(%rip),%ymm9 # b3b4 <_sk_clut_4D_avx+0x2227>
DB 196,193,124,89,193 ; vmulps %ymm9,%ymm0,%ymm0
DB 197,60,89,212 ; vmulps %ymm4,%ymm8,%ymm10
DB 196,193,124,88,194 ; vaddps %ymm10,%ymm0,%ymm0
@@ -8539,40 +8885,99 @@ _sk_srcover_rgba_8888_avx LABEL PROC
DB 196,65,53,86,202 ; vorpd %ymm10,%ymm9,%ymm9
DB 196,65,61,86,193 ; vorpd %ymm9,%ymm8,%ymm8
DB 77,133,192 ; test %r8,%r8
- DB 117,90 ; jne 1717 <_sk_srcover_rgba_8888_avx+0x1a3>
- DB 197,124,17,0 ; vmovups %ymm8,(%rax)
+ DB 117,67 ; jne 16f8 <_sk_srcover_rgba_8888_avx+0x184>
+ DB 196,65,124,17,4,153 ; vmovups %ymm8,(%r9,%rbx,4)
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 76,137,201 ; mov %r9,%rcx
- DB 72,131,196,16 ; add $0x10,%rsp
- DB 255,224 ; jmpq *%rax
- DB 185,8,0,0,0 ; mov $0x8,%ecx
- DB 68,41,193 ; sub %r8d,%ecx
- DB 192,225,3 ; shl $0x3,%cl
- DB 73,199,194,255,255,255,255 ; mov $0xffffffffffffffff,%r10
- DB 73,211,234 ; shr %cl,%r10
- DB 196,193,249,110,226 ; vmovq %r10,%xmm4
- DB 196,226,121,48,228 ; vpmovzxbw %xmm4,%xmm4
- DB 196,226,89,0,45,156,157,0,0 ; vpshufb 0x9d9c(%rip),%xmm4,%xmm5 # b490 <_sk_clut_4D_avx+0x26b7>
- DB 196,226,121,33,237 ; vpmovsxbd %xmm5,%xmm5
- DB 196,226,89,0,37,158,157,0,0 ; vpshufb 0x9d9e(%rip),%xmm4,%xmm4 # b4a0 <_sk_clut_4D_avx+0x26c7>
- DB 196,226,121,33,228 ; vpmovsxbd %xmm4,%xmm4
- DB 196,227,85,24,228,1 ; vinsertf128 $0x1,%xmm4,%ymm5,%ymm4
- DB 196,226,93,44,56 ; vmaskmovps (%rax),%ymm4,%ymm7
- DB 233,140,254,255,255 ; jmpq 15a3 <_sk_srcover_rgba_8888_avx+0x2f>
- DB 185,8,0,0,0 ; mov $0x8,%ecx
- DB 68,41,193 ; sub %r8d,%ecx
- DB 192,225,3 ; shl $0x3,%cl
- DB 73,199,194,255,255,255,255 ; mov $0xffffffffffffffff,%r10
- DB 73,211,234 ; shr %cl,%r10
- DB 196,65,249,110,202 ; vmovq %r10,%xmm9
- DB 196,66,121,48,201 ; vpmovzxbw %xmm9,%xmm9
- DB 196,98,49,0,21,81,157,0,0 ; vpshufb 0x9d51(%rip),%xmm9,%xmm10 # b490 <_sk_clut_4D_avx+0x26b7>
- DB 196,66,121,33,210 ; vpmovsxbd %xmm10,%xmm10
- DB 196,98,49,0,13,83,157,0,0 ; vpshufb 0x9d53(%rip),%xmm9,%xmm9 # b4a0 <_sk_clut_4D_avx+0x26c7>
- DB 196,66,121,33,201 ; vpmovsxbd %xmm9,%xmm9
- DB 196,67,45,24,201,1 ; vinsertf128 $0x1,%xmm9,%ymm10,%ymm9
- DB 196,98,53,46,0 ; vmaskmovps %ymm8,%ymm9,(%rax)
- DB 233,95,255,255,255 ; jmpq 16c1 <_sk_srcover_rgba_8888_avx+0x14d>
+ DB 91 ; pop %rbx
+ DB 255,224 ; jmpq *%rax
+ DB 69,137,194 ; mov %r8d,%r10d
+ DB 65,128,226,7 ; and $0x7,%r10b
+ DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
+ DB 65,254,202 ; dec %r10b
+ DB 65,128,250,6 ; cmp $0x6,%r10b
+ DB 15,135,194,254,255,255 ; ja 159b <_sk_srcover_rgba_8888_avx+0x27>
+ DB 69,15,182,210 ; movzbl %r10b,%r10d
+ DB 76,141,29,4,1,0,0 ; lea 0x104(%rip),%r11 # 17e8 <_sk_srcover_rgba_8888_avx+0x274>
+ DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
+ DB 76,1,216 ; add %r11,%rax
+ DB 255,224 ; jmpq *%rax
+ DB 196,65,122,16,4,153 ; vmovss (%r9,%rbx,4),%xmm8
+ DB 233,163,254,255,255 ; jmpq 159b <_sk_srcover_rgba_8888_avx+0x27>
+ DB 69,137,194 ; mov %r8d,%r10d
+ DB 65,128,226,7 ; and $0x7,%r10b
+ DB 65,254,202 ; dec %r10b
+ DB 65,128,250,6 ; cmp $0x6,%r10b
+ DB 119,179 ; ja 16bb <_sk_srcover_rgba_8888_avx+0x147>
+ DB 65,15,182,194 ; movzbl %r10b,%eax
+ DB 76,141,21,241,0,0,0 ; lea 0xf1(%rip),%r10 # 1804 <_sk_srcover_rgba_8888_avx+0x290>
+ DB 73,99,4,130 ; movslq (%r10,%rax,4),%rax
+ DB 76,1,208 ; add %r10,%rax
+ DB 255,224 ; jmpq *%rax
+ DB 196,65,121,126,4,153 ; vmovd %xmm8,(%r9,%rbx,4)
+ DB 235,151 ; jmp 16bb <_sk_srcover_rgba_8888_avx+0x147>
+ DB 196,193,121,110,100,153,8 ; vmovd 0x8(%r9,%rbx,4),%xmm4
+ DB 197,249,112,228,68 ; vpshufd $0x44,%xmm4,%xmm4
+ DB 197,212,87,237 ; vxorps %ymm5,%ymm5,%ymm5
+ DB 196,99,85,12,196,4 ; vblendps $0x4,%ymm4,%ymm5,%ymm8
+ DB 196,193,123,16,36,153 ; vmovsd (%r9,%rbx,4),%xmm4
+ DB 196,99,61,13,196,1 ; vblendpd $0x1,%ymm4,%ymm8,%ymm8
+ DB 233,80,254,255,255 ; jmpq 159b <_sk_srcover_rgba_8888_avx+0x27>
+ DB 196,193,121,110,100,153,24 ; vmovd 0x18(%r9,%rbx,4),%xmm4
+ DB 197,249,112,228,68 ; vpshufd $0x44,%xmm4,%xmm4
+ DB 196,227,125,24,228,1 ; vinsertf128 $0x1,%xmm4,%ymm0,%ymm4
+ DB 197,212,87,237 ; vxorps %ymm5,%ymm5,%ymm5
+ DB 196,99,85,12,196,64 ; vblendps $0x40,%ymm4,%ymm5,%ymm8
+ DB 196,99,125,25,196,1 ; vextractf128 $0x1,%ymm8,%xmm4
+ DB 196,195,89,34,100,153,20,1 ; vpinsrd $0x1,0x14(%r9,%rbx,4),%xmm4,%xmm4
+ DB 196,99,61,24,196,1 ; vinsertf128 $0x1,%xmm4,%ymm8,%ymm8
+ DB 196,99,125,25,196,1 ; vextractf128 $0x1,%ymm8,%xmm4
+ DB 196,195,89,34,100,153,16,0 ; vpinsrd $0x0,0x10(%r9,%rbx,4),%xmm4,%xmm4
+ DB 196,99,61,24,196,1 ; vinsertf128 $0x1,%xmm4,%ymm8,%ymm8
+ DB 196,193,121,16,36,153 ; vmovupd (%r9,%rbx,4),%xmm4
+ DB 196,67,93,13,192,12 ; vblendpd $0xc,%ymm8,%ymm4,%ymm8
+ DB 233,251,253,255,255 ; jmpq 159b <_sk_srcover_rgba_8888_avx+0x27>
+ DB 196,67,121,22,68,153,8,2 ; vpextrd $0x2,%xmm8,0x8(%r9,%rbx,4)
+ DB 196,65,121,214,4,153 ; vmovq %xmm8,(%r9,%rbx,4)
+ DB 233,8,255,255,255 ; jmpq 16bb <_sk_srcover_rgba_8888_avx+0x147>
+ DB 196,67,125,25,193,1 ; vextractf128 $0x1,%ymm8,%xmm9
+ DB 196,67,121,22,76,153,24,2 ; vpextrd $0x2,%xmm9,0x18(%r9,%rbx,4)
+ DB 196,67,125,25,193,1 ; vextractf128 $0x1,%ymm8,%xmm9
+ DB 196,67,121,22,76,153,20,1 ; vpextrd $0x1,%xmm9,0x14(%r9,%rbx,4)
+ DB 196,67,125,25,193,1 ; vextractf128 $0x1,%ymm8,%xmm9
+ DB 196,65,122,17,76,153,16 ; vmovss %xmm9,0x10(%r9,%rbx,4)
+ DB 196,65,121,17,4,153 ; vmovupd %xmm8,(%r9,%rbx,4)
+ DB 233,212,254,255,255 ; jmpq 16bb <_sk_srcover_rgba_8888_avx+0x147>
+ DB 144 ; nop
+ DB 5,255,255,255,82 ; add $0x52ffffff,%eax
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 60,255 ; cmp $0xff,%al
+ DB 255 ; (bad)
+ DB 255,167,255,255,255,147 ; jmpq *-0x6c000001(%rdi)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 127,255 ; jg 17fd <_sk_srcover_rgba_8888_avx+0x289>
+ DB 255 ; (bad)
+ DB 255,99,255 ; jmpq *-0x1(%rbx)
+ DB 255 ; (bad)
+ DB 255,24 ; lcall *(%rax)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,164,255,255,255,156,255 ; jmpq *-0x630001(%rdi,%rdi,8)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 216,255 ; fdivr %st(7),%st
+ DB 255 ; (bad)
+ DB 255,203 ; dec %ebx
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 189,255,255,255,175 ; mov $0xafffffff,%ebp
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; .byte 0xff
PUBLIC _sk_clamp_0_avx
_sk_clamp_0_avx LABEL PROC
@@ -8586,7 +8991,7 @@ _sk_clamp_0_avx LABEL PROC
PUBLIC _sk_clamp_1_avx
_sk_clamp_1_avx LABEL PROC
- DB 196,98,125,24,5,124,152,0,0 ; vbroadcastss 0x987c(%rip),%ymm8 # b004 <_sk_clut_4D_avx+0x222b>
+ DB 196,98,125,24,5,114,155,0,0 ; vbroadcastss 0x9b72(%rip),%ymm8 # b3b8 <_sk_clut_4D_avx+0x222b>
DB 196,193,124,93,192 ; vminps %ymm8,%ymm0,%ymm0
DB 196,193,116,93,200 ; vminps %ymm8,%ymm1,%ymm1
DB 196,193,108,93,208 ; vminps %ymm8,%ymm2,%ymm2
@@ -8596,7 +9001,7 @@ _sk_clamp_1_avx LABEL PROC
PUBLIC _sk_clamp_a_avx
_sk_clamp_a_avx LABEL PROC
- DB 196,98,125,24,5,95,152,0,0 ; vbroadcastss 0x985f(%rip),%ymm8 # b008 <_sk_clut_4D_avx+0x222f>
+ DB 196,98,125,24,5,85,155,0,0 ; vbroadcastss 0x9b55(%rip),%ymm8 # b3bc <_sk_clut_4D_avx+0x222f>
DB 196,193,100,93,216 ; vminps %ymm8,%ymm3,%ymm3
DB 197,252,93,195 ; vminps %ymm3,%ymm0,%ymm0
DB 197,244,93,203 ; vminps %ymm3,%ymm1,%ymm1
@@ -8606,7 +9011,7 @@ _sk_clamp_a_avx LABEL PROC
PUBLIC _sk_clamp_a_dst_avx
_sk_clamp_a_dst_avx LABEL PROC
- DB 196,98,125,24,5,69,152,0,0 ; vbroadcastss 0x9845(%rip),%ymm8 # b00c <_sk_clut_4D_avx+0x2233>
+ DB 196,98,125,24,5,59,155,0,0 ; vbroadcastss 0x9b3b(%rip),%ymm8 # b3c0 <_sk_clut_4D_avx+0x2233>
DB 196,193,68,93,248 ; vminps %ymm8,%ymm7,%ymm7
DB 197,220,93,231 ; vminps %ymm7,%ymm4,%ymm4
DB 197,212,93,239 ; vminps %ymm7,%ymm5,%ymm5
@@ -8633,7 +9038,7 @@ _sk_swap_rb_avx LABEL PROC
PUBLIC _sk_invert_avx
_sk_invert_avx LABEL PROC
- DB 196,98,125,24,5,4,152,0,0 ; vbroadcastss 0x9804(%rip),%ymm8 # b010 <_sk_clut_4D_avx+0x2237>
+ DB 196,98,125,24,5,250,154,0,0 ; vbroadcastss 0x9afa(%rip),%ymm8 # b3c4 <_sk_clut_4D_avx+0x2237>
DB 197,188,92,192 ; vsubps %ymm0,%ymm8,%ymm0
DB 197,188,92,201 ; vsubps %ymm1,%ymm8,%ymm1
DB 197,188,92,210 ; vsubps %ymm2,%ymm8,%ymm2
@@ -8679,7 +9084,7 @@ PUBLIC _sk_unpremul_avx
_sk_unpremul_avx LABEL PROC
DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
DB 196,65,100,194,200,0 ; vcmpeqps %ymm8,%ymm3,%ymm9
- DB 196,98,125,24,21,152,151,0,0 ; vbroadcastss 0x9798(%rip),%ymm10 # b014 <_sk_clut_4D_avx+0x223b>
+ DB 196,98,125,24,21,142,154,0,0 ; vbroadcastss 0x9a8e(%rip),%ymm10 # b3c8 <_sk_clut_4D_avx+0x223b>
DB 197,44,94,211 ; vdivps %ymm3,%ymm10,%ymm10
DB 196,67,45,74,192,144 ; vblendvps %ymm9,%ymm8,%ymm10,%ymm8
DB 197,188,89,192 ; vmulps %ymm0,%ymm8,%ymm0
@@ -8690,17 +9095,17 @@ _sk_unpremul_avx LABEL PROC
PUBLIC _sk_from_srgb_avx
_sk_from_srgb_avx LABEL PROC
- DB 196,98,125,24,5,121,151,0,0 ; vbroadcastss 0x9779(%rip),%ymm8 # b018 <_sk_clut_4D_avx+0x223f>
+ DB 196,98,125,24,5,111,154,0,0 ; vbroadcastss 0x9a6f(%rip),%ymm8 # b3cc <_sk_clut_4D_avx+0x223f>
DB 196,65,124,89,200 ; vmulps %ymm8,%ymm0,%ymm9
DB 197,124,89,208 ; vmulps %ymm0,%ymm0,%ymm10
- DB 196,98,125,24,29,107,151,0,0 ; vbroadcastss 0x976b(%rip),%ymm11 # b01c <_sk_clut_4D_avx+0x2243>
+ DB 196,98,125,24,29,97,154,0,0 ; vbroadcastss 0x9a61(%rip),%ymm11 # b3d0 <_sk_clut_4D_avx+0x2243>
DB 196,65,124,89,227 ; vmulps %ymm11,%ymm0,%ymm12
- DB 196,98,125,24,45,97,151,0,0 ; vbroadcastss 0x9761(%rip),%ymm13 # b020 <_sk_clut_4D_avx+0x2247>
+ DB 196,98,125,24,45,87,154,0,0 ; vbroadcastss 0x9a57(%rip),%ymm13 # b3d4 <_sk_clut_4D_avx+0x2247>
DB 196,65,28,88,229 ; vaddps %ymm13,%ymm12,%ymm12
DB 196,65,44,89,212 ; vmulps %ymm12,%ymm10,%ymm10
- DB 196,98,125,24,37,82,151,0,0 ; vbroadcastss 0x9752(%rip),%ymm12 # b024 <_sk_clut_4D_avx+0x224b>
+ DB 196,98,125,24,37,72,154,0,0 ; vbroadcastss 0x9a48(%rip),%ymm12 # b3d8 <_sk_clut_4D_avx+0x224b>
DB 196,65,44,88,212 ; vaddps %ymm12,%ymm10,%ymm10
- DB 196,98,125,24,53,72,151,0,0 ; vbroadcastss 0x9748(%rip),%ymm14 # b028 <_sk_clut_4D_avx+0x224f>
+ DB 196,98,125,24,53,62,154,0,0 ; vbroadcastss 0x9a3e(%rip),%ymm14 # b3dc <_sk_clut_4D_avx+0x224f>
DB 196,193,124,194,198,1 ; vcmpltps %ymm14,%ymm0,%ymm0
DB 196,195,45,74,193,0 ; vblendvps %ymm0,%ymm9,%ymm10,%ymm0
DB 196,65,116,89,200 ; vmulps %ymm8,%ymm1,%ymm9
@@ -8724,17 +9129,17 @@ _sk_from_srgb_avx LABEL PROC
PUBLIC _sk_from_srgb_dst_avx
_sk_from_srgb_dst_avx LABEL PROC
- DB 196,98,125,24,5,225,150,0,0 ; vbroadcastss 0x96e1(%rip),%ymm8 # b02c <_sk_clut_4D_avx+0x2253>
+ DB 196,98,125,24,5,215,153,0,0 ; vbroadcastss 0x99d7(%rip),%ymm8 # b3e0 <_sk_clut_4D_avx+0x2253>
DB 196,65,92,89,200 ; vmulps %ymm8,%ymm4,%ymm9
DB 197,92,89,212 ; vmulps %ymm4,%ymm4,%ymm10
- DB 196,98,125,24,29,211,150,0,0 ; vbroadcastss 0x96d3(%rip),%ymm11 # b030 <_sk_clut_4D_avx+0x2257>
+ DB 196,98,125,24,29,201,153,0,0 ; vbroadcastss 0x99c9(%rip),%ymm11 # b3e4 <_sk_clut_4D_avx+0x2257>
DB 196,65,92,89,227 ; vmulps %ymm11,%ymm4,%ymm12
- DB 196,98,125,24,45,201,150,0,0 ; vbroadcastss 0x96c9(%rip),%ymm13 # b034 <_sk_clut_4D_avx+0x225b>
+ DB 196,98,125,24,45,191,153,0,0 ; vbroadcastss 0x99bf(%rip),%ymm13 # b3e8 <_sk_clut_4D_avx+0x225b>
DB 196,65,28,88,229 ; vaddps %ymm13,%ymm12,%ymm12
DB 196,65,44,89,212 ; vmulps %ymm12,%ymm10,%ymm10
- DB 196,98,125,24,37,186,150,0,0 ; vbroadcastss 0x96ba(%rip),%ymm12 # b038 <_sk_clut_4D_avx+0x225f>
+ DB 196,98,125,24,37,176,153,0,0 ; vbroadcastss 0x99b0(%rip),%ymm12 # b3ec <_sk_clut_4D_avx+0x225f>
DB 196,65,44,88,212 ; vaddps %ymm12,%ymm10,%ymm10
- DB 196,98,125,24,53,176,150,0,0 ; vbroadcastss 0x96b0(%rip),%ymm14 # b03c <_sk_clut_4D_avx+0x2263>
+ DB 196,98,125,24,53,166,153,0,0 ; vbroadcastss 0x99a6(%rip),%ymm14 # b3f0 <_sk_clut_4D_avx+0x2263>
DB 196,193,92,194,230,1 ; vcmpltps %ymm14,%ymm4,%ymm4
DB 196,195,45,74,225,64 ; vblendvps %ymm4,%ymm9,%ymm10,%ymm4
DB 196,65,84,89,200 ; vmulps %ymm8,%ymm5,%ymm9
@@ -8759,20 +9164,20 @@ _sk_from_srgb_dst_avx LABEL PROC
PUBLIC _sk_to_srgb_avx
_sk_to_srgb_avx LABEL PROC
DB 197,124,82,200 ; vrsqrtps %ymm0,%ymm9
- DB 196,98,125,24,5,69,150,0,0 ; vbroadcastss 0x9645(%rip),%ymm8 # b040 <_sk_clut_4D_avx+0x2267>
+ DB 196,98,125,24,5,59,153,0,0 ; vbroadcastss 0x993b(%rip),%ymm8 # b3f4 <_sk_clut_4D_avx+0x2267>
DB 196,65,124,89,208 ; vmulps %ymm8,%ymm0,%ymm10
- DB 196,98,125,24,29,59,150,0,0 ; vbroadcastss 0x963b(%rip),%ymm11 # b044 <_sk_clut_4D_avx+0x226b>
+ DB 196,98,125,24,29,49,153,0,0 ; vbroadcastss 0x9931(%rip),%ymm11 # b3f8 <_sk_clut_4D_avx+0x226b>
DB 196,65,52,89,227 ; vmulps %ymm11,%ymm9,%ymm12
- DB 196,98,125,24,45,49,150,0,0 ; vbroadcastss 0x9631(%rip),%ymm13 # b048 <_sk_clut_4D_avx+0x226f>
+ DB 196,98,125,24,45,39,153,0,0 ; vbroadcastss 0x9927(%rip),%ymm13 # b3fc <_sk_clut_4D_avx+0x226f>
DB 196,65,28,88,229 ; vaddps %ymm13,%ymm12,%ymm12
DB 196,65,52,89,228 ; vmulps %ymm12,%ymm9,%ymm12
- DB 196,98,125,24,53,34,150,0,0 ; vbroadcastss 0x9622(%rip),%ymm14 # b04c <_sk_clut_4D_avx+0x2273>
+ DB 196,98,125,24,53,24,153,0,0 ; vbroadcastss 0x9918(%rip),%ymm14 # b400 <_sk_clut_4D_avx+0x2273>
DB 196,65,28,88,230 ; vaddps %ymm14,%ymm12,%ymm12
- DB 196,98,125,24,61,24,150,0,0 ; vbroadcastss 0x9618(%rip),%ymm15 # b050 <_sk_clut_4D_avx+0x2277>
+ DB 196,98,125,24,61,14,153,0,0 ; vbroadcastss 0x990e(%rip),%ymm15 # b404 <_sk_clut_4D_avx+0x2277>
DB 196,65,52,88,207 ; vaddps %ymm15,%ymm9,%ymm9
DB 196,65,124,83,201 ; vrcpps %ymm9,%ymm9
DB 196,65,52,89,204 ; vmulps %ymm12,%ymm9,%ymm9
- DB 196,98,125,24,37,4,150,0,0 ; vbroadcastss 0x9604(%rip),%ymm12 # b054 <_sk_clut_4D_avx+0x227b>
+ DB 196,98,125,24,37,250,152,0,0 ; vbroadcastss 0x98fa(%rip),%ymm12 # b408 <_sk_clut_4D_avx+0x227b>
DB 196,193,124,194,196,1 ; vcmpltps %ymm12,%ymm0,%ymm0
DB 196,195,53,74,194,0 ; vblendvps %ymm0,%ymm10,%ymm9,%ymm0
DB 197,124,82,201 ; vrsqrtps %ymm1,%ymm9
@@ -8807,7 +9212,7 @@ _sk_rgb_to_hsl_avx LABEL PROC
DB 197,124,93,201 ; vminps %ymm1,%ymm0,%ymm9
DB 197,52,93,202 ; vminps %ymm2,%ymm9,%ymm9
DB 196,65,60,92,209 ; vsubps %ymm9,%ymm8,%ymm10
- DB 196,98,125,24,29,106,149,0,0 ; vbroadcastss 0x956a(%rip),%ymm11 # b058 <_sk_clut_4D_avx+0x227f>
+ DB 196,98,125,24,29,96,152,0,0 ; vbroadcastss 0x9860(%rip),%ymm11 # b40c <_sk_clut_4D_avx+0x227f>
DB 196,65,36,94,218 ; vdivps %ymm10,%ymm11,%ymm11
DB 197,116,92,226 ; vsubps %ymm2,%ymm1,%ymm12
DB 196,65,28,89,227 ; vmulps %ymm11,%ymm12,%ymm12
@@ -8817,19 +9222,19 @@ _sk_rgb_to_hsl_avx LABEL PROC
DB 196,193,108,89,211 ; vmulps %ymm11,%ymm2,%ymm2
DB 197,252,92,201 ; vsubps %ymm1,%ymm0,%ymm1
DB 196,193,116,89,203 ; vmulps %ymm11,%ymm1,%ymm1
- DB 196,98,125,24,29,67,149,0,0 ; vbroadcastss 0x9543(%rip),%ymm11 # b064 <_sk_clut_4D_avx+0x228b>
+ DB 196,98,125,24,29,57,152,0,0 ; vbroadcastss 0x9839(%rip),%ymm11 # b418 <_sk_clut_4D_avx+0x228b>
DB 196,193,116,88,203 ; vaddps %ymm11,%ymm1,%ymm1
- DB 196,98,125,24,29,49,149,0,0 ; vbroadcastss 0x9531(%rip),%ymm11 # b060 <_sk_clut_4D_avx+0x2287>
+ DB 196,98,125,24,29,39,152,0,0 ; vbroadcastss 0x9827(%rip),%ymm11 # b414 <_sk_clut_4D_avx+0x2287>
DB 196,193,108,88,211 ; vaddps %ymm11,%ymm2,%ymm2
DB 196,227,117,74,202,224 ; vblendvps %ymm14,%ymm2,%ymm1,%ymm1
- DB 196,226,125,24,21,25,149,0,0 ; vbroadcastss 0x9519(%rip),%ymm2 # b05c <_sk_clut_4D_avx+0x2283>
+ DB 196,226,125,24,21,15,152,0,0 ; vbroadcastss 0x980f(%rip),%ymm2 # b410 <_sk_clut_4D_avx+0x2283>
DB 196,65,12,87,246 ; vxorps %ymm14,%ymm14,%ymm14
DB 196,227,13,74,210,208 ; vblendvps %ymm13,%ymm2,%ymm14,%ymm2
DB 197,188,194,192,0 ; vcmpeqps %ymm0,%ymm8,%ymm0
DB 196,193,108,88,212 ; vaddps %ymm12,%ymm2,%ymm2
DB 196,227,117,74,194,0 ; vblendvps %ymm0,%ymm2,%ymm1,%ymm0
DB 196,193,60,88,201 ; vaddps %ymm9,%ymm8,%ymm1
- DB 196,98,125,24,37,0,149,0,0 ; vbroadcastss 0x9500(%rip),%ymm12 # b06c <_sk_clut_4D_avx+0x2293>
+ DB 196,98,125,24,37,246,151,0,0 ; vbroadcastss 0x97f6(%rip),%ymm12 # b420 <_sk_clut_4D_avx+0x2293>
DB 196,193,116,89,212 ; vmulps %ymm12,%ymm1,%ymm2
DB 197,28,194,226,1 ; vcmpltps %ymm2,%ymm12,%ymm12
DB 196,65,36,92,216 ; vsubps %ymm8,%ymm11,%ymm11
@@ -8839,7 +9244,7 @@ _sk_rgb_to_hsl_avx LABEL PROC
DB 197,172,94,201 ; vdivps %ymm1,%ymm10,%ymm1
DB 196,195,125,74,198,128 ; vblendvps %ymm8,%ymm14,%ymm0,%ymm0
DB 196,195,117,74,206,128 ; vblendvps %ymm8,%ymm14,%ymm1,%ymm1
- DB 196,98,125,24,5,195,148,0,0 ; vbroadcastss 0x94c3(%rip),%ymm8 # b068 <_sk_clut_4D_avx+0x228f>
+ DB 196,98,125,24,5,185,151,0,0 ; vbroadcastss 0x97b9(%rip),%ymm8 # b41c <_sk_clut_4D_avx+0x228f>
DB 196,193,124,89,192 ; vmulps %ymm8,%ymm0,%ymm0
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -8854,7 +9259,7 @@ _sk_hsl_to_rgb_avx LABEL PROC
DB 197,252,17,28,36 ; vmovups %ymm3,(%rsp)
DB 197,252,40,225 ; vmovaps %ymm1,%ymm4
DB 197,252,40,216 ; vmovaps %ymm0,%ymm3
- DB 196,98,125,24,5,138,148,0,0 ; vbroadcastss 0x948a(%rip),%ymm8 # b070 <_sk_clut_4D_avx+0x2297>
+ DB 196,98,125,24,5,128,151,0,0 ; vbroadcastss 0x9780(%rip),%ymm8 # b424 <_sk_clut_4D_avx+0x2297>
DB 197,60,194,202,2 ; vcmpleps %ymm2,%ymm8,%ymm9
DB 197,92,89,210 ; vmulps %ymm2,%ymm4,%ymm10
DB 196,65,92,92,218 ; vsubps %ymm10,%ymm4,%ymm11
@@ -8862,23 +9267,23 @@ _sk_hsl_to_rgb_avx LABEL PROC
DB 197,52,88,210 ; vaddps %ymm2,%ymm9,%ymm10
DB 197,108,88,202 ; vaddps %ymm2,%ymm2,%ymm9
DB 196,65,52,92,202 ; vsubps %ymm10,%ymm9,%ymm9
- DB 196,98,125,24,29,100,148,0,0 ; vbroadcastss 0x9464(%rip),%ymm11 # b074 <_sk_clut_4D_avx+0x229b>
+ DB 196,98,125,24,29,90,151,0,0 ; vbroadcastss 0x975a(%rip),%ymm11 # b428 <_sk_clut_4D_avx+0x229b>
DB 196,65,100,88,219 ; vaddps %ymm11,%ymm3,%ymm11
DB 196,67,125,8,227,1 ; vroundps $0x1,%ymm11,%ymm12
DB 196,65,36,92,252 ; vsubps %ymm12,%ymm11,%ymm15
DB 196,65,44,92,217 ; vsubps %ymm9,%ymm10,%ymm11
- DB 196,98,125,24,37,78,148,0,0 ; vbroadcastss 0x944e(%rip),%ymm12 # b07c <_sk_clut_4D_avx+0x22a3>
+ DB 196,98,125,24,37,68,151,0,0 ; vbroadcastss 0x9744(%rip),%ymm12 # b430 <_sk_clut_4D_avx+0x22a3>
DB 196,193,4,89,196 ; vmulps %ymm12,%ymm15,%ymm0
- DB 196,98,125,24,45,68,148,0,0 ; vbroadcastss 0x9444(%rip),%ymm13 # b080 <_sk_clut_4D_avx+0x22a7>
+ DB 196,98,125,24,45,58,151,0,0 ; vbroadcastss 0x973a(%rip),%ymm13 # b434 <_sk_clut_4D_avx+0x22a7>
DB 197,20,92,240 ; vsubps %ymm0,%ymm13,%ymm14
DB 196,65,36,89,246 ; vmulps %ymm14,%ymm11,%ymm14
DB 196,65,52,88,246 ; vaddps %ymm14,%ymm9,%ymm14
- DB 196,226,125,24,13,37,148,0,0 ; vbroadcastss 0x9425(%rip),%ymm1 # b078 <_sk_clut_4D_avx+0x229f>
+ DB 196,226,125,24,13,27,151,0,0 ; vbroadcastss 0x971b(%rip),%ymm1 # b42c <_sk_clut_4D_avx+0x229f>
DB 196,193,116,194,255,2 ; vcmpleps %ymm15,%ymm1,%ymm7
DB 196,195,13,74,249,112 ; vblendvps %ymm7,%ymm9,%ymm14,%ymm7
DB 196,65,60,194,247,2 ; vcmpleps %ymm15,%ymm8,%ymm14
DB 196,227,45,74,255,224 ; vblendvps %ymm14,%ymm7,%ymm10,%ymm7
- DB 196,98,125,24,53,16,148,0,0 ; vbroadcastss 0x9410(%rip),%ymm14 # b084 <_sk_clut_4D_avx+0x22ab>
+ DB 196,98,125,24,53,6,151,0,0 ; vbroadcastss 0x9706(%rip),%ymm14 # b438 <_sk_clut_4D_avx+0x22ab>
DB 196,65,12,194,255,2 ; vcmpleps %ymm15,%ymm14,%ymm15
DB 196,193,124,89,195 ; vmulps %ymm11,%ymm0,%ymm0
DB 197,180,88,192 ; vaddps %ymm0,%ymm9,%ymm0
@@ -8897,7 +9302,7 @@ _sk_hsl_to_rgb_avx LABEL PROC
DB 197,164,89,247 ; vmulps %ymm7,%ymm11,%ymm6
DB 197,180,88,246 ; vaddps %ymm6,%ymm9,%ymm6
DB 196,227,77,74,237,0 ; vblendvps %ymm0,%ymm5,%ymm6,%ymm5
- DB 196,226,125,24,5,178,147,0,0 ; vbroadcastss 0x93b2(%rip),%ymm0 # b088 <_sk_clut_4D_avx+0x22af>
+ DB 196,226,125,24,5,168,150,0,0 ; vbroadcastss 0x96a8(%rip),%ymm0 # b43c <_sk_clut_4D_avx+0x22af>
DB 197,228,88,192 ; vaddps %ymm0,%ymm3,%ymm0
DB 196,227,125,8,216,1 ; vroundps $0x1,%ymm0,%ymm3
DB 197,252,92,195 ; vsubps %ymm3,%ymm0,%ymm0
@@ -8948,15 +9353,15 @@ _sk_scale_u8_avx LABEL PROC
DB 76,3,8 ; add (%rax),%r9
DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 117,75 ; jne 1de3 <_sk_scale_u8_avx+0x64>
+ DB 117,75 ; jne 1ea1 <_sk_scale_u8_avx+0x64>
DB 196,66,121,48,4,25 ; vpmovzxbw (%r9,%rbx,1),%xmm8
- DB 197,57,219,5,10,151,0,0 ; vpand 0x970a(%rip),%xmm8,%xmm8 # b4b0 <_sk_clut_4D_avx+0x26d7>
+ DB 197,57,219,5,220,154,0,0 ; vpand 0x9adc(%rip),%xmm8,%xmm8 # b940 <_sk_clut_4D_avx+0x27b3>
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 196,65,57,105,201 ; vpunpckhwd %xmm9,%xmm8,%xmm9
DB 196,66,121,51,192 ; vpmovzxwd %xmm8,%xmm8
DB 196,67,61,24,193,1 ; vinsertf128 $0x1,%xmm9,%ymm8,%ymm8
DB 196,65,124,91,192 ; vcvtdq2ps %ymm8,%ymm8
- DB 196,98,125,24,13,195,146,0,0 ; vbroadcastss 0x92c3(%rip),%ymm9 # b08c <_sk_clut_4D_avx+0x22b3>
+ DB 196,98,125,24,13,185,149,0,0 ; vbroadcastss 0x95b9(%rip),%ymm9 # b440 <_sk_clut_4D_avx+0x22b3>
DB 196,65,60,89,193 ; vmulps %ymm9,%ymm8,%ymm8
DB 197,188,89,192 ; vmulps %ymm0,%ymm8,%ymm0
DB 197,188,89,201 ; vmulps %ymm1,%ymm8,%ymm1
@@ -8970,15 +9375,15 @@ _sk_scale_u8_avx LABEL PROC
DB 196,65,57,239,192 ; vpxor %xmm8,%xmm8,%xmm8
DB 65,254,202 ; dec %r10b
DB 65,128,250,6 ; cmp $0x6,%r10b
- DB 119,166 ; ja 1d9e <_sk_scale_u8_avx+0x1f>
+ DB 119,166 ; ja 1e5c <_sk_scale_u8_avx+0x1f>
DB 69,15,182,210 ; movzbl %r10b,%r10d
- DB 76,141,29,121,0,0,0 ; lea 0x79(%rip),%r11 # 1e7c <_sk_scale_u8_avx+0xfd>
+ DB 76,141,29,123,0,0,0 ; lea 0x7b(%rip),%r11 # 1f3c <_sk_scale_u8_avx+0xff>
DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
DB 65,15,182,4,25 ; movzbl (%r9,%rbx,1),%eax
DB 197,121,110,192 ; vmovd %eax,%xmm8
- DB 235,135 ; jmp 1d9e <_sk_scale_u8_avx+0x1f>
+ DB 235,135 ; jmp 1e5c <_sk_scale_u8_avx+0x1f>
DB 65,15,182,68,25,2 ; movzbl 0x2(%r9,%rbx,1),%eax
DB 196,65,57,239,192 ; vpxor %xmm8,%xmm8,%xmm8
DB 197,57,196,192,2 ; vpinsrw $0x2,%eax,%xmm8,%xmm8
@@ -8986,7 +9391,7 @@ _sk_scale_u8_avx LABEL PROC
DB 197,121,110,200 ; vmovd %eax,%xmm9
DB 196,66,121,48,201 ; vpmovzxbw %xmm9,%xmm9
DB 196,67,57,14,193,3 ; vpblendw $0x3,%xmm9,%xmm8,%xmm8
- DB 233,94,255,255,255 ; jmpq 1d9e <_sk_scale_u8_avx+0x1f>
+ DB 233,94,255,255,255 ; jmpq 1e5c <_sk_scale_u8_avx+0x1f>
DB 65,15,182,68,25,6 ; movzbl 0x6(%r9,%rbx,1),%eax
DB 196,65,57,239,192 ; vpxor %xmm8,%xmm8,%xmm8
DB 197,57,196,192,6 ; vpinsrw $0x6,%eax,%xmm8,%xmm8
@@ -8997,24 +9402,21 @@ _sk_scale_u8_avx LABEL PROC
DB 196,65,121,110,12,25 ; vmovd (%r9,%rbx,1),%xmm9
DB 196,66,121,48,201 ; vpmovzxbw %xmm9,%xmm9
DB 196,67,49,14,192,240 ; vpblendw $0xf0,%xmm8,%xmm9,%xmm8
- DB 233,34,255,255,255 ; jmpq 1d9e <_sk_scale_u8_avx+0x1f>
- DB 144 ; nop
- DB 255 ; (bad)
- DB 255 ; (bad)
- DB 255,171,255,255,255,155 ; ljmp *-0x64000001(%rbx)
- DB 255 ; (bad)
+ DB 233,34,255,255,255 ; jmpq 1e5c <_sk_scale_u8_avx+0x1f>
+ DB 102,144 ; xchg %ax,%ax
+ DB 142,255 ; mov %edi,%?
DB 255 ; (bad)
+ DB 255,169,255,255,255,153 ; ljmp *-0x66000001(%rcx)
DB 255 ; (bad)
- DB 234 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
+ DB 232,255,255,255,221 ; callq ffffffffde001f4c <_sk_clut_4D_avx+0xffffffffddff8dbf>
DB 255 ; (bad)
- DB 223,255 ; (bad)
DB 255 ; (bad)
- DB 255,212 ; callq *%rsp
+ DB 255,210 ; callq *%rdx
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,196 ; inc %esp
+ DB 255,194 ; inc %edx
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; .byte 0xff
@@ -9048,15 +9450,15 @@ _sk_lerp_u8_avx LABEL PROC
DB 76,3,8 ; add (%rax),%r9
DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 117,111 ; jne 1f5f <_sk_lerp_u8_avx+0x88>
+ DB 117,111 ; jne 201f <_sk_lerp_u8_avx+0x88>
DB 196,66,121,48,4,25 ; vpmovzxbw (%r9,%rbx,1),%xmm8
- DB 197,57,219,5,194,149,0,0 ; vpand 0x95c2(%rip),%xmm8,%xmm8 # b4c0 <_sk_clut_4D_avx+0x26e7>
+ DB 197,57,219,5,146,153,0,0 ; vpand 0x9992(%rip),%xmm8,%xmm8 # b950 <_sk_clut_4D_avx+0x27c3>
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 196,65,57,105,201 ; vpunpckhwd %xmm9,%xmm8,%xmm9
DB 196,66,121,51,192 ; vpmovzxwd %xmm8,%xmm8
DB 196,67,61,24,193,1 ; vinsertf128 $0x1,%xmm9,%ymm8,%ymm8
DB 196,65,124,91,192 ; vcvtdq2ps %ymm8,%ymm8
- DB 196,98,125,24,13,111,145,0,0 ; vbroadcastss 0x916f(%rip),%ymm9 # b090 <_sk_clut_4D_avx+0x22b7>
+ DB 196,98,125,24,13,99,148,0,0 ; vbroadcastss 0x9463(%rip),%ymm9 # b444 <_sk_clut_4D_avx+0x22b7>
DB 196,65,60,89,193 ; vmulps %ymm9,%ymm8,%ymm8
DB 197,252,92,196 ; vsubps %ymm4,%ymm0,%ymm0
DB 196,193,124,89,192 ; vmulps %ymm8,%ymm0,%ymm0
@@ -9078,15 +9480,15 @@ _sk_lerp_u8_avx LABEL PROC
DB 196,65,57,239,192 ; vpxor %xmm8,%xmm8,%xmm8
DB 65,254,202 ; dec %r10b
DB 65,128,250,6 ; cmp $0x6,%r10b
- DB 119,130 ; ja 1ef6 <_sk_lerp_u8_avx+0x1f>
+ DB 119,130 ; ja 1fb6 <_sk_lerp_u8_avx+0x1f>
DB 69,15,182,210 ; movzbl %r10b,%r10d
- DB 76,141,29,125,0,0,0 ; lea 0x7d(%rip),%r11 # 1ffc <_sk_lerp_u8_avx+0x125>
+ DB 76,141,29,125,0,0,0 ; lea 0x7d(%rip),%r11 # 20bc <_sk_lerp_u8_avx+0x125>
DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
DB 65,15,182,4,25 ; movzbl (%r9,%rbx,1),%eax
DB 197,121,110,192 ; vmovd %eax,%xmm8
- DB 233,96,255,255,255 ; jmpq 1ef6 <_sk_lerp_u8_avx+0x1f>
+ DB 233,96,255,255,255 ; jmpq 1fb6 <_sk_lerp_u8_avx+0x1f>
DB 65,15,182,68,25,2 ; movzbl 0x2(%r9,%rbx,1),%eax
DB 196,65,57,239,192 ; vpxor %xmm8,%xmm8,%xmm8
DB 197,57,196,192,2 ; vpinsrw $0x2,%eax,%xmm8,%xmm8
@@ -9094,7 +9496,7 @@ _sk_lerp_u8_avx LABEL PROC
DB 197,121,110,200 ; vmovd %eax,%xmm9
DB 196,66,121,48,201 ; vpmovzxbw %xmm9,%xmm9
DB 196,67,57,14,193,3 ; vpblendw $0x3,%xmm9,%xmm8,%xmm8
- DB 233,55,255,255,255 ; jmpq 1ef6 <_sk_lerp_u8_avx+0x1f>
+ DB 233,55,255,255,255 ; jmpq 1fb6 <_sk_lerp_u8_avx+0x1f>
DB 65,15,182,68,25,6 ; movzbl 0x6(%r9,%rbx,1),%eax
DB 196,65,57,239,192 ; vpxor %xmm8,%xmm8,%xmm8
DB 197,57,196,192,6 ; vpinsrw $0x6,%eax,%xmm8,%xmm8
@@ -9105,7 +9507,7 @@ _sk_lerp_u8_avx LABEL PROC
DB 196,65,121,110,12,25 ; vmovd (%r9,%rbx,1),%xmm9
DB 196,66,121,48,201 ; vpmovzxbw %xmm9,%xmm9
DB 196,67,49,14,192,240 ; vpblendw $0xf0,%xmm8,%xmm9,%xmm8
- DB 233,251,254,255,255 ; jmpq 1ef6 <_sk_lerp_u8_avx+0x1f>
+ DB 233,251,254,255,255 ; jmpq 1fb6 <_sk_lerp_u8_avx+0x1f>
DB 144 ; nop
DB 140,255 ; mov %?,%edi
DB 255 ; (bad)
@@ -9113,7 +9515,7 @@ _sk_lerp_u8_avx LABEL PROC
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 233,255,255,255,222 ; jmpq ffffffffdf00200c <_sk_clut_4D_avx+0xffffffffdeff9233>
+ DB 233,255,255,255,222 ; jmpq ffffffffdf0020cc <_sk_clut_4D_avx+0xffffffffdeff8f3f>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,211 ; callq *%rbx
@@ -9135,26 +9537,26 @@ _sk_lerp_565_avx LABEL PROC
DB 76,3,8 ; add (%rax),%r9
DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 15,133,209,0,0,0 ; jne 2109 <_sk_lerp_565_avx+0xf1>
+ DB 15,133,209,0,0,0 ; jne 21c9 <_sk_lerp_565_avx+0xf1>
DB 196,65,122,111,4,89 ; vmovdqu (%r9,%rbx,2),%xmm8
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 196,65,57,105,201 ; vpunpckhwd %xmm9,%xmm8,%xmm9
DB 196,66,121,51,192 ; vpmovzxwd %xmm8,%xmm8
DB 196,67,61,24,193,1 ; vinsertf128 $0x1,%xmm9,%ymm8,%ymm8
- DB 196,98,125,24,13,56,144,0,0 ; vbroadcastss 0x9038(%rip),%ymm9 # b094 <_sk_clut_4D_avx+0x22bb>
+ DB 196,98,125,24,13,44,147,0,0 ; vbroadcastss 0x932c(%rip),%ymm9 # b448 <_sk_clut_4D_avx+0x22bb>
DB 196,65,60,84,201 ; vandps %ymm9,%ymm8,%ymm9
DB 196,65,124,91,201 ; vcvtdq2ps %ymm9,%ymm9
- DB 196,98,125,24,21,41,144,0,0 ; vbroadcastss 0x9029(%rip),%ymm10 # b098 <_sk_clut_4D_avx+0x22bf>
+ DB 196,98,125,24,21,29,147,0,0 ; vbroadcastss 0x931d(%rip),%ymm10 # b44c <_sk_clut_4D_avx+0x22bf>
DB 196,65,52,89,202 ; vmulps %ymm10,%ymm9,%ymm9
- DB 196,98,125,24,21,31,144,0,0 ; vbroadcastss 0x901f(%rip),%ymm10 # b09c <_sk_clut_4D_avx+0x22c3>
+ DB 196,98,125,24,21,19,147,0,0 ; vbroadcastss 0x9313(%rip),%ymm10 # b450 <_sk_clut_4D_avx+0x22c3>
DB 196,65,60,84,210 ; vandps %ymm10,%ymm8,%ymm10
DB 196,65,124,91,210 ; vcvtdq2ps %ymm10,%ymm10
- DB 196,98,125,24,29,16,144,0,0 ; vbroadcastss 0x9010(%rip),%ymm11 # b0a0 <_sk_clut_4D_avx+0x22c7>
+ DB 196,98,125,24,29,4,147,0,0 ; vbroadcastss 0x9304(%rip),%ymm11 # b454 <_sk_clut_4D_avx+0x22c7>
DB 196,65,44,89,211 ; vmulps %ymm11,%ymm10,%ymm10
- DB 196,98,125,24,29,6,144,0,0 ; vbroadcastss 0x9006(%rip),%ymm11 # b0a4 <_sk_clut_4D_avx+0x22cb>
+ DB 196,98,125,24,29,250,146,0,0 ; vbroadcastss 0x92fa(%rip),%ymm11 # b458 <_sk_clut_4D_avx+0x22cb>
DB 196,65,60,84,195 ; vandps %ymm11,%ymm8,%ymm8
DB 196,65,124,91,192 ; vcvtdq2ps %ymm8,%ymm8
- DB 196,98,125,24,29,247,143,0,0 ; vbroadcastss 0x8ff7(%rip),%ymm11 # b0a8 <_sk_clut_4D_avx+0x22cf>
+ DB 196,98,125,24,29,235,146,0,0 ; vbroadcastss 0x92eb(%rip),%ymm11 # b45c <_sk_clut_4D_avx+0x22cf>
DB 196,65,60,89,195 ; vmulps %ymm11,%ymm8,%ymm8
DB 197,252,92,196 ; vsubps %ymm4,%ymm0,%ymm0
DB 196,193,124,89,193 ; vmulps %ymm9,%ymm0,%ymm0
@@ -9182,27 +9584,27 @@ _sk_lerp_565_avx LABEL PROC
DB 196,65,57,239,192 ; vpxor %xmm8,%xmm8,%xmm8
DB 65,254,202 ; dec %r10b
DB 65,128,250,6 ; cmp $0x6,%r10b
- DB 15,135,28,255,255,255 ; ja 203e <_sk_lerp_565_avx+0x26>
+ DB 15,135,28,255,255,255 ; ja 20fe <_sk_lerp_565_avx+0x26>
DB 69,15,182,210 ; movzbl %r10b,%r10d
- DB 76,141,29,99,0,0,0 ; lea 0x63(%rip),%r11 # 2190 <_sk_lerp_565_avx+0x178>
+ DB 76,141,29,99,0,0,0 ; lea 0x63(%rip),%r11 # 2250 <_sk_lerp_565_avx+0x178>
DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
DB 65,15,183,4,89 ; movzwl (%r9,%rbx,2),%eax
DB 197,121,110,192 ; vmovd %eax,%xmm8
- DB 233,250,254,255,255 ; jmpq 203e <_sk_lerp_565_avx+0x26>
+ DB 233,250,254,255,255 ; jmpq 20fe <_sk_lerp_565_avx+0x26>
DB 196,65,57,239,192 ; vpxor %xmm8,%xmm8,%xmm8
DB 196,65,57,196,68,89,4,2 ; vpinsrw $0x2,0x4(%r9,%rbx,2),%xmm8,%xmm8
DB 196,65,121,110,12,89 ; vmovd (%r9,%rbx,2),%xmm9
DB 196,67,57,14,193,3 ; vpblendw $0x3,%xmm9,%xmm8,%xmm8
- DB 233,220,254,255,255 ; jmpq 203e <_sk_lerp_565_avx+0x26>
+ DB 233,220,254,255,255 ; jmpq 20fe <_sk_lerp_565_avx+0x26>
DB 196,65,57,239,192 ; vpxor %xmm8,%xmm8,%xmm8
DB 196,65,57,196,68,89,12,6 ; vpinsrw $0x6,0xc(%r9,%rbx,2),%xmm8,%xmm8
DB 196,65,57,196,68,89,10,5 ; vpinsrw $0x5,0xa(%r9,%rbx,2),%xmm8,%xmm8
DB 196,65,57,196,68,89,8,4 ; vpinsrw $0x4,0x8(%r9,%rbx,2),%xmm8,%xmm8
DB 196,65,122,126,12,89 ; vmovq (%r9,%rbx,2),%xmm9
DB 196,67,49,14,192,240 ; vpblendw $0xf0,%xmm8,%xmm9,%xmm8
- DB 233,174,254,255,255 ; jmpq 203e <_sk_lerp_565_avx+0x26>
+ DB 233,174,254,255,255 ; jmpq 20fe <_sk_lerp_565_avx+0x26>
DB 166 ; cmpsb %es:(%rdi),%ds:(%rsi)
DB 255 ; (bad)
DB 255 ; (bad)
@@ -9224,130 +9626,166 @@ _sk_lerp_565_avx LABEL PROC
PUBLIC _sk_load_tables_avx
_sk_load_tables_avx LABEL PROC
- DB 83 ; push %rbx
- DB 72,131,236,48 ; sub $0x30,%rsp
+ DB 72,131,236,56 ; sub $0x38,%rsp
DB 197,252,17,60,36 ; vmovups %ymm7,(%rsp)
- DB 73,137,201 ; mov %rcx,%r9
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 76,141,20,149,0,0,0,0 ; lea 0x0(,%rdx,4),%r10
- DB 76,3,16 ; add (%rax),%r10
+ DB 76,139,8 ; mov (%rax),%r9
DB 77,133,192 ; test %r8,%r8
- DB 15,133,251,1,0,0 ; jne 23ca <_sk_load_tables_avx+0x21e>
- DB 196,65,124,16,18 ; vmovups (%r10),%ymm10
- DB 197,124,40,13,132,148,0,0 ; vmovaps 0x9484(%rip),%ymm9 # b660 <_sk_clut_4D_avx+0x2887>
- DB 196,193,44,84,201 ; vandps %ymm9,%ymm10,%ymm1
+ DB 15,133,7,2,0,0 ; jne 248a <_sk_load_tables_avx+0x21e>
+ DB 196,65,125,16,20,145 ; vmovupd (%r9,%rdx,4),%ymm10
+ DB 197,125,40,13,207,149,0,0 ; vmovapd 0x95cf(%rip),%ymm9 # b860 <_sk_clut_4D_avx+0x26d3>
+ DB 196,193,45,84,201 ; vandpd %ymm9,%ymm10,%ymm1
DB 196,227,125,25,200,1 ; vextractf128 $0x1,%ymm1,%xmm0
- DB 196,193,249,126,195 ; vmovq %xmm0,%r11
- DB 69,137,218 ; mov %r11d,%r10d
- DB 72,139,88,8 ; mov 0x8(%rax),%rbx
- DB 196,161,122,16,20,147 ; vmovss (%rbx,%r10,4),%xmm2
+ DB 196,193,249,126,193 ; vmovq %xmm0,%r9
+ DB 69,137,202 ; mov %r9d,%r10d
+ DB 76,139,88,8 ; mov 0x8(%rax),%r11
+ DB 196,129,122,16,20,147 ; vmovss (%r11,%r10,4),%xmm2
DB 196,195,249,22,194,1 ; vpextrq $0x1,%xmm0,%r10
- DB 73,193,235,32 ; shr $0x20,%r11
- DB 196,163,105,33,4,155,16 ; vinsertps $0x10,(%rbx,%r11,4),%xmm2,%xmm0
- DB 68,137,209 ; mov %r10d,%ecx
- DB 196,227,121,33,4,139,32 ; vinsertps $0x20,(%rbx,%rcx,4),%xmm0,%xmm0
- DB 196,193,249,126,203 ; vmovq %xmm1,%r11
+ DB 73,193,233,32 ; shr $0x20,%r9
+ DB 196,131,105,33,4,139,16 ; vinsertps $0x10,(%r11,%r9,4),%xmm2,%xmm0
+ DB 69,137,209 ; mov %r10d,%r9d
+ DB 196,131,121,33,4,139,32 ; vinsertps $0x20,(%r11,%r9,4),%xmm0,%xmm0
+ DB 196,193,249,126,201 ; vmovq %xmm1,%r9
DB 73,193,234,32 ; shr $0x20,%r10
- DB 196,35,121,33,44,147,48 ; vinsertps $0x30,(%rbx,%r10,4),%xmm0,%xmm13
- DB 68,137,217 ; mov %r11d,%ecx
- DB 197,250,16,20,139 ; vmovss (%rbx,%rcx,4),%xmm2
- DB 196,227,249,22,201,1 ; vpextrq $0x1,%xmm1,%rcx
- DB 73,193,235,32 ; shr $0x20,%r11
- DB 196,163,105,33,12,155,16 ; vinsertps $0x10,(%rbx,%r11,4),%xmm2,%xmm1
- DB 65,137,202 ; mov %ecx,%r10d
- DB 72,193,233,32 ; shr $0x20,%rcx
- DB 196,163,113,33,12,147,32 ; vinsertps $0x20,(%rbx,%r10,4),%xmm1,%xmm1
- DB 76,139,80,16 ; mov 0x10(%rax),%r10
- DB 196,99,113,33,36,139,48 ; vinsertps $0x30,(%rbx,%rcx,4),%xmm1,%xmm12
+ DB 196,3,121,33,4,147,48 ; vinsertps $0x30,(%r11,%r10,4),%xmm0,%xmm8
+ DB 69,137,202 ; mov %r9d,%r10d
+ DB 196,129,122,16,20,147 ; vmovss (%r11,%r10,4),%xmm2
+ DB 196,195,249,22,202,1 ; vpextrq $0x1,%xmm1,%r10
+ DB 73,193,233,32 ; shr $0x20,%r9
+ DB 196,131,105,33,12,139,16 ; vinsertps $0x10,(%r11,%r9,4),%xmm2,%xmm1
+ DB 69,137,209 ; mov %r10d,%r9d
+ DB 73,193,234,32 ; shr $0x20,%r10
+ DB 196,131,113,33,12,139,32 ; vinsertps $0x20,(%r11,%r9,4),%xmm1,%xmm1
+ DB 76,139,72,16 ; mov 0x10(%rax),%r9
+ DB 196,3,113,33,36,147,48 ; vinsertps $0x30,(%r11,%r10,4),%xmm1,%xmm12
DB 196,193,105,114,210,8 ; vpsrld $0x8,%xmm10,%xmm2
- DB 196,67,125,25,208,1 ; vextractf128 $0x1,%ymm10,%xmm8
- DB 196,193,121,114,208,8 ; vpsrld $0x8,%xmm8,%xmm0
+ DB 196,67,125,25,213,1 ; vextractf128 $0x1,%ymm10,%xmm13
+ DB 196,193,121,114,213,8 ; vpsrld $0x8,%xmm13,%xmm0
DB 196,227,109,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm2,%ymm0
- DB 196,193,124,84,209 ; vandps %ymm9,%ymm0,%ymm2
+ DB 196,193,125,84,209 ; vandpd %ymm9,%ymm0,%ymm2
DB 196,227,125,25,208,1 ; vextractf128 $0x1,%ymm2,%xmm0
- DB 196,225,249,126,193 ; vmovq %xmm0,%rcx
- DB 137,203 ; mov %ecx,%ebx
- DB 196,193,122,16,12,154 ; vmovss (%r10,%rbx,4),%xmm1
- DB 196,227,249,22,195,1 ; vpextrq $0x1,%xmm0,%rbx
- DB 72,193,233,32 ; shr $0x20,%rcx
- DB 196,67,113,33,52,138,16 ; vinsertps $0x10,(%r10,%rcx,4),%xmm1,%xmm14
- DB 137,217 ; mov %ebx,%ecx
- DB 196,193,122,16,28,138 ; vmovss (%r10,%rcx,4),%xmm3
- DB 196,225,249,126,209 ; vmovq %xmm2,%rcx
- DB 72,193,235,32 ; shr $0x20,%rbx
- DB 196,193,122,16,12,154 ; vmovss (%r10,%rbx,4),%xmm1
- DB 137,203 ; mov %ecx,%ebx
- DB 196,193,122,16,4,154 ; vmovss (%r10,%rbx,4),%xmm0
- DB 196,227,249,22,211,1 ; vpextrq $0x1,%xmm2,%rbx
- DB 72,193,233,32 ; shr $0x20,%rcx
- DB 196,67,121,33,28,138,16 ; vinsertps $0x10,(%r10,%rcx,4),%xmm0,%xmm11
- DB 137,217 ; mov %ebx,%ecx
- DB 196,65,122,16,60,138 ; vmovss (%r10,%rcx,4),%xmm15
- DB 196,195,29,24,197,1 ; vinsertf128 $0x1,%xmm13,%ymm12,%ymm0
- DB 72,193,235,32 ; shr $0x20,%rbx
+ DB 196,193,249,126,194 ; vmovq %xmm0,%r10
+ DB 69,137,211 ; mov %r10d,%r11d
+ DB 196,129,122,16,12,153 ; vmovss (%r9,%r11,4),%xmm1
+ DB 196,195,249,22,195,1 ; vpextrq $0x1,%xmm0,%r11
+ DB 73,193,234,32 ; shr $0x20,%r10
+ DB 196,3,113,33,52,145,16 ; vinsertps $0x10,(%r9,%r10,4),%xmm1,%xmm14
+ DB 69,137,218 ; mov %r11d,%r10d
+ DB 196,129,122,16,28,145 ; vmovss (%r9,%r10,4),%xmm3
+ DB 196,193,249,126,210 ; vmovq %xmm2,%r10
+ DB 73,193,235,32 ; shr $0x20,%r11
+ DB 196,129,122,16,12,153 ; vmovss (%r9,%r11,4),%xmm1
+ DB 69,137,211 ; mov %r10d,%r11d
+ DB 196,129,122,16,4,153 ; vmovss (%r9,%r11,4),%xmm0
+ DB 196,195,249,22,211,1 ; vpextrq $0x1,%xmm2,%r11
+ DB 73,193,234,32 ; shr $0x20,%r10
+ DB 196,3,121,33,28,145,16 ; vinsertps $0x10,(%r9,%r10,4),%xmm0,%xmm11
+ DB 69,137,218 ; mov %r11d,%r10d
+ DB 196,1,122,16,60,145 ; vmovss (%r9,%r10,4),%xmm15
+ DB 196,195,29,24,192,1 ; vinsertf128 $0x1,%xmm8,%ymm12,%ymm0
+ DB 73,193,235,32 ; shr $0x20,%r11
DB 196,227,9,33,219,32 ; vinsertps $0x20,%xmm3,%xmm14,%xmm3
DB 196,227,97,33,249,48 ; vinsertps $0x30,%xmm1,%xmm3,%xmm7
- DB 196,65,122,16,52,154 ; vmovss (%r10,%rbx,4),%xmm14
- DB 72,139,64,24 ; mov 0x18(%rax),%rax
+ DB 196,1,122,16,52,153 ; vmovss (%r9,%r11,4),%xmm14
+ DB 76,139,80,24 ; mov 0x18(%rax),%r10
DB 196,193,97,114,210,16 ; vpsrld $0x10,%xmm10,%xmm3
- DB 196,193,105,114,208,16 ; vpsrld $0x10,%xmm8,%xmm2
+ DB 196,193,105,114,213,16 ; vpsrld $0x10,%xmm13,%xmm2
DB 196,227,101,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm3,%ymm2
- DB 196,65,108,84,201 ; vandps %ymm9,%ymm2,%ymm9
+ DB 196,65,109,84,201 ; vandpd %ymm9,%ymm2,%ymm9
DB 196,99,125,25,202,1 ; vextractf128 $0x1,%ymm9,%xmm2
- DB 196,225,249,126,209 ; vmovq %xmm2,%rcx
- DB 137,203 ; mov %ecx,%ebx
- DB 197,250,16,28,152 ; vmovss (%rax,%rbx,4),%xmm3
- DB 196,227,249,22,211,1 ; vpextrq $0x1,%xmm2,%rbx
- DB 72,193,233,32 ; shr $0x20,%rcx
- DB 196,99,97,33,36,136,16 ; vinsertps $0x10,(%rax,%rcx,4),%xmm3,%xmm12
- DB 137,217 ; mov %ebx,%ecx
- DB 197,250,16,28,136 ; vmovss (%rax,%rcx,4),%xmm3
- DB 196,97,249,126,201 ; vmovq %xmm9,%rcx
- DB 72,193,235,32 ; shr $0x20,%rbx
- DB 197,250,16,20,152 ; vmovss (%rax,%rbx,4),%xmm2
- DB 137,203 ; mov %ecx,%ebx
- DB 197,250,16,12,152 ; vmovss (%rax,%rbx,4),%xmm1
- DB 196,99,249,22,203,1 ; vpextrq $0x1,%xmm9,%rbx
- DB 72,193,233,32 ; shr $0x20,%rcx
- DB 196,99,113,33,12,136,16 ; vinsertps $0x10,(%rax,%rcx,4),%xmm1,%xmm9
- DB 137,217 ; mov %ebx,%ecx
- DB 197,122,16,44,136 ; vmovss (%rax,%rcx,4),%xmm13
+ DB 196,193,249,126,209 ; vmovq %xmm2,%r9
+ DB 68,137,200 ; mov %r9d,%eax
+ DB 196,193,122,16,28,130 ; vmovss (%r10,%rax,4),%xmm3
+ DB 196,195,249,22,211,1 ; vpextrq $0x1,%xmm2,%r11
+ DB 73,193,233,32 ; shr $0x20,%r9
+ DB 196,3,97,33,36,138,16 ; vinsertps $0x10,(%r10,%r9,4),%xmm3,%xmm12
+ DB 68,137,216 ; mov %r11d,%eax
+ DB 196,193,122,16,28,130 ; vmovss (%r10,%rax,4),%xmm3
+ DB 196,65,249,126,201 ; vmovq %xmm9,%r9
+ DB 73,193,235,32 ; shr $0x20,%r11
+ DB 196,129,122,16,20,154 ; vmovss (%r10,%r11,4),%xmm2
+ DB 68,137,200 ; mov %r9d,%eax
+ DB 196,193,122,16,12,130 ; vmovss (%r10,%rax,4),%xmm1
+ DB 196,67,249,22,203,1 ; vpextrq $0x1,%xmm9,%r11
+ DB 73,193,233,32 ; shr $0x20,%r9
+ DB 196,3,113,33,12,138,16 ; vinsertps $0x10,(%r10,%r9,4),%xmm1,%xmm9
+ DB 68,137,216 ; mov %r11d,%eax
+ DB 196,65,122,16,4,130 ; vmovss (%r10,%rax,4),%xmm8
DB 196,195,33,33,207,32 ; vinsertps $0x20,%xmm15,%xmm11,%xmm1
- DB 72,193,235,32 ; shr $0x20,%rbx
- DB 197,122,16,28,152 ; vmovss (%rax,%rbx,4),%xmm11
+ DB 73,193,235,32 ; shr $0x20,%r11
+ DB 196,1,122,16,28,154 ; vmovss (%r10,%r11,4),%xmm11
DB 196,195,113,33,206,48 ; vinsertps $0x30,%xmm14,%xmm1,%xmm1
DB 196,227,117,24,207,1 ; vinsertf128 $0x1,%xmm7,%ymm1,%ymm1
DB 196,227,25,33,219,32 ; vinsertps $0x20,%xmm3,%xmm12,%xmm3
DB 196,227,97,33,210,48 ; vinsertps $0x30,%xmm2,%xmm3,%xmm2
- DB 196,195,49,33,221,32 ; vinsertps $0x20,%xmm13,%xmm9,%xmm3
+ DB 196,195,49,33,216,32 ; vinsertps $0x20,%xmm8,%xmm9,%xmm3
DB 196,195,97,33,219,48 ; vinsertps $0x30,%xmm11,%xmm3,%xmm3
DB 196,227,101,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm3,%ymm2
DB 196,193,97,114,210,24 ; vpsrld $0x18,%xmm10,%xmm3
- DB 196,193,65,114,208,24 ; vpsrld $0x18,%xmm8,%xmm7
+ DB 196,193,65,114,213,24 ; vpsrld $0x18,%xmm13,%xmm7
DB 196,227,101,24,223,1 ; vinsertf128 $0x1,%xmm7,%ymm3,%ymm3
DB 197,252,91,219 ; vcvtdq2ps %ymm3,%ymm3
- DB 196,226,125,24,61,247,140,0,0 ; vbroadcastss 0x8cf7(%rip),%ymm7 # b0ac <_sk_clut_4D_avx+0x22d3>
+ DB 196,226,125,24,61,231,143,0,0 ; vbroadcastss 0x8fe7(%rip),%ymm7 # b460 <_sk_clut_4D_avx+0x22d3>
DB 197,228,89,223 ; vmulps %ymm7,%ymm3,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 76,137,201 ; mov %r9,%rcx
DB 197,252,16,60,36 ; vmovups (%rsp),%ymm7
- DB 72,131,196,48 ; add $0x30,%rsp
- DB 91 ; pop %rbx
+ DB 72,131,196,56 ; add $0x38,%rsp
DB 255,224 ; jmpq *%rax
- DB 185,8,0,0,0 ; mov $0x8,%ecx
- DB 68,41,193 ; sub %r8d,%ecx
- DB 192,225,3 ; shl $0x3,%cl
- DB 73,199,195,255,255,255,255 ; mov $0xffffffffffffffff,%r11
- DB 73,211,235 ; shr %cl,%r11
- DB 196,193,249,110,195 ; vmovq %r11,%xmm0
- DB 196,226,121,48,192 ; vpmovzxbw %xmm0,%xmm0
- DB 196,226,121,0,13,222,144,0,0 ; vpshufb 0x90de(%rip),%xmm0,%xmm1 # b4d0 <_sk_clut_4D_avx+0x26f7>
- DB 196,226,121,33,201 ; vpmovsxbd %xmm1,%xmm1
- DB 196,226,121,0,5,224,144,0,0 ; vpshufb 0x90e0(%rip),%xmm0,%xmm0 # b4e0 <_sk_clut_4D_avx+0x2707>
- DB 196,226,121,33,192 ; vpmovsxbd %xmm0,%xmm0
- DB 196,227,117,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
- DB 196,66,125,44,18 ; vmaskmovps (%r10),%ymm0,%ymm10
- DB 233,191,253,255,255 ; jmpq 21d4 <_sk_load_tables_avx+0x28>
+ DB 69,137,194 ; mov %r8d,%r10d
+ DB 65,128,226,7 ; and $0x7,%r10b
+ DB 196,65,44,87,210 ; vxorps %ymm10,%ymm10,%ymm10
+ DB 65,254,202 ; dec %r10b
+ DB 65,128,250,6 ; cmp $0x6,%r10b
+ DB 15,135,230,253,255,255 ; ja 2289 <_sk_load_tables_avx+0x1d>
+ DB 69,15,182,210 ; movzbl %r10b,%r10d
+ DB 76,141,29,146,0,0,0 ; lea 0x92(%rip),%r11 # 2540 <_sk_load_tables_avx+0x2d4>
+ DB 79,99,20,147 ; movslq (%r11,%r10,4),%r10
+ DB 77,1,218 ; add %r11,%r10
+ DB 65,255,226 ; jmpq *%r10
+ DB 196,65,122,16,20,145 ; vmovss (%r9,%rdx,4),%xmm10
+ DB 233,198,253,255,255 ; jmpq 2289 <_sk_load_tables_avx+0x1d>
+ DB 196,193,121,110,68,145,8 ; vmovd 0x8(%r9,%rdx,4),%xmm0
+ DB 197,249,112,192,68 ; vpshufd $0x44,%xmm0,%xmm0
+ DB 197,244,87,201 ; vxorps %ymm1,%ymm1,%ymm1
+ DB 196,99,117,12,208,4 ; vblendps $0x4,%ymm0,%ymm1,%ymm10
+ DB 196,193,123,16,4,145 ; vmovsd (%r9,%rdx,4),%xmm0
+ DB 196,99,45,13,208,1 ; vblendpd $0x1,%ymm0,%ymm10,%ymm10
+ DB 233,159,253,255,255 ; jmpq 2289 <_sk_load_tables_avx+0x1d>
+ DB 196,193,121,110,68,145,24 ; vmovd 0x18(%r9,%rdx,4),%xmm0
+ DB 197,249,112,192,68 ; vpshufd $0x44,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 197,244,87,201 ; vxorps %ymm1,%ymm1,%ymm1
+ DB 196,99,117,12,208,64 ; vblendps $0x40,%ymm0,%ymm1,%ymm10
+ DB 196,99,125,25,208,1 ; vextractf128 $0x1,%ymm10,%xmm0
+ DB 196,195,121,34,68,145,20,1 ; vpinsrd $0x1,0x14(%r9,%rdx,4),%xmm0,%xmm0
+ DB 196,99,45,24,208,1 ; vinsertf128 $0x1,%xmm0,%ymm10,%ymm10
+ DB 196,99,125,25,208,1 ; vextractf128 $0x1,%ymm10,%xmm0
+ DB 196,195,121,34,68,145,16,0 ; vpinsrd $0x0,0x10(%r9,%rdx,4),%xmm0,%xmm0
+ DB 196,99,45,24,208,1 ; vinsertf128 $0x1,%xmm0,%ymm10,%ymm10
+ DB 196,193,121,16,4,145 ; vmovupd (%r9,%rdx,4),%xmm0
+ DB 196,67,125,13,210,12 ; vblendpd $0xc,%ymm10,%ymm0,%ymm10
+ DB 233,74,253,255,255 ; jmpq 2289 <_sk_load_tables_avx+0x1d>
+ DB 144 ; nop
+ DB 120,255 ; js 2541 <_sk_load_tables_avx+0x2d5>
+ DB 255 ; (bad)
+ DB 255,153,255,255,255,131 ; lcall *-0x7c000001(%rcx)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 238 ; out %al,(%dx)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 218,255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,198 ; inc %esi
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; .byte 0xff
+ DB 170 ; stos %al,%es:(%rdi)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; .byte 0xff
PUBLIC _sk_load_tables_u16_be_avx
_sk_load_tables_u16_be_avx LABEL PROC
@@ -9357,7 +9795,7 @@ _sk_load_tables_u16_be_avx LABEL PROC
DB 76,141,20,149,0,0,0,0 ; lea 0x0(,%rdx,4),%r10
DB 77,133,192 ; test %r8,%r8
DB 197,252,17,60,36 ; vmovups %ymm7,(%rsp)
- DB 15,133,87,2,0,0 ; jne 268b <_sk_load_tables_u16_be_avx+0x276>
+ DB 15,133,87,2,0,0 ; jne 27d2 <_sk_load_tables_u16_be_avx+0x276>
DB 196,1,121,16,4,81 ; vmovupd (%r9,%r10,2),%xmm8
DB 196,129,121,16,84,81,16 ; vmovupd 0x10(%r9,%r10,2),%xmm2
DB 196,129,121,16,92,81,32 ; vmovupd 0x20(%r9,%r10,2),%xmm3
@@ -9372,7 +9810,7 @@ _sk_load_tables_u16_be_avx LABEL PROC
DB 197,113,105,219 ; vpunpckhwd %xmm3,%xmm1,%xmm11
DB 197,177,108,200 ; vpunpcklqdq %xmm0,%xmm9,%xmm1
DB 197,49,109,224 ; vpunpckhqdq %xmm0,%xmm9,%xmm12
- DB 197,121,111,21,111,144,0,0 ; vmovdqa 0x906f(%rip),%xmm10 # b4f0 <_sk_clut_4D_avx+0x2717>
+ DB 197,121,111,21,152,147,0,0 ; vmovdqa 0x9398(%rip),%xmm10 # b960 <_sk_clut_4D_avx+0x27d3>
DB 196,193,113,219,202 ; vpand %xmm10,%xmm1,%xmm1
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 196,193,113,105,209 ; vpunpckhwd %xmm9,%xmm1,%xmm2
@@ -9470,7 +9908,7 @@ _sk_load_tables_u16_be_avx LABEL PROC
DB 196,226,121,51,219 ; vpmovzxwd %xmm3,%xmm3
DB 196,227,101,24,223,1 ; vinsertf128 $0x1,%xmm7,%ymm3,%ymm3
DB 197,252,91,219 ; vcvtdq2ps %ymm3,%ymm3
- DB 196,226,125,24,61,54,138,0,0 ; vbroadcastss 0x8a36(%rip),%ymm7 # b0b0 <_sk_clut_4D_avx+0x22d7>
+ DB 196,226,125,24,61,163,140,0,0 ; vbroadcastss 0x8ca3(%rip),%ymm7 # b464 <_sk_clut_4D_avx+0x22d7>
DB 197,228,89,223 ; vmulps %ymm7,%ymm3,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
DB 197,252,16,60,36 ; vmovups (%rsp),%ymm7
@@ -9479,29 +9917,29 @@ _sk_load_tables_u16_be_avx LABEL PROC
DB 196,1,123,16,4,81 ; vmovsd (%r9,%r10,2),%xmm8
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 73,131,248,1 ; cmp $0x1,%r8
- DB 116,85 ; je 26f1 <_sk_load_tables_u16_be_avx+0x2dc>
+ DB 116,85 ; je 2838 <_sk_load_tables_u16_be_avx+0x2dc>
DB 196,1,57,22,68,81,8 ; vmovhpd 0x8(%r9,%r10,2),%xmm8,%xmm8
DB 73,131,248,3 ; cmp $0x3,%r8
- DB 114,72 ; jb 26f1 <_sk_load_tables_u16_be_avx+0x2dc>
+ DB 114,72 ; jb 2838 <_sk_load_tables_u16_be_avx+0x2dc>
DB 196,129,123,16,84,81,16 ; vmovsd 0x10(%r9,%r10,2),%xmm2
DB 73,131,248,3 ; cmp $0x3,%r8
- DB 116,72 ; je 26fe <_sk_load_tables_u16_be_avx+0x2e9>
+ DB 116,72 ; je 2845 <_sk_load_tables_u16_be_avx+0x2e9>
DB 196,129,105,22,84,81,24 ; vmovhpd 0x18(%r9,%r10,2),%xmm2,%xmm2
DB 73,131,248,5 ; cmp $0x5,%r8
- DB 114,59 ; jb 26fe <_sk_load_tables_u16_be_avx+0x2e9>
+ DB 114,59 ; jb 2845 <_sk_load_tables_u16_be_avx+0x2e9>
DB 196,129,123,16,92,81,32 ; vmovsd 0x20(%r9,%r10,2),%xmm3
DB 73,131,248,5 ; cmp $0x5,%r8
- DB 15,132,123,253,255,255 ; je 244f <_sk_load_tables_u16_be_avx+0x3a>
+ DB 15,132,123,253,255,255 ; je 2596 <_sk_load_tables_u16_be_avx+0x3a>
DB 196,129,97,22,92,81,40 ; vmovhpd 0x28(%r9,%r10,2),%xmm3,%xmm3
DB 73,131,248,7 ; cmp $0x7,%r8
- DB 15,130,106,253,255,255 ; jb 244f <_sk_load_tables_u16_be_avx+0x3a>
+ DB 15,130,106,253,255,255 ; jb 2596 <_sk_load_tables_u16_be_avx+0x3a>
DB 196,1,122,126,76,81,48 ; vmovq 0x30(%r9,%r10,2),%xmm9
- DB 233,94,253,255,255 ; jmpq 244f <_sk_load_tables_u16_be_avx+0x3a>
+ DB 233,94,253,255,255 ; jmpq 2596 <_sk_load_tables_u16_be_avx+0x3a>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2
- DB 233,81,253,255,255 ; jmpq 244f <_sk_load_tables_u16_be_avx+0x3a>
+ DB 233,81,253,255,255 ; jmpq 2596 <_sk_load_tables_u16_be_avx+0x3a>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
- DB 233,72,253,255,255 ; jmpq 244f <_sk_load_tables_u16_be_avx+0x3a>
+ DB 233,72,253,255,255 ; jmpq 2596 <_sk_load_tables_u16_be_avx+0x3a>
PUBLIC _sk_load_tables_rgb_u16_be_avx
_sk_load_tables_rgb_u16_be_avx LABEL PROC
@@ -9512,7 +9950,7 @@ _sk_load_tables_rgb_u16_be_avx LABEL PROC
DB 77,133,192 ; test %r8,%r8
DB 197,252,17,124,36,32 ; vmovups %ymm7,0x20(%rsp)
DB 197,252,17,52,36 ; vmovups %ymm6,(%rsp)
- DB 15,133,74,2,0,0 ; jne 2972 <_sk_load_tables_rgb_u16_be_avx+0x26b>
+ DB 15,133,74,2,0,0 ; jne 2ab9 <_sk_load_tables_rgb_u16_be_avx+0x26b>
DB 196,129,122,111,4,81 ; vmovdqu (%r9,%r10,2),%xmm0
DB 196,129,122,111,84,81,12 ; vmovdqu 0xc(%r9,%r10,2),%xmm2
DB 196,129,122,111,76,81,24 ; vmovdqu 0x18(%r9,%r10,2),%xmm1
@@ -9533,7 +9971,7 @@ _sk_load_tables_rgb_u16_be_avx LABEL PROC
DB 197,185,108,218 ; vpunpcklqdq %xmm2,%xmm8,%xmm3
DB 197,57,109,218 ; vpunpckhqdq %xmm2,%xmm8,%xmm11
DB 197,121,108,193 ; vpunpcklqdq %xmm1,%xmm0,%xmm8
- DB 197,121,111,13,110,141,0,0 ; vmovdqa 0x8d6e(%rip),%xmm9 # b500 <_sk_clut_4D_avx+0x2727>
+ DB 197,121,111,13,151,144,0,0 ; vmovdqa 0x9097(%rip),%xmm9 # b970 <_sk_clut_4D_avx+0x27e3>
DB 196,193,97,219,193 ; vpand %xmm9,%xmm3,%xmm0
DB 196,65,41,239,210 ; vpxor %xmm10,%xmm10,%xmm10
DB 196,193,121,105,202 ; vpunpckhwd %xmm10,%xmm0,%xmm1
@@ -9623,7 +10061,7 @@ _sk_load_tables_rgb_u16_be_avx LABEL PROC
DB 196,195,105,33,211,48 ; vinsertps $0x30,%xmm11,%xmm2,%xmm2
DB 196,227,109,24,211,1 ; vinsertf128 $0x1,%xmm3,%ymm2,%ymm2
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,29,83,135,0,0 ; vbroadcastss 0x8753(%rip),%ymm3 # b0b4 <_sk_clut_4D_avx+0x22db>
+ DB 196,226,125,24,29,192,137,0,0 ; vbroadcastss 0x89c0(%rip),%ymm3 # b468 <_sk_clut_4D_avx+0x22db>
DB 197,252,16,52,36 ; vmovups (%rsp),%ymm6
DB 197,252,16,124,36,32 ; vmovups 0x20(%rsp),%ymm7
DB 72,131,196,88 ; add $0x58,%rsp
@@ -9631,41 +10069,41 @@ _sk_load_tables_rgb_u16_be_avx LABEL PROC
DB 196,129,121,110,4,81 ; vmovd (%r9,%r10,2),%xmm0
DB 196,129,121,196,68,81,4,2 ; vpinsrw $0x2,0x4(%r9,%r10,2),%xmm0,%xmm0
DB 73,131,248,1 ; cmp $0x1,%r8
- DB 117,5 ; jne 298b <_sk_load_tables_rgb_u16_be_avx+0x284>
- DB 233,209,253,255,255 ; jmpq 275c <_sk_load_tables_rgb_u16_be_avx+0x55>
+ DB 117,5 ; jne 2ad2 <_sk_load_tables_rgb_u16_be_avx+0x284>
+ DB 233,209,253,255,255 ; jmpq 28a3 <_sk_load_tables_rgb_u16_be_avx+0x55>
DB 196,129,121,110,76,81,6 ; vmovd 0x6(%r9,%r10,2),%xmm1
DB 196,1,113,196,68,81,10,2 ; vpinsrw $0x2,0xa(%r9,%r10,2),%xmm1,%xmm8
DB 73,131,248,3 ; cmp $0x3,%r8
- DB 114,26 ; jb 29ba <_sk_load_tables_rgb_u16_be_avx+0x2b3>
+ DB 114,26 ; jb 2b01 <_sk_load_tables_rgb_u16_be_avx+0x2b3>
DB 196,129,121,110,76,81,12 ; vmovd 0xc(%r9,%r10,2),%xmm1
DB 196,129,113,196,84,81,16,2 ; vpinsrw $0x2,0x10(%r9,%r10,2),%xmm1,%xmm2
DB 73,131,248,3 ; cmp $0x3,%r8
- DB 117,10 ; jne 29bf <_sk_load_tables_rgb_u16_be_avx+0x2b8>
- DB 233,162,253,255,255 ; jmpq 275c <_sk_load_tables_rgb_u16_be_avx+0x55>
- DB 233,157,253,255,255 ; jmpq 275c <_sk_load_tables_rgb_u16_be_avx+0x55>
+ DB 117,10 ; jne 2b06 <_sk_load_tables_rgb_u16_be_avx+0x2b8>
+ DB 233,162,253,255,255 ; jmpq 28a3 <_sk_load_tables_rgb_u16_be_avx+0x55>
+ DB 233,157,253,255,255 ; jmpq 28a3 <_sk_load_tables_rgb_u16_be_avx+0x55>
DB 196,129,121,110,76,81,18 ; vmovd 0x12(%r9,%r10,2),%xmm1
DB 196,1,113,196,76,81,22,2 ; vpinsrw $0x2,0x16(%r9,%r10,2),%xmm1,%xmm9
DB 73,131,248,5 ; cmp $0x5,%r8
- DB 114,26 ; jb 29ee <_sk_load_tables_rgb_u16_be_avx+0x2e7>
+ DB 114,26 ; jb 2b35 <_sk_load_tables_rgb_u16_be_avx+0x2e7>
DB 196,129,121,110,76,81,24 ; vmovd 0x18(%r9,%r10,2),%xmm1
DB 196,129,113,196,76,81,28,2 ; vpinsrw $0x2,0x1c(%r9,%r10,2),%xmm1,%xmm1
DB 73,131,248,5 ; cmp $0x5,%r8
- DB 117,10 ; jne 29f3 <_sk_load_tables_rgb_u16_be_avx+0x2ec>
- DB 233,110,253,255,255 ; jmpq 275c <_sk_load_tables_rgb_u16_be_avx+0x55>
- DB 233,105,253,255,255 ; jmpq 275c <_sk_load_tables_rgb_u16_be_avx+0x55>
+ DB 117,10 ; jne 2b3a <_sk_load_tables_rgb_u16_be_avx+0x2ec>
+ DB 233,110,253,255,255 ; jmpq 28a3 <_sk_load_tables_rgb_u16_be_avx+0x55>
+ DB 233,105,253,255,255 ; jmpq 28a3 <_sk_load_tables_rgb_u16_be_avx+0x55>
DB 196,129,121,110,92,81,30 ; vmovd 0x1e(%r9,%r10,2),%xmm3
DB 196,1,97,196,92,81,34,2 ; vpinsrw $0x2,0x22(%r9,%r10,2),%xmm3,%xmm11
DB 73,131,248,7 ; cmp $0x7,%r8
- DB 114,20 ; jb 2a1c <_sk_load_tables_rgb_u16_be_avx+0x315>
+ DB 114,20 ; jb 2b63 <_sk_load_tables_rgb_u16_be_avx+0x315>
DB 196,129,121,110,92,81,36 ; vmovd 0x24(%r9,%r10,2),%xmm3
DB 196,129,97,196,92,81,40,2 ; vpinsrw $0x2,0x28(%r9,%r10,2),%xmm3,%xmm3
- DB 233,64,253,255,255 ; jmpq 275c <_sk_load_tables_rgb_u16_be_avx+0x55>
- DB 233,59,253,255,255 ; jmpq 275c <_sk_load_tables_rgb_u16_be_avx+0x55>
+ DB 233,64,253,255,255 ; jmpq 28a3 <_sk_load_tables_rgb_u16_be_avx+0x55>
+ DB 233,59,253,255,255 ; jmpq 28a3 <_sk_load_tables_rgb_u16_be_avx+0x55>
PUBLIC _sk_byte_tables_avx
_sk_byte_tables_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,98,125,24,5,140,134,0,0 ; vbroadcastss 0x868c(%rip),%ymm8 # b0b8 <_sk_clut_4D_avx+0x22df>
+ DB 196,98,125,24,5,249,136,0,0 ; vbroadcastss 0x88f9(%rip),%ymm8 # b46c <_sk_clut_4D_avx+0x22df>
DB 196,193,124,89,192 ; vmulps %ymm8,%ymm0,%ymm0
DB 197,125,91,200 ; vcvtps2dq %ymm0,%ymm9
DB 196,65,249,126,201 ; vmovq %xmm9,%r9
@@ -9784,7 +10222,7 @@ _sk_byte_tables_avx LABEL PROC
DB 196,194,121,49,204 ; vpmovzxbd %xmm12,%xmm1
DB 196,194,121,49,213 ; vpmovzxbd %xmm13,%xmm2
DB 196,227,117,24,202,1 ; vinsertf128 $0x1,%xmm2,%ymm1,%ymm1
- DB 196,98,125,24,13,54,132,0,0 ; vbroadcastss 0x8436(%rip),%ymm9 # b0bc <_sk_clut_4D_avx+0x22e3>
+ DB 196,98,125,24,13,163,134,0,0 ; vbroadcastss 0x86a3(%rip),%ymm9 # b470 <_sk_clut_4D_avx+0x22e3>
DB 196,193,124,89,193 ; vmulps %ymm9,%ymm0,%ymm0
DB 197,252,91,201 ; vcvtdq2ps %ymm1,%ymm1
DB 196,193,116,89,201 ; vmulps %ymm9,%ymm1,%ymm1
@@ -9898,7 +10336,7 @@ _sk_byte_tables_rgb_avx LABEL PROC
DB 196,194,121,49,203 ; vpmovzxbd %xmm11,%xmm1
DB 196,227,125,24,193,1 ; vinsertf128 $0x1,%xmm1,%ymm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,98,125,24,13,23,130,0,0 ; vbroadcastss 0x8217(%rip),%ymm9 # b0c0 <_sk_clut_4D_avx+0x22e7>
+ DB 196,98,125,24,13,132,132,0,0 ; vbroadcastss 0x8484(%rip),%ymm9 # b474 <_sk_clut_4D_avx+0x22e7>
DB 196,193,124,89,193 ; vmulps %ymm9,%ymm0,%ymm0
DB 196,194,121,49,202 ; vpmovzxbd %xmm10,%xmm1
DB 196,194,121,49,212 ; vpmovzxbd %xmm12,%xmm2
@@ -10093,36 +10531,36 @@ _sk_parametric_r_avx LABEL PROC
DB 196,193,124,88,195 ; vaddps %ymm11,%ymm0,%ymm0
DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10
DB 197,124,91,216 ; vcvtdq2ps %ymm0,%ymm11
- DB 196,98,125,24,37,196,126,0,0 ; vbroadcastss 0x7ec4(%rip),%ymm12 # b0c4 <_sk_clut_4D_avx+0x22eb>
+ DB 196,98,125,24,37,49,129,0,0 ; vbroadcastss 0x8131(%rip),%ymm12 # b478 <_sk_clut_4D_avx+0x22eb>
DB 196,65,36,89,220 ; vmulps %ymm12,%ymm11,%ymm11
- DB 196,98,125,24,37,186,126,0,0 ; vbroadcastss 0x7eba(%rip),%ymm12 # b0c8 <_sk_clut_4D_avx+0x22ef>
+ DB 196,98,125,24,37,39,129,0,0 ; vbroadcastss 0x8127(%rip),%ymm12 # b47c <_sk_clut_4D_avx+0x22ef>
DB 196,193,124,84,196 ; vandps %ymm12,%ymm0,%ymm0
- DB 196,98,125,24,37,176,126,0,0 ; vbroadcastss 0x7eb0(%rip),%ymm12 # b0cc <_sk_clut_4D_avx+0x22f3>
+ DB 196,98,125,24,37,29,129,0,0 ; vbroadcastss 0x811d(%rip),%ymm12 # b480 <_sk_clut_4D_avx+0x22f3>
DB 196,193,124,86,196 ; vorps %ymm12,%ymm0,%ymm0
- DB 196,98,125,24,37,166,126,0,0 ; vbroadcastss 0x7ea6(%rip),%ymm12 # b0d0 <_sk_clut_4D_avx+0x22f7>
+ DB 196,98,125,24,37,19,129,0,0 ; vbroadcastss 0x8113(%rip),%ymm12 # b484 <_sk_clut_4D_avx+0x22f7>
DB 196,65,36,88,220 ; vaddps %ymm12,%ymm11,%ymm11
- DB 196,98,125,24,37,156,126,0,0 ; vbroadcastss 0x7e9c(%rip),%ymm12 # b0d4 <_sk_clut_4D_avx+0x22fb>
+ DB 196,98,125,24,37,9,129,0,0 ; vbroadcastss 0x8109(%rip),%ymm12 # b488 <_sk_clut_4D_avx+0x22fb>
DB 196,65,124,89,228 ; vmulps %ymm12,%ymm0,%ymm12
DB 196,65,36,92,220 ; vsubps %ymm12,%ymm11,%ymm11
- DB 196,98,125,24,37,141,126,0,0 ; vbroadcastss 0x7e8d(%rip),%ymm12 # b0d8 <_sk_clut_4D_avx+0x22ff>
+ DB 196,98,125,24,37,250,128,0,0 ; vbroadcastss 0x80fa(%rip),%ymm12 # b48c <_sk_clut_4D_avx+0x22ff>
DB 196,193,124,88,196 ; vaddps %ymm12,%ymm0,%ymm0
- DB 196,98,125,24,37,131,126,0,0 ; vbroadcastss 0x7e83(%rip),%ymm12 # b0dc <_sk_clut_4D_avx+0x2303>
+ DB 196,98,125,24,37,240,128,0,0 ; vbroadcastss 0x80f0(%rip),%ymm12 # b490 <_sk_clut_4D_avx+0x2303>
DB 197,156,94,192 ; vdivps %ymm0,%ymm12,%ymm0
DB 197,164,92,192 ; vsubps %ymm0,%ymm11,%ymm0
DB 197,172,89,192 ; vmulps %ymm0,%ymm10,%ymm0
DB 196,99,125,8,208,1 ; vroundps $0x1,%ymm0,%ymm10
DB 196,65,124,92,210 ; vsubps %ymm10,%ymm0,%ymm10
- DB 196,98,125,24,29,103,126,0,0 ; vbroadcastss 0x7e67(%rip),%ymm11 # b0e0 <_sk_clut_4D_avx+0x2307>
+ DB 196,98,125,24,29,212,128,0,0 ; vbroadcastss 0x80d4(%rip),%ymm11 # b494 <_sk_clut_4D_avx+0x2307>
DB 196,193,124,88,195 ; vaddps %ymm11,%ymm0,%ymm0
- DB 196,98,125,24,29,93,126,0,0 ; vbroadcastss 0x7e5d(%rip),%ymm11 # b0e4 <_sk_clut_4D_avx+0x230b>
+ DB 196,98,125,24,29,202,128,0,0 ; vbroadcastss 0x80ca(%rip),%ymm11 # b498 <_sk_clut_4D_avx+0x230b>
DB 196,65,44,89,219 ; vmulps %ymm11,%ymm10,%ymm11
DB 196,193,124,92,195 ; vsubps %ymm11,%ymm0,%ymm0
- DB 196,98,125,24,29,78,126,0,0 ; vbroadcastss 0x7e4e(%rip),%ymm11 # b0e8 <_sk_clut_4D_avx+0x230f>
+ DB 196,98,125,24,29,187,128,0,0 ; vbroadcastss 0x80bb(%rip),%ymm11 # b49c <_sk_clut_4D_avx+0x230f>
DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10
- DB 196,98,125,24,29,68,126,0,0 ; vbroadcastss 0x7e44(%rip),%ymm11 # b0ec <_sk_clut_4D_avx+0x2313>
+ DB 196,98,125,24,29,177,128,0,0 ; vbroadcastss 0x80b1(%rip),%ymm11 # b4a0 <_sk_clut_4D_avx+0x2313>
DB 196,65,36,94,210 ; vdivps %ymm10,%ymm11,%ymm10
DB 196,193,124,88,194 ; vaddps %ymm10,%ymm0,%ymm0
- DB 196,98,125,24,21,53,126,0,0 ; vbroadcastss 0x7e35(%rip),%ymm10 # b0f0 <_sk_clut_4D_avx+0x2317>
+ DB 196,98,125,24,21,162,128,0,0 ; vbroadcastss 0x80a2(%rip),%ymm10 # b4a4 <_sk_clut_4D_avx+0x2317>
DB 196,193,124,89,194 ; vmulps %ymm10,%ymm0,%ymm0
DB 197,253,91,192 ; vcvtps2dq %ymm0,%ymm0
DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
@@ -10130,7 +10568,7 @@ _sk_parametric_r_avx LABEL PROC
DB 196,195,125,74,193,128 ; vblendvps %ymm8,%ymm9,%ymm0,%ymm0
DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
DB 196,193,124,95,192 ; vmaxps %ymm8,%ymm0,%ymm0
- DB 196,98,125,24,5,12,126,0,0 ; vbroadcastss 0x7e0c(%rip),%ymm8 # b0f4 <_sk_clut_4D_avx+0x231b>
+ DB 196,98,125,24,5,121,128,0,0 ; vbroadcastss 0x8079(%rip),%ymm8 # b4a8 <_sk_clut_4D_avx+0x231b>
DB 196,193,124,93,192 ; vminps %ymm8,%ymm0,%ymm0
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -10150,36 +10588,36 @@ _sk_parametric_g_avx LABEL PROC
DB 196,193,116,88,203 ; vaddps %ymm11,%ymm1,%ymm1
DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10
DB 197,124,91,217 ; vcvtdq2ps %ymm1,%ymm11
- DB 196,98,125,24,37,189,125,0,0 ; vbroadcastss 0x7dbd(%rip),%ymm12 # b0f8 <_sk_clut_4D_avx+0x231f>
+ DB 196,98,125,24,37,42,128,0,0 ; vbroadcastss 0x802a(%rip),%ymm12 # b4ac <_sk_clut_4D_avx+0x231f>
DB 196,65,36,89,220 ; vmulps %ymm12,%ymm11,%ymm11
- DB 196,98,125,24,37,179,125,0,0 ; vbroadcastss 0x7db3(%rip),%ymm12 # b0fc <_sk_clut_4D_avx+0x2323>
+ DB 196,98,125,24,37,32,128,0,0 ; vbroadcastss 0x8020(%rip),%ymm12 # b4b0 <_sk_clut_4D_avx+0x2323>
DB 196,193,116,84,204 ; vandps %ymm12,%ymm1,%ymm1
- DB 196,98,125,24,37,169,125,0,0 ; vbroadcastss 0x7da9(%rip),%ymm12 # b100 <_sk_clut_4D_avx+0x2327>
+ DB 196,98,125,24,37,22,128,0,0 ; vbroadcastss 0x8016(%rip),%ymm12 # b4b4 <_sk_clut_4D_avx+0x2327>
DB 196,193,116,86,204 ; vorps %ymm12,%ymm1,%ymm1
- DB 196,98,125,24,37,159,125,0,0 ; vbroadcastss 0x7d9f(%rip),%ymm12 # b104 <_sk_clut_4D_avx+0x232b>
+ DB 196,98,125,24,37,12,128,0,0 ; vbroadcastss 0x800c(%rip),%ymm12 # b4b8 <_sk_clut_4D_avx+0x232b>
DB 196,65,36,88,220 ; vaddps %ymm12,%ymm11,%ymm11
- DB 196,98,125,24,37,149,125,0,0 ; vbroadcastss 0x7d95(%rip),%ymm12 # b108 <_sk_clut_4D_avx+0x232f>
+ DB 196,98,125,24,37,2,128,0,0 ; vbroadcastss 0x8002(%rip),%ymm12 # b4bc <_sk_clut_4D_avx+0x232f>
DB 196,65,116,89,228 ; vmulps %ymm12,%ymm1,%ymm12
DB 196,65,36,92,220 ; vsubps %ymm12,%ymm11,%ymm11
- DB 196,98,125,24,37,134,125,0,0 ; vbroadcastss 0x7d86(%rip),%ymm12 # b10c <_sk_clut_4D_avx+0x2333>
+ DB 196,98,125,24,37,243,127,0,0 ; vbroadcastss 0x7ff3(%rip),%ymm12 # b4c0 <_sk_clut_4D_avx+0x2333>
DB 196,193,116,88,204 ; vaddps %ymm12,%ymm1,%ymm1
- DB 196,98,125,24,37,124,125,0,0 ; vbroadcastss 0x7d7c(%rip),%ymm12 # b110 <_sk_clut_4D_avx+0x2337>
+ DB 196,98,125,24,37,233,127,0,0 ; vbroadcastss 0x7fe9(%rip),%ymm12 # b4c4 <_sk_clut_4D_avx+0x2337>
DB 197,156,94,201 ; vdivps %ymm1,%ymm12,%ymm1
DB 197,164,92,201 ; vsubps %ymm1,%ymm11,%ymm1
DB 197,172,89,201 ; vmulps %ymm1,%ymm10,%ymm1
DB 196,99,125,8,209,1 ; vroundps $0x1,%ymm1,%ymm10
DB 196,65,116,92,210 ; vsubps %ymm10,%ymm1,%ymm10
- DB 196,98,125,24,29,96,125,0,0 ; vbroadcastss 0x7d60(%rip),%ymm11 # b114 <_sk_clut_4D_avx+0x233b>
+ DB 196,98,125,24,29,205,127,0,0 ; vbroadcastss 0x7fcd(%rip),%ymm11 # b4c8 <_sk_clut_4D_avx+0x233b>
DB 196,193,116,88,203 ; vaddps %ymm11,%ymm1,%ymm1
- DB 196,98,125,24,29,86,125,0,0 ; vbroadcastss 0x7d56(%rip),%ymm11 # b118 <_sk_clut_4D_avx+0x233f>
+ DB 196,98,125,24,29,195,127,0,0 ; vbroadcastss 0x7fc3(%rip),%ymm11 # b4cc <_sk_clut_4D_avx+0x233f>
DB 196,65,44,89,219 ; vmulps %ymm11,%ymm10,%ymm11
DB 196,193,116,92,203 ; vsubps %ymm11,%ymm1,%ymm1
- DB 196,98,125,24,29,71,125,0,0 ; vbroadcastss 0x7d47(%rip),%ymm11 # b11c <_sk_clut_4D_avx+0x2343>
+ DB 196,98,125,24,29,180,127,0,0 ; vbroadcastss 0x7fb4(%rip),%ymm11 # b4d0 <_sk_clut_4D_avx+0x2343>
DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10
- DB 196,98,125,24,29,61,125,0,0 ; vbroadcastss 0x7d3d(%rip),%ymm11 # b120 <_sk_clut_4D_avx+0x2347>
+ DB 196,98,125,24,29,170,127,0,0 ; vbroadcastss 0x7faa(%rip),%ymm11 # b4d4 <_sk_clut_4D_avx+0x2347>
DB 196,65,36,94,210 ; vdivps %ymm10,%ymm11,%ymm10
DB 196,193,116,88,202 ; vaddps %ymm10,%ymm1,%ymm1
- DB 196,98,125,24,21,46,125,0,0 ; vbroadcastss 0x7d2e(%rip),%ymm10 # b124 <_sk_clut_4D_avx+0x234b>
+ DB 196,98,125,24,21,155,127,0,0 ; vbroadcastss 0x7f9b(%rip),%ymm10 # b4d8 <_sk_clut_4D_avx+0x234b>
DB 196,193,116,89,202 ; vmulps %ymm10,%ymm1,%ymm1
DB 197,253,91,201 ; vcvtps2dq %ymm1,%ymm1
DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
@@ -10187,7 +10625,7 @@ _sk_parametric_g_avx LABEL PROC
DB 196,195,117,74,201,128 ; vblendvps %ymm8,%ymm9,%ymm1,%ymm1
DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
DB 196,193,116,95,200 ; vmaxps %ymm8,%ymm1,%ymm1
- DB 196,98,125,24,5,5,125,0,0 ; vbroadcastss 0x7d05(%rip),%ymm8 # b128 <_sk_clut_4D_avx+0x234f>
+ DB 196,98,125,24,5,114,127,0,0 ; vbroadcastss 0x7f72(%rip),%ymm8 # b4dc <_sk_clut_4D_avx+0x234f>
DB 196,193,116,93,200 ; vminps %ymm8,%ymm1,%ymm1
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -10207,36 +10645,36 @@ _sk_parametric_b_avx LABEL PROC
DB 196,193,108,88,211 ; vaddps %ymm11,%ymm2,%ymm2
DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10
DB 197,124,91,218 ; vcvtdq2ps %ymm2,%ymm11
- DB 196,98,125,24,37,182,124,0,0 ; vbroadcastss 0x7cb6(%rip),%ymm12 # b12c <_sk_clut_4D_avx+0x2353>
+ DB 196,98,125,24,37,35,127,0,0 ; vbroadcastss 0x7f23(%rip),%ymm12 # b4e0 <_sk_clut_4D_avx+0x2353>
DB 196,65,36,89,220 ; vmulps %ymm12,%ymm11,%ymm11
- DB 196,98,125,24,37,172,124,0,0 ; vbroadcastss 0x7cac(%rip),%ymm12 # b130 <_sk_clut_4D_avx+0x2357>
+ DB 196,98,125,24,37,25,127,0,0 ; vbroadcastss 0x7f19(%rip),%ymm12 # b4e4 <_sk_clut_4D_avx+0x2357>
DB 196,193,108,84,212 ; vandps %ymm12,%ymm2,%ymm2
- DB 196,98,125,24,37,162,124,0,0 ; vbroadcastss 0x7ca2(%rip),%ymm12 # b134 <_sk_clut_4D_avx+0x235b>
+ DB 196,98,125,24,37,15,127,0,0 ; vbroadcastss 0x7f0f(%rip),%ymm12 # b4e8 <_sk_clut_4D_avx+0x235b>
DB 196,193,108,86,212 ; vorps %ymm12,%ymm2,%ymm2
- DB 196,98,125,24,37,152,124,0,0 ; vbroadcastss 0x7c98(%rip),%ymm12 # b138 <_sk_clut_4D_avx+0x235f>
+ DB 196,98,125,24,37,5,127,0,0 ; vbroadcastss 0x7f05(%rip),%ymm12 # b4ec <_sk_clut_4D_avx+0x235f>
DB 196,65,36,88,220 ; vaddps %ymm12,%ymm11,%ymm11
- DB 196,98,125,24,37,142,124,0,0 ; vbroadcastss 0x7c8e(%rip),%ymm12 # b13c <_sk_clut_4D_avx+0x2363>
+ DB 196,98,125,24,37,251,126,0,0 ; vbroadcastss 0x7efb(%rip),%ymm12 # b4f0 <_sk_clut_4D_avx+0x2363>
DB 196,65,108,89,228 ; vmulps %ymm12,%ymm2,%ymm12
DB 196,65,36,92,220 ; vsubps %ymm12,%ymm11,%ymm11
- DB 196,98,125,24,37,127,124,0,0 ; vbroadcastss 0x7c7f(%rip),%ymm12 # b140 <_sk_clut_4D_avx+0x2367>
+ DB 196,98,125,24,37,236,126,0,0 ; vbroadcastss 0x7eec(%rip),%ymm12 # b4f4 <_sk_clut_4D_avx+0x2367>
DB 196,193,108,88,212 ; vaddps %ymm12,%ymm2,%ymm2
- DB 196,98,125,24,37,117,124,0,0 ; vbroadcastss 0x7c75(%rip),%ymm12 # b144 <_sk_clut_4D_avx+0x236b>
+ DB 196,98,125,24,37,226,126,0,0 ; vbroadcastss 0x7ee2(%rip),%ymm12 # b4f8 <_sk_clut_4D_avx+0x236b>
DB 197,156,94,210 ; vdivps %ymm2,%ymm12,%ymm2
DB 197,164,92,210 ; vsubps %ymm2,%ymm11,%ymm2
DB 197,172,89,210 ; vmulps %ymm2,%ymm10,%ymm2
DB 196,99,125,8,210,1 ; vroundps $0x1,%ymm2,%ymm10
DB 196,65,108,92,210 ; vsubps %ymm10,%ymm2,%ymm10
- DB 196,98,125,24,29,89,124,0,0 ; vbroadcastss 0x7c59(%rip),%ymm11 # b148 <_sk_clut_4D_avx+0x236f>
+ DB 196,98,125,24,29,198,126,0,0 ; vbroadcastss 0x7ec6(%rip),%ymm11 # b4fc <_sk_clut_4D_avx+0x236f>
DB 196,193,108,88,211 ; vaddps %ymm11,%ymm2,%ymm2
- DB 196,98,125,24,29,79,124,0,0 ; vbroadcastss 0x7c4f(%rip),%ymm11 # b14c <_sk_clut_4D_avx+0x2373>
+ DB 196,98,125,24,29,188,126,0,0 ; vbroadcastss 0x7ebc(%rip),%ymm11 # b500 <_sk_clut_4D_avx+0x2373>
DB 196,65,44,89,219 ; vmulps %ymm11,%ymm10,%ymm11
DB 196,193,108,92,211 ; vsubps %ymm11,%ymm2,%ymm2
- DB 196,98,125,24,29,64,124,0,0 ; vbroadcastss 0x7c40(%rip),%ymm11 # b150 <_sk_clut_4D_avx+0x2377>
+ DB 196,98,125,24,29,173,126,0,0 ; vbroadcastss 0x7ead(%rip),%ymm11 # b504 <_sk_clut_4D_avx+0x2377>
DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10
- DB 196,98,125,24,29,54,124,0,0 ; vbroadcastss 0x7c36(%rip),%ymm11 # b154 <_sk_clut_4D_avx+0x237b>
+ DB 196,98,125,24,29,163,126,0,0 ; vbroadcastss 0x7ea3(%rip),%ymm11 # b508 <_sk_clut_4D_avx+0x237b>
DB 196,65,36,94,210 ; vdivps %ymm10,%ymm11,%ymm10
DB 196,193,108,88,210 ; vaddps %ymm10,%ymm2,%ymm2
- DB 196,98,125,24,21,39,124,0,0 ; vbroadcastss 0x7c27(%rip),%ymm10 # b158 <_sk_clut_4D_avx+0x237f>
+ DB 196,98,125,24,21,148,126,0,0 ; vbroadcastss 0x7e94(%rip),%ymm10 # b50c <_sk_clut_4D_avx+0x237f>
DB 196,193,108,89,210 ; vmulps %ymm10,%ymm2,%ymm2
DB 197,253,91,210 ; vcvtps2dq %ymm2,%ymm2
DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
@@ -10244,7 +10682,7 @@ _sk_parametric_b_avx LABEL PROC
DB 196,195,109,74,209,128 ; vblendvps %ymm8,%ymm9,%ymm2,%ymm2
DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
DB 196,193,108,95,208 ; vmaxps %ymm8,%ymm2,%ymm2
- DB 196,98,125,24,5,254,123,0,0 ; vbroadcastss 0x7bfe(%rip),%ymm8 # b15c <_sk_clut_4D_avx+0x2383>
+ DB 196,98,125,24,5,107,126,0,0 ; vbroadcastss 0x7e6b(%rip),%ymm8 # b510 <_sk_clut_4D_avx+0x2383>
DB 196,193,108,93,208 ; vminps %ymm8,%ymm2,%ymm2
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -10264,36 +10702,36 @@ _sk_parametric_a_avx LABEL PROC
DB 196,193,100,88,219 ; vaddps %ymm11,%ymm3,%ymm3
DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10
DB 197,124,91,219 ; vcvtdq2ps %ymm3,%ymm11
- DB 196,98,125,24,37,175,123,0,0 ; vbroadcastss 0x7baf(%rip),%ymm12 # b160 <_sk_clut_4D_avx+0x2387>
+ DB 196,98,125,24,37,28,126,0,0 ; vbroadcastss 0x7e1c(%rip),%ymm12 # b514 <_sk_clut_4D_avx+0x2387>
DB 196,65,36,89,220 ; vmulps %ymm12,%ymm11,%ymm11
- DB 196,98,125,24,37,165,123,0,0 ; vbroadcastss 0x7ba5(%rip),%ymm12 # b164 <_sk_clut_4D_avx+0x238b>
+ DB 196,98,125,24,37,18,126,0,0 ; vbroadcastss 0x7e12(%rip),%ymm12 # b518 <_sk_clut_4D_avx+0x238b>
DB 196,193,100,84,220 ; vandps %ymm12,%ymm3,%ymm3
- DB 196,98,125,24,37,155,123,0,0 ; vbroadcastss 0x7b9b(%rip),%ymm12 # b168 <_sk_clut_4D_avx+0x238f>
+ DB 196,98,125,24,37,8,126,0,0 ; vbroadcastss 0x7e08(%rip),%ymm12 # b51c <_sk_clut_4D_avx+0x238f>
DB 196,193,100,86,220 ; vorps %ymm12,%ymm3,%ymm3
- DB 196,98,125,24,37,145,123,0,0 ; vbroadcastss 0x7b91(%rip),%ymm12 # b16c <_sk_clut_4D_avx+0x2393>
+ DB 196,98,125,24,37,254,125,0,0 ; vbroadcastss 0x7dfe(%rip),%ymm12 # b520 <_sk_clut_4D_avx+0x2393>
DB 196,65,36,88,220 ; vaddps %ymm12,%ymm11,%ymm11
- DB 196,98,125,24,37,135,123,0,0 ; vbroadcastss 0x7b87(%rip),%ymm12 # b170 <_sk_clut_4D_avx+0x2397>
+ DB 196,98,125,24,37,244,125,0,0 ; vbroadcastss 0x7df4(%rip),%ymm12 # b524 <_sk_clut_4D_avx+0x2397>
DB 196,65,100,89,228 ; vmulps %ymm12,%ymm3,%ymm12
DB 196,65,36,92,220 ; vsubps %ymm12,%ymm11,%ymm11
- DB 196,98,125,24,37,120,123,0,0 ; vbroadcastss 0x7b78(%rip),%ymm12 # b174 <_sk_clut_4D_avx+0x239b>
+ DB 196,98,125,24,37,229,125,0,0 ; vbroadcastss 0x7de5(%rip),%ymm12 # b528 <_sk_clut_4D_avx+0x239b>
DB 196,193,100,88,220 ; vaddps %ymm12,%ymm3,%ymm3
- DB 196,98,125,24,37,110,123,0,0 ; vbroadcastss 0x7b6e(%rip),%ymm12 # b178 <_sk_clut_4D_avx+0x239f>
+ DB 196,98,125,24,37,219,125,0,0 ; vbroadcastss 0x7ddb(%rip),%ymm12 # b52c <_sk_clut_4D_avx+0x239f>
DB 197,156,94,219 ; vdivps %ymm3,%ymm12,%ymm3
DB 197,164,92,219 ; vsubps %ymm3,%ymm11,%ymm3
DB 197,172,89,219 ; vmulps %ymm3,%ymm10,%ymm3
DB 196,99,125,8,211,1 ; vroundps $0x1,%ymm3,%ymm10
DB 196,65,100,92,210 ; vsubps %ymm10,%ymm3,%ymm10
- DB 196,98,125,24,29,82,123,0,0 ; vbroadcastss 0x7b52(%rip),%ymm11 # b17c <_sk_clut_4D_avx+0x23a3>
+ DB 196,98,125,24,29,191,125,0,0 ; vbroadcastss 0x7dbf(%rip),%ymm11 # b530 <_sk_clut_4D_avx+0x23a3>
DB 196,193,100,88,219 ; vaddps %ymm11,%ymm3,%ymm3
- DB 196,98,125,24,29,72,123,0,0 ; vbroadcastss 0x7b48(%rip),%ymm11 # b180 <_sk_clut_4D_avx+0x23a7>
+ DB 196,98,125,24,29,181,125,0,0 ; vbroadcastss 0x7db5(%rip),%ymm11 # b534 <_sk_clut_4D_avx+0x23a7>
DB 196,65,44,89,219 ; vmulps %ymm11,%ymm10,%ymm11
DB 196,193,100,92,219 ; vsubps %ymm11,%ymm3,%ymm3
- DB 196,98,125,24,29,57,123,0,0 ; vbroadcastss 0x7b39(%rip),%ymm11 # b184 <_sk_clut_4D_avx+0x23ab>
+ DB 196,98,125,24,29,166,125,0,0 ; vbroadcastss 0x7da6(%rip),%ymm11 # b538 <_sk_clut_4D_avx+0x23ab>
DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10
- DB 196,98,125,24,29,47,123,0,0 ; vbroadcastss 0x7b2f(%rip),%ymm11 # b188 <_sk_clut_4D_avx+0x23af>
+ DB 196,98,125,24,29,156,125,0,0 ; vbroadcastss 0x7d9c(%rip),%ymm11 # b53c <_sk_clut_4D_avx+0x23af>
DB 196,65,36,94,210 ; vdivps %ymm10,%ymm11,%ymm10
DB 196,193,100,88,218 ; vaddps %ymm10,%ymm3,%ymm3
- DB 196,98,125,24,21,32,123,0,0 ; vbroadcastss 0x7b20(%rip),%ymm10 # b18c <_sk_clut_4D_avx+0x23b3>
+ DB 196,98,125,24,21,141,125,0,0 ; vbroadcastss 0x7d8d(%rip),%ymm10 # b540 <_sk_clut_4D_avx+0x23b3>
DB 196,193,100,89,218 ; vmulps %ymm10,%ymm3,%ymm3
DB 197,253,91,219 ; vcvtps2dq %ymm3,%ymm3
DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
@@ -10301,7 +10739,7 @@ _sk_parametric_a_avx LABEL PROC
DB 196,195,101,74,217,128 ; vblendvps %ymm8,%ymm9,%ymm3,%ymm3
DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
DB 196,193,100,95,216 ; vmaxps %ymm8,%ymm3,%ymm3
- DB 196,98,125,24,5,247,122,0,0 ; vbroadcastss 0x7af7(%rip),%ymm8 # b190 <_sk_clut_4D_avx+0x23b7>
+ DB 196,98,125,24,5,100,125,0,0 ; vbroadcastss 0x7d64(%rip),%ymm8 # b544 <_sk_clut_4D_avx+0x23b7>
DB 196,193,100,93,216 ; vminps %ymm8,%ymm3,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -10318,34 +10756,34 @@ _sk_gamma_avx LABEL PROC
DB 197,252,40,233 ; vmovaps %ymm1,%ymm5
DB 72,173 ; lods %ds:(%rsi),%rax
DB 197,252,91,200 ; vcvtdq2ps %ymm0,%ymm1
- DB 196,98,125,24,5,176,122,0,0 ; vbroadcastss 0x7ab0(%rip),%ymm8 # b194 <_sk_clut_4D_avx+0x23bb>
+ DB 196,98,125,24,5,29,125,0,0 ; vbroadcastss 0x7d1d(%rip),%ymm8 # b548 <_sk_clut_4D_avx+0x23bb>
DB 196,193,116,89,200 ; vmulps %ymm8,%ymm1,%ymm1
- DB 196,98,125,24,13,166,122,0,0 ; vbroadcastss 0x7aa6(%rip),%ymm9 # b198 <_sk_clut_4D_avx+0x23bf>
+ DB 196,98,125,24,13,19,125,0,0 ; vbroadcastss 0x7d13(%rip),%ymm9 # b54c <_sk_clut_4D_avx+0x23bf>
DB 196,193,124,84,193 ; vandps %ymm9,%ymm0,%ymm0
- DB 196,226,125,24,37,156,122,0,0 ; vbroadcastss 0x7a9c(%rip),%ymm4 # b19c <_sk_clut_4D_avx+0x23c3>
+ DB 196,226,125,24,37,9,125,0,0 ; vbroadcastss 0x7d09(%rip),%ymm4 # b550 <_sk_clut_4D_avx+0x23c3>
DB 197,252,86,196 ; vorps %ymm4,%ymm0,%ymm0
- DB 196,98,125,24,21,147,122,0,0 ; vbroadcastss 0x7a93(%rip),%ymm10 # b1a0 <_sk_clut_4D_avx+0x23c7>
+ DB 196,98,125,24,21,0,125,0,0 ; vbroadcastss 0x7d00(%rip),%ymm10 # b554 <_sk_clut_4D_avx+0x23c7>
DB 196,193,116,88,202 ; vaddps %ymm10,%ymm1,%ymm1
- DB 196,98,125,24,29,137,122,0,0 ; vbroadcastss 0x7a89(%rip),%ymm11 # b1a4 <_sk_clut_4D_avx+0x23cb>
+ DB 196,98,125,24,29,246,124,0,0 ; vbroadcastss 0x7cf6(%rip),%ymm11 # b558 <_sk_clut_4D_avx+0x23cb>
DB 196,193,124,89,211 ; vmulps %ymm11,%ymm0,%ymm2
DB 197,244,92,202 ; vsubps %ymm2,%ymm1,%ymm1
- DB 196,98,125,24,37,123,122,0,0 ; vbroadcastss 0x7a7b(%rip),%ymm12 # b1a8 <_sk_clut_4D_avx+0x23cf>
+ DB 196,98,125,24,37,232,124,0,0 ; vbroadcastss 0x7ce8(%rip),%ymm12 # b55c <_sk_clut_4D_avx+0x23cf>
DB 196,193,124,88,196 ; vaddps %ymm12,%ymm0,%ymm0
- DB 196,98,125,24,45,113,122,0,0 ; vbroadcastss 0x7a71(%rip),%ymm13 # b1ac <_sk_clut_4D_avx+0x23d3>
+ DB 196,98,125,24,45,222,124,0,0 ; vbroadcastss 0x7cde(%rip),%ymm13 # b560 <_sk_clut_4D_avx+0x23d3>
DB 197,148,94,192 ; vdivps %ymm0,%ymm13,%ymm0
DB 197,244,92,192 ; vsubps %ymm0,%ymm1,%ymm0
DB 196,98,125,24,48 ; vbroadcastss (%rax),%ymm14
DB 196,193,124,89,198 ; vmulps %ymm14,%ymm0,%ymm0
DB 196,227,125,8,200,1 ; vroundps $0x1,%ymm0,%ymm1
DB 197,252,92,241 ; vsubps %ymm1,%ymm0,%ymm6
- DB 196,98,125,24,61,80,122,0,0 ; vbroadcastss 0x7a50(%rip),%ymm15 # b1b0 <_sk_clut_4D_avx+0x23d7>
+ DB 196,98,125,24,61,189,124,0,0 ; vbroadcastss 0x7cbd(%rip),%ymm15 # b564 <_sk_clut_4D_avx+0x23d7>
DB 196,193,124,88,199 ; vaddps %ymm15,%ymm0,%ymm0
- DB 196,226,125,24,29,70,122,0,0 ; vbroadcastss 0x7a46(%rip),%ymm3 # b1b4 <_sk_clut_4D_avx+0x23db>
+ DB 196,226,125,24,29,179,124,0,0 ; vbroadcastss 0x7cb3(%rip),%ymm3 # b568 <_sk_clut_4D_avx+0x23db>
DB 197,204,89,203 ; vmulps %ymm3,%ymm6,%ymm1
DB 197,252,92,201 ; vsubps %ymm1,%ymm0,%ymm1
- DB 196,226,125,24,21,57,122,0,0 ; vbroadcastss 0x7a39(%rip),%ymm2 # b1b8 <_sk_clut_4D_avx+0x23df>
+ DB 196,226,125,24,21,166,124,0,0 ; vbroadcastss 0x7ca6(%rip),%ymm2 # b56c <_sk_clut_4D_avx+0x23df>
DB 197,236,92,198 ; vsubps %ymm6,%ymm2,%ymm0
- DB 196,226,125,24,53,48,122,0,0 ; vbroadcastss 0x7a30(%rip),%ymm6 # b1bc <_sk_clut_4D_avx+0x23e3>
+ DB 196,226,125,24,53,157,124,0,0 ; vbroadcastss 0x7c9d(%rip),%ymm6 # b570 <_sk_clut_4D_avx+0x23e3>
DB 197,204,94,192 ; vdivps %ymm0,%ymm6,%ymm0
DB 197,244,88,192 ; vaddps %ymm0,%ymm1,%ymm0
DB 197,252,17,4,36 ; vmovups %ymm0,(%rsp)
@@ -10387,7 +10825,7 @@ _sk_gamma_avx LABEL PROC
DB 197,236,92,212 ; vsubps %ymm4,%ymm2,%ymm2
DB 197,204,94,210 ; vdivps %ymm2,%ymm6,%ymm2
DB 197,244,88,202 ; vaddps %ymm2,%ymm1,%ymm1
- DB 196,226,125,24,21,116,121,0,0 ; vbroadcastss 0x7974(%rip),%ymm2 # b1c0 <_sk_clut_4D_avx+0x23e7>
+ DB 196,226,125,24,21,225,123,0,0 ; vbroadcastss 0x7be1(%rip),%ymm2 # b574 <_sk_clut_4D_avx+0x23e7>
DB 197,236,89,28,36 ; vmulps (%rsp),%ymm2,%ymm3
DB 197,252,89,226 ; vmulps %ymm2,%ymm0,%ymm4
DB 197,244,89,210 ; vmulps %ymm2,%ymm1,%ymm2
@@ -10405,31 +10843,31 @@ _sk_gamma_avx LABEL PROC
PUBLIC _sk_lab_to_xyz_avx
_sk_lab_to_xyz_avx LABEL PROC
- DB 196,98,125,24,5,39,121,0,0 ; vbroadcastss 0x7927(%rip),%ymm8 # b1c4 <_sk_clut_4D_avx+0x23eb>
+ DB 196,98,125,24,5,148,123,0,0 ; vbroadcastss 0x7b94(%rip),%ymm8 # b578 <_sk_clut_4D_avx+0x23eb>
DB 196,193,124,89,192 ; vmulps %ymm8,%ymm0,%ymm0
- DB 196,98,125,24,5,29,121,0,0 ; vbroadcastss 0x791d(%rip),%ymm8 # b1c8 <_sk_clut_4D_avx+0x23ef>
+ DB 196,98,125,24,5,138,123,0,0 ; vbroadcastss 0x7b8a(%rip),%ymm8 # b57c <_sk_clut_4D_avx+0x23ef>
DB 196,193,116,89,200 ; vmulps %ymm8,%ymm1,%ymm1
- DB 196,98,125,24,13,19,121,0,0 ; vbroadcastss 0x7913(%rip),%ymm9 # b1cc <_sk_clut_4D_avx+0x23f3>
+ DB 196,98,125,24,13,128,123,0,0 ; vbroadcastss 0x7b80(%rip),%ymm9 # b580 <_sk_clut_4D_avx+0x23f3>
DB 196,193,116,88,201 ; vaddps %ymm9,%ymm1,%ymm1
DB 196,193,108,89,208 ; vmulps %ymm8,%ymm2,%ymm2
DB 196,193,108,88,209 ; vaddps %ymm9,%ymm2,%ymm2
- DB 196,98,125,24,5,255,120,0,0 ; vbroadcastss 0x78ff(%rip),%ymm8 # b1d0 <_sk_clut_4D_avx+0x23f7>
+ DB 196,98,125,24,5,108,123,0,0 ; vbroadcastss 0x7b6c(%rip),%ymm8 # b584 <_sk_clut_4D_avx+0x23f7>
DB 196,193,124,88,192 ; vaddps %ymm8,%ymm0,%ymm0
- DB 196,98,125,24,5,245,120,0,0 ; vbroadcastss 0x78f5(%rip),%ymm8 # b1d4 <_sk_clut_4D_avx+0x23fb>
+ DB 196,98,125,24,5,98,123,0,0 ; vbroadcastss 0x7b62(%rip),%ymm8 # b588 <_sk_clut_4D_avx+0x23fb>
DB 196,193,124,89,192 ; vmulps %ymm8,%ymm0,%ymm0
- DB 196,98,125,24,5,235,120,0,0 ; vbroadcastss 0x78eb(%rip),%ymm8 # b1d8 <_sk_clut_4D_avx+0x23ff>
+ DB 196,98,125,24,5,88,123,0,0 ; vbroadcastss 0x7b58(%rip),%ymm8 # b58c <_sk_clut_4D_avx+0x23ff>
DB 196,193,116,89,200 ; vmulps %ymm8,%ymm1,%ymm1
DB 197,252,88,201 ; vaddps %ymm1,%ymm0,%ymm1
- DB 196,98,125,24,5,221,120,0,0 ; vbroadcastss 0x78dd(%rip),%ymm8 # b1dc <_sk_clut_4D_avx+0x2403>
+ DB 196,98,125,24,5,74,123,0,0 ; vbroadcastss 0x7b4a(%rip),%ymm8 # b590 <_sk_clut_4D_avx+0x2403>
DB 196,193,108,89,208 ; vmulps %ymm8,%ymm2,%ymm2
DB 197,252,92,210 ; vsubps %ymm2,%ymm0,%ymm2
DB 197,116,89,193 ; vmulps %ymm1,%ymm1,%ymm8
DB 196,65,116,89,192 ; vmulps %ymm8,%ymm1,%ymm8
- DB 196,98,125,24,13,198,120,0,0 ; vbroadcastss 0x78c6(%rip),%ymm9 # b1e0 <_sk_clut_4D_avx+0x2407>
+ DB 196,98,125,24,13,51,123,0,0 ; vbroadcastss 0x7b33(%rip),%ymm9 # b594 <_sk_clut_4D_avx+0x2407>
DB 196,65,52,194,208,1 ; vcmpltps %ymm8,%ymm9,%ymm10
- DB 196,98,125,24,29,187,120,0,0 ; vbroadcastss 0x78bb(%rip),%ymm11 # b1e4 <_sk_clut_4D_avx+0x240b>
+ DB 196,98,125,24,29,40,123,0,0 ; vbroadcastss 0x7b28(%rip),%ymm11 # b598 <_sk_clut_4D_avx+0x240b>
DB 196,193,116,88,203 ; vaddps %ymm11,%ymm1,%ymm1
- DB 196,98,125,24,37,177,120,0,0 ; vbroadcastss 0x78b1(%rip),%ymm12 # b1e8 <_sk_clut_4D_avx+0x240f>
+ DB 196,98,125,24,37,30,123,0,0 ; vbroadcastss 0x7b1e(%rip),%ymm12 # b59c <_sk_clut_4D_avx+0x240f>
DB 196,193,116,89,204 ; vmulps %ymm12,%ymm1,%ymm1
DB 196,67,117,74,192,160 ; vblendvps %ymm10,%ymm8,%ymm1,%ymm8
DB 197,252,89,200 ; vmulps %ymm0,%ymm0,%ymm1
@@ -10444,9 +10882,9 @@ _sk_lab_to_xyz_avx LABEL PROC
DB 196,193,108,88,211 ; vaddps %ymm11,%ymm2,%ymm2
DB 196,193,108,89,212 ; vmulps %ymm12,%ymm2,%ymm2
DB 196,227,109,74,208,144 ; vblendvps %ymm9,%ymm0,%ymm2,%ymm2
- DB 196,226,125,24,5,103,120,0,0 ; vbroadcastss 0x7867(%rip),%ymm0 # b1ec <_sk_clut_4D_avx+0x2413>
+ DB 196,226,125,24,5,212,122,0,0 ; vbroadcastss 0x7ad4(%rip),%ymm0 # b5a0 <_sk_clut_4D_avx+0x2413>
DB 197,188,89,192 ; vmulps %ymm0,%ymm8,%ymm0
- DB 196,98,125,24,5,94,120,0,0 ; vbroadcastss 0x785e(%rip),%ymm8 # b1f0 <_sk_clut_4D_avx+0x2417>
+ DB 196,98,125,24,5,203,122,0,0 ; vbroadcastss 0x7acb(%rip),%ymm8 # b5a4 <_sk_clut_4D_avx+0x2417>
DB 196,193,108,89,208 ; vmulps %ymm8,%ymm2,%ymm2
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -10461,15 +10899,15 @@ _sk_load_a8_avx LABEL PROC
DB 76,3,8 ; add (%rax),%r9
DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 117,67 ; jne 39f7 <_sk_load_a8_avx+0x5c>
+ DB 117,67 ; jne 3b3e <_sk_load_a8_avx+0x5c>
DB 196,194,121,48,4,25 ; vpmovzxbw (%r9,%rbx,1),%xmm0
- DB 197,249,219,5,78,123,0,0 ; vpand 0x7b4e(%rip),%xmm0,%xmm0 # b510 <_sk_clut_4D_avx+0x2737>
+ DB 197,249,219,5,119,126,0,0 ; vpand 0x7e77(%rip),%xmm0,%xmm0 # b980 <_sk_clut_4D_avx+0x27f3>
DB 197,241,239,201 ; vpxor %xmm1,%xmm1,%xmm1
DB 197,249,105,201 ; vpunpckhwd %xmm1,%xmm0,%xmm1
DB 196,226,121,51,192 ; vpmovzxwd %xmm0,%xmm0
DB 196,227,125,24,193,1 ; vinsertf128 $0x1,%xmm1,%ymm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,226,125,24,13,18,120,0,0 ; vbroadcastss 0x7812(%rip),%ymm1 # b1f4 <_sk_clut_4D_avx+0x241b>
+ DB 196,226,125,24,13,127,122,0,0 ; vbroadcastss 0x7a7f(%rip),%ymm1 # b5a8 <_sk_clut_4D_avx+0x241b>
DB 197,252,89,217 ; vmulps %ymm1,%ymm0,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
DB 197,252,87,192 ; vxorps %ymm0,%ymm0,%ymm0
@@ -10482,15 +10920,15 @@ _sk_load_a8_avx LABEL PROC
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 65,254,202 ; dec %r10b
DB 65,128,250,6 ; cmp $0x6,%r10b
- DB 119,175 ; ja 39ba <_sk_load_a8_avx+0x1f>
+ DB 119,175 ; ja 3b01 <_sk_load_a8_avx+0x1f>
DB 69,15,182,210 ; movzbl %r10b,%r10d
- DB 76,141,29,122,0,0,0 ; lea 0x7a(%rip),%r11 # 3a90 <_sk_load_a8_avx+0xf5>
+ DB 76,141,29,119,0,0,0 ; lea 0x77(%rip),%r11 # 3bd4 <_sk_load_a8_avx+0xf2>
DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
DB 65,15,182,4,25 ; movzbl (%r9,%rbx,1),%eax
DB 197,249,110,192 ; vmovd %eax,%xmm0
- DB 235,144 ; jmp 39ba <_sk_load_a8_avx+0x1f>
+ DB 235,144 ; jmp 3b01 <_sk_load_a8_avx+0x1f>
DB 65,15,182,68,25,2 ; movzbl 0x2(%r9,%rbx,1),%eax
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 197,249,196,192,2 ; vpinsrw $0x2,%eax,%xmm0,%xmm0
@@ -10498,7 +10936,7 @@ _sk_load_a8_avx LABEL PROC
DB 197,249,110,200 ; vmovd %eax,%xmm1
DB 196,226,121,48,201 ; vpmovzxbw %xmm1,%xmm1
DB 196,227,121,14,193,3 ; vpblendw $0x3,%xmm1,%xmm0,%xmm0
- DB 233,104,255,255,255 ; jmpq 39ba <_sk_load_a8_avx+0x1f>
+ DB 233,104,255,255,255 ; jmpq 3b01 <_sk_load_a8_avx+0x1f>
DB 65,15,182,68,25,6 ; movzbl 0x6(%r9,%rbx,1),%eax
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 197,249,196,192,6 ; vpinsrw $0x6,%eax,%xmm0,%xmm0
@@ -10509,24 +10947,23 @@ _sk_load_a8_avx LABEL PROC
DB 196,193,121,110,12,25 ; vmovd (%r9,%rbx,1),%xmm1
DB 196,226,121,48,201 ; vpmovzxbw %xmm1,%xmm1
DB 196,227,113,14,192,240 ; vpblendw $0xf0,%xmm0,%xmm1,%xmm0
- DB 233,45,255,255,255 ; jmpq 39ba <_sk_load_a8_avx+0x1f>
- DB 15,31,0 ; nopl (%rax)
- DB 143 ; (bad)
+ DB 233,45,255,255,255 ; jmpq 3b01 <_sk_load_a8_avx+0x1f>
+ DB 146 ; xchg %eax,%edx
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,169,255,255,255,154 ; ljmp *-0x65000001(%rcx)
+ DB 255,172,255,255,255,157,255 ; ljmp *-0x620001(%rdi,%rdi,8)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,231 ; jmpq *%rdi
+ DB 234 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 220,255 ; fdivr %st,%st(7)
+ DB 223,255 ; (bad)
DB 255 ; (bad)
- DB 255,209 ; callq *%rcx
+ DB 255,212 ; callq *%rsp
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,194 ; inc %edx
+ DB 255,197 ; inc %ebp
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; .byte 0xff
@@ -10541,15 +10978,15 @@ _sk_load_a8_dst_avx LABEL PROC
DB 76,3,8 ; add (%rax),%r9
DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 117,67 ; jne 3b08 <_sk_load_a8_dst_avx+0x5c>
+ DB 117,67 ; jne 3c4c <_sk_load_a8_dst_avx+0x5c>
DB 196,194,121,48,36,25 ; vpmovzxbw (%r9,%rbx,1),%xmm4
- DB 197,217,219,37,77,122,0,0 ; vpand 0x7a4d(%rip),%xmm4,%xmm4 # b520 <_sk_clut_4D_avx+0x2747>
+ DB 197,217,219,37,121,125,0,0 ; vpand 0x7d79(%rip),%xmm4,%xmm4 # b990 <_sk_clut_4D_avx+0x2803>
DB 197,209,239,237 ; vpxor %xmm5,%xmm5,%xmm5
DB 197,217,105,237 ; vpunpckhwd %xmm5,%xmm4,%xmm5
DB 196,226,121,51,228 ; vpmovzxwd %xmm4,%xmm4
DB 196,227,93,24,229,1 ; vinsertf128 $0x1,%xmm5,%ymm4,%ymm4
DB 197,252,91,228 ; vcvtdq2ps %ymm4,%ymm4
- DB 196,226,125,24,45,5,119,0,0 ; vbroadcastss 0x7705(%rip),%ymm5 # b1f8 <_sk_clut_4D_avx+0x241f>
+ DB 196,226,125,24,45,117,121,0,0 ; vbroadcastss 0x7975(%rip),%ymm5 # b5ac <_sk_clut_4D_avx+0x241f>
DB 197,220,89,253 ; vmulps %ymm5,%ymm4,%ymm7
DB 72,173 ; lods %ds:(%rsi),%rax
DB 197,220,87,228 ; vxorps %ymm4,%ymm4,%ymm4
@@ -10562,15 +10999,15 @@ _sk_load_a8_dst_avx LABEL PROC
DB 197,217,239,228 ; vpxor %xmm4,%xmm4,%xmm4
DB 65,254,202 ; dec %r10b
DB 65,128,250,6 ; cmp $0x6,%r10b
- DB 119,175 ; ja 3acb <_sk_load_a8_dst_avx+0x1f>
+ DB 119,175 ; ja 3c0f <_sk_load_a8_dst_avx+0x1f>
DB 69,15,182,210 ; movzbl %r10b,%r10d
- DB 76,141,29,121,0,0,0 ; lea 0x79(%rip),%r11 # 3ba0 <_sk_load_a8_dst_avx+0xf4>
+ DB 76,141,29,121,0,0,0 ; lea 0x79(%rip),%r11 # 3ce4 <_sk_load_a8_dst_avx+0xf4>
DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
DB 65,15,182,4,25 ; movzbl (%r9,%rbx,1),%eax
DB 197,249,110,224 ; vmovd %eax,%xmm4
- DB 235,144 ; jmp 3acb <_sk_load_a8_dst_avx+0x1f>
+ DB 235,144 ; jmp 3c0f <_sk_load_a8_dst_avx+0x1f>
DB 65,15,182,68,25,2 ; movzbl 0x2(%r9,%rbx,1),%eax
DB 197,217,239,228 ; vpxor %xmm4,%xmm4,%xmm4
DB 197,217,196,224,2 ; vpinsrw $0x2,%eax,%xmm4,%xmm4
@@ -10578,7 +11015,7 @@ _sk_load_a8_dst_avx LABEL PROC
DB 197,249,110,232 ; vmovd %eax,%xmm5
DB 196,226,121,48,237 ; vpmovzxbw %xmm5,%xmm5
DB 196,227,89,14,229,3 ; vpblendw $0x3,%xmm5,%xmm4,%xmm4
- DB 233,104,255,255,255 ; jmpq 3acb <_sk_load_a8_dst_avx+0x1f>
+ DB 233,104,255,255,255 ; jmpq 3c0f <_sk_load_a8_dst_avx+0x1f>
DB 65,15,182,68,25,6 ; movzbl 0x6(%r9,%rbx,1),%eax
DB 197,217,239,228 ; vpxor %xmm4,%xmm4,%xmm4
DB 197,217,196,224,6 ; vpinsrw $0x6,%eax,%xmm4,%xmm4
@@ -10589,7 +11026,7 @@ _sk_load_a8_dst_avx LABEL PROC
DB 196,193,121,110,44,25 ; vmovd (%r9,%rbx,1),%xmm5
DB 196,226,121,48,237 ; vpmovzxbw %xmm5,%xmm5
DB 196,227,81,14,228,240 ; vpblendw $0xf0,%xmm4,%xmm5,%xmm4
- DB 233,45,255,255,255 ; jmpq 3acb <_sk_load_a8_dst_avx+0x1f>
+ DB 233,45,255,255,255 ; jmpq 3c0f <_sk_load_a8_dst_avx+0x1f>
DB 102,144 ; xchg %ax,%ax
DB 144 ; nop
DB 255 ; (bad)
@@ -10598,7 +11035,7 @@ _sk_load_a8_dst_avx LABEL PROC
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 232,255,255,255,221 ; callq ffffffffde003bb0 <_sk_clut_4D_avx+0xffffffffddffadd7>
+ DB 232,255,255,255,221 ; callq ffffffffde003cf4 <_sk_clut_4D_avx+0xffffffffddffab67>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,210 ; callq *%rdx
@@ -10651,7 +11088,7 @@ _sk_gather_a8_avx LABEL PROC
DB 196,226,121,49,192 ; vpmovzxbd %xmm0,%xmm0
DB 196,227,117,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,226,125,24,13,116,117,0,0 ; vbroadcastss 0x7574(%rip),%ymm1 # b1fc <_sk_clut_4D_avx+0x2423>
+ DB 196,226,125,24,13,228,119,0,0 ; vbroadcastss 0x77e4(%rip),%ymm1 # b5b0 <_sk_clut_4D_avx+0x2423>
DB 197,252,89,217 ; vmulps %ymm1,%ymm0,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
DB 197,252,87,192 ; vxorps %ymm0,%ymm0,%ymm0
@@ -10668,14 +11105,14 @@ _sk_store_a8_avx LABEL PROC
DB 77,15,175,202 ; imul %r10,%r9
DB 76,3,8 ; add (%rax),%r9
DB 72,99,218 ; movslq %edx,%rbx
- DB 196,98,125,24,5,71,117,0,0 ; vbroadcastss 0x7547(%rip),%ymm8 # b200 <_sk_clut_4D_avx+0x2427>
+ DB 196,98,125,24,5,183,119,0,0 ; vbroadcastss 0x77b7(%rip),%ymm8 # b5b4 <_sk_clut_4D_avx+0x2427>
DB 196,65,100,89,192 ; vmulps %ymm8,%ymm3,%ymm8
DB 196,65,125,91,192 ; vcvtps2dq %ymm8,%ymm8
DB 196,67,125,25,193,1 ; vextractf128 $0x1,%ymm8,%xmm9
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 196,65,57,103,192 ; vpackuswb %xmm8,%xmm8,%xmm8
DB 77,133,192 ; test %r8,%r8
- DB 117,11 ; jne 3ce3 <_sk_store_a8_avx+0x47>
+ DB 117,11 ; jne 3e27 <_sk_store_a8_avx+0x47>
DB 196,65,123,17,4,25 ; vmovsd %xmm8,(%r9,%rbx,1)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 91 ; pop %rbx
@@ -10684,25 +11121,25 @@ _sk_store_a8_avx LABEL PROC
DB 65,128,226,7 ; and $0x7,%r10b
DB 65,254,202 ; dec %r10b
DB 65,128,250,6 ; cmp $0x6,%r10b
- DB 119,235 ; ja 3cde <_sk_store_a8_avx+0x42>
+ DB 119,235 ; ja 3e22 <_sk_store_a8_avx+0x42>
DB 196,66,121,48,192 ; vpmovzxbw %xmm8,%xmm8
DB 69,15,182,210 ; movzbl %r10b,%r10d
- DB 76,141,29,85,0,0,0 ; lea 0x55(%rip),%r11 # 3d58 <_sk_store_a8_avx+0xbc>
+ DB 76,141,29,85,0,0,0 ; lea 0x55(%rip),%r11 # 3e9c <_sk_store_a8_avx+0xbc>
DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
DB 196,67,121,20,4,25,0 ; vpextrb $0x0,%xmm8,(%r9,%rbx,1)
- DB 235,201 ; jmp 3cde <_sk_store_a8_avx+0x42>
+ DB 235,201 ; jmp 3e22 <_sk_store_a8_avx+0x42>
DB 196,67,121,20,68,25,2,4 ; vpextrb $0x4,%xmm8,0x2(%r9,%rbx,1)
- DB 196,98,57,0,5,10,120,0,0 ; vpshufb 0x780a(%rip),%xmm8,%xmm8 # b530 <_sk_clut_4D_avx+0x2757>
+ DB 196,98,57,0,5,54,123,0,0 ; vpshufb 0x7b36(%rip),%xmm8,%xmm8 # b9a0 <_sk_clut_4D_avx+0x2813>
DB 196,67,121,21,4,25,0 ; vpextrw $0x0,%xmm8,(%r9,%rbx,1)
- DB 235,175 ; jmp 3cde <_sk_store_a8_avx+0x42>
+ DB 235,175 ; jmp 3e22 <_sk_store_a8_avx+0x42>
DB 196,67,121,20,68,25,6,12 ; vpextrb $0xc,%xmm8,0x6(%r9,%rbx,1)
DB 196,67,121,20,68,25,5,10 ; vpextrb $0xa,%xmm8,0x5(%r9,%rbx,1)
DB 196,67,121,20,68,25,4,8 ; vpextrb $0x8,%xmm8,0x4(%r9,%rbx,1)
- DB 196,98,57,0,5,240,119,0,0 ; vpshufb 0x77f0(%rip),%xmm8,%xmm8 # b540 <_sk_clut_4D_avx+0x2767>
+ DB 196,98,57,0,5,28,123,0,0 ; vpshufb 0x7b1c(%rip),%xmm8,%xmm8 # b9b0 <_sk_clut_4D_avx+0x2823>
DB 196,65,121,126,4,25 ; vmovd %xmm8,(%r9,%rbx,1)
- DB 235,134 ; jmp 3cde <_sk_store_a8_avx+0x42>
+ DB 235,134 ; jmp 3e22 <_sk_store_a8_avx+0x42>
DB 180,255 ; mov $0xff,%ah
DB 255 ; (bad)
DB 255,197 ; inc %ebp
@@ -10733,18 +11170,18 @@ _sk_load_g8_avx LABEL PROC
DB 76,3,8 ; add (%rax),%r9
DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 117,72 ; jne 3dd5 <_sk_load_g8_avx+0x61>
+ DB 117,72 ; jne 3f19 <_sk_load_g8_avx+0x61>
DB 196,194,121,48,4,25 ; vpmovzxbw (%r9,%rbx,1),%xmm0
- DB 197,249,219,5,181,119,0,0 ; vpand 0x77b5(%rip),%xmm0,%xmm0 # b550 <_sk_clut_4D_avx+0x2777>
+ DB 197,249,219,5,225,122,0,0 ; vpand 0x7ae1(%rip),%xmm0,%xmm0 # b9c0 <_sk_clut_4D_avx+0x2833>
DB 197,241,239,201 ; vpxor %xmm1,%xmm1,%xmm1
DB 197,249,105,201 ; vpunpckhwd %xmm1,%xmm0,%xmm1
DB 196,226,121,51,192 ; vpmovzxwd %xmm0,%xmm0
DB 196,227,125,24,193,1 ; vinsertf128 $0x1,%xmm1,%ymm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,226,125,24,13,73,116,0,0 ; vbroadcastss 0x7449(%rip),%ymm1 # b204 <_sk_clut_4D_avx+0x242b>
+ DB 196,226,125,24,13,185,118,0,0 ; vbroadcastss 0x76b9(%rip),%ymm1 # b5b8 <_sk_clut_4D_avx+0x242b>
DB 197,252,89,193 ; vmulps %ymm1,%ymm0,%ymm0
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,29,62,116,0,0 ; vbroadcastss 0x743e(%rip),%ymm3 # b208 <_sk_clut_4D_avx+0x242f>
+ DB 196,226,125,24,29,174,118,0,0 ; vbroadcastss 0x76ae(%rip),%ymm3 # b5bc <_sk_clut_4D_avx+0x242f>
DB 197,252,40,200 ; vmovaps %ymm0,%ymm1
DB 197,252,40,208 ; vmovaps %ymm0,%ymm2
DB 91 ; pop %rbx
@@ -10754,15 +11191,15 @@ _sk_load_g8_avx LABEL PROC
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 65,254,202 ; dec %r10b
DB 65,128,250,6 ; cmp $0x6,%r10b
- DB 119,170 ; ja 3d93 <_sk_load_g8_avx+0x1f>
+ DB 119,170 ; ja 3ed7 <_sk_load_g8_avx+0x1f>
DB 69,15,182,210 ; movzbl %r10b,%r10d
- DB 76,141,29,120,0,0,0 ; lea 0x78(%rip),%r11 # 3e6c <_sk_load_g8_avx+0xf8>
+ DB 76,141,29,120,0,0,0 ; lea 0x78(%rip),%r11 # 3fb0 <_sk_load_g8_avx+0xf8>
DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
DB 65,15,182,4,25 ; movzbl (%r9,%rbx,1),%eax
DB 197,249,110,192 ; vmovd %eax,%xmm0
- DB 235,139 ; jmp 3d93 <_sk_load_g8_avx+0x1f>
+ DB 235,139 ; jmp 3ed7 <_sk_load_g8_avx+0x1f>
DB 65,15,182,68,25,2 ; movzbl 0x2(%r9,%rbx,1),%eax
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 197,249,196,192,2 ; vpinsrw $0x2,%eax,%xmm0,%xmm0
@@ -10770,7 +11207,7 @@ _sk_load_g8_avx LABEL PROC
DB 197,249,110,200 ; vmovd %eax,%xmm1
DB 196,226,121,48,201 ; vpmovzxbw %xmm1,%xmm1
DB 196,227,121,14,193,3 ; vpblendw $0x3,%xmm1,%xmm0,%xmm0
- DB 233,99,255,255,255 ; jmpq 3d93 <_sk_load_g8_avx+0x1f>
+ DB 233,99,255,255,255 ; jmpq 3ed7 <_sk_load_g8_avx+0x1f>
DB 65,15,182,68,25,6 ; movzbl 0x6(%r9,%rbx,1),%eax
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 197,249,196,192,6 ; vpinsrw $0x6,%eax,%xmm0,%xmm0
@@ -10781,7 +11218,7 @@ _sk_load_g8_avx LABEL PROC
DB 196,193,121,110,12,25 ; vmovd (%r9,%rbx,1),%xmm1
DB 196,226,121,48,201 ; vpmovzxbw %xmm1,%xmm1
DB 196,227,113,14,192,240 ; vpblendw $0xf0,%xmm0,%xmm1,%xmm0
- DB 233,40,255,255,255 ; jmpq 3d93 <_sk_load_g8_avx+0x1f>
+ DB 233,40,255,255,255 ; jmpq 3ed7 <_sk_load_g8_avx+0x1f>
DB 144 ; nop
DB 145 ; xchg %eax,%ecx
DB 255 ; (bad)
@@ -10790,7 +11227,7 @@ _sk_load_g8_avx LABEL PROC
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 233,255,255,255,222 ; jmpq ffffffffdf003e7c <_sk_clut_4D_avx+0xffffffffdeffb0a3>
+ DB 233,255,255,255,222 ; jmpq ffffffffdf003fc0 <_sk_clut_4D_avx+0xffffffffdeffae33>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,211 ; callq *%rbx
@@ -10811,18 +11248,18 @@ _sk_load_g8_dst_avx LABEL PROC
DB 76,3,8 ; add (%rax),%r9
DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 117,72 ; jne 3ee9 <_sk_load_g8_dst_avx+0x61>
+ DB 117,72 ; jne 402d <_sk_load_g8_dst_avx+0x61>
DB 196,194,121,48,36,25 ; vpmovzxbw (%r9,%rbx,1),%xmm4
- DB 197,217,219,37,177,118,0,0 ; vpand 0x76b1(%rip),%xmm4,%xmm4 # b560 <_sk_clut_4D_avx+0x2787>
+ DB 197,217,219,37,221,121,0,0 ; vpand 0x79dd(%rip),%xmm4,%xmm4 # b9d0 <_sk_clut_4D_avx+0x2843>
DB 197,209,239,237 ; vpxor %xmm5,%xmm5,%xmm5
DB 197,217,105,237 ; vpunpckhwd %xmm5,%xmm4,%xmm5
DB 196,226,121,51,228 ; vpmovzxwd %xmm4,%xmm4
DB 196,227,93,24,229,1 ; vinsertf128 $0x1,%xmm5,%ymm4,%ymm4
DB 197,252,91,228 ; vcvtdq2ps %ymm4,%ymm4
- DB 196,226,125,24,45,61,115,0,0 ; vbroadcastss 0x733d(%rip),%ymm5 # b20c <_sk_clut_4D_avx+0x2433>
+ DB 196,226,125,24,45,173,117,0,0 ; vbroadcastss 0x75ad(%rip),%ymm5 # b5c0 <_sk_clut_4D_avx+0x2433>
DB 197,220,89,229 ; vmulps %ymm5,%ymm4,%ymm4
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,61,50,115,0,0 ; vbroadcastss 0x7332(%rip),%ymm7 # b210 <_sk_clut_4D_avx+0x2437>
+ DB 196,226,125,24,61,162,117,0,0 ; vbroadcastss 0x75a2(%rip),%ymm7 # b5c4 <_sk_clut_4D_avx+0x2437>
DB 197,252,40,236 ; vmovaps %ymm4,%ymm5
DB 197,252,40,244 ; vmovaps %ymm4,%ymm6
DB 91 ; pop %rbx
@@ -10832,15 +11269,15 @@ _sk_load_g8_dst_avx LABEL PROC
DB 197,217,239,228 ; vpxor %xmm4,%xmm4,%xmm4
DB 65,254,202 ; dec %r10b
DB 65,128,250,6 ; cmp $0x6,%r10b
- DB 119,170 ; ja 3ea7 <_sk_load_g8_dst_avx+0x1f>
+ DB 119,170 ; ja 3feb <_sk_load_g8_dst_avx+0x1f>
DB 69,15,182,210 ; movzbl %r10b,%r10d
- DB 76,141,29,120,0,0,0 ; lea 0x78(%rip),%r11 # 3f80 <_sk_load_g8_dst_avx+0xf8>
+ DB 76,141,29,120,0,0,0 ; lea 0x78(%rip),%r11 # 40c4 <_sk_load_g8_dst_avx+0xf8>
DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
DB 65,15,182,4,25 ; movzbl (%r9,%rbx,1),%eax
DB 197,249,110,224 ; vmovd %eax,%xmm4
- DB 235,139 ; jmp 3ea7 <_sk_load_g8_dst_avx+0x1f>
+ DB 235,139 ; jmp 3feb <_sk_load_g8_dst_avx+0x1f>
DB 65,15,182,68,25,2 ; movzbl 0x2(%r9,%rbx,1),%eax
DB 197,217,239,228 ; vpxor %xmm4,%xmm4,%xmm4
DB 197,217,196,224,2 ; vpinsrw $0x2,%eax,%xmm4,%xmm4
@@ -10848,7 +11285,7 @@ _sk_load_g8_dst_avx LABEL PROC
DB 197,249,110,232 ; vmovd %eax,%xmm5
DB 196,226,121,48,237 ; vpmovzxbw %xmm5,%xmm5
DB 196,227,89,14,229,3 ; vpblendw $0x3,%xmm5,%xmm4,%xmm4
- DB 233,99,255,255,255 ; jmpq 3ea7 <_sk_load_g8_dst_avx+0x1f>
+ DB 233,99,255,255,255 ; jmpq 3feb <_sk_load_g8_dst_avx+0x1f>
DB 65,15,182,68,25,6 ; movzbl 0x6(%r9,%rbx,1),%eax
DB 197,217,239,228 ; vpxor %xmm4,%xmm4,%xmm4
DB 197,217,196,224,6 ; vpinsrw $0x6,%eax,%xmm4,%xmm4
@@ -10859,7 +11296,7 @@ _sk_load_g8_dst_avx LABEL PROC
DB 196,193,121,110,44,25 ; vmovd (%r9,%rbx,1),%xmm5
DB 196,226,121,48,237 ; vpmovzxbw %xmm5,%xmm5
DB 196,227,81,14,228,240 ; vpblendw $0xf0,%xmm4,%xmm5,%xmm4
- DB 233,40,255,255,255 ; jmpq 3ea7 <_sk_load_g8_dst_avx+0x1f>
+ DB 233,40,255,255,255 ; jmpq 3feb <_sk_load_g8_dst_avx+0x1f>
DB 144 ; nop
DB 145 ; xchg %eax,%ecx
DB 255 ; (bad)
@@ -10868,7 +11305,7 @@ _sk_load_g8_dst_avx LABEL PROC
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 233,255,255,255,222 ; jmpq ffffffffdf003f90 <_sk_clut_4D_avx+0xffffffffdeffb1b7>
+ DB 233,255,255,255,222 ; jmpq ffffffffdf0040d4 <_sk_clut_4D_avx+0xffffffffdeffaf47>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,211 ; callq *%rbx
@@ -10921,10 +11358,10 @@ _sk_gather_g8_avx LABEL PROC
DB 196,226,121,49,192 ; vpmovzxbd %xmm0,%xmm0
DB 196,227,117,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,226,125,24,13,172,113,0,0 ; vbroadcastss 0x71ac(%rip),%ymm1 # b214 <_sk_clut_4D_avx+0x243b>
+ DB 196,226,125,24,13,28,116,0,0 ; vbroadcastss 0x741c(%rip),%ymm1 # b5c8 <_sk_clut_4D_avx+0x243b>
DB 197,252,89,193 ; vmulps %ymm1,%ymm0,%ymm0
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,29,161,113,0,0 ; vbroadcastss 0x71a1(%rip),%ymm3 # b218 <_sk_clut_4D_avx+0x243f>
+ DB 196,226,125,24,29,17,116,0,0 ; vbroadcastss 0x7411(%rip),%ymm3 # b5cc <_sk_clut_4D_avx+0x243f>
DB 197,252,40,200 ; vmovaps %ymm0,%ymm1
DB 197,252,40,208 ; vmovaps %ymm0,%ymm2
DB 255,224 ; jmpq *%rax
@@ -10940,29 +11377,29 @@ _sk_load_565_avx LABEL PROC
DB 76,3,8 ; add (%rax),%r9
DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 15,133,129,0,0,0 ; jne 4122 <_sk_load_565_avx+0xa1>
+ DB 15,133,129,0,0,0 ; jne 4266 <_sk_load_565_avx+0xa1>
DB 196,193,122,111,4,89 ; vmovdqu (%r9,%rbx,2),%xmm0
DB 197,241,239,201 ; vpxor %xmm1,%xmm1,%xmm1
DB 197,249,105,201 ; vpunpckhwd %xmm1,%xmm0,%xmm1
DB 196,226,121,51,192 ; vpmovzxwd %xmm0,%xmm0
DB 196,227,125,24,209,1 ; vinsertf128 $0x1,%xmm1,%ymm0,%ymm2
- DB 196,226,125,24,5,89,113,0,0 ; vbroadcastss 0x7159(%rip),%ymm0 # b21c <_sk_clut_4D_avx+0x2443>
+ DB 196,226,125,24,5,201,115,0,0 ; vbroadcastss 0x73c9(%rip),%ymm0 # b5d0 <_sk_clut_4D_avx+0x2443>
DB 197,236,84,192 ; vandps %ymm0,%ymm2,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,226,125,24,13,76,113,0,0 ; vbroadcastss 0x714c(%rip),%ymm1 # b220 <_sk_clut_4D_avx+0x2447>
+ DB 196,226,125,24,13,188,115,0,0 ; vbroadcastss 0x73bc(%rip),%ymm1 # b5d4 <_sk_clut_4D_avx+0x2447>
DB 197,252,89,193 ; vmulps %ymm1,%ymm0,%ymm0
- DB 196,226,125,24,13,67,113,0,0 ; vbroadcastss 0x7143(%rip),%ymm1 # b224 <_sk_clut_4D_avx+0x244b>
+ DB 196,226,125,24,13,179,115,0,0 ; vbroadcastss 0x73b3(%rip),%ymm1 # b5d8 <_sk_clut_4D_avx+0x244b>
DB 197,236,84,201 ; vandps %ymm1,%ymm2,%ymm1
DB 197,252,91,201 ; vcvtdq2ps %ymm1,%ymm1
- DB 196,226,125,24,29,54,113,0,0 ; vbroadcastss 0x7136(%rip),%ymm3 # b228 <_sk_clut_4D_avx+0x244f>
+ DB 196,226,125,24,29,166,115,0,0 ; vbroadcastss 0x73a6(%rip),%ymm3 # b5dc <_sk_clut_4D_avx+0x244f>
DB 197,244,89,203 ; vmulps %ymm3,%ymm1,%ymm1
- DB 196,226,125,24,29,45,113,0,0 ; vbroadcastss 0x712d(%rip),%ymm3 # b22c <_sk_clut_4D_avx+0x2453>
+ DB 196,226,125,24,29,157,115,0,0 ; vbroadcastss 0x739d(%rip),%ymm3 # b5e0 <_sk_clut_4D_avx+0x2453>
DB 197,236,84,211 ; vandps %ymm3,%ymm2,%ymm2
DB 197,252,91,210 ; vcvtdq2ps %ymm2,%ymm2
- DB 196,226,125,24,29,32,113,0,0 ; vbroadcastss 0x7120(%rip),%ymm3 # b230 <_sk_clut_4D_avx+0x2457>
+ DB 196,226,125,24,29,144,115,0,0 ; vbroadcastss 0x7390(%rip),%ymm3 # b5e4 <_sk_clut_4D_avx+0x2457>
DB 197,236,89,211 ; vmulps %ymm3,%ymm2,%ymm2
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,29,21,113,0,0 ; vbroadcastss 0x7115(%rip),%ymm3 # b234 <_sk_clut_4D_avx+0x245b>
+ DB 196,226,125,24,29,133,115,0,0 ; vbroadcastss 0x7385(%rip),%ymm3 # b5e8 <_sk_clut_4D_avx+0x245b>
DB 91 ; pop %rbx
DB 255,224 ; jmpq *%rax
DB 69,137,194 ; mov %r8d,%r10d
@@ -10970,27 +11407,27 @@ _sk_load_565_avx LABEL PROC
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 65,254,202 ; dec %r10b
DB 65,128,250,6 ; cmp $0x6,%r10b
- DB 15,135,109,255,255,255 ; ja 40a7 <_sk_load_565_avx+0x26>
+ DB 15,135,109,255,255,255 ; ja 41eb <_sk_load_565_avx+0x26>
DB 69,15,182,210 ; movzbl %r10b,%r10d
- DB 76,141,29,99,0,0,0 ; lea 0x63(%rip),%r11 # 41a8 <_sk_load_565_avx+0x127>
+ DB 76,141,29,99,0,0,0 ; lea 0x63(%rip),%r11 # 42ec <_sk_load_565_avx+0x127>
DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
DB 65,15,183,4,89 ; movzwl (%r9,%rbx,2),%eax
DB 197,249,110,192 ; vmovd %eax,%xmm0
- DB 233,75,255,255,255 ; jmpq 40a7 <_sk_load_565_avx+0x26>
+ DB 233,75,255,255,255 ; jmpq 41eb <_sk_load_565_avx+0x26>
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 196,193,121,196,68,89,4,2 ; vpinsrw $0x2,0x4(%r9,%rbx,2),%xmm0,%xmm0
DB 196,193,121,110,12,89 ; vmovd (%r9,%rbx,2),%xmm1
DB 196,227,121,14,193,3 ; vpblendw $0x3,%xmm1,%xmm0,%xmm0
- DB 233,46,255,255,255 ; jmpq 40a7 <_sk_load_565_avx+0x26>
+ DB 233,46,255,255,255 ; jmpq 41eb <_sk_load_565_avx+0x26>
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 196,193,121,196,68,89,12,6 ; vpinsrw $0x6,0xc(%r9,%rbx,2),%xmm0,%xmm0
DB 196,193,121,196,68,89,10,5 ; vpinsrw $0x5,0xa(%r9,%rbx,2),%xmm0,%xmm0
DB 196,193,121,196,68,89,8,4 ; vpinsrw $0x4,0x8(%r9,%rbx,2),%xmm0,%xmm0
DB 196,193,122,126,12,89 ; vmovq (%r9,%rbx,2),%xmm1
DB 196,227,113,14,192,240 ; vpblendw $0xf0,%xmm0,%xmm1,%xmm0
- DB 233,1,255,255,255 ; jmpq 40a7 <_sk_load_565_avx+0x26>
+ DB 233,1,255,255,255 ; jmpq 41eb <_sk_load_565_avx+0x26>
DB 102,144 ; xchg %ax,%ax
DB 166 ; cmpsb %es:(%rdi),%ds:(%rsi)
DB 255 ; (bad)
@@ -11022,29 +11459,29 @@ _sk_load_565_dst_avx LABEL PROC
DB 76,3,8 ; add (%rax),%r9
DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 15,133,129,0,0,0 ; jne 4265 <_sk_load_565_dst_avx+0xa1>
+ DB 15,133,129,0,0,0 ; jne 43a9 <_sk_load_565_dst_avx+0xa1>
DB 196,193,122,111,36,89 ; vmovdqu (%r9,%rbx,2),%xmm4
DB 197,209,239,237 ; vpxor %xmm5,%xmm5,%xmm5
DB 197,217,105,237 ; vpunpckhwd %xmm5,%xmm4,%xmm5
DB 196,226,121,51,228 ; vpmovzxwd %xmm4,%xmm4
DB 196,227,93,24,245,1 ; vinsertf128 $0x1,%xmm5,%ymm4,%ymm6
- DB 196,226,125,24,37,50,112,0,0 ; vbroadcastss 0x7032(%rip),%ymm4 # b238 <_sk_clut_4D_avx+0x245f>
+ DB 196,226,125,24,37,162,114,0,0 ; vbroadcastss 0x72a2(%rip),%ymm4 # b5ec <_sk_clut_4D_avx+0x245f>
DB 197,204,84,228 ; vandps %ymm4,%ymm6,%ymm4
DB 197,252,91,228 ; vcvtdq2ps %ymm4,%ymm4
- DB 196,226,125,24,45,37,112,0,0 ; vbroadcastss 0x7025(%rip),%ymm5 # b23c <_sk_clut_4D_avx+0x2463>
+ DB 196,226,125,24,45,149,114,0,0 ; vbroadcastss 0x7295(%rip),%ymm5 # b5f0 <_sk_clut_4D_avx+0x2463>
DB 197,220,89,229 ; vmulps %ymm5,%ymm4,%ymm4
- DB 196,226,125,24,45,28,112,0,0 ; vbroadcastss 0x701c(%rip),%ymm5 # b240 <_sk_clut_4D_avx+0x2467>
+ DB 196,226,125,24,45,140,114,0,0 ; vbroadcastss 0x728c(%rip),%ymm5 # b5f4 <_sk_clut_4D_avx+0x2467>
DB 197,204,84,237 ; vandps %ymm5,%ymm6,%ymm5
DB 197,252,91,237 ; vcvtdq2ps %ymm5,%ymm5
- DB 196,226,125,24,61,15,112,0,0 ; vbroadcastss 0x700f(%rip),%ymm7 # b244 <_sk_clut_4D_avx+0x246b>
+ DB 196,226,125,24,61,127,114,0,0 ; vbroadcastss 0x727f(%rip),%ymm7 # b5f8 <_sk_clut_4D_avx+0x246b>
DB 197,212,89,239 ; vmulps %ymm7,%ymm5,%ymm5
- DB 196,226,125,24,61,6,112,0,0 ; vbroadcastss 0x7006(%rip),%ymm7 # b248 <_sk_clut_4D_avx+0x246f>
+ DB 196,226,125,24,61,118,114,0,0 ; vbroadcastss 0x7276(%rip),%ymm7 # b5fc <_sk_clut_4D_avx+0x246f>
DB 197,204,84,247 ; vandps %ymm7,%ymm6,%ymm6
DB 197,252,91,246 ; vcvtdq2ps %ymm6,%ymm6
- DB 196,226,125,24,61,249,111,0,0 ; vbroadcastss 0x6ff9(%rip),%ymm7 # b24c <_sk_clut_4D_avx+0x2473>
+ DB 196,226,125,24,61,105,114,0,0 ; vbroadcastss 0x7269(%rip),%ymm7 # b600 <_sk_clut_4D_avx+0x2473>
DB 197,204,89,247 ; vmulps %ymm7,%ymm6,%ymm6
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,61,238,111,0,0 ; vbroadcastss 0x6fee(%rip),%ymm7 # b250 <_sk_clut_4D_avx+0x2477>
+ DB 196,226,125,24,61,94,114,0,0 ; vbroadcastss 0x725e(%rip),%ymm7 # b604 <_sk_clut_4D_avx+0x2477>
DB 91 ; pop %rbx
DB 255,224 ; jmpq *%rax
DB 69,137,194 ; mov %r8d,%r10d
@@ -11052,27 +11489,27 @@ _sk_load_565_dst_avx LABEL PROC
DB 197,217,239,228 ; vpxor %xmm4,%xmm4,%xmm4
DB 65,254,202 ; dec %r10b
DB 65,128,250,6 ; cmp $0x6,%r10b
- DB 15,135,109,255,255,255 ; ja 41ea <_sk_load_565_dst_avx+0x26>
+ DB 15,135,109,255,255,255 ; ja 432e <_sk_load_565_dst_avx+0x26>
DB 69,15,182,210 ; movzbl %r10b,%r10d
- DB 76,141,29,100,0,0,0 ; lea 0x64(%rip),%r11 # 42ec <_sk_load_565_dst_avx+0x128>
+ DB 76,141,29,100,0,0,0 ; lea 0x64(%rip),%r11 # 4430 <_sk_load_565_dst_avx+0x128>
DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
DB 65,15,183,4,89 ; movzwl (%r9,%rbx,2),%eax
DB 197,249,110,224 ; vmovd %eax,%xmm4
- DB 233,75,255,255,255 ; jmpq 41ea <_sk_load_565_dst_avx+0x26>
+ DB 233,75,255,255,255 ; jmpq 432e <_sk_load_565_dst_avx+0x26>
DB 197,217,239,228 ; vpxor %xmm4,%xmm4,%xmm4
DB 196,193,89,196,100,89,4,2 ; vpinsrw $0x2,0x4(%r9,%rbx,2),%xmm4,%xmm4
DB 196,193,121,110,44,89 ; vmovd (%r9,%rbx,2),%xmm5
DB 196,227,89,14,229,3 ; vpblendw $0x3,%xmm5,%xmm4,%xmm4
- DB 233,46,255,255,255 ; jmpq 41ea <_sk_load_565_dst_avx+0x26>
+ DB 233,46,255,255,255 ; jmpq 432e <_sk_load_565_dst_avx+0x26>
DB 197,217,239,228 ; vpxor %xmm4,%xmm4,%xmm4
DB 196,193,89,196,100,89,12,6 ; vpinsrw $0x6,0xc(%r9,%rbx,2),%xmm4,%xmm4
DB 196,193,89,196,100,89,10,5 ; vpinsrw $0x5,0xa(%r9,%rbx,2),%xmm4,%xmm4
DB 196,193,89,196,100,89,8,4 ; vpinsrw $0x4,0x8(%r9,%rbx,2),%xmm4,%xmm4
DB 196,193,122,126,44,89 ; vmovq (%r9,%rbx,2),%xmm5
DB 196,227,81,14,228,240 ; vpblendw $0xf0,%xmm4,%xmm5,%xmm4
- DB 233,1,255,255,255 ; jmpq 41ea <_sk_load_565_dst_avx+0x26>
+ DB 233,1,255,255,255 ; jmpq 432e <_sk_load_565_dst_avx+0x26>
DB 15,31,0 ; nopl (%rax)
DB 165 ; movsl %ds:(%rsi),%es:(%rdi)
DB 255 ; (bad)
@@ -11142,23 +11579,23 @@ _sk_gather_565_avx LABEL PROC
DB 197,249,105,201 ; vpunpckhwd %xmm1,%xmm0,%xmm1
DB 196,226,121,51,192 ; vpmovzxwd %xmm0,%xmm0
DB 196,227,125,24,209,1 ; vinsertf128 $0x1,%xmm1,%ymm0,%ymm2
- DB 196,226,125,24,5,122,110,0,0 ; vbroadcastss 0x6e7a(%rip),%ymm0 # b254 <_sk_clut_4D_avx+0x247b>
+ DB 196,226,125,24,5,234,112,0,0 ; vbroadcastss 0x70ea(%rip),%ymm0 # b608 <_sk_clut_4D_avx+0x247b>
DB 197,236,84,192 ; vandps %ymm0,%ymm2,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,226,125,24,13,109,110,0,0 ; vbroadcastss 0x6e6d(%rip),%ymm1 # b258 <_sk_clut_4D_avx+0x247f>
+ DB 196,226,125,24,13,221,112,0,0 ; vbroadcastss 0x70dd(%rip),%ymm1 # b60c <_sk_clut_4D_avx+0x247f>
DB 197,252,89,193 ; vmulps %ymm1,%ymm0,%ymm0
- DB 196,226,125,24,13,100,110,0,0 ; vbroadcastss 0x6e64(%rip),%ymm1 # b25c <_sk_clut_4D_avx+0x2483>
+ DB 196,226,125,24,13,212,112,0,0 ; vbroadcastss 0x70d4(%rip),%ymm1 # b610 <_sk_clut_4D_avx+0x2483>
DB 197,236,84,201 ; vandps %ymm1,%ymm2,%ymm1
DB 197,252,91,201 ; vcvtdq2ps %ymm1,%ymm1
- DB 196,226,125,24,29,87,110,0,0 ; vbroadcastss 0x6e57(%rip),%ymm3 # b260 <_sk_clut_4D_avx+0x2487>
+ DB 196,226,125,24,29,199,112,0,0 ; vbroadcastss 0x70c7(%rip),%ymm3 # b614 <_sk_clut_4D_avx+0x2487>
DB 197,244,89,203 ; vmulps %ymm3,%ymm1,%ymm1
- DB 196,226,125,24,29,78,110,0,0 ; vbroadcastss 0x6e4e(%rip),%ymm3 # b264 <_sk_clut_4D_avx+0x248b>
+ DB 196,226,125,24,29,190,112,0,0 ; vbroadcastss 0x70be(%rip),%ymm3 # b618 <_sk_clut_4D_avx+0x248b>
DB 197,236,84,211 ; vandps %ymm3,%ymm2,%ymm2
DB 197,252,91,210 ; vcvtdq2ps %ymm2,%ymm2
- DB 196,226,125,24,29,65,110,0,0 ; vbroadcastss 0x6e41(%rip),%ymm3 # b268 <_sk_clut_4D_avx+0x248f>
+ DB 196,226,125,24,29,177,112,0,0 ; vbroadcastss 0x70b1(%rip),%ymm3 # b61c <_sk_clut_4D_avx+0x248f>
DB 197,236,89,211 ; vmulps %ymm3,%ymm2,%ymm2
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,29,54,110,0,0 ; vbroadcastss 0x6e36(%rip),%ymm3 # b26c <_sk_clut_4D_avx+0x2493>
+ DB 196,226,125,24,29,166,112,0,0 ; vbroadcastss 0x70a6(%rip),%ymm3 # b620 <_sk_clut_4D_avx+0x2493>
DB 255,224 ; jmpq *%rax
PUBLIC _sk_store_565_avx
@@ -11171,14 +11608,14 @@ _sk_store_565_avx LABEL PROC
DB 77,1,201 ; add %r9,%r9
DB 76,3,8 ; add (%rax),%r9
DB 72,99,218 ; movslq %edx,%rbx
- DB 196,98,125,24,5,24,110,0,0 ; vbroadcastss 0x6e18(%rip),%ymm8 # b270 <_sk_clut_4D_avx+0x2497>
+ DB 196,98,125,24,5,136,112,0,0 ; vbroadcastss 0x7088(%rip),%ymm8 # b624 <_sk_clut_4D_avx+0x2497>
DB 196,65,124,89,200 ; vmulps %ymm8,%ymm0,%ymm9
DB 196,65,125,91,201 ; vcvtps2dq %ymm9,%ymm9
DB 196,193,41,114,241,11 ; vpslld $0xb,%xmm9,%xmm10
DB 196,67,125,25,201,1 ; vextractf128 $0x1,%ymm9,%xmm9
DB 196,193,49,114,241,11 ; vpslld $0xb,%xmm9,%xmm9
DB 196,67,45,24,201,1 ; vinsertf128 $0x1,%xmm9,%ymm10,%ymm9
- DB 196,98,125,24,21,241,109,0,0 ; vbroadcastss 0x6df1(%rip),%ymm10 # b274 <_sk_clut_4D_avx+0x249b>
+ DB 196,98,125,24,21,97,112,0,0 ; vbroadcastss 0x7061(%rip),%ymm10 # b628 <_sk_clut_4D_avx+0x249b>
DB 196,65,116,89,210 ; vmulps %ymm10,%ymm1,%ymm10
DB 196,65,125,91,210 ; vcvtps2dq %ymm10,%ymm10
DB 196,193,33,114,242,5 ; vpslld $0x5,%xmm10,%xmm11
@@ -11192,7 +11629,7 @@ _sk_store_565_avx LABEL PROC
DB 196,67,125,25,193,1 ; vextractf128 $0x1,%ymm8,%xmm9
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 77,133,192 ; test %r8,%r8
- DB 117,11 ; jne 44d4 <_sk_store_565_avx+0x9c>
+ DB 117,11 ; jne 4618 <_sk_store_565_avx+0x9c>
DB 196,65,122,127,4,89 ; vmovdqu %xmm8,(%r9,%rbx,2)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 91 ; pop %rbx
@@ -11201,22 +11638,22 @@ _sk_store_565_avx LABEL PROC
DB 65,128,226,7 ; and $0x7,%r10b
DB 65,254,202 ; dec %r10b
DB 65,128,250,6 ; cmp $0x6,%r10b
- DB 119,235 ; ja 44cf <_sk_store_565_avx+0x97>
+ DB 119,235 ; ja 4613 <_sk_store_565_avx+0x97>
DB 69,15,182,210 ; movzbl %r10b,%r10d
- DB 76,141,29,69,0,0,0 ; lea 0x45(%rip),%r11 # 4534 <_sk_store_565_avx+0xfc>
+ DB 76,141,29,69,0,0,0 ; lea 0x45(%rip),%r11 # 4678 <_sk_store_565_avx+0xfc>
DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
DB 196,67,121,21,4,89,0 ; vpextrw $0x0,%xmm8,(%r9,%rbx,2)
- DB 235,206 ; jmp 44cf <_sk_store_565_avx+0x97>
+ DB 235,206 ; jmp 4613 <_sk_store_565_avx+0x97>
DB 196,67,121,21,68,89,4,2 ; vpextrw $0x2,%xmm8,0x4(%r9,%rbx,2)
DB 196,65,121,126,4,89 ; vmovd %xmm8,(%r9,%rbx,2)
- DB 235,190 ; jmp 44cf <_sk_store_565_avx+0x97>
+ DB 235,190 ; jmp 4613 <_sk_store_565_avx+0x97>
DB 196,67,121,21,68,89,12,6 ; vpextrw $0x6,%xmm8,0xc(%r9,%rbx,2)
DB 196,67,121,21,68,89,10,5 ; vpextrw $0x5,%xmm8,0xa(%r9,%rbx,2)
DB 196,67,121,21,68,89,8,4 ; vpextrw $0x4,%xmm8,0x8(%r9,%rbx,2)
DB 196,65,121,214,4,89 ; vmovq %xmm8,(%r9,%rbx,2)
- DB 235,158 ; jmp 44cf <_sk_store_565_avx+0x97>
+ DB 235,158 ; jmp 4613 <_sk_store_565_avx+0x97>
DB 15,31,0 ; nopl (%rax)
DB 196 ; (bad)
DB 255 ; (bad)
@@ -11253,31 +11690,31 @@ _sk_load_4444_avx LABEL PROC
DB 76,3,8 ; add (%rax),%r9
DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 15,133,153,0,0,0 ; jne 4609 <_sk_load_4444_avx+0xb9>
+ DB 15,133,153,0,0,0 ; jne 474d <_sk_load_4444_avx+0xb9>
DB 196,193,122,111,4,89 ; vmovdqu (%r9,%rbx,2),%xmm0
DB 197,241,239,201 ; vpxor %xmm1,%xmm1,%xmm1
DB 197,249,105,201 ; vpunpckhwd %xmm1,%xmm0,%xmm1
DB 196,226,121,51,192 ; vpmovzxwd %xmm0,%xmm0
DB 196,227,125,24,217,1 ; vinsertf128 $0x1,%xmm1,%ymm0,%ymm3
- DB 196,226,125,24,5,230,108,0,0 ; vbroadcastss 0x6ce6(%rip),%ymm0 # b278 <_sk_clut_4D_avx+0x249f>
+ DB 196,226,125,24,5,86,111,0,0 ; vbroadcastss 0x6f56(%rip),%ymm0 # b62c <_sk_clut_4D_avx+0x249f>
DB 197,228,84,192 ; vandps %ymm0,%ymm3,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,226,125,24,13,217,108,0,0 ; vbroadcastss 0x6cd9(%rip),%ymm1 # b27c <_sk_clut_4D_avx+0x24a3>
+ DB 196,226,125,24,13,73,111,0,0 ; vbroadcastss 0x6f49(%rip),%ymm1 # b630 <_sk_clut_4D_avx+0x24a3>
DB 197,252,89,193 ; vmulps %ymm1,%ymm0,%ymm0
- DB 196,226,125,24,13,208,108,0,0 ; vbroadcastss 0x6cd0(%rip),%ymm1 # b280 <_sk_clut_4D_avx+0x24a7>
+ DB 196,226,125,24,13,64,111,0,0 ; vbroadcastss 0x6f40(%rip),%ymm1 # b634 <_sk_clut_4D_avx+0x24a7>
DB 197,228,84,201 ; vandps %ymm1,%ymm3,%ymm1
DB 197,252,91,201 ; vcvtdq2ps %ymm1,%ymm1
- DB 196,226,125,24,21,195,108,0,0 ; vbroadcastss 0x6cc3(%rip),%ymm2 # b284 <_sk_clut_4D_avx+0x24ab>
+ DB 196,226,125,24,21,51,111,0,0 ; vbroadcastss 0x6f33(%rip),%ymm2 # b638 <_sk_clut_4D_avx+0x24ab>
DB 197,244,89,202 ; vmulps %ymm2,%ymm1,%ymm1
- DB 196,226,125,24,21,186,108,0,0 ; vbroadcastss 0x6cba(%rip),%ymm2 # b288 <_sk_clut_4D_avx+0x24af>
+ DB 196,226,125,24,21,42,111,0,0 ; vbroadcastss 0x6f2a(%rip),%ymm2 # b63c <_sk_clut_4D_avx+0x24af>
DB 197,228,84,210 ; vandps %ymm2,%ymm3,%ymm2
DB 197,252,91,210 ; vcvtdq2ps %ymm2,%ymm2
- DB 196,98,125,24,5,173,108,0,0 ; vbroadcastss 0x6cad(%rip),%ymm8 # b28c <_sk_clut_4D_avx+0x24b3>
+ DB 196,98,125,24,5,29,111,0,0 ; vbroadcastss 0x6f1d(%rip),%ymm8 # b640 <_sk_clut_4D_avx+0x24b3>
DB 196,193,108,89,208 ; vmulps %ymm8,%ymm2,%ymm2
- DB 196,98,125,24,5,163,108,0,0 ; vbroadcastss 0x6ca3(%rip),%ymm8 # b290 <_sk_clut_4D_avx+0x24b7>
+ DB 196,98,125,24,5,19,111,0,0 ; vbroadcastss 0x6f13(%rip),%ymm8 # b644 <_sk_clut_4D_avx+0x24b7>
DB 196,193,100,84,216 ; vandps %ymm8,%ymm3,%ymm3
DB 197,252,91,219 ; vcvtdq2ps %ymm3,%ymm3
- DB 196,98,125,24,5,149,108,0,0 ; vbroadcastss 0x6c95(%rip),%ymm8 # b294 <_sk_clut_4D_avx+0x24bb>
+ DB 196,98,125,24,5,5,111,0,0 ; vbroadcastss 0x6f05(%rip),%ymm8 # b648 <_sk_clut_4D_avx+0x24bb>
DB 196,193,100,89,216 ; vmulps %ymm8,%ymm3,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
DB 91 ; pop %rbx
@@ -11287,27 +11724,27 @@ _sk_load_4444_avx LABEL PROC
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 65,254,202 ; dec %r10b
DB 65,128,250,6 ; cmp $0x6,%r10b
- DB 15,135,85,255,255,255 ; ja 4576 <_sk_load_4444_avx+0x26>
+ DB 15,135,85,255,255,255 ; ja 46ba <_sk_load_4444_avx+0x26>
DB 69,15,182,210 ; movzbl %r10b,%r10d
- DB 76,141,29,100,0,0,0 ; lea 0x64(%rip),%r11 # 4690 <_sk_load_4444_avx+0x140>
+ DB 76,141,29,100,0,0,0 ; lea 0x64(%rip),%r11 # 47d4 <_sk_load_4444_avx+0x140>
DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
DB 65,15,183,4,89 ; movzwl (%r9,%rbx,2),%eax
DB 197,249,110,192 ; vmovd %eax,%xmm0
- DB 233,51,255,255,255 ; jmpq 4576 <_sk_load_4444_avx+0x26>
+ DB 233,51,255,255,255 ; jmpq 46ba <_sk_load_4444_avx+0x26>
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 196,193,121,196,68,89,4,2 ; vpinsrw $0x2,0x4(%r9,%rbx,2),%xmm0,%xmm0
DB 196,193,121,110,12,89 ; vmovd (%r9,%rbx,2),%xmm1
DB 196,227,121,14,193,3 ; vpblendw $0x3,%xmm1,%xmm0,%xmm0
- DB 233,22,255,255,255 ; jmpq 4576 <_sk_load_4444_avx+0x26>
+ DB 233,22,255,255,255 ; jmpq 46ba <_sk_load_4444_avx+0x26>
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 196,193,121,196,68,89,12,6 ; vpinsrw $0x6,0xc(%r9,%rbx,2),%xmm0,%xmm0
DB 196,193,121,196,68,89,10,5 ; vpinsrw $0x5,0xa(%r9,%rbx,2),%xmm0,%xmm0
DB 196,193,121,196,68,89,8,4 ; vpinsrw $0x4,0x8(%r9,%rbx,2),%xmm0,%xmm0
DB 196,193,122,126,12,89 ; vmovq (%r9,%rbx,2),%xmm1
DB 196,227,113,14,192,240 ; vpblendw $0xf0,%xmm0,%xmm1,%xmm0
- DB 233,233,254,255,255 ; jmpq 4576 <_sk_load_4444_avx+0x26>
+ DB 233,233,254,255,255 ; jmpq 46ba <_sk_load_4444_avx+0x26>
DB 15,31,0 ; nopl (%rax)
DB 165 ; movsl %ds:(%rsi),%es:(%rdi)
DB 255 ; (bad)
@@ -11342,31 +11779,31 @@ _sk_load_4444_dst_avx LABEL PROC
DB 76,3,8 ; add (%rax),%r9
DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 15,133,153,0,0,0 ; jne 4765 <_sk_load_4444_dst_avx+0xb9>
+ DB 15,133,153,0,0,0 ; jne 48a9 <_sk_load_4444_dst_avx+0xb9>
DB 196,193,122,111,36,89 ; vmovdqu (%r9,%rbx,2),%xmm4
DB 197,209,239,237 ; vpxor %xmm5,%xmm5,%xmm5
DB 197,217,105,237 ; vpunpckhwd %xmm5,%xmm4,%xmm5
DB 196,226,121,51,228 ; vpmovzxwd %xmm4,%xmm4
DB 196,227,93,24,253,1 ; vinsertf128 $0x1,%xmm5,%ymm4,%ymm7
- DB 196,226,125,24,37,170,107,0,0 ; vbroadcastss 0x6baa(%rip),%ymm4 # b298 <_sk_clut_4D_avx+0x24bf>
+ DB 196,226,125,24,37,26,110,0,0 ; vbroadcastss 0x6e1a(%rip),%ymm4 # b64c <_sk_clut_4D_avx+0x24bf>
DB 197,196,84,228 ; vandps %ymm4,%ymm7,%ymm4
DB 197,252,91,228 ; vcvtdq2ps %ymm4,%ymm4
- DB 196,226,125,24,45,157,107,0,0 ; vbroadcastss 0x6b9d(%rip),%ymm5 # b29c <_sk_clut_4D_avx+0x24c3>
+ DB 196,226,125,24,45,13,110,0,0 ; vbroadcastss 0x6e0d(%rip),%ymm5 # b650 <_sk_clut_4D_avx+0x24c3>
DB 197,220,89,229 ; vmulps %ymm5,%ymm4,%ymm4
- DB 196,226,125,24,45,148,107,0,0 ; vbroadcastss 0x6b94(%rip),%ymm5 # b2a0 <_sk_clut_4D_avx+0x24c7>
+ DB 196,226,125,24,45,4,110,0,0 ; vbroadcastss 0x6e04(%rip),%ymm5 # b654 <_sk_clut_4D_avx+0x24c7>
DB 197,196,84,237 ; vandps %ymm5,%ymm7,%ymm5
DB 197,252,91,237 ; vcvtdq2ps %ymm5,%ymm5
- DB 196,226,125,24,53,135,107,0,0 ; vbroadcastss 0x6b87(%rip),%ymm6 # b2a4 <_sk_clut_4D_avx+0x24cb>
+ DB 196,226,125,24,53,247,109,0,0 ; vbroadcastss 0x6df7(%rip),%ymm6 # b658 <_sk_clut_4D_avx+0x24cb>
DB 197,212,89,238 ; vmulps %ymm6,%ymm5,%ymm5
- DB 196,226,125,24,53,126,107,0,0 ; vbroadcastss 0x6b7e(%rip),%ymm6 # b2a8 <_sk_clut_4D_avx+0x24cf>
+ DB 196,226,125,24,53,238,109,0,0 ; vbroadcastss 0x6dee(%rip),%ymm6 # b65c <_sk_clut_4D_avx+0x24cf>
DB 197,196,84,246 ; vandps %ymm6,%ymm7,%ymm6
DB 197,252,91,246 ; vcvtdq2ps %ymm6,%ymm6
- DB 196,98,125,24,5,113,107,0,0 ; vbroadcastss 0x6b71(%rip),%ymm8 # b2ac <_sk_clut_4D_avx+0x24d3>
+ DB 196,98,125,24,5,225,109,0,0 ; vbroadcastss 0x6de1(%rip),%ymm8 # b660 <_sk_clut_4D_avx+0x24d3>
DB 196,193,76,89,240 ; vmulps %ymm8,%ymm6,%ymm6
- DB 196,98,125,24,5,103,107,0,0 ; vbroadcastss 0x6b67(%rip),%ymm8 # b2b0 <_sk_clut_4D_avx+0x24d7>
+ DB 196,98,125,24,5,215,109,0,0 ; vbroadcastss 0x6dd7(%rip),%ymm8 # b664 <_sk_clut_4D_avx+0x24d7>
DB 196,193,68,84,248 ; vandps %ymm8,%ymm7,%ymm7
DB 197,252,91,255 ; vcvtdq2ps %ymm7,%ymm7
- DB 196,98,125,24,5,89,107,0,0 ; vbroadcastss 0x6b59(%rip),%ymm8 # b2b4 <_sk_clut_4D_avx+0x24db>
+ DB 196,98,125,24,5,201,109,0,0 ; vbroadcastss 0x6dc9(%rip),%ymm8 # b668 <_sk_clut_4D_avx+0x24db>
DB 196,193,68,89,248 ; vmulps %ymm8,%ymm7,%ymm7
DB 72,173 ; lods %ds:(%rsi),%rax
DB 91 ; pop %rbx
@@ -11376,27 +11813,27 @@ _sk_load_4444_dst_avx LABEL PROC
DB 197,217,239,228 ; vpxor %xmm4,%xmm4,%xmm4
DB 65,254,202 ; dec %r10b
DB 65,128,250,6 ; cmp $0x6,%r10b
- DB 15,135,85,255,255,255 ; ja 46d2 <_sk_load_4444_dst_avx+0x26>
+ DB 15,135,85,255,255,255 ; ja 4816 <_sk_load_4444_dst_avx+0x26>
DB 69,15,182,210 ; movzbl %r10b,%r10d
- DB 76,141,29,100,0,0,0 ; lea 0x64(%rip),%r11 # 47ec <_sk_load_4444_dst_avx+0x140>
+ DB 76,141,29,100,0,0,0 ; lea 0x64(%rip),%r11 # 4930 <_sk_load_4444_dst_avx+0x140>
DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
DB 65,15,183,4,89 ; movzwl (%r9,%rbx,2),%eax
DB 197,249,110,224 ; vmovd %eax,%xmm4
- DB 233,51,255,255,255 ; jmpq 46d2 <_sk_load_4444_dst_avx+0x26>
+ DB 233,51,255,255,255 ; jmpq 4816 <_sk_load_4444_dst_avx+0x26>
DB 197,217,239,228 ; vpxor %xmm4,%xmm4,%xmm4
DB 196,193,89,196,100,89,4,2 ; vpinsrw $0x2,0x4(%r9,%rbx,2),%xmm4,%xmm4
DB 196,193,121,110,44,89 ; vmovd (%r9,%rbx,2),%xmm5
DB 196,227,89,14,229,3 ; vpblendw $0x3,%xmm5,%xmm4,%xmm4
- DB 233,22,255,255,255 ; jmpq 46d2 <_sk_load_4444_dst_avx+0x26>
+ DB 233,22,255,255,255 ; jmpq 4816 <_sk_load_4444_dst_avx+0x26>
DB 197,217,239,228 ; vpxor %xmm4,%xmm4,%xmm4
DB 196,193,89,196,100,89,12,6 ; vpinsrw $0x6,0xc(%r9,%rbx,2),%xmm4,%xmm4
DB 196,193,89,196,100,89,10,5 ; vpinsrw $0x5,0xa(%r9,%rbx,2),%xmm4,%xmm4
DB 196,193,89,196,100,89,8,4 ; vpinsrw $0x4,0x8(%r9,%rbx,2),%xmm4,%xmm4
DB 196,193,122,126,44,89 ; vmovq (%r9,%rbx,2),%xmm5
DB 196,227,81,14,228,240 ; vpblendw $0xf0,%xmm4,%xmm5,%xmm4
- DB 233,233,254,255,255 ; jmpq 46d2 <_sk_load_4444_dst_avx+0x26>
+ DB 233,233,254,255,255 ; jmpq 4816 <_sk_load_4444_dst_avx+0x26>
DB 15,31,0 ; nopl (%rax)
DB 165 ; movsl %ds:(%rsi),%es:(%rdi)
DB 255 ; (bad)
@@ -11466,25 +11903,25 @@ _sk_gather_4444_avx LABEL PROC
DB 197,249,105,201 ; vpunpckhwd %xmm1,%xmm0,%xmm1
DB 196,226,121,51,192 ; vpmovzxwd %xmm0,%xmm0
DB 196,227,125,24,217,1 ; vinsertf128 $0x1,%xmm1,%ymm0,%ymm3
- DB 196,226,125,24,5,222,105,0,0 ; vbroadcastss 0x69de(%rip),%ymm0 # b2b8 <_sk_clut_4D_avx+0x24df>
+ DB 196,226,125,24,5,78,108,0,0 ; vbroadcastss 0x6c4e(%rip),%ymm0 # b66c <_sk_clut_4D_avx+0x24df>
DB 197,228,84,192 ; vandps %ymm0,%ymm3,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,226,125,24,13,209,105,0,0 ; vbroadcastss 0x69d1(%rip),%ymm1 # b2bc <_sk_clut_4D_avx+0x24e3>
+ DB 196,226,125,24,13,65,108,0,0 ; vbroadcastss 0x6c41(%rip),%ymm1 # b670 <_sk_clut_4D_avx+0x24e3>
DB 197,252,89,193 ; vmulps %ymm1,%ymm0,%ymm0
- DB 196,226,125,24,13,200,105,0,0 ; vbroadcastss 0x69c8(%rip),%ymm1 # b2c0 <_sk_clut_4D_avx+0x24e7>
+ DB 196,226,125,24,13,56,108,0,0 ; vbroadcastss 0x6c38(%rip),%ymm1 # b674 <_sk_clut_4D_avx+0x24e7>
DB 197,228,84,201 ; vandps %ymm1,%ymm3,%ymm1
DB 197,252,91,201 ; vcvtdq2ps %ymm1,%ymm1
- DB 196,226,125,24,21,187,105,0,0 ; vbroadcastss 0x69bb(%rip),%ymm2 # b2c4 <_sk_clut_4D_avx+0x24eb>
+ DB 196,226,125,24,21,43,108,0,0 ; vbroadcastss 0x6c2b(%rip),%ymm2 # b678 <_sk_clut_4D_avx+0x24eb>
DB 197,244,89,202 ; vmulps %ymm2,%ymm1,%ymm1
- DB 196,226,125,24,21,178,105,0,0 ; vbroadcastss 0x69b2(%rip),%ymm2 # b2c8 <_sk_clut_4D_avx+0x24ef>
+ DB 196,226,125,24,21,34,108,0,0 ; vbroadcastss 0x6c22(%rip),%ymm2 # b67c <_sk_clut_4D_avx+0x24ef>
DB 197,228,84,210 ; vandps %ymm2,%ymm3,%ymm2
DB 197,252,91,210 ; vcvtdq2ps %ymm2,%ymm2
- DB 196,98,125,24,5,165,105,0,0 ; vbroadcastss 0x69a5(%rip),%ymm8 # b2cc <_sk_clut_4D_avx+0x24f3>
+ DB 196,98,125,24,5,21,108,0,0 ; vbroadcastss 0x6c15(%rip),%ymm8 # b680 <_sk_clut_4D_avx+0x24f3>
DB 196,193,108,89,208 ; vmulps %ymm8,%ymm2,%ymm2
- DB 196,98,125,24,5,155,105,0,0 ; vbroadcastss 0x699b(%rip),%ymm8 # b2d0 <_sk_clut_4D_avx+0x24f7>
+ DB 196,98,125,24,5,11,108,0,0 ; vbroadcastss 0x6c0b(%rip),%ymm8 # b684 <_sk_clut_4D_avx+0x24f7>
DB 196,193,100,84,216 ; vandps %ymm8,%ymm3,%ymm3
DB 197,252,91,219 ; vcvtdq2ps %ymm3,%ymm3
- DB 196,98,125,24,5,141,105,0,0 ; vbroadcastss 0x698d(%rip),%ymm8 # b2d4 <_sk_clut_4D_avx+0x24fb>
+ DB 196,98,125,24,5,253,107,0,0 ; vbroadcastss 0x6bfd(%rip),%ymm8 # b688 <_sk_clut_4D_avx+0x24fb>
DB 196,193,100,89,216 ; vmulps %ymm8,%ymm3,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -11499,7 +11936,7 @@ _sk_store_4444_avx LABEL PROC
DB 77,1,201 ; add %r9,%r9
DB 76,3,8 ; add (%rax),%r9
DB 72,99,218 ; movslq %edx,%rbx
- DB 196,98,125,24,5,104,105,0,0 ; vbroadcastss 0x6968(%rip),%ymm8 # b2d8 <_sk_clut_4D_avx+0x24ff>
+ DB 196,98,125,24,5,216,107,0,0 ; vbroadcastss 0x6bd8(%rip),%ymm8 # b68c <_sk_clut_4D_avx+0x24ff>
DB 196,65,124,89,200 ; vmulps %ymm8,%ymm0,%ymm9
DB 196,65,125,91,201 ; vcvtps2dq %ymm9,%ymm9
DB 196,193,41,114,241,12 ; vpslld $0xc,%xmm9,%xmm10
@@ -11526,7 +11963,7 @@ _sk_store_4444_avx LABEL PROC
DB 196,67,125,25,193,1 ; vextractf128 $0x1,%ymm8,%xmm9
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 77,133,192 ; test %r8,%r8
- DB 117,11 ; jne 4a0a <_sk_store_4444_avx+0xba>
+ DB 117,11 ; jne 4b4e <_sk_store_4444_avx+0xba>
DB 196,65,122,127,4,89 ; vmovdqu %xmm8,(%r9,%rbx,2)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 91 ; pop %rbx
@@ -11535,22 +11972,22 @@ _sk_store_4444_avx LABEL PROC
DB 65,128,226,7 ; and $0x7,%r10b
DB 65,254,202 ; dec %r10b
DB 65,128,250,6 ; cmp $0x6,%r10b
- DB 119,235 ; ja 4a05 <_sk_store_4444_avx+0xb5>
+ DB 119,235 ; ja 4b49 <_sk_store_4444_avx+0xb5>
DB 69,15,182,210 ; movzbl %r10b,%r10d
- DB 76,141,29,67,0,0,0 ; lea 0x43(%rip),%r11 # 4a68 <_sk_store_4444_avx+0x118>
+ DB 76,141,29,67,0,0,0 ; lea 0x43(%rip),%r11 # 4bac <_sk_store_4444_avx+0x118>
DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
DB 196,67,121,21,4,89,0 ; vpextrw $0x0,%xmm8,(%r9,%rbx,2)
- DB 235,206 ; jmp 4a05 <_sk_store_4444_avx+0xb5>
+ DB 235,206 ; jmp 4b49 <_sk_store_4444_avx+0xb5>
DB 196,67,121,21,68,89,4,2 ; vpextrw $0x2,%xmm8,0x4(%r9,%rbx,2)
DB 196,65,121,126,4,89 ; vmovd %xmm8,(%r9,%rbx,2)
- DB 235,190 ; jmp 4a05 <_sk_store_4444_avx+0xb5>
+ DB 235,190 ; jmp 4b49 <_sk_store_4444_avx+0xb5>
DB 196,67,121,21,68,89,12,6 ; vpextrw $0x6,%xmm8,0xc(%r9,%rbx,2)
DB 196,67,121,21,68,89,10,5 ; vpextrw $0x5,%xmm8,0xa(%r9,%rbx,2)
DB 196,67,121,21,68,89,8,4 ; vpextrw $0x4,%xmm8,0x8(%r9,%rbx,2)
DB 196,65,121,214,4,89 ; vmovq %xmm8,(%r9,%rbx,2)
- DB 235,158 ; jmp 4a05 <_sk_store_4444_avx+0xb5>
+ DB 235,158 ; jmp 4b49 <_sk_store_4444_avx+0xb5>
DB 144 ; nop
DB 198 ; (bad)
DB 255 ; (bad)
@@ -11578,117 +12015,191 @@ _sk_store_4444_avx LABEL PROC
PUBLIC _sk_load_8888_avx
_sk_load_8888_avx LABEL PROC
- DB 80 ; push %rax
- DB 73,137,201 ; mov %rcx,%r9
+ DB 83 ; push %rbx
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,99,80,8 ; movslq 0x8(%rax),%r10
- DB 73,99,201 ; movslq %r9d,%rcx
- DB 73,15,175,202 ; imul %r10,%rcx
- DB 72,193,225,2 ; shl $0x2,%rcx
- DB 72,3,8 ; add (%rax),%rcx
- DB 72,99,194 ; movslq %edx,%rax
- DB 72,141,4,129 ; lea (%rcx,%rax,4),%rax
+ DB 76,99,201 ; movslq %ecx,%r9
+ DB 77,15,175,202 ; imul %r10,%r9
+ DB 73,193,225,2 ; shl $0x2,%r9
+ DB 76,3,8 ; add (%rax),%r9
+ DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 15,133,138,0,0,0 ; jne 4b36 <_sk_load_8888_avx+0xb2>
- DB 197,252,16,24 ; vmovups (%rax),%ymm3
- DB 197,124,40,21,200,107,0,0 ; vmovaps 0x6bc8(%rip),%ymm10 # b680 <_sk_clut_4D_avx+0x28a7>
- DB 196,193,100,84,194 ; vandps %ymm10,%ymm3,%ymm0
+ DB 15,133,136,0,0,0 ; jne 4c71 <_sk_load_8888_avx+0xa9>
+ DB 196,65,125,16,12,153 ; vmovupd (%r9,%rbx,4),%ymm9
+ DB 197,125,40,21,137,108,0,0 ; vmovapd 0x6c89(%rip),%ymm10 # b880 <_sk_clut_4D_avx+0x26f3>
+ DB 196,193,53,84,194 ; vandpd %ymm10,%ymm9,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,98,125,24,5,18,104,0,0 ; vbroadcastss 0x6812(%rip),%ymm8 # b2dc <_sk_clut_4D_avx+0x2503>
+ DB 196,98,125,24,5,135,106,0,0 ; vbroadcastss 0x6a87(%rip),%ymm8 # b690 <_sk_clut_4D_avx+0x2503>
DB 196,193,124,89,192 ; vmulps %ymm8,%ymm0,%ymm0
- DB 197,241,114,211,8 ; vpsrld $0x8,%xmm3,%xmm1
- DB 196,195,125,25,217,1 ; vextractf128 $0x1,%ymm3,%xmm9
- DB 196,193,105,114,209,8 ; vpsrld $0x8,%xmm9,%xmm2
+ DB 196,193,113,114,209,8 ; vpsrld $0x8,%xmm9,%xmm1
+ DB 196,99,125,25,203,1 ; vextractf128 $0x1,%ymm9,%xmm3
+ DB 197,233,114,211,8 ; vpsrld $0x8,%xmm3,%xmm2
DB 196,227,117,24,202,1 ; vinsertf128 $0x1,%xmm2,%ymm1,%ymm1
- DB 196,193,116,84,202 ; vandps %ymm10,%ymm1,%ymm1
+ DB 196,193,117,84,202 ; vandpd %ymm10,%ymm1,%ymm1
DB 197,252,91,201 ; vcvtdq2ps %ymm1,%ymm1
DB 196,193,116,89,200 ; vmulps %ymm8,%ymm1,%ymm1
- DB 197,161,114,211,16 ; vpsrld $0x10,%xmm3,%xmm11
- DB 196,193,105,114,209,16 ; vpsrld $0x10,%xmm9,%xmm2
+ DB 196,193,33,114,209,16 ; vpsrld $0x10,%xmm9,%xmm11
+ DB 197,233,114,211,16 ; vpsrld $0x10,%xmm3,%xmm2
DB 196,227,37,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm11,%ymm2
- DB 196,193,108,84,210 ; vandps %ymm10,%ymm2,%ymm2
+ DB 196,193,109,84,210 ; vandpd %ymm10,%ymm2,%ymm2
DB 197,252,91,210 ; vcvtdq2ps %ymm2,%ymm2
DB 196,193,108,89,208 ; vmulps %ymm8,%ymm2,%ymm2
- DB 197,169,114,211,24 ; vpsrld $0x18,%xmm3,%xmm10
- DB 196,193,97,114,209,24 ; vpsrld $0x18,%xmm9,%xmm3
- DB 196,227,45,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm10,%ymm3
+ DB 196,193,49,114,209,24 ; vpsrld $0x18,%xmm9,%xmm9
+ DB 197,225,114,211,24 ; vpsrld $0x18,%xmm3,%xmm3
+ DB 196,227,53,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm9,%ymm3
DB 197,252,91,219 ; vcvtdq2ps %ymm3,%ymm3
DB 196,193,100,89,216 ; vmulps %ymm8,%ymm3,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 76,137,201 ; mov %r9,%rcx
- DB 65,89 ; pop %r9
- DB 255,224 ; jmpq *%rax
- DB 185,8,0,0,0 ; mov $0x8,%ecx
- DB 68,41,193 ; sub %r8d,%ecx
- DB 192,225,3 ; shl $0x3,%cl
- DB 73,199,194,255,255,255,255 ; mov $0xffffffffffffffff,%r10
- DB 73,211,234 ; shr %cl,%r10
- DB 196,193,249,110,194 ; vmovq %r10,%xmm0
- DB 196,226,121,48,192 ; vpmovzxbw %xmm0,%xmm0
- DB 196,226,121,0,13,18,106,0,0 ; vpshufb 0x6a12(%rip),%xmm0,%xmm1 # b570 <_sk_clut_4D_avx+0x2797>
- DB 196,226,121,33,201 ; vpmovsxbd %xmm1,%xmm1
- DB 196,226,121,0,5,20,106,0,0 ; vpshufb 0x6a14(%rip),%xmm0,%xmm0 # b580 <_sk_clut_4D_avx+0x27a7>
- DB 196,226,121,33,192 ; vpmovsxbd %xmm0,%xmm0
- DB 196,227,117,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
- DB 196,226,125,44,24 ; vmaskmovps (%rax),%ymm0,%ymm3
- DB 233,47,255,255,255 ; jmpq 4ab0 <_sk_load_8888_avx+0x2c>
+ DB 91 ; pop %rbx
+ DB 255,224 ; jmpq *%rax
+ DB 69,137,194 ; mov %r8d,%r10d
+ DB 65,128,226,7 ; and $0x7,%r10b
+ DB 196,65,52,87,201 ; vxorps %ymm9,%ymm9,%ymm9
+ DB 65,254,202 ; dec %r10b
+ DB 65,128,250,6 ; cmp $0x6,%r10b
+ DB 15,135,101,255,255,255 ; ja 4bef <_sk_load_8888_avx+0x27>
+ DB 69,15,182,210 ; movzbl %r10b,%r10d
+ DB 76,141,29,147,0,0,0 ; lea 0x93(%rip),%r11 # 4d28 <_sk_load_8888_avx+0x160>
+ DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
+ DB 76,1,216 ; add %r11,%rax
+ DB 255,224 ; jmpq *%rax
+ DB 196,65,122,16,12,153 ; vmovss (%r9,%rbx,4),%xmm9
+ DB 233,70,255,255,255 ; jmpq 4bef <_sk_load_8888_avx+0x27>
+ DB 196,193,121,110,68,153,8 ; vmovd 0x8(%r9,%rbx,4),%xmm0
+ DB 197,249,112,192,68 ; vpshufd $0x44,%xmm0,%xmm0
+ DB 197,244,87,201 ; vxorps %ymm1,%ymm1,%ymm1
+ DB 196,99,117,12,200,4 ; vblendps $0x4,%ymm0,%ymm1,%ymm9
+ DB 196,193,123,16,4,153 ; vmovsd (%r9,%rbx,4),%xmm0
+ DB 196,99,53,13,200,1 ; vblendpd $0x1,%ymm0,%ymm9,%ymm9
+ DB 233,31,255,255,255 ; jmpq 4bef <_sk_load_8888_avx+0x27>
+ DB 196,193,121,110,68,153,24 ; vmovd 0x18(%r9,%rbx,4),%xmm0
+ DB 197,249,112,192,68 ; vpshufd $0x44,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 197,244,87,201 ; vxorps %ymm1,%ymm1,%ymm1
+ DB 196,99,117,12,200,64 ; vblendps $0x40,%ymm0,%ymm1,%ymm9
+ DB 196,99,125,25,200,1 ; vextractf128 $0x1,%ymm9,%xmm0
+ DB 196,195,121,34,68,153,20,1 ; vpinsrd $0x1,0x14(%r9,%rbx,4),%xmm0,%xmm0
+ DB 196,99,53,24,200,1 ; vinsertf128 $0x1,%xmm0,%ymm9,%ymm9
+ DB 196,99,125,25,200,1 ; vextractf128 $0x1,%ymm9,%xmm0
+ DB 196,195,121,34,68,153,16,0 ; vpinsrd $0x0,0x10(%r9,%rbx,4),%xmm0,%xmm0
+ DB 196,99,53,24,200,1 ; vinsertf128 $0x1,%xmm0,%ymm9,%ymm9
+ DB 196,193,121,16,4,153 ; vmovupd (%r9,%rbx,4),%xmm0
+ DB 196,67,125,13,201,12 ; vblendpd $0xc,%ymm9,%ymm0,%ymm9
+ DB 233,202,254,255,255 ; jmpq 4bef <_sk_load_8888_avx+0x27>
+ DB 15,31,0 ; nopl (%rax)
+ DB 118,255 ; jbe 4d29 <_sk_load_8888_avx+0x161>
+ DB 255 ; (bad)
+ DB 255,151,255,255,255,129 ; callq *-0x7e000001(%rdi)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 236 ; in (%dx),%al
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 216,255 ; fdivr %st(7),%st
+ DB 255 ; (bad)
+ DB 255,196 ; inc %esp
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; .byte 0xff
+ DB 168,255 ; test $0xff,%al
+ DB 255 ; (bad)
+ DB 255 ; .byte 0xff
PUBLIC _sk_load_8888_dst_avx
_sk_load_8888_dst_avx LABEL PROC
- DB 80 ; push %rax
- DB 73,137,201 ; mov %rcx,%r9
+ DB 83 ; push %rbx
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,99,80,8 ; movslq 0x8(%rax),%r10
- DB 73,99,201 ; movslq %r9d,%rcx
- DB 73,15,175,202 ; imul %r10,%rcx
- DB 72,193,225,2 ; shl $0x2,%rcx
- DB 72,3,8 ; add (%rax),%rcx
- DB 72,99,194 ; movslq %edx,%rax
- DB 72,141,4,129 ; lea (%rcx,%rax,4),%rax
+ DB 76,99,201 ; movslq %ecx,%r9
+ DB 77,15,175,202 ; imul %r10,%r9
+ DB 73,193,225,2 ; shl $0x2,%r9
+ DB 76,3,8 ; add (%rax),%r9
+ DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 15,133,138,0,0,0 ; jne 4c33 <_sk_load_8888_dst_avx+0xb2>
- DB 197,252,16,56 ; vmovups (%rax),%ymm7
- DB 197,124,40,21,235,106,0,0 ; vmovaps 0x6aeb(%rip),%ymm10 # b6a0 <_sk_clut_4D_avx+0x28c7>
- DB 196,193,68,84,226 ; vandps %ymm10,%ymm7,%ymm4
+ DB 15,133,136,0,0,0 ; jne 4ded <_sk_load_8888_dst_avx+0xa9>
+ DB 196,65,125,16,12,153 ; vmovupd (%r9,%rbx,4),%ymm9
+ DB 197,125,40,21,45,107,0,0 ; vmovapd 0x6b2d(%rip),%ymm10 # b8a0 <_sk_clut_4D_avx+0x2713>
+ DB 196,193,53,84,226 ; vandpd %ymm10,%ymm9,%ymm4
DB 197,252,91,228 ; vcvtdq2ps %ymm4,%ymm4
- DB 196,98,125,24,5,25,103,0,0 ; vbroadcastss 0x6719(%rip),%ymm8 # b2e0 <_sk_clut_4D_avx+0x2507>
+ DB 196,98,125,24,5,15,105,0,0 ; vbroadcastss 0x690f(%rip),%ymm8 # b694 <_sk_clut_4D_avx+0x2507>
DB 196,193,92,89,224 ; vmulps %ymm8,%ymm4,%ymm4
- DB 197,209,114,215,8 ; vpsrld $0x8,%xmm7,%xmm5
- DB 196,195,125,25,249,1 ; vextractf128 $0x1,%ymm7,%xmm9
- DB 196,193,73,114,209,8 ; vpsrld $0x8,%xmm9,%xmm6
+ DB 196,193,81,114,209,8 ; vpsrld $0x8,%xmm9,%xmm5
+ DB 196,99,125,25,207,1 ; vextractf128 $0x1,%ymm9,%xmm7
+ DB 197,201,114,215,8 ; vpsrld $0x8,%xmm7,%xmm6
DB 196,227,85,24,238,1 ; vinsertf128 $0x1,%xmm6,%ymm5,%ymm5
- DB 196,193,84,84,234 ; vandps %ymm10,%ymm5,%ymm5
+ DB 196,193,85,84,234 ; vandpd %ymm10,%ymm5,%ymm5
DB 197,252,91,237 ; vcvtdq2ps %ymm5,%ymm5
DB 196,193,84,89,232 ; vmulps %ymm8,%ymm5,%ymm5
- DB 197,161,114,215,16 ; vpsrld $0x10,%xmm7,%xmm11
- DB 196,193,73,114,209,16 ; vpsrld $0x10,%xmm9,%xmm6
+ DB 196,193,33,114,209,16 ; vpsrld $0x10,%xmm9,%xmm11
+ DB 197,201,114,215,16 ; vpsrld $0x10,%xmm7,%xmm6
DB 196,227,37,24,246,1 ; vinsertf128 $0x1,%xmm6,%ymm11,%ymm6
- DB 196,193,76,84,242 ; vandps %ymm10,%ymm6,%ymm6
+ DB 196,193,77,84,242 ; vandpd %ymm10,%ymm6,%ymm6
DB 197,252,91,246 ; vcvtdq2ps %ymm6,%ymm6
DB 196,193,76,89,240 ; vmulps %ymm8,%ymm6,%ymm6
- DB 197,169,114,215,24 ; vpsrld $0x18,%xmm7,%xmm10
- DB 196,193,65,114,209,24 ; vpsrld $0x18,%xmm9,%xmm7
- DB 196,227,45,24,255,1 ; vinsertf128 $0x1,%xmm7,%ymm10,%ymm7
+ DB 196,193,49,114,209,24 ; vpsrld $0x18,%xmm9,%xmm9
+ DB 197,193,114,215,24 ; vpsrld $0x18,%xmm7,%xmm7
+ DB 196,227,53,24,255,1 ; vinsertf128 $0x1,%xmm7,%ymm9,%ymm7
DB 197,252,91,255 ; vcvtdq2ps %ymm7,%ymm7
DB 196,193,68,89,248 ; vmulps %ymm8,%ymm7,%ymm7
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 76,137,201 ; mov %r9,%rcx
- DB 65,89 ; pop %r9
- DB 255,224 ; jmpq *%rax
- DB 185,8,0,0,0 ; mov $0x8,%ecx
- DB 68,41,193 ; sub %r8d,%ecx
- DB 192,225,3 ; shl $0x3,%cl
- DB 73,199,194,255,255,255,255 ; mov $0xffffffffffffffff,%r10
- DB 73,211,234 ; shr %cl,%r10
- DB 196,193,249,110,226 ; vmovq %r10,%xmm4
- DB 196,226,121,48,228 ; vpmovzxbw %xmm4,%xmm4
- DB 196,226,89,0,45,53,105,0,0 ; vpshufb 0x6935(%rip),%xmm4,%xmm5 # b590 <_sk_clut_4D_avx+0x27b7>
- DB 196,226,121,33,237 ; vpmovsxbd %xmm5,%xmm5
- DB 196,226,89,0,37,55,105,0,0 ; vpshufb 0x6937(%rip),%xmm4,%xmm4 # b5a0 <_sk_clut_4D_avx+0x27c7>
- DB 196,226,121,33,228 ; vpmovsxbd %xmm4,%xmm4
- DB 196,227,85,24,228,1 ; vinsertf128 $0x1,%xmm4,%ymm5,%ymm4
- DB 196,226,93,44,56 ; vmaskmovps (%rax),%ymm4,%ymm7
- DB 233,47,255,255,255 ; jmpq 4bad <_sk_load_8888_dst_avx+0x2c>
+ DB 91 ; pop %rbx
+ DB 255,224 ; jmpq *%rax
+ DB 69,137,194 ; mov %r8d,%r10d
+ DB 65,128,226,7 ; and $0x7,%r10b
+ DB 196,65,52,87,201 ; vxorps %ymm9,%ymm9,%ymm9
+ DB 65,254,202 ; dec %r10b
+ DB 65,128,250,6 ; cmp $0x6,%r10b
+ DB 15,135,101,255,255,255 ; ja 4d6b <_sk_load_8888_dst_avx+0x27>
+ DB 69,15,182,210 ; movzbl %r10b,%r10d
+ DB 76,141,29,147,0,0,0 ; lea 0x93(%rip),%r11 # 4ea4 <_sk_load_8888_dst_avx+0x160>
+ DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
+ DB 76,1,216 ; add %r11,%rax
+ DB 255,224 ; jmpq *%rax
+ DB 196,65,122,16,12,153 ; vmovss (%r9,%rbx,4),%xmm9
+ DB 233,70,255,255,255 ; jmpq 4d6b <_sk_load_8888_dst_avx+0x27>
+ DB 196,193,121,110,100,153,8 ; vmovd 0x8(%r9,%rbx,4),%xmm4
+ DB 197,249,112,228,68 ; vpshufd $0x44,%xmm4,%xmm4
+ DB 197,212,87,237 ; vxorps %ymm5,%ymm5,%ymm5
+ DB 196,99,85,12,204,4 ; vblendps $0x4,%ymm4,%ymm5,%ymm9
+ DB 196,193,123,16,36,153 ; vmovsd (%r9,%rbx,4),%xmm4
+ DB 196,99,53,13,204,1 ; vblendpd $0x1,%ymm4,%ymm9,%ymm9
+ DB 233,31,255,255,255 ; jmpq 4d6b <_sk_load_8888_dst_avx+0x27>
+ DB 196,193,121,110,100,153,24 ; vmovd 0x18(%r9,%rbx,4),%xmm4
+ DB 197,249,112,228,68 ; vpshufd $0x44,%xmm4,%xmm4
+ DB 196,227,125,24,228,1 ; vinsertf128 $0x1,%xmm4,%ymm0,%ymm4
+ DB 197,212,87,237 ; vxorps %ymm5,%ymm5,%ymm5
+ DB 196,99,85,12,204,64 ; vblendps $0x40,%ymm4,%ymm5,%ymm9
+ DB 196,99,125,25,204,1 ; vextractf128 $0x1,%ymm9,%xmm4
+ DB 196,195,89,34,100,153,20,1 ; vpinsrd $0x1,0x14(%r9,%rbx,4),%xmm4,%xmm4
+ DB 196,99,53,24,204,1 ; vinsertf128 $0x1,%xmm4,%ymm9,%ymm9
+ DB 196,99,125,25,204,1 ; vextractf128 $0x1,%ymm9,%xmm4
+ DB 196,195,89,34,100,153,16,0 ; vpinsrd $0x0,0x10(%r9,%rbx,4),%xmm4,%xmm4
+ DB 196,99,53,24,204,1 ; vinsertf128 $0x1,%xmm4,%ymm9,%ymm9
+ DB 196,193,121,16,36,153 ; vmovupd (%r9,%rbx,4),%xmm4
+ DB 196,67,93,13,201,12 ; vblendpd $0xc,%ymm9,%ymm4,%ymm9
+ DB 233,202,254,255,255 ; jmpq 4d6b <_sk_load_8888_dst_avx+0x27>
+ DB 15,31,0 ; nopl (%rax)
+ DB 118,255 ; jbe 4ea5 <_sk_load_8888_dst_avx+0x161>
+ DB 255 ; (bad)
+ DB 255,151,255,255,255,129 ; callq *-0x7e000001(%rdi)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 236 ; in (%dx),%al
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 216,255 ; fdivr %st(7),%st
+ DB 255 ; (bad)
+ DB 255,196 ; inc %esp
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; .byte 0xff
+ DB 168,255 ; test $0xff,%al
+ DB 255 ; (bad)
+ DB 255 ; .byte 0xff
PUBLIC _sk_gather_8888_avx
_sk_gather_8888_avx LABEL PROC
@@ -11725,10 +12236,10 @@ _sk_gather_8888_avx LABEL PROC
DB 73,193,234,32 ; shr $0x20,%r10
DB 196,131,121,34,28,145,3 ; vpinsrd $0x3,(%r9,%r10,4),%xmm0,%xmm3
DB 196,227,61,24,195,1 ; vinsertf128 $0x1,%xmm3,%ymm8,%ymm0
- DB 197,124,40,21,151,105,0,0 ; vmovaps 0x6997(%rip),%ymm10 # b6c0 <_sk_clut_4D_avx+0x28e7>
+ DB 197,124,40,21,85,105,0,0 ; vmovaps 0x6955(%rip),%ymm10 # b8c0 <_sk_clut_4D_avx+0x2733>
DB 196,193,124,84,194 ; vandps %ymm10,%ymm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,98,125,24,13,169,101,0,0 ; vbroadcastss 0x65a9(%rip),%ymm9 # b2e4 <_sk_clut_4D_avx+0x250b>
+ DB 196,98,125,24,13,27,103,0,0 ; vbroadcastss 0x671b(%rip),%ymm9 # b698 <_sk_clut_4D_avx+0x250b>
DB 196,193,124,89,193 ; vmulps %ymm9,%ymm0,%ymm0
DB 196,193,113,114,208,8 ; vpsrld $0x8,%xmm8,%xmm1
DB 197,233,114,211,8 ; vpsrld $0x8,%xmm3,%xmm2
@@ -11752,17 +12263,15 @@ _sk_gather_8888_avx LABEL PROC
PUBLIC _sk_store_8888_avx
_sk_store_8888_avx LABEL PROC
- DB 80 ; push %rax
- DB 73,137,201 ; mov %rcx,%r9
+ DB 83 ; push %rbx
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,99,80,8 ; movslq 0x8(%rax),%r10
- DB 73,99,201 ; movslq %r9d,%rcx
- DB 73,15,175,202 ; imul %r10,%rcx
- DB 72,193,225,2 ; shl $0x2,%rcx
- DB 72,3,8 ; add (%rax),%rcx
- DB 72,99,194 ; movslq %edx,%rax
- DB 72,141,4,129 ; lea (%rcx,%rax,4),%rax
- DB 196,98,125,24,5,36,101,0,0 ; vbroadcastss 0x6524(%rip),%ymm8 # b2e8 <_sk_clut_4D_avx+0x250f>
+ DB 76,99,201 ; movslq %ecx,%r9
+ DB 77,15,175,202 ; imul %r10,%r9
+ DB 73,193,225,2 ; shl $0x2,%r9
+ DB 76,3,8 ; add (%rax),%r9
+ DB 72,99,218 ; movslq %edx,%rbx
+ DB 196,98,125,24,5,157,102,0,0 ; vbroadcastss 0x669d(%rip),%ymm8 # b69c <_sk_clut_4D_avx+0x250f>
DB 196,65,124,89,200 ; vmulps %ymm8,%ymm0,%ymm9
DB 196,65,125,91,201 ; vcvtps2dq %ymm9,%ymm9
DB 196,65,116,89,208 ; vmulps %ymm8,%ymm1,%ymm10
@@ -11787,140 +12296,243 @@ _sk_store_8888_avx LABEL PROC
DB 196,65,45,86,192 ; vorpd %ymm8,%ymm10,%ymm8
DB 196,65,53,86,192 ; vorpd %ymm8,%ymm9,%ymm8
DB 77,133,192 ; test %r8,%r8
- DB 117,13 ; jne 4e55 <_sk_store_8888_avx+0xb9>
- DB 197,124,17,0 ; vmovups %ymm8,(%rax)
+ DB 117,11 ; jne 508e <_sk_store_8888_avx+0xb0>
+ DB 196,65,124,17,4,153 ; vmovups %ymm8,(%r9,%rbx,4)
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 76,137,201 ; mov %r9,%rcx
- DB 65,89 ; pop %r9
+ DB 91 ; pop %rbx
DB 255,224 ; jmpq *%rax
- DB 185,8,0,0,0 ; mov $0x8,%ecx
- DB 68,41,193 ; sub %r8d,%ecx
- DB 192,225,3 ; shl $0x3,%cl
- DB 73,199,194,255,255,255,255 ; mov $0xffffffffffffffff,%r10
- DB 73,211,234 ; shr %cl,%r10
- DB 196,65,249,110,202 ; vmovq %r10,%xmm9
- DB 196,66,121,48,201 ; vpmovzxbw %xmm9,%xmm9
- DB 196,98,49,0,21,51,103,0,0 ; vpshufb 0x6733(%rip),%xmm9,%xmm10 # b5b0 <_sk_clut_4D_avx+0x27d7>
- DB 196,66,121,33,210 ; vpmovsxbd %xmm10,%xmm10
- DB 196,98,49,0,13,53,103,0,0 ; vpshufb 0x6735(%rip),%xmm9,%xmm9 # b5c0 <_sk_clut_4D_avx+0x27e7>
- DB 196,66,121,33,201 ; vpmovsxbd %xmm9,%xmm9
- DB 196,67,45,24,201,1 ; vinsertf128 $0x1,%xmm9,%ymm10,%ymm9
- DB 196,98,53,46,0 ; vmaskmovps %ymm8,%ymm9,(%rax)
- DB 235,175 ; jmp 4e4c <_sk_store_8888_avx+0xb0>
+ DB 69,137,194 ; mov %r8d,%r10d
+ DB 65,128,226,7 ; and $0x7,%r10b
+ DB 65,254,202 ; dec %r10b
+ DB 65,128,250,6 ; cmp $0x6,%r10b
+ DB 119,235 ; ja 5089 <_sk_store_8888_avx+0xab>
+ DB 69,15,182,210 ; movzbl %r10b,%r10d
+ DB 76,141,29,83,0,0,0 ; lea 0x53(%rip),%r11 # 50fc <_sk_store_8888_avx+0x11e>
+ DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
+ DB 76,1,216 ; add %r11,%rax
+ DB 255,224 ; jmpq *%rax
+ DB 196,65,121,126,4,153 ; vmovd %xmm8,(%r9,%rbx,4)
+ DB 235,207 ; jmp 5089 <_sk_store_8888_avx+0xab>
+ DB 196,67,121,22,68,153,8,2 ; vpextrd $0x2,%xmm8,0x8(%r9,%rbx,4)
+ DB 196,65,121,214,4,153 ; vmovq %xmm8,(%r9,%rbx,4)
+ DB 235,191 ; jmp 5089 <_sk_store_8888_avx+0xab>
+ DB 196,67,125,25,193,1 ; vextractf128 $0x1,%ymm8,%xmm9
+ DB 196,67,121,22,76,153,24,2 ; vpextrd $0x2,%xmm9,0x18(%r9,%rbx,4)
+ DB 196,67,125,25,193,1 ; vextractf128 $0x1,%ymm8,%xmm9
+ DB 196,67,121,22,76,153,20,1 ; vpextrd $0x1,%xmm9,0x14(%r9,%rbx,4)
+ DB 196,67,125,25,193,1 ; vextractf128 $0x1,%ymm8,%xmm9
+ DB 196,65,122,17,76,153,16 ; vmovss %xmm9,0x10(%r9,%rbx,4)
+ DB 196,65,121,17,4,153 ; vmovupd %xmm8,(%r9,%rbx,4)
+ DB 235,142 ; jmp 5089 <_sk_store_8888_avx+0xab>
+ DB 144 ; nop
+ DB 182,255 ; mov $0xff,%dh
+ DB 255 ; (bad)
+ DB 255,198 ; inc %esi
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 190,255,255,255,247 ; mov $0xf7ffffff,%esi
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 234 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 220,255 ; fdivr %st,%st(7)
+ DB 255 ; (bad)
+ DB 255,206 ; dec %esi
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; .byte 0xff
PUBLIC _sk_load_bgra_avx
_sk_load_bgra_avx LABEL PROC
- DB 80 ; push %rax
- DB 73,137,201 ; mov %rcx,%r9
+ DB 83 ; push %rbx
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,99,80,8 ; movslq 0x8(%rax),%r10
- DB 73,99,201 ; movslq %r9d,%rcx
- DB 73,15,175,202 ; imul %r10,%rcx
- DB 72,193,225,2 ; shl $0x2,%rcx
- DB 72,3,8 ; add (%rax),%rcx
- DB 72,99,194 ; movslq %edx,%rax
- DB 72,141,4,129 ; lea (%rcx,%rax,4),%rax
+ DB 76,99,201 ; movslq %ecx,%r9
+ DB 77,15,175,202 ; imul %r10,%r9
+ DB 73,193,225,2 ; shl $0x2,%r9
+ DB 76,3,8 ; add (%rax),%r9
+ DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 15,133,138,0,0,0 ; jne 4f4f <_sk_load_bgra_avx+0xb2>
- DB 197,252,16,24 ; vmovups (%rax),%ymm3
- DB 197,124,40,21,15,104,0,0 ; vmovaps 0x680f(%rip),%ymm10 # b6e0 <_sk_clut_4D_avx+0x2907>
- DB 196,193,100,84,202 ; vandps %ymm10,%ymm3,%ymm1
+ DB 15,133,136,0,0,0 ; jne 51c1 <_sk_load_bgra_avx+0xa9>
+ DB 196,65,125,16,12,153 ; vmovupd (%r9,%rbx,4),%ymm9
+ DB 197,125,40,21,153,103,0,0 ; vmovapd 0x6799(%rip),%ymm10 # b8e0 <_sk_clut_4D_avx+0x2753>
+ DB 196,193,53,84,202 ; vandpd %ymm10,%ymm9,%ymm1
DB 197,252,91,201 ; vcvtdq2ps %ymm1,%ymm1
- DB 196,98,125,24,5,9,100,0,0 ; vbroadcastss 0x6409(%rip),%ymm8 # b2ec <_sk_clut_4D_avx+0x2513>
+ DB 196,98,125,24,5,71,101,0,0 ; vbroadcastss 0x6547(%rip),%ymm8 # b6a0 <_sk_clut_4D_avx+0x2513>
DB 196,193,116,89,208 ; vmulps %ymm8,%ymm1,%ymm2
- DB 197,241,114,211,8 ; vpsrld $0x8,%xmm3,%xmm1
- DB 196,195,125,25,217,1 ; vextractf128 $0x1,%ymm3,%xmm9
- DB 196,193,121,114,209,8 ; vpsrld $0x8,%xmm9,%xmm0
+ DB 196,193,113,114,209,8 ; vpsrld $0x8,%xmm9,%xmm1
+ DB 196,99,125,25,203,1 ; vextractf128 $0x1,%ymm9,%xmm3
+ DB 197,249,114,211,8 ; vpsrld $0x8,%xmm3,%xmm0
DB 196,227,117,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
- DB 196,193,124,84,194 ; vandps %ymm10,%ymm0,%ymm0
+ DB 196,193,125,84,194 ; vandpd %ymm10,%ymm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
DB 196,193,124,89,200 ; vmulps %ymm8,%ymm0,%ymm1
- DB 197,161,114,211,16 ; vpsrld $0x10,%xmm3,%xmm11
- DB 196,193,121,114,209,16 ; vpsrld $0x10,%xmm9,%xmm0
+ DB 196,193,33,114,209,16 ; vpsrld $0x10,%xmm9,%xmm11
+ DB 197,249,114,211,16 ; vpsrld $0x10,%xmm3,%xmm0
DB 196,227,37,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm11,%ymm0
- DB 196,193,124,84,194 ; vandps %ymm10,%ymm0,%ymm0
+ DB 196,193,125,84,194 ; vandpd %ymm10,%ymm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
DB 196,193,124,89,192 ; vmulps %ymm8,%ymm0,%ymm0
- DB 197,169,114,211,24 ; vpsrld $0x18,%xmm3,%xmm10
- DB 196,193,97,114,209,24 ; vpsrld $0x18,%xmm9,%xmm3
- DB 196,227,45,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm10,%ymm3
+ DB 196,193,49,114,209,24 ; vpsrld $0x18,%xmm9,%xmm9
+ DB 197,225,114,211,24 ; vpsrld $0x18,%xmm3,%xmm3
+ DB 196,227,53,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm9,%ymm3
DB 197,252,91,219 ; vcvtdq2ps %ymm3,%ymm3
DB 196,193,100,89,216 ; vmulps %ymm8,%ymm3,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 76,137,201 ; mov %r9,%rcx
- DB 65,89 ; pop %r9
- DB 255,224 ; jmpq *%rax
- DB 185,8,0,0,0 ; mov $0x8,%ecx
- DB 68,41,193 ; sub %r8d,%ecx
- DB 192,225,3 ; shl $0x3,%cl
- DB 73,199,194,255,255,255,255 ; mov $0xffffffffffffffff,%r10
- DB 73,211,234 ; shr %cl,%r10
- DB 196,193,249,110,194 ; vmovq %r10,%xmm0
- DB 196,226,121,48,192 ; vpmovzxbw %xmm0,%xmm0
- DB 196,226,121,0,13,89,102,0,0 ; vpshufb 0x6659(%rip),%xmm0,%xmm1 # b5d0 <_sk_clut_4D_avx+0x27f7>
- DB 196,226,121,33,201 ; vpmovsxbd %xmm1,%xmm1
- DB 196,226,121,0,5,91,102,0,0 ; vpshufb 0x665b(%rip),%xmm0,%xmm0 # b5e0 <_sk_clut_4D_avx+0x2807>
- DB 196,226,121,33,192 ; vpmovsxbd %xmm0,%xmm0
- DB 196,227,117,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm1,%ymm0
- DB 196,226,125,44,24 ; vmaskmovps (%rax),%ymm0,%ymm3
- DB 233,47,255,255,255 ; jmpq 4ec9 <_sk_load_bgra_avx+0x2c>
+ DB 91 ; pop %rbx
+ DB 255,224 ; jmpq *%rax
+ DB 69,137,194 ; mov %r8d,%r10d
+ DB 65,128,226,7 ; and $0x7,%r10b
+ DB 196,65,52,87,201 ; vxorps %ymm9,%ymm9,%ymm9
+ DB 65,254,202 ; dec %r10b
+ DB 65,128,250,6 ; cmp $0x6,%r10b
+ DB 15,135,101,255,255,255 ; ja 513f <_sk_load_bgra_avx+0x27>
+ DB 69,15,182,210 ; movzbl %r10b,%r10d
+ DB 76,141,29,147,0,0,0 ; lea 0x93(%rip),%r11 # 5278 <_sk_load_bgra_avx+0x160>
+ DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
+ DB 76,1,216 ; add %r11,%rax
+ DB 255,224 ; jmpq *%rax
+ DB 196,65,122,16,12,153 ; vmovss (%r9,%rbx,4),%xmm9
+ DB 233,70,255,255,255 ; jmpq 513f <_sk_load_bgra_avx+0x27>
+ DB 196,193,121,110,68,153,8 ; vmovd 0x8(%r9,%rbx,4),%xmm0
+ DB 197,249,112,192,68 ; vpshufd $0x44,%xmm0,%xmm0
+ DB 197,244,87,201 ; vxorps %ymm1,%ymm1,%ymm1
+ DB 196,99,117,12,200,4 ; vblendps $0x4,%ymm0,%ymm1,%ymm9
+ DB 196,193,123,16,4,153 ; vmovsd (%r9,%rbx,4),%xmm0
+ DB 196,99,53,13,200,1 ; vblendpd $0x1,%ymm0,%ymm9,%ymm9
+ DB 233,31,255,255,255 ; jmpq 513f <_sk_load_bgra_avx+0x27>
+ DB 196,193,121,110,68,153,24 ; vmovd 0x18(%r9,%rbx,4),%xmm0
+ DB 197,249,112,192,68 ; vpshufd $0x44,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 197,244,87,201 ; vxorps %ymm1,%ymm1,%ymm1
+ DB 196,99,117,12,200,64 ; vblendps $0x40,%ymm0,%ymm1,%ymm9
+ DB 196,99,125,25,200,1 ; vextractf128 $0x1,%ymm9,%xmm0
+ DB 196,195,121,34,68,153,20,1 ; vpinsrd $0x1,0x14(%r9,%rbx,4),%xmm0,%xmm0
+ DB 196,99,53,24,200,1 ; vinsertf128 $0x1,%xmm0,%ymm9,%ymm9
+ DB 196,99,125,25,200,1 ; vextractf128 $0x1,%ymm9,%xmm0
+ DB 196,195,121,34,68,153,16,0 ; vpinsrd $0x0,0x10(%r9,%rbx,4),%xmm0,%xmm0
+ DB 196,99,53,24,200,1 ; vinsertf128 $0x1,%xmm0,%ymm9,%ymm9
+ DB 196,193,121,16,4,153 ; vmovupd (%r9,%rbx,4),%xmm0
+ DB 196,67,125,13,201,12 ; vblendpd $0xc,%ymm9,%ymm0,%ymm9
+ DB 233,202,254,255,255 ; jmpq 513f <_sk_load_bgra_avx+0x27>
+ DB 15,31,0 ; nopl (%rax)
+ DB 118,255 ; jbe 5279 <_sk_load_bgra_avx+0x161>
+ DB 255 ; (bad)
+ DB 255,151,255,255,255,129 ; callq *-0x7e000001(%rdi)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 236 ; in (%dx),%al
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 216,255 ; fdivr %st(7),%st
+ DB 255 ; (bad)
+ DB 255,196 ; inc %esp
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; .byte 0xff
+ DB 168,255 ; test $0xff,%al
+ DB 255 ; (bad)
+ DB 255 ; .byte 0xff
PUBLIC _sk_load_bgra_dst_avx
_sk_load_bgra_dst_avx LABEL PROC
- DB 80 ; push %rax
- DB 73,137,201 ; mov %rcx,%r9
+ DB 83 ; push %rbx
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,99,80,8 ; movslq 0x8(%rax),%r10
- DB 73,99,201 ; movslq %r9d,%rcx
- DB 73,15,175,202 ; imul %r10,%rcx
- DB 72,193,225,2 ; shl $0x2,%rcx
- DB 72,3,8 ; add (%rax),%rcx
- DB 72,99,194 ; movslq %edx,%rax
- DB 72,141,4,129 ; lea (%rcx,%rax,4),%rax
+ DB 76,99,201 ; movslq %ecx,%r9
+ DB 77,15,175,202 ; imul %r10,%r9
+ DB 73,193,225,2 ; shl $0x2,%r9
+ DB 76,3,8 ; add (%rax),%r9
+ DB 72,99,218 ; movslq %edx,%rbx
DB 77,133,192 ; test %r8,%r8
- DB 15,133,138,0,0,0 ; jne 504c <_sk_load_bgra_dst_avx+0xb2>
- DB 197,252,16,56 ; vmovups (%rax),%ymm7
- DB 197,124,40,21,50,103,0,0 ; vmovaps 0x6732(%rip),%ymm10 # b700 <_sk_clut_4D_avx+0x2927>
- DB 196,193,68,84,234 ; vandps %ymm10,%ymm7,%ymm5
+ DB 15,133,136,0,0,0 ; jne 533d <_sk_load_bgra_dst_avx+0xa9>
+ DB 196,65,125,16,12,153 ; vmovupd (%r9,%rbx,4),%ymm9
+ DB 197,125,40,21,61,102,0,0 ; vmovapd 0x663d(%rip),%ymm10 # b900 <_sk_clut_4D_avx+0x2773>
+ DB 196,193,53,84,234 ; vandpd %ymm10,%ymm9,%ymm5
DB 197,252,91,237 ; vcvtdq2ps %ymm5,%ymm5
- DB 196,98,125,24,5,16,99,0,0 ; vbroadcastss 0x6310(%rip),%ymm8 # b2f0 <_sk_clut_4D_avx+0x2517>
+ DB 196,98,125,24,5,207,99,0,0 ; vbroadcastss 0x63cf(%rip),%ymm8 # b6a4 <_sk_clut_4D_avx+0x2517>
DB 196,193,84,89,240 ; vmulps %ymm8,%ymm5,%ymm6
- DB 197,209,114,215,8 ; vpsrld $0x8,%xmm7,%xmm5
- DB 196,195,125,25,249,1 ; vextractf128 $0x1,%ymm7,%xmm9
- DB 196,193,89,114,209,8 ; vpsrld $0x8,%xmm9,%xmm4
+ DB 196,193,81,114,209,8 ; vpsrld $0x8,%xmm9,%xmm5
+ DB 196,99,125,25,207,1 ; vextractf128 $0x1,%ymm9,%xmm7
+ DB 197,217,114,215,8 ; vpsrld $0x8,%xmm7,%xmm4
DB 196,227,85,24,228,1 ; vinsertf128 $0x1,%xmm4,%ymm5,%ymm4
- DB 196,193,92,84,226 ; vandps %ymm10,%ymm4,%ymm4
+ DB 196,193,93,84,226 ; vandpd %ymm10,%ymm4,%ymm4
DB 197,252,91,228 ; vcvtdq2ps %ymm4,%ymm4
DB 196,193,92,89,232 ; vmulps %ymm8,%ymm4,%ymm5
- DB 197,161,114,215,16 ; vpsrld $0x10,%xmm7,%xmm11
- DB 196,193,89,114,209,16 ; vpsrld $0x10,%xmm9,%xmm4
+ DB 196,193,33,114,209,16 ; vpsrld $0x10,%xmm9,%xmm11
+ DB 197,217,114,215,16 ; vpsrld $0x10,%xmm7,%xmm4
DB 196,227,37,24,228,1 ; vinsertf128 $0x1,%xmm4,%ymm11,%ymm4
- DB 196,193,92,84,226 ; vandps %ymm10,%ymm4,%ymm4
+ DB 196,193,93,84,226 ; vandpd %ymm10,%ymm4,%ymm4
DB 197,252,91,228 ; vcvtdq2ps %ymm4,%ymm4
DB 196,193,92,89,224 ; vmulps %ymm8,%ymm4,%ymm4
- DB 197,169,114,215,24 ; vpsrld $0x18,%xmm7,%xmm10
- DB 196,193,65,114,209,24 ; vpsrld $0x18,%xmm9,%xmm7
- DB 196,227,45,24,255,1 ; vinsertf128 $0x1,%xmm7,%ymm10,%ymm7
+ DB 196,193,49,114,209,24 ; vpsrld $0x18,%xmm9,%xmm9
+ DB 197,193,114,215,24 ; vpsrld $0x18,%xmm7,%xmm7
+ DB 196,227,53,24,255,1 ; vinsertf128 $0x1,%xmm7,%ymm9,%ymm7
DB 197,252,91,255 ; vcvtdq2ps %ymm7,%ymm7
DB 196,193,68,89,248 ; vmulps %ymm8,%ymm7,%ymm7
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 76,137,201 ; mov %r9,%rcx
- DB 65,89 ; pop %r9
- DB 255,224 ; jmpq *%rax
- DB 185,8,0,0,0 ; mov $0x8,%ecx
- DB 68,41,193 ; sub %r8d,%ecx
- DB 192,225,3 ; shl $0x3,%cl
- DB 73,199,194,255,255,255,255 ; mov $0xffffffffffffffff,%r10
- DB 73,211,234 ; shr %cl,%r10
- DB 196,193,249,110,226 ; vmovq %r10,%xmm4
- DB 196,226,121,48,228 ; vpmovzxbw %xmm4,%xmm4
- DB 196,226,89,0,45,124,101,0,0 ; vpshufb 0x657c(%rip),%xmm4,%xmm5 # b5f0 <_sk_clut_4D_avx+0x2817>
- DB 196,226,121,33,237 ; vpmovsxbd %xmm5,%xmm5
- DB 196,226,89,0,37,126,101,0,0 ; vpshufb 0x657e(%rip),%xmm4,%xmm4 # b600 <_sk_clut_4D_avx+0x2827>
- DB 196,226,121,33,228 ; vpmovsxbd %xmm4,%xmm4
- DB 196,227,85,24,228,1 ; vinsertf128 $0x1,%xmm4,%ymm5,%ymm4
- DB 196,226,93,44,56 ; vmaskmovps (%rax),%ymm4,%ymm7
- DB 233,47,255,255,255 ; jmpq 4fc6 <_sk_load_bgra_dst_avx+0x2c>
+ DB 91 ; pop %rbx
+ DB 255,224 ; jmpq *%rax
+ DB 69,137,194 ; mov %r8d,%r10d
+ DB 65,128,226,7 ; and $0x7,%r10b
+ DB 196,65,52,87,201 ; vxorps %ymm9,%ymm9,%ymm9
+ DB 65,254,202 ; dec %r10b
+ DB 65,128,250,6 ; cmp $0x6,%r10b
+ DB 15,135,101,255,255,255 ; ja 52bb <_sk_load_bgra_dst_avx+0x27>
+ DB 69,15,182,210 ; movzbl %r10b,%r10d
+ DB 76,141,29,147,0,0,0 ; lea 0x93(%rip),%r11 # 53f4 <_sk_load_bgra_dst_avx+0x160>
+ DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
+ DB 76,1,216 ; add %r11,%rax
+ DB 255,224 ; jmpq *%rax
+ DB 196,65,122,16,12,153 ; vmovss (%r9,%rbx,4),%xmm9
+ DB 233,70,255,255,255 ; jmpq 52bb <_sk_load_bgra_dst_avx+0x27>
+ DB 196,193,121,110,100,153,8 ; vmovd 0x8(%r9,%rbx,4),%xmm4
+ DB 197,249,112,228,68 ; vpshufd $0x44,%xmm4,%xmm4
+ DB 197,212,87,237 ; vxorps %ymm5,%ymm5,%ymm5
+ DB 196,99,85,12,204,4 ; vblendps $0x4,%ymm4,%ymm5,%ymm9
+ DB 196,193,123,16,36,153 ; vmovsd (%r9,%rbx,4),%xmm4
+ DB 196,99,53,13,204,1 ; vblendpd $0x1,%ymm4,%ymm9,%ymm9
+ DB 233,31,255,255,255 ; jmpq 52bb <_sk_load_bgra_dst_avx+0x27>
+ DB 196,193,121,110,100,153,24 ; vmovd 0x18(%r9,%rbx,4),%xmm4
+ DB 197,249,112,228,68 ; vpshufd $0x44,%xmm4,%xmm4
+ DB 196,227,125,24,228,1 ; vinsertf128 $0x1,%xmm4,%ymm0,%ymm4
+ DB 197,212,87,237 ; vxorps %ymm5,%ymm5,%ymm5
+ DB 196,99,85,12,204,64 ; vblendps $0x40,%ymm4,%ymm5,%ymm9
+ DB 196,99,125,25,204,1 ; vextractf128 $0x1,%ymm9,%xmm4
+ DB 196,195,89,34,100,153,20,1 ; vpinsrd $0x1,0x14(%r9,%rbx,4),%xmm4,%xmm4
+ DB 196,99,53,24,204,1 ; vinsertf128 $0x1,%xmm4,%ymm9,%ymm9
+ DB 196,99,125,25,204,1 ; vextractf128 $0x1,%ymm9,%xmm4
+ DB 196,195,89,34,100,153,16,0 ; vpinsrd $0x0,0x10(%r9,%rbx,4),%xmm4,%xmm4
+ DB 196,99,53,24,204,1 ; vinsertf128 $0x1,%xmm4,%ymm9,%ymm9
+ DB 196,193,121,16,36,153 ; vmovupd (%r9,%rbx,4),%xmm4
+ DB 196,67,93,13,201,12 ; vblendpd $0xc,%ymm9,%ymm4,%ymm9
+ DB 233,202,254,255,255 ; jmpq 52bb <_sk_load_bgra_dst_avx+0x27>
+ DB 15,31,0 ; nopl (%rax)
+ DB 118,255 ; jbe 53f5 <_sk_load_bgra_dst_avx+0x161>
+ DB 255 ; (bad)
+ DB 255,151,255,255,255,129 ; callq *-0x7e000001(%rdi)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 236 ; in (%dx),%al
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 216,255 ; fdivr %st(7),%st
+ DB 255 ; (bad)
+ DB 255,196 ; inc %esp
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; .byte 0xff
+ DB 168,255 ; test $0xff,%al
+ DB 255 ; (bad)
+ DB 255 ; .byte 0xff
PUBLIC _sk_gather_bgra_avx
_sk_gather_bgra_avx LABEL PROC
@@ -11957,10 +12569,10 @@ _sk_gather_bgra_avx LABEL PROC
DB 73,193,234,32 ; shr $0x20,%r10
DB 196,131,121,34,28,145,3 ; vpinsrd $0x3,(%r9,%r10,4),%xmm0,%xmm3
DB 196,227,61,24,195,1 ; vinsertf128 $0x1,%xmm3,%ymm8,%ymm0
- DB 197,124,40,13,222,101,0,0 ; vmovaps 0x65de(%rip),%ymm9 # b720 <_sk_clut_4D_avx+0x2947>
+ DB 197,124,40,13,101,100,0,0 ; vmovaps 0x6465(%rip),%ymm9 # b920 <_sk_clut_4D_avx+0x2793>
DB 196,193,124,84,193 ; vandps %ymm9,%ymm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,98,125,24,21,160,97,0,0 ; vbroadcastss 0x61a0(%rip),%ymm10 # b2f4 <_sk_clut_4D_avx+0x251b>
+ DB 196,98,125,24,21,219,97,0,0 ; vbroadcastss 0x61db(%rip),%ymm10 # b6a8 <_sk_clut_4D_avx+0x251b>
DB 196,193,124,89,210 ; vmulps %ymm10,%ymm0,%ymm2
DB 196,193,121,114,208,8 ; vpsrld $0x8,%xmm8,%xmm0
DB 197,241,114,211,8 ; vpsrld $0x8,%xmm3,%xmm1
@@ -11984,17 +12596,15 @@ _sk_gather_bgra_avx LABEL PROC
PUBLIC _sk_store_bgra_avx
_sk_store_bgra_avx LABEL PROC
- DB 80 ; push %rax
- DB 73,137,201 ; mov %rcx,%r9
+ DB 83 ; push %rbx
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,99,80,8 ; movslq 0x8(%rax),%r10
- DB 73,99,201 ; movslq %r9d,%rcx
- DB 73,15,175,202 ; imul %r10,%rcx
- DB 72,193,225,2 ; shl $0x2,%rcx
- DB 72,3,8 ; add (%rax),%rcx
- DB 72,99,194 ; movslq %edx,%rax
- DB 72,141,4,129 ; lea (%rcx,%rax,4),%rax
- DB 196,98,125,24,5,27,97,0,0 ; vbroadcastss 0x611b(%rip),%ymm8 # b2f8 <_sk_clut_4D_avx+0x251f>
+ DB 76,99,201 ; movslq %ecx,%r9
+ DB 77,15,175,202 ; imul %r10,%r9
+ DB 73,193,225,2 ; shl $0x2,%r9
+ DB 76,3,8 ; add (%rax),%r9
+ DB 72,99,218 ; movslq %edx,%rbx
+ DB 196,98,125,24,5,93,97,0,0 ; vbroadcastss 0x615d(%rip),%ymm8 # b6ac <_sk_clut_4D_avx+0x251f>
DB 196,65,108,89,200 ; vmulps %ymm8,%ymm2,%ymm9
DB 196,65,125,91,201 ; vcvtps2dq %ymm9,%ymm9
DB 196,65,116,89,208 ; vmulps %ymm8,%ymm1,%ymm10
@@ -12019,26 +12629,55 @@ _sk_store_bgra_avx LABEL PROC
DB 196,65,45,86,192 ; vorpd %ymm8,%ymm10,%ymm8
DB 196,65,53,86,192 ; vorpd %ymm8,%ymm9,%ymm8
DB 77,133,192 ; test %r8,%r8
- DB 117,13 ; jne 526e <_sk_store_bgra_avx+0xb9>
- DB 197,124,17,0 ; vmovups %ymm8,(%rax)
+ DB 117,11 ; jne 55de <_sk_store_bgra_avx+0xb0>
+ DB 196,65,124,17,4,153 ; vmovups %ymm8,(%r9,%rbx,4)
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 76,137,201 ; mov %r9,%rcx
- DB 65,89 ; pop %r9
+ DB 91 ; pop %rbx
DB 255,224 ; jmpq *%rax
- DB 185,8,0,0,0 ; mov $0x8,%ecx
- DB 68,41,193 ; sub %r8d,%ecx
- DB 192,225,3 ; shl $0x3,%cl
- DB 73,199,194,255,255,255,255 ; mov $0xffffffffffffffff,%r10
- DB 73,211,234 ; shr %cl,%r10
- DB 196,65,249,110,202 ; vmovq %r10,%xmm9
- DB 196,66,121,48,201 ; vpmovzxbw %xmm9,%xmm9
- DB 196,98,49,0,21,122,99,0,0 ; vpshufb 0x637a(%rip),%xmm9,%xmm10 # b610 <_sk_clut_4D_avx+0x2837>
- DB 196,66,121,33,210 ; vpmovsxbd %xmm10,%xmm10
- DB 196,98,49,0,13,124,99,0,0 ; vpshufb 0x637c(%rip),%xmm9,%xmm9 # b620 <_sk_clut_4D_avx+0x2847>
- DB 196,66,121,33,201 ; vpmovsxbd %xmm9,%xmm9
- DB 196,67,45,24,201,1 ; vinsertf128 $0x1,%xmm9,%ymm10,%ymm9
- DB 196,98,53,46,0 ; vmaskmovps %ymm8,%ymm9,(%rax)
- DB 235,175 ; jmp 5265 <_sk_store_bgra_avx+0xb0>
+ DB 69,137,194 ; mov %r8d,%r10d
+ DB 65,128,226,7 ; and $0x7,%r10b
+ DB 65,254,202 ; dec %r10b
+ DB 65,128,250,6 ; cmp $0x6,%r10b
+ DB 119,235 ; ja 55d9 <_sk_store_bgra_avx+0xab>
+ DB 69,15,182,210 ; movzbl %r10b,%r10d
+ DB 76,141,29,83,0,0,0 ; lea 0x53(%rip),%r11 # 564c <_sk_store_bgra_avx+0x11e>
+ DB 75,99,4,147 ; movslq (%r11,%r10,4),%rax
+ DB 76,1,216 ; add %r11,%rax
+ DB 255,224 ; jmpq *%rax
+ DB 196,65,121,126,4,153 ; vmovd %xmm8,(%r9,%rbx,4)
+ DB 235,207 ; jmp 55d9 <_sk_store_bgra_avx+0xab>
+ DB 196,67,121,22,68,153,8,2 ; vpextrd $0x2,%xmm8,0x8(%r9,%rbx,4)
+ DB 196,65,121,214,4,153 ; vmovq %xmm8,(%r9,%rbx,4)
+ DB 235,191 ; jmp 55d9 <_sk_store_bgra_avx+0xab>
+ DB 196,67,125,25,193,1 ; vextractf128 $0x1,%ymm8,%xmm9
+ DB 196,67,121,22,76,153,24,2 ; vpextrd $0x2,%xmm9,0x18(%r9,%rbx,4)
+ DB 196,67,125,25,193,1 ; vextractf128 $0x1,%ymm8,%xmm9
+ DB 196,67,121,22,76,153,20,1 ; vpextrd $0x1,%xmm9,0x14(%r9,%rbx,4)
+ DB 196,67,125,25,193,1 ; vextractf128 $0x1,%ymm8,%xmm9
+ DB 196,65,122,17,76,153,16 ; vmovss %xmm9,0x10(%r9,%rbx,4)
+ DB 196,65,121,17,4,153 ; vmovupd %xmm8,(%r9,%rbx,4)
+ DB 235,142 ; jmp 55d9 <_sk_store_bgra_avx+0xab>
+ DB 144 ; nop
+ DB 182,255 ; mov $0xff,%dh
+ DB 255 ; (bad)
+ DB 255,198 ; inc %esi
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 190,255,255,255,247 ; mov $0xf7ffffff,%esi
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 234 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 220,255 ; fdivr %st,%st(7)
+ DB 255 ; (bad)
+ DB 255,206 ; dec %esi
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255 ; .byte 0xff
PUBLIC _sk_load_f16_avx
_sk_load_f16_avx LABEL PROC
@@ -12055,7 +12694,7 @@ _sk_load_f16_avx LABEL PROC
DB 197,252,17,116,36,64 ; vmovups %ymm6,0x40(%rsp)
DB 197,252,17,108,36,32 ; vmovups %ymm5,0x20(%rsp)
DB 197,254,127,36,36 ; vmovdqu %ymm4,(%rsp)
- DB 15,133,147,2,0,0 ; jne 5587 <_sk_load_f16_avx+0x2d1>
+ DB 15,133,147,2,0,0 ; jne 5939 <_sk_load_f16_avx+0x2d1>
DB 196,65,121,16,4,193 ; vmovupd (%r9,%rax,8),%xmm8
DB 196,193,121,16,84,193,16 ; vmovupd 0x10(%r9,%rax,8),%xmm2
DB 196,193,121,16,76,193,32 ; vmovupd 0x20(%r9,%rax,8),%xmm1
@@ -12073,13 +12712,13 @@ _sk_load_f16_avx LABEL PROC
DB 197,249,105,201 ; vpunpckhwd %xmm1,%xmm0,%xmm1
DB 196,226,121,51,192 ; vpmovzxwd %xmm0,%xmm0
DB 196,227,125,24,193,1 ; vinsertf128 $0x1,%xmm1,%ymm0,%ymm0
- DB 196,98,125,24,37,171,95,0,0 ; vbroadcastss 0x5fab(%rip),%ymm12 # b2fc <_sk_clut_4D_avx+0x2523>
+ DB 196,98,125,24,37,173,95,0,0 ; vbroadcastss 0x5fad(%rip),%ymm12 # b6b0 <_sk_clut_4D_avx+0x2523>
DB 196,193,124,84,204 ; vandps %ymm12,%ymm0,%ymm1
DB 197,252,87,193 ; vxorps %ymm1,%ymm0,%ymm0
DB 196,195,125,25,198,1 ; vextractf128 $0x1,%ymm0,%xmm14
- DB 196,98,121,24,29,151,95,0,0 ; vbroadcastss 0x5f97(%rip),%xmm11 # b300 <_sk_clut_4D_avx+0x2527>
+ DB 196,98,121,24,29,153,95,0,0 ; vbroadcastss 0x5f99(%rip),%xmm11 # b6b4 <_sk_clut_4D_avx+0x2527>
DB 196,193,8,87,219 ; vxorps %xmm11,%xmm14,%xmm3
- DB 196,98,121,24,45,141,95,0,0 ; vbroadcastss 0x5f8d(%rip),%xmm13 # b304 <_sk_clut_4D_avx+0x252b>
+ DB 196,98,121,24,45,143,95,0,0 ; vbroadcastss 0x5f8f(%rip),%xmm13 # b6b8 <_sk_clut_4D_avx+0x252b>
DB 197,145,102,219 ; vpcmpgtd %xmm3,%xmm13,%xmm3
DB 196,65,120,87,211 ; vxorps %xmm11,%xmm0,%xmm10
DB 196,65,17,102,210 ; vpcmpgtd %xmm10,%xmm13,%xmm10
@@ -12093,7 +12732,7 @@ _sk_load_f16_avx LABEL PROC
DB 196,227,125,24,195,1 ; vinsertf128 $0x1,%xmm3,%ymm0,%ymm0
DB 197,252,86,193 ; vorps %ymm1,%ymm0,%ymm0
DB 196,227,125,25,193,1 ; vextractf128 $0x1,%ymm0,%xmm1
- DB 196,226,121,24,29,67,95,0,0 ; vbroadcastss 0x5f43(%rip),%xmm3 # b308 <_sk_clut_4D_avx+0x252f>
+ DB 196,226,121,24,29,69,95,0,0 ; vbroadcastss 0x5f45(%rip),%xmm3 # b6bc <_sk_clut_4D_avx+0x252f>
DB 197,241,254,203 ; vpaddd %xmm3,%xmm1,%xmm1
DB 197,249,254,195 ; vpaddd %xmm3,%xmm0,%xmm0
DB 196,227,125,24,193,1 ; vinsertf128 $0x1,%xmm1,%ymm0,%ymm0
@@ -12186,29 +12825,29 @@ _sk_load_f16_avx LABEL PROC
DB 196,65,123,16,4,193 ; vmovsd (%r9,%rax,8),%xmm8
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 73,131,248,1 ; cmp $0x1,%r8
- DB 116,85 ; je 55ed <_sk_load_f16_avx+0x337>
+ DB 116,85 ; je 599f <_sk_load_f16_avx+0x337>
DB 196,65,57,22,68,193,8 ; vmovhpd 0x8(%r9,%rax,8),%xmm8,%xmm8
DB 73,131,248,3 ; cmp $0x3,%r8
- DB 114,72 ; jb 55ed <_sk_load_f16_avx+0x337>
+ DB 114,72 ; jb 599f <_sk_load_f16_avx+0x337>
DB 196,193,123,16,84,193,16 ; vmovsd 0x10(%r9,%rax,8),%xmm2
DB 73,131,248,3 ; cmp $0x3,%r8
- DB 116,72 ; je 55fa <_sk_load_f16_avx+0x344>
+ DB 116,72 ; je 59ac <_sk_load_f16_avx+0x344>
DB 196,193,105,22,84,193,24 ; vmovhpd 0x18(%r9,%rax,8),%xmm2,%xmm2
DB 73,131,248,5 ; cmp $0x5,%r8
- DB 114,59 ; jb 55fa <_sk_load_f16_avx+0x344>
+ DB 114,59 ; jb 59ac <_sk_load_f16_avx+0x344>
DB 196,193,123,16,76,193,32 ; vmovsd 0x20(%r9,%rax,8),%xmm1
DB 73,131,248,5 ; cmp $0x5,%r8
- DB 15,132,63,253,255,255 ; je 530f <_sk_load_f16_avx+0x59>
+ DB 15,132,63,253,255,255 ; je 56c1 <_sk_load_f16_avx+0x59>
DB 196,193,113,22,76,193,40 ; vmovhpd 0x28(%r9,%rax,8),%xmm1,%xmm1
DB 73,131,248,7 ; cmp $0x7,%r8
- DB 15,130,46,253,255,255 ; jb 530f <_sk_load_f16_avx+0x59>
+ DB 15,130,46,253,255,255 ; jb 56c1 <_sk_load_f16_avx+0x59>
DB 196,65,122,126,76,193,48 ; vmovq 0x30(%r9,%rax,8),%xmm9
- DB 233,34,253,255,255 ; jmpq 530f <_sk_load_f16_avx+0x59>
+ DB 233,34,253,255,255 ; jmpq 56c1 <_sk_load_f16_avx+0x59>
DB 197,241,87,201 ; vxorpd %xmm1,%xmm1,%xmm1
DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2
- DB 233,21,253,255,255 ; jmpq 530f <_sk_load_f16_avx+0x59>
+ DB 233,21,253,255,255 ; jmpq 56c1 <_sk_load_f16_avx+0x59>
DB 197,241,87,201 ; vxorpd %xmm1,%xmm1,%xmm1
- DB 233,12,253,255,255 ; jmpq 530f <_sk_load_f16_avx+0x59>
+ DB 233,12,253,255,255 ; jmpq 56c1 <_sk_load_f16_avx+0x59>
PUBLIC _sk_load_f16_dst_avx
_sk_load_f16_dst_avx LABEL PROC
@@ -12225,7 +12864,7 @@ _sk_load_f16_dst_avx LABEL PROC
DB 197,252,17,84,36,64 ; vmovups %ymm2,0x40(%rsp)
DB 197,252,17,76,36,32 ; vmovups %ymm1,0x20(%rsp)
DB 197,254,127,4,36 ; vmovdqu %ymm0,(%rsp)
- DB 15,133,147,2,0,0 ; jne 58d4 <_sk_load_f16_dst_avx+0x2d1>
+ DB 15,133,147,2,0,0 ; jne 5c86 <_sk_load_f16_dst_avx+0x2d1>
DB 196,65,121,16,4,193 ; vmovupd (%r9,%rax,8),%xmm8
DB 196,193,121,16,116,193,16 ; vmovupd 0x10(%r9,%rax,8),%xmm6
DB 196,193,121,16,108,193,32 ; vmovupd 0x20(%r9,%rax,8),%xmm5
@@ -12243,13 +12882,13 @@ _sk_load_f16_dst_avx LABEL PROC
DB 197,217,105,232 ; vpunpckhwd %xmm0,%xmm4,%xmm5
DB 196,226,121,51,228 ; vpmovzxwd %xmm4,%xmm4
DB 196,227,93,24,229,1 ; vinsertf128 $0x1,%xmm5,%ymm4,%ymm4
- DB 196,98,125,24,37,110,92,0,0 ; vbroadcastss 0x5c6e(%rip),%ymm12 # b30c <_sk_clut_4D_avx+0x2533>
+ DB 196,98,125,24,37,112,92,0,0 ; vbroadcastss 0x5c70(%rip),%ymm12 # b6c0 <_sk_clut_4D_avx+0x2533>
DB 196,193,92,84,236 ; vandps %ymm12,%ymm4,%ymm5
DB 197,220,87,229 ; vxorps %ymm5,%ymm4,%ymm4
DB 196,195,125,25,230,1 ; vextractf128 $0x1,%ymm4,%xmm14
- DB 196,98,121,24,29,90,92,0,0 ; vbroadcastss 0x5c5a(%rip),%xmm11 # b310 <_sk_clut_4D_avx+0x2537>
+ DB 196,98,121,24,29,92,92,0,0 ; vbroadcastss 0x5c5c(%rip),%xmm11 # b6c4 <_sk_clut_4D_avx+0x2537>
DB 196,193,8,87,251 ; vxorps %xmm11,%xmm14,%xmm7
- DB 196,98,121,24,45,80,92,0,0 ; vbroadcastss 0x5c50(%rip),%xmm13 # b314 <_sk_clut_4D_avx+0x253b>
+ DB 196,98,121,24,45,82,92,0,0 ; vbroadcastss 0x5c52(%rip),%xmm13 # b6c8 <_sk_clut_4D_avx+0x253b>
DB 197,145,102,255 ; vpcmpgtd %xmm7,%xmm13,%xmm7
DB 196,65,88,87,211 ; vxorps %xmm11,%xmm4,%xmm10
DB 196,65,17,102,210 ; vpcmpgtd %xmm10,%xmm13,%xmm10
@@ -12263,7 +12902,7 @@ _sk_load_f16_dst_avx LABEL PROC
DB 196,227,93,24,231,1 ; vinsertf128 $0x1,%xmm7,%ymm4,%ymm4
DB 197,220,86,229 ; vorps %ymm5,%ymm4,%ymm4
DB 196,227,125,25,229,1 ; vextractf128 $0x1,%ymm4,%xmm5
- DB 196,226,121,24,61,6,92,0,0 ; vbroadcastss 0x5c06(%rip),%xmm7 # b318 <_sk_clut_4D_avx+0x253f>
+ DB 196,226,121,24,61,8,92,0,0 ; vbroadcastss 0x5c08(%rip),%xmm7 # b6cc <_sk_clut_4D_avx+0x253f>
DB 197,209,254,239 ; vpaddd %xmm7,%xmm5,%xmm5
DB 197,217,254,231 ; vpaddd %xmm7,%xmm4,%xmm4
DB 196,227,93,24,229,1 ; vinsertf128 $0x1,%xmm5,%ymm4,%ymm4
@@ -12356,29 +12995,29 @@ _sk_load_f16_dst_avx LABEL PROC
DB 196,65,123,16,4,193 ; vmovsd (%r9,%rax,8),%xmm8
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 73,131,248,1 ; cmp $0x1,%r8
- DB 116,85 ; je 593a <_sk_load_f16_dst_avx+0x337>
+ DB 116,85 ; je 5cec <_sk_load_f16_dst_avx+0x337>
DB 196,65,57,22,68,193,8 ; vmovhpd 0x8(%r9,%rax,8),%xmm8,%xmm8
DB 73,131,248,3 ; cmp $0x3,%r8
- DB 114,72 ; jb 593a <_sk_load_f16_dst_avx+0x337>
+ DB 114,72 ; jb 5cec <_sk_load_f16_dst_avx+0x337>
DB 196,193,123,16,116,193,16 ; vmovsd 0x10(%r9,%rax,8),%xmm6
DB 73,131,248,3 ; cmp $0x3,%r8
- DB 116,72 ; je 5947 <_sk_load_f16_dst_avx+0x344>
+ DB 116,72 ; je 5cf9 <_sk_load_f16_dst_avx+0x344>
DB 196,193,73,22,116,193,24 ; vmovhpd 0x18(%r9,%rax,8),%xmm6,%xmm6
DB 73,131,248,5 ; cmp $0x5,%r8
- DB 114,59 ; jb 5947 <_sk_load_f16_dst_avx+0x344>
+ DB 114,59 ; jb 5cf9 <_sk_load_f16_dst_avx+0x344>
DB 196,193,123,16,108,193,32 ; vmovsd 0x20(%r9,%rax,8),%xmm5
DB 73,131,248,5 ; cmp $0x5,%r8
- DB 15,132,63,253,255,255 ; je 565c <_sk_load_f16_dst_avx+0x59>
+ DB 15,132,63,253,255,255 ; je 5a0e <_sk_load_f16_dst_avx+0x59>
DB 196,193,81,22,108,193,40 ; vmovhpd 0x28(%r9,%rax,8),%xmm5,%xmm5
DB 73,131,248,7 ; cmp $0x7,%r8
- DB 15,130,46,253,255,255 ; jb 565c <_sk_load_f16_dst_avx+0x59>
+ DB 15,130,46,253,255,255 ; jb 5a0e <_sk_load_f16_dst_avx+0x59>
DB 196,65,122,126,76,193,48 ; vmovq 0x30(%r9,%rax,8),%xmm9
- DB 233,34,253,255,255 ; jmpq 565c <_sk_load_f16_dst_avx+0x59>
+ DB 233,34,253,255,255 ; jmpq 5a0e <_sk_load_f16_dst_avx+0x59>
DB 197,209,87,237 ; vxorpd %xmm5,%xmm5,%xmm5
DB 197,201,87,246 ; vxorpd %xmm6,%xmm6,%xmm6
- DB 233,21,253,255,255 ; jmpq 565c <_sk_load_f16_dst_avx+0x59>
+ DB 233,21,253,255,255 ; jmpq 5a0e <_sk_load_f16_dst_avx+0x59>
DB 197,209,87,237 ; vxorpd %xmm5,%xmm5,%xmm5
- DB 233,12,253,255,255 ; jmpq 565c <_sk_load_f16_dst_avx+0x59>
+ DB 233,12,253,255,255 ; jmpq 5a0e <_sk_load_f16_dst_avx+0x59>
PUBLIC _sk_gather_f16_avx
_sk_gather_f16_avx LABEL PROC
@@ -12437,13 +13076,13 @@ _sk_gather_f16_avx LABEL PROC
DB 197,249,105,201 ; vpunpckhwd %xmm1,%xmm0,%xmm1
DB 196,226,121,51,192 ; vpmovzxwd %xmm0,%xmm0
DB 196,227,125,24,193,1 ; vinsertf128 $0x1,%xmm1,%ymm0,%ymm0
- DB 196,98,125,24,37,192,88,0,0 ; vbroadcastss 0x58c0(%rip),%ymm12 # b31c <_sk_clut_4D_avx+0x2543>
+ DB 196,98,125,24,37,194,88,0,0 ; vbroadcastss 0x58c2(%rip),%ymm12 # b6d0 <_sk_clut_4D_avx+0x2543>
DB 196,193,124,84,204 ; vandps %ymm12,%ymm0,%ymm1
DB 197,252,87,193 ; vxorps %ymm1,%ymm0,%ymm0
DB 196,195,125,25,198,1 ; vextractf128 $0x1,%ymm0,%xmm14
- DB 196,98,121,24,29,172,88,0,0 ; vbroadcastss 0x58ac(%rip),%xmm11 # b320 <_sk_clut_4D_avx+0x2547>
+ DB 196,98,121,24,29,174,88,0,0 ; vbroadcastss 0x58ae(%rip),%xmm11 # b6d4 <_sk_clut_4D_avx+0x2547>
DB 196,193,8,87,219 ; vxorps %xmm11,%xmm14,%xmm3
- DB 196,98,121,24,45,162,88,0,0 ; vbroadcastss 0x58a2(%rip),%xmm13 # b324 <_sk_clut_4D_avx+0x254b>
+ DB 196,98,121,24,45,164,88,0,0 ; vbroadcastss 0x58a4(%rip),%xmm13 # b6d8 <_sk_clut_4D_avx+0x254b>
DB 197,145,102,219 ; vpcmpgtd %xmm3,%xmm13,%xmm3
DB 196,65,120,87,211 ; vxorps %xmm11,%xmm0,%xmm10
DB 196,65,17,102,210 ; vpcmpgtd %xmm10,%xmm13,%xmm10
@@ -12457,7 +13096,7 @@ _sk_gather_f16_avx LABEL PROC
DB 196,227,125,24,195,1 ; vinsertf128 $0x1,%xmm3,%ymm0,%ymm0
DB 197,252,86,193 ; vorps %ymm1,%ymm0,%ymm0
DB 196,227,125,25,193,1 ; vextractf128 $0x1,%ymm0,%xmm1
- DB 196,226,121,24,29,88,88,0,0 ; vbroadcastss 0x5858(%rip),%xmm3 # b328 <_sk_clut_4D_avx+0x254f>
+ DB 196,226,121,24,29,90,88,0,0 ; vbroadcastss 0x585a(%rip),%xmm3 # b6dc <_sk_clut_4D_avx+0x254f>
DB 197,241,254,203 ; vpaddd %xmm3,%xmm1,%xmm1
DB 197,249,254,195 ; vpaddd %xmm3,%xmm0,%xmm0
DB 196,227,125,24,193,1 ; vinsertf128 $0x1,%xmm1,%ymm0,%ymm0
@@ -12555,12 +13194,12 @@ _sk_store_f16_avx LABEL PROC
DB 197,252,17,180,36,128,0,0,0 ; vmovups %ymm6,0x80(%rsp)
DB 197,252,17,108,36,96 ; vmovups %ymm5,0x60(%rsp)
DB 197,252,17,100,36,64 ; vmovups %ymm4,0x40(%rsp)
- DB 196,98,125,24,13,108,86,0,0 ; vbroadcastss 0x566c(%rip),%ymm9 # b32c <_sk_clut_4D_avx+0x2553>
+ DB 196,98,125,24,13,110,86,0,0 ; vbroadcastss 0x566e(%rip),%ymm9 # b6e0 <_sk_clut_4D_avx+0x2553>
DB 196,65,124,84,209 ; vandps %ymm9,%ymm0,%ymm10
DB 197,252,17,4,36 ; vmovups %ymm0,(%rsp)
DB 196,65,124,87,218 ; vxorps %ymm10,%ymm0,%ymm11
DB 196,67,125,25,220,1 ; vextractf128 $0x1,%ymm11,%xmm12
- DB 196,98,121,24,5,82,86,0,0 ; vbroadcastss 0x5652(%rip),%xmm8 # b330 <_sk_clut_4D_avx+0x2557>
+ DB 196,98,121,24,5,84,86,0,0 ; vbroadcastss 0x5654(%rip),%xmm8 # b6e4 <_sk_clut_4D_avx+0x2557>
DB 196,65,57,102,236 ; vpcmpgtd %xmm12,%xmm8,%xmm13
DB 196,65,57,102,243 ; vpcmpgtd %xmm11,%xmm8,%xmm14
DB 196,67,13,24,237,1 ; vinsertf128 $0x1,%xmm13,%ymm14,%ymm13
@@ -12570,7 +13209,7 @@ _sk_store_f16_avx LABEL PROC
DB 196,67,13,24,242,1 ; vinsertf128 $0x1,%xmm10,%ymm14,%ymm14
DB 196,193,33,114,211,13 ; vpsrld $0xd,%xmm11,%xmm11
DB 196,193,25,114,212,13 ; vpsrld $0xd,%xmm12,%xmm12
- DB 196,98,125,24,21,25,86,0,0 ; vbroadcastss 0x5619(%rip),%ymm10 # b334 <_sk_clut_4D_avx+0x255b>
+ DB 196,98,125,24,21,27,86,0,0 ; vbroadcastss 0x561b(%rip),%ymm10 # b6e8 <_sk_clut_4D_avx+0x255b>
DB 196,65,12,86,242 ; vorps %ymm10,%ymm14,%ymm14
DB 196,67,125,25,247,1 ; vextractf128 $0x1,%ymm14,%xmm15
DB 196,65,1,254,228 ; vpaddd %xmm12,%xmm15,%xmm12
@@ -12657,7 +13296,7 @@ _sk_store_f16_avx LABEL PROC
DB 76,3,8 ; add (%rax),%r9
DB 72,99,194 ; movslq %edx,%rax
DB 77,133,192 ; test %r8,%r8
- DB 117,79 ; jne 5f18 <_sk_store_f16_avx+0x286>
+ DB 117,79 ; jne 62ca <_sk_store_f16_avx+0x286>
DB 196,65,120,17,28,193 ; vmovups %xmm11,(%r9,%rax,8)
DB 196,65,120,17,84,193,16 ; vmovups %xmm10,0x10(%r9,%rax,8)
DB 196,65,120,17,76,193,32 ; vmovups %xmm9,0x20(%r9,%rax,8)
@@ -12673,22 +13312,22 @@ _sk_store_f16_avx LABEL PROC
DB 255,224 ; jmpq *%rax
DB 196,65,121,214,28,193 ; vmovq %xmm11,(%r9,%rax,8)
DB 73,131,248,1 ; cmp $0x1,%r8
- DB 116,192 ; je 5ee4 <_sk_store_f16_avx+0x252>
+ DB 116,192 ; je 6296 <_sk_store_f16_avx+0x252>
DB 196,65,121,23,92,193,8 ; vmovhpd %xmm11,0x8(%r9,%rax,8)
DB 73,131,248,3 ; cmp $0x3,%r8
- DB 114,179 ; jb 5ee4 <_sk_store_f16_avx+0x252>
+ DB 114,179 ; jb 6296 <_sk_store_f16_avx+0x252>
DB 196,65,121,214,84,193,16 ; vmovq %xmm10,0x10(%r9,%rax,8)
- DB 116,170 ; je 5ee4 <_sk_store_f16_avx+0x252>
+ DB 116,170 ; je 6296 <_sk_store_f16_avx+0x252>
DB 196,65,121,23,84,193,24 ; vmovhpd %xmm10,0x18(%r9,%rax,8)
DB 73,131,248,5 ; cmp $0x5,%r8
- DB 114,157 ; jb 5ee4 <_sk_store_f16_avx+0x252>
+ DB 114,157 ; jb 6296 <_sk_store_f16_avx+0x252>
DB 196,65,121,214,76,193,32 ; vmovq %xmm9,0x20(%r9,%rax,8)
- DB 116,148 ; je 5ee4 <_sk_store_f16_avx+0x252>
+ DB 116,148 ; je 6296 <_sk_store_f16_avx+0x252>
DB 196,65,121,23,76,193,40 ; vmovhpd %xmm9,0x28(%r9,%rax,8)
DB 73,131,248,7 ; cmp $0x7,%r8
- DB 114,135 ; jb 5ee4 <_sk_store_f16_avx+0x252>
+ DB 114,135 ; jb 6296 <_sk_store_f16_avx+0x252>
DB 196,65,121,214,68,193,48 ; vmovq %xmm8,0x30(%r9,%rax,8)
- DB 233,123,255,255,255 ; jmpq 5ee4 <_sk_store_f16_avx+0x252>
+ DB 233,123,255,255,255 ; jmpq 6296 <_sk_store_f16_avx+0x252>
PUBLIC _sk_load_u16_be_avx
_sk_load_u16_be_avx LABEL PROC
@@ -12701,7 +13340,7 @@ _sk_load_u16_be_avx LABEL PROC
DB 76,3,8 ; add (%rax),%r9
DB 73,99,194 ; movslq %r10d,%rax
DB 77,133,192 ; test %r8,%r8
- DB 15,133,253,0,0,0 ; jne 608d <_sk_load_u16_be_avx+0x124>
+ DB 15,133,253,0,0,0 ; jne 643f <_sk_load_u16_be_avx+0x124>
DB 196,65,121,16,4,65 ; vmovupd (%r9,%rax,2),%xmm8
DB 196,193,121,16,84,65,16 ; vmovupd 0x10(%r9,%rax,2),%xmm2
DB 196,193,121,16,92,65,32 ; vmovupd 0x20(%r9,%rax,2),%xmm3
@@ -12723,7 +13362,7 @@ _sk_load_u16_be_avx LABEL PROC
DB 196,226,121,51,192 ; vpmovzxwd %xmm0,%xmm0
DB 196,227,125,24,193,1 ; vinsertf128 $0x1,%xmm1,%ymm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,98,125,24,29,55,83,0,0 ; vbroadcastss 0x5337(%rip),%ymm11 # b338 <_sk_clut_4D_avx+0x255f>
+ DB 196,98,125,24,29,57,83,0,0 ; vbroadcastss 0x5339(%rip),%ymm11 # b6ec <_sk_clut_4D_avx+0x255f>
DB 196,193,124,89,195 ; vmulps %ymm11,%ymm0,%ymm0
DB 197,177,109,202 ; vpunpckhqdq %xmm2,%xmm9,%xmm1
DB 197,233,113,241,8 ; vpsllw $0x8,%xmm1,%xmm2
@@ -12757,29 +13396,29 @@ _sk_load_u16_be_avx LABEL PROC
DB 196,65,123,16,4,65 ; vmovsd (%r9,%rax,2),%xmm8
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 73,131,248,1 ; cmp $0x1,%r8
- DB 116,85 ; je 60f3 <_sk_load_u16_be_avx+0x18a>
+ DB 116,85 ; je 64a5 <_sk_load_u16_be_avx+0x18a>
DB 196,65,57,22,68,65,8 ; vmovhpd 0x8(%r9,%rax,2),%xmm8,%xmm8
DB 73,131,248,3 ; cmp $0x3,%r8
- DB 114,72 ; jb 60f3 <_sk_load_u16_be_avx+0x18a>
+ DB 114,72 ; jb 64a5 <_sk_load_u16_be_avx+0x18a>
DB 196,193,123,16,84,65,16 ; vmovsd 0x10(%r9,%rax,2),%xmm2
DB 73,131,248,3 ; cmp $0x3,%r8
- DB 116,72 ; je 6100 <_sk_load_u16_be_avx+0x197>
+ DB 116,72 ; je 64b2 <_sk_load_u16_be_avx+0x197>
DB 196,193,105,22,84,65,24 ; vmovhpd 0x18(%r9,%rax,2),%xmm2,%xmm2
DB 73,131,248,5 ; cmp $0x5,%r8
- DB 114,59 ; jb 6100 <_sk_load_u16_be_avx+0x197>
+ DB 114,59 ; jb 64b2 <_sk_load_u16_be_avx+0x197>
DB 196,193,123,16,92,65,32 ; vmovsd 0x20(%r9,%rax,2),%xmm3
DB 73,131,248,5 ; cmp $0x5,%r8
- DB 15,132,213,254,255,255 ; je 5fab <_sk_load_u16_be_avx+0x42>
+ DB 15,132,213,254,255,255 ; je 635d <_sk_load_u16_be_avx+0x42>
DB 196,193,97,22,92,65,40 ; vmovhpd 0x28(%r9,%rax,2),%xmm3,%xmm3
DB 73,131,248,7 ; cmp $0x7,%r8
- DB 15,130,196,254,255,255 ; jb 5fab <_sk_load_u16_be_avx+0x42>
+ DB 15,130,196,254,255,255 ; jb 635d <_sk_load_u16_be_avx+0x42>
DB 196,65,122,126,76,65,48 ; vmovq 0x30(%r9,%rax,2),%xmm9
- DB 233,184,254,255,255 ; jmpq 5fab <_sk_load_u16_be_avx+0x42>
+ DB 233,184,254,255,255 ; jmpq 635d <_sk_load_u16_be_avx+0x42>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2
- DB 233,171,254,255,255 ; jmpq 5fab <_sk_load_u16_be_avx+0x42>
+ DB 233,171,254,255,255 ; jmpq 635d <_sk_load_u16_be_avx+0x42>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
- DB 233,162,254,255,255 ; jmpq 5fab <_sk_load_u16_be_avx+0x42>
+ DB 233,162,254,255,255 ; jmpq 635d <_sk_load_u16_be_avx+0x42>
PUBLIC _sk_load_rgb_u16_be_avx
_sk_load_rgb_u16_be_avx LABEL PROC
@@ -12794,7 +13433,7 @@ _sk_load_rgb_u16_be_avx LABEL PROC
DB 72,141,4,64 ; lea (%rax,%rax,2),%rax
DB 72,193,248,32 ; sar $0x20,%rax
DB 77,133,192 ; test %r8,%r8
- DB 15,133,243,0,0,0 ; jne 6227 <_sk_load_rgb_u16_be_avx+0x11e>
+ DB 15,133,243,0,0,0 ; jne 65d9 <_sk_load_rgb_u16_be_avx+0x11e>
DB 196,193,122,111,4,65 ; vmovdqu (%r9,%rax,2),%xmm0
DB 196,193,122,111,84,65,12 ; vmovdqu 0xc(%r9,%rax,2),%xmm2
DB 196,193,122,111,76,65,24 ; vmovdqu 0x18(%r9,%rax,2),%xmm1
@@ -12821,7 +13460,7 @@ _sk_load_rgb_u16_be_avx LABEL PROC
DB 196,226,121,51,192 ; vpmovzxwd %xmm0,%xmm0
DB 196,227,125,24,193,1 ; vinsertf128 $0x1,%xmm1,%ymm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
- DB 196,98,125,24,29,126,81,0,0 ; vbroadcastss 0x517e(%rip),%ymm11 # b33c <_sk_clut_4D_avx+0x2563>
+ DB 196,98,125,24,29,128,81,0,0 ; vbroadcastss 0x5180(%rip),%ymm11 # b6f0 <_sk_clut_4D_avx+0x2563>
DB 196,193,124,89,195 ; vmulps %ymm11,%ymm0,%ymm0
DB 197,185,109,202 ; vpunpckhqdq %xmm2,%xmm8,%xmm1
DB 197,233,113,241,8 ; vpsllw $0x8,%xmm1,%xmm2
@@ -12842,41 +13481,41 @@ _sk_load_rgb_u16_be_avx LABEL PROC
DB 197,252,91,210 ; vcvtdq2ps %ymm2,%ymm2
DB 196,193,108,89,211 ; vmulps %ymm11,%ymm2,%ymm2
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,29,27,81,0,0 ; vbroadcastss 0x511b(%rip),%ymm3 # b340 <_sk_clut_4D_avx+0x2567>
+ DB 196,226,125,24,29,29,81,0,0 ; vbroadcastss 0x511d(%rip),%ymm3 # b6f4 <_sk_clut_4D_avx+0x2567>
DB 255,224 ; jmpq *%rax
DB 196,193,121,110,4,65 ; vmovd (%r9,%rax,2),%xmm0
DB 196,193,121,196,68,65,4,2 ; vpinsrw $0x2,0x4(%r9,%rax,2),%xmm0,%xmm0
DB 73,131,248,1 ; cmp $0x1,%r8
- DB 117,5 ; jne 6240 <_sk_load_rgb_u16_be_avx+0x137>
- DB 233,40,255,255,255 ; jmpq 6168 <_sk_load_rgb_u16_be_avx+0x5f>
+ DB 117,5 ; jne 65f2 <_sk_load_rgb_u16_be_avx+0x137>
+ DB 233,40,255,255,255 ; jmpq 651a <_sk_load_rgb_u16_be_avx+0x5f>
DB 196,193,121,110,76,65,6 ; vmovd 0x6(%r9,%rax,2),%xmm1
DB 196,65,113,196,68,65,10,2 ; vpinsrw $0x2,0xa(%r9,%rax,2),%xmm1,%xmm8
DB 73,131,248,3 ; cmp $0x3,%r8
- DB 114,26 ; jb 626f <_sk_load_rgb_u16_be_avx+0x166>
+ DB 114,26 ; jb 6621 <_sk_load_rgb_u16_be_avx+0x166>
DB 196,193,121,110,76,65,12 ; vmovd 0xc(%r9,%rax,2),%xmm1
DB 196,193,113,196,84,65,16,2 ; vpinsrw $0x2,0x10(%r9,%rax,2),%xmm1,%xmm2
DB 73,131,248,3 ; cmp $0x3,%r8
- DB 117,10 ; jne 6274 <_sk_load_rgb_u16_be_avx+0x16b>
- DB 233,249,254,255,255 ; jmpq 6168 <_sk_load_rgb_u16_be_avx+0x5f>
- DB 233,244,254,255,255 ; jmpq 6168 <_sk_load_rgb_u16_be_avx+0x5f>
+ DB 117,10 ; jne 6626 <_sk_load_rgb_u16_be_avx+0x16b>
+ DB 233,249,254,255,255 ; jmpq 651a <_sk_load_rgb_u16_be_avx+0x5f>
+ DB 233,244,254,255,255 ; jmpq 651a <_sk_load_rgb_u16_be_avx+0x5f>
DB 196,193,121,110,76,65,18 ; vmovd 0x12(%r9,%rax,2),%xmm1
DB 196,65,113,196,76,65,22,2 ; vpinsrw $0x2,0x16(%r9,%rax,2),%xmm1,%xmm9
DB 73,131,248,5 ; cmp $0x5,%r8
- DB 114,26 ; jb 62a3 <_sk_load_rgb_u16_be_avx+0x19a>
+ DB 114,26 ; jb 6655 <_sk_load_rgb_u16_be_avx+0x19a>
DB 196,193,121,110,76,65,24 ; vmovd 0x18(%r9,%rax,2),%xmm1
DB 196,193,113,196,76,65,28,2 ; vpinsrw $0x2,0x1c(%r9,%rax,2),%xmm1,%xmm1
DB 73,131,248,5 ; cmp $0x5,%r8
- DB 117,10 ; jne 62a8 <_sk_load_rgb_u16_be_avx+0x19f>
- DB 233,197,254,255,255 ; jmpq 6168 <_sk_load_rgb_u16_be_avx+0x5f>
- DB 233,192,254,255,255 ; jmpq 6168 <_sk_load_rgb_u16_be_avx+0x5f>
+ DB 117,10 ; jne 665a <_sk_load_rgb_u16_be_avx+0x19f>
+ DB 233,197,254,255,255 ; jmpq 651a <_sk_load_rgb_u16_be_avx+0x5f>
+ DB 233,192,254,255,255 ; jmpq 651a <_sk_load_rgb_u16_be_avx+0x5f>
DB 196,193,121,110,92,65,30 ; vmovd 0x1e(%r9,%rax,2),%xmm3
DB 196,65,97,196,92,65,34,2 ; vpinsrw $0x2,0x22(%r9,%rax,2),%xmm3,%xmm11
DB 73,131,248,7 ; cmp $0x7,%r8
- DB 114,20 ; jb 62d1 <_sk_load_rgb_u16_be_avx+0x1c8>
+ DB 114,20 ; jb 6683 <_sk_load_rgb_u16_be_avx+0x1c8>
DB 196,193,121,110,92,65,36 ; vmovd 0x24(%r9,%rax,2),%xmm3
DB 196,193,97,196,92,65,40,2 ; vpinsrw $0x2,0x28(%r9,%rax,2),%xmm3,%xmm3
- DB 233,151,254,255,255 ; jmpq 6168 <_sk_load_rgb_u16_be_avx+0x5f>
- DB 233,146,254,255,255 ; jmpq 6168 <_sk_load_rgb_u16_be_avx+0x5f>
+ DB 233,151,254,255,255 ; jmpq 651a <_sk_load_rgb_u16_be_avx+0x5f>
+ DB 233,146,254,255,255 ; jmpq 651a <_sk_load_rgb_u16_be_avx+0x5f>
PUBLIC _sk_store_u16_be_avx
_sk_store_u16_be_avx LABEL PROC
@@ -12888,7 +13527,7 @@ _sk_store_u16_be_avx LABEL PROC
DB 77,1,201 ; add %r9,%r9
DB 76,3,8 ; add (%rax),%r9
DB 73,99,194 ; movslq %r10d,%rax
- DB 196,98,125,24,5,71,80,0,0 ; vbroadcastss 0x5047(%rip),%ymm8 # b344 <_sk_clut_4D_avx+0x256b>
+ DB 196,98,125,24,5,73,80,0,0 ; vbroadcastss 0x5049(%rip),%ymm8 # b6f8 <_sk_clut_4D_avx+0x256b>
DB 196,65,124,89,200 ; vmulps %ymm8,%ymm0,%ymm9
DB 196,65,125,91,201 ; vcvtps2dq %ymm9,%ymm9
DB 196,67,125,25,202,1 ; vextractf128 $0x1,%ymm9,%xmm10
@@ -12926,7 +13565,7 @@ _sk_store_u16_be_avx LABEL PROC
DB 196,65,17,98,200 ; vpunpckldq %xmm8,%xmm13,%xmm9
DB 196,65,17,106,192 ; vpunpckhdq %xmm8,%xmm13,%xmm8
DB 77,133,192 ; test %r8,%r8
- DB 117,31 ; jne 63e1 <_sk_store_u16_be_avx+0x10b>
+ DB 117,31 ; jne 6793 <_sk_store_u16_be_avx+0x10b>
DB 196,65,120,17,28,65 ; vmovups %xmm11,(%r9,%rax,2)
DB 196,65,120,17,84,65,16 ; vmovups %xmm10,0x10(%r9,%rax,2)
DB 196,65,120,17,76,65,32 ; vmovups %xmm9,0x20(%r9,%rax,2)
@@ -12935,28 +13574,28 @@ _sk_store_u16_be_avx LABEL PROC
DB 255,224 ; jmpq *%rax
DB 196,65,121,214,28,65 ; vmovq %xmm11,(%r9,%rax,2)
DB 73,131,248,1 ; cmp $0x1,%r8
- DB 116,240 ; je 63dd <_sk_store_u16_be_avx+0x107>
+ DB 116,240 ; je 678f <_sk_store_u16_be_avx+0x107>
DB 196,65,121,23,92,65,8 ; vmovhpd %xmm11,0x8(%r9,%rax,2)
DB 73,131,248,3 ; cmp $0x3,%r8
- DB 114,227 ; jb 63dd <_sk_store_u16_be_avx+0x107>
+ DB 114,227 ; jb 678f <_sk_store_u16_be_avx+0x107>
DB 196,65,121,214,84,65,16 ; vmovq %xmm10,0x10(%r9,%rax,2)
- DB 116,218 ; je 63dd <_sk_store_u16_be_avx+0x107>
+ DB 116,218 ; je 678f <_sk_store_u16_be_avx+0x107>
DB 196,65,121,23,84,65,24 ; vmovhpd %xmm10,0x18(%r9,%rax,2)
DB 73,131,248,5 ; cmp $0x5,%r8
- DB 114,205 ; jb 63dd <_sk_store_u16_be_avx+0x107>
+ DB 114,205 ; jb 678f <_sk_store_u16_be_avx+0x107>
DB 196,65,121,214,76,65,32 ; vmovq %xmm9,0x20(%r9,%rax,2)
- DB 116,196 ; je 63dd <_sk_store_u16_be_avx+0x107>
+ DB 116,196 ; je 678f <_sk_store_u16_be_avx+0x107>
DB 196,65,121,23,76,65,40 ; vmovhpd %xmm9,0x28(%r9,%rax,2)
DB 73,131,248,7 ; cmp $0x7,%r8
- DB 114,183 ; jb 63dd <_sk_store_u16_be_avx+0x107>
+ DB 114,183 ; jb 678f <_sk_store_u16_be_avx+0x107>
DB 196,65,121,214,68,65,48 ; vmovq %xmm8,0x30(%r9,%rax,2)
- DB 235,174 ; jmp 63dd <_sk_store_u16_be_avx+0x107>
+ DB 235,174 ; jmp 678f <_sk_store_u16_be_avx+0x107>
PUBLIC _sk_load_f32_avx
_sk_load_f32_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 73,131,248,7 ; cmp $0x7,%r8
- DB 15,135,128,0,0,0 ; ja 64bb <_sk_load_f32_avx+0x8c>
+ DB 15,135,128,0,0,0 ; ja 686d <_sk_load_f32_avx+0x8c>
DB 68,141,20,149,0,0,0,0 ; lea 0x0(,%rdx,4),%r10d
DB 76,99,88,8 ; movslq 0x8(%rax),%r11
DB 76,99,201 ; movslq %ecx,%r9
@@ -12964,7 +13603,7 @@ _sk_load_f32_avx LABEL PROC
DB 73,193,225,2 ; shl $0x2,%r9
DB 76,3,8 ; add (%rax),%r9
DB 77,99,210 ; movslq %r10d,%r10
- DB 76,141,29,133,0,0,0 ; lea 0x85(%rip),%r11 # 64e4 <_sk_load_f32_avx+0xb5>
+ DB 76,141,29,135,0,0,0 ; lea 0x87(%rip),%r11 # 6898 <_sk_load_f32_avx+0xb7>
DB 75,99,4,131 ; movslq (%r11,%r8,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
@@ -12990,19 +13629,22 @@ _sk_load_f32_avx LABEL PROC
DB 196,193,101,21,216 ; vunpckhpd %ymm8,%ymm3,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
- DB 144 ; nop
- DB 132,255 ; test %bh,%bh
+ DB 15,31,0 ; nopl (%rax)
+ DB 130 ; (bad)
DB 255 ; (bad)
- DB 255,203 ; dec %ebx
DB 255 ; (bad)
+ DB 255,201 ; dec %ecx
DB 255 ; (bad)
DB 255 ; (bad)
- DB 190,255,255,255,177 ; mov $0xb1ffffff,%esi
DB 255 ; (bad)
+ DB 188,255,255,255,175 ; mov $0xafffffff,%esp
DB 255 ; (bad)
- DB 255,164,255,255,255,156,255 ; jmpq *-0x630001(%rdi,%rdi,8)
DB 255 ; (bad)
- DB 255,148,255,255,255,140,255 ; callq *-0x730001(%rdi,%rdi,8)
+ DB 255,162,255,255,255,154 ; jmpq *-0x65000001(%rdx)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,146,255,255,255,138 ; callq *-0x75000001(%rdx)
+ DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; .byte 0xff
@@ -13010,7 +13652,7 @@ PUBLIC _sk_load_f32_dst_avx
_sk_load_f32_dst_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 73,131,248,7 ; cmp $0x7,%r8
- DB 15,135,128,0,0,0 ; ja 6590 <_sk_load_f32_dst_avx+0x8c>
+ DB 15,135,128,0,0,0 ; ja 6944 <_sk_load_f32_dst_avx+0x8c>
DB 68,141,20,149,0,0,0,0 ; lea 0x0(,%rdx,4),%r10d
DB 76,99,88,8 ; movslq 0x8(%rax),%r11
DB 76,99,201 ; movslq %ecx,%r9
@@ -13018,7 +13660,7 @@ _sk_load_f32_dst_avx LABEL PROC
DB 73,193,225,2 ; shl $0x2,%r9
DB 76,3,8 ; add (%rax),%r9
DB 77,99,210 ; movslq %r10d,%r10
- DB 76,141,29,132,0,0,0 ; lea 0x84(%rip),%r11 # 65b8 <_sk_load_f32_dst_avx+0xb4>
+ DB 76,141,29,132,0,0,0 ; lea 0x84(%rip),%r11 # 696c <_sk_load_f32_dst_avx+0xb4>
DB 75,99,4,131 ; movslq (%r11,%r8,4),%rax
DB 76,1,216 ; add %r11,%rax
DB 255,224 ; jmpq *%rax
@@ -13080,7 +13722,7 @@ _sk_store_f32_avx LABEL PROC
DB 196,65,37,20,196 ; vunpcklpd %ymm12,%ymm11,%ymm8
DB 196,65,37,21,220 ; vunpckhpd %ymm12,%ymm11,%ymm11
DB 77,133,192 ; test %r8,%r8
- DB 117,55 ; jne 6657 <_sk_store_f32_avx+0x7f>
+ DB 117,55 ; jne 6a0b <_sk_store_f32_avx+0x7f>
DB 196,67,45,24,225,1 ; vinsertf128 $0x1,%xmm9,%ymm10,%ymm12
DB 196,67,61,24,235,1 ; vinsertf128 $0x1,%xmm11,%ymm8,%ymm13
DB 196,67,45,6,201,49 ; vperm2f128 $0x31,%ymm9,%ymm10,%ymm9
@@ -13093,22 +13735,22 @@ _sk_store_f32_avx LABEL PROC
DB 255,224 ; jmpq *%rax
DB 196,65,121,17,20,129 ; vmovupd %xmm10,(%r9,%rax,4)
DB 73,131,248,1 ; cmp $0x1,%r8
- DB 116,240 ; je 6653 <_sk_store_f32_avx+0x7b>
+ DB 116,240 ; je 6a07 <_sk_store_f32_avx+0x7b>
DB 196,65,121,17,76,129,16 ; vmovupd %xmm9,0x10(%r9,%rax,4)
DB 73,131,248,3 ; cmp $0x3,%r8
- DB 114,227 ; jb 6653 <_sk_store_f32_avx+0x7b>
+ DB 114,227 ; jb 6a07 <_sk_store_f32_avx+0x7b>
DB 196,65,121,17,68,129,32 ; vmovupd %xmm8,0x20(%r9,%rax,4)
- DB 116,218 ; je 6653 <_sk_store_f32_avx+0x7b>
+ DB 116,218 ; je 6a07 <_sk_store_f32_avx+0x7b>
DB 196,65,121,17,92,129,48 ; vmovupd %xmm11,0x30(%r9,%rax,4)
DB 73,131,248,5 ; cmp $0x5,%r8
- DB 114,205 ; jb 6653 <_sk_store_f32_avx+0x7b>
+ DB 114,205 ; jb 6a07 <_sk_store_f32_avx+0x7b>
DB 196,67,125,25,84,129,64,1 ; vextractf128 $0x1,%ymm10,0x40(%r9,%rax,4)
- DB 116,195 ; je 6653 <_sk_store_f32_avx+0x7b>
+ DB 116,195 ; je 6a07 <_sk_store_f32_avx+0x7b>
DB 196,67,125,25,76,129,80,1 ; vextractf128 $0x1,%ymm9,0x50(%r9,%rax,4)
DB 73,131,248,7 ; cmp $0x7,%r8
- DB 114,181 ; jb 6653 <_sk_store_f32_avx+0x7b>
+ DB 114,181 ; jb 6a07 <_sk_store_f32_avx+0x7b>
DB 196,67,125,25,68,129,96,1 ; vextractf128 $0x1,%ymm8,0x60(%r9,%rax,4)
- DB 235,171 ; jmp 6653 <_sk_store_f32_avx+0x7b>
+ DB 235,171 ; jmp 6a07 <_sk_store_f32_avx+0x7b>
PUBLIC _sk_clamp_x_avx
_sk_clamp_x_avx LABEL PROC
@@ -13191,7 +13833,7 @@ _sk_mirror_x_avx LABEL PROC
DB 196,193,58,88,192 ; vaddss %xmm8,%xmm8,%xmm0
DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
DB 196,99,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm8
- DB 197,178,89,5,107,75,0,0 ; vmulss 0x4b6b(%rip),%xmm9,%xmm0 # b348 <_sk_clut_4D_avx+0x256f>
+ DB 197,178,89,5,107,75,0,0 ; vmulss 0x4b6b(%rip),%xmm9,%xmm0 # b6fc <_sk_clut_4D_avx+0x256f>
DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
DB 197,164,89,192 ; vmulps %ymm0,%ymm11,%ymm0
@@ -13223,7 +13865,7 @@ _sk_mirror_y_avx LABEL PROC
DB 196,193,58,88,200 ; vaddss %xmm8,%xmm8,%xmm1
DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
DB 196,99,117,24,193,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm8
- DB 197,178,89,13,226,74,0,0 ; vmulss 0x4ae2(%rip),%xmm9,%xmm1 # b34c <_sk_clut_4D_avx+0x2573>
+ DB 197,178,89,13,226,74,0,0 ; vmulss 0x4ae2(%rip),%xmm9,%xmm1 # b700 <_sk_clut_4D_avx+0x2573>
DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
DB 197,164,89,201 ; vmulps %ymm1,%ymm11,%ymm1
@@ -13248,7 +13890,7 @@ PUBLIC _sk_clamp_x_1_avx
_sk_clamp_x_1_avx LABEL PROC
DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8
DB 197,188,95,192 ; vmaxps %ymm0,%ymm8,%ymm0
- DB 196,98,125,24,5,124,74,0,0 ; vbroadcastss 0x4a7c(%rip),%ymm8 # b350 <_sk_clut_4D_avx+0x2577>
+ DB 196,98,125,24,5,124,74,0,0 ; vbroadcastss 0x4a7c(%rip),%ymm8 # b704 <_sk_clut_4D_avx+0x2577>
DB 196,193,124,93,192 ; vminps %ymm8,%ymm0,%ymm0
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -13262,9 +13904,9 @@ _sk_repeat_x_1_avx LABEL PROC
PUBLIC _sk_mirror_x_1_avx
_sk_mirror_x_1_avx LABEL PROC
- DB 196,98,125,24,5,95,74,0,0 ; vbroadcastss 0x4a5f(%rip),%ymm8 # b354 <_sk_clut_4D_avx+0x257b>
+ DB 196,98,125,24,5,95,74,0,0 ; vbroadcastss 0x4a5f(%rip),%ymm8 # b708 <_sk_clut_4D_avx+0x257b>
DB 196,193,124,88,192 ; vaddps %ymm8,%ymm0,%ymm0
- DB 196,98,125,24,13,85,74,0,0 ; vbroadcastss 0x4a55(%rip),%ymm9 # b358 <_sk_clut_4D_avx+0x257f>
+ DB 196,98,125,24,13,85,74,0,0 ; vbroadcastss 0x4a55(%rip),%ymm9 # b70c <_sk_clut_4D_avx+0x257f>
DB 196,65,124,89,201 ; vmulps %ymm9,%ymm0,%ymm9
DB 196,67,125,8,201,1 ; vroundps $0x1,%ymm9,%ymm9
DB 196,65,52,88,201 ; vaddps %ymm9,%ymm9,%ymm9
@@ -13278,12 +13920,12 @@ _sk_mirror_x_1_avx LABEL PROC
PUBLIC _sk_luminance_to_alpha_avx
_sk_luminance_to_alpha_avx LABEL PROC
- DB 196,226,125,24,29,37,74,0,0 ; vbroadcastss 0x4a25(%rip),%ymm3 # b35c <_sk_clut_4D_avx+0x2583>
+ DB 196,226,125,24,29,37,74,0,0 ; vbroadcastss 0x4a25(%rip),%ymm3 # b710 <_sk_clut_4D_avx+0x2583>
DB 197,252,89,195 ; vmulps %ymm3,%ymm0,%ymm0
- DB 196,226,125,24,29,28,74,0,0 ; vbroadcastss 0x4a1c(%rip),%ymm3 # b360 <_sk_clut_4D_avx+0x2587>
+ DB 196,226,125,24,29,28,74,0,0 ; vbroadcastss 0x4a1c(%rip),%ymm3 # b714 <_sk_clut_4D_avx+0x2587>
DB 197,244,89,203 ; vmulps %ymm3,%ymm1,%ymm1
DB 197,252,88,193 ; vaddps %ymm1,%ymm0,%ymm0
- DB 196,226,125,24,13,15,74,0,0 ; vbroadcastss 0x4a0f(%rip),%ymm1 # b364 <_sk_clut_4D_avx+0x258b>
+ DB 196,226,125,24,13,15,74,0,0 ; vbroadcastss 0x4a0f(%rip),%ymm1 # b718 <_sk_clut_4D_avx+0x258b>
DB 197,236,89,201 ; vmulps %ymm1,%ymm2,%ymm1
DB 197,252,88,217 ; vaddps %ymm1,%ymm0,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -13516,9 +14158,9 @@ _sk_evenly_spaced_gradient_avx LABEL PROC
DB 72,139,24 ; mov (%rax),%rbx
DB 72,139,104,8 ; mov 0x8(%rax),%rbp
DB 72,255,203 ; dec %rbx
- DB 120,7 ; js 6cfc <_sk_evenly_spaced_gradient_avx+0x28>
+ DB 120,7 ; js 70b0 <_sk_evenly_spaced_gradient_avx+0x28>
DB 196,225,242,42,203 ; vcvtsi2ss %rbx,%xmm1,%xmm1
- DB 235,21 ; jmp 6d11 <_sk_evenly_spaced_gradient_avx+0x3d>
+ DB 235,21 ; jmp 70c5 <_sk_evenly_spaced_gradient_avx+0x3d>
DB 73,137,217 ; mov %rbx,%r9
DB 73,209,233 ; shr %r9
DB 131,227,1 ; and $0x1,%ebx
@@ -13675,18 +14317,18 @@ _sk_evenly_spaced_gradient_avx LABEL PROC
PUBLIC _sk_gauss_a_to_rgba_avx
_sk_gauss_a_to_rgba_avx LABEL PROC
- DB 196,226,125,24,5,240,66,0,0 ; vbroadcastss 0x42f0(%rip),%ymm0 # b368 <_sk_clut_4D_avx+0x258f>
+ DB 196,226,125,24,5,240,66,0,0 ; vbroadcastss 0x42f0(%rip),%ymm0 # b71c <_sk_clut_4D_avx+0x258f>
DB 197,228,89,192 ; vmulps %ymm0,%ymm3,%ymm0
- DB 196,226,125,24,13,231,66,0,0 ; vbroadcastss 0x42e7(%rip),%ymm1 # b36c <_sk_clut_4D_avx+0x2593>
+ DB 196,226,125,24,13,231,66,0,0 ; vbroadcastss 0x42e7(%rip),%ymm1 # b720 <_sk_clut_4D_avx+0x2593>
DB 197,252,88,193 ; vaddps %ymm1,%ymm0,%ymm0
DB 197,252,89,195 ; vmulps %ymm3,%ymm0,%ymm0
- DB 196,226,125,24,13,218,66,0,0 ; vbroadcastss 0x42da(%rip),%ymm1 # b370 <_sk_clut_4D_avx+0x2597>
+ DB 196,226,125,24,13,218,66,0,0 ; vbroadcastss 0x42da(%rip),%ymm1 # b724 <_sk_clut_4D_avx+0x2597>
DB 197,252,88,193 ; vaddps %ymm1,%ymm0,%ymm0
DB 197,252,89,195 ; vmulps %ymm3,%ymm0,%ymm0
- DB 196,226,125,24,13,205,66,0,0 ; vbroadcastss 0x42cd(%rip),%ymm1 # b374 <_sk_clut_4D_avx+0x259b>
+ DB 196,226,125,24,13,205,66,0,0 ; vbroadcastss 0x42cd(%rip),%ymm1 # b728 <_sk_clut_4D_avx+0x259b>
DB 197,252,88,193 ; vaddps %ymm1,%ymm0,%ymm0
DB 197,252,89,195 ; vmulps %ymm3,%ymm0,%ymm0
- DB 196,226,125,24,13,192,66,0,0 ; vbroadcastss 0x42c0(%rip),%ymm1 # b378 <_sk_clut_4D_avx+0x259f>
+ DB 196,226,125,24,13,192,66,0,0 ; vbroadcastss 0x42c0(%rip),%ymm1 # b72c <_sk_clut_4D_avx+0x259f>
DB 197,252,88,193 ; vaddps %ymm1,%ymm0,%ymm0
DB 72,173 ; lods %ds:(%rsi),%rax
DB 197,252,40,200 ; vmovaps %ymm0,%ymm1
@@ -13708,12 +14350,12 @@ _sk_gradient_avx LABEL PROC
DB 76,139,8 ; mov (%rax),%r9
DB 197,244,87,201 ; vxorps %ymm1,%ymm1,%ymm1
DB 73,131,249,2 ; cmp $0x2,%r9
- DB 114,80 ; jb 713e <_sk_gradient_avx+0x72>
+ DB 114,80 ; jb 74f2 <_sk_gradient_avx+0x72>
DB 72,139,88,72 ; mov 0x48(%rax),%rbx
DB 73,255,201 ; dec %r9
DB 72,131,195,4 ; add $0x4,%rbx
DB 196,65,52,87,201 ; vxorps %ymm9,%ymm9,%ymm9
- DB 196,98,125,24,21,117,66,0,0 ; vbroadcastss 0x4275(%rip),%ymm10 # b37c <_sk_clut_4D_avx+0x25a3>
+ DB 196,98,125,24,21,117,66,0,0 ; vbroadcastss 0x4275(%rip),%ymm10 # b730 <_sk_clut_4D_avx+0x25a3>
DB 197,244,87,201 ; vxorps %ymm1,%ymm1,%ymm1
DB 196,98,125,24,3 ; vbroadcastss (%rbx),%ymm8
DB 197,60,194,192,2 ; vcmpleps %ymm0,%ymm8,%ymm8
@@ -13725,7 +14367,7 @@ _sk_gradient_avx LABEL PROC
DB 196,227,117,24,202,1 ; vinsertf128 $0x1,%xmm2,%ymm1,%ymm1
DB 72,131,195,4 ; add $0x4,%rbx
DB 73,255,201 ; dec %r9
- DB 117,205 ; jne 710b <_sk_gradient_avx+0x3f>
+ DB 117,205 ; jne 74bf <_sk_gradient_avx+0x3f>
DB 196,195,249,22,201,1 ; vpextrq $0x1,%xmm1,%r9
DB 69,137,202 ; mov %r9d,%r10d
DB 73,193,233,32 ; shr $0x20,%r9
@@ -13905,27 +14547,27 @@ _sk_xy_to_unit_angle_avx LABEL PROC
DB 196,65,52,95,226 ; vmaxps %ymm10,%ymm9,%ymm12
DB 196,65,36,94,220 ; vdivps %ymm12,%ymm11,%ymm11
DB 196,65,36,89,227 ; vmulps %ymm11,%ymm11,%ymm12
- DB 196,98,125,24,45,104,62,0,0 ; vbroadcastss 0x3e68(%rip),%ymm13 # b380 <_sk_clut_4D_avx+0x25a7>
+ DB 196,98,125,24,45,104,62,0,0 ; vbroadcastss 0x3e68(%rip),%ymm13 # b734 <_sk_clut_4D_avx+0x25a7>
DB 196,65,28,89,237 ; vmulps %ymm13,%ymm12,%ymm13
- DB 196,98,125,24,53,94,62,0,0 ; vbroadcastss 0x3e5e(%rip),%ymm14 # b384 <_sk_clut_4D_avx+0x25ab>
+ DB 196,98,125,24,53,94,62,0,0 ; vbroadcastss 0x3e5e(%rip),%ymm14 # b738 <_sk_clut_4D_avx+0x25ab>
DB 196,65,20,88,238 ; vaddps %ymm14,%ymm13,%ymm13
DB 196,65,28,89,237 ; vmulps %ymm13,%ymm12,%ymm13
- DB 196,98,125,24,53,79,62,0,0 ; vbroadcastss 0x3e4f(%rip),%ymm14 # b388 <_sk_clut_4D_avx+0x25af>
+ DB 196,98,125,24,53,79,62,0,0 ; vbroadcastss 0x3e4f(%rip),%ymm14 # b73c <_sk_clut_4D_avx+0x25af>
DB 196,65,20,88,238 ; vaddps %ymm14,%ymm13,%ymm13
DB 196,65,28,89,229 ; vmulps %ymm13,%ymm12,%ymm12
- DB 196,98,125,24,45,64,62,0,0 ; vbroadcastss 0x3e40(%rip),%ymm13 # b38c <_sk_clut_4D_avx+0x25b3>
+ DB 196,98,125,24,45,64,62,0,0 ; vbroadcastss 0x3e40(%rip),%ymm13 # b740 <_sk_clut_4D_avx+0x25b3>
DB 196,65,28,88,229 ; vaddps %ymm13,%ymm12,%ymm12
DB 196,65,36,89,220 ; vmulps %ymm12,%ymm11,%ymm11
DB 196,65,52,194,202,1 ; vcmpltps %ymm10,%ymm9,%ymm9
- DB 196,98,125,24,21,43,62,0,0 ; vbroadcastss 0x3e2b(%rip),%ymm10 # b390 <_sk_clut_4D_avx+0x25b7>
+ DB 196,98,125,24,21,43,62,0,0 ; vbroadcastss 0x3e2b(%rip),%ymm10 # b744 <_sk_clut_4D_avx+0x25b7>
DB 196,65,44,92,211 ; vsubps %ymm11,%ymm10,%ymm10
DB 196,67,37,74,202,144 ; vblendvps %ymm9,%ymm10,%ymm11,%ymm9
DB 196,193,124,194,192,1 ; vcmpltps %ymm8,%ymm0,%ymm0
- DB 196,98,125,24,21,21,62,0,0 ; vbroadcastss 0x3e15(%rip),%ymm10 # b394 <_sk_clut_4D_avx+0x25bb>
+ DB 196,98,125,24,21,21,62,0,0 ; vbroadcastss 0x3e15(%rip),%ymm10 # b748 <_sk_clut_4D_avx+0x25bb>
DB 196,65,44,92,209 ; vsubps %ymm9,%ymm10,%ymm10
DB 196,195,53,74,194,0 ; vblendvps %ymm0,%ymm10,%ymm9,%ymm0
DB 196,65,116,194,200,1 ; vcmpltps %ymm8,%ymm1,%ymm9
- DB 196,98,125,24,21,255,61,0,0 ; vbroadcastss 0x3dff(%rip),%ymm10 # b398 <_sk_clut_4D_avx+0x25bf>
+ DB 196,98,125,24,21,255,61,0,0 ; vbroadcastss 0x3dff(%rip),%ymm10 # b74c <_sk_clut_4D_avx+0x25bf>
DB 197,44,92,208 ; vsubps %ymm0,%ymm10,%ymm10
DB 196,195,125,74,194,144 ; vblendvps %ymm9,%ymm10,%ymm0,%ymm0
DB 196,65,124,194,200,3 ; vcmpunordps %ymm8,%ymm0,%ymm9
@@ -13951,7 +14593,7 @@ _sk_xy_to_2pt_conical_quadratic_max_avx LABEL PROC
DB 196,67,121,4,210,0 ; vpermilps $0x0,%xmm10,%xmm10
DB 196,67,45,24,210,1 ; vinsertf128 $0x1,%xmm10,%ymm10,%ymm10
DB 197,44,88,208 ; vaddps %ymm0,%ymm10,%ymm10
- DB 196,98,125,24,29,169,61,0,0 ; vbroadcastss 0x3da9(%rip),%ymm11 # b39c <_sk_clut_4D_avx+0x25c3>
+ DB 196,98,125,24,29,169,61,0,0 ; vbroadcastss 0x3da9(%rip),%ymm11 # b750 <_sk_clut_4D_avx+0x25c3>
DB 196,65,44,89,211 ; vmulps %ymm11,%ymm10,%ymm10
DB 197,252,89,192 ; vmulps %ymm0,%ymm0,%ymm0
DB 197,116,89,217 ; vmulps %ymm1,%ymm1,%ymm11
@@ -13960,17 +14602,17 @@ _sk_xy_to_2pt_conical_quadratic_max_avx LABEL PROC
DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
DB 197,164,92,192 ; vsubps %ymm0,%ymm11,%ymm0
- DB 196,98,125,24,13,125,61,0,0 ; vbroadcastss 0x3d7d(%rip),%ymm9 # b3a0 <_sk_clut_4D_avx+0x25c7>
+ DB 196,98,125,24,13,125,61,0,0 ; vbroadcastss 0x3d7d(%rip),%ymm9 # b754 <_sk_clut_4D_avx+0x25c7>
DB 196,65,60,89,193 ; vmulps %ymm9,%ymm8,%ymm8
DB 197,188,89,192 ; vmulps %ymm0,%ymm8,%ymm0
DB 196,65,44,89,194 ; vmulps %ymm10,%ymm10,%ymm8
DB 196,193,124,88,192 ; vaddps %ymm8,%ymm0,%ymm0
DB 197,252,81,192 ; vsqrtps %ymm0,%ymm0
DB 196,98,125,24,64,36 ; vbroadcastss 0x24(%rax),%ymm8
- DB 196,98,125,24,13,91,61,0,0 ; vbroadcastss 0x3d5b(%rip),%ymm9 # b3a4 <_sk_clut_4D_avx+0x25cb>
+ DB 196,98,125,24,13,91,61,0,0 ; vbroadcastss 0x3d5b(%rip),%ymm9 # b758 <_sk_clut_4D_avx+0x25cb>
DB 196,65,44,87,201 ; vxorps %ymm9,%ymm10,%ymm9
DB 196,65,124,92,210 ; vsubps %ymm10,%ymm0,%ymm10
- DB 196,98,125,24,29,76,61,0,0 ; vbroadcastss 0x3d4c(%rip),%ymm11 # b3a8 <_sk_clut_4D_avx+0x25cf>
+ DB 196,98,125,24,29,76,61,0,0 ; vbroadcastss 0x3d4c(%rip),%ymm11 # b75c <_sk_clut_4D_avx+0x25cf>
DB 196,65,60,89,195 ; vmulps %ymm11,%ymm8,%ymm8
DB 196,65,60,89,210 ; vmulps %ymm10,%ymm8,%ymm10
DB 197,180,92,192 ; vsubps %ymm0,%ymm9,%ymm0
@@ -13988,7 +14630,7 @@ _sk_xy_to_2pt_conical_quadratic_min_avx LABEL PROC
DB 196,67,121,4,210,0 ; vpermilps $0x0,%xmm10,%xmm10
DB 196,67,45,24,210,1 ; vinsertf128 $0x1,%xmm10,%ymm10,%ymm10
DB 197,44,88,208 ; vaddps %ymm0,%ymm10,%ymm10
- DB 196,98,125,24,29,11,61,0,0 ; vbroadcastss 0x3d0b(%rip),%ymm11 # b3ac <_sk_clut_4D_avx+0x25d3>
+ DB 196,98,125,24,29,11,61,0,0 ; vbroadcastss 0x3d0b(%rip),%ymm11 # b760 <_sk_clut_4D_avx+0x25d3>
DB 196,65,44,89,211 ; vmulps %ymm11,%ymm10,%ymm10
DB 197,252,89,192 ; vmulps %ymm0,%ymm0,%ymm0
DB 197,116,89,217 ; vmulps %ymm1,%ymm1,%ymm11
@@ -13997,17 +14639,17 @@ _sk_xy_to_2pt_conical_quadratic_min_avx LABEL PROC
DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
DB 197,164,92,192 ; vsubps %ymm0,%ymm11,%ymm0
- DB 196,98,125,24,13,223,60,0,0 ; vbroadcastss 0x3cdf(%rip),%ymm9 # b3b0 <_sk_clut_4D_avx+0x25d7>
+ DB 196,98,125,24,13,223,60,0,0 ; vbroadcastss 0x3cdf(%rip),%ymm9 # b764 <_sk_clut_4D_avx+0x25d7>
DB 196,65,60,89,193 ; vmulps %ymm9,%ymm8,%ymm8
DB 197,188,89,192 ; vmulps %ymm0,%ymm8,%ymm0
DB 196,65,44,89,194 ; vmulps %ymm10,%ymm10,%ymm8
DB 196,193,124,88,192 ; vaddps %ymm8,%ymm0,%ymm0
DB 197,252,81,192 ; vsqrtps %ymm0,%ymm0
DB 196,98,125,24,64,36 ; vbroadcastss 0x24(%rax),%ymm8
- DB 196,98,125,24,13,189,60,0,0 ; vbroadcastss 0x3cbd(%rip),%ymm9 # b3b4 <_sk_clut_4D_avx+0x25db>
+ DB 196,98,125,24,13,189,60,0,0 ; vbroadcastss 0x3cbd(%rip),%ymm9 # b768 <_sk_clut_4D_avx+0x25db>
DB 196,65,44,87,201 ; vxorps %ymm9,%ymm10,%ymm9
DB 196,65,124,92,210 ; vsubps %ymm10,%ymm0,%ymm10
- DB 196,98,125,24,29,174,60,0,0 ; vbroadcastss 0x3cae(%rip),%ymm11 # b3b8 <_sk_clut_4D_avx+0x25df>
+ DB 196,98,125,24,29,174,60,0,0 ; vbroadcastss 0x3cae(%rip),%ymm11 # b76c <_sk_clut_4D_avx+0x25df>
DB 196,65,60,89,195 ; vmulps %ymm11,%ymm8,%ymm8
DB 196,65,60,89,210 ; vmulps %ymm10,%ymm8,%ymm10
DB 197,180,92,192 ; vsubps %ymm0,%ymm9,%ymm0
@@ -14024,7 +14666,7 @@ _sk_xy_to_2pt_conical_linear_avx LABEL PROC
DB 196,67,121,4,201,0 ; vpermilps $0x0,%xmm9,%xmm9
DB 196,67,53,24,201,1 ; vinsertf128 $0x1,%xmm9,%ymm9,%ymm9
DB 197,52,88,200 ; vaddps %ymm0,%ymm9,%ymm9
- DB 196,98,125,24,21,115,60,0,0 ; vbroadcastss 0x3c73(%rip),%ymm10 # b3bc <_sk_clut_4D_avx+0x25e3>
+ DB 196,98,125,24,21,115,60,0,0 ; vbroadcastss 0x3c73(%rip),%ymm10 # b770 <_sk_clut_4D_avx+0x25e3>
DB 196,65,52,89,202 ; vmulps %ymm10,%ymm9,%ymm9
DB 197,252,89,192 ; vmulps %ymm0,%ymm0,%ymm0
DB 197,116,89,209 ; vmulps %ymm1,%ymm1,%ymm10
@@ -14033,7 +14675,7 @@ _sk_xy_to_2pt_conical_linear_avx LABEL PROC
DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
DB 197,172,92,192 ; vsubps %ymm0,%ymm10,%ymm0
- DB 196,98,125,24,5,71,60,0,0 ; vbroadcastss 0x3c47(%rip),%ymm8 # b3c0 <_sk_clut_4D_avx+0x25e7>
+ DB 196,98,125,24,5,71,60,0,0 ; vbroadcastss 0x3c47(%rip),%ymm8 # b774 <_sk_clut_4D_avx+0x25e7>
DB 196,193,124,87,192 ; vxorps %ymm8,%ymm0,%ymm0
DB 196,193,124,94,193 ; vdivps %ymm9,%ymm0,%ymm0
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -14068,7 +14710,7 @@ _sk_apply_vector_mask_avx LABEL PROC
PUBLIC _sk_save_xy_avx
_sk_save_xy_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,98,125,24,5,227,59,0,0 ; vbroadcastss 0x3be3(%rip),%ymm8 # b3c4 <_sk_clut_4D_avx+0x25eb>
+ DB 196,98,125,24,5,227,59,0,0 ; vbroadcastss 0x3be3(%rip),%ymm8 # b778 <_sk_clut_4D_avx+0x25eb>
DB 196,65,124,88,200 ; vaddps %ymm8,%ymm0,%ymm9
DB 196,67,125,8,209,1 ; vroundps $0x1,%ymm9,%ymm10
DB 196,65,52,92,202 ; vsubps %ymm10,%ymm9,%ymm9
@@ -14101,9 +14743,9 @@ _sk_accumulate_avx LABEL PROC
PUBLIC _sk_bilinear_nx_avx
_sk_bilinear_nx_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,5,111,59,0,0 ; vbroadcastss 0x3b6f(%rip),%ymm0 # b3c8 <_sk_clut_4D_avx+0x25ef>
+ DB 196,226,125,24,5,111,59,0,0 ; vbroadcastss 0x3b6f(%rip),%ymm0 # b77c <_sk_clut_4D_avx+0x25ef>
DB 197,252,88,0 ; vaddps (%rax),%ymm0,%ymm0
- DB 196,98,125,24,5,102,59,0,0 ; vbroadcastss 0x3b66(%rip),%ymm8 # b3cc <_sk_clut_4D_avx+0x25f3>
+ DB 196,98,125,24,5,102,59,0,0 ; vbroadcastss 0x3b66(%rip),%ymm8 # b780 <_sk_clut_4D_avx+0x25f3>
DB 197,60,92,64,64 ; vsubps 0x40(%rax),%ymm8,%ymm8
DB 197,124,17,128,128,0,0,0 ; vmovups %ymm8,0x80(%rax)
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -14112,7 +14754,7 @@ _sk_bilinear_nx_avx LABEL PROC
PUBLIC _sk_bilinear_px_avx
_sk_bilinear_px_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,5,78,59,0,0 ; vbroadcastss 0x3b4e(%rip),%ymm0 # b3d0 <_sk_clut_4D_avx+0x25f7>
+ DB 196,226,125,24,5,78,59,0,0 ; vbroadcastss 0x3b4e(%rip),%ymm0 # b784 <_sk_clut_4D_avx+0x25f7>
DB 197,252,88,0 ; vaddps (%rax),%ymm0,%ymm0
DB 197,124,16,64,64 ; vmovups 0x40(%rax),%ymm8
DB 197,124,17,128,128,0,0,0 ; vmovups %ymm8,0x80(%rax)
@@ -14122,9 +14764,9 @@ _sk_bilinear_px_avx LABEL PROC
PUBLIC _sk_bilinear_ny_avx
_sk_bilinear_ny_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,13,50,59,0,0 ; vbroadcastss 0x3b32(%rip),%ymm1 # b3d4 <_sk_clut_4D_avx+0x25fb>
+ DB 196,226,125,24,13,50,59,0,0 ; vbroadcastss 0x3b32(%rip),%ymm1 # b788 <_sk_clut_4D_avx+0x25fb>
DB 197,244,88,72,32 ; vaddps 0x20(%rax),%ymm1,%ymm1
- DB 196,98,125,24,5,40,59,0,0 ; vbroadcastss 0x3b28(%rip),%ymm8 # b3d8 <_sk_clut_4D_avx+0x25ff>
+ DB 196,98,125,24,5,40,59,0,0 ; vbroadcastss 0x3b28(%rip),%ymm8 # b78c <_sk_clut_4D_avx+0x25ff>
DB 197,60,92,64,96 ; vsubps 0x60(%rax),%ymm8,%ymm8
DB 197,124,17,128,160,0,0,0 ; vmovups %ymm8,0xa0(%rax)
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -14133,7 +14775,7 @@ _sk_bilinear_ny_avx LABEL PROC
PUBLIC _sk_bilinear_py_avx
_sk_bilinear_py_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,13,16,59,0,0 ; vbroadcastss 0x3b10(%rip),%ymm1 # b3dc <_sk_clut_4D_avx+0x2603>
+ DB 196,226,125,24,13,16,59,0,0 ; vbroadcastss 0x3b10(%rip),%ymm1 # b790 <_sk_clut_4D_avx+0x2603>
DB 197,244,88,72,32 ; vaddps 0x20(%rax),%ymm1,%ymm1
DB 197,124,16,64,96 ; vmovups 0x60(%rax),%ymm8
DB 197,124,17,128,160,0,0,0 ; vmovups %ymm8,0xa0(%rax)
@@ -14143,14 +14785,14 @@ _sk_bilinear_py_avx LABEL PROC
PUBLIC _sk_bicubic_n3x_avx
_sk_bicubic_n3x_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,5,243,58,0,0 ; vbroadcastss 0x3af3(%rip),%ymm0 # b3e0 <_sk_clut_4D_avx+0x2607>
+ DB 196,226,125,24,5,243,58,0,0 ; vbroadcastss 0x3af3(%rip),%ymm0 # b794 <_sk_clut_4D_avx+0x2607>
DB 197,252,88,0 ; vaddps (%rax),%ymm0,%ymm0
- DB 196,98,125,24,5,234,58,0,0 ; vbroadcastss 0x3aea(%rip),%ymm8 # b3e4 <_sk_clut_4D_avx+0x260b>
+ DB 196,98,125,24,5,234,58,0,0 ; vbroadcastss 0x3aea(%rip),%ymm8 # b798 <_sk_clut_4D_avx+0x260b>
DB 197,60,92,64,64 ; vsubps 0x40(%rax),%ymm8,%ymm8
DB 196,65,60,89,200 ; vmulps %ymm8,%ymm8,%ymm9
- DB 196,98,125,24,21,219,58,0,0 ; vbroadcastss 0x3adb(%rip),%ymm10 # b3e8 <_sk_clut_4D_avx+0x260f>
+ DB 196,98,125,24,21,219,58,0,0 ; vbroadcastss 0x3adb(%rip),%ymm10 # b79c <_sk_clut_4D_avx+0x260f>
DB 196,65,60,89,194 ; vmulps %ymm10,%ymm8,%ymm8
- DB 196,98,125,24,21,209,58,0,0 ; vbroadcastss 0x3ad1(%rip),%ymm10 # b3ec <_sk_clut_4D_avx+0x2613>
+ DB 196,98,125,24,21,209,58,0,0 ; vbroadcastss 0x3ad1(%rip),%ymm10 # b7a0 <_sk_clut_4D_avx+0x2613>
DB 196,65,60,88,194 ; vaddps %ymm10,%ymm8,%ymm8
DB 196,65,52,89,192 ; vmulps %ymm8,%ymm9,%ymm8
DB 197,124,17,128,128,0,0,0 ; vmovups %ymm8,0x80(%rax)
@@ -14160,19 +14802,19 @@ _sk_bicubic_n3x_avx LABEL PROC
PUBLIC _sk_bicubic_n1x_avx
_sk_bicubic_n1x_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,5,180,58,0,0 ; vbroadcastss 0x3ab4(%rip),%ymm0 # b3f0 <_sk_clut_4D_avx+0x2617>
+ DB 196,226,125,24,5,180,58,0,0 ; vbroadcastss 0x3ab4(%rip),%ymm0 # b7a4 <_sk_clut_4D_avx+0x2617>
DB 197,252,88,0 ; vaddps (%rax),%ymm0,%ymm0
- DB 196,98,125,24,5,171,58,0,0 ; vbroadcastss 0x3aab(%rip),%ymm8 # b3f4 <_sk_clut_4D_avx+0x261b>
+ DB 196,98,125,24,5,171,58,0,0 ; vbroadcastss 0x3aab(%rip),%ymm8 # b7a8 <_sk_clut_4D_avx+0x261b>
DB 197,60,92,64,64 ; vsubps 0x40(%rax),%ymm8,%ymm8
- DB 196,98,125,24,13,161,58,0,0 ; vbroadcastss 0x3aa1(%rip),%ymm9 # b3f8 <_sk_clut_4D_avx+0x261f>
+ DB 196,98,125,24,13,161,58,0,0 ; vbroadcastss 0x3aa1(%rip),%ymm9 # b7ac <_sk_clut_4D_avx+0x261f>
DB 196,65,60,89,201 ; vmulps %ymm9,%ymm8,%ymm9
- DB 196,98,125,24,21,151,58,0,0 ; vbroadcastss 0x3a97(%rip),%ymm10 # b3fc <_sk_clut_4D_avx+0x2623>
+ DB 196,98,125,24,21,151,58,0,0 ; vbroadcastss 0x3a97(%rip),%ymm10 # b7b0 <_sk_clut_4D_avx+0x2623>
DB 196,65,52,88,202 ; vaddps %ymm10,%ymm9,%ymm9
DB 196,65,60,89,201 ; vmulps %ymm9,%ymm8,%ymm9
- DB 196,98,125,24,21,136,58,0,0 ; vbroadcastss 0x3a88(%rip),%ymm10 # b400 <_sk_clut_4D_avx+0x2627>
+ DB 196,98,125,24,21,136,58,0,0 ; vbroadcastss 0x3a88(%rip),%ymm10 # b7b4 <_sk_clut_4D_avx+0x2627>
DB 196,65,52,88,202 ; vaddps %ymm10,%ymm9,%ymm9
DB 196,65,60,89,193 ; vmulps %ymm9,%ymm8,%ymm8
- DB 196,98,125,24,13,121,58,0,0 ; vbroadcastss 0x3a79(%rip),%ymm9 # b404 <_sk_clut_4D_avx+0x262b>
+ DB 196,98,125,24,13,121,58,0,0 ; vbroadcastss 0x3a79(%rip),%ymm9 # b7b8 <_sk_clut_4D_avx+0x262b>
DB 196,65,60,88,193 ; vaddps %ymm9,%ymm8,%ymm8
DB 197,124,17,128,128,0,0,0 ; vmovups %ymm8,0x80(%rax)
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -14181,17 +14823,17 @@ _sk_bicubic_n1x_avx LABEL PROC
PUBLIC _sk_bicubic_p1x_avx
_sk_bicubic_p1x_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,98,125,24,5,97,58,0,0 ; vbroadcastss 0x3a61(%rip),%ymm8 # b408 <_sk_clut_4D_avx+0x262f>
+ DB 196,98,125,24,5,97,58,0,0 ; vbroadcastss 0x3a61(%rip),%ymm8 # b7bc <_sk_clut_4D_avx+0x262f>
DB 197,188,88,0 ; vaddps (%rax),%ymm8,%ymm0
DB 197,124,16,72,64 ; vmovups 0x40(%rax),%ymm9
- DB 196,98,125,24,21,83,58,0,0 ; vbroadcastss 0x3a53(%rip),%ymm10 # b40c <_sk_clut_4D_avx+0x2633>
+ DB 196,98,125,24,21,83,58,0,0 ; vbroadcastss 0x3a53(%rip),%ymm10 # b7c0 <_sk_clut_4D_avx+0x2633>
DB 196,65,52,89,210 ; vmulps %ymm10,%ymm9,%ymm10
- DB 196,98,125,24,29,73,58,0,0 ; vbroadcastss 0x3a49(%rip),%ymm11 # b410 <_sk_clut_4D_avx+0x2637>
+ DB 196,98,125,24,29,73,58,0,0 ; vbroadcastss 0x3a49(%rip),%ymm11 # b7c4 <_sk_clut_4D_avx+0x2637>
DB 196,65,44,88,211 ; vaddps %ymm11,%ymm10,%ymm10
DB 196,65,52,89,210 ; vmulps %ymm10,%ymm9,%ymm10
DB 196,65,44,88,192 ; vaddps %ymm8,%ymm10,%ymm8
DB 196,65,52,89,192 ; vmulps %ymm8,%ymm9,%ymm8
- DB 196,98,125,24,13,48,58,0,0 ; vbroadcastss 0x3a30(%rip),%ymm9 # b414 <_sk_clut_4D_avx+0x263b>
+ DB 196,98,125,24,13,48,58,0,0 ; vbroadcastss 0x3a30(%rip),%ymm9 # b7c8 <_sk_clut_4D_avx+0x263b>
DB 196,65,60,88,193 ; vaddps %ymm9,%ymm8,%ymm8
DB 197,124,17,128,128,0,0,0 ; vmovups %ymm8,0x80(%rax)
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -14200,13 +14842,13 @@ _sk_bicubic_p1x_avx LABEL PROC
PUBLIC _sk_bicubic_p3x_avx
_sk_bicubic_p3x_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,5,24,58,0,0 ; vbroadcastss 0x3a18(%rip),%ymm0 # b418 <_sk_clut_4D_avx+0x263f>
+ DB 196,226,125,24,5,24,58,0,0 ; vbroadcastss 0x3a18(%rip),%ymm0 # b7cc <_sk_clut_4D_avx+0x263f>
DB 197,252,88,0 ; vaddps (%rax),%ymm0,%ymm0
DB 197,124,16,64,64 ; vmovups 0x40(%rax),%ymm8
DB 196,65,60,89,200 ; vmulps %ymm8,%ymm8,%ymm9
- DB 196,98,125,24,21,5,58,0,0 ; vbroadcastss 0x3a05(%rip),%ymm10 # b41c <_sk_clut_4D_avx+0x2643>
+ DB 196,98,125,24,21,5,58,0,0 ; vbroadcastss 0x3a05(%rip),%ymm10 # b7d0 <_sk_clut_4D_avx+0x2643>
DB 196,65,60,89,194 ; vmulps %ymm10,%ymm8,%ymm8
- DB 196,98,125,24,21,251,57,0,0 ; vbroadcastss 0x39fb(%rip),%ymm10 # b420 <_sk_clut_4D_avx+0x2647>
+ DB 196,98,125,24,21,251,57,0,0 ; vbroadcastss 0x39fb(%rip),%ymm10 # b7d4 <_sk_clut_4D_avx+0x2647>
DB 196,65,60,88,194 ; vaddps %ymm10,%ymm8,%ymm8
DB 196,65,52,89,192 ; vmulps %ymm8,%ymm9,%ymm8
DB 197,124,17,128,128,0,0,0 ; vmovups %ymm8,0x80(%rax)
@@ -14216,14 +14858,14 @@ _sk_bicubic_p3x_avx LABEL PROC
PUBLIC _sk_bicubic_n3y_avx
_sk_bicubic_n3y_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,13,222,57,0,0 ; vbroadcastss 0x39de(%rip),%ymm1 # b424 <_sk_clut_4D_avx+0x264b>
+ DB 196,226,125,24,13,222,57,0,0 ; vbroadcastss 0x39de(%rip),%ymm1 # b7d8 <_sk_clut_4D_avx+0x264b>
DB 197,244,88,72,32 ; vaddps 0x20(%rax),%ymm1,%ymm1
- DB 196,98,125,24,5,212,57,0,0 ; vbroadcastss 0x39d4(%rip),%ymm8 # b428 <_sk_clut_4D_avx+0x264f>
+ DB 196,98,125,24,5,212,57,0,0 ; vbroadcastss 0x39d4(%rip),%ymm8 # b7dc <_sk_clut_4D_avx+0x264f>
DB 197,60,92,64,96 ; vsubps 0x60(%rax),%ymm8,%ymm8
DB 196,65,60,89,200 ; vmulps %ymm8,%ymm8,%ymm9
- DB 196,98,125,24,21,197,57,0,0 ; vbroadcastss 0x39c5(%rip),%ymm10 # b42c <_sk_clut_4D_avx+0x2653>
+ DB 196,98,125,24,21,197,57,0,0 ; vbroadcastss 0x39c5(%rip),%ymm10 # b7e0 <_sk_clut_4D_avx+0x2653>
DB 196,65,60,89,194 ; vmulps %ymm10,%ymm8,%ymm8
- DB 196,98,125,24,21,187,57,0,0 ; vbroadcastss 0x39bb(%rip),%ymm10 # b430 <_sk_clut_4D_avx+0x2657>
+ DB 196,98,125,24,21,187,57,0,0 ; vbroadcastss 0x39bb(%rip),%ymm10 # b7e4 <_sk_clut_4D_avx+0x2657>
DB 196,65,60,88,194 ; vaddps %ymm10,%ymm8,%ymm8
DB 196,65,52,89,192 ; vmulps %ymm8,%ymm9,%ymm8
DB 197,124,17,128,160,0,0,0 ; vmovups %ymm8,0xa0(%rax)
@@ -14233,19 +14875,19 @@ _sk_bicubic_n3y_avx LABEL PROC
PUBLIC _sk_bicubic_n1y_avx
_sk_bicubic_n1y_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,13,158,57,0,0 ; vbroadcastss 0x399e(%rip),%ymm1 # b434 <_sk_clut_4D_avx+0x265b>
+ DB 196,226,125,24,13,158,57,0,0 ; vbroadcastss 0x399e(%rip),%ymm1 # b7e8 <_sk_clut_4D_avx+0x265b>
DB 197,244,88,72,32 ; vaddps 0x20(%rax),%ymm1,%ymm1
- DB 196,98,125,24,5,148,57,0,0 ; vbroadcastss 0x3994(%rip),%ymm8 # b438 <_sk_clut_4D_avx+0x265f>
+ DB 196,98,125,24,5,148,57,0,0 ; vbroadcastss 0x3994(%rip),%ymm8 # b7ec <_sk_clut_4D_avx+0x265f>
DB 197,60,92,64,96 ; vsubps 0x60(%rax),%ymm8,%ymm8
- DB 196,98,125,24,13,138,57,0,0 ; vbroadcastss 0x398a(%rip),%ymm9 # b43c <_sk_clut_4D_avx+0x2663>
+ DB 196,98,125,24,13,138,57,0,0 ; vbroadcastss 0x398a(%rip),%ymm9 # b7f0 <_sk_clut_4D_avx+0x2663>
DB 196,65,60,89,201 ; vmulps %ymm9,%ymm8,%ymm9
- DB 196,98,125,24,21,128,57,0,0 ; vbroadcastss 0x3980(%rip),%ymm10 # b440 <_sk_clut_4D_avx+0x2667>
+ DB 196,98,125,24,21,128,57,0,0 ; vbroadcastss 0x3980(%rip),%ymm10 # b7f4 <_sk_clut_4D_avx+0x2667>
DB 196,65,52,88,202 ; vaddps %ymm10,%ymm9,%ymm9
DB 196,65,60,89,201 ; vmulps %ymm9,%ymm8,%ymm9
- DB 196,98,125,24,21,113,57,0,0 ; vbroadcastss 0x3971(%rip),%ymm10 # b444 <_sk_clut_4D_avx+0x266b>
+ DB 196,98,125,24,21,113,57,0,0 ; vbroadcastss 0x3971(%rip),%ymm10 # b7f8 <_sk_clut_4D_avx+0x266b>
DB 196,65,52,88,202 ; vaddps %ymm10,%ymm9,%ymm9
DB 196,65,60,89,193 ; vmulps %ymm9,%ymm8,%ymm8
- DB 196,98,125,24,13,98,57,0,0 ; vbroadcastss 0x3962(%rip),%ymm9 # b448 <_sk_clut_4D_avx+0x266f>
+ DB 196,98,125,24,13,98,57,0,0 ; vbroadcastss 0x3962(%rip),%ymm9 # b7fc <_sk_clut_4D_avx+0x266f>
DB 196,65,60,88,193 ; vaddps %ymm9,%ymm8,%ymm8
DB 197,124,17,128,160,0,0,0 ; vmovups %ymm8,0xa0(%rax)
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -14254,17 +14896,17 @@ _sk_bicubic_n1y_avx LABEL PROC
PUBLIC _sk_bicubic_p1y_avx
_sk_bicubic_p1y_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,98,125,24,5,74,57,0,0 ; vbroadcastss 0x394a(%rip),%ymm8 # b44c <_sk_clut_4D_avx+0x2673>
+ DB 196,98,125,24,5,74,57,0,0 ; vbroadcastss 0x394a(%rip),%ymm8 # b800 <_sk_clut_4D_avx+0x2673>
DB 197,188,88,72,32 ; vaddps 0x20(%rax),%ymm8,%ymm1
DB 197,124,16,72,96 ; vmovups 0x60(%rax),%ymm9
- DB 196,98,125,24,21,59,57,0,0 ; vbroadcastss 0x393b(%rip),%ymm10 # b450 <_sk_clut_4D_avx+0x2677>
+ DB 196,98,125,24,21,59,57,0,0 ; vbroadcastss 0x393b(%rip),%ymm10 # b804 <_sk_clut_4D_avx+0x2677>
DB 196,65,52,89,210 ; vmulps %ymm10,%ymm9,%ymm10
- DB 196,98,125,24,29,49,57,0,0 ; vbroadcastss 0x3931(%rip),%ymm11 # b454 <_sk_clut_4D_avx+0x267b>
+ DB 196,98,125,24,29,49,57,0,0 ; vbroadcastss 0x3931(%rip),%ymm11 # b808 <_sk_clut_4D_avx+0x267b>
DB 196,65,44,88,211 ; vaddps %ymm11,%ymm10,%ymm10
DB 196,65,52,89,210 ; vmulps %ymm10,%ymm9,%ymm10
DB 196,65,44,88,192 ; vaddps %ymm8,%ymm10,%ymm8
DB 196,65,52,89,192 ; vmulps %ymm8,%ymm9,%ymm8
- DB 196,98,125,24,13,24,57,0,0 ; vbroadcastss 0x3918(%rip),%ymm9 # b458 <_sk_clut_4D_avx+0x267f>
+ DB 196,98,125,24,13,24,57,0,0 ; vbroadcastss 0x3918(%rip),%ymm9 # b80c <_sk_clut_4D_avx+0x267f>
DB 196,65,60,88,193 ; vaddps %ymm9,%ymm8,%ymm8
DB 197,124,17,128,160,0,0,0 ; vmovups %ymm8,0xa0(%rax)
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -14273,13 +14915,13 @@ _sk_bicubic_p1y_avx LABEL PROC
PUBLIC _sk_bicubic_p3y_avx
_sk_bicubic_p3y_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,13,0,57,0,0 ; vbroadcastss 0x3900(%rip),%ymm1 # b45c <_sk_clut_4D_avx+0x2683>
+ DB 196,226,125,24,13,0,57,0,0 ; vbroadcastss 0x3900(%rip),%ymm1 # b810 <_sk_clut_4D_avx+0x2683>
DB 197,244,88,72,32 ; vaddps 0x20(%rax),%ymm1,%ymm1
DB 197,124,16,64,96 ; vmovups 0x60(%rax),%ymm8
DB 196,65,60,89,200 ; vmulps %ymm8,%ymm8,%ymm9
- DB 196,98,125,24,21,236,56,0,0 ; vbroadcastss 0x38ec(%rip),%ymm10 # b460 <_sk_clut_4D_avx+0x2687>
+ DB 196,98,125,24,21,236,56,0,0 ; vbroadcastss 0x38ec(%rip),%ymm10 # b814 <_sk_clut_4D_avx+0x2687>
DB 196,65,60,89,194 ; vmulps %ymm10,%ymm8,%ymm8
- DB 196,98,125,24,21,226,56,0,0 ; vbroadcastss 0x38e2(%rip),%ymm10 # b464 <_sk_clut_4D_avx+0x268b>
+ DB 196,98,125,24,21,226,56,0,0 ; vbroadcastss 0x38e2(%rip),%ymm10 # b818 <_sk_clut_4D_avx+0x268b>
DB 196,65,60,88,194 ; vaddps %ymm10,%ymm8,%ymm8
DB 196,65,52,89,192 ; vmulps %ymm8,%ymm9,%ymm8
DB 197,124,17,128,160,0,0,0 ; vmovups %ymm8,0xa0(%rax)
@@ -14415,7 +15057,7 @@ _sk_clut_3D_avx LABEL PROC
DB 197,249,127,68,36,96 ; vmovdqa %xmm0,0x60(%rsp)
DB 72,139,0 ; mov (%rax),%rax
DB 196,193,121,254,194 ; vpaddd %xmm10,%xmm0,%xmm0
- DB 196,98,121,24,29,139,54,0,0 ; vbroadcastss 0x368b(%rip),%xmm11 # b46c <_sk_clut_4D_avx+0x2693>
+ DB 196,98,121,24,29,139,54,0,0 ; vbroadcastss 0x368b(%rip),%xmm11 # b820 <_sk_clut_4D_avx+0x2693>
DB 196,194,121,64,203 ; vpmulld %xmm11,%xmm0,%xmm1
DB 196,193,249,126,201 ; vmovq %xmm1,%r9
DB 69,137,202 ; mov %r9d,%r10d
@@ -14449,7 +15091,7 @@ _sk_clut_3D_avx LABEL PROC
DB 196,163,65,33,60,152,48 ; vinsertps $0x30,(%rax,%r11,4),%xmm7,%xmm7
DB 196,227,93,24,223,1 ; vinsertf128 $0x1,%xmm7,%ymm4,%ymm3
DB 197,252,17,156,36,192,0,0,0 ; vmovups %ymm3,0xc0(%rsp)
- DB 196,98,121,24,13,204,53,0,0 ; vbroadcastss 0x35cc(%rip),%xmm9 # b470 <_sk_clut_4D_avx+0x2697>
+ DB 196,98,121,24,13,204,53,0,0 ; vbroadcastss 0x35cc(%rip),%xmm9 # b824 <_sk_clut_4D_avx+0x2697>
DB 196,193,105,254,249 ; vpaddd %xmm9,%xmm2,%xmm7
DB 196,195,249,22,249,1 ; vpextrq $0x1,%xmm7,%r9
DB 196,193,249,126,250 ; vmovq %xmm7,%r10
@@ -14477,7 +15119,7 @@ _sk_clut_3D_avx LABEL PROC
DB 196,161,122,16,60,152 ; vmovss (%rax,%r11,4),%xmm7
DB 196,227,81,33,239,48 ; vinsertps $0x30,%xmm7,%xmm5,%xmm5
DB 196,99,85,24,254,1 ; vinsertf128 $0x1,%xmm6,%ymm5,%ymm15
- DB 196,98,121,24,37,59,53,0,0 ; vbroadcastss 0x353b(%rip),%xmm12 # b474 <_sk_clut_4D_avx+0x269b>
+ DB 196,98,121,24,37,59,53,0,0 ; vbroadcastss 0x353b(%rip),%xmm12 # b828 <_sk_clut_4D_avx+0x269b>
DB 196,193,105,254,212 ; vpaddd %xmm12,%xmm2,%xmm2
DB 196,195,249,22,209,1 ; vpextrq $0x1,%xmm2,%r9
DB 196,193,249,126,210 ; vmovq %xmm2,%r10
@@ -14505,7 +15147,7 @@ _sk_clut_3D_avx LABEL PROC
DB 197,250,16,44,152 ; vmovss (%rax,%rbx,4),%xmm5
DB 196,227,105,33,213,48 ; vinsertps $0x30,%xmm5,%xmm2,%xmm2
DB 196,227,109,24,225,1 ; vinsertf128 $0x1,%xmm1,%ymm2,%ymm4
- DB 196,226,125,24,13,155,52,0,0 ; vbroadcastss 0x349b(%rip),%ymm1 # b468 <_sk_clut_4D_avx+0x268f>
+ DB 196,226,125,24,13,155,52,0,0 ; vbroadcastss 0x349b(%rip),%ymm1 # b81c <_sk_clut_4D_avx+0x268f>
DB 197,188,88,209 ; vaddps %ymm1,%ymm8,%ymm2
DB 197,252,17,140,36,160,0,0,0 ; vmovups %ymm1,0xa0(%rsp)
DB 197,254,91,210 ; vcvttps2dq %ymm2,%ymm2
@@ -15258,7 +15900,7 @@ _sk_clut_4D_avx LABEL PROC
DB 196,226,25,64,200 ; vpmulld %xmm0,%xmm12,%xmm1
DB 197,249,127,12,36 ; vmovdqa %xmm1,(%rsp)
DB 197,241,254,202 ; vpaddd %xmm2,%xmm1,%xmm1
- DB 196,98,121,24,61,46,37,0,0 ; vbroadcastss 0x252e(%rip),%xmm15 # b47c <_sk_clut_4D_avx+0x26a3>
+ DB 196,98,121,24,61,46,37,0,0 ; vbroadcastss 0x252e(%rip),%xmm15 # b830 <_sk_clut_4D_avx+0x26a3>
DB 196,194,113,64,247 ; vpmulld %xmm15,%xmm1,%xmm6
DB 196,193,249,126,241 ; vmovq %xmm6,%r9
DB 69,137,202 ; mov %r9d,%r10d
@@ -15296,7 +15938,7 @@ _sk_clut_4D_avx LABEL PROC
DB 196,163,65,33,60,144,32 ; vinsertps $0x20,(%rax,%r10,4),%xmm7,%xmm7
DB 196,163,65,33,60,152,48 ; vinsertps $0x30,(%rax,%r11,4),%xmm7,%xmm7
DB 196,99,53,24,247,1 ; vinsertf128 $0x1,%xmm7,%ymm9,%ymm14
- DB 196,98,121,24,21,86,36,0,0 ; vbroadcastss 0x2456(%rip),%xmm10 # b480 <_sk_clut_4D_avx+0x26a7>
+ DB 196,98,121,24,21,86,36,0,0 ; vbroadcastss 0x2456(%rip),%xmm10 # b834 <_sk_clut_4D_avx+0x26a7>
DB 196,65,97,254,202 ; vpaddd %xmm10,%xmm3,%xmm9
DB 196,67,249,22,201,1 ; vpextrq $0x1,%xmm9,%r9
DB 196,65,249,126,202 ; vmovq %xmm9,%r10
@@ -15324,7 +15966,7 @@ _sk_clut_4D_avx LABEL PROC
DB 196,161,122,16,60,152 ; vmovss (%rax,%r11,4),%xmm7
DB 196,227,81,33,239,48 ; vinsertps $0x30,%xmm7,%xmm5,%xmm5
DB 196,99,85,24,220,1 ; vinsertf128 $0x1,%xmm4,%ymm5,%ymm11
- DB 196,98,121,24,13,197,35,0,0 ; vbroadcastss 0x23c5(%rip),%xmm9 # b484 <_sk_clut_4D_avx+0x26ab>
+ DB 196,98,121,24,13,197,35,0,0 ; vbroadcastss 0x23c5(%rip),%xmm9 # b838 <_sk_clut_4D_avx+0x26ab>
DB 196,193,97,254,217 ; vpaddd %xmm9,%xmm3,%xmm3
DB 196,195,249,22,217,1 ; vpextrq $0x1,%xmm3,%r9
DB 196,193,249,126,218 ; vmovq %xmm3,%r10
@@ -15352,7 +15994,7 @@ _sk_clut_4D_avx LABEL PROC
DB 197,250,16,44,152 ; vmovss (%rax,%rbx,4),%xmm5
DB 196,227,89,33,229,48 ; vinsertps $0x30,%xmm5,%xmm4,%xmm4
DB 196,227,93,24,243,1 ; vinsertf128 $0x1,%xmm3,%ymm4,%ymm6
- DB 196,226,125,24,37,37,35,0,0 ; vbroadcastss 0x2325(%rip),%ymm4 # b478 <_sk_clut_4D_avx+0x269f>
+ DB 196,226,125,24,37,37,35,0,0 ; vbroadcastss 0x2325(%rip),%ymm4 # b82c <_sk_clut_4D_avx+0x269f>
DB 197,148,88,220 ; vaddps %ymm4,%ymm13,%ymm3
DB 197,252,40,252 ; vmovaps %ymm4,%ymm7
DB 197,252,17,188,36,160,0,0,0 ; vmovups %ymm7,0xa0(%rsp)
@@ -16782,7 +17424,7 @@ _sk_clut_4D_avx LABEL PROC
DB 197,220,89,210 ; vmulps %ymm2,%ymm4,%ymm2
DB 197,212,88,210 ; vaddps %ymm2,%ymm5,%ymm2
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,29,112,5,0,0 ; vbroadcastss 0x570(%rip),%ymm3 # b488 <_sk_clut_4D_avx+0x26af>
+ DB 196,226,125,24,29,112,5,0,0 ; vbroadcastss 0x570(%rip),%ymm3 # b83c <_sk_clut_4D_avx+0x26af>
DB 197,252,16,164,36,160,2,0,0 ; vmovups 0x2a0(%rsp),%ymm4
DB 197,252,16,172,36,192,2,0,0 ; vmovups 0x2c0(%rsp),%ymm5
DB 197,252,16,180,36,224,2,0,0 ; vmovups 0x2e0(%rsp),%ymm6
@@ -16836,25 +17478,25 @@ ALIGN 4
DB 153 ; cltd
DB 153 ; cltd
DB 62,61,10,23,63,174 ; ds cmp $0xae3f170a,%eax
- DB 71,225,61 ; rex.RXB loope b005 <.literal4+0xb9>
+ DB 71,225,61 ; rex.RXB loope b3b9 <.literal4+0xb9>
DB 0,0 ; add %al,(%rax)
DB 128,63,154 ; cmpb $0x9a,(%rdi)
DB 153 ; cltd
DB 153 ; cltd
DB 62,61,10,23,63,174 ; ds cmp $0xae3f170a,%eax
- DB 71,225,61 ; rex.RXB loope b015 <.literal4+0xc9>
+ DB 71,225,61 ; rex.RXB loope b3c9 <.literal4+0xc9>
DB 0,0 ; add %al,(%rax)
DB 128,63,154 ; cmpb $0x9a,(%rdi)
DB 153 ; cltd
DB 153 ; cltd
DB 62,61,10,23,63,174 ; ds cmp $0xae3f170a,%eax
- DB 71,225,61 ; rex.RXB loope b025 <.literal4+0xd9>
+ DB 71,225,61 ; rex.RXB loope b3d9 <.literal4+0xd9>
DB 0,0 ; add %al,(%rax)
DB 128,63,154 ; cmpb $0x9a,(%rdi)
DB 153 ; cltd
DB 153 ; cltd
DB 62,61,10,23,63,174 ; ds cmp $0xae3f170a,%eax
- DB 71,225,61 ; rex.RXB loope b035 <.literal4+0xe9>
+ DB 71,225,61 ; rex.RXB loope b3e9 <.literal4+0xe9>
DB 0,0 ; add %al,(%rax)
DB 128,63,0 ; cmpb $0x0,(%rdi)
DB 0,128,63,0,0,127 ; add %al,0x7f00003f(%rax)
@@ -16919,7 +17561,7 @@ ALIGN 4
DB 190,129,128,128,59 ; mov $0x3b808081,%esi
DB 129,128,128,59,0,248,0,0,8,33 ; addl $0x21080000,-0x7ffc480(%rax)
DB 132,55 ; test %dh,(%rdi)
- DB 224,7 ; loopne b0a5 <.literal4+0x159>
+ DB 224,7 ; loopne b459 <.literal4+0x159>
DB 0,0 ; add %al,(%rax)
DB 33,8 ; and %ecx,(%rax)
DB 2,58 ; add (%rdx),%bh
@@ -16935,10 +17577,10 @@ ALIGN 4
DB 129,128,128,59,129,128,128,59,0,0 ; addl $0x3b80,-0x7f7ec480(%rax)
DB 0,52,255 ; add %dh,(%rdi,%rdi,8)
DB 255 ; (bad)
- DB 127,0 ; jg b0cc <.literal4+0x180>
+ DB 127,0 ; jg b480 <.literal4+0x180>
DB 0,0 ; add %al,(%rax)
DB 0,63 ; add %bh,(%rdi)
- DB 119,115 ; ja b145 <.literal4+0x1f9>
+ DB 119,115 ; ja b4f9 <.literal4+0x1f9>
DB 248 ; clc
DB 194,117,191 ; retq $0xbf75
DB 191,63,249,68,180 ; mov $0xb444f93f,%edi
@@ -16952,10 +17594,10 @@ ALIGN 4
DB 0,128,63,0,0,0 ; add %al,0x3f(%rax)
DB 52,255 ; xor $0xff,%al
DB 255 ; (bad)
- DB 127,0 ; jg b100 <.literal4+0x1b4>
+ DB 127,0 ; jg b4b4 <.literal4+0x1b4>
DB 0,0 ; add %al,(%rax)
DB 0,63 ; add %bh,(%rdi)
- DB 119,115 ; ja b179 <.literal4+0x22d>
+ DB 119,115 ; ja b52d <.literal4+0x22d>
DB 248 ; clc
DB 194,117,191 ; retq $0xbf75
DB 191,63,249,68,180 ; mov $0xb444f93f,%edi
@@ -16969,10 +17611,10 @@ ALIGN 4
DB 0,128,63,0,0,0 ; add %al,0x3f(%rax)
DB 52,255 ; xor $0xff,%al
DB 255 ; (bad)
- DB 127,0 ; jg b134 <.literal4+0x1e8>
+ DB 127,0 ; jg b4e8 <.literal4+0x1e8>
DB 0,0 ; add %al,(%rax)
DB 0,63 ; add %bh,(%rdi)
- DB 119,115 ; ja b1ad <.literal4+0x261>
+ DB 119,115 ; ja b561 <.literal4+0x261>
DB 248 ; clc
DB 194,117,191 ; retq $0xbf75
DB 191,63,249,68,180 ; mov $0xb444f93f,%edi
@@ -16986,10 +17628,10 @@ ALIGN 4
DB 0,128,63,0,0,0 ; add %al,0x3f(%rax)
DB 52,255 ; xor $0xff,%al
DB 255 ; (bad)
- DB 127,0 ; jg b168 <.literal4+0x21c>
+ DB 127,0 ; jg b51c <.literal4+0x21c>
DB 0,0 ; add %al,(%rax)
DB 0,63 ; add %bh,(%rdi)
- DB 119,115 ; ja b1e1 <.literal4+0x295>
+ DB 119,115 ; ja b595 <.literal4+0x295>
DB 248 ; clc
DB 194,117,191 ; retq $0xbf75
DB 191,63,249,68,180 ; mov $0xb444f93f,%edi
@@ -17003,10 +17645,10 @@ ALIGN 4
DB 0,128,63,0,0,0 ; add %al,0x3f(%rax)
DB 52,255 ; xor $0xff,%al
DB 255 ; (bad)
- DB 127,0 ; jg b19c <.literal4+0x250>
+ DB 127,0 ; jg b550 <.literal4+0x250>
DB 0,0 ; add %al,(%rax)
DB 0,63 ; add %bh,(%rdi)
- DB 119,115 ; ja b215 <.literal4+0x2c9>
+ DB 119,115 ; ja b5c9 <.literal4+0x2c9>
DB 248 ; clc
DB 194,117,191 ; retq $0xbf75
DB 191,63,249,68,180 ; mov $0xb444f93f,%edi
@@ -17019,7 +17661,7 @@ ALIGN 4
DB 0,75,0 ; add %cl,0x0(%rbx)
DB 0,200 ; add %cl,%al
DB 66,0,0 ; rex.X add %al,(%rax)
- DB 127,67 ; jg b20f <.literal4+0x2c3>
+ DB 127,67 ; jg b5c3 <.literal4+0x2c3>
DB 0,0 ; add %al,(%rax)
DB 0,195 ; add %al,%bl
DB 0,0 ; add %al,(%rax)
@@ -17031,7 +17673,7 @@ ALIGN 4
DB 190,80,128,3,62 ; mov $0x3e038050,%esi
DB 31 ; (bad)
DB 215 ; xlat %ds:(%rbx)
- DB 118,63 ; jbe b22f <.literal4+0x2e3>
+ DB 118,63 ; jbe b5e3 <.literal4+0x2e3>
DB 246,64,83,63 ; testb $0x3f,0x53(%rax)
DB 129,128,128,59,129,128,128,59,129,128; addl $0x80813b80,-0x7f7ec480(%rax)
DB 128,59,0 ; cmpb $0x0,(%rbx)
@@ -17053,7 +17695,7 @@ ALIGN 4
DB 0,0 ; add %al,(%rax)
DB 8,33 ; or %ah,(%rcx)
DB 132,55 ; test %dh,(%rdi)
- DB 224,7 ; loopne b249 <.literal4+0x2fd>
+ DB 224,7 ; loopne b5fd <.literal4+0x2fd>
DB 0,0 ; add %al,(%rax)
DB 33,8 ; and %ecx,(%rax)
DB 2,58 ; add (%rdx),%bh
@@ -17065,7 +17707,7 @@ ALIGN 4
DB 0,0 ; add %al,(%rax)
DB 8,33 ; or %ah,(%rcx)
DB 132,55 ; test %dh,(%rdi)
- DB 224,7 ; loopne b265 <.literal4+0x319>
+ DB 224,7 ; loopne b619 <.literal4+0x319>
DB 0,0 ; add %al,(%rax)
DB 33,8 ; and %ecx,(%rax)
DB 2,58 ; add (%rdx),%bh
@@ -17076,7 +17718,7 @@ ALIGN 4
DB 0,0 ; add %al,(%rax)
DB 248 ; clc
DB 65,0,0 ; add %al,(%r8)
- DB 124,66 ; jl b2ba <.literal4+0x36e>
+ DB 124,66 ; jl b66e <.literal4+0x36e>
DB 0,240 ; add %dh,%al
DB 0,0 ; add %al,(%rax)
DB 137,136,136,55,0,15 ; mov %ecx,0xf003788(%rax)
@@ -17102,7 +17744,7 @@ ALIGN 4
DB 137,136,136,59,15,0 ; mov %ecx,0xf3b88(%rax)
DB 0,0 ; add %al,(%rax)
DB 137,136,136,61,0,0 ; mov %ecx,0x3d88(%rax)
- DB 112,65 ; jo b31d <.literal4+0x3d1>
+ DB 112,65 ; jo b6d1 <.literal4+0x3d1>
DB 129,128,128,59,129,128,128,59,129,128; addl $0x80813b80,-0x7f7ec480(%rax)
DB 128,59,0 ; cmpb $0x0,(%rbx)
DB 0,127,67 ; add %bh,0x43(%rdi)
@@ -17128,7 +17770,7 @@ ALIGN 4
DB 0,128,55,0,0,128 ; add %al,-0x7fffffc9(%rax)
DB 63 ; (bad)
DB 0,255 ; add %bh,%bh
- DB 127,71 ; jg b38f <.literal4+0x443>
+ DB 127,71 ; jg b743 <.literal4+0x443>
DB 0,0 ; add %al,(%rax)
DB 0,63 ; add %bh,(%rdi)
DB 0,0 ; add %al,(%rax)
@@ -17248,8 +17890,8 @@ ALIGN 4
DB 0,0 ; add %al,(%rax)
DB 2,0 ; add (%rax),%al
DB 0,0 ; add %al,(%rax)
- DB 114,249 ; jb b473 <.literal4+0x527>
- DB 127,63 ; jg b4bb <_sk_clut_4D_avx+0x26e2>
+ DB 114,249 ; jb b827 <.literal4+0x527>
+ DB 127,63 ; jg b86f <_sk_clut_4D_avx+0x26e2>
DB 3,0 ; add (%rax),%eax
DB 0,0 ; add %al,(%rax)
DB 1,0 ; add %eax,(%rax)
@@ -17260,107 +17902,137 @@ ALIGN 4
DB 128 ; .byte 0x80
DB 63 ; (bad)
-ALIGN 16
- DB 0,2 ; add %al,(%rdx)
- DB 4,6 ; add $0x6,%al
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 8,10 ; or %cl,(%rdx)
- DB 12,14 ; or $0xe,%al
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 255,0 ; incl (%rax)
- DB 255,0 ; incl (%rax)
- DB 255,0 ; incl (%rax)
- DB 255,0 ; incl (%rax)
- DB 255,0 ; incl (%rax)
- DB 255,0 ; incl (%rax)
- DB 255,0 ; incl (%rax)
- DB 255,0 ; incl (%rax)
- DB 255,0 ; incl (%rax)
- DB 255,0 ; incl (%rax)
- DB 255,0 ; incl (%rax)
+ALIGN 32
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,2 ; add %al,(%rdx)
- DB 4,6 ; add $0x6,%al
DB 0,0 ; add %al,(%rax)
+ DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
+ DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
+ DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
+ DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
+ DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
- DB 8,10 ; or %cl,(%rdx)
- DB 12,14 ; or $0xe,%al
+ DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
+ DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
+ DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
+ DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
+ DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
+ DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,2 ; add %al,(%rdx)
DB 0,0 ; add %al,(%rax)
+ DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
+ DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
+ DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
+ DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
+ DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
+ DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
- DB 0,2 ; add %al,(%rdx)
- DB 4,6 ; add $0x6,%al
+ DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
+ DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
+ DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
+ DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
+ DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
+ DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,0 ; add %al,(%rax)
+
+ALIGN 16
DB 255,0 ; incl (%rax)
DB 255,0 ; incl (%rax)
DB 255,0 ; incl (%rax)
@@ -17373,232 +18045,74 @@ ALIGN 16
DB 255,0 ; incl (%rax)
DB 255,0 ; incl (%rax)
DB 255,0 ; incl (%rax)
- DB 0,2 ; add %al,(%rdx)
- DB 4,6 ; add $0x6,%al
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 8,10 ; or %cl,(%rdx)
- DB 12,14 ; or $0xe,%al
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,2 ; add %al,(%rdx)
- DB 4,6 ; add $0x6,%al
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 8,10 ; or %cl,(%rdx)
- DB 12,14 ; or $0xe,%al
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,2 ; add %al,(%rdx)
- DB 4,6 ; add $0x6,%al
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 8,10 ; or %cl,(%rdx)
- DB 12,14 ; or $0xe,%al
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,2 ; add %al,(%rdx)
- DB 4,6 ; add $0x6,%al
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 8,10 ; or %cl,(%rdx)
- DB 12,14 ; or $0xe,%al
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,2 ; add %al,(%rdx)
- DB 4,6 ; add $0x6,%al
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 8,10 ; or %cl,(%rdx)
- DB 12,14 ; or $0xe,%al
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,2 ; add %al,(%rdx)
- DB 4,6 ; add $0x6,%al
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 8,10 ; or %cl,(%rdx)
- DB 12,14 ; or $0xe,%al
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
- DB 0,0 ; add %al,(%rax)
-
-ALIGN 32
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
+ DB 0,2 ; add %al,(%rdx)
DB 0,0 ; add %al,(%rax)
- DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
- DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
- DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
- DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
- DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
- DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
- DB 255,0 ; incl (%rax)
+ DB 0,2 ; add %al,(%rdx)
+ DB 4,6 ; add $0x6,%al
DB 0,0 ; add %al,(%rax)
- DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
- DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
- DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
- DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
- DB 255,0 ; incl (%rax)
DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
DB 255,0 ; incl (%rax)
- DB 0,0 ; add %al,(%rax)
ALIGN 32
PUBLIC _sk_start_pipeline_sse41
diff --git a/src/jumper/SkJumper_stages.cpp b/src/jumper/SkJumper_stages.cpp
index f276ea6e9c..ca8b460aaa 100644
--- a/src/jumper/SkJumper_stages.cpp
+++ b/src/jumper/SkJumper_stages.cpp
@@ -184,40 +184,6 @@ SI void store(T* dst, V v, size_t tail) {
unaligned_store(dst, v);
}
-// AVX adds some mask loads and stores that make for shorter, faster code.
-#if defined(JUMPER) && defined(__AVX__)
- SI U32 mask(size_t tail) {
- // We go a little out of our way to avoid needing large constant values here.
-
- // It's easiest to build the mask as 8 8-bit values, either 0x00 or 0xff.
- // Start fully on, then shift away lanes from the top until we've got our mask.
- uint64_t mask = 0xffffffffffffffff >> 8*(kStride-tail);
-
- // Sign-extend each mask lane to its full width, 0x00000000 or 0xffffffff.
- using S8 = int8_t __attribute__((ext_vector_type(8)));
- using S32 = int32_t __attribute__((ext_vector_type(8)));
- return (U32)__builtin_convertvector(unaligned_load<S8>(&mask), S32);
- }
-
- template <>
- inline U32 load(const uint32_t* src, size_t tail) {
- __builtin_assume(tail < kStride);
- if (__builtin_expect(tail, 0)) {
- return (U32)_mm256_maskload_ps((const float*)src, mask(tail));
- }
- return unaligned_load<U32>(src);
- }
-
- template <>
- inline void store(uint32_t* dst, U32 v, size_t tail) {
- __builtin_assume(tail < kStride);
- if (__builtin_expect(tail, 0)) {
- return _mm256_maskstore_ps((float*)dst, mask(tail), (F)v);
- }
- unaligned_store(dst, v);
- }
-#endif
-
SI F from_byte(U8 b) {
return cast(expand(b)) * (1/255.0f);
}