aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/jumper/SkJumper_generated_win.S
diff options
context:
space:
mode:
authorGravatar Mike Klein <mtklein@chromium.org>2017-04-17 19:32:05 -0400
committerGravatar Skia Commit-Bot <skia-commit-bot@chromium.org>2017-04-19 17:43:58 +0000
commit44375176c06f00682518a03d4983554ca8fb5b6a (patch)
tree553fc077c2060173f3022e9831978f23704bd2b6 /src/jumper/SkJumper_generated_win.S
parent8f2911f8407b5e151768690ed40ecedde6cd7ad8 (diff)
jumper, parametric_{r,g,b,a}
I've tried a couple of ideas for approx_powf(): 1) accumulate integer powers of x, then 4th roots, then 16th roots 2) continue 1) all the way to 256th roots 3) decompose into pow2 and log2, exploiting IEEE float layout 4) slightly tune constants used in 3) 5) accumulate integer powers of x, then 3+4) with different tuning 6) follow a source online, basically 5 with finesse 7) a new source quoting and improving on the method in 6). 7) seems perfect, enough that maybe we can explore improving its speed at cost of precision. Might be nice to get rid of those divides. If we allow a small tolerance (2-5) in our tests, we could use the very simple fast forms from 3) (e.g. PS 5). I wish I had some images to look at! Anything involving roots seems to be subverted by poor rsqrt precision. This change of course affects the pipelines created by the tests for exponential and full parametric gamma curves. What's less obvious is that it also means SkJumper can now for the first time run the pipeline created by the mixed gamma curves test. This means we now need to relax our tolerance for the table-based channel, just like we did when implementing table_{r,g,b,a}. This took me an embarrassingly long time to figure out. *face palm* Change-Id: I451ee3c970a0a4a4e285f8aa8f6ef709a654d247 Reviewed-on: https://skia-review.googlesource.com/13656 Commit-Queue: Mike Klein <mtklein@chromium.org> Reviewed-by: Matt Sarett <msarett@google.com> Reviewed-by: Herb Derby <herb@google.com>
Diffstat (limited to 'src/jumper/SkJumper_generated_win.S')
-rw-r--r--src/jumper/SkJumper_generated_win.S1942
1 files changed, 1734 insertions, 208 deletions
diff --git a/src/jumper/SkJumper_generated_win.S b/src/jumper/SkJumper_generated_win.S
index eb77b6d5a5..d8f82efef0 100644
--- a/src/jumper/SkJumper_generated_win.S
+++ b/src/jumper/SkJumper_generated_win.S
@@ -1357,7 +1357,7 @@ _sk_lerp_565_hsw LABEL PROC
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 233,255,255,255,225 ; jmpq ffffffffe2001478 <_sk_callback_hsw+0xffffffffe1ffd85e>
+ DB 233,255,255,255,225 ; jmpq ffffffffe2001478 <_sk_callback_hsw+0xffffffffe1ffd22e>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
@@ -1934,6 +1934,334 @@ _sk_table_a_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
+PUBLIC _sk_parametric_r_hsw
+_sk_parametric_r_hsw LABEL PROC ; Generated code. Per lane of ymm0: out = (v <= c[0x10]) ? c[0xc]*v + c[0x18] : approx_pow(c[0x4]*v + c[0x8], c[0x0]) + c[0x14], then clamped to [0,1]; c = float table addressed by rax. Scratch: ymm8-ymm13, r8d, eax.
 DB 72,173 ; lods %ds:(%rsi),%rax -- rax = qword read from (%rsi); used below as the base address of seven float coefficients (+0x0..+0x18)
 DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8 -- threshold coefficient
 DB 196,65,124,194,192,2 ; vcmpleps %ymm8,%ymm0,%ymm8 -- ymm8 = lane mask where ymm0 <= threshold (predicate 2 = LE)
 DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9
 DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10
 DB 196,66,125,168,202 ; vfmadd213ps %ymm10,%ymm0,%ymm9 -- ymm9 = c[0xc]*v + c[0x18] (linear branch, kept until the blend)
 DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10
 DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11
 DB 196,66,125,168,211 ; vfmadd213ps %ymm11,%ymm0,%ymm10 -- ymm10 = c[0x4]*v + c[0x8] (base of the power); ymm0 input is dead after this
 DB 196,65,124,91,218 ; vcvtdq2ps %ymm10,%ymm11 -- treats the float's raw bits as int32 and converts to float (start of the IEEE-layout log2 approximation)
 DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d -- float bits of 2^-23
 DB 196,193,121,110,192 ; vmovd %r8d,%xmm0 -- xmm0/ymm0 is reused as a constant scratch register from here on
 DB 196,98,125,88,224 ; vpbroadcastd %xmm0,%ymm12
 DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d -- float bits of 127.0
 DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
 DB 196,98,125,88,232 ; vpbroadcastd %xmm0,%ymm13
 DB 196,66,37,186,236 ; vfmsub231ps %ymm12,%ymm11,%ymm13 -- ymm13 = bits*2^-23 - 127.0
 DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d -- binary32 mantissa mask
 DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
 DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0
 DB 196,65,125,219,210 ; vpand %ymm10,%ymm0,%ymm10 -- keep only the mantissa bits
 DB 65,184,0,0,0,63 ; mov $0x3f000000,%r8d -- float bits of 0.5
 DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
 DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0
 DB 197,45,235,208 ; vpor %ymm0,%ymm10,%ymm10 -- ymm10 = m, the mantissa rebased with the exponent of 0.5
 DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d -- float bits of ~2.774485
 DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
 DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0
 DB 197,20,88,216 ; vaddps %ymm0,%ymm13,%ymm11 -- ymm11 = ymm13 + 2.774485
 DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d -- float bits of ~1.498030
 DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
 DB 196,98,125,88,224 ; vpbroadcastd %xmm0,%ymm12
 DB 196,66,45,172,227 ; vfnmadd213ps %ymm11,%ymm10,%ymm12 -- ymm12 = ymm11 - 1.498030*m
 DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d -- float bits of ~1.725880
 DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
 DB 196,98,125,88,216 ; vpbroadcastd %xmm0,%ymm11
 DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d -- float bits of ~0.352089
 DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
 DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0
 DB 197,172,88,192 ; vaddps %ymm0,%ymm10,%ymm0 -- ymm0 = m + 0.352089
 DB 197,164,94,192 ; vdivps %ymm0,%ymm11,%ymm0 -- ymm0 = 1.725880 / (m + 0.352089)
 DB 197,156,92,192 ; vsubps %ymm0,%ymm12,%ymm0 -- ymm0 = approximate log2 of the base
 DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10 -- exponent coefficient c[0x0]
 DB 197,44,89,216 ; vmulps %ymm0,%ymm10,%ymm11 -- ymm11 = x = exponent * log2(base)
 DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10 -- rounding mode 1 = toward -inf, i.e. floor(x)
 DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10 -- ymm10 = f = x - floor(x)
 DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d -- float bits of 2^23
 DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
 DB 196,98,125,88,224 ; vpbroadcastd %xmm0,%ymm12
 DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d -- float bits of ~121.274058
 DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
 DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0
 DB 196,65,124,88,219 ; vaddps %ymm11,%ymm0,%ymm11 -- ymm11 = x + 121.274058
 DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d -- float bits of ~1.490129
 DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
 DB 196,98,125,88,232 ; vpbroadcastd %xmm0,%ymm13
 DB 196,66,45,172,235 ; vfnmadd213ps %ymm11,%ymm10,%ymm13 -- ymm13 = ymm11 - 1.490129*f
 DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d -- float bits of ~27.728023
 DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
 DB 196,98,125,88,216 ; vpbroadcastd %xmm0,%ymm11
 DB 65,184,248,245,154,64 ; mov $0x409af5f8,%r8d -- float bits of ~4.842525
 DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
 DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0
 DB 196,193,124,92,194 ; vsubps %ymm10,%ymm0,%ymm0 -- ymm0 = 4.842525 - f
 DB 197,164,94,192 ; vdivps %ymm0,%ymm11,%ymm0 -- ymm0 = 27.728023 / (4.842525 - f)
 DB 197,148,88,192 ; vaddps %ymm0,%ymm13,%ymm0
 DB 197,156,89,192 ; vmulps %ymm0,%ymm12,%ymm0 -- scale by 2^23 so the value lands in exponent/mantissa bit positions
 DB 197,253,91,192 ; vcvtps2dq %ymm0,%ymm0 -- to int32; the resulting bits are used as a float from here on (approximate 2^x)
 DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
 DB 196,193,124,88,194 ; vaddps %ymm10,%ymm0,%ymm0 -- power branch + c[0x14]
 DB 196,195,125,74,193,128 ; vblendvps %ymm8,%ymm9,%ymm0,%ymm0 -- take the linear branch (ymm9) in lanes where the LE mask is set, else the power branch
 DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8 -- ymm8 = 0.0
 DB 196,65,124,95,192 ; vmaxps %ymm8,%ymm0,%ymm8 -- clamp below at 0.0
 DB 184,0,0,128,63 ; mov $0x3f800000,%eax -- float bits of 1.0; overwrites rax, the coefficient pointer is not used again
 DB 197,249,110,192 ; vmovd %eax,%xmm0
 DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0
 DB 197,188,93,192 ; vminps %ymm0,%ymm8,%ymm0 -- clamp above at 1.0; final result in ymm0
 DB 72,173 ; lods %ds:(%rsi),%rax -- read the next qword from (%rsi) ...
 DB 255,224 ; jmpq *%rax -- ... and jump to it
+
+PUBLIC _sk_parametric_g_hsw
+_sk_parametric_g_hsw LABEL PROC ; Generated code. Per lane of ymm1: out = (v <= c[0x10]) ? c[0xc]*v + c[0x18] : approx_pow(c[0x4]*v + c[0x8], c[0x0]) + c[0x14], then clamped to [0,1]; c = float table addressed by rax. Scratch: ymm8-ymm13, r8d, eax.
 DB 72,173 ; lods %ds:(%rsi),%rax -- rax = qword read from (%rsi); used below as the base address of seven float coefficients (+0x0..+0x18)
 DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8 -- threshold coefficient
 DB 196,65,116,194,192,2 ; vcmpleps %ymm8,%ymm1,%ymm8 -- ymm8 = lane mask where ymm1 <= threshold (predicate 2 = LE)
 DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9
 DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10
 DB 196,66,117,168,202 ; vfmadd213ps %ymm10,%ymm1,%ymm9 -- ymm9 = c[0xc]*v + c[0x18] (linear branch, kept until the blend)
 DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10
 DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11
 DB 196,66,117,168,211 ; vfmadd213ps %ymm11,%ymm1,%ymm10 -- ymm10 = c[0x4]*v + c[0x8] (base of the power); ymm1 input is dead after this
 DB 196,65,124,91,218 ; vcvtdq2ps %ymm10,%ymm11 -- treats the float's raw bits as int32 and converts to float (start of the IEEE-layout log2 approximation)
 DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d -- float bits of 2^-23
 DB 196,193,121,110,200 ; vmovd %r8d,%xmm1 -- xmm1/ymm1 is reused as a constant scratch register from here on
 DB 196,98,125,88,225 ; vpbroadcastd %xmm1,%ymm12
 DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d -- float bits of 127.0
 DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
 DB 196,98,125,88,233 ; vpbroadcastd %xmm1,%ymm13
 DB 196,66,37,186,236 ; vfmsub231ps %ymm12,%ymm11,%ymm13 -- ymm13 = bits*2^-23 - 127.0
 DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d -- binary32 mantissa mask
 DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
 DB 196,226,125,88,201 ; vpbroadcastd %xmm1,%ymm1
 DB 196,65,117,219,210 ; vpand %ymm10,%ymm1,%ymm10 -- keep only the mantissa bits
 DB 65,184,0,0,0,63 ; mov $0x3f000000,%r8d -- float bits of 0.5
 DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
 DB 196,226,125,88,201 ; vpbroadcastd %xmm1,%ymm1
 DB 197,45,235,209 ; vpor %ymm1,%ymm10,%ymm10 -- ymm10 = m, the mantissa rebased with the exponent of 0.5
 DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d -- float bits of ~2.774485
 DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
 DB 196,226,125,88,201 ; vpbroadcastd %xmm1,%ymm1
 DB 197,20,88,217 ; vaddps %ymm1,%ymm13,%ymm11 -- ymm11 = ymm13 + 2.774485
 DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d -- float bits of ~1.498030
 DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
 DB 196,98,125,88,225 ; vpbroadcastd %xmm1,%ymm12
 DB 196,66,45,172,227 ; vfnmadd213ps %ymm11,%ymm10,%ymm12 -- ymm12 = ymm11 - 1.498030*m
 DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d -- float bits of ~1.725880
 DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
 DB 196,98,125,88,217 ; vpbroadcastd %xmm1,%ymm11
 DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d -- float bits of ~0.352089
 DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
 DB 196,226,125,88,201 ; vpbroadcastd %xmm1,%ymm1
 DB 197,172,88,201 ; vaddps %ymm1,%ymm10,%ymm1 -- ymm1 = m + 0.352089
 DB 197,164,94,201 ; vdivps %ymm1,%ymm11,%ymm1 -- ymm1 = 1.725880 / (m + 0.352089)
 DB 197,156,92,201 ; vsubps %ymm1,%ymm12,%ymm1 -- ymm1 = approximate log2 of the base
 DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10 -- exponent coefficient c[0x0]
 DB 197,44,89,217 ; vmulps %ymm1,%ymm10,%ymm11 -- ymm11 = x = exponent * log2(base)
 DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10 -- rounding mode 1 = toward -inf, i.e. floor(x)
 DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10 -- ymm10 = f = x - floor(x)
 DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d -- float bits of 2^23
 DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
 DB 196,98,125,88,225 ; vpbroadcastd %xmm1,%ymm12
 DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d -- float bits of ~121.274058
 DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
 DB 196,226,125,88,201 ; vpbroadcastd %xmm1,%ymm1
 DB 196,65,116,88,219 ; vaddps %ymm11,%ymm1,%ymm11 -- ymm11 = x + 121.274058
 DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d -- float bits of ~1.490129
 DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
 DB 196,98,125,88,233 ; vpbroadcastd %xmm1,%ymm13
 DB 196,66,45,172,235 ; vfnmadd213ps %ymm11,%ymm10,%ymm13 -- ymm13 = ymm11 - 1.490129*f
 DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d -- float bits of ~27.728023
 DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
 DB 196,98,125,88,217 ; vpbroadcastd %xmm1,%ymm11
 DB 65,184,248,245,154,64 ; mov $0x409af5f8,%r8d -- float bits of ~4.842525
 DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
 DB 196,226,125,88,201 ; vpbroadcastd %xmm1,%ymm1
 DB 196,193,116,92,202 ; vsubps %ymm10,%ymm1,%ymm1 -- ymm1 = 4.842525 - f
 DB 197,164,94,201 ; vdivps %ymm1,%ymm11,%ymm1 -- ymm1 = 27.728023 / (4.842525 - f)
 DB 197,148,88,201 ; vaddps %ymm1,%ymm13,%ymm1
 DB 197,156,89,201 ; vmulps %ymm1,%ymm12,%ymm1 -- scale by 2^23 so the value lands in exponent/mantissa bit positions
 DB 197,253,91,201 ; vcvtps2dq %ymm1,%ymm1 -- to int32; the resulting bits are used as a float from here on (approximate 2^x)
 DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
 DB 196,193,116,88,202 ; vaddps %ymm10,%ymm1,%ymm1 -- power branch + c[0x14]
 DB 196,195,117,74,201,128 ; vblendvps %ymm8,%ymm9,%ymm1,%ymm1 -- take the linear branch (ymm9) in lanes where the LE mask is set, else the power branch
 DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8 -- ymm8 = 0.0
 DB 196,65,116,95,192 ; vmaxps %ymm8,%ymm1,%ymm8 -- clamp below at 0.0
 DB 184,0,0,128,63 ; mov $0x3f800000,%eax -- float bits of 1.0; overwrites rax, the coefficient pointer is not used again
 DB 197,249,110,200 ; vmovd %eax,%xmm1
 DB 196,226,125,88,201 ; vpbroadcastd %xmm1,%ymm1
 DB 197,188,93,201 ; vminps %ymm1,%ymm8,%ymm1 -- clamp above at 1.0; final result in ymm1
 DB 72,173 ; lods %ds:(%rsi),%rax -- read the next qword from (%rsi) ...
 DB 255,224 ; jmpq *%rax -- ... and jump to it
+
+PUBLIC _sk_parametric_b_hsw
+_sk_parametric_b_hsw LABEL PROC ; Generated code. Per lane of ymm2: out = (v <= c[0x10]) ? c[0xc]*v + c[0x18] : approx_pow(c[0x4]*v + c[0x8], c[0x0]) + c[0x14], then clamped to [0,1]; c = float table addressed by rax. Scratch: ymm8-ymm13, r8d, eax.
 DB 72,173 ; lods %ds:(%rsi),%rax -- rax = qword read from (%rsi); used below as the base address of seven float coefficients (+0x0..+0x18)
 DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8 -- threshold coefficient
 DB 196,65,108,194,192,2 ; vcmpleps %ymm8,%ymm2,%ymm8 -- ymm8 = lane mask where ymm2 <= threshold (predicate 2 = LE)
 DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9
 DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10
 DB 196,66,109,168,202 ; vfmadd213ps %ymm10,%ymm2,%ymm9 -- ymm9 = c[0xc]*v + c[0x18] (linear branch, kept until the blend)
 DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10
 DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11
 DB 196,66,109,168,211 ; vfmadd213ps %ymm11,%ymm2,%ymm10 -- ymm10 = c[0x4]*v + c[0x8] (base of the power); ymm2 input is dead after this
 DB 196,65,124,91,218 ; vcvtdq2ps %ymm10,%ymm11 -- treats the float's raw bits as int32 and converts to float (start of the IEEE-layout log2 approximation)
 DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d -- float bits of 2^-23
 DB 196,193,121,110,208 ; vmovd %r8d,%xmm2 -- xmm2/ymm2 is reused as a constant scratch register from here on
 DB 196,98,125,88,226 ; vpbroadcastd %xmm2,%ymm12
 DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d -- float bits of 127.0
 DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
 DB 196,98,125,88,234 ; vpbroadcastd %xmm2,%ymm13
 DB 196,66,37,186,236 ; vfmsub231ps %ymm12,%ymm11,%ymm13 -- ymm13 = bits*2^-23 - 127.0
 DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d -- binary32 mantissa mask
 DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
 DB 196,226,125,88,210 ; vpbroadcastd %xmm2,%ymm2
 DB 196,65,109,219,210 ; vpand %ymm10,%ymm2,%ymm10 -- keep only the mantissa bits
 DB 65,184,0,0,0,63 ; mov $0x3f000000,%r8d -- float bits of 0.5
 DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
 DB 196,226,125,88,210 ; vpbroadcastd %xmm2,%ymm2
 DB 197,45,235,210 ; vpor %ymm2,%ymm10,%ymm10 -- ymm10 = m, the mantissa rebased with the exponent of 0.5
 DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d -- float bits of ~2.774485
 DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
 DB 196,226,125,88,210 ; vpbroadcastd %xmm2,%ymm2
 DB 197,20,88,218 ; vaddps %ymm2,%ymm13,%ymm11 -- ymm11 = ymm13 + 2.774485
 DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d -- float bits of ~1.498030
 DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
 DB 196,98,125,88,226 ; vpbroadcastd %xmm2,%ymm12
 DB 196,66,45,172,227 ; vfnmadd213ps %ymm11,%ymm10,%ymm12 -- ymm12 = ymm11 - 1.498030*m
 DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d -- float bits of ~1.725880
 DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
 DB 196,98,125,88,218 ; vpbroadcastd %xmm2,%ymm11
 DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d -- float bits of ~0.352089
 DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
 DB 196,226,125,88,210 ; vpbroadcastd %xmm2,%ymm2
 DB 197,172,88,210 ; vaddps %ymm2,%ymm10,%ymm2 -- ymm2 = m + 0.352089
 DB 197,164,94,210 ; vdivps %ymm2,%ymm11,%ymm2 -- ymm2 = 1.725880 / (m + 0.352089)
 DB 197,156,92,210 ; vsubps %ymm2,%ymm12,%ymm2 -- ymm2 = approximate log2 of the base
 DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10 -- exponent coefficient c[0x0]
 DB 197,44,89,218 ; vmulps %ymm2,%ymm10,%ymm11 -- ymm11 = x = exponent * log2(base)
 DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10 -- rounding mode 1 = toward -inf, i.e. floor(x)
 DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10 -- ymm10 = f = x - floor(x)
 DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d -- float bits of 2^23
 DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
 DB 196,98,125,88,226 ; vpbroadcastd %xmm2,%ymm12
 DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d -- float bits of ~121.274058
 DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
 DB 196,226,125,88,210 ; vpbroadcastd %xmm2,%ymm2
 DB 196,65,108,88,219 ; vaddps %ymm11,%ymm2,%ymm11 -- ymm11 = x + 121.274058
 DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d -- float bits of ~1.490129
 DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
 DB 196,98,125,88,234 ; vpbroadcastd %xmm2,%ymm13
 DB 196,66,45,172,235 ; vfnmadd213ps %ymm11,%ymm10,%ymm13 -- ymm13 = ymm11 - 1.490129*f
 DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d -- float bits of ~27.728023
 DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
 DB 196,98,125,88,218 ; vpbroadcastd %xmm2,%ymm11
 DB 65,184,248,245,154,64 ; mov $0x409af5f8,%r8d -- float bits of ~4.842525
 DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
 DB 196,226,125,88,210 ; vpbroadcastd %xmm2,%ymm2
 DB 196,193,108,92,210 ; vsubps %ymm10,%ymm2,%ymm2 -- ymm2 = 4.842525 - f
 DB 197,164,94,210 ; vdivps %ymm2,%ymm11,%ymm2 -- ymm2 = 27.728023 / (4.842525 - f)
 DB 197,148,88,210 ; vaddps %ymm2,%ymm13,%ymm2
 DB 197,156,89,210 ; vmulps %ymm2,%ymm12,%ymm2 -- scale by 2^23 so the value lands in exponent/mantissa bit positions
 DB 197,253,91,210 ; vcvtps2dq %ymm2,%ymm2 -- to int32; the resulting bits are used as a float from here on (approximate 2^x)
 DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
 DB 196,193,108,88,210 ; vaddps %ymm10,%ymm2,%ymm2 -- power branch + c[0x14]
 DB 196,195,109,74,209,128 ; vblendvps %ymm8,%ymm9,%ymm2,%ymm2 -- take the linear branch (ymm9) in lanes where the LE mask is set, else the power branch
 DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8 -- ymm8 = 0.0
 DB 196,65,108,95,192 ; vmaxps %ymm8,%ymm2,%ymm8 -- clamp below at 0.0
 DB 184,0,0,128,63 ; mov $0x3f800000,%eax -- float bits of 1.0; overwrites rax, the coefficient pointer is not used again
 DB 197,249,110,208 ; vmovd %eax,%xmm2
 DB 196,226,125,88,210 ; vpbroadcastd %xmm2,%ymm2
 DB 197,188,93,210 ; vminps %ymm2,%ymm8,%ymm2 -- clamp above at 1.0; final result in ymm2
 DB 72,173 ; lods %ds:(%rsi),%rax -- read the next qword from (%rsi) ...
 DB 255,224 ; jmpq *%rax -- ... and jump to it
+
+PUBLIC _sk_parametric_a_hsw
+_sk_parametric_a_hsw LABEL PROC ; Generated code. Per lane of ymm3: out = (v <= c[0x10]) ? c[0xc]*v + c[0x18] : approx_pow(c[0x4]*v + c[0x8], c[0x0]) + c[0x14], then clamped to [0,1]; c = float table addressed by rax. Scratch: ymm8-ymm13, r8d, eax.
 DB 72,173 ; lods %ds:(%rsi),%rax -- rax = qword read from (%rsi); used below as the base address of seven float coefficients (+0x0..+0x18)
 DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8 -- threshold coefficient
 DB 196,65,100,194,192,2 ; vcmpleps %ymm8,%ymm3,%ymm8 -- ymm8 = lane mask where ymm3 <= threshold (predicate 2 = LE)
 DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9
 DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10
 DB 196,66,101,168,202 ; vfmadd213ps %ymm10,%ymm3,%ymm9 -- ymm9 = c[0xc]*v + c[0x18] (linear branch, kept until the blend)
 DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10
 DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11
 DB 196,66,101,168,211 ; vfmadd213ps %ymm11,%ymm3,%ymm10 -- ymm10 = c[0x4]*v + c[0x8] (base of the power); ymm3 input is dead after this
 DB 196,65,124,91,218 ; vcvtdq2ps %ymm10,%ymm11 -- treats the float's raw bits as int32 and converts to float (start of the IEEE-layout log2 approximation)
 DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d -- float bits of 2^-23
 DB 196,193,121,110,216 ; vmovd %r8d,%xmm3 -- xmm3/ymm3 is reused as a constant scratch register from here on
 DB 196,98,125,88,227 ; vpbroadcastd %xmm3,%ymm12
 DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d -- float bits of 127.0
 DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
 DB 196,98,125,88,235 ; vpbroadcastd %xmm3,%ymm13
 DB 196,66,37,186,236 ; vfmsub231ps %ymm12,%ymm11,%ymm13 -- ymm13 = bits*2^-23 - 127.0
 DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d -- binary32 mantissa mask
 DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
 DB 196,226,125,88,219 ; vpbroadcastd %xmm3,%ymm3
 DB 196,65,101,219,210 ; vpand %ymm10,%ymm3,%ymm10 -- keep only the mantissa bits
 DB 65,184,0,0,0,63 ; mov $0x3f000000,%r8d -- float bits of 0.5
 DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
 DB 196,226,125,88,219 ; vpbroadcastd %xmm3,%ymm3
 DB 197,45,235,211 ; vpor %ymm3,%ymm10,%ymm10 -- ymm10 = m, the mantissa rebased with the exponent of 0.5
 DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d -- float bits of ~2.774485
 DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
 DB 196,226,125,88,219 ; vpbroadcastd %xmm3,%ymm3
 DB 197,20,88,219 ; vaddps %ymm3,%ymm13,%ymm11 -- ymm11 = ymm13 + 2.774485
 DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d -- float bits of ~1.498030
 DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
 DB 196,98,125,88,227 ; vpbroadcastd %xmm3,%ymm12
 DB 196,66,45,172,227 ; vfnmadd213ps %ymm11,%ymm10,%ymm12 -- ymm12 = ymm11 - 1.498030*m
 DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d -- float bits of ~1.725880
 DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
 DB 196,98,125,88,219 ; vpbroadcastd %xmm3,%ymm11
 DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d -- float bits of ~0.352089
 DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
 DB 196,226,125,88,219 ; vpbroadcastd %xmm3,%ymm3
 DB 197,172,88,219 ; vaddps %ymm3,%ymm10,%ymm3 -- ymm3 = m + 0.352089
 DB 197,164,94,219 ; vdivps %ymm3,%ymm11,%ymm3 -- ymm3 = 1.725880 / (m + 0.352089)
 DB 197,156,92,219 ; vsubps %ymm3,%ymm12,%ymm3 -- ymm3 = approximate log2 of the base
 DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10 -- exponent coefficient c[0x0]
 DB 197,44,89,219 ; vmulps %ymm3,%ymm10,%ymm11 -- ymm11 = x = exponent * log2(base)
 DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10 -- rounding mode 1 = toward -inf, i.e. floor(x)
 DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10 -- ymm10 = f = x - floor(x)
 DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d -- float bits of 2^23
 DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
 DB 196,98,125,88,227 ; vpbroadcastd %xmm3,%ymm12
 DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d -- float bits of ~121.274058
 DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
 DB 196,226,125,88,219 ; vpbroadcastd %xmm3,%ymm3
 DB 196,65,100,88,219 ; vaddps %ymm11,%ymm3,%ymm11 -- ymm11 = x + 121.274058
 DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d -- float bits of ~1.490129
 DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
 DB 196,98,125,88,235 ; vpbroadcastd %xmm3,%ymm13
 DB 196,66,45,172,235 ; vfnmadd213ps %ymm11,%ymm10,%ymm13 -- ymm13 = ymm11 - 1.490129*f
 DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d -- float bits of ~27.728023
 DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
 DB 196,98,125,88,219 ; vpbroadcastd %xmm3,%ymm11
 DB 65,184,248,245,154,64 ; mov $0x409af5f8,%r8d -- float bits of ~4.842525
 DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
 DB 196,226,125,88,219 ; vpbroadcastd %xmm3,%ymm3
 DB 196,193,100,92,218 ; vsubps %ymm10,%ymm3,%ymm3 -- ymm3 = 4.842525 - f
 DB 197,164,94,219 ; vdivps %ymm3,%ymm11,%ymm3 -- ymm3 = 27.728023 / (4.842525 - f)
 DB 197,148,88,219 ; vaddps %ymm3,%ymm13,%ymm3
 DB 197,156,89,219 ; vmulps %ymm3,%ymm12,%ymm3 -- scale by 2^23 so the value lands in exponent/mantissa bit positions
 DB 197,253,91,219 ; vcvtps2dq %ymm3,%ymm3 -- to int32; the resulting bits are used as a float from here on (approximate 2^x)
 DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
 DB 196,193,100,88,218 ; vaddps %ymm10,%ymm3,%ymm3 -- power branch + c[0x14]
 DB 196,195,101,74,217,128 ; vblendvps %ymm8,%ymm9,%ymm3,%ymm3 -- take the linear branch (ymm9) in lanes where the LE mask is set, else the power branch
 DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8 -- ymm8 = 0.0
 DB 196,65,100,95,192 ; vmaxps %ymm8,%ymm3,%ymm8 -- clamp below at 0.0
 DB 184,0,0,128,63 ; mov $0x3f800000,%eax -- float bits of 1.0; overwrites rax, the coefficient pointer is not used again
 DB 197,249,110,216 ; vmovd %eax,%xmm3
 DB 196,226,125,88,219 ; vpbroadcastd %xmm3,%ymm3
 DB 197,188,93,219 ; vminps %ymm3,%ymm8,%ymm3 -- clamp above at 1.0; final result in ymm3
 DB 72,173 ; lods %ds:(%rsi),%rax -- read the next qword from (%rsi) ...
 DB 255,224 ; jmpq *%rax -- ... and jump to it
+
PUBLIC _sk_load_a8_hsw
_sk_load_a8_hsw LABEL PROC
DB 73,137,200 ; mov %rcx,%r8
@@ -1941,7 +2269,7 @@ _sk_load_a8_hsw LABEL PROC
DB 72,139,0 ; mov (%rax),%rax
DB 72,1,248 ; add %rdi,%rax
DB 77,133,192 ; test %r8,%r8
- DB 117,50 ; jne 1e28 <_sk_load_a8_hsw+0x42>
+ DB 117,50 ; jne 2458 <_sk_load_a8_hsw+0x42>
DB 197,250,126,0 ; vmovq (%rax),%xmm0
DB 196,226,125,49,192 ; vpmovzxbd %xmm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
@@ -1964,9 +2292,9 @@ _sk_load_a8_hsw LABEL PROC
DB 77,9,217 ; or %r11,%r9
DB 72,131,193,8 ; add $0x8,%rcx
DB 73,255,202 ; dec %r10
- DB 117,234 ; jne 1e30 <_sk_load_a8_hsw+0x4a>
+ DB 117,234 ; jne 2460 <_sk_load_a8_hsw+0x4a>
DB 196,193,249,110,193 ; vmovq %r9,%xmm0
- DB 235,173 ; jmp 1dfa <_sk_load_a8_hsw+0x14>
+ DB 235,173 ; jmp 242a <_sk_load_a8_hsw+0x14>
PUBLIC _sk_gather_a8_hsw
_sk_gather_a8_hsw LABEL PROC
@@ -2037,7 +2365,7 @@ _sk_store_a8_hsw LABEL PROC
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 196,65,57,103,192 ; vpackuswb %xmm8,%xmm8,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 1f65 <_sk_store_a8_hsw+0x3b>
+ DB 117,10 ; jne 2595 <_sk_store_a8_hsw+0x3b>
DB 196,65,123,17,4,57 ; vmovsd %xmm8,(%r9,%rdi,1)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -2045,10 +2373,10 @@ _sk_store_a8_hsw LABEL PROC
DB 65,128,224,7 ; and $0x7,%r8b
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 1f61 <_sk_store_a8_hsw+0x37>
+ DB 119,236 ; ja 2591 <_sk_store_a8_hsw+0x37>
DB 196,66,121,48,192 ; vpmovzxbw %xmm8,%xmm8
DB 65,15,182,192 ; movzbl %r8b,%eax
- DB 76,141,5,67,0,0,0 ; lea 0x43(%rip),%r8 # 1fc8 <_sk_store_a8_hsw+0x9e>
+ DB 76,141,5,67,0,0,0 ; lea 0x43(%rip),%r8 # 25f8 <_sk_store_a8_hsw+0x9e>
DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax
DB 76,1,192 ; add %r8,%rax
DB 255,224 ; jmpq *%rax
@@ -2059,7 +2387,7 @@ _sk_store_a8_hsw LABEL PROC
DB 196,67,121,20,68,57,2,4 ; vpextrb $0x4,%xmm8,0x2(%r9,%rdi,1)
DB 196,67,121,20,68,57,1,2 ; vpextrb $0x2,%xmm8,0x1(%r9,%rdi,1)
DB 196,67,121,20,4,57,0 ; vpextrb $0x0,%xmm8,(%r9,%rdi,1)
- DB 235,154 ; jmp 1f61 <_sk_store_a8_hsw+0x37>
+ DB 235,154 ; jmp 2591 <_sk_store_a8_hsw+0x37>
DB 144 ; nop
DB 246,255 ; idiv %bh
DB 255 ; (bad)
@@ -2091,7 +2419,7 @@ _sk_load_g8_hsw LABEL PROC
DB 72,139,0 ; mov (%rax),%rax
DB 72,1,248 ; add %rdi,%rax
DB 77,133,192 ; test %r8,%r8
- DB 117,60 ; jne 2030 <_sk_load_g8_hsw+0x4c>
+ DB 117,60 ; jne 2660 <_sk_load_g8_hsw+0x4c>
DB 197,250,126,0 ; vmovq (%rax),%xmm0
DB 196,226,125,49,192 ; vpmovzxbd %xmm0,%ymm0
DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0
@@ -2116,9 +2444,9 @@ _sk_load_g8_hsw LABEL PROC
DB 77,9,217 ; or %r11,%r9
DB 72,131,193,8 ; add $0x8,%rcx
DB 73,255,202 ; dec %r10
- DB 117,234 ; jne 2038 <_sk_load_g8_hsw+0x54>
+ DB 117,234 ; jne 2668 <_sk_load_g8_hsw+0x54>
DB 196,193,249,110,193 ; vmovq %r9,%xmm0
- DB 235,163 ; jmp 1ff8 <_sk_load_g8_hsw+0x14>
+ DB 235,163 ; jmp 2628 <_sk_load_g8_hsw+0x14>
PUBLIC _sk_gather_g8_hsw
_sk_gather_g8_hsw LABEL PROC
@@ -2183,9 +2511,9 @@ _sk_gather_i8_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 73,137,192 ; mov %rax,%r8
DB 77,133,192 ; test %r8,%r8
- DB 116,5 ; je 214b <_sk_gather_i8_hsw+0xf>
+ DB 116,5 ; je 277b <_sk_gather_i8_hsw+0xf>
DB 76,137,192 ; mov %r8,%rax
- DB 235,2 ; jmp 214d <_sk_gather_i8_hsw+0x11>
+ DB 235,2 ; jmp 277d <_sk_gather_i8_hsw+0x11>
DB 72,173 ; lods %ds:(%rsi),%rax
DB 65,87 ; push %r15
DB 65,86 ; push %r14
@@ -2256,7 +2584,7 @@ _sk_load_565_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,149,0,0,0 ; jne 22ff <_sk_load_565_hsw+0xa3>
+ DB 15,133,149,0,0,0 ; jne 292f <_sk_load_565_hsw+0xa3>
DB 196,193,122,111,4,122 ; vmovdqu (%r10,%rdi,2),%xmm0
DB 196,226,125,51,208 ; vpmovzxwd %xmm0,%ymm2
DB 184,0,248,0,0 ; mov $0xf800,%eax
@@ -2296,9 +2624,9 @@ _sk_load_565_hsw LABEL PROC
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 15,135,89,255,255,255 ; ja 2270 <_sk_load_565_hsw+0x14>
+ DB 15,135,89,255,255,255 ; ja 28a0 <_sk_load_565_hsw+0x14>
DB 69,15,182,192 ; movzbl %r8b,%r8d
- DB 76,141,13,74,0,0,0 ; lea 0x4a(%rip),%r9 # 236c <_sk_load_565_hsw+0x110>
+ DB 76,141,13,74,0,0,0 ; lea 0x4a(%rip),%r9 # 299c <_sk_load_565_hsw+0x110>
DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax
DB 76,1,200 ; add %r9,%rax
DB 255,224 ; jmpq *%rax
@@ -2310,12 +2638,12 @@ _sk_load_565_hsw LABEL PROC
DB 196,193,121,196,68,122,4,2 ; vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,68,122,2,1 ; vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,4,122,0 ; vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- DB 233,5,255,255,255 ; jmpq 2270 <_sk_load_565_hsw+0x14>
+ DB 233,5,255,255,255 ; jmpq 28a0 <_sk_load_565_hsw+0x14>
DB 144 ; nop
DB 243,255 ; repz (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 235,255 ; jmp 2371 <_sk_load_565_hsw+0x115>
+ DB 235,255 ; jmp 29a1 <_sk_load_565_hsw+0x115>
DB 255 ; (bad)
DB 255,227 ; jmpq *%rbx
DB 255 ; (bad)
@@ -2438,7 +2766,7 @@ _sk_store_565_hsw LABEL PROC
DB 196,67,125,57,193,1 ; vextracti128 $0x1,%ymm8,%xmm9
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 2537 <_sk_store_565_hsw+0x6c>
+ DB 117,10 ; jne 2b67 <_sk_store_565_hsw+0x6c>
DB 196,65,122,127,4,121 ; vmovdqu %xmm8,(%r9,%rdi,2)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -2446,9 +2774,9 @@ _sk_store_565_hsw LABEL PROC
DB 65,128,224,7 ; and $0x7,%r8b
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 2533 <_sk_store_565_hsw+0x68>
+ DB 119,236 ; ja 2b63 <_sk_store_565_hsw+0x68>
DB 65,15,182,192 ; movzbl %r8b,%eax
- DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 2594 <_sk_store_565_hsw+0xc9>
+ DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 2bc4 <_sk_store_565_hsw+0xc9>
DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax
DB 76,1,192 ; add %r8,%rax
DB 255,224 ; jmpq *%rax
@@ -2459,7 +2787,7 @@ _sk_store_565_hsw LABEL PROC
DB 196,67,121,21,68,121,4,2 ; vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
DB 196,67,121,21,68,121,2,1 ; vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
DB 196,67,121,21,4,121,0 ; vpextrw $0x0,%xmm8,(%r9,%rdi,2)
- DB 235,159 ; jmp 2533 <_sk_store_565_hsw+0x68>
+ DB 235,159 ; jmp 2b63 <_sk_store_565_hsw+0x68>
DB 247,255 ; idiv %edi
DB 255 ; (bad)
DB 255 ; (bad)
@@ -2488,7 +2816,7 @@ _sk_load_4444_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,179,0,0,0 ; jne 2671 <_sk_load_4444_hsw+0xc1>
+ DB 15,133,179,0,0,0 ; jne 2ca1 <_sk_load_4444_hsw+0xc1>
DB 196,193,122,111,4,122 ; vmovdqu (%r10,%rdi,2),%xmm0
DB 196,98,125,51,200 ; vpmovzxwd %xmm0,%ymm9
DB 184,0,240,0,0 ; mov $0xf000,%eax
@@ -2534,9 +2862,9 @@ _sk_load_4444_hsw LABEL PROC
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 15,135,59,255,255,255 ; ja 25c4 <_sk_load_4444_hsw+0x14>
+ DB 15,135,59,255,255,255 ; ja 2bf4 <_sk_load_4444_hsw+0x14>
DB 69,15,182,192 ; movzbl %r8b,%r8d
- DB 76,141,13,76,0,0,0 ; lea 0x4c(%rip),%r9 # 26e0 <_sk_load_4444_hsw+0x130>
+ DB 76,141,13,76,0,0,0 ; lea 0x4c(%rip),%r9 # 2d10 <_sk_load_4444_hsw+0x130>
DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax
DB 76,1,200 ; add %r9,%rax
DB 255,224 ; jmpq *%rax
@@ -2548,13 +2876,13 @@ _sk_load_4444_hsw LABEL PROC
DB 196,193,121,196,68,122,4,2 ; vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,68,122,2,1 ; vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,4,122,0 ; vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- DB 233,231,254,255,255 ; jmpq 25c4 <_sk_load_4444_hsw+0x14>
+ DB 233,231,254,255,255 ; jmpq 2bf4 <_sk_load_4444_hsw+0x14>
DB 15,31,0 ; nopl (%rax)
DB 241 ; icebp
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 233,255,255,255,225 ; jmpq ffffffffe20026e8 <_sk_callback_hsw+0xffffffffe1ffeace>
+ DB 233,255,255,255,225 ; jmpq ffffffffe2002d18 <_sk_callback_hsw+0xffffffffe1ffeace>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
@@ -2682,7 +3010,7 @@ _sk_store_4444_hsw LABEL PROC
DB 196,67,125,57,193,1 ; vextracti128 $0x1,%ymm8,%xmm9
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 28cf <_sk_store_4444_hsw+0x72>
+ DB 117,10 ; jne 2eff <_sk_store_4444_hsw+0x72>
DB 196,65,122,127,4,121 ; vmovdqu %xmm8,(%r9,%rdi,2)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -2690,9 +3018,9 @@ _sk_store_4444_hsw LABEL PROC
DB 65,128,224,7 ; and $0x7,%r8b
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 28cb <_sk_store_4444_hsw+0x6e>
+ DB 119,236 ; ja 2efb <_sk_store_4444_hsw+0x6e>
DB 65,15,182,192 ; movzbl %r8b,%eax
- DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 292c <_sk_store_4444_hsw+0xcf>
+ DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 2f5c <_sk_store_4444_hsw+0xcf>
DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax
DB 76,1,192 ; add %r8,%rax
DB 255,224 ; jmpq *%rax
@@ -2703,7 +3031,7 @@ _sk_store_4444_hsw LABEL PROC
DB 196,67,121,21,68,121,4,2 ; vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
DB 196,67,121,21,68,121,2,1 ; vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
DB 196,67,121,21,4,121,0 ; vpextrw $0x0,%xmm8,(%r9,%rdi,2)
- DB 235,159 ; jmp 28cb <_sk_store_4444_hsw+0x6e>
+ DB 235,159 ; jmp 2efb <_sk_store_4444_hsw+0x6e>
DB 247,255 ; idiv %edi
DB 255 ; (bad)
DB 255 ; (bad)
@@ -2734,7 +3062,7 @@ _sk_load_8888_hsw LABEL PROC
DB 76,141,12,189,0,0,0,0 ; lea 0x0(,%rdi,4),%r9
DB 76,3,8 ; add (%rax),%r9
DB 77,133,192 ; test %r8,%r8
- DB 117,104 ; jne 29c5 <_sk_load_8888_hsw+0x7d>
+ DB 117,104 ; jne 2ff5 <_sk_load_8888_hsw+0x7d>
DB 196,193,126,111,25 ; vmovdqu (%r9),%ymm3
DB 184,255,0,0,0 ; mov $0xff,%eax
DB 197,249,110,192 ; vmovd %eax,%xmm0
@@ -2767,7 +3095,7 @@ _sk_load_8888_hsw LABEL PROC
DB 196,225,249,110,192 ; vmovq %rax,%xmm0
DB 196,226,125,33,192 ; vpmovsxbd %xmm0,%ymm0
DB 196,194,125,140,25 ; vpmaskmovd (%r9),%ymm0,%ymm3
- DB 233,116,255,255,255 ; jmpq 2962 <_sk_load_8888_hsw+0x1a>
+ DB 233,116,255,255,255 ; jmpq 2f92 <_sk_load_8888_hsw+0x1a>
PUBLIC _sk_gather_8888_hsw
_sk_gather_8888_hsw LABEL PROC
@@ -2827,7 +3155,7 @@ _sk_store_8888_hsw LABEL PROC
DB 196,65,45,235,192 ; vpor %ymm8,%ymm10,%ymm8
DB 196,65,53,235,192 ; vpor %ymm8,%ymm9,%ymm8
DB 77,133,192 ; test %r8,%r8
- DB 117,12 ; jne 2ae8 <_sk_store_8888_hsw+0x74>
+ DB 117,12 ; jne 3118 <_sk_store_8888_hsw+0x74>
DB 196,65,126,127,1 ; vmovdqu %ymm8,(%r9)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,137,193 ; mov %r8,%rcx
@@ -2840,14 +3168,14 @@ _sk_store_8888_hsw LABEL PROC
DB 196,97,249,110,200 ; vmovq %rax,%xmm9
DB 196,66,125,33,201 ; vpmovsxbd %xmm9,%ymm9
DB 196,66,53,142,1 ; vpmaskmovd %ymm8,%ymm9,(%r9)
- DB 235,211 ; jmp 2ae1 <_sk_store_8888_hsw+0x6d>
+ DB 235,211 ; jmp 3111 <_sk_store_8888_hsw+0x6d>
PUBLIC _sk_load_f16_hsw
_sk_load_f16_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 72,139,0 ; mov (%rax),%rax
DB 72,133,201 ; test %rcx,%rcx
- DB 117,97 ; jne 2b79 <_sk_load_f16_hsw+0x6b>
+ DB 117,97 ; jne 31a9 <_sk_load_f16_hsw+0x6b>
DB 197,121,16,4,248 ; vmovupd (%rax,%rdi,8),%xmm8
DB 197,249,16,84,248,16 ; vmovupd 0x10(%rax,%rdi,8),%xmm2
DB 197,249,16,92,248,32 ; vmovupd 0x20(%rax,%rdi,8),%xmm3
@@ -2873,29 +3201,29 @@ _sk_load_f16_hsw LABEL PROC
DB 197,123,16,4,248 ; vmovsd (%rax,%rdi,8),%xmm8
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,79 ; je 2bd8 <_sk_load_f16_hsw+0xca>
+ DB 116,79 ; je 3208 <_sk_load_f16_hsw+0xca>
DB 197,57,22,68,248,8 ; vmovhpd 0x8(%rax,%rdi,8),%xmm8,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,67 ; jb 2bd8 <_sk_load_f16_hsw+0xca>
+ DB 114,67 ; jb 3208 <_sk_load_f16_hsw+0xca>
DB 197,251,16,84,248,16 ; vmovsd 0x10(%rax,%rdi,8),%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 116,68 ; je 2be5 <_sk_load_f16_hsw+0xd7>
+ DB 116,68 ; je 3215 <_sk_load_f16_hsw+0xd7>
DB 197,233,22,84,248,24 ; vmovhpd 0x18(%rax,%rdi,8),%xmm2,%xmm2
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,56 ; jb 2be5 <_sk_load_f16_hsw+0xd7>
+ DB 114,56 ; jb 3215 <_sk_load_f16_hsw+0xd7>
DB 197,251,16,92,248,32 ; vmovsd 0x20(%rax,%rdi,8),%xmm3
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 15,132,114,255,255,255 ; je 2b2f <_sk_load_f16_hsw+0x21>
+ DB 15,132,114,255,255,255 ; je 315f <_sk_load_f16_hsw+0x21>
DB 197,225,22,92,248,40 ; vmovhpd 0x28(%rax,%rdi,8),%xmm3,%xmm3
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 15,130,98,255,255,255 ; jb 2b2f <_sk_load_f16_hsw+0x21>
+ DB 15,130,98,255,255,255 ; jb 315f <_sk_load_f16_hsw+0x21>
DB 197,122,126,76,248,48 ; vmovq 0x30(%rax,%rdi,8),%xmm9
- DB 233,87,255,255,255 ; jmpq 2b2f <_sk_load_f16_hsw+0x21>
+ DB 233,87,255,255,255 ; jmpq 315f <_sk_load_f16_hsw+0x21>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2
- DB 233,74,255,255,255 ; jmpq 2b2f <_sk_load_f16_hsw+0x21>
+ DB 233,74,255,255,255 ; jmpq 315f <_sk_load_f16_hsw+0x21>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
- DB 233,65,255,255,255 ; jmpq 2b2f <_sk_load_f16_hsw+0x21>
+ DB 233,65,255,255,255 ; jmpq 315f <_sk_load_f16_hsw+0x21>
PUBLIC _sk_gather_f16_hsw
_sk_gather_f16_hsw LABEL PROC
@@ -2949,7 +3277,7 @@ _sk_store_f16_hsw LABEL PROC
DB 196,65,57,98,205 ; vpunpckldq %xmm13,%xmm8,%xmm9
DB 196,65,57,106,197 ; vpunpckhdq %xmm13,%xmm8,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,27 ; jne 2cdd <_sk_store_f16_hsw+0x65>
+ DB 117,27 ; jne 330d <_sk_store_f16_hsw+0x65>
DB 197,120,17,28,248 ; vmovups %xmm11,(%rax,%rdi,8)
DB 197,120,17,84,248,16 ; vmovups %xmm10,0x10(%rax,%rdi,8)
DB 197,120,17,76,248,32 ; vmovups %xmm9,0x20(%rax,%rdi,8)
@@ -2958,22 +3286,22 @@ _sk_store_f16_hsw LABEL PROC
DB 255,224 ; jmpq *%rax
DB 197,121,214,28,248 ; vmovq %xmm11,(%rax,%rdi,8)
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,241 ; je 2cd9 <_sk_store_f16_hsw+0x61>
+ DB 116,241 ; je 3309 <_sk_store_f16_hsw+0x61>
DB 197,121,23,92,248,8 ; vmovhpd %xmm11,0x8(%rax,%rdi,8)
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,229 ; jb 2cd9 <_sk_store_f16_hsw+0x61>
+ DB 114,229 ; jb 3309 <_sk_store_f16_hsw+0x61>
DB 197,121,214,84,248,16 ; vmovq %xmm10,0x10(%rax,%rdi,8)
- DB 116,221 ; je 2cd9 <_sk_store_f16_hsw+0x61>
+ DB 116,221 ; je 3309 <_sk_store_f16_hsw+0x61>
DB 197,121,23,84,248,24 ; vmovhpd %xmm10,0x18(%rax,%rdi,8)
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,209 ; jb 2cd9 <_sk_store_f16_hsw+0x61>
+ DB 114,209 ; jb 3309 <_sk_store_f16_hsw+0x61>
DB 197,121,214,76,248,32 ; vmovq %xmm9,0x20(%rax,%rdi,8)
- DB 116,201 ; je 2cd9 <_sk_store_f16_hsw+0x61>
+ DB 116,201 ; je 3309 <_sk_store_f16_hsw+0x61>
DB 197,121,23,76,248,40 ; vmovhpd %xmm9,0x28(%rax,%rdi,8)
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,189 ; jb 2cd9 <_sk_store_f16_hsw+0x61>
+ DB 114,189 ; jb 3309 <_sk_store_f16_hsw+0x61>
DB 197,121,214,68,248,48 ; vmovq %xmm8,0x30(%rax,%rdi,8)
- DB 235,181 ; jmp 2cd9 <_sk_store_f16_hsw+0x61>
+ DB 235,181 ; jmp 3309 <_sk_store_f16_hsw+0x61>
PUBLIC _sk_load_u16_be_hsw
_sk_load_u16_be_hsw LABEL PROC
@@ -2981,7 +3309,7 @@ _sk_load_u16_be_hsw LABEL PROC
DB 76,139,0 ; mov (%rax),%r8
DB 72,141,4,189,0,0,0,0 ; lea 0x0(,%rdi,4),%rax
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,205,0,0,0 ; jne 2e07 <_sk_load_u16_be_hsw+0xe3>
+ DB 15,133,205,0,0,0 ; jne 3437 <_sk_load_u16_be_hsw+0xe3>
DB 196,65,121,16,4,64 ; vmovupd (%r8,%rax,2),%xmm8
DB 196,193,121,16,84,64,16 ; vmovupd 0x10(%r8,%rax,2),%xmm2
DB 196,193,121,16,92,64,32 ; vmovupd 0x20(%r8,%rax,2),%xmm3
@@ -3030,29 +3358,29 @@ _sk_load_u16_be_hsw LABEL PROC
DB 196,65,123,16,4,64 ; vmovsd (%r8,%rax,2),%xmm8
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,85 ; je 2e6d <_sk_load_u16_be_hsw+0x149>
+ DB 116,85 ; je 349d <_sk_load_u16_be_hsw+0x149>
DB 196,65,57,22,68,64,8 ; vmovhpd 0x8(%r8,%rax,2),%xmm8,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,72 ; jb 2e6d <_sk_load_u16_be_hsw+0x149>
+ DB 114,72 ; jb 349d <_sk_load_u16_be_hsw+0x149>
DB 196,193,123,16,84,64,16 ; vmovsd 0x10(%r8,%rax,2),%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 116,72 ; je 2e7a <_sk_load_u16_be_hsw+0x156>
+ DB 116,72 ; je 34aa <_sk_load_u16_be_hsw+0x156>
DB 196,193,105,22,84,64,24 ; vmovhpd 0x18(%r8,%rax,2),%xmm2,%xmm2
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,59 ; jb 2e7a <_sk_load_u16_be_hsw+0x156>
+ DB 114,59 ; jb 34aa <_sk_load_u16_be_hsw+0x156>
DB 196,193,123,16,92,64,32 ; vmovsd 0x20(%r8,%rax,2),%xmm3
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 15,132,5,255,255,255 ; je 2d55 <_sk_load_u16_be_hsw+0x31>
+ DB 15,132,5,255,255,255 ; je 3385 <_sk_load_u16_be_hsw+0x31>
DB 196,193,97,22,92,64,40 ; vmovhpd 0x28(%r8,%rax,2),%xmm3,%xmm3
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 15,130,244,254,255,255 ; jb 2d55 <_sk_load_u16_be_hsw+0x31>
+ DB 15,130,244,254,255,255 ; jb 3385 <_sk_load_u16_be_hsw+0x31>
DB 196,65,122,126,76,64,48 ; vmovq 0x30(%r8,%rax,2),%xmm9
- DB 233,232,254,255,255 ; jmpq 2d55 <_sk_load_u16_be_hsw+0x31>
+ DB 233,232,254,255,255 ; jmpq 3385 <_sk_load_u16_be_hsw+0x31>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2
- DB 233,219,254,255,255 ; jmpq 2d55 <_sk_load_u16_be_hsw+0x31>
+ DB 233,219,254,255,255 ; jmpq 3385 <_sk_load_u16_be_hsw+0x31>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
- DB 233,210,254,255,255 ; jmpq 2d55 <_sk_load_u16_be_hsw+0x31>
+ DB 233,210,254,255,255 ; jmpq 3385 <_sk_load_u16_be_hsw+0x31>
PUBLIC _sk_load_rgb_u16_be_hsw
_sk_load_rgb_u16_be_hsw LABEL PROC
@@ -3060,7 +3388,7 @@ _sk_load_rgb_u16_be_hsw LABEL PROC
DB 76,139,0 ; mov (%rax),%r8
DB 72,141,4,127 ; lea (%rdi,%rdi,2),%rax
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,211,0,0,0 ; jne 2f68 <_sk_load_rgb_u16_be_hsw+0xe5>
+ DB 15,133,211,0,0,0 ; jne 3598 <_sk_load_rgb_u16_be_hsw+0xe5>
DB 196,193,122,111,4,64 ; vmovdqu (%r8,%rax,2),%xmm0
DB 196,193,122,111,84,64,12 ; vmovdqu 0xc(%r8,%rax,2),%xmm2
DB 196,193,122,111,76,64,24 ; vmovdqu 0x18(%r8,%rax,2),%xmm1
@@ -3110,36 +3438,36 @@ _sk_load_rgb_u16_be_hsw LABEL PROC
DB 196,193,121,110,4,64 ; vmovd (%r8,%rax,2),%xmm0
DB 196,193,121,196,68,64,4,2 ; vpinsrw $0x2,0x4(%r8,%rax,2),%xmm0,%xmm0
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 117,5 ; jne 2f81 <_sk_load_rgb_u16_be_hsw+0xfe>
- DB 233,72,255,255,255 ; jmpq 2ec9 <_sk_load_rgb_u16_be_hsw+0x46>
+ DB 117,5 ; jne 35b1 <_sk_load_rgb_u16_be_hsw+0xfe>
+ DB 233,72,255,255,255 ; jmpq 34f9 <_sk_load_rgb_u16_be_hsw+0x46>
DB 196,193,121,110,76,64,6 ; vmovd 0x6(%r8,%rax,2),%xmm1
DB 196,65,113,196,68,64,10,2 ; vpinsrw $0x2,0xa(%r8,%rax,2),%xmm1,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,26 ; jb 2fb0 <_sk_load_rgb_u16_be_hsw+0x12d>
+ DB 114,26 ; jb 35e0 <_sk_load_rgb_u16_be_hsw+0x12d>
DB 196,193,121,110,76,64,12 ; vmovd 0xc(%r8,%rax,2),%xmm1
DB 196,193,113,196,84,64,16,2 ; vpinsrw $0x2,0x10(%r8,%rax,2),%xmm1,%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 117,10 ; jne 2fb5 <_sk_load_rgb_u16_be_hsw+0x132>
- DB 233,25,255,255,255 ; jmpq 2ec9 <_sk_load_rgb_u16_be_hsw+0x46>
- DB 233,20,255,255,255 ; jmpq 2ec9 <_sk_load_rgb_u16_be_hsw+0x46>
+ DB 117,10 ; jne 35e5 <_sk_load_rgb_u16_be_hsw+0x132>
+ DB 233,25,255,255,255 ; jmpq 34f9 <_sk_load_rgb_u16_be_hsw+0x46>
+ DB 233,20,255,255,255 ; jmpq 34f9 <_sk_load_rgb_u16_be_hsw+0x46>
DB 196,193,121,110,76,64,18 ; vmovd 0x12(%r8,%rax,2),%xmm1
DB 196,65,113,196,76,64,22,2 ; vpinsrw $0x2,0x16(%r8,%rax,2),%xmm1,%xmm9
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,26 ; jb 2fe4 <_sk_load_rgb_u16_be_hsw+0x161>
+ DB 114,26 ; jb 3614 <_sk_load_rgb_u16_be_hsw+0x161>
DB 196,193,121,110,76,64,24 ; vmovd 0x18(%r8,%rax,2),%xmm1
DB 196,193,113,196,76,64,28,2 ; vpinsrw $0x2,0x1c(%r8,%rax,2),%xmm1,%xmm1
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 117,10 ; jne 2fe9 <_sk_load_rgb_u16_be_hsw+0x166>
- DB 233,229,254,255,255 ; jmpq 2ec9 <_sk_load_rgb_u16_be_hsw+0x46>
- DB 233,224,254,255,255 ; jmpq 2ec9 <_sk_load_rgb_u16_be_hsw+0x46>
+ DB 117,10 ; jne 3619 <_sk_load_rgb_u16_be_hsw+0x166>
+ DB 233,229,254,255,255 ; jmpq 34f9 <_sk_load_rgb_u16_be_hsw+0x46>
+ DB 233,224,254,255,255 ; jmpq 34f9 <_sk_load_rgb_u16_be_hsw+0x46>
DB 196,193,121,110,92,64,30 ; vmovd 0x1e(%r8,%rax,2),%xmm3
DB 196,65,97,196,92,64,34,2 ; vpinsrw $0x2,0x22(%r8,%rax,2),%xmm3,%xmm11
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,20 ; jb 3012 <_sk_load_rgb_u16_be_hsw+0x18f>
+ DB 114,20 ; jb 3642 <_sk_load_rgb_u16_be_hsw+0x18f>
DB 196,193,121,110,92,64,36 ; vmovd 0x24(%r8,%rax,2),%xmm3
DB 196,193,97,196,92,64,40,2 ; vpinsrw $0x2,0x28(%r8,%rax,2),%xmm3,%xmm3
- DB 233,183,254,255,255 ; jmpq 2ec9 <_sk_load_rgb_u16_be_hsw+0x46>
- DB 233,178,254,255,255 ; jmpq 2ec9 <_sk_load_rgb_u16_be_hsw+0x46>
+ DB 233,183,254,255,255 ; jmpq 34f9 <_sk_load_rgb_u16_be_hsw+0x46>
+ DB 233,178,254,255,255 ; jmpq 34f9 <_sk_load_rgb_u16_be_hsw+0x46>
PUBLIC _sk_store_u16_be_hsw
_sk_store_u16_be_hsw LABEL PROC
@@ -3186,7 +3514,7 @@ _sk_store_u16_be_hsw LABEL PROC
DB 196,65,17,98,200 ; vpunpckldq %xmm8,%xmm13,%xmm9
DB 196,65,17,106,192 ; vpunpckhdq %xmm8,%xmm13,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,31 ; jne 3112 <_sk_store_u16_be_hsw+0xfb>
+ DB 117,31 ; jne 3742 <_sk_store_u16_be_hsw+0xfb>
DB 196,1,120,17,28,72 ; vmovups %xmm11,(%r8,%r9,2)
DB 196,1,120,17,84,72,16 ; vmovups %xmm10,0x10(%r8,%r9,2)
DB 196,1,120,17,76,72,32 ; vmovups %xmm9,0x20(%r8,%r9,2)
@@ -3195,31 +3523,31 @@ _sk_store_u16_be_hsw LABEL PROC
DB 255,224 ; jmpq *%rax
DB 196,1,121,214,28,72 ; vmovq %xmm11,(%r8,%r9,2)
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,240 ; je 310e <_sk_store_u16_be_hsw+0xf7>
+ DB 116,240 ; je 373e <_sk_store_u16_be_hsw+0xf7>
DB 196,1,121,23,92,72,8 ; vmovhpd %xmm11,0x8(%r8,%r9,2)
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,227 ; jb 310e <_sk_store_u16_be_hsw+0xf7>
+ DB 114,227 ; jb 373e <_sk_store_u16_be_hsw+0xf7>
DB 196,1,121,214,84,72,16 ; vmovq %xmm10,0x10(%r8,%r9,2)
- DB 116,218 ; je 310e <_sk_store_u16_be_hsw+0xf7>
+ DB 116,218 ; je 373e <_sk_store_u16_be_hsw+0xf7>
DB 196,1,121,23,84,72,24 ; vmovhpd %xmm10,0x18(%r8,%r9,2)
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,205 ; jb 310e <_sk_store_u16_be_hsw+0xf7>
+ DB 114,205 ; jb 373e <_sk_store_u16_be_hsw+0xf7>
DB 196,1,121,214,76,72,32 ; vmovq %xmm9,0x20(%r8,%r9,2)
- DB 116,196 ; je 310e <_sk_store_u16_be_hsw+0xf7>
+ DB 116,196 ; je 373e <_sk_store_u16_be_hsw+0xf7>
DB 196,1,121,23,76,72,40 ; vmovhpd %xmm9,0x28(%r8,%r9,2)
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,183 ; jb 310e <_sk_store_u16_be_hsw+0xf7>
+ DB 114,183 ; jb 373e <_sk_store_u16_be_hsw+0xf7>
DB 196,1,121,214,68,72,48 ; vmovq %xmm8,0x30(%r8,%r9,2)
- DB 235,174 ; jmp 310e <_sk_store_u16_be_hsw+0xf7>
+ DB 235,174 ; jmp 373e <_sk_store_u16_be_hsw+0xf7>
PUBLIC _sk_load_f32_hsw
_sk_load_f32_hsw LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 119,110 ; ja 31d6 <_sk_load_f32_hsw+0x76>
+ DB 119,110 ; ja 3806 <_sk_load_f32_hsw+0x76>
DB 76,139,0 ; mov (%rax),%r8
DB 76,141,12,189,0,0,0,0 ; lea 0x0(,%rdi,4),%r9
- DB 76,141,21,134,0,0,0 ; lea 0x86(%rip),%r10 # 3200 <_sk_load_f32_hsw+0xa0>
+ DB 76,141,21,134,0,0,0 ; lea 0x86(%rip),%r10 # 3830 <_sk_load_f32_hsw+0xa0>
DB 73,99,4,138 ; movslq (%r10,%rcx,4),%rax
DB 76,1,208 ; add %r10,%rax
DB 255,224 ; jmpq *%rax
@@ -3276,7 +3604,7 @@ _sk_store_f32_hsw LABEL PROC
DB 196,65,37,20,196 ; vunpcklpd %ymm12,%ymm11,%ymm8
DB 196,65,37,21,220 ; vunpckhpd %ymm12,%ymm11,%ymm11
DB 72,133,201 ; test %rcx,%rcx
- DB 117,55 ; jne 328d <_sk_store_f32_hsw+0x6d>
+ DB 117,55 ; jne 38bd <_sk_store_f32_hsw+0x6d>
DB 196,67,45,24,225,1 ; vinsertf128 $0x1,%xmm9,%ymm10,%ymm12
DB 196,67,61,24,235,1 ; vinsertf128 $0x1,%xmm11,%ymm8,%ymm13
DB 196,67,45,6,201,49 ; vperm2f128 $0x31,%ymm9,%ymm10,%ymm9
@@ -3289,22 +3617,22 @@ _sk_store_f32_hsw LABEL PROC
DB 255,224 ; jmpq *%rax
DB 196,65,121,17,20,128 ; vmovupd %xmm10,(%r8,%rax,4)
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,240 ; je 3289 <_sk_store_f32_hsw+0x69>
+ DB 116,240 ; je 38b9 <_sk_store_f32_hsw+0x69>
DB 196,65,121,17,76,128,16 ; vmovupd %xmm9,0x10(%r8,%rax,4)
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,227 ; jb 3289 <_sk_store_f32_hsw+0x69>
+ DB 114,227 ; jb 38b9 <_sk_store_f32_hsw+0x69>
DB 196,65,121,17,68,128,32 ; vmovupd %xmm8,0x20(%r8,%rax,4)
- DB 116,218 ; je 3289 <_sk_store_f32_hsw+0x69>
+ DB 116,218 ; je 38b9 <_sk_store_f32_hsw+0x69>
DB 196,65,121,17,92,128,48 ; vmovupd %xmm11,0x30(%r8,%rax,4)
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,205 ; jb 3289 <_sk_store_f32_hsw+0x69>
+ DB 114,205 ; jb 38b9 <_sk_store_f32_hsw+0x69>
DB 196,67,125,25,84,128,64,1 ; vextractf128 $0x1,%ymm10,0x40(%r8,%rax,4)
- DB 116,195 ; je 3289 <_sk_store_f32_hsw+0x69>
+ DB 116,195 ; je 38b9 <_sk_store_f32_hsw+0x69>
DB 196,67,125,25,76,128,80,1 ; vextractf128 $0x1,%ymm9,0x50(%r8,%rax,4)
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,181 ; jb 3289 <_sk_store_f32_hsw+0x69>
+ DB 114,181 ; jb 38b9 <_sk_store_f32_hsw+0x69>
DB 196,67,125,25,68,128,96,1 ; vextractf128 $0x1,%ymm8,0x60(%r8,%rax,4)
- DB 235,171 ; jmp 3289 <_sk_store_f32_hsw+0x69>
+ DB 235,171 ; jmp 38b9 <_sk_store_f32_hsw+0x69>
PUBLIC _sk_clamp_x_hsw
_sk_clamp_x_hsw LABEL PROC
@@ -3545,7 +3873,7 @@ _sk_linear_gradient_hsw LABEL PROC
DB 196,98,125,24,72,28 ; vbroadcastss 0x1c(%rax),%ymm9
DB 76,139,0 ; mov (%rax),%r8
DB 77,133,192 ; test %r8,%r8
- DB 15,132,143,0,0,0 ; je 3719 <_sk_linear_gradient_hsw+0xb5>
+ DB 15,132,143,0,0,0 ; je 3d49 <_sk_linear_gradient_hsw+0xb5>
DB 72,139,64,8 ; mov 0x8(%rax),%rax
DB 72,131,192,32 ; add $0x20,%rax
DB 196,65,28,87,228 ; vxorps %ymm12,%ymm12,%ymm12
@@ -3572,8 +3900,8 @@ _sk_linear_gradient_hsw LABEL PROC
DB 196,67,13,74,201,208 ; vblendvps %ymm13,%ymm9,%ymm14,%ymm9
DB 72,131,192,36 ; add $0x24,%rax
DB 73,255,200 ; dec %r8
- DB 117,140 ; jne 36a3 <_sk_linear_gradient_hsw+0x3f>
- DB 235,17 ; jmp 372a <_sk_linear_gradient_hsw+0xc6>
+ DB 117,140 ; jne 3cd3 <_sk_linear_gradient_hsw+0x3f>
+ DB 235,17 ; jmp 3d5a <_sk_linear_gradient_hsw+0xc6>
DB 197,244,87,201 ; vxorps %ymm1,%ymm1,%ymm1
DB 197,236,87,210 ; vxorps %ymm2,%ymm2,%ymm2
DB 197,228,87,219 ; vxorps %ymm3,%ymm3,%ymm3
@@ -6463,6 +6791,410 @@ _sk_table_a_avx LABEL PROC
DB 65,95 ; pop %r15
DB 255,224 ; jmpq *%rax
+PUBLIC _sk_parametric_r_avx ; AVX routine: applies a piecewise curve (linear below a threshold, power-like above) to the 8 floats in ymm0, clamps to [0,1], then jumps to the next pointer read from (%rsi)
+_sk_parametric_r_avx LABEL PROC ; p[] below = the 7 floats at offsets 0x0..0x18 from the pointer loaded into rax. NOTE(review): the constants look like the approx_powf() log2/pow2 split described in the commit message -- confirm against the C++ source
+ DB 72,173 ; lods %ds:(%rsi),%rax ; rax = qword at (%rsi), rsi += 8 (assumes DF=0); rax is the base of p[]
+ DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8 ; ymm8 = p[0x10] in every lane (threshold)
+ DB 196,65,124,194,192,2 ; vcmpleps %ymm8,%ymm0,%ymm8 ; ymm8 = lane mask where ymm0 <= p[0x10]
+ DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9
+ DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10
+ DB 197,52,89,200 ; vmulps %ymm0,%ymm9,%ymm9
+ DB 196,65,52,88,202 ; vaddps %ymm10,%ymm9,%ymm9 ; ymm9 = p[0xc]*ymm0 + p[0x18] (linear branch, kept for the final blend)
+ DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10
+ DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11
+ DB 197,172,89,192 ; vmulps %ymm0,%ymm10,%ymm0 ; from here on ymm0 is reused as scratch
+ DB 196,65,124,88,219 ; vaddps %ymm11,%ymm0,%ymm11 ; ymm11 = p[0x4]*input + p[0x8] (base of the power branch)
+ DB 196,65,124,91,211 ; vcvtdq2ps %ymm11,%ymm10 ; ymm10 = float bits of ymm11 read as int32 and converted to float (start of the log2-style approximation)
+ DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d ; 0x34000000 = 2^-23 as a float
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0 ; vmovd + vpermilps/vpshufd $0 + vinsertf128 = splat the 32-bit constant to all 8 lanes (pattern repeats below)
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 197,44,89,208 ; vmulps %ymm0,%ymm10,%ymm10 ; ymm10 *= 2^-23
+ DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d ; 0x42fe0000 = 127.0f
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 197,44,92,208 ; vsubps %ymm0,%ymm10,%ymm10 ; ymm10 -= 127.0
+ DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d ; low 23 bits = float mantissa field
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 197,249,112,192,0 ; vpshufd $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 196,65,124,84,219 ; vandps %ymm11,%ymm0,%ymm11 ; keep only the mantissa bits
+ DB 65,184,0,0,0,63 ; mov $0x3f000000,%r8d ; 0x3f000000 = 0.5f
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 197,249,112,192,0 ; vpshufd $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 197,36,86,216 ; vorps %ymm0,%ymm11,%ymm11 ; ymm11 = m: mantissa with the exponent of 0.5, i.e. a value in [0.5,1)
+ DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d ; ~2.774485f
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 197,44,88,208 ; vaddps %ymm0,%ymm10,%ymm10
+ DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d ; ~1.498030f
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 197,164,89,192 ; vmulps %ymm0,%ymm11,%ymm0
+ DB 197,44,92,208 ; vsubps %ymm0,%ymm10,%ymm10 ; ymm10 -= 1.498030*m
+ DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d ; ~1.725880f
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,99,125,24,224,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm12
+ DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d ; ~0.352089f
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 197,164,88,192 ; vaddps %ymm0,%ymm11,%ymm0
+ DB 197,156,94,192 ; vdivps %ymm0,%ymm12,%ymm0 ; ymm0 = 1.725880 / (m + 0.352089)
+ DB 197,172,92,192 ; vsubps %ymm0,%ymm10,%ymm0 ; ymm0 = log2-style estimate of the base
+ DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10 ; ymm10 = p[0x0] (multiplies the log estimate, i.e. acts as the exponent)
+ DB 197,44,89,216 ; vmulps %ymm0,%ymm10,%ymm11 ; ymm11 = x = p[0x0] * log estimate
+ DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10 ; imm 1 = round toward -inf, so ymm10 = floor(x)
+ DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10 ; ymm10 = f = x - floor(x)
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d ; 0x4b000000 = 2^23 as a float
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,99,125,24,224,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm12
+ DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d ; ~121.274055f
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 196,65,124,88,219 ; vaddps %ymm11,%ymm0,%ymm11
+ DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d ; ~1.490129f
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 196,193,124,89,194 ; vmulps %ymm10,%ymm0,%ymm0
+ DB 197,36,92,216 ; vsubps %ymm0,%ymm11,%ymm11 ; ymm11 = x + 121.274055 - 1.490129*f
+ DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d ; ~27.728024f
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,99,125,24,232,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm13
+ DB 65,184,248,245,154,64 ; mov $0x409af5f8,%r8d ; ~4.842525f
+ DB 196,193,121,110,192 ; vmovd %r8d,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 196,193,124,92,194 ; vsubps %ymm10,%ymm0,%ymm0 ; ymm0 = 4.842525 - f
+ DB 197,148,94,192 ; vdivps %ymm0,%ymm13,%ymm0 ; ymm0 = 27.728024 / (4.842525 - f)
+ DB 197,164,88,192 ; vaddps %ymm0,%ymm11,%ymm0
+ DB 197,156,89,192 ; vmulps %ymm0,%ymm12,%ymm0 ; scale by 2^23
+ DB 197,253,91,192 ; vcvtps2dq %ymm0,%ymm0 ; convert to int32; the resulting bits are consumed as floats by the vaddps below (pow2-style estimate)
+ DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
+ DB 196,193,124,88,194 ; vaddps %ymm10,%ymm0,%ymm0 ; power branch += p[0x14]
+ DB 196,195,125,74,193,128 ; vblendvps %ymm8,%ymm9,%ymm0,%ymm0 ; lanes with mask set take the linear branch (ymm9), others keep the power branch
+ DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8 ; ymm8 = 0.0
+ DB 196,65,124,95,192 ; vmaxps %ymm8,%ymm0,%ymm8 ; clamp below at 0
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax ; 0x3f800000 = 1.0f; overwrites rax, p[] is not read again
+ DB 197,249,110,192 ; vmovd %eax,%xmm0
+ DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0
+ DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0
+ DB 197,188,93,192 ; vminps %ymm0,%ymm8,%ymm0 ; clamp above at 1; ymm0 = final result
+ DB 72,173 ; lods %ds:(%rsi),%rax ; rax = next qword at (%rsi), rsi += 8
+ DB 255,224 ; jmpq *%rax ; tail-jump to the address just loaded (no ret)
+
+PUBLIC _sk_parametric_g_avx ; AVX routine: applies a piecewise curve (linear below a threshold, power-like above) to the 8 floats in ymm1, clamps to [0,1], then jumps to the next pointer read from (%rsi)
+_sk_parametric_g_avx LABEL PROC ; p[] below = the 7 floats at offsets 0x0..0x18 from the pointer loaded into rax. NOTE(review): the constants look like the approx_powf() log2/pow2 split described in the commit message -- confirm against the C++ source
+ DB 72,173 ; lods %ds:(%rsi),%rax ; rax = qword at (%rsi), rsi += 8 (assumes DF=0); rax is the base of p[]
+ DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8 ; ymm8 = p[0x10] in every lane (threshold)
+ DB 196,65,116,194,192,2 ; vcmpleps %ymm8,%ymm1,%ymm8 ; ymm8 = lane mask where ymm1 <= p[0x10]
+ DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9
+ DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10
+ DB 197,52,89,201 ; vmulps %ymm1,%ymm9,%ymm9
+ DB 196,65,52,88,202 ; vaddps %ymm10,%ymm9,%ymm9 ; ymm9 = p[0xc]*ymm1 + p[0x18] (linear branch, kept for the final blend)
+ DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10
+ DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11
+ DB 197,172,89,201 ; vmulps %ymm1,%ymm10,%ymm1 ; from here on ymm1 is reused as scratch
+ DB 196,65,116,88,219 ; vaddps %ymm11,%ymm1,%ymm11 ; ymm11 = p[0x4]*input + p[0x8] (base of the power branch)
+ DB 196,65,124,91,211 ; vcvtdq2ps %ymm11,%ymm10 ; ymm10 = float bits of ymm11 read as int32 and converted to float (start of the log2-style approximation)
+ DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d ; 0x34000000 = 2^-23 as a float
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1 ; vmovd + vpermilps/vpshufd $0 + vinsertf128 = splat the 32-bit constant to all 8 lanes (pattern repeats below)
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 197,44,89,209 ; vmulps %ymm1,%ymm10,%ymm10 ; ymm10 *= 2^-23
+ DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d ; 0x42fe0000 = 127.0f
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 197,44,92,209 ; vsubps %ymm1,%ymm10,%ymm10 ; ymm10 -= 127.0
+ DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d ; low 23 bits = float mantissa field
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 197,249,112,201,0 ; vpshufd $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 196,65,116,84,219 ; vandps %ymm11,%ymm1,%ymm11 ; keep only the mantissa bits
+ DB 65,184,0,0,0,63 ; mov $0x3f000000,%r8d ; 0x3f000000 = 0.5f
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 197,249,112,201,0 ; vpshufd $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 197,36,86,217 ; vorps %ymm1,%ymm11,%ymm11 ; ymm11 = m: mantissa with the exponent of 0.5, i.e. a value in [0.5,1)
+ DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d ; ~2.774485f
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 197,44,88,209 ; vaddps %ymm1,%ymm10,%ymm10
+ DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d ; ~1.498030f
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 197,164,89,201 ; vmulps %ymm1,%ymm11,%ymm1
+ DB 197,44,92,209 ; vsubps %ymm1,%ymm10,%ymm10 ; ymm10 -= 1.498030*m
+ DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d ; ~1.725880f
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,99,117,24,225,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm12
+ DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d ; ~0.352089f
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 197,164,88,201 ; vaddps %ymm1,%ymm11,%ymm1
+ DB 197,156,94,201 ; vdivps %ymm1,%ymm12,%ymm1 ; ymm1 = 1.725880 / (m + 0.352089)
+ DB 197,172,92,201 ; vsubps %ymm1,%ymm10,%ymm1 ; ymm1 = log2-style estimate of the base
+ DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10 ; ymm10 = p[0x0] (multiplies the log estimate, i.e. acts as the exponent)
+ DB 197,44,89,217 ; vmulps %ymm1,%ymm10,%ymm11 ; ymm11 = x = p[0x0] * log estimate
+ DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10 ; imm 1 = round toward -inf, so ymm10 = floor(x)
+ DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10 ; ymm10 = f = x - floor(x)
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d ; 0x4b000000 = 2^23 as a float
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,99,117,24,225,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm12
+ DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d ; ~121.274055f
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 196,65,116,88,219 ; vaddps %ymm11,%ymm1,%ymm11
+ DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d ; ~1.490129f
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 196,193,116,89,202 ; vmulps %ymm10,%ymm1,%ymm1
+ DB 197,36,92,217 ; vsubps %ymm1,%ymm11,%ymm11 ; ymm11 = x + 121.274055 - 1.490129*f
+ DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d ; ~27.728024f
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,99,117,24,233,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm13
+ DB 65,184,248,245,154,64 ; mov $0x409af5f8,%r8d ; ~4.842525f
+ DB 196,193,121,110,200 ; vmovd %r8d,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 196,193,116,92,202 ; vsubps %ymm10,%ymm1,%ymm1 ; ymm1 = 4.842525 - f
+ DB 197,148,94,201 ; vdivps %ymm1,%ymm13,%ymm1 ; ymm1 = 27.728024 / (4.842525 - f)
+ DB 197,164,88,201 ; vaddps %ymm1,%ymm11,%ymm1
+ DB 197,156,89,201 ; vmulps %ymm1,%ymm12,%ymm1 ; scale by 2^23
+ DB 197,253,91,201 ; vcvtps2dq %ymm1,%ymm1 ; convert to int32; the resulting bits are consumed as floats by the vaddps below (pow2-style estimate)
+ DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
+ DB 196,193,116,88,202 ; vaddps %ymm10,%ymm1,%ymm1 ; power branch += p[0x14]
+ DB 196,195,117,74,201,128 ; vblendvps %ymm8,%ymm9,%ymm1,%ymm1 ; lanes with mask set take the linear branch (ymm9), others keep the power branch
+ DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8 ; ymm8 = 0.0
+ DB 196,65,116,95,192 ; vmaxps %ymm8,%ymm1,%ymm8 ; clamp below at 0
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax ; 0x3f800000 = 1.0f; overwrites rax, p[] is not read again
+ DB 197,249,110,200 ; vmovd %eax,%xmm1
+ DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1
+ DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1
+ DB 197,188,93,201 ; vminps %ymm1,%ymm8,%ymm1 ; clamp above at 1; ymm1 = final result
+ DB 72,173 ; lods %ds:(%rsi),%rax ; rax = next qword at (%rsi), rsi += 8
+ DB 255,224 ; jmpq *%rax ; tail-jump to the address just loaded (no ret)
+
+PUBLIC _sk_parametric_b_avx ; AVX routine: applies a piecewise curve (linear below a threshold, power-like above) to the 8 floats in ymm2, clamps to [0,1], then jumps to the next pointer read from (%rsi)
+_sk_parametric_b_avx LABEL PROC ; p[] below = the 7 floats at offsets 0x0..0x18 from the pointer loaded into rax. NOTE(review): the constants look like the approx_powf() log2/pow2 split described in the commit message -- confirm against the C++ source
+ DB 72,173 ; lods %ds:(%rsi),%rax ; rax = qword at (%rsi), rsi += 8 (assumes DF=0); rax is the base of p[]
+ DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8 ; ymm8 = p[0x10] in every lane (threshold)
+ DB 196,65,108,194,192,2 ; vcmpleps %ymm8,%ymm2,%ymm8 ; ymm8 = lane mask where ymm2 <= p[0x10]
+ DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9
+ DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10
+ DB 197,52,89,202 ; vmulps %ymm2,%ymm9,%ymm9
+ DB 196,65,52,88,202 ; vaddps %ymm10,%ymm9,%ymm9 ; ymm9 = p[0xc]*ymm2 + p[0x18] (linear branch, kept for the final blend)
+ DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10
+ DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11
+ DB 197,172,89,210 ; vmulps %ymm2,%ymm10,%ymm2 ; from here on ymm2 is reused as scratch
+ DB 196,65,108,88,219 ; vaddps %ymm11,%ymm2,%ymm11 ; ymm11 = p[0x4]*input + p[0x8] (base of the power branch)
+ DB 196,65,124,91,211 ; vcvtdq2ps %ymm11,%ymm10 ; ymm10 = float bits of ymm11 read as int32 and converted to float (start of the log2-style approximation)
+ DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d ; 0x34000000 = 2^-23 as a float
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2 ; vmovd + vpermilps/vpshufd $0 + vinsertf128 = splat the 32-bit constant to all 8 lanes (pattern repeats below)
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 197,44,89,210 ; vmulps %ymm2,%ymm10,%ymm10 ; ymm10 *= 2^-23
+ DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d ; 0x42fe0000 = 127.0f
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 197,44,92,210 ; vsubps %ymm2,%ymm10,%ymm10 ; ymm10 -= 127.0
+ DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d ; low 23 bits = float mantissa field
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 197,249,112,210,0 ; vpshufd $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 196,65,108,84,219 ; vandps %ymm11,%ymm2,%ymm11 ; keep only the mantissa bits
+ DB 65,184,0,0,0,63 ; mov $0x3f000000,%r8d ; 0x3f000000 = 0.5f
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 197,249,112,210,0 ; vpshufd $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 197,36,86,218 ; vorps %ymm2,%ymm11,%ymm11 ; ymm11 = m: mantissa with the exponent of 0.5, i.e. a value in [0.5,1)
+ DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d ; ~2.774485f
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 197,44,88,210 ; vaddps %ymm2,%ymm10,%ymm10
+ DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d ; ~1.498030f
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 197,164,89,210 ; vmulps %ymm2,%ymm11,%ymm2
+ DB 197,44,92,210 ; vsubps %ymm2,%ymm10,%ymm10 ; ymm10 -= 1.498030*m
+ DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d ; ~1.725880f
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,99,109,24,226,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm12
+ DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d ; ~0.352089f
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 197,164,88,210 ; vaddps %ymm2,%ymm11,%ymm2
+ DB 197,156,94,210 ; vdivps %ymm2,%ymm12,%ymm2 ; ymm2 = 1.725880 / (m + 0.352089)
+ DB 197,172,92,210 ; vsubps %ymm2,%ymm10,%ymm2 ; ymm2 = log2-style estimate of the base
+ DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10 ; ymm10 = p[0x0] (multiplies the log estimate, i.e. acts as the exponent)
+ DB 197,44,89,218 ; vmulps %ymm2,%ymm10,%ymm11 ; ymm11 = x = p[0x0] * log estimate
+ DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10 ; imm 1 = round toward -inf, so ymm10 = floor(x)
+ DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10 ; ymm10 = f = x - floor(x)
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d ; 0x4b000000 = 2^23 as a float
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,99,109,24,226,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm12
+ DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d ; ~121.274055f
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 196,65,108,88,219 ; vaddps %ymm11,%ymm2,%ymm11
+ DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d ; ~1.490129f
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 196,193,108,89,210 ; vmulps %ymm10,%ymm2,%ymm2
+ DB 197,36,92,218 ; vsubps %ymm2,%ymm11,%ymm11 ; ymm11 = x + 121.274055 - 1.490129*f
+ DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d ; ~27.728024f
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,99,109,24,234,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm13
+ DB 65,184,248,245,154,64 ; mov $0x409af5f8,%r8d ; ~4.842525f
+ DB 196,193,121,110,208 ; vmovd %r8d,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 196,193,108,92,210 ; vsubps %ymm10,%ymm2,%ymm2 ; ymm2 = 4.842525 - f
+ DB 197,148,94,210 ; vdivps %ymm2,%ymm13,%ymm2 ; ymm2 = 27.728024 / (4.842525 - f)
+ DB 197,164,88,210 ; vaddps %ymm2,%ymm11,%ymm2
+ DB 197,156,89,210 ; vmulps %ymm2,%ymm12,%ymm2 ; scale by 2^23
+ DB 197,253,91,210 ; vcvtps2dq %ymm2,%ymm2 ; convert to int32; the resulting bits are consumed as floats by the vaddps below (pow2-style estimate)
+ DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
+ DB 196,193,108,88,210 ; vaddps %ymm10,%ymm2,%ymm2 ; power branch += p[0x14]
+ DB 196,195,109,74,209,128 ; vblendvps %ymm8,%ymm9,%ymm2,%ymm2 ; lanes with mask set take the linear branch (ymm9), others keep the power branch
+ DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8 ; ymm8 = 0.0
+ DB 196,65,108,95,192 ; vmaxps %ymm8,%ymm2,%ymm8 ; clamp below at 0
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax ; 0x3f800000 = 1.0f; overwrites rax, p[] is not read again
+ DB 197,249,110,208 ; vmovd %eax,%xmm2
+ DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2
+ DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2
+ DB 197,188,93,210 ; vminps %ymm2,%ymm8,%ymm2 ; clamp above at 1; ymm2 = final result
+ DB 72,173 ; lods %ds:(%rsi),%rax ; rax = next qword at (%rsi), rsi += 8
+ DB 255,224 ; jmpq *%rax ; tail-jump to the address just loaded (no ret)
+
+PUBLIC _sk_parametric_a_avx ; AVX routine: applies a piecewise curve (linear below a threshold, power-like above) to the 8 floats in ymm3, clamps to [0,1], then jumps to the next pointer read from (%rsi)
+_sk_parametric_a_avx LABEL PROC ; p[] below = the 7 floats at offsets 0x0..0x18 from the pointer loaded into rax. NOTE(review): the constants look like the approx_powf() log2/pow2 split described in the commit message -- confirm against the C++ source
+ DB 72,173 ; lods %ds:(%rsi),%rax ; rax = qword at (%rsi), rsi += 8 (assumes DF=0); rax is the base of p[]
+ DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8 ; ymm8 = p[0x10] in every lane (threshold)
+ DB 196,65,100,194,192,2 ; vcmpleps %ymm8,%ymm3,%ymm8 ; ymm8 = lane mask where ymm3 <= p[0x10]
+ DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9
+ DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10
+ DB 197,52,89,203 ; vmulps %ymm3,%ymm9,%ymm9
+ DB 196,65,52,88,202 ; vaddps %ymm10,%ymm9,%ymm9 ; ymm9 = p[0xc]*ymm3 + p[0x18] (linear branch, kept for the final blend)
+ DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10
+ DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11
+ DB 197,172,89,219 ; vmulps %ymm3,%ymm10,%ymm3 ; from here on ymm3 is reused as scratch
+ DB 196,65,100,88,219 ; vaddps %ymm11,%ymm3,%ymm11 ; ymm11 = p[0x4]*input + p[0x8] (base of the power branch)
+ DB 196,65,124,91,211 ; vcvtdq2ps %ymm11,%ymm10 ; ymm10 = float bits of ymm11 read as int32 and converted to float (start of the log2-style approximation)
+ DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d ; 0x34000000 = 2^-23 as a float
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3 ; vmovd + vpermilps/vpshufd $0 + vinsertf128 = splat the 32-bit constant to all 8 lanes (pattern repeats below)
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 197,44,89,211 ; vmulps %ymm3,%ymm10,%ymm10 ; ymm10 *= 2^-23
+ DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d ; 0x42fe0000 = 127.0f
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 197,44,92,211 ; vsubps %ymm3,%ymm10,%ymm10 ; ymm10 -= 127.0
+ DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d ; low 23 bits = float mantissa field
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 197,249,112,219,0 ; vpshufd $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 196,65,100,84,219 ; vandps %ymm11,%ymm3,%ymm11 ; keep only the mantissa bits
+ DB 65,184,0,0,0,63 ; mov $0x3f000000,%r8d ; 0x3f000000 = 0.5f
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 197,249,112,219,0 ; vpshufd $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 197,36,86,219 ; vorps %ymm3,%ymm11,%ymm11 ; ymm11 = m: mantissa with the exponent of 0.5, i.e. a value in [0.5,1)
+ DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d ; ~2.774485f
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 197,44,88,211 ; vaddps %ymm3,%ymm10,%ymm10
+ DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d ; ~1.498030f
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 197,164,89,219 ; vmulps %ymm3,%ymm11,%ymm3
+ DB 197,44,92,211 ; vsubps %ymm3,%ymm10,%ymm10 ; ymm10 -= 1.498030*m
+ DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d ; ~1.725880f
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,99,101,24,227,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm12
+ DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d ; ~0.352089f
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 197,164,88,219 ; vaddps %ymm3,%ymm11,%ymm3
+ DB 197,156,94,219 ; vdivps %ymm3,%ymm12,%ymm3 ; ymm3 = 1.725880 / (m + 0.352089)
+ DB 197,172,92,219 ; vsubps %ymm3,%ymm10,%ymm3 ; ymm3 = log2-style estimate of the base
+ DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10 ; ymm10 = p[0x0] (multiplies the log estimate, i.e. acts as the exponent)
+ DB 197,44,89,219 ; vmulps %ymm3,%ymm10,%ymm11 ; ymm11 = x = p[0x0] * log estimate
+ DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10 ; imm 1 = round toward -inf, so ymm10 = floor(x)
+ DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10 ; ymm10 = f = x - floor(x)
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d ; 0x4b000000 = 2^23 as a float
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,99,101,24,227,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm12
+ DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d ; ~121.274055f
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 196,65,100,88,219 ; vaddps %ymm11,%ymm3,%ymm11
+ DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d ; ~1.490129f
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 196,193,100,89,218 ; vmulps %ymm10,%ymm3,%ymm3
+ DB 197,36,92,219 ; vsubps %ymm3,%ymm11,%ymm11 ; ymm11 = x + 121.274055 - 1.490129*f
+ DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d ; ~27.728024f
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,99,101,24,235,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm13
+ DB 65,184,248,245,154,64 ; mov $0x409af5f8,%r8d ; ~4.842525f
+ DB 196,193,121,110,216 ; vmovd %r8d,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 196,193,100,92,218 ; vsubps %ymm10,%ymm3,%ymm3 ; ymm3 = 4.842525 - f
+ DB 197,148,94,219 ; vdivps %ymm3,%ymm13,%ymm3 ; ymm3 = 27.728024 / (4.842525 - f)
+ DB 197,164,88,219 ; vaddps %ymm3,%ymm11,%ymm3
+ DB 197,156,89,219 ; vmulps %ymm3,%ymm12,%ymm3 ; scale by 2^23
+ DB 197,253,91,219 ; vcvtps2dq %ymm3,%ymm3 ; convert to int32; the resulting bits are consumed as floats by the vaddps below (pow2-style estimate)
+ DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10
+ DB 196,193,100,88,218 ; vaddps %ymm10,%ymm3,%ymm3 ; power branch += p[0x14]
+ DB 196,195,101,74,217,128 ; vblendvps %ymm8,%ymm9,%ymm3,%ymm3 ; lanes with mask set take the linear branch (ymm9), others keep the power branch
+ DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8 ; ymm8 = 0.0
+ DB 196,65,100,95,192 ; vmaxps %ymm8,%ymm3,%ymm8 ; clamp below at 0
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax ; 0x3f800000 = 1.0f; overwrites rax, p[] is not read again
+ DB 197,249,110,216 ; vmovd %eax,%xmm3
+ DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3
+ DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3
+ DB 197,188,93,219 ; vminps %ymm3,%ymm8,%ymm3 ; clamp above at 1; ymm3 = final result
+ DB 72,173 ; lods %ds:(%rsi),%rax ; rax = next qword at (%rsi), rsi += 8
+ DB 255,224 ; jmpq *%rax ; tail-jump to the address just loaded (no ret)
+
PUBLIC _sk_load_a8_avx
_sk_load_a8_avx LABEL PROC
DB 73,137,200 ; mov %rcx,%r8
@@ -6470,7 +7202,7 @@ _sk_load_a8_avx LABEL PROC
DB 72,139,0 ; mov (%rax),%rax
DB 72,1,248 ; add %rdi,%rax
DB 77,133,192 ; test %r8,%r8
- DB 117,74 ; jne 29ba <_sk_load_a8_avx+0x5a>
+ DB 117,74 ; jne 31b2 <_sk_load_a8_avx+0x5a>
DB 197,250,126,0 ; vmovq (%rax),%xmm0
DB 196,226,121,49,200 ; vpmovzxbd %xmm0,%xmm1
DB 196,227,121,4,192,229 ; vpermilps $0xe5,%xmm0,%xmm0
@@ -6497,9 +7229,9 @@ _sk_load_a8_avx LABEL PROC
DB 77,9,217 ; or %r11,%r9
DB 72,131,193,8 ; add $0x8,%rcx
DB 73,255,202 ; dec %r10
- DB 117,234 ; jne 29c2 <_sk_load_a8_avx+0x62>
+ DB 117,234 ; jne 31ba <_sk_load_a8_avx+0x62>
DB 196,193,249,110,193 ; vmovq %r9,%xmm0
- DB 235,149 ; jmp 2974 <_sk_load_a8_avx+0x14>
+ DB 235,149 ; jmp 316c <_sk_load_a8_avx+0x14>
PUBLIC _sk_gather_a8_avx
_sk_gather_a8_avx LABEL PROC
@@ -6576,7 +7308,7 @@ _sk_store_a8_avx LABEL PROC
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 196,65,57,103,192 ; vpackuswb %xmm8,%xmm8,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 2b1b <_sk_store_a8_avx+0x42>
+ DB 117,10 ; jne 3313 <_sk_store_a8_avx+0x42>
DB 196,65,123,17,4,57 ; vmovsd %xmm8,(%r9,%rdi,1)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -6584,10 +7316,10 @@ _sk_store_a8_avx LABEL PROC
DB 65,128,224,7 ; and $0x7,%r8b
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 2b17 <_sk_store_a8_avx+0x3e>
+ DB 119,236 ; ja 330f <_sk_store_a8_avx+0x3e>
DB 196,66,121,48,192 ; vpmovzxbw %xmm8,%xmm8
DB 65,15,182,192 ; movzbl %r8b,%eax
- DB 76,141,5,69,0,0,0 ; lea 0x45(%rip),%r8 # 2b80 <_sk_store_a8_avx+0xa7>
+ DB 76,141,5,69,0,0,0 ; lea 0x45(%rip),%r8 # 3378 <_sk_store_a8_avx+0xa7>
DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax
DB 76,1,192 ; add %r8,%rax
DB 255,224 ; jmpq *%rax
@@ -6598,7 +7330,7 @@ _sk_store_a8_avx LABEL PROC
DB 196,67,121,20,68,57,2,4 ; vpextrb $0x4,%xmm8,0x2(%r9,%rdi,1)
DB 196,67,121,20,68,57,1,2 ; vpextrb $0x2,%xmm8,0x1(%r9,%rdi,1)
DB 196,67,121,20,4,57,0 ; vpextrb $0x0,%xmm8,(%r9,%rdi,1)
- DB 235,154 ; jmp 2b17 <_sk_store_a8_avx+0x3e>
+ DB 235,154 ; jmp 330f <_sk_store_a8_avx+0x3e>
DB 15,31,0 ; nopl (%rax)
DB 244 ; hlt
DB 255 ; (bad)
@@ -6631,7 +7363,7 @@ _sk_load_g8_avx LABEL PROC
DB 72,139,0 ; mov (%rax),%rax
DB 72,1,248 ; add %rdi,%rax
DB 77,133,192 ; test %r8,%r8
- DB 117,91 ; jne 2c07 <_sk_load_g8_avx+0x6b>
+ DB 117,91 ; jne 33ff <_sk_load_g8_avx+0x6b>
DB 197,250,126,0 ; vmovq (%rax),%xmm0
DB 196,226,121,49,200 ; vpmovzxbd %xmm0,%xmm1
DB 196,227,121,4,192,229 ; vpermilps $0xe5,%xmm0,%xmm0
@@ -6661,9 +7393,9 @@ _sk_load_g8_avx LABEL PROC
DB 77,9,217 ; or %r11,%r9
DB 72,131,193,8 ; add $0x8,%rcx
DB 73,255,202 ; dec %r10
- DB 117,234 ; jne 2c0f <_sk_load_g8_avx+0x73>
+ DB 117,234 ; jne 3407 <_sk_load_g8_avx+0x73>
DB 196,193,249,110,193 ; vmovq %r9,%xmm0
- DB 235,132 ; jmp 2bb0 <_sk_load_g8_avx+0x14>
+ DB 235,132 ; jmp 33a8 <_sk_load_g8_avx+0x14>
PUBLIC _sk_gather_g8_avx
_sk_gather_g8_avx LABEL PROC
@@ -6734,9 +7466,9 @@ _sk_gather_i8_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 73,137,192 ; mov %rax,%r8
DB 77,133,192 ; test %r8,%r8
- DB 116,5 ; je 2d46 <_sk_gather_i8_avx+0xf>
+ DB 116,5 ; je 353e <_sk_gather_i8_avx+0xf>
DB 76,137,192 ; mov %r8,%rax
- DB 235,2 ; jmp 2d48 <_sk_gather_i8_avx+0x11>
+ DB 235,2 ; jmp 3540 <_sk_gather_i8_avx+0x11>
DB 72,173 ; lods %ds:(%rsi),%rax
DB 65,87 ; push %r15
DB 65,86 ; push %r14
@@ -6839,7 +7571,7 @@ _sk_load_565_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,209,0,0,0 ; jne 2fe2 <_sk_load_565_avx+0xdf>
+ DB 15,133,209,0,0,0 ; jne 37da <_sk_load_565_avx+0xdf>
DB 196,193,122,111,4,122 ; vmovdqu (%r10,%rdi,2),%xmm0
DB 197,241,239,201 ; vpxor %xmm1,%xmm1,%xmm1
DB 197,249,105,201 ; vpunpckhwd %xmm1,%xmm0,%xmm1
@@ -6889,9 +7621,9 @@ _sk_load_565_avx LABEL PROC
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 15,135,29,255,255,255 ; ja 2f17 <_sk_load_565_avx+0x14>
+ DB 15,135,29,255,255,255 ; ja 370f <_sk_load_565_avx+0x14>
DB 69,15,182,192 ; movzbl %r8b,%r8d
- DB 76,141,13,75,0,0,0 ; lea 0x4b(%rip),%r9 # 3050 <_sk_load_565_avx+0x14d>
+ DB 76,141,13,75,0,0,0 ; lea 0x4b(%rip),%r9 # 3848 <_sk_load_565_avx+0x14d>
DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax
DB 76,1,200 ; add %r9,%rax
DB 255,224 ; jmpq *%rax
@@ -6903,7 +7635,7 @@ _sk_load_565_avx LABEL PROC
DB 196,193,121,196,68,122,4,2 ; vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,68,122,2,1 ; vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,4,122,0 ; vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- DB 233,201,254,255,255 ; jmpq 2f17 <_sk_load_565_avx+0x14>
+ DB 233,201,254,255,255 ; jmpq 370f <_sk_load_565_avx+0x14>
DB 102,144 ; xchg %ax,%ax
DB 242,255 ; repnz (bad)
DB 255 ; (bad)
@@ -7056,7 +7788,7 @@ _sk_store_565_avx LABEL PROC
DB 196,67,125,25,193,1 ; vextractf128 $0x1,%ymm8,%xmm9
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 329b <_sk_store_565_avx+0x9e>
+ DB 117,10 ; jne 3a93 <_sk_store_565_avx+0x9e>
DB 196,65,122,127,4,121 ; vmovdqu %xmm8,(%r9,%rdi,2)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -7064,9 +7796,9 @@ _sk_store_565_avx LABEL PROC
DB 65,128,224,7 ; and $0x7,%r8b
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 3297 <_sk_store_565_avx+0x9a>
+ DB 119,236 ; ja 3a8f <_sk_store_565_avx+0x9a>
DB 65,15,182,192 ; movzbl %r8b,%eax
- DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 32f8 <_sk_store_565_avx+0xfb>
+ DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 3af0 <_sk_store_565_avx+0xfb>
DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax
DB 76,1,192 ; add %r8,%rax
DB 255,224 ; jmpq *%rax
@@ -7077,7 +7809,7 @@ _sk_store_565_avx LABEL PROC
DB 196,67,121,21,68,121,4,2 ; vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
DB 196,67,121,21,68,121,2,1 ; vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
DB 196,67,121,21,4,121,0 ; vpextrw $0x0,%xmm8,(%r9,%rdi,2)
- DB 235,159 ; jmp 3297 <_sk_store_565_avx+0x9a>
+ DB 235,159 ; jmp 3a8f <_sk_store_565_avx+0x9a>
DB 247,255 ; idiv %edi
DB 255 ; (bad)
DB 255 ; (bad)
@@ -7106,7 +7838,7 @@ _sk_load_4444_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,245,0,0,0 ; jne 3417 <_sk_load_4444_avx+0x103>
+ DB 15,133,245,0,0,0 ; jne 3c0f <_sk_load_4444_avx+0x103>
DB 196,193,122,111,4,122 ; vmovdqu (%r10,%rdi,2),%xmm0
DB 197,241,239,201 ; vpxor %xmm1,%xmm1,%xmm1
DB 197,249,105,201 ; vpunpckhwd %xmm1,%xmm0,%xmm1
@@ -7163,9 +7895,9 @@ _sk_load_4444_avx LABEL PROC
DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 15,135,249,254,255,255 ; ja 3328 <_sk_load_4444_avx+0x14>
+ DB 15,135,249,254,255,255 ; ja 3b20 <_sk_load_4444_avx+0x14>
DB 69,15,182,192 ; movzbl %r8b,%r8d
- DB 76,141,13,74,0,0,0 ; lea 0x4a(%rip),%r9 # 3484 <_sk_load_4444_avx+0x170>
+ DB 76,141,13,74,0,0,0 ; lea 0x4a(%rip),%r9 # 3c7c <_sk_load_4444_avx+0x170>
DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax
DB 76,1,200 ; add %r9,%rax
DB 255,224 ; jmpq *%rax
@@ -7177,12 +7909,12 @@ _sk_load_4444_avx LABEL PROC
DB 196,193,121,196,68,122,4,2 ; vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,68,122,2,1 ; vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
DB 196,193,121,196,4,122,0 ; vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0
- DB 233,165,254,255,255 ; jmpq 3328 <_sk_load_4444_avx+0x14>
+ DB 233,165,254,255,255 ; jmpq 3b20 <_sk_load_4444_avx+0x14>
DB 144 ; nop
DB 243,255 ; repz (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 235,255 ; jmp 3489 <_sk_load_4444_avx+0x175>
+ DB 235,255 ; jmp 3c81 <_sk_load_4444_avx+0x175>
DB 255 ; (bad)
DB 255,227 ; jmpq *%rbx
DB 255 ; (bad)
@@ -7339,7 +8071,7 @@ _sk_store_4444_avx LABEL PROC
DB 196,67,125,25,193,1 ; vextractf128 $0x1,%ymm8,%xmm9
DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 3704 <_sk_store_4444_avx+0xaf>
+ DB 117,10 ; jne 3efc <_sk_store_4444_avx+0xaf>
DB 196,65,122,127,4,121 ; vmovdqu %xmm8,(%r9,%rdi,2)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -7347,9 +8079,9 @@ _sk_store_4444_avx LABEL PROC
DB 65,128,224,7 ; and $0x7,%r8b
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 3700 <_sk_store_4444_avx+0xab>
+ DB 119,236 ; ja 3ef8 <_sk_store_4444_avx+0xab>
DB 65,15,182,192 ; movzbl %r8b,%eax
- DB 76,141,5,69,0,0,0 ; lea 0x45(%rip),%r8 # 3764 <_sk_store_4444_avx+0x10f>
+ DB 76,141,5,69,0,0,0 ; lea 0x45(%rip),%r8 # 3f5c <_sk_store_4444_avx+0x10f>
DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax
DB 76,1,192 ; add %r8,%rax
DB 255,224 ; jmpq *%rax
@@ -7360,7 +8092,7 @@ _sk_store_4444_avx LABEL PROC
DB 196,67,121,21,68,121,4,2 ; vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2)
DB 196,67,121,21,68,121,2,1 ; vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2)
DB 196,67,121,21,4,121,0 ; vpextrw $0x0,%xmm8,(%r9,%rdi,2)
- DB 235,159 ; jmp 3700 <_sk_store_4444_avx+0xab>
+ DB 235,159 ; jmp 3ef8 <_sk_store_4444_avx+0xab>
DB 15,31,0 ; nopl (%rax)
DB 244 ; hlt
DB 255 ; (bad)
@@ -7391,7 +8123,7 @@ _sk_load_8888_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,157,0,0,0 ; jne 382b <_sk_load_8888_avx+0xab>
+ DB 15,133,157,0,0,0 ; jne 4023 <_sk_load_8888_avx+0xab>
DB 196,65,124,16,12,186 ; vmovups (%r10,%rdi,4),%ymm9
DB 184,255,0,0,0 ; mov $0xff,%eax
DB 197,249,110,192 ; vmovd %eax,%xmm0
@@ -7429,9 +8161,9 @@ _sk_load_8888_avx LABEL PROC
DB 196,65,52,87,201 ; vxorps %ymm9,%ymm9,%ymm9
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 15,135,80,255,255,255 ; ja 3794 <_sk_load_8888_avx+0x14>
+ DB 15,135,80,255,255,255 ; ja 3f8c <_sk_load_8888_avx+0x14>
DB 69,15,182,192 ; movzbl %r8b,%r8d
- DB 76,141,13,137,0,0,0 ; lea 0x89(%rip),%r9 # 38d8 <_sk_load_8888_avx+0x158>
+ DB 76,141,13,137,0,0,0 ; lea 0x89(%rip),%r9 # 40d0 <_sk_load_8888_avx+0x158>
DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax
DB 76,1,200 ; add %r9,%rax
DB 255,224 ; jmpq *%rax
@@ -7454,7 +8186,7 @@ _sk_load_8888_avx LABEL PROC
DB 196,99,53,12,200,15 ; vblendps $0xf,%ymm0,%ymm9,%ymm9
DB 196,195,49,34,4,186,0 ; vpinsrd $0x0,(%r10,%rdi,4),%xmm9,%xmm0
DB 196,99,53,12,200,15 ; vblendps $0xf,%ymm0,%ymm9,%ymm9
- DB 233,188,254,255,255 ; jmpq 3794 <_sk_load_8888_avx+0x14>
+ DB 233,188,254,255,255 ; jmpq 3f8c <_sk_load_8888_avx+0x14>
DB 238 ; out %al,(%dx)
DB 255 ; (bad)
DB 255 ; (bad)
@@ -7580,7 +8312,7 @@ _sk_store_8888_avx LABEL PROC
DB 196,65,45,86,192 ; vorpd %ymm8,%ymm10,%ymm8
DB 196,65,53,86,192 ; vorpd %ymm8,%ymm9,%ymm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,10 ; jne 3ad9 <_sk_store_8888_avx+0xa4>
+ DB 117,10 ; jne 42d1 <_sk_store_8888_avx+0xa4>
DB 196,65,124,17,4,185 ; vmovups %ymm8,(%r9,%rdi,4)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -7588,9 +8320,9 @@ _sk_store_8888_avx LABEL PROC
DB 65,128,224,7 ; and $0x7,%r8b
DB 65,254,200 ; dec %r8b
DB 65,128,248,6 ; cmp $0x6,%r8b
- DB 119,236 ; ja 3ad5 <_sk_store_8888_avx+0xa0>
+ DB 119,236 ; ja 42cd <_sk_store_8888_avx+0xa0>
DB 65,15,182,192 ; movzbl %r8b,%eax
- DB 76,141,5,84,0,0,0 ; lea 0x54(%rip),%r8 # 3b48 <_sk_store_8888_avx+0x113>
+ DB 76,141,5,84,0,0,0 ; lea 0x54(%rip),%r8 # 4340 <_sk_store_8888_avx+0x113>
DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax
DB 76,1,192 ; add %r8,%rax
DB 255,224 ; jmpq *%rax
@@ -7604,7 +8336,7 @@ _sk_store_8888_avx LABEL PROC
DB 196,67,121,22,68,185,8,2 ; vpextrd $0x2,%xmm8,0x8(%r9,%rdi,4)
DB 196,67,121,22,68,185,4,1 ; vpextrd $0x1,%xmm8,0x4(%r9,%rdi,4)
DB 196,65,121,126,4,185 ; vmovd %xmm8,(%r9,%rdi,4)
- DB 235,143 ; jmp 3ad5 <_sk_store_8888_avx+0xa0>
+ DB 235,143 ; jmp 42cd <_sk_store_8888_avx+0xa0>
DB 102,144 ; xchg %ax,%ax
DB 246,255 ; idiv %bh
DB 255 ; (bad)
@@ -7634,7 +8366,7 @@ _sk_load_f16_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 72,139,0 ; mov (%rax),%rax
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,17,1,0,0 ; jne 3c83 <_sk_load_f16_avx+0x11f>
+ DB 15,133,17,1,0,0 ; jne 447b <_sk_load_f16_avx+0x11f>
DB 197,121,16,4,248 ; vmovupd (%rax,%rdi,8),%xmm8
DB 197,249,16,84,248,16 ; vmovupd 0x10(%rax,%rdi,8),%xmm2
DB 197,249,16,92,248,32 ; vmovupd 0x20(%rax,%rdi,8),%xmm3
@@ -7696,29 +8428,29 @@ _sk_load_f16_avx LABEL PROC
DB 197,123,16,4,248 ; vmovsd (%rax,%rdi,8),%xmm8
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,79 ; je 3ce2 <_sk_load_f16_avx+0x17e>
+ DB 116,79 ; je 44da <_sk_load_f16_avx+0x17e>
DB 197,57,22,68,248,8 ; vmovhpd 0x8(%rax,%rdi,8),%xmm8,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,67 ; jb 3ce2 <_sk_load_f16_avx+0x17e>
+ DB 114,67 ; jb 44da <_sk_load_f16_avx+0x17e>
DB 197,251,16,84,248,16 ; vmovsd 0x10(%rax,%rdi,8),%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 116,68 ; je 3cef <_sk_load_f16_avx+0x18b>
+ DB 116,68 ; je 44e7 <_sk_load_f16_avx+0x18b>
DB 197,233,22,84,248,24 ; vmovhpd 0x18(%rax,%rdi,8),%xmm2,%xmm2
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,56 ; jb 3cef <_sk_load_f16_avx+0x18b>
+ DB 114,56 ; jb 44e7 <_sk_load_f16_avx+0x18b>
DB 197,251,16,92,248,32 ; vmovsd 0x20(%rax,%rdi,8),%xmm3
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 15,132,194,254,255,255 ; je 3b89 <_sk_load_f16_avx+0x25>
+ DB 15,132,194,254,255,255 ; je 4381 <_sk_load_f16_avx+0x25>
DB 197,225,22,92,248,40 ; vmovhpd 0x28(%rax,%rdi,8),%xmm3,%xmm3
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 15,130,178,254,255,255 ; jb 3b89 <_sk_load_f16_avx+0x25>
+ DB 15,130,178,254,255,255 ; jb 4381 <_sk_load_f16_avx+0x25>
DB 197,122,126,76,248,48 ; vmovq 0x30(%rax,%rdi,8),%xmm9
- DB 233,167,254,255,255 ; jmpq 3b89 <_sk_load_f16_avx+0x25>
+ DB 233,167,254,255,255 ; jmpq 4381 <_sk_load_f16_avx+0x25>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2
- DB 233,154,254,255,255 ; jmpq 3b89 <_sk_load_f16_avx+0x25>
+ DB 233,154,254,255,255 ; jmpq 4381 <_sk_load_f16_avx+0x25>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
- DB 233,145,254,255,255 ; jmpq 3b89 <_sk_load_f16_avx+0x25>
+ DB 233,145,254,255,255 ; jmpq 4381 <_sk_load_f16_avx+0x25>
PUBLIC _sk_gather_f16_avx
_sk_gather_f16_avx LABEL PROC
@@ -7858,7 +8590,7 @@ _sk_store_f16_avx LABEL PROC
DB 196,65,17,98,200 ; vpunpckldq %xmm8,%xmm13,%xmm9
DB 196,65,17,106,192 ; vpunpckhdq %xmm8,%xmm13,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,31 ; jne 3f78 <_sk_store_f16_avx+0xd2>
+ DB 117,31 ; jne 4770 <_sk_store_f16_avx+0xd2>
DB 196,65,120,17,28,248 ; vmovups %xmm11,(%r8,%rdi,8)
DB 196,65,120,17,84,248,16 ; vmovups %xmm10,0x10(%r8,%rdi,8)
DB 196,65,120,17,76,248,32 ; vmovups %xmm9,0x20(%r8,%rdi,8)
@@ -7867,22 +8599,22 @@ _sk_store_f16_avx LABEL PROC
DB 255,224 ; jmpq *%rax
DB 196,65,121,214,28,248 ; vmovq %xmm11,(%r8,%rdi,8)
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,240 ; je 3f74 <_sk_store_f16_avx+0xce>
+ DB 116,240 ; je 476c <_sk_store_f16_avx+0xce>
DB 196,65,121,23,92,248,8 ; vmovhpd %xmm11,0x8(%r8,%rdi,8)
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,227 ; jb 3f74 <_sk_store_f16_avx+0xce>
+ DB 114,227 ; jb 476c <_sk_store_f16_avx+0xce>
DB 196,65,121,214,84,248,16 ; vmovq %xmm10,0x10(%r8,%rdi,8)
- DB 116,218 ; je 3f74 <_sk_store_f16_avx+0xce>
+ DB 116,218 ; je 476c <_sk_store_f16_avx+0xce>
DB 196,65,121,23,84,248,24 ; vmovhpd %xmm10,0x18(%r8,%rdi,8)
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,205 ; jb 3f74 <_sk_store_f16_avx+0xce>
+ DB 114,205 ; jb 476c <_sk_store_f16_avx+0xce>
DB 196,65,121,214,76,248,32 ; vmovq %xmm9,0x20(%r8,%rdi,8)
- DB 116,196 ; je 3f74 <_sk_store_f16_avx+0xce>
+ DB 116,196 ; je 476c <_sk_store_f16_avx+0xce>
DB 196,65,121,23,76,248,40 ; vmovhpd %xmm9,0x28(%r8,%rdi,8)
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,183 ; jb 3f74 <_sk_store_f16_avx+0xce>
+ DB 114,183 ; jb 476c <_sk_store_f16_avx+0xce>
DB 196,65,121,214,68,248,48 ; vmovq %xmm8,0x30(%r8,%rdi,8)
- DB 235,174 ; jmp 3f74 <_sk_store_f16_avx+0xce>
+ DB 235,174 ; jmp 476c <_sk_store_f16_avx+0xce>
PUBLIC _sk_load_u16_be_avx
_sk_load_u16_be_avx LABEL PROC
@@ -7890,7 +8622,7 @@ _sk_load_u16_be_avx LABEL PROC
DB 76,139,0 ; mov (%rax),%r8
DB 72,141,4,189,0,0,0,0 ; lea 0x0(,%rdi,4),%rax
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,5,1,0,0 ; jne 40e1 <_sk_load_u16_be_avx+0x11b>
+ DB 15,133,5,1,0,0 ; jne 48d9 <_sk_load_u16_be_avx+0x11b>
DB 196,65,121,16,4,64 ; vmovupd (%r8,%rax,2),%xmm8
DB 196,193,121,16,84,64,16 ; vmovupd 0x10(%r8,%rax,2),%xmm2
DB 196,193,121,16,92,64,32 ; vmovupd 0x20(%r8,%rax,2),%xmm3
@@ -7949,29 +8681,29 @@ _sk_load_u16_be_avx LABEL PROC
DB 196,65,123,16,4,64 ; vmovsd (%r8,%rax,2),%xmm8
DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,85 ; je 4147 <_sk_load_u16_be_avx+0x181>
+ DB 116,85 ; je 493f <_sk_load_u16_be_avx+0x181>
DB 196,65,57,22,68,64,8 ; vmovhpd 0x8(%r8,%rax,2),%xmm8,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,72 ; jb 4147 <_sk_load_u16_be_avx+0x181>
+ DB 114,72 ; jb 493f <_sk_load_u16_be_avx+0x181>
DB 196,193,123,16,84,64,16 ; vmovsd 0x10(%r8,%rax,2),%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 116,72 ; je 4154 <_sk_load_u16_be_avx+0x18e>
+ DB 116,72 ; je 494c <_sk_load_u16_be_avx+0x18e>
DB 196,193,105,22,84,64,24 ; vmovhpd 0x18(%r8,%rax,2),%xmm2,%xmm2
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,59 ; jb 4154 <_sk_load_u16_be_avx+0x18e>
+ DB 114,59 ; jb 494c <_sk_load_u16_be_avx+0x18e>
DB 196,193,123,16,92,64,32 ; vmovsd 0x20(%r8,%rax,2),%xmm3
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 15,132,205,254,255,255 ; je 3ff7 <_sk_load_u16_be_avx+0x31>
+ DB 15,132,205,254,255,255 ; je 47ef <_sk_load_u16_be_avx+0x31>
DB 196,193,97,22,92,64,40 ; vmovhpd 0x28(%r8,%rax,2),%xmm3,%xmm3
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 15,130,188,254,255,255 ; jb 3ff7 <_sk_load_u16_be_avx+0x31>
+ DB 15,130,188,254,255,255 ; jb 47ef <_sk_load_u16_be_avx+0x31>
DB 196,65,122,126,76,64,48 ; vmovq 0x30(%r8,%rax,2),%xmm9
- DB 233,176,254,255,255 ; jmpq 3ff7 <_sk_load_u16_be_avx+0x31>
+ DB 233,176,254,255,255 ; jmpq 47ef <_sk_load_u16_be_avx+0x31>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2
- DB 233,163,254,255,255 ; jmpq 3ff7 <_sk_load_u16_be_avx+0x31>
+ DB 233,163,254,255,255 ; jmpq 47ef <_sk_load_u16_be_avx+0x31>
DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3
- DB 233,154,254,255,255 ; jmpq 3ff7 <_sk_load_u16_be_avx+0x31>
+ DB 233,154,254,255,255 ; jmpq 47ef <_sk_load_u16_be_avx+0x31>
PUBLIC _sk_load_rgb_u16_be_avx
_sk_load_rgb_u16_be_avx LABEL PROC
@@ -7979,7 +8711,7 @@ _sk_load_rgb_u16_be_avx LABEL PROC
DB 76,139,0 ; mov (%rax),%r8
DB 72,141,4,127 ; lea (%rdi,%rdi,2),%rax
DB 72,133,201 ; test %rcx,%rcx
- DB 15,133,8,1,0,0 ; jne 4277 <_sk_load_rgb_u16_be_avx+0x11a>
+ DB 15,133,8,1,0,0 ; jne 4a6f <_sk_load_rgb_u16_be_avx+0x11a>
DB 196,193,122,111,4,64 ; vmovdqu (%r8,%rax,2),%xmm0
DB 196,193,122,111,84,64,12 ; vmovdqu 0xc(%r8,%rax,2),%xmm2
DB 196,193,122,111,76,64,24 ; vmovdqu 0x18(%r8,%rax,2),%xmm1
@@ -8038,36 +8770,36 @@ _sk_load_rgb_u16_be_avx LABEL PROC
DB 196,193,121,110,4,64 ; vmovd (%r8,%rax,2),%xmm0
DB 196,193,121,196,68,64,4,2 ; vpinsrw $0x2,0x4(%r8,%rax,2),%xmm0,%xmm0
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 117,5 ; jne 4290 <_sk_load_rgb_u16_be_avx+0x133>
- DB 233,19,255,255,255 ; jmpq 41a3 <_sk_load_rgb_u16_be_avx+0x46>
+ DB 117,5 ; jne 4a88 <_sk_load_rgb_u16_be_avx+0x133>
+ DB 233,19,255,255,255 ; jmpq 499b <_sk_load_rgb_u16_be_avx+0x46>
DB 196,193,121,110,76,64,6 ; vmovd 0x6(%r8,%rax,2),%xmm1
DB 196,65,113,196,68,64,10,2 ; vpinsrw $0x2,0xa(%r8,%rax,2),%xmm1,%xmm8
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,26 ; jb 42bf <_sk_load_rgb_u16_be_avx+0x162>
+ DB 114,26 ; jb 4ab7 <_sk_load_rgb_u16_be_avx+0x162>
DB 196,193,121,110,76,64,12 ; vmovd 0xc(%r8,%rax,2),%xmm1
DB 196,193,113,196,84,64,16,2 ; vpinsrw $0x2,0x10(%r8,%rax,2),%xmm1,%xmm2
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 117,10 ; jne 42c4 <_sk_load_rgb_u16_be_avx+0x167>
- DB 233,228,254,255,255 ; jmpq 41a3 <_sk_load_rgb_u16_be_avx+0x46>
- DB 233,223,254,255,255 ; jmpq 41a3 <_sk_load_rgb_u16_be_avx+0x46>
+ DB 117,10 ; jne 4abc <_sk_load_rgb_u16_be_avx+0x167>
+ DB 233,228,254,255,255 ; jmpq 499b <_sk_load_rgb_u16_be_avx+0x46>
+ DB 233,223,254,255,255 ; jmpq 499b <_sk_load_rgb_u16_be_avx+0x46>
DB 196,193,121,110,76,64,18 ; vmovd 0x12(%r8,%rax,2),%xmm1
DB 196,65,113,196,76,64,22,2 ; vpinsrw $0x2,0x16(%r8,%rax,2),%xmm1,%xmm9
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,26 ; jb 42f3 <_sk_load_rgb_u16_be_avx+0x196>
+ DB 114,26 ; jb 4aeb <_sk_load_rgb_u16_be_avx+0x196>
DB 196,193,121,110,76,64,24 ; vmovd 0x18(%r8,%rax,2),%xmm1
DB 196,193,113,196,76,64,28,2 ; vpinsrw $0x2,0x1c(%r8,%rax,2),%xmm1,%xmm1
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 117,10 ; jne 42f8 <_sk_load_rgb_u16_be_avx+0x19b>
- DB 233,176,254,255,255 ; jmpq 41a3 <_sk_load_rgb_u16_be_avx+0x46>
- DB 233,171,254,255,255 ; jmpq 41a3 <_sk_load_rgb_u16_be_avx+0x46>
+ DB 117,10 ; jne 4af0 <_sk_load_rgb_u16_be_avx+0x19b>
+ DB 233,176,254,255,255 ; jmpq 499b <_sk_load_rgb_u16_be_avx+0x46>
+ DB 233,171,254,255,255 ; jmpq 499b <_sk_load_rgb_u16_be_avx+0x46>
DB 196,193,121,110,92,64,30 ; vmovd 0x1e(%r8,%rax,2),%xmm3
DB 196,65,97,196,92,64,34,2 ; vpinsrw $0x2,0x22(%r8,%rax,2),%xmm3,%xmm11
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,20 ; jb 4321 <_sk_load_rgb_u16_be_avx+0x1c4>
+ DB 114,20 ; jb 4b19 <_sk_load_rgb_u16_be_avx+0x1c4>
DB 196,193,121,110,92,64,36 ; vmovd 0x24(%r8,%rax,2),%xmm3
DB 196,193,97,196,92,64,40,2 ; vpinsrw $0x2,0x28(%r8,%rax,2),%xmm3,%xmm3
- DB 233,130,254,255,255 ; jmpq 41a3 <_sk_load_rgb_u16_be_avx+0x46>
- DB 233,125,254,255,255 ; jmpq 41a3 <_sk_load_rgb_u16_be_avx+0x46>
+ DB 233,130,254,255,255 ; jmpq 499b <_sk_load_rgb_u16_be_avx+0x46>
+ DB 233,125,254,255,255 ; jmpq 499b <_sk_load_rgb_u16_be_avx+0x46>
PUBLIC _sk_store_u16_be_avx
_sk_store_u16_be_avx LABEL PROC
@@ -8115,7 +8847,7 @@ _sk_store_u16_be_avx LABEL PROC
DB 196,65,17,98,200 ; vpunpckldq %xmm8,%xmm13,%xmm9
DB 196,65,17,106,192 ; vpunpckhdq %xmm8,%xmm13,%xmm8
DB 72,133,201 ; test %rcx,%rcx
- DB 117,31 ; jne 4428 <_sk_store_u16_be_avx+0x102>
+ DB 117,31 ; jne 4c20 <_sk_store_u16_be_avx+0x102>
DB 196,1,120,17,28,72 ; vmovups %xmm11,(%r8,%r9,2)
DB 196,1,120,17,84,72,16 ; vmovups %xmm10,0x10(%r8,%r9,2)
DB 196,1,120,17,76,72,32 ; vmovups %xmm9,0x20(%r8,%r9,2)
@@ -8124,31 +8856,31 @@ _sk_store_u16_be_avx LABEL PROC
DB 255,224 ; jmpq *%rax
DB 196,1,121,214,28,72 ; vmovq %xmm11,(%r8,%r9,2)
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,240 ; je 4424 <_sk_store_u16_be_avx+0xfe>
+ DB 116,240 ; je 4c1c <_sk_store_u16_be_avx+0xfe>
DB 196,1,121,23,92,72,8 ; vmovhpd %xmm11,0x8(%r8,%r9,2)
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,227 ; jb 4424 <_sk_store_u16_be_avx+0xfe>
+ DB 114,227 ; jb 4c1c <_sk_store_u16_be_avx+0xfe>
DB 196,1,121,214,84,72,16 ; vmovq %xmm10,0x10(%r8,%r9,2)
- DB 116,218 ; je 4424 <_sk_store_u16_be_avx+0xfe>
+ DB 116,218 ; je 4c1c <_sk_store_u16_be_avx+0xfe>
DB 196,1,121,23,84,72,24 ; vmovhpd %xmm10,0x18(%r8,%r9,2)
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,205 ; jb 4424 <_sk_store_u16_be_avx+0xfe>
+ DB 114,205 ; jb 4c1c <_sk_store_u16_be_avx+0xfe>
DB 196,1,121,214,76,72,32 ; vmovq %xmm9,0x20(%r8,%r9,2)
- DB 116,196 ; je 4424 <_sk_store_u16_be_avx+0xfe>
+ DB 116,196 ; je 4c1c <_sk_store_u16_be_avx+0xfe>
DB 196,1,121,23,76,72,40 ; vmovhpd %xmm9,0x28(%r8,%r9,2)
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,183 ; jb 4424 <_sk_store_u16_be_avx+0xfe>
+ DB 114,183 ; jb 4c1c <_sk_store_u16_be_avx+0xfe>
DB 196,1,121,214,68,72,48 ; vmovq %xmm8,0x30(%r8,%r9,2)
- DB 235,174 ; jmp 4424 <_sk_store_u16_be_avx+0xfe>
+ DB 235,174 ; jmp 4c1c <_sk_store_u16_be_avx+0xfe>
PUBLIC _sk_load_f32_avx
_sk_load_f32_avx LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 119,110 ; ja 44ec <_sk_load_f32_avx+0x76>
+ DB 119,110 ; ja 4ce4 <_sk_load_f32_avx+0x76>
DB 76,139,0 ; mov (%rax),%r8
DB 76,141,12,189,0,0,0,0 ; lea 0x0(,%rdi,4),%r9
- DB 76,141,21,132,0,0,0 ; lea 0x84(%rip),%r10 # 4514 <_sk_load_f32_avx+0x9e>
+ DB 76,141,21,132,0,0,0 ; lea 0x84(%rip),%r10 # 4d0c <_sk_load_f32_avx+0x9e>
DB 73,99,4,138 ; movslq (%r10,%rcx,4),%rax
DB 76,1,208 ; add %r10,%rax
DB 255,224 ; jmpq *%rax
@@ -8205,7 +8937,7 @@ _sk_store_f32_avx LABEL PROC
DB 196,65,37,20,196 ; vunpcklpd %ymm12,%ymm11,%ymm8
DB 196,65,37,21,220 ; vunpckhpd %ymm12,%ymm11,%ymm11
DB 72,133,201 ; test %rcx,%rcx
- DB 117,55 ; jne 45a1 <_sk_store_f32_avx+0x6d>
+ DB 117,55 ; jne 4d99 <_sk_store_f32_avx+0x6d>
DB 196,67,45,24,225,1 ; vinsertf128 $0x1,%xmm9,%ymm10,%ymm12
DB 196,67,61,24,235,1 ; vinsertf128 $0x1,%xmm11,%ymm8,%ymm13
DB 196,67,45,6,201,49 ; vperm2f128 $0x31,%ymm9,%ymm10,%ymm9
@@ -8218,22 +8950,22 @@ _sk_store_f32_avx LABEL PROC
DB 255,224 ; jmpq *%rax
DB 196,65,121,17,20,128 ; vmovupd %xmm10,(%r8,%rax,4)
DB 72,131,249,1 ; cmp $0x1,%rcx
- DB 116,240 ; je 459d <_sk_store_f32_avx+0x69>
+ DB 116,240 ; je 4d95 <_sk_store_f32_avx+0x69>
DB 196,65,121,17,76,128,16 ; vmovupd %xmm9,0x10(%r8,%rax,4)
DB 72,131,249,3 ; cmp $0x3,%rcx
- DB 114,227 ; jb 459d <_sk_store_f32_avx+0x69>
+ DB 114,227 ; jb 4d95 <_sk_store_f32_avx+0x69>
DB 196,65,121,17,68,128,32 ; vmovupd %xmm8,0x20(%r8,%rax,4)
- DB 116,218 ; je 459d <_sk_store_f32_avx+0x69>
+ DB 116,218 ; je 4d95 <_sk_store_f32_avx+0x69>
DB 196,65,121,17,92,128,48 ; vmovupd %xmm11,0x30(%r8,%rax,4)
DB 72,131,249,5 ; cmp $0x5,%rcx
- DB 114,205 ; jb 459d <_sk_store_f32_avx+0x69>
+ DB 114,205 ; jb 4d95 <_sk_store_f32_avx+0x69>
DB 196,67,125,25,84,128,64,1 ; vextractf128 $0x1,%ymm10,0x40(%r8,%rax,4)
- DB 116,195 ; je 459d <_sk_store_f32_avx+0x69>
+ DB 116,195 ; je 4d95 <_sk_store_f32_avx+0x69>
DB 196,67,125,25,76,128,80,1 ; vextractf128 $0x1,%ymm9,0x50(%r8,%rax,4)
DB 72,131,249,7 ; cmp $0x7,%rcx
- DB 114,181 ; jb 459d <_sk_store_f32_avx+0x69>
+ DB 114,181 ; jb 4d95 <_sk_store_f32_avx+0x69>
DB 196,67,125,25,68,128,96,1 ; vextractf128 $0x1,%ymm8,0x60(%r8,%rax,4)
- DB 235,171 ; jmp 459d <_sk_store_f32_avx+0x69>
+ DB 235,171 ; jmp 4d95 <_sk_store_f32_avx+0x69>
PUBLIC _sk_clamp_x_avx
_sk_clamp_x_avx LABEL PROC
@@ -8537,7 +9269,7 @@ _sk_linear_gradient_avx LABEL PROC
DB 196,226,125,24,88,28 ; vbroadcastss 0x1c(%rax),%ymm3
DB 76,139,0 ; mov (%rax),%r8
DB 77,133,192 ; test %r8,%r8
- DB 15,132,146,0,0,0 ; je 4b55 <_sk_linear_gradient_avx+0xb8>
+ DB 15,132,146,0,0,0 ; je 534d <_sk_linear_gradient_avx+0xb8>
DB 72,139,64,8 ; mov 0x8(%rax),%rax
DB 72,131,192,32 ; add $0x20,%rax
DB 196,65,28,87,228 ; vxorps %ymm12,%ymm12,%ymm12
@@ -8564,8 +9296,8 @@ _sk_linear_gradient_avx LABEL PROC
DB 196,227,13,74,219,208 ; vblendvps %ymm13,%ymm3,%ymm14,%ymm3
DB 72,131,192,36 ; add $0x24,%rax
DB 73,255,200 ; dec %r8
- DB 117,140 ; jne 4adf <_sk_linear_gradient_avx+0x42>
- DB 235,20 ; jmp 4b69 <_sk_linear_gradient_avx+0xcc>
+ DB 117,140 ; jne 52d7 <_sk_linear_gradient_avx+0x42>
+ DB 235,20 ; jmp 5361 <_sk_linear_gradient_avx+0xcc>
DB 196,65,36,87,219 ; vxorps %ymm11,%ymm11,%ymm11
DB 196,65,44,87,210 ; vxorps %ymm10,%ymm10,%ymm10
DB 196,65,52,87,201 ; vxorps %ymm9,%ymm9,%ymm9
@@ -11142,6 +11874,392 @@ _sk_table_a_sse41 LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
+PUBLIC _sk_parametric_r_sse41 ; SSE4.1 stage: replaces xmm0 (4 floats x) with a piecewise curve of x, clamped to [0,1], then jumps to the next stage
+_sk_parametric_r_sse41 LABEL PROC ; in: rsi -> stream of pointers, xmm0 = x (presumably red, per the _r suffix); scratch: rax, ecx, xmm8-xmm13
+ DB 72,173 ; lods %ds:(%rsi),%rax -- rax = ctx pointer taken from the stream at rsi; floats at ctx+0x0..0x18 are read below
+ DB 243,68,15,16,64,16 ; movss 0x10(%rax),%xmm8 -- threshold = ctx[0x10]
+ DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 -- broadcast lane 0 to all 4 lanes (same idiom after every movss/movd below)
+ DB 243,68,15,16,72,12 ; movss 0xc(%rax),%xmm9
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 68,15,89,200 ; mulps %xmm0,%xmm9 -- xmm9 = ctx[0xc] * x
+ DB 243,68,15,16,80,4 ; movss 0x4(%rax),%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 68,15,89,208 ; mulps %xmm0,%xmm10 -- xmm10 = ctx[0x4] * x
+ DB 65,15,194,192,2 ; cmpleps %xmm8,%xmm0 -- xmm0 = mask(x <= ctx[0x10]); x is not needed after this
+ DB 243,68,15,16,64,24 ; movss 0x18(%rax),%xmm8
+ DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8
+ DB 69,15,88,200 ; addps %xmm8,%xmm9 -- xmm9 = ctx[0xc]*x + ctx[0x18] (linear segment)
+ DB 243,68,15,16,0 ; movss (%rax),%xmm8 -- exponent = ctx[0x0], broadcast later
+ DB 243,68,15,16,88,8 ; movss 0x8(%rax),%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,88,211 ; addps %xmm11,%xmm10 -- xmm10 = base = ctx[0x4]*x + ctx[0x8]
+ DB 69,15,91,218 ; cvtdq2ps %xmm10,%xmm11 -- log2 approximation starts: the float bits of base are converted as integers
+ DB 185,0,0,0,52 ; mov $0x34000000,%ecx -- 2^-23 as float bits
+ DB 102,68,15,110,225 ; movd %ecx,%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,89,227 ; mulps %xmm11,%xmm12
+ DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx -- 127.0 as float bits
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,92,227 ; subps %xmm11,%xmm12 -- xmm12 = bits * 2^-23 - 127
+ DB 185,255,255,127,0 ; mov $0x7fffff,%ecx -- mask of the 23 mantissa bits
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 102,69,15,112,219,0 ; pshufd $0x0,%xmm11,%xmm11
+ DB 102,69,15,219,218 ; pand %xmm10,%xmm11
+ DB 185,0,0,0,63 ; mov $0x3f000000,%ecx -- 0.5 as float bits
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 102,69,15,112,210,0 ; pshufd $0x0,%xmm10,%xmm10
+ DB 102,69,15,235,211 ; por %xmm11,%xmm10 -- xmm10 = m: mantissa of base with the exponent bits of 0.5
+ DB 185,42,145,49,64 ; mov $0x4031912a,%ecx
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,88,220 ; addps %xmm12,%xmm11 -- xmm11 = xmm12 + const(0x4031912a)
+ DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx
+ DB 102,68,15,110,225 ; movd %ecx,%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,89,226 ; mulps %xmm10,%xmm12
+ DB 69,15,92,220 ; subps %xmm12,%xmm11 -- xmm11 -= const(0x3fbfbf75) * m
+ DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx
+ DB 102,68,15,110,225 ; movd %ecx,%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,88,234 ; addps %xmm10,%xmm13
+ DB 69,15,94,229 ; divps %xmm13,%xmm12
+ DB 69,15,92,220 ; subps %xmm12,%xmm11 -- xmm11 -= const(0x3fdce9a3) / (const(0x3eb444f9) + m); xmm11 is the log2 estimate
+ DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8
+ DB 69,15,89,195 ; mulps %xmm11,%xmm8 -- xmm8 = p = ctx[0x0] * log2 estimate
+ DB 102,69,15,58,8,216,1 ; roundps $0x1,%xmm8,%xmm11 -- xmm11 = floor(p) (rounding mode 1 = toward -inf)
+ DB 185,0,0,0,75 ; mov $0x4b000000,%ecx -- 2^23 as float bits
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx
+ DB 102,68,15,110,225 ; movd %ecx,%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,88,224 ; addps %xmm8,%xmm12 -- xmm12 = p + const(0x42f28c51)
+ DB 69,15,92,195 ; subps %xmm11,%xmm8 -- xmm8 = f = p - floor(p)
+ DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,89,216 ; mulps %xmm8,%xmm11
+ DB 69,15,92,227 ; subps %xmm11,%xmm12 -- xmm12 -= const(0x3fbebc8d) * f
+ DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 185,248,245,154,64 ; mov $0x409af5f8,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,92,232 ; subps %xmm8,%xmm13
+ DB 69,15,94,221 ; divps %xmm13,%xmm11
+ DB 69,15,88,220 ; addps %xmm12,%xmm11 -- xmm11 = xmm12 + const(0x41ddd2fe) / (const(0x409af5f8) - f)
+ DB 69,15,89,218 ; mulps %xmm10,%xmm11 -- scale by 2^23
+ DB 102,69,15,91,211 ; cvtps2dq %xmm11,%xmm10 -- to integers; these are then used as float bits (the pow2 half of approx_powf, per the commit message)
+ DB 243,68,15,16,64,20 ; movss 0x14(%rax),%xmm8
+ DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8
+ DB 69,15,88,194 ; addps %xmm10,%xmm8 -- xmm8 = power estimate + ctx[0x14]
+ DB 102,69,15,56,20,193 ; blendvps %xmm0,%xmm9,%xmm8 -- lanes whose mask (xmm0) is set take the linear value from xmm9
+ DB 15,87,192 ; xorps %xmm0,%xmm0
+ DB 68,15,95,192 ; maxps %xmm0,%xmm8 -- clamp below at 0
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax -- 1.0 as float bits; rax is free because lods reloads it below
+ DB 102,15,110,192 ; movd %eax,%xmm0
+ DB 15,198,192,0 ; shufps $0x0,%xmm0,%xmm0
+ DB 68,15,93,192 ; minps %xmm0,%xmm8 -- clamp above at 1
+ DB 72,173 ; lods %ds:(%rsi),%rax -- rax = next pointer from the stream
+ DB 65,15,40,192 ; movaps %xmm8,%xmm0 -- result -> xmm0
+ DB 255,224 ; jmpq *%rax -- tail-jump to that pointer
+
+PUBLIC _sk_parametric_g_sse41 ; SSE4.1 stage: replaces xmm1 (4 floats x) with a piecewise curve of x, clamped to [0,1], then jumps to the next stage
+_sk_parametric_g_sse41 LABEL PROC ; in: rsi -> stream of pointers, xmm1 = x (presumably green, per the _g suffix); xmm0 preserved via xmm8; scratch: rax, ecx, xmm9-xmm13
+ DB 68,15,40,192 ; movaps %xmm0,%xmm8 -- save xmm0: it is needed as the implicit blendvps mask below
+ DB 72,173 ; lods %ds:(%rsi),%rax -- rax = ctx pointer taken from the stream at rsi; floats at ctx+0x0..0x18 are read below
+ DB 243,68,15,16,80,16 ; movss 0x10(%rax),%xmm10 -- threshold = ctx[0x10]
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 -- broadcast lane 0 to all 4 lanes (same idiom after every movss/movd below)
+ DB 243,68,15,16,72,12 ; movss 0xc(%rax),%xmm9
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 68,15,89,201 ; mulps %xmm1,%xmm9 -- xmm9 = ctx[0xc] * x
+ DB 243,68,15,16,88,4 ; movss 0x4(%rax),%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 68,15,89,217 ; mulps %xmm1,%xmm11 -- xmm11 = ctx[0x4] * x
+ DB 15,40,193 ; movaps %xmm1,%xmm0
+ DB 65,15,194,194,2 ; cmpleps %xmm10,%xmm0 -- xmm0 = mask(x <= ctx[0x10]); xmm1 is reused as scratch from here
+ DB 243,15,16,72,24 ; movss 0x18(%rax),%xmm1
+ DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1
+ DB 68,15,88,201 ; addps %xmm1,%xmm9 -- xmm9 = ctx[0xc]*x + ctx[0x18] (linear segment)
+ DB 243,68,15,16,32 ; movss (%rax),%xmm12 -- exponent = ctx[0x0], broadcast later
+ DB 243,15,16,72,8 ; movss 0x8(%rax),%xmm1
+ DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1
+ DB 68,15,88,217 ; addps %xmm1,%xmm11 -- xmm11 = base = ctx[0x4]*x + ctx[0x8]
+ DB 69,15,91,211 ; cvtdq2ps %xmm11,%xmm10 -- log2 approximation starts: the float bits of base are converted as integers
+ DB 185,0,0,0,52 ; mov $0x34000000,%ecx -- 2^-23 as float bits
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,234 ; mulps %xmm10,%xmm13
+ DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx -- 127.0 as float bits
+ DB 102,15,110,201 ; movd %ecx,%xmm1
+ DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1
+ DB 68,15,92,233 ; subps %xmm1,%xmm13 -- xmm13 = bits * 2^-23 - 127
+ DB 185,255,255,127,0 ; mov $0x7fffff,%ecx -- mask of the 23 mantissa bits
+ DB 102,15,110,201 ; movd %ecx,%xmm1
+ DB 102,68,15,112,209,0 ; pshufd $0x0,%xmm1,%xmm10
+ DB 102,69,15,219,211 ; pand %xmm11,%xmm10
+ DB 185,0,0,0,63 ; mov $0x3f000000,%ecx -- 0.5 as float bits
+ DB 102,15,110,201 ; movd %ecx,%xmm1
+ DB 102,68,15,112,217,0 ; pshufd $0x0,%xmm1,%xmm11
+ DB 102,69,15,235,218 ; por %xmm10,%xmm11 -- xmm11 = m: mantissa of base with the exponent bits of 0.5
+ DB 185,42,145,49,64 ; mov $0x4031912a,%ecx
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 69,15,88,213 ; addps %xmm13,%xmm10 -- xmm10 = xmm13 + const(0x4031912a)
+ DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx
+ DB 102,15,110,201 ; movd %ecx,%xmm1
+ DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1
+ DB 65,15,89,203 ; mulps %xmm11,%xmm1
+ DB 68,15,92,209 ; subps %xmm1,%xmm10 -- xmm10 -= const(0x3fbfbf75) * m
+ DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx
+ DB 102,15,110,201 ; movd %ecx,%xmm1
+ DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1
+ DB 65,15,88,203 ; addps %xmm11,%xmm1
+ DB 68,15,94,233 ; divps %xmm1,%xmm13
+ DB 69,15,92,213 ; subps %xmm13,%xmm10 -- xmm10 -= const(0x3fdce9a3) / (const(0x3eb444f9) + m); xmm10 is the log2 estimate
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,89,226 ; mulps %xmm10,%xmm12 -- xmm12 = p = ctx[0x0] * log2 estimate
+ DB 102,69,15,58,8,220,1 ; roundps $0x1,%xmm12,%xmm11 -- xmm11 = floor(p) (rounding mode 1 = toward -inf)
+ DB 185,0,0,0,75 ; mov $0x4b000000,%ecx -- 2^23 as float bits
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,88,236 ; addps %xmm12,%xmm13 -- xmm13 = p + const(0x42f28c51)
+ DB 69,15,92,227 ; subps %xmm11,%xmm12 -- xmm12 = f = p - floor(p)
+ DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx
+ DB 102,15,110,201 ; movd %ecx,%xmm1
+ DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1
+ DB 65,15,89,204 ; mulps %xmm12,%xmm1
+ DB 68,15,92,233 ; subps %xmm1,%xmm13 -- xmm13 -= const(0x3fbebc8d) * f
+ DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 185,248,245,154,64 ; mov $0x409af5f8,%ecx
+ DB 102,15,110,201 ; movd %ecx,%xmm1
+ DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1
+ DB 65,15,92,204 ; subps %xmm12,%xmm1
+ DB 68,15,94,217 ; divps %xmm1,%xmm11
+ DB 69,15,88,221 ; addps %xmm13,%xmm11 -- xmm11 = xmm13 + const(0x41ddd2fe) / (const(0x409af5f8) - f)
+ DB 69,15,89,218 ; mulps %xmm10,%xmm11 -- scale by 2^23
+ DB 102,69,15,91,211 ; cvtps2dq %xmm11,%xmm10 -- to integers; these are then used as float bits (the pow2 half of approx_powf, per the commit message)
+ DB 243,15,16,72,20 ; movss 0x14(%rax),%xmm1
+ DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1
+ DB 65,15,88,202 ; addps %xmm10,%xmm1 -- xmm1 = power estimate + ctx[0x14]
+ DB 102,65,15,56,20,201 ; blendvps %xmm0,%xmm9,%xmm1 -- lanes whose mask (xmm0) is set take the linear value from xmm9
+ DB 15,87,192 ; xorps %xmm0,%xmm0
+ DB 15,95,200 ; maxps %xmm0,%xmm1 -- clamp below at 0
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax -- 1.0 as float bits; rax is free because lods reloads it below
+ DB 102,15,110,192 ; movd %eax,%xmm0
+ DB 15,198,192,0 ; shufps $0x0,%xmm0,%xmm0
+ DB 15,93,200 ; minps %xmm0,%xmm1 -- clamp above at 1; result stays in xmm1
+ DB 72,173 ; lods %ds:(%rsi),%rax -- rax = next pointer from the stream
+ DB 65,15,40,192 ; movaps %xmm8,%xmm0 -- restore the saved xmm0
+ DB 255,224 ; jmpq *%rax -- tail-jump to that pointer
+
+PUBLIC _sk_parametric_b_sse41 ; SSE4.1 stage: replaces xmm2 (4 floats x) with a piecewise curve of x, clamped to [0,1], then jumps to the next stage
+_sk_parametric_b_sse41 LABEL PROC ; in: rsi -> stream of pointers, xmm2 = x (presumably blue, per the _b suffix); xmm0 preserved via xmm8; scratch: rax, ecx, xmm9-xmm13
+ DB 68,15,40,192 ; movaps %xmm0,%xmm8 -- save xmm0: it is needed as the implicit blendvps mask below
+ DB 72,173 ; lods %ds:(%rsi),%rax -- rax = ctx pointer taken from the stream at rsi; floats at ctx+0x0..0x18 are read below
+ DB 243,68,15,16,80,16 ; movss 0x10(%rax),%xmm10 -- threshold = ctx[0x10]
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 -- broadcast lane 0 to all 4 lanes (same idiom after every movss/movd below)
+ DB 243,68,15,16,72,12 ; movss 0xc(%rax),%xmm9
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 68,15,89,202 ; mulps %xmm2,%xmm9 -- xmm9 = ctx[0xc] * x
+ DB 243,68,15,16,88,4 ; movss 0x4(%rax),%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 68,15,89,218 ; mulps %xmm2,%xmm11 -- xmm11 = ctx[0x4] * x
+ DB 15,40,194 ; movaps %xmm2,%xmm0
+ DB 65,15,194,194,2 ; cmpleps %xmm10,%xmm0 -- xmm0 = mask(x <= ctx[0x10]); xmm2 is reused as scratch from here
+ DB 243,15,16,80,24 ; movss 0x18(%rax),%xmm2
+ DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2
+ DB 68,15,88,202 ; addps %xmm2,%xmm9 -- xmm9 = ctx[0xc]*x + ctx[0x18] (linear segment)
+ DB 243,68,15,16,32 ; movss (%rax),%xmm12 -- exponent = ctx[0x0], broadcast later
+ DB 243,15,16,80,8 ; movss 0x8(%rax),%xmm2
+ DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2
+ DB 68,15,88,218 ; addps %xmm2,%xmm11 -- xmm11 = base = ctx[0x4]*x + ctx[0x8]
+ DB 69,15,91,211 ; cvtdq2ps %xmm11,%xmm10 -- log2 approximation starts: the float bits of base are converted as integers
+ DB 185,0,0,0,52 ; mov $0x34000000,%ecx -- 2^-23 as float bits
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,234 ; mulps %xmm10,%xmm13
+ DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx -- 127.0 as float bits
+ DB 102,15,110,209 ; movd %ecx,%xmm2
+ DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2
+ DB 68,15,92,234 ; subps %xmm2,%xmm13 -- xmm13 = bits * 2^-23 - 127
+ DB 185,255,255,127,0 ; mov $0x7fffff,%ecx -- mask of the 23 mantissa bits
+ DB 102,15,110,209 ; movd %ecx,%xmm2
+ DB 102,68,15,112,210,0 ; pshufd $0x0,%xmm2,%xmm10
+ DB 102,69,15,219,211 ; pand %xmm11,%xmm10
+ DB 185,0,0,0,63 ; mov $0x3f000000,%ecx -- 0.5 as float bits
+ DB 102,15,110,209 ; movd %ecx,%xmm2
+ DB 102,68,15,112,218,0 ; pshufd $0x0,%xmm2,%xmm11
+ DB 102,69,15,235,218 ; por %xmm10,%xmm11 -- xmm11 = m: mantissa of base with the exponent bits of 0.5
+ DB 185,42,145,49,64 ; mov $0x4031912a,%ecx
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 69,15,88,213 ; addps %xmm13,%xmm10 -- xmm10 = xmm13 + const(0x4031912a)
+ DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx
+ DB 102,15,110,209 ; movd %ecx,%xmm2
+ DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2
+ DB 65,15,89,211 ; mulps %xmm11,%xmm2
+ DB 68,15,92,210 ; subps %xmm2,%xmm10 -- xmm10 -= const(0x3fbfbf75) * m
+ DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx
+ DB 102,15,110,209 ; movd %ecx,%xmm2
+ DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2
+ DB 65,15,88,211 ; addps %xmm11,%xmm2
+ DB 68,15,94,234 ; divps %xmm2,%xmm13
+ DB 69,15,92,213 ; subps %xmm13,%xmm10 -- xmm10 -= const(0x3fdce9a3) / (const(0x3eb444f9) + m); xmm10 is the log2 estimate
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,89,226 ; mulps %xmm10,%xmm12 -- xmm12 = p = ctx[0x0] * log2 estimate
+ DB 102,69,15,58,8,220,1 ; roundps $0x1,%xmm12,%xmm11 -- xmm11 = floor(p) (rounding mode 1 = toward -inf)
+ DB 185,0,0,0,75 ; mov $0x4b000000,%ecx -- 2^23 as float bits
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,88,236 ; addps %xmm12,%xmm13 -- xmm13 = p + const(0x42f28c51)
+ DB 69,15,92,227 ; subps %xmm11,%xmm12 -- xmm12 = f = p - floor(p)
+ DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx
+ DB 102,15,110,209 ; movd %ecx,%xmm2
+ DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2
+ DB 65,15,89,212 ; mulps %xmm12,%xmm2
+ DB 68,15,92,234 ; subps %xmm2,%xmm13 -- xmm13 -= const(0x3fbebc8d) * f
+ DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 185,248,245,154,64 ; mov $0x409af5f8,%ecx
+ DB 102,15,110,209 ; movd %ecx,%xmm2
+ DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2
+ DB 65,15,92,212 ; subps %xmm12,%xmm2
+ DB 68,15,94,218 ; divps %xmm2,%xmm11
+ DB 69,15,88,221 ; addps %xmm13,%xmm11 -- xmm11 = xmm13 + const(0x41ddd2fe) / (const(0x409af5f8) - f)
+ DB 69,15,89,218 ; mulps %xmm10,%xmm11 -- scale by 2^23
+ DB 102,69,15,91,211 ; cvtps2dq %xmm11,%xmm10 -- to integers; these are then used as float bits (the pow2 half of approx_powf, per the commit message)
+ DB 243,15,16,80,20 ; movss 0x14(%rax),%xmm2
+ DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2
+ DB 65,15,88,210 ; addps %xmm10,%xmm2 -- xmm2 = power estimate + ctx[0x14]
+ DB 102,65,15,56,20,209 ; blendvps %xmm0,%xmm9,%xmm2 -- lanes whose mask (xmm0) is set take the linear value from xmm9
+ DB 15,87,192 ; xorps %xmm0,%xmm0
+ DB 15,95,208 ; maxps %xmm0,%xmm2 -- clamp below at 0
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax -- 1.0 as float bits; rax is free because lods reloads it below
+ DB 102,15,110,192 ; movd %eax,%xmm0
+ DB 15,198,192,0 ; shufps $0x0,%xmm0,%xmm0
+ DB 15,93,208 ; minps %xmm0,%xmm2 -- clamp above at 1; result stays in xmm2
+ DB 72,173 ; lods %ds:(%rsi),%rax -- rax = next pointer from the stream
+ DB 65,15,40,192 ; movaps %xmm8,%xmm0 -- restore the saved xmm0
+ DB 255,224 ; jmpq *%rax -- tail-jump to that pointer
+
+PUBLIC _sk_parametric_a_sse41 ; SSE4.1 stage: replaces xmm3 (4 floats x) with a piecewise curve of x, clamped to [0,1], then jumps to the next stage
+_sk_parametric_a_sse41 LABEL PROC ; in: rsi -> stream of pointers, xmm3 = x (presumably alpha, per the _a suffix); xmm0 preserved via xmm8; scratch: rax, ecx, xmm9-xmm13
+ DB 68,15,40,192 ; movaps %xmm0,%xmm8 -- save xmm0: it is needed as the implicit blendvps mask below
+ DB 72,173 ; lods %ds:(%rsi),%rax -- rax = ctx pointer taken from the stream at rsi; floats at ctx+0x0..0x18 are read below
+ DB 243,68,15,16,80,16 ; movss 0x10(%rax),%xmm10 -- threshold = ctx[0x10]
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 -- broadcast lane 0 to all 4 lanes (same idiom after every movss/movd below)
+ DB 243,68,15,16,72,12 ; movss 0xc(%rax),%xmm9
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 68,15,89,203 ; mulps %xmm3,%xmm9 -- xmm9 = ctx[0xc] * x
+ DB 243,68,15,16,88,4 ; movss 0x4(%rax),%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 68,15,89,219 ; mulps %xmm3,%xmm11 -- xmm11 = ctx[0x4] * x
+ DB 15,40,195 ; movaps %xmm3,%xmm0
+ DB 65,15,194,194,2 ; cmpleps %xmm10,%xmm0 -- xmm0 = mask(x <= ctx[0x10]); xmm3 is reused as scratch from here
+ DB 243,15,16,88,24 ; movss 0x18(%rax),%xmm3
+ DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3
+ DB 68,15,88,203 ; addps %xmm3,%xmm9 -- xmm9 = ctx[0xc]*x + ctx[0x18] (linear segment)
+ DB 243,68,15,16,32 ; movss (%rax),%xmm12 -- exponent = ctx[0x0], broadcast later
+ DB 243,15,16,88,8 ; movss 0x8(%rax),%xmm3
+ DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3
+ DB 68,15,88,219 ; addps %xmm3,%xmm11 -- xmm11 = base = ctx[0x4]*x + ctx[0x8]
+ DB 69,15,91,211 ; cvtdq2ps %xmm11,%xmm10 -- log2 approximation starts: the float bits of base are converted as integers
+ DB 185,0,0,0,52 ; mov $0x34000000,%ecx -- 2^-23 as float bits
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,234 ; mulps %xmm10,%xmm13
+ DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx -- 127.0 as float bits
+ DB 102,15,110,217 ; movd %ecx,%xmm3
+ DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3
+ DB 68,15,92,235 ; subps %xmm3,%xmm13 -- xmm13 = bits * 2^-23 - 127
+ DB 185,255,255,127,0 ; mov $0x7fffff,%ecx -- mask of the 23 mantissa bits
+ DB 102,15,110,217 ; movd %ecx,%xmm3
+ DB 102,68,15,112,211,0 ; pshufd $0x0,%xmm3,%xmm10
+ DB 102,69,15,219,211 ; pand %xmm11,%xmm10
+ DB 185,0,0,0,63 ; mov $0x3f000000,%ecx -- 0.5 as float bits
+ DB 102,15,110,217 ; movd %ecx,%xmm3
+ DB 102,68,15,112,219,0 ; pshufd $0x0,%xmm3,%xmm11
+ DB 102,69,15,235,218 ; por %xmm10,%xmm11 -- xmm11 = m: mantissa of base with the exponent bits of 0.5
+ DB 185,42,145,49,64 ; mov $0x4031912a,%ecx
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 69,15,88,213 ; addps %xmm13,%xmm10 -- xmm10 = xmm13 + const(0x4031912a)
+ DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx
+ DB 102,15,110,217 ; movd %ecx,%xmm3
+ DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3
+ DB 65,15,89,219 ; mulps %xmm11,%xmm3
+ DB 68,15,92,211 ; subps %xmm3,%xmm10 -- xmm10 -= const(0x3fbfbf75) * m
+ DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx
+ DB 102,15,110,217 ; movd %ecx,%xmm3
+ DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3
+ DB 65,15,88,219 ; addps %xmm11,%xmm3
+ DB 68,15,94,235 ; divps %xmm3,%xmm13
+ DB 69,15,92,213 ; subps %xmm13,%xmm10 -- xmm10 -= const(0x3fdce9a3) / (const(0x3eb444f9) + m); xmm10 is the log2 estimate
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,89,226 ; mulps %xmm10,%xmm12 -- xmm12 = p = ctx[0x0] * log2 estimate
+ DB 102,69,15,58,8,220,1 ; roundps $0x1,%xmm12,%xmm11 -- xmm11 = floor(p) (rounding mode 1 = toward -inf)
+ DB 185,0,0,0,75 ; mov $0x4b000000,%ecx -- 2^23 as float bits
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,88,236 ; addps %xmm12,%xmm13 -- xmm13 = p + const(0x42f28c51)
+ DB 69,15,92,227 ; subps %xmm11,%xmm12 -- xmm12 = f = p - floor(p)
+ DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx
+ DB 102,15,110,217 ; movd %ecx,%xmm3
+ DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3
+ DB 65,15,89,220 ; mulps %xmm12,%xmm3
+ DB 68,15,92,235 ; subps %xmm3,%xmm13 -- xmm13 -= const(0x3fbebc8d) * f
+ DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 185,248,245,154,64 ; mov $0x409af5f8,%ecx
+ DB 102,15,110,217 ; movd %ecx,%xmm3
+ DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3
+ DB 65,15,92,220 ; subps %xmm12,%xmm3
+ DB 68,15,94,219 ; divps %xmm3,%xmm11
+ DB 69,15,88,221 ; addps %xmm13,%xmm11 -- xmm11 = xmm13 + const(0x41ddd2fe) / (const(0x409af5f8) - f)
+ DB 69,15,89,218 ; mulps %xmm10,%xmm11 -- scale by 2^23
+ DB 102,69,15,91,211 ; cvtps2dq %xmm11,%xmm10 -- to integers; these are then used as float bits (the pow2 half of approx_powf, per the commit message)
+ DB 243,15,16,88,20 ; movss 0x14(%rax),%xmm3
+ DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3
+ DB 65,15,88,218 ; addps %xmm10,%xmm3 -- xmm3 = power estimate + ctx[0x14]
+ DB 102,65,15,56,20,217 ; blendvps %xmm0,%xmm9,%xmm3 -- lanes whose mask (xmm0) is set take the linear value from xmm9
+ DB 15,87,192 ; xorps %xmm0,%xmm0
+ DB 15,95,216 ; maxps %xmm0,%xmm3 -- clamp below at 0
+ DB 184,0,0,128,63 ; mov $0x3f800000,%eax -- 1.0 as float bits; rax is free because lods reloads it below
+ DB 102,15,110,192 ; movd %eax,%xmm0
+ DB 15,198,192,0 ; shufps $0x0,%xmm0,%xmm0
+ DB 15,93,216 ; minps %xmm0,%xmm3 -- clamp above at 1; result stays in xmm3
+ DB 72,173 ; lods %ds:(%rsi),%rax -- rax = next pointer from the stream
+ DB 65,15,40,192 ; movaps %xmm8,%xmm0 -- restore the saved xmm0
+ DB 255,224 ; jmpq *%rax -- tail-jump to that pointer
+
PUBLIC _sk_load_a8_sse41
_sk_load_a8_sse41 LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -11266,9 +12384,9 @@ _sk_gather_i8_sse41 LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 73,137,192 ; mov %rax,%r8
DB 77,133,192 ; test %r8,%r8
- DB 116,5 ; je 210a <_sk_gather_i8_sse41+0xf>
+ DB 116,5 ; je 27a3 <_sk_gather_i8_sse41+0xf>
DB 76,137,192 ; mov %r8,%rax
- DB 235,2 ; jmp 210c <_sk_gather_i8_sse41+0x11>
+ DB 235,2 ; jmp 27a5 <_sk_gather_i8_sse41+0x11>
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 243,15,91,201 ; cvttps2dq %xmm1,%xmm1
@@ -12362,7 +13480,7 @@ _sk_linear_gradient_sse41 LABEL PROC
DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
DB 72,139,8 ; mov (%rax),%rcx
DB 72,133,201 ; test %rcx,%rcx
- DB 15,132,4,1,0,0 ; je 3338 <_sk_linear_gradient_sse41+0x13e>
+ DB 15,132,4,1,0,0 ; je 39d1 <_sk_linear_gradient_sse41+0x13e>
DB 72,131,236,88 ; sub $0x58,%rsp
DB 15,41,36,36 ; movaps %xmm4,(%rsp)
DB 15,41,108,36,16 ; movaps %xmm5,0x10(%rsp)
@@ -12413,13 +13531,13 @@ _sk_linear_gradient_sse41 LABEL PROC
DB 15,40,196 ; movaps %xmm4,%xmm0
DB 72,131,192,36 ; add $0x24,%rax
DB 72,255,201 ; dec %rcx
- DB 15,133,65,255,255,255 ; jne 3260 <_sk_linear_gradient_sse41+0x66>
+ DB 15,133,65,255,255,255 ; jne 38f9 <_sk_linear_gradient_sse41+0x66>
DB 15,40,124,36,48 ; movaps 0x30(%rsp),%xmm7
DB 15,40,116,36,32 ; movaps 0x20(%rsp),%xmm6
DB 15,40,108,36,16 ; movaps 0x10(%rsp),%xmm5
DB 15,40,36,36 ; movaps (%rsp),%xmm4
DB 72,131,196,88 ; add $0x58,%rsp
- DB 235,13 ; jmp 3345 <_sk_linear_gradient_sse41+0x14b>
+ DB 235,13 ; jmp 39de <_sk_linear_gradient_sse41+0x14b>
DB 15,87,201 ; xorps %xmm1,%xmm1
DB 15,87,210 ; xorps %xmm2,%xmm2
DB 15,87,219 ; xorps %xmm3,%xmm3
@@ -15098,6 +16216,414 @@ _sk_table_a_sse2 LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
+PUBLIC _sk_parametric_r_sse2 ; SSE2 stage: replaces xmm0 (4 floats x) with a piecewise curve of x, clamped to [0,1], then jumps to the next stage
+_sk_parametric_r_sse2 LABEL PROC ; in: rsi -> stream of pointers, xmm0 = x (presumably red, per the _r suffix); scratch: rax, ecx, r8d, xmm8-xmm15
+ DB 72,173 ; lods %ds:(%rsi),%rax -- rax = ctx pointer taken from the stream at rsi; floats at ctx+0x0..0x18 are read below
+ DB 243,68,15,16,72,16 ; movss 0x10(%rax),%xmm9 -- threshold = ctx[0x10]
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 -- broadcast lane 0 to all 4 lanes (same idiom for every movss/movd constant below)
+ DB 243,68,15,16,64,12 ; movss 0xc(%rax),%xmm8
+ DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8
+ DB 68,15,89,192 ; mulps %xmm0,%xmm8 -- xmm8 = ctx[0xc] * x
+ DB 243,68,15,16,80,4 ; movss 0x4(%rax),%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 68,15,89,208 ; mulps %xmm0,%xmm10 -- xmm10 = ctx[0x4] * x
+ DB 65,15,194,193,2 ; cmpleps %xmm9,%xmm0 -- xmm0 = mask(x <= ctx[0x10]); x is not needed after this
+ DB 243,68,15,16,72,24 ; movss 0x18(%rax),%xmm9
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 69,15,88,193 ; addps %xmm9,%xmm8 -- xmm8 = ctx[0xc]*x + ctx[0x18] (linear segment)
+ DB 243,68,15,16,8 ; movss (%rax),%xmm9 -- exponent = ctx[0x0], broadcast later
+ DB 243,68,15,16,88,8 ; movss 0x8(%rax),%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,88,211 ; addps %xmm11,%xmm10 -- xmm10 = base = ctx[0x4]*x + ctx[0x8]
+ DB 69,15,91,218 ; cvtdq2ps %xmm10,%xmm11 -- log2 approximation starts: the float bits of base are converted as integers
+ DB 185,0,0,0,52 ; mov $0x34000000,%ecx -- 2^-23 as float bits
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,235 ; mulps %xmm11,%xmm13
+ DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx -- 127.0 as float bits
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,92,235 ; subps %xmm11,%xmm13 -- xmm13 = bits * 2^-23 - 127
+ DB 185,255,255,127,0 ; mov $0x7fffff,%ecx -- mask of the 23 mantissa bits
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 102,69,15,112,227,0 ; pshufd $0x0,%xmm11,%xmm12
+ DB 102,69,15,219,226 ; pand %xmm10,%xmm12
+ DB 185,0,0,0,63 ; mov $0x3f000000,%ecx -- 0.5 as float bits
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 102,69,15,112,218,0 ; pshufd $0x0,%xmm10,%xmm11
+ DB 102,69,15,235,220 ; por %xmm12,%xmm11 -- xmm11 = m: mantissa of base with the exponent bits of 0.5
+ DB 185,42,145,49,64 ; mov $0x4031912a,%ecx
+ DB 102,68,15,110,225 ; movd %ecx,%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,88,229 ; addps %xmm13,%xmm12 -- xmm12 = xmm13 + const(0x4031912a)
+ DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 69,15,89,211 ; mulps %xmm11,%xmm10
+ DB 69,15,92,226 ; subps %xmm10,%xmm12 -- xmm12 -= const(0x3fbfbf75) * m
+ DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx
+ DB 102,68,15,110,241 ; movd %ecx,%xmm14
+ DB 185,0,0,128,63 ; mov $0x3f800000,%ecx -- 1.0 as float bits
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10 -- xmm10 holds 1.0 until the final minps (broadcast further down)
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d -- 2^23 as float bits, parked in r8d until it is moved to xmm11 below
+ DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx
+ DB 102,68,15,110,249 ; movd %ecx,%xmm15
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14
+ DB 69,15,88,243 ; addps %xmm11,%xmm14
+ DB 69,15,94,238 ; divps %xmm14,%xmm13
+ DB 69,15,92,229 ; subps %xmm13,%xmm12 -- xmm12 -= const(0x3fdce9a3) / (const(0x3eb444f9) + m); xmm12 is the log2 estimate
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 69,15,89,204 ; mulps %xmm12,%xmm9 -- xmm9 = p = ctx[0x0] * log2 estimate
+ DB 243,69,15,91,217 ; cvttps2dq %xmm9,%xmm11 -- floor(p) without roundps: truncate toward zero...
+ DB 69,15,91,219 ; cvtdq2ps %xmm11,%xmm11 -- ...and convert back to float
+ DB 69,15,40,225 ; movaps %xmm9,%xmm12
+ DB 69,15,198,255,0 ; shufps $0x0,%xmm15,%xmm15
+ DB 69,15,88,249 ; addps %xmm9,%xmm15 -- xmm15 = p + const(0x42f28c51)
+ DB 69,15,40,233 ; movaps %xmm9,%xmm13
+ DB 69,15,194,235,1 ; cmpltps %xmm11,%xmm13 -- mask(p < trunc(p))
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 69,15,84,234 ; andps %xmm10,%xmm13 -- 1.0 in lanes where truncation rounded up
+ DB 69,15,87,201 ; xorps %xmm9,%xmm9 -- xmm9 = 0, kept for the final maxps
+ DB 69,15,92,221 ; subps %xmm13,%xmm11 -- xmm11 = floor(p)
+ DB 69,15,92,227 ; subps %xmm11,%xmm12 -- xmm12 = f = p - floor(p)
+ DB 102,69,15,110,216 ; movd %r8d,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 -- xmm11 = 2^23 in all lanes
+ DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,236 ; mulps %xmm12,%xmm13
+ DB 69,15,92,253 ; subps %xmm13,%xmm15 -- xmm15 -= const(0x3fbebc8d) * f
+ DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 185,248,245,154,64 ; mov $0x409af5f8,%ecx
+ DB 102,68,15,110,241 ; movd %ecx,%xmm14
+ DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14
+ DB 69,15,92,244 ; subps %xmm12,%xmm14
+ DB 69,15,94,238 ; divps %xmm14,%xmm13
+ DB 69,15,88,239 ; addps %xmm15,%xmm13 -- xmm13 = xmm15 + const(0x41ddd2fe) / (const(0x409af5f8) - f)
+ DB 69,15,89,235 ; mulps %xmm11,%xmm13 -- scale by 2^23
+ DB 102,69,15,91,221 ; cvtps2dq %xmm13,%xmm11 -- to integers; these are then used as float bits (the pow2 half of approx_powf, per the commit message)
+ DB 243,68,15,16,96,20 ; movss 0x14(%rax),%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,88,227 ; addps %xmm11,%xmm12 -- xmm12 = power estimate + ctx[0x14]
+ DB 68,15,84,192 ; andps %xmm0,%xmm8 -- bitwise select in place of blendvps: keep the linear value where the mask is set
+ DB 65,15,85,196 ; andnps %xmm12,%xmm0 -- keep the power value where the mask is clear
+ DB 65,15,86,192 ; orps %xmm8,%xmm0 -- merge the two halves into xmm0
+ DB 65,15,95,193 ; maxps %xmm9,%xmm0 -- clamp below at 0
+ DB 65,15,93,194 ; minps %xmm10,%xmm0 -- clamp above at 1
+ DB 72,173 ; lods %ds:(%rsi),%rax -- rax = next pointer from the stream
+ DB 255,224 ; jmpq *%rax -- tail-jump to that pointer
+
+PUBLIC _sk_parametric_g_sse2 ; SSE2 stage: replaces xmm1 (4 floats x) with a piecewise curve of x, clamped to [0,1], then jumps to the next stage
+_sk_parametric_g_sse2 LABEL PROC ; in: rsi -> stream of pointers, xmm1 = x (presumably green, per the _g suffix); scratch: rax, ecx, r8d, xmm8-xmm15
+ DB 72,173 ; lods %ds:(%rsi),%rax -- rax = ctx pointer taken from the stream at rsi; floats at ctx+0x0..0x18 are read below
+ DB 243,68,15,16,72,16 ; movss 0x10(%rax),%xmm9 -- threshold = ctx[0x10]
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 -- broadcast lane 0 to all 4 lanes (same idiom for every movss/movd constant below)
+ DB 243,68,15,16,64,12 ; movss 0xc(%rax),%xmm8
+ DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8
+ DB 68,15,89,193 ; mulps %xmm1,%xmm8 -- xmm8 = ctx[0xc] * x
+ DB 243,68,15,16,80,4 ; movss 0x4(%rax),%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 68,15,89,209 ; mulps %xmm1,%xmm10 -- xmm10 = ctx[0x4] * x
+ DB 65,15,194,201,2 ; cmpleps %xmm9,%xmm1 -- xmm1 = mask(x <= ctx[0x10]); x is not needed after this
+ DB 243,68,15,16,72,24 ; movss 0x18(%rax),%xmm9
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 69,15,88,193 ; addps %xmm9,%xmm8 -- xmm8 = ctx[0xc]*x + ctx[0x18] (linear segment)
+ DB 243,68,15,16,8 ; movss (%rax),%xmm9 -- exponent = ctx[0x0], broadcast later
+ DB 243,68,15,16,88,8 ; movss 0x8(%rax),%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,88,211 ; addps %xmm11,%xmm10 -- xmm10 = base = ctx[0x4]*x + ctx[0x8]
+ DB 69,15,91,218 ; cvtdq2ps %xmm10,%xmm11 -- log2 approximation starts: the float bits of base are converted as integers
+ DB 185,0,0,0,52 ; mov $0x34000000,%ecx -- 2^-23 as float bits
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,235 ; mulps %xmm11,%xmm13
+ DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx -- 127.0 as float bits
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,92,235 ; subps %xmm11,%xmm13 -- xmm13 = bits * 2^-23 - 127
+ DB 185,255,255,127,0 ; mov $0x7fffff,%ecx -- mask of the 23 mantissa bits
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 102,69,15,112,227,0 ; pshufd $0x0,%xmm11,%xmm12
+ DB 102,69,15,219,226 ; pand %xmm10,%xmm12
+ DB 185,0,0,0,63 ; mov $0x3f000000,%ecx -- 0.5 as float bits
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 102,69,15,112,218,0 ; pshufd $0x0,%xmm10,%xmm11
+ DB 102,69,15,235,220 ; por %xmm12,%xmm11 -- xmm11 = m: mantissa of base with the exponent bits of 0.5
+ DB 185,42,145,49,64 ; mov $0x4031912a,%ecx
+ DB 102,68,15,110,225 ; movd %ecx,%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,88,229 ; addps %xmm13,%xmm12 -- xmm12 = xmm13 + const(0x4031912a)
+ DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 69,15,89,211 ; mulps %xmm11,%xmm10
+ DB 69,15,92,226 ; subps %xmm10,%xmm12 -- xmm12 -= const(0x3fbfbf75) * m
+ DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx
+ DB 102,68,15,110,241 ; movd %ecx,%xmm14
+ DB 185,0,0,128,63 ; mov $0x3f800000,%ecx -- 1.0 as float bits
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10 -- xmm10 holds 1.0 until the final minps (broadcast further down)
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d -- 2^23 as float bits, parked in r8d until it is moved to xmm11 below
+ DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx
+ DB 102,68,15,110,249 ; movd %ecx,%xmm15
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14
+ DB 69,15,88,243 ; addps %xmm11,%xmm14
+ DB 69,15,94,238 ; divps %xmm14,%xmm13
+ DB 69,15,92,229 ; subps %xmm13,%xmm12 -- xmm12 -= const(0x3fdce9a3) / (const(0x3eb444f9) + m); xmm12 is the log2 estimate
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 69,15,89,204 ; mulps %xmm12,%xmm9 -- xmm9 = p = ctx[0x0] * log2 estimate
+ DB 243,69,15,91,217 ; cvttps2dq %xmm9,%xmm11 -- floor(p) without roundps: truncate toward zero...
+ DB 69,15,91,219 ; cvtdq2ps %xmm11,%xmm11 -- ...and convert back to float
+ DB 69,15,40,225 ; movaps %xmm9,%xmm12
+ DB 69,15,198,255,0 ; shufps $0x0,%xmm15,%xmm15
+ DB 69,15,88,249 ; addps %xmm9,%xmm15 -- xmm15 = p + const(0x42f28c51)
+ DB 69,15,40,233 ; movaps %xmm9,%xmm13
+ DB 69,15,194,235,1 ; cmpltps %xmm11,%xmm13 -- mask(p < trunc(p))
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 69,15,84,234 ; andps %xmm10,%xmm13 -- 1.0 in lanes where truncation rounded up
+ DB 69,15,87,201 ; xorps %xmm9,%xmm9 -- xmm9 = 0, kept for the final maxps
+ DB 69,15,92,221 ; subps %xmm13,%xmm11 -- xmm11 = floor(p)
+ DB 69,15,92,227 ; subps %xmm11,%xmm12 -- xmm12 = f = p - floor(p)
+ DB 102,69,15,110,216 ; movd %r8d,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 -- xmm11 = 2^23 in all lanes
+ DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,236 ; mulps %xmm12,%xmm13
+ DB 69,15,92,253 ; subps %xmm13,%xmm15 -- xmm15 -= const(0x3fbebc8d) * f
+ DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 185,248,245,154,64 ; mov $0x409af5f8,%ecx
+ DB 102,68,15,110,241 ; movd %ecx,%xmm14
+ DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14
+ DB 69,15,92,244 ; subps %xmm12,%xmm14
+ DB 69,15,94,238 ; divps %xmm14,%xmm13
+ DB 69,15,88,239 ; addps %xmm15,%xmm13 -- xmm13 = xmm15 + const(0x41ddd2fe) / (const(0x409af5f8) - f)
+ DB 69,15,89,235 ; mulps %xmm11,%xmm13 -- scale by 2^23
+ DB 102,69,15,91,221 ; cvtps2dq %xmm13,%xmm11 -- to integers; these are then used as float bits (the pow2 half of approx_powf, per the commit message)
+ DB 243,68,15,16,96,20 ; movss 0x14(%rax),%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,88,227 ; addps %xmm11,%xmm12 -- xmm12 = power estimate + ctx[0x14]
+ DB 68,15,84,193 ; andps %xmm1,%xmm8 -- bitwise select in place of blendvps: keep the linear value where the mask is set
+ DB 65,15,85,204 ; andnps %xmm12,%xmm1 -- keep the power value where the mask is clear
+ DB 65,15,86,200 ; orps %xmm8,%xmm1 -- merge the two halves into xmm1
+ DB 65,15,95,201 ; maxps %xmm9,%xmm1 -- clamp below at 0
+ DB 65,15,93,202 ; minps %xmm10,%xmm1 -- clamp above at 1
+ DB 72,173 ; lods %ds:(%rsi),%rax -- rax = next pointer from the stream
+ DB 255,224 ; jmpq *%rax -- tail-jump to that pointer
+
+PUBLIC _sk_parametric_b_sse2 ; SSE2 stage: replaces xmm2 (4 floats x) with a piecewise curve of x, clamped to [0,1], then jumps to the next stage
+_sk_parametric_b_sse2 LABEL PROC ; in: rsi -> stream of pointers, xmm2 = x (presumably blue, per the _b suffix); scratch: rax, ecx, r8d, xmm8-xmm15
+ DB 72,173 ; lods %ds:(%rsi),%rax -- rax = ctx pointer taken from the stream at rsi; floats at ctx+0x0..0x18 are read below
+ DB 243,68,15,16,72,16 ; movss 0x10(%rax),%xmm9 -- threshold = ctx[0x10]
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 -- broadcast lane 0 to all 4 lanes (same idiom for every movss/movd constant below)
+ DB 243,68,15,16,64,12 ; movss 0xc(%rax),%xmm8
+ DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8
+ DB 68,15,89,194 ; mulps %xmm2,%xmm8 -- xmm8 = ctx[0xc] * x
+ DB 243,68,15,16,80,4 ; movss 0x4(%rax),%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 68,15,89,210 ; mulps %xmm2,%xmm10 -- xmm10 = ctx[0x4] * x
+ DB 65,15,194,209,2 ; cmpleps %xmm9,%xmm2 -- xmm2 = mask(x <= ctx[0x10]); x is not needed after this
+ DB 243,68,15,16,72,24 ; movss 0x18(%rax),%xmm9
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 69,15,88,193 ; addps %xmm9,%xmm8 -- xmm8 = ctx[0xc]*x + ctx[0x18] (linear segment)
+ DB 243,68,15,16,8 ; movss (%rax),%xmm9 -- exponent = ctx[0x0], broadcast later
+ DB 243,68,15,16,88,8 ; movss 0x8(%rax),%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,88,211 ; addps %xmm11,%xmm10 -- xmm10 = base = ctx[0x4]*x + ctx[0x8]
+ DB 69,15,91,218 ; cvtdq2ps %xmm10,%xmm11 -- log2 approximation starts: the float bits of base are converted as integers
+ DB 185,0,0,0,52 ; mov $0x34000000,%ecx -- 2^-23 as float bits
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,235 ; mulps %xmm11,%xmm13
+ DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx -- 127.0 as float bits
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,92,235 ; subps %xmm11,%xmm13 -- xmm13 = bits * 2^-23 - 127
+ DB 185,255,255,127,0 ; mov $0x7fffff,%ecx -- mask of the 23 mantissa bits
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 102,69,15,112,227,0 ; pshufd $0x0,%xmm11,%xmm12
+ DB 102,69,15,219,226 ; pand %xmm10,%xmm12
+ DB 185,0,0,0,63 ; mov $0x3f000000,%ecx -- 0.5 as float bits
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 102,69,15,112,218,0 ; pshufd $0x0,%xmm10,%xmm11
+ DB 102,69,15,235,220 ; por %xmm12,%xmm11 -- xmm11 = m: mantissa of base with the exponent bits of 0.5
+ DB 185,42,145,49,64 ; mov $0x4031912a,%ecx
+ DB 102,68,15,110,225 ; movd %ecx,%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,88,229 ; addps %xmm13,%xmm12 -- xmm12 = xmm13 + const(0x4031912a)
+ DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 69,15,89,211 ; mulps %xmm11,%xmm10
+ DB 69,15,92,226 ; subps %xmm10,%xmm12 -- xmm12 -= const(0x3fbfbf75) * m
+ DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx
+ DB 102,68,15,110,241 ; movd %ecx,%xmm14
+ DB 185,0,0,128,63 ; mov $0x3f800000,%ecx -- 1.0 as float bits
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10 -- xmm10 holds 1.0 until the final minps (broadcast further down)
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d -- 2^23 as float bits, parked in r8d until it is moved to xmm11 below
+ DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx
+ DB 102,68,15,110,249 ; movd %ecx,%xmm15
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14
+ DB 69,15,88,243 ; addps %xmm11,%xmm14
+ DB 69,15,94,238 ; divps %xmm14,%xmm13
+ DB 69,15,92,229 ; subps %xmm13,%xmm12 -- xmm12 -= const(0x3fdce9a3) / (const(0x3eb444f9) + m); xmm12 is the log2 estimate
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 69,15,89,204 ; mulps %xmm12,%xmm9 -- xmm9 = p = ctx[0x0] * log2 estimate
+ DB 243,69,15,91,217 ; cvttps2dq %xmm9,%xmm11 -- floor(p) without roundps: truncate toward zero...
+ DB 69,15,91,219 ; cvtdq2ps %xmm11,%xmm11 -- ...and convert back to float
+ DB 69,15,40,225 ; movaps %xmm9,%xmm12
+ DB 69,15,198,255,0 ; shufps $0x0,%xmm15,%xmm15
+ DB 69,15,88,249 ; addps %xmm9,%xmm15 -- xmm15 = p + const(0x42f28c51)
+ DB 69,15,40,233 ; movaps %xmm9,%xmm13
+ DB 69,15,194,235,1 ; cmpltps %xmm11,%xmm13 -- mask(p < trunc(p))
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 69,15,84,234 ; andps %xmm10,%xmm13 -- 1.0 in lanes where truncation rounded up
+ DB 69,15,87,201 ; xorps %xmm9,%xmm9 -- xmm9 = 0, kept for the final maxps
+ DB 69,15,92,221 ; subps %xmm13,%xmm11 -- xmm11 = floor(p)
+ DB 69,15,92,227 ; subps %xmm11,%xmm12 -- xmm12 = f = p - floor(p)
+ DB 102,69,15,110,216 ; movd %r8d,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 -- xmm11 = 2^23 in all lanes
+ DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,236 ; mulps %xmm12,%xmm13
+ DB 69,15,92,253 ; subps %xmm13,%xmm15 -- xmm15 -= const(0x3fbebc8d) * f
+ DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 185,248,245,154,64 ; mov $0x409af5f8,%ecx
+ DB 102,68,15,110,241 ; movd %ecx,%xmm14
+ DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14
+ DB 69,15,92,244 ; subps %xmm12,%xmm14
+ DB 69,15,94,238 ; divps %xmm14,%xmm13
+ DB 69,15,88,239 ; addps %xmm15,%xmm13 -- xmm13 = xmm15 + const(0x41ddd2fe) / (const(0x409af5f8) - f)
+ DB 69,15,89,235 ; mulps %xmm11,%xmm13 -- scale by 2^23
+ DB 102,69,15,91,221 ; cvtps2dq %xmm13,%xmm11 -- to integers; these are then used as float bits (the pow2 half of approx_powf, per the commit message)
+ DB 243,68,15,16,96,20 ; movss 0x14(%rax),%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,88,227 ; addps %xmm11,%xmm12 -- xmm12 = power estimate + ctx[0x14]
+ DB 68,15,84,194 ; andps %xmm2,%xmm8 -- bitwise select in place of blendvps: keep the linear value where the mask is set
+ DB 65,15,85,212 ; andnps %xmm12,%xmm2 -- keep the power value where the mask is clear
+ DB 65,15,86,208 ; orps %xmm8,%xmm2 -- merge the two halves into xmm2
+ DB 65,15,95,209 ; maxps %xmm9,%xmm2 -- clamp below at 0
+ DB 65,15,93,210 ; minps %xmm10,%xmm2 -- clamp above at 1
+ DB 72,173 ; lods %ds:(%rsi),%rax -- rax = next pointer from the stream
+ DB 255,224 ; jmpq *%rax -- tail-jump to that pointer
+
+PUBLIC _sk_parametric_a_sse2 ; SSE2 stage: replaces xmm3 (4 floats x) with a piecewise curve of x, clamped to [0,1], then jumps to the next stage
+_sk_parametric_a_sse2 LABEL PROC ; in: rsi -> stream of pointers, xmm3 = x (presumably alpha, per the _a suffix); scratch: rax, ecx, r8d, xmm8-xmm15
+ DB 72,173 ; lods %ds:(%rsi),%rax -- rax = ctx pointer taken from the stream at rsi; floats at ctx+0x0..0x18 are read below
+ DB 243,68,15,16,72,16 ; movss 0x10(%rax),%xmm9 -- threshold = ctx[0x10]
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 -- broadcast lane 0 to all 4 lanes (same idiom for every movss/movd constant below)
+ DB 243,68,15,16,64,12 ; movss 0xc(%rax),%xmm8
+ DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8
+ DB 68,15,89,195 ; mulps %xmm3,%xmm8 -- xmm8 = ctx[0xc] * x
+ DB 243,68,15,16,80,4 ; movss 0x4(%rax),%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 68,15,89,211 ; mulps %xmm3,%xmm10 -- xmm10 = ctx[0x4] * x
+ DB 65,15,194,217,2 ; cmpleps %xmm9,%xmm3 -- xmm3 = mask(x <= ctx[0x10]); x is not needed after this
+ DB 243,68,15,16,72,24 ; movss 0x18(%rax),%xmm9
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 69,15,88,193 ; addps %xmm9,%xmm8 -- xmm8 = ctx[0xc]*x + ctx[0x18] (linear segment)
+ DB 243,68,15,16,8 ; movss (%rax),%xmm9 -- exponent = ctx[0x0], broadcast later
+ DB 243,68,15,16,88,8 ; movss 0x8(%rax),%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,88,211 ; addps %xmm11,%xmm10 -- xmm10 = base = ctx[0x4]*x + ctx[0x8]
+ DB 69,15,91,218 ; cvtdq2ps %xmm10,%xmm11 -- log2 approximation starts: the float bits of base are converted as integers
+ DB 185,0,0,0,52 ; mov $0x34000000,%ecx -- 2^-23 as float bits
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,235 ; mulps %xmm11,%xmm13
+ DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx -- 127.0 as float bits
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11
+ DB 69,15,92,235 ; subps %xmm11,%xmm13 -- xmm13 = bits * 2^-23 - 127
+ DB 185,255,255,127,0 ; mov $0x7fffff,%ecx -- mask of the 23 mantissa bits
+ DB 102,68,15,110,217 ; movd %ecx,%xmm11
+ DB 102,69,15,112,227,0 ; pshufd $0x0,%xmm11,%xmm12
+ DB 102,69,15,219,226 ; pand %xmm10,%xmm12
+ DB 185,0,0,0,63 ; mov $0x3f000000,%ecx -- 0.5 as float bits
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 102,69,15,112,218,0 ; pshufd $0x0,%xmm10,%xmm11
+ DB 102,69,15,235,220 ; por %xmm12,%xmm11 -- xmm11 = m: mantissa of base with the exponent bits of 0.5
+ DB 185,42,145,49,64 ; mov $0x4031912a,%ecx
+ DB 102,68,15,110,225 ; movd %ecx,%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,88,229 ; addps %xmm13,%xmm12 -- xmm12 = xmm13 + const(0x4031912a)
+ DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 69,15,89,211 ; mulps %xmm11,%xmm10
+ DB 69,15,92,226 ; subps %xmm10,%xmm12 -- xmm12 -= const(0x3fbfbf75) * m
+ DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx
+ DB 102,68,15,110,241 ; movd %ecx,%xmm14
+ DB 185,0,0,128,63 ; mov $0x3f800000,%ecx -- 1.0 as float bits
+ DB 102,68,15,110,209 ; movd %ecx,%xmm10 -- xmm10 holds 1.0 until the final minps (broadcast further down)
+ DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d -- 2^23 as float bits, parked in r8d until it is moved to xmm11 below
+ DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx
+ DB 102,68,15,110,249 ; movd %ecx,%xmm15
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14
+ DB 69,15,88,243 ; addps %xmm11,%xmm14
+ DB 69,15,94,238 ; divps %xmm14,%xmm13
+ DB 69,15,92,229 ; subps %xmm13,%xmm12 -- xmm12 -= const(0x3fdce9a3) / (const(0x3eb444f9) + m); xmm12 is the log2 estimate
+ DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9
+ DB 69,15,89,204 ; mulps %xmm12,%xmm9 -- xmm9 = p = ctx[0x0] * log2 estimate
+ DB 243,69,15,91,217 ; cvttps2dq %xmm9,%xmm11 -- floor(p) without roundps: truncate toward zero...
+ DB 69,15,91,219 ; cvtdq2ps %xmm11,%xmm11 -- ...and convert back to float
+ DB 69,15,40,225 ; movaps %xmm9,%xmm12
+ DB 69,15,198,255,0 ; shufps $0x0,%xmm15,%xmm15
+ DB 69,15,88,249 ; addps %xmm9,%xmm15 -- xmm15 = p + const(0x42f28c51)
+ DB 69,15,40,233 ; movaps %xmm9,%xmm13
+ DB 69,15,194,235,1 ; cmpltps %xmm11,%xmm13 -- mask(p < trunc(p))
+ DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10
+ DB 69,15,84,234 ; andps %xmm10,%xmm13 -- 1.0 in lanes where truncation rounded up
+ DB 69,15,87,201 ; xorps %xmm9,%xmm9 -- xmm9 = 0, kept for the final maxps
+ DB 69,15,92,221 ; subps %xmm13,%xmm11 -- xmm11 = floor(p)
+ DB 69,15,92,227 ; subps %xmm11,%xmm12 -- xmm12 = f = p - floor(p)
+ DB 102,69,15,110,216 ; movd %r8d,%xmm11
+ DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 -- xmm11 = 2^23 in all lanes
+ DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 69,15,89,236 ; mulps %xmm12,%xmm13
+ DB 69,15,92,253 ; subps %xmm13,%xmm15 -- xmm15 -= const(0x3fbebc8d) * f
+ DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx
+ DB 102,68,15,110,233 ; movd %ecx,%xmm13
+ DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13
+ DB 185,248,245,154,64 ; mov $0x409af5f8,%ecx
+ DB 102,68,15,110,241 ; movd %ecx,%xmm14
+ DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14
+ DB 69,15,92,244 ; subps %xmm12,%xmm14
+ DB 69,15,94,238 ; divps %xmm14,%xmm13
+ DB 69,15,88,239 ; addps %xmm15,%xmm13 -- xmm13 = xmm15 + const(0x41ddd2fe) / (const(0x409af5f8) - f)
+ DB 69,15,89,235 ; mulps %xmm11,%xmm13 -- scale by 2^23
+ DB 102,69,15,91,221 ; cvtps2dq %xmm13,%xmm11 -- to integers; these are then used as float bits (the pow2 half of approx_powf, per the commit message)
+ DB 243,68,15,16,96,20 ; movss 0x14(%rax),%xmm12
+ DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
+ DB 69,15,88,227 ; addps %xmm11,%xmm12 -- xmm12 = power estimate + ctx[0x14]
+ DB 68,15,84,195 ; andps %xmm3,%xmm8 -- bitwise select in place of blendvps: keep the linear value where the mask is set
+ DB 65,15,85,220 ; andnps %xmm12,%xmm3 -- keep the power value where the mask is clear
+ DB 65,15,86,216 ; orps %xmm8,%xmm3 -- merge the two halves into xmm3
+ DB 65,15,95,217 ; maxps %xmm9,%xmm3 -- clamp below at 0
+ DB 65,15,93,218 ; minps %xmm10,%xmm3 -- clamp above at 1
+ DB 72,173 ; lods %ds:(%rsi),%rax -- rax = next pointer from the stream
+ DB 255,224 ; jmpq *%rax -- tail-jump to that pointer
+
PUBLIC _sk_load_a8_sse2
_sk_load_a8_sse2 LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -15254,9 +16780,9 @@ _sk_gather_i8_sse2 LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 73,137,192 ; mov %rax,%r8
DB 77,133,192 ; test %r8,%r8
- DB 116,5 ; je 2285 <_sk_gather_i8_sse2+0xf>
+ DB 116,5 ; je 29b9 <_sk_gather_i8_sse2+0xf>
DB 76,137,192 ; mov %r8,%rax
- DB 235,2 ; jmp 2287 <_sk_gather_i8_sse2+0x11>
+ DB 235,2 ; jmp 29bb <_sk_gather_i8_sse2+0x11>
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,16 ; mov (%rax),%r10
DB 243,15,91,201 ; cvttps2dq %xmm1,%xmm1
@@ -16453,7 +17979,7 @@ _sk_linear_gradient_sse2 LABEL PROC
DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12
DB 72,139,8 ; mov (%rax),%rcx
DB 72,133,201 ; test %rcx,%rcx
- DB 15,132,15,1,0,0 ; je 366f <_sk_linear_gradient_sse2+0x149>
+ DB 15,132,15,1,0,0 ; je 3da3 <_sk_linear_gradient_sse2+0x149>
DB 72,139,64,8 ; mov 0x8(%rax),%rax
DB 72,131,192,32 ; add $0x20,%rax
DB 69,15,87,192 ; xorps %xmm8,%xmm8
@@ -16514,8 +18040,8 @@ _sk_linear_gradient_sse2 LABEL PROC
DB 69,15,86,231 ; orps %xmm15,%xmm12
DB 72,131,192,36 ; add $0x24,%rax
DB 72,255,201 ; dec %rcx
- DB 15,133,8,255,255,255 ; jne 3575 <_sk_linear_gradient_sse2+0x4f>
- DB 235,13 ; jmp 367c <_sk_linear_gradient_sse2+0x156>
+ DB 15,133,8,255,255,255 ; jne 3ca9 <_sk_linear_gradient_sse2+0x4f>
+ DB 235,13 ; jmp 3db0 <_sk_linear_gradient_sse2+0x156>
DB 15,87,201 ; xorps %xmm1,%xmm1
DB 15,87,210 ; xorps %xmm2,%xmm2
DB 15,87,219 ; xorps %xmm3,%xmm3