From 44375176c06f00682518a03d4983554ca8fb5b6a Mon Sep 17 00:00:00 2001 From: Mike Klein Date: Mon, 17 Apr 2017 19:32:05 -0400 Subject: jumper, parametric_{r,g,b,a} I've tried a couple of ideas for approx_powf(): 1) accumulate integer powers of x, then 4th roots, then 16th roots 2) continue 1) all the way to 256th roots 3) decompose into pow2 and log2, exploiting IEEE float layout 4) slightly tune constants used in 3) 5) accumulate integer powers of x, then 3+4) with different tuning 6) follow a source online, basically 5 with finesse 7) a new source quoting and improving on the method in 6). 7) seems perfect, enough that maybe we can explore improving its speed at cost of precision. Might be nice to get rid of those divides. If we allow a small tolerance (2-5) in our tests, we could use the very simple fast forms from 3) (e.g. PS 5). I wish I had some images to look at! Anything involving roots seems to be subverted by poor rsqrt precision. This change of course affects the pipelines created by the tests for exponential and full parametric gamma curves. What's less obvious is that it also means SkJumper can now for the first time run the pipeline created by the mixed gamma curves test. This means we now need to relax our tolerance for the table-based channel, just like we did when implementing table_{r,g,b,a}. This took me an embarrassingly long time to figure out. 
*face palm* Change-Id: I451ee3c970a0a4a4e285f8aa8f6ef709a654d247 Reviewed-on: https://skia-review.googlesource.com/13656 Commit-Queue: Mike Klein Reviewed-by: Matt Sarett Reviewed-by: Herb Derby --- src/jumper/SkJumper_generated_win.S | 2060 ++++++++++++++++++++++++++++++----- 1 file changed, 1793 insertions(+), 267 deletions(-) (limited to 'src/jumper/SkJumper_generated_win.S') diff --git a/src/jumper/SkJumper_generated_win.S b/src/jumper/SkJumper_generated_win.S index eb77b6d5a5..d8f82efef0 100644 --- a/src/jumper/SkJumper_generated_win.S +++ b/src/jumper/SkJumper_generated_win.S @@ -1357,7 +1357,7 @@ _sk_lerp_565_hsw LABEL PROC DB 255 ; (bad) DB 255 ; (bad) DB 255 ; (bad) - DB 233,255,255,255,225 ; jmpq ffffffffe2001478 <_sk_callback_hsw+0xffffffffe1ffd85e> + DB 233,255,255,255,225 ; jmpq ffffffffe2001478 <_sk_callback_hsw+0xffffffffe1ffd22e> DB 255 ; (bad) DB 255 ; (bad) DB 255 ; (bad) @@ -1934,6 +1934,334 @@ _sk_table_a_hsw LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 255,224 ; jmpq *%rax +PUBLIC _sk_parametric_r_hsw +_sk_parametric_r_hsw LABEL PROC + DB 72,173 ; lods %ds:(%rsi),%rax + DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8 + DB 196,65,124,194,192,2 ; vcmpleps %ymm8,%ymm0,%ymm8 + DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9 + DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10 + DB 196,66,125,168,202 ; vfmadd213ps %ymm10,%ymm0,%ymm9 + DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10 + DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11 + DB 196,66,125,168,211 ; vfmadd213ps %ymm11,%ymm0,%ymm10 + DB 196,65,124,91,218 ; vcvtdq2ps %ymm10,%ymm11 + DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d + DB 196,193,121,110,192 ; vmovd %r8d,%xmm0 + DB 196,98,125,88,224 ; vpbroadcastd %xmm0,%ymm12 + DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d + DB 196,193,121,110,192 ; vmovd %r8d,%xmm0 + DB 196,98,125,88,232 ; vpbroadcastd %xmm0,%ymm13 + DB 196,66,37,186,236 ; vfmsub231ps %ymm12,%ymm11,%ymm13 + DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d + DB 
196,193,121,110,192 ; vmovd %r8d,%xmm0 + DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0 + DB 196,65,125,219,210 ; vpand %ymm10,%ymm0,%ymm10 + DB 65,184,0,0,0,63 ; mov $0x3f000000,%r8d + DB 196,193,121,110,192 ; vmovd %r8d,%xmm0 + DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0 + DB 197,45,235,208 ; vpor %ymm0,%ymm10,%ymm10 + DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d + DB 196,193,121,110,192 ; vmovd %r8d,%xmm0 + DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0 + DB 197,20,88,216 ; vaddps %ymm0,%ymm13,%ymm11 + DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d + DB 196,193,121,110,192 ; vmovd %r8d,%xmm0 + DB 196,98,125,88,224 ; vpbroadcastd %xmm0,%ymm12 + DB 196,66,45,172,227 ; vfnmadd213ps %ymm11,%ymm10,%ymm12 + DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d + DB 196,193,121,110,192 ; vmovd %r8d,%xmm0 + DB 196,98,125,88,216 ; vpbroadcastd %xmm0,%ymm11 + DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d + DB 196,193,121,110,192 ; vmovd %r8d,%xmm0 + DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0 + DB 197,172,88,192 ; vaddps %ymm0,%ymm10,%ymm0 + DB 197,164,94,192 ; vdivps %ymm0,%ymm11,%ymm0 + DB 197,156,92,192 ; vsubps %ymm0,%ymm12,%ymm0 + DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10 + DB 197,44,89,216 ; vmulps %ymm0,%ymm10,%ymm11 + DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10 + DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10 + DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d + DB 196,193,121,110,192 ; vmovd %r8d,%xmm0 + DB 196,98,125,88,224 ; vpbroadcastd %xmm0,%ymm12 + DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d + DB 196,193,121,110,192 ; vmovd %r8d,%xmm0 + DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0 + DB 196,65,124,88,219 ; vaddps %ymm11,%ymm0,%ymm11 + DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d + DB 196,193,121,110,192 ; vmovd %r8d,%xmm0 + DB 196,98,125,88,232 ; vpbroadcastd %xmm0,%ymm13 + DB 196,66,45,172,235 ; vfnmadd213ps %ymm11,%ymm10,%ymm13 + DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d + DB 196,193,121,110,192 ; vmovd %r8d,%xmm0 + DB 
196,98,125,88,216 ; vpbroadcastd %xmm0,%ymm11 + DB 65,184,248,245,154,64 ; mov $0x409af5f8,%r8d + DB 196,193,121,110,192 ; vmovd %r8d,%xmm0 + DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0 + DB 196,193,124,92,194 ; vsubps %ymm10,%ymm0,%ymm0 + DB 197,164,94,192 ; vdivps %ymm0,%ymm11,%ymm0 + DB 197,148,88,192 ; vaddps %ymm0,%ymm13,%ymm0 + DB 197,156,89,192 ; vmulps %ymm0,%ymm12,%ymm0 + DB 197,253,91,192 ; vcvtps2dq %ymm0,%ymm0 + DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10 + DB 196,193,124,88,194 ; vaddps %ymm10,%ymm0,%ymm0 + DB 196,195,125,74,193,128 ; vblendvps %ymm8,%ymm9,%ymm0,%ymm0 + DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8 + DB 196,65,124,95,192 ; vmaxps %ymm8,%ymm0,%ymm8 + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 197,249,110,192 ; vmovd %eax,%xmm0 + DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0 + DB 197,188,93,192 ; vminps %ymm0,%ymm8,%ymm0 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_parametric_g_hsw +_sk_parametric_g_hsw LABEL PROC + DB 72,173 ; lods %ds:(%rsi),%rax + DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8 + DB 196,65,116,194,192,2 ; vcmpleps %ymm8,%ymm1,%ymm8 + DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9 + DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10 + DB 196,66,117,168,202 ; vfmadd213ps %ymm10,%ymm1,%ymm9 + DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10 + DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11 + DB 196,66,117,168,211 ; vfmadd213ps %ymm11,%ymm1,%ymm10 + DB 196,65,124,91,218 ; vcvtdq2ps %ymm10,%ymm11 + DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d + DB 196,193,121,110,200 ; vmovd %r8d,%xmm1 + DB 196,98,125,88,225 ; vpbroadcastd %xmm1,%ymm12 + DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d + DB 196,193,121,110,200 ; vmovd %r8d,%xmm1 + DB 196,98,125,88,233 ; vpbroadcastd %xmm1,%ymm13 + DB 196,66,37,186,236 ; vfmsub231ps %ymm12,%ymm11,%ymm13 + DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d + DB 196,193,121,110,200 ; vmovd %r8d,%xmm1 + DB 196,226,125,88,201 
; vpbroadcastd %xmm1,%ymm1 + DB 196,65,117,219,210 ; vpand %ymm10,%ymm1,%ymm10 + DB 65,184,0,0,0,63 ; mov $0x3f000000,%r8d + DB 196,193,121,110,200 ; vmovd %r8d,%xmm1 + DB 196,226,125,88,201 ; vpbroadcastd %xmm1,%ymm1 + DB 197,45,235,209 ; vpor %ymm1,%ymm10,%ymm10 + DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d + DB 196,193,121,110,200 ; vmovd %r8d,%xmm1 + DB 196,226,125,88,201 ; vpbroadcastd %xmm1,%ymm1 + DB 197,20,88,217 ; vaddps %ymm1,%ymm13,%ymm11 + DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d + DB 196,193,121,110,200 ; vmovd %r8d,%xmm1 + DB 196,98,125,88,225 ; vpbroadcastd %xmm1,%ymm12 + DB 196,66,45,172,227 ; vfnmadd213ps %ymm11,%ymm10,%ymm12 + DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d + DB 196,193,121,110,200 ; vmovd %r8d,%xmm1 + DB 196,98,125,88,217 ; vpbroadcastd %xmm1,%ymm11 + DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d + DB 196,193,121,110,200 ; vmovd %r8d,%xmm1 + DB 196,226,125,88,201 ; vpbroadcastd %xmm1,%ymm1 + DB 197,172,88,201 ; vaddps %ymm1,%ymm10,%ymm1 + DB 197,164,94,201 ; vdivps %ymm1,%ymm11,%ymm1 + DB 197,156,92,201 ; vsubps %ymm1,%ymm12,%ymm1 + DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10 + DB 197,44,89,217 ; vmulps %ymm1,%ymm10,%ymm11 + DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10 + DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10 + DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d + DB 196,193,121,110,200 ; vmovd %r8d,%xmm1 + DB 196,98,125,88,225 ; vpbroadcastd %xmm1,%ymm12 + DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d + DB 196,193,121,110,200 ; vmovd %r8d,%xmm1 + DB 196,226,125,88,201 ; vpbroadcastd %xmm1,%ymm1 + DB 196,65,116,88,219 ; vaddps %ymm11,%ymm1,%ymm11 + DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d + DB 196,193,121,110,200 ; vmovd %r8d,%xmm1 + DB 196,98,125,88,233 ; vpbroadcastd %xmm1,%ymm13 + DB 196,66,45,172,235 ; vfnmadd213ps %ymm11,%ymm10,%ymm13 + DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d + DB 196,193,121,110,200 ; vmovd %r8d,%xmm1 + DB 196,98,125,88,217 ; vpbroadcastd %xmm1,%ymm11 + DB 
65,184,248,245,154,64 ; mov $0x409af5f8,%r8d + DB 196,193,121,110,200 ; vmovd %r8d,%xmm1 + DB 196,226,125,88,201 ; vpbroadcastd %xmm1,%ymm1 + DB 196,193,116,92,202 ; vsubps %ymm10,%ymm1,%ymm1 + DB 197,164,94,201 ; vdivps %ymm1,%ymm11,%ymm1 + DB 197,148,88,201 ; vaddps %ymm1,%ymm13,%ymm1 + DB 197,156,89,201 ; vmulps %ymm1,%ymm12,%ymm1 + DB 197,253,91,201 ; vcvtps2dq %ymm1,%ymm1 + DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10 + DB 196,193,116,88,202 ; vaddps %ymm10,%ymm1,%ymm1 + DB 196,195,117,74,201,128 ; vblendvps %ymm8,%ymm9,%ymm1,%ymm1 + DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8 + DB 196,65,116,95,192 ; vmaxps %ymm8,%ymm1,%ymm8 + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 197,249,110,200 ; vmovd %eax,%xmm1 + DB 196,226,125,88,201 ; vpbroadcastd %xmm1,%ymm1 + DB 197,188,93,201 ; vminps %ymm1,%ymm8,%ymm1 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_parametric_b_hsw +_sk_parametric_b_hsw LABEL PROC + DB 72,173 ; lods %ds:(%rsi),%rax + DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8 + DB 196,65,108,194,192,2 ; vcmpleps %ymm8,%ymm2,%ymm8 + DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9 + DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10 + DB 196,66,109,168,202 ; vfmadd213ps %ymm10,%ymm2,%ymm9 + DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10 + DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11 + DB 196,66,109,168,211 ; vfmadd213ps %ymm11,%ymm2,%ymm10 + DB 196,65,124,91,218 ; vcvtdq2ps %ymm10,%ymm11 + DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d + DB 196,193,121,110,208 ; vmovd %r8d,%xmm2 + DB 196,98,125,88,226 ; vpbroadcastd %xmm2,%ymm12 + DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d + DB 196,193,121,110,208 ; vmovd %r8d,%xmm2 + DB 196,98,125,88,234 ; vpbroadcastd %xmm2,%ymm13 + DB 196,66,37,186,236 ; vfmsub231ps %ymm12,%ymm11,%ymm13 + DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d + DB 196,193,121,110,208 ; vmovd %r8d,%xmm2 + DB 196,226,125,88,210 ; vpbroadcastd %xmm2,%ymm2 + DB 196,65,109,219,210 
; vpand %ymm10,%ymm2,%ymm10 + DB 65,184,0,0,0,63 ; mov $0x3f000000,%r8d + DB 196,193,121,110,208 ; vmovd %r8d,%xmm2 + DB 196,226,125,88,210 ; vpbroadcastd %xmm2,%ymm2 + DB 197,45,235,210 ; vpor %ymm2,%ymm10,%ymm10 + DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d + DB 196,193,121,110,208 ; vmovd %r8d,%xmm2 + DB 196,226,125,88,210 ; vpbroadcastd %xmm2,%ymm2 + DB 197,20,88,218 ; vaddps %ymm2,%ymm13,%ymm11 + DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d + DB 196,193,121,110,208 ; vmovd %r8d,%xmm2 + DB 196,98,125,88,226 ; vpbroadcastd %xmm2,%ymm12 + DB 196,66,45,172,227 ; vfnmadd213ps %ymm11,%ymm10,%ymm12 + DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d + DB 196,193,121,110,208 ; vmovd %r8d,%xmm2 + DB 196,98,125,88,218 ; vpbroadcastd %xmm2,%ymm11 + DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d + DB 196,193,121,110,208 ; vmovd %r8d,%xmm2 + DB 196,226,125,88,210 ; vpbroadcastd %xmm2,%ymm2 + DB 197,172,88,210 ; vaddps %ymm2,%ymm10,%ymm2 + DB 197,164,94,210 ; vdivps %ymm2,%ymm11,%ymm2 + DB 197,156,92,210 ; vsubps %ymm2,%ymm12,%ymm2 + DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10 + DB 197,44,89,218 ; vmulps %ymm2,%ymm10,%ymm11 + DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10 + DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10 + DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d + DB 196,193,121,110,208 ; vmovd %r8d,%xmm2 + DB 196,98,125,88,226 ; vpbroadcastd %xmm2,%ymm12 + DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d + DB 196,193,121,110,208 ; vmovd %r8d,%xmm2 + DB 196,226,125,88,210 ; vpbroadcastd %xmm2,%ymm2 + DB 196,65,108,88,219 ; vaddps %ymm11,%ymm2,%ymm11 + DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d + DB 196,193,121,110,208 ; vmovd %r8d,%xmm2 + DB 196,98,125,88,234 ; vpbroadcastd %xmm2,%ymm13 + DB 196,66,45,172,235 ; vfnmadd213ps %ymm11,%ymm10,%ymm13 + DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d + DB 196,193,121,110,208 ; vmovd %r8d,%xmm2 + DB 196,98,125,88,218 ; vpbroadcastd %xmm2,%ymm11 + DB 65,184,248,245,154,64 ; mov $0x409af5f8,%r8d + DB 
196,193,121,110,208 ; vmovd %r8d,%xmm2 + DB 196,226,125,88,210 ; vpbroadcastd %xmm2,%ymm2 + DB 196,193,108,92,210 ; vsubps %ymm10,%ymm2,%ymm2 + DB 197,164,94,210 ; vdivps %ymm2,%ymm11,%ymm2 + DB 197,148,88,210 ; vaddps %ymm2,%ymm13,%ymm2 + DB 197,156,89,210 ; vmulps %ymm2,%ymm12,%ymm2 + DB 197,253,91,210 ; vcvtps2dq %ymm2,%ymm2 + DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10 + DB 196,193,108,88,210 ; vaddps %ymm10,%ymm2,%ymm2 + DB 196,195,109,74,209,128 ; vblendvps %ymm8,%ymm9,%ymm2,%ymm2 + DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8 + DB 196,65,108,95,192 ; vmaxps %ymm8,%ymm2,%ymm8 + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 197,249,110,208 ; vmovd %eax,%xmm2 + DB 196,226,125,88,210 ; vpbroadcastd %xmm2,%ymm2 + DB 197,188,93,210 ; vminps %ymm2,%ymm8,%ymm2 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_parametric_a_hsw +_sk_parametric_a_hsw LABEL PROC + DB 72,173 ; lods %ds:(%rsi),%rax + DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8 + DB 196,65,100,194,192,2 ; vcmpleps %ymm8,%ymm3,%ymm8 + DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9 + DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10 + DB 196,66,101,168,202 ; vfmadd213ps %ymm10,%ymm3,%ymm9 + DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10 + DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11 + DB 196,66,101,168,211 ; vfmadd213ps %ymm11,%ymm3,%ymm10 + DB 196,65,124,91,218 ; vcvtdq2ps %ymm10,%ymm11 + DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d + DB 196,193,121,110,216 ; vmovd %r8d,%xmm3 + DB 196,98,125,88,227 ; vpbroadcastd %xmm3,%ymm12 + DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d + DB 196,193,121,110,216 ; vmovd %r8d,%xmm3 + DB 196,98,125,88,235 ; vpbroadcastd %xmm3,%ymm13 + DB 196,66,37,186,236 ; vfmsub231ps %ymm12,%ymm11,%ymm13 + DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d + DB 196,193,121,110,216 ; vmovd %r8d,%xmm3 + DB 196,226,125,88,219 ; vpbroadcastd %xmm3,%ymm3 + DB 196,65,101,219,210 ; vpand %ymm10,%ymm3,%ymm10 + DB 65,184,0,0,0,63 
; mov $0x3f000000,%r8d + DB 196,193,121,110,216 ; vmovd %r8d,%xmm3 + DB 196,226,125,88,219 ; vpbroadcastd %xmm3,%ymm3 + DB 197,45,235,211 ; vpor %ymm3,%ymm10,%ymm10 + DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d + DB 196,193,121,110,216 ; vmovd %r8d,%xmm3 + DB 196,226,125,88,219 ; vpbroadcastd %xmm3,%ymm3 + DB 197,20,88,219 ; vaddps %ymm3,%ymm13,%ymm11 + DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d + DB 196,193,121,110,216 ; vmovd %r8d,%xmm3 + DB 196,98,125,88,227 ; vpbroadcastd %xmm3,%ymm12 + DB 196,66,45,172,227 ; vfnmadd213ps %ymm11,%ymm10,%ymm12 + DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d + DB 196,193,121,110,216 ; vmovd %r8d,%xmm3 + DB 196,98,125,88,219 ; vpbroadcastd %xmm3,%ymm11 + DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d + DB 196,193,121,110,216 ; vmovd %r8d,%xmm3 + DB 196,226,125,88,219 ; vpbroadcastd %xmm3,%ymm3 + DB 197,172,88,219 ; vaddps %ymm3,%ymm10,%ymm3 + DB 197,164,94,219 ; vdivps %ymm3,%ymm11,%ymm3 + DB 197,156,92,219 ; vsubps %ymm3,%ymm12,%ymm3 + DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10 + DB 197,44,89,219 ; vmulps %ymm3,%ymm10,%ymm11 + DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10 + DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10 + DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d + DB 196,193,121,110,216 ; vmovd %r8d,%xmm3 + DB 196,98,125,88,227 ; vpbroadcastd %xmm3,%ymm12 + DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d + DB 196,193,121,110,216 ; vmovd %r8d,%xmm3 + DB 196,226,125,88,219 ; vpbroadcastd %xmm3,%ymm3 + DB 196,65,100,88,219 ; vaddps %ymm11,%ymm3,%ymm11 + DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d + DB 196,193,121,110,216 ; vmovd %r8d,%xmm3 + DB 196,98,125,88,235 ; vpbroadcastd %xmm3,%ymm13 + DB 196,66,45,172,235 ; vfnmadd213ps %ymm11,%ymm10,%ymm13 + DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d + DB 196,193,121,110,216 ; vmovd %r8d,%xmm3 + DB 196,98,125,88,219 ; vpbroadcastd %xmm3,%ymm11 + DB 65,184,248,245,154,64 ; mov $0x409af5f8,%r8d + DB 196,193,121,110,216 ; vmovd %r8d,%xmm3 + DB 196,226,125,88,219 
; vpbroadcastd %xmm3,%ymm3 + DB 196,193,100,92,218 ; vsubps %ymm10,%ymm3,%ymm3 + DB 197,164,94,219 ; vdivps %ymm3,%ymm11,%ymm3 + DB 197,148,88,219 ; vaddps %ymm3,%ymm13,%ymm3 + DB 197,156,89,219 ; vmulps %ymm3,%ymm12,%ymm3 + DB 197,253,91,219 ; vcvtps2dq %ymm3,%ymm3 + DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10 + DB 196,193,100,88,218 ; vaddps %ymm10,%ymm3,%ymm3 + DB 196,195,101,74,217,128 ; vblendvps %ymm8,%ymm9,%ymm3,%ymm3 + DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8 + DB 196,65,100,95,192 ; vmaxps %ymm8,%ymm3,%ymm8 + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 197,249,110,216 ; vmovd %eax,%xmm3 + DB 196,226,125,88,219 ; vpbroadcastd %xmm3,%ymm3 + DB 197,188,93,219 ; vminps %ymm3,%ymm8,%ymm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + PUBLIC _sk_load_a8_hsw _sk_load_a8_hsw LABEL PROC DB 73,137,200 ; mov %rcx,%r8 @@ -1941,7 +2269,7 @@ _sk_load_a8_hsw LABEL PROC DB 72,139,0 ; mov (%rax),%rax DB 72,1,248 ; add %rdi,%rax DB 77,133,192 ; test %r8,%r8 - DB 117,50 ; jne 1e28 <_sk_load_a8_hsw+0x42> + DB 117,50 ; jne 2458 <_sk_load_a8_hsw+0x42> DB 197,250,126,0 ; vmovq (%rax),%xmm0 DB 196,226,125,49,192 ; vpmovzxbd %xmm0,%ymm0 DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0 @@ -1964,9 +2292,9 @@ _sk_load_a8_hsw LABEL PROC DB 77,9,217 ; or %r11,%r9 DB 72,131,193,8 ; add $0x8,%rcx DB 73,255,202 ; dec %r10 - DB 117,234 ; jne 1e30 <_sk_load_a8_hsw+0x4a> + DB 117,234 ; jne 2460 <_sk_load_a8_hsw+0x4a> DB 196,193,249,110,193 ; vmovq %r9,%xmm0 - DB 235,173 ; jmp 1dfa <_sk_load_a8_hsw+0x14> + DB 235,173 ; jmp 242a <_sk_load_a8_hsw+0x14> PUBLIC _sk_gather_a8_hsw _sk_gather_a8_hsw LABEL PROC @@ -2037,7 +2365,7 @@ _sk_store_a8_hsw LABEL PROC DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8 DB 196,65,57,103,192 ; vpackuswb %xmm8,%xmm8,%xmm8 DB 72,133,201 ; test %rcx,%rcx - DB 117,10 ; jne 1f65 <_sk_store_a8_hsw+0x3b> + DB 117,10 ; jne 2595 <_sk_store_a8_hsw+0x3b> DB 196,65,123,17,4,57 ; vmovsd %xmm8,(%r9,%rdi,1) DB 72,173 ; lods %ds:(%rsi),%rax DB 
255,224 ; jmpq *%rax @@ -2045,10 +2373,10 @@ _sk_store_a8_hsw LABEL PROC DB 65,128,224,7 ; and $0x7,%r8b DB 65,254,200 ; dec %r8b DB 65,128,248,6 ; cmp $0x6,%r8b - DB 119,236 ; ja 1f61 <_sk_store_a8_hsw+0x37> + DB 119,236 ; ja 2591 <_sk_store_a8_hsw+0x37> DB 196,66,121,48,192 ; vpmovzxbw %xmm8,%xmm8 DB 65,15,182,192 ; movzbl %r8b,%eax - DB 76,141,5,67,0,0,0 ; lea 0x43(%rip),%r8 # 1fc8 <_sk_store_a8_hsw+0x9e> + DB 76,141,5,67,0,0,0 ; lea 0x43(%rip),%r8 # 25f8 <_sk_store_a8_hsw+0x9e> DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax DB 76,1,192 ; add %r8,%rax DB 255,224 ; jmpq *%rax @@ -2059,7 +2387,7 @@ _sk_store_a8_hsw LABEL PROC DB 196,67,121,20,68,57,2,4 ; vpextrb $0x4,%xmm8,0x2(%r9,%rdi,1) DB 196,67,121,20,68,57,1,2 ; vpextrb $0x2,%xmm8,0x1(%r9,%rdi,1) DB 196,67,121,20,4,57,0 ; vpextrb $0x0,%xmm8,(%r9,%rdi,1) - DB 235,154 ; jmp 1f61 <_sk_store_a8_hsw+0x37> + DB 235,154 ; jmp 2591 <_sk_store_a8_hsw+0x37> DB 144 ; nop DB 246,255 ; idiv %bh DB 255 ; (bad) @@ -2091,7 +2419,7 @@ _sk_load_g8_hsw LABEL PROC DB 72,139,0 ; mov (%rax),%rax DB 72,1,248 ; add %rdi,%rax DB 77,133,192 ; test %r8,%r8 - DB 117,60 ; jne 2030 <_sk_load_g8_hsw+0x4c> + DB 117,60 ; jne 2660 <_sk_load_g8_hsw+0x4c> DB 197,250,126,0 ; vmovq (%rax),%xmm0 DB 196,226,125,49,192 ; vpmovzxbd %xmm0,%ymm0 DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0 @@ -2116,9 +2444,9 @@ _sk_load_g8_hsw LABEL PROC DB 77,9,217 ; or %r11,%r9 DB 72,131,193,8 ; add $0x8,%rcx DB 73,255,202 ; dec %r10 - DB 117,234 ; jne 2038 <_sk_load_g8_hsw+0x54> + DB 117,234 ; jne 2668 <_sk_load_g8_hsw+0x54> DB 196,193,249,110,193 ; vmovq %r9,%xmm0 - DB 235,163 ; jmp 1ff8 <_sk_load_g8_hsw+0x14> + DB 235,163 ; jmp 2628 <_sk_load_g8_hsw+0x14> PUBLIC _sk_gather_g8_hsw _sk_gather_g8_hsw LABEL PROC @@ -2183,9 +2511,9 @@ _sk_gather_i8_hsw LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 73,137,192 ; mov %rax,%r8 DB 77,133,192 ; test %r8,%r8 - DB 116,5 ; je 214b <_sk_gather_i8_hsw+0xf> + DB 116,5 ; je 277b <_sk_gather_i8_hsw+0xf> DB 76,137,192 ; mov %r8,%rax - DB 
235,2 ; jmp 214d <_sk_gather_i8_hsw+0x11> + DB 235,2 ; jmp 277d <_sk_gather_i8_hsw+0x11> DB 72,173 ; lods %ds:(%rsi),%rax DB 65,87 ; push %r15 DB 65,86 ; push %r14 @@ -2256,7 +2584,7 @@ _sk_load_565_hsw LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 76,139,16 ; mov (%rax),%r10 DB 72,133,201 ; test %rcx,%rcx - DB 15,133,149,0,0,0 ; jne 22ff <_sk_load_565_hsw+0xa3> + DB 15,133,149,0,0,0 ; jne 292f <_sk_load_565_hsw+0xa3> DB 196,193,122,111,4,122 ; vmovdqu (%r10,%rdi,2),%xmm0 DB 196,226,125,51,208 ; vpmovzxwd %xmm0,%ymm2 DB 184,0,248,0,0 ; mov $0xf800,%eax @@ -2296,9 +2624,9 @@ _sk_load_565_hsw LABEL PROC DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0 DB 65,254,200 ; dec %r8b DB 65,128,248,6 ; cmp $0x6,%r8b - DB 15,135,89,255,255,255 ; ja 2270 <_sk_load_565_hsw+0x14> + DB 15,135,89,255,255,255 ; ja 28a0 <_sk_load_565_hsw+0x14> DB 69,15,182,192 ; movzbl %r8b,%r8d - DB 76,141,13,74,0,0,0 ; lea 0x4a(%rip),%r9 # 236c <_sk_load_565_hsw+0x110> + DB 76,141,13,74,0,0,0 ; lea 0x4a(%rip),%r9 # 299c <_sk_load_565_hsw+0x110> DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax DB 76,1,200 ; add %r9,%rax DB 255,224 ; jmpq *%rax @@ -2310,12 +2638,12 @@ _sk_load_565_hsw LABEL PROC DB 196,193,121,196,68,122,4,2 ; vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0 DB 196,193,121,196,68,122,2,1 ; vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0 DB 196,193,121,196,4,122,0 ; vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0 - DB 233,5,255,255,255 ; jmpq 2270 <_sk_load_565_hsw+0x14> + DB 233,5,255,255,255 ; jmpq 28a0 <_sk_load_565_hsw+0x14> DB 144 ; nop DB 243,255 ; repz (bad) DB 255 ; (bad) DB 255 ; (bad) - DB 235,255 ; jmp 2371 <_sk_load_565_hsw+0x115> + DB 235,255 ; jmp 29a1 <_sk_load_565_hsw+0x115> DB 255 ; (bad) DB 255,227 ; jmpq *%rbx DB 255 ; (bad) @@ -2438,7 +2766,7 @@ _sk_store_565_hsw LABEL PROC DB 196,67,125,57,193,1 ; vextracti128 $0x1,%ymm8,%xmm9 DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8 DB 72,133,201 ; test %rcx,%rcx - DB 117,10 ; jne 2537 <_sk_store_565_hsw+0x6c> + DB 117,10 ; jne 2b67 
<_sk_store_565_hsw+0x6c> DB 196,65,122,127,4,121 ; vmovdqu %xmm8,(%r9,%rdi,2) DB 72,173 ; lods %ds:(%rsi),%rax DB 255,224 ; jmpq *%rax @@ -2446,9 +2774,9 @@ _sk_store_565_hsw LABEL PROC DB 65,128,224,7 ; and $0x7,%r8b DB 65,254,200 ; dec %r8b DB 65,128,248,6 ; cmp $0x6,%r8b - DB 119,236 ; ja 2533 <_sk_store_565_hsw+0x68> + DB 119,236 ; ja 2b63 <_sk_store_565_hsw+0x68> DB 65,15,182,192 ; movzbl %r8b,%eax - DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 2594 <_sk_store_565_hsw+0xc9> + DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 2bc4 <_sk_store_565_hsw+0xc9> DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax DB 76,1,192 ; add %r8,%rax DB 255,224 ; jmpq *%rax @@ -2459,7 +2787,7 @@ _sk_store_565_hsw LABEL PROC DB 196,67,121,21,68,121,4,2 ; vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2) DB 196,67,121,21,68,121,2,1 ; vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2) DB 196,67,121,21,4,121,0 ; vpextrw $0x0,%xmm8,(%r9,%rdi,2) - DB 235,159 ; jmp 2533 <_sk_store_565_hsw+0x68> + DB 235,159 ; jmp 2b63 <_sk_store_565_hsw+0x68> DB 247,255 ; idiv %edi DB 255 ; (bad) DB 255 ; (bad) @@ -2488,7 +2816,7 @@ _sk_load_4444_hsw LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 76,139,16 ; mov (%rax),%r10 DB 72,133,201 ; test %rcx,%rcx - DB 15,133,179,0,0,0 ; jne 2671 <_sk_load_4444_hsw+0xc1> + DB 15,133,179,0,0,0 ; jne 2ca1 <_sk_load_4444_hsw+0xc1> DB 196,193,122,111,4,122 ; vmovdqu (%r10,%rdi,2),%xmm0 DB 196,98,125,51,200 ; vpmovzxwd %xmm0,%ymm9 DB 184,0,240,0,0 ; mov $0xf000,%eax @@ -2534,9 +2862,9 @@ _sk_load_4444_hsw LABEL PROC DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0 DB 65,254,200 ; dec %r8b DB 65,128,248,6 ; cmp $0x6,%r8b - DB 15,135,59,255,255,255 ; ja 25c4 <_sk_load_4444_hsw+0x14> + DB 15,135,59,255,255,255 ; ja 2bf4 <_sk_load_4444_hsw+0x14> DB 69,15,182,192 ; movzbl %r8b,%r8d - DB 76,141,13,76,0,0,0 ; lea 0x4c(%rip),%r9 # 26e0 <_sk_load_4444_hsw+0x130> + DB 76,141,13,76,0,0,0 ; lea 0x4c(%rip),%r9 # 2d10 <_sk_load_4444_hsw+0x130> DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax DB 76,1,200 ; add %r9,%rax DB 255,224 ; jmpq 
*%rax @@ -2548,13 +2876,13 @@ _sk_load_4444_hsw LABEL PROC DB 196,193,121,196,68,122,4,2 ; vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0 DB 196,193,121,196,68,122,2,1 ; vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0 DB 196,193,121,196,4,122,0 ; vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0 - DB 233,231,254,255,255 ; jmpq 25c4 <_sk_load_4444_hsw+0x14> + DB 233,231,254,255,255 ; jmpq 2bf4 <_sk_load_4444_hsw+0x14> DB 15,31,0 ; nopl (%rax) DB 241 ; icebp DB 255 ; (bad) DB 255 ; (bad) DB 255 ; (bad) - DB 233,255,255,255,225 ; jmpq ffffffffe20026e8 <_sk_callback_hsw+0xffffffffe1ffeace> + DB 233,255,255,255,225 ; jmpq ffffffffe2002d18 <_sk_callback_hsw+0xffffffffe1ffeace> DB 255 ; (bad) DB 255 ; (bad) DB 255 ; (bad) @@ -2682,7 +3010,7 @@ _sk_store_4444_hsw LABEL PROC DB 196,67,125,57,193,1 ; vextracti128 $0x1,%ymm8,%xmm9 DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8 DB 72,133,201 ; test %rcx,%rcx - DB 117,10 ; jne 28cf <_sk_store_4444_hsw+0x72> + DB 117,10 ; jne 2eff <_sk_store_4444_hsw+0x72> DB 196,65,122,127,4,121 ; vmovdqu %xmm8,(%r9,%rdi,2) DB 72,173 ; lods %ds:(%rsi),%rax DB 255,224 ; jmpq *%rax @@ -2690,9 +3018,9 @@ _sk_store_4444_hsw LABEL PROC DB 65,128,224,7 ; and $0x7,%r8b DB 65,254,200 ; dec %r8b DB 65,128,248,6 ; cmp $0x6,%r8b - DB 119,236 ; ja 28cb <_sk_store_4444_hsw+0x6e> + DB 119,236 ; ja 2efb <_sk_store_4444_hsw+0x6e> DB 65,15,182,192 ; movzbl %r8b,%eax - DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 292c <_sk_store_4444_hsw+0xcf> + DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 2f5c <_sk_store_4444_hsw+0xcf> DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax DB 76,1,192 ; add %r8,%rax DB 255,224 ; jmpq *%rax @@ -2703,7 +3031,7 @@ _sk_store_4444_hsw LABEL PROC DB 196,67,121,21,68,121,4,2 ; vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2) DB 196,67,121,21,68,121,2,1 ; vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2) DB 196,67,121,21,4,121,0 ; vpextrw $0x0,%xmm8,(%r9,%rdi,2) - DB 235,159 ; jmp 28cb <_sk_store_4444_hsw+0x6e> + DB 235,159 ; jmp 2efb <_sk_store_4444_hsw+0x6e> DB 247,255 ; idiv %edi DB 255 ; (bad) 
DB 255 ; (bad) @@ -2734,7 +3062,7 @@ _sk_load_8888_hsw LABEL PROC DB 76,141,12,189,0,0,0,0 ; lea 0x0(,%rdi,4),%r9 DB 76,3,8 ; add (%rax),%r9 DB 77,133,192 ; test %r8,%r8 - DB 117,104 ; jne 29c5 <_sk_load_8888_hsw+0x7d> + DB 117,104 ; jne 2ff5 <_sk_load_8888_hsw+0x7d> DB 196,193,126,111,25 ; vmovdqu (%r9),%ymm3 DB 184,255,0,0,0 ; mov $0xff,%eax DB 197,249,110,192 ; vmovd %eax,%xmm0 @@ -2767,7 +3095,7 @@ _sk_load_8888_hsw LABEL PROC DB 196,225,249,110,192 ; vmovq %rax,%xmm0 DB 196,226,125,33,192 ; vpmovsxbd %xmm0,%ymm0 DB 196,194,125,140,25 ; vpmaskmovd (%r9),%ymm0,%ymm3 - DB 233,116,255,255,255 ; jmpq 2962 <_sk_load_8888_hsw+0x1a> + DB 233,116,255,255,255 ; jmpq 2f92 <_sk_load_8888_hsw+0x1a> PUBLIC _sk_gather_8888_hsw _sk_gather_8888_hsw LABEL PROC @@ -2827,7 +3155,7 @@ _sk_store_8888_hsw LABEL PROC DB 196,65,45,235,192 ; vpor %ymm8,%ymm10,%ymm8 DB 196,65,53,235,192 ; vpor %ymm8,%ymm9,%ymm8 DB 77,133,192 ; test %r8,%r8 - DB 117,12 ; jne 2ae8 <_sk_store_8888_hsw+0x74> + DB 117,12 ; jne 3118 <_sk_store_8888_hsw+0x74> DB 196,65,126,127,1 ; vmovdqu %ymm8,(%r9) DB 72,173 ; lods %ds:(%rsi),%rax DB 76,137,193 ; mov %r8,%rcx @@ -2840,14 +3168,14 @@ _sk_store_8888_hsw LABEL PROC DB 196,97,249,110,200 ; vmovq %rax,%xmm9 DB 196,66,125,33,201 ; vpmovsxbd %xmm9,%ymm9 DB 196,66,53,142,1 ; vpmaskmovd %ymm8,%ymm9,(%r9) - DB 235,211 ; jmp 2ae1 <_sk_store_8888_hsw+0x6d> + DB 235,211 ; jmp 3111 <_sk_store_8888_hsw+0x6d> PUBLIC _sk_load_f16_hsw _sk_load_f16_hsw LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 72,139,0 ; mov (%rax),%rax DB 72,133,201 ; test %rcx,%rcx - DB 117,97 ; jne 2b79 <_sk_load_f16_hsw+0x6b> + DB 117,97 ; jne 31a9 <_sk_load_f16_hsw+0x6b> DB 197,121,16,4,248 ; vmovupd (%rax,%rdi,8),%xmm8 DB 197,249,16,84,248,16 ; vmovupd 0x10(%rax,%rdi,8),%xmm2 DB 197,249,16,92,248,32 ; vmovupd 0x20(%rax,%rdi,8),%xmm3 @@ -2873,29 +3201,29 @@ _sk_load_f16_hsw LABEL PROC DB 197,123,16,4,248 ; vmovsd (%rax,%rdi,8),%xmm8 DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9 DB 72,131,249,1 ; cmp 
$0x1,%rcx - DB 116,79 ; je 2bd8 <_sk_load_f16_hsw+0xca> + DB 116,79 ; je 3208 <_sk_load_f16_hsw+0xca> DB 197,57,22,68,248,8 ; vmovhpd 0x8(%rax,%rdi,8),%xmm8,%xmm8 DB 72,131,249,3 ; cmp $0x3,%rcx - DB 114,67 ; jb 2bd8 <_sk_load_f16_hsw+0xca> + DB 114,67 ; jb 3208 <_sk_load_f16_hsw+0xca> DB 197,251,16,84,248,16 ; vmovsd 0x10(%rax,%rdi,8),%xmm2 DB 72,131,249,3 ; cmp $0x3,%rcx - DB 116,68 ; je 2be5 <_sk_load_f16_hsw+0xd7> + DB 116,68 ; je 3215 <_sk_load_f16_hsw+0xd7> DB 197,233,22,84,248,24 ; vmovhpd 0x18(%rax,%rdi,8),%xmm2,%xmm2 DB 72,131,249,5 ; cmp $0x5,%rcx - DB 114,56 ; jb 2be5 <_sk_load_f16_hsw+0xd7> + DB 114,56 ; jb 3215 <_sk_load_f16_hsw+0xd7> DB 197,251,16,92,248,32 ; vmovsd 0x20(%rax,%rdi,8),%xmm3 DB 72,131,249,5 ; cmp $0x5,%rcx - DB 15,132,114,255,255,255 ; je 2b2f <_sk_load_f16_hsw+0x21> + DB 15,132,114,255,255,255 ; je 315f <_sk_load_f16_hsw+0x21> DB 197,225,22,92,248,40 ; vmovhpd 0x28(%rax,%rdi,8),%xmm3,%xmm3 DB 72,131,249,7 ; cmp $0x7,%rcx - DB 15,130,98,255,255,255 ; jb 2b2f <_sk_load_f16_hsw+0x21> + DB 15,130,98,255,255,255 ; jb 315f <_sk_load_f16_hsw+0x21> DB 197,122,126,76,248,48 ; vmovq 0x30(%rax,%rdi,8),%xmm9 - DB 233,87,255,255,255 ; jmpq 2b2f <_sk_load_f16_hsw+0x21> + DB 233,87,255,255,255 ; jmpq 315f <_sk_load_f16_hsw+0x21> DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3 DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2 - DB 233,74,255,255,255 ; jmpq 2b2f <_sk_load_f16_hsw+0x21> + DB 233,74,255,255,255 ; jmpq 315f <_sk_load_f16_hsw+0x21> DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3 - DB 233,65,255,255,255 ; jmpq 2b2f <_sk_load_f16_hsw+0x21> + DB 233,65,255,255,255 ; jmpq 315f <_sk_load_f16_hsw+0x21> PUBLIC _sk_gather_f16_hsw _sk_gather_f16_hsw LABEL PROC @@ -2949,7 +3277,7 @@ _sk_store_f16_hsw LABEL PROC DB 196,65,57,98,205 ; vpunpckldq %xmm13,%xmm8,%xmm9 DB 196,65,57,106,197 ; vpunpckhdq %xmm13,%xmm8,%xmm8 DB 72,133,201 ; test %rcx,%rcx - DB 117,27 ; jne 2cdd <_sk_store_f16_hsw+0x65> + DB 117,27 ; jne 330d <_sk_store_f16_hsw+0x65> DB 197,120,17,28,248 
; vmovups %xmm11,(%rax,%rdi,8) DB 197,120,17,84,248,16 ; vmovups %xmm10,0x10(%rax,%rdi,8) DB 197,120,17,76,248,32 ; vmovups %xmm9,0x20(%rax,%rdi,8) @@ -2958,22 +3286,22 @@ _sk_store_f16_hsw LABEL PROC DB 255,224 ; jmpq *%rax DB 197,121,214,28,248 ; vmovq %xmm11,(%rax,%rdi,8) DB 72,131,249,1 ; cmp $0x1,%rcx - DB 116,241 ; je 2cd9 <_sk_store_f16_hsw+0x61> + DB 116,241 ; je 3309 <_sk_store_f16_hsw+0x61> DB 197,121,23,92,248,8 ; vmovhpd %xmm11,0x8(%rax,%rdi,8) DB 72,131,249,3 ; cmp $0x3,%rcx - DB 114,229 ; jb 2cd9 <_sk_store_f16_hsw+0x61> + DB 114,229 ; jb 3309 <_sk_store_f16_hsw+0x61> DB 197,121,214,84,248,16 ; vmovq %xmm10,0x10(%rax,%rdi,8) - DB 116,221 ; je 2cd9 <_sk_store_f16_hsw+0x61> + DB 116,221 ; je 3309 <_sk_store_f16_hsw+0x61> DB 197,121,23,84,248,24 ; vmovhpd %xmm10,0x18(%rax,%rdi,8) DB 72,131,249,5 ; cmp $0x5,%rcx - DB 114,209 ; jb 2cd9 <_sk_store_f16_hsw+0x61> + DB 114,209 ; jb 3309 <_sk_store_f16_hsw+0x61> DB 197,121,214,76,248,32 ; vmovq %xmm9,0x20(%rax,%rdi,8) - DB 116,201 ; je 2cd9 <_sk_store_f16_hsw+0x61> + DB 116,201 ; je 3309 <_sk_store_f16_hsw+0x61> DB 197,121,23,76,248,40 ; vmovhpd %xmm9,0x28(%rax,%rdi,8) DB 72,131,249,7 ; cmp $0x7,%rcx - DB 114,189 ; jb 2cd9 <_sk_store_f16_hsw+0x61> + DB 114,189 ; jb 3309 <_sk_store_f16_hsw+0x61> DB 197,121,214,68,248,48 ; vmovq %xmm8,0x30(%rax,%rdi,8) - DB 235,181 ; jmp 2cd9 <_sk_store_f16_hsw+0x61> + DB 235,181 ; jmp 3309 <_sk_store_f16_hsw+0x61> PUBLIC _sk_load_u16_be_hsw _sk_load_u16_be_hsw LABEL PROC @@ -2981,7 +3309,7 @@ _sk_load_u16_be_hsw LABEL PROC DB 76,139,0 ; mov (%rax),%r8 DB 72,141,4,189,0,0,0,0 ; lea 0x0(,%rdi,4),%rax DB 72,133,201 ; test %rcx,%rcx - DB 15,133,205,0,0,0 ; jne 2e07 <_sk_load_u16_be_hsw+0xe3> + DB 15,133,205,0,0,0 ; jne 3437 <_sk_load_u16_be_hsw+0xe3> DB 196,65,121,16,4,64 ; vmovupd (%r8,%rax,2),%xmm8 DB 196,193,121,16,84,64,16 ; vmovupd 0x10(%r8,%rax,2),%xmm2 DB 196,193,121,16,92,64,32 ; vmovupd 0x20(%r8,%rax,2),%xmm3 @@ -3030,29 +3358,29 @@ _sk_load_u16_be_hsw LABEL PROC DB 
196,65,123,16,4,64 ; vmovsd (%r8,%rax,2),%xmm8 DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9 DB 72,131,249,1 ; cmp $0x1,%rcx - DB 116,85 ; je 2e6d <_sk_load_u16_be_hsw+0x149> + DB 116,85 ; je 349d <_sk_load_u16_be_hsw+0x149> DB 196,65,57,22,68,64,8 ; vmovhpd 0x8(%r8,%rax,2),%xmm8,%xmm8 DB 72,131,249,3 ; cmp $0x3,%rcx - DB 114,72 ; jb 2e6d <_sk_load_u16_be_hsw+0x149> + DB 114,72 ; jb 349d <_sk_load_u16_be_hsw+0x149> DB 196,193,123,16,84,64,16 ; vmovsd 0x10(%r8,%rax,2),%xmm2 DB 72,131,249,3 ; cmp $0x3,%rcx - DB 116,72 ; je 2e7a <_sk_load_u16_be_hsw+0x156> + DB 116,72 ; je 34aa <_sk_load_u16_be_hsw+0x156> DB 196,193,105,22,84,64,24 ; vmovhpd 0x18(%r8,%rax,2),%xmm2,%xmm2 DB 72,131,249,5 ; cmp $0x5,%rcx - DB 114,59 ; jb 2e7a <_sk_load_u16_be_hsw+0x156> + DB 114,59 ; jb 34aa <_sk_load_u16_be_hsw+0x156> DB 196,193,123,16,92,64,32 ; vmovsd 0x20(%r8,%rax,2),%xmm3 DB 72,131,249,5 ; cmp $0x5,%rcx - DB 15,132,5,255,255,255 ; je 2d55 <_sk_load_u16_be_hsw+0x31> + DB 15,132,5,255,255,255 ; je 3385 <_sk_load_u16_be_hsw+0x31> DB 196,193,97,22,92,64,40 ; vmovhpd 0x28(%r8,%rax,2),%xmm3,%xmm3 DB 72,131,249,7 ; cmp $0x7,%rcx - DB 15,130,244,254,255,255 ; jb 2d55 <_sk_load_u16_be_hsw+0x31> + DB 15,130,244,254,255,255 ; jb 3385 <_sk_load_u16_be_hsw+0x31> DB 196,65,122,126,76,64,48 ; vmovq 0x30(%r8,%rax,2),%xmm9 - DB 233,232,254,255,255 ; jmpq 2d55 <_sk_load_u16_be_hsw+0x31> + DB 233,232,254,255,255 ; jmpq 3385 <_sk_load_u16_be_hsw+0x31> DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3 DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2 - DB 233,219,254,255,255 ; jmpq 2d55 <_sk_load_u16_be_hsw+0x31> + DB 233,219,254,255,255 ; jmpq 3385 <_sk_load_u16_be_hsw+0x31> DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3 - DB 233,210,254,255,255 ; jmpq 2d55 <_sk_load_u16_be_hsw+0x31> + DB 233,210,254,255,255 ; jmpq 3385 <_sk_load_u16_be_hsw+0x31> PUBLIC _sk_load_rgb_u16_be_hsw _sk_load_rgb_u16_be_hsw LABEL PROC @@ -3060,7 +3388,7 @@ _sk_load_rgb_u16_be_hsw LABEL PROC DB 76,139,0 ; mov (%rax),%r8 DB 72,141,4,127 ; 
lea (%rdi,%rdi,2),%rax DB 72,133,201 ; test %rcx,%rcx - DB 15,133,211,0,0,0 ; jne 2f68 <_sk_load_rgb_u16_be_hsw+0xe5> + DB 15,133,211,0,0,0 ; jne 3598 <_sk_load_rgb_u16_be_hsw+0xe5> DB 196,193,122,111,4,64 ; vmovdqu (%r8,%rax,2),%xmm0 DB 196,193,122,111,84,64,12 ; vmovdqu 0xc(%r8,%rax,2),%xmm2 DB 196,193,122,111,76,64,24 ; vmovdqu 0x18(%r8,%rax,2),%xmm1 @@ -3110,36 +3438,36 @@ _sk_load_rgb_u16_be_hsw LABEL PROC DB 196,193,121,110,4,64 ; vmovd (%r8,%rax,2),%xmm0 DB 196,193,121,196,68,64,4,2 ; vpinsrw $0x2,0x4(%r8,%rax,2),%xmm0,%xmm0 DB 72,131,249,1 ; cmp $0x1,%rcx - DB 117,5 ; jne 2f81 <_sk_load_rgb_u16_be_hsw+0xfe> - DB 233,72,255,255,255 ; jmpq 2ec9 <_sk_load_rgb_u16_be_hsw+0x46> + DB 117,5 ; jne 35b1 <_sk_load_rgb_u16_be_hsw+0xfe> + DB 233,72,255,255,255 ; jmpq 34f9 <_sk_load_rgb_u16_be_hsw+0x46> DB 196,193,121,110,76,64,6 ; vmovd 0x6(%r8,%rax,2),%xmm1 DB 196,65,113,196,68,64,10,2 ; vpinsrw $0x2,0xa(%r8,%rax,2),%xmm1,%xmm8 DB 72,131,249,3 ; cmp $0x3,%rcx - DB 114,26 ; jb 2fb0 <_sk_load_rgb_u16_be_hsw+0x12d> + DB 114,26 ; jb 35e0 <_sk_load_rgb_u16_be_hsw+0x12d> DB 196,193,121,110,76,64,12 ; vmovd 0xc(%r8,%rax,2),%xmm1 DB 196,193,113,196,84,64,16,2 ; vpinsrw $0x2,0x10(%r8,%rax,2),%xmm1,%xmm2 DB 72,131,249,3 ; cmp $0x3,%rcx - DB 117,10 ; jne 2fb5 <_sk_load_rgb_u16_be_hsw+0x132> - DB 233,25,255,255,255 ; jmpq 2ec9 <_sk_load_rgb_u16_be_hsw+0x46> - DB 233,20,255,255,255 ; jmpq 2ec9 <_sk_load_rgb_u16_be_hsw+0x46> + DB 117,10 ; jne 35e5 <_sk_load_rgb_u16_be_hsw+0x132> + DB 233,25,255,255,255 ; jmpq 34f9 <_sk_load_rgb_u16_be_hsw+0x46> + DB 233,20,255,255,255 ; jmpq 34f9 <_sk_load_rgb_u16_be_hsw+0x46> DB 196,193,121,110,76,64,18 ; vmovd 0x12(%r8,%rax,2),%xmm1 DB 196,65,113,196,76,64,22,2 ; vpinsrw $0x2,0x16(%r8,%rax,2),%xmm1,%xmm9 DB 72,131,249,5 ; cmp $0x5,%rcx - DB 114,26 ; jb 2fe4 <_sk_load_rgb_u16_be_hsw+0x161> + DB 114,26 ; jb 3614 <_sk_load_rgb_u16_be_hsw+0x161> DB 196,193,121,110,76,64,24 ; vmovd 0x18(%r8,%rax,2),%xmm1 DB 196,193,113,196,76,64,28,2 ; vpinsrw 
$0x2,0x1c(%r8,%rax,2),%xmm1,%xmm1 DB 72,131,249,5 ; cmp $0x5,%rcx - DB 117,10 ; jne 2fe9 <_sk_load_rgb_u16_be_hsw+0x166> - DB 233,229,254,255,255 ; jmpq 2ec9 <_sk_load_rgb_u16_be_hsw+0x46> - DB 233,224,254,255,255 ; jmpq 2ec9 <_sk_load_rgb_u16_be_hsw+0x46> + DB 117,10 ; jne 3619 <_sk_load_rgb_u16_be_hsw+0x166> + DB 233,229,254,255,255 ; jmpq 34f9 <_sk_load_rgb_u16_be_hsw+0x46> + DB 233,224,254,255,255 ; jmpq 34f9 <_sk_load_rgb_u16_be_hsw+0x46> DB 196,193,121,110,92,64,30 ; vmovd 0x1e(%r8,%rax,2),%xmm3 DB 196,65,97,196,92,64,34,2 ; vpinsrw $0x2,0x22(%r8,%rax,2),%xmm3,%xmm11 DB 72,131,249,7 ; cmp $0x7,%rcx - DB 114,20 ; jb 3012 <_sk_load_rgb_u16_be_hsw+0x18f> + DB 114,20 ; jb 3642 <_sk_load_rgb_u16_be_hsw+0x18f> DB 196,193,121,110,92,64,36 ; vmovd 0x24(%r8,%rax,2),%xmm3 DB 196,193,97,196,92,64,40,2 ; vpinsrw $0x2,0x28(%r8,%rax,2),%xmm3,%xmm3 - DB 233,183,254,255,255 ; jmpq 2ec9 <_sk_load_rgb_u16_be_hsw+0x46> - DB 233,178,254,255,255 ; jmpq 2ec9 <_sk_load_rgb_u16_be_hsw+0x46> + DB 233,183,254,255,255 ; jmpq 34f9 <_sk_load_rgb_u16_be_hsw+0x46> + DB 233,178,254,255,255 ; jmpq 34f9 <_sk_load_rgb_u16_be_hsw+0x46> PUBLIC _sk_store_u16_be_hsw _sk_store_u16_be_hsw LABEL PROC @@ -3186,7 +3514,7 @@ _sk_store_u16_be_hsw LABEL PROC DB 196,65,17,98,200 ; vpunpckldq %xmm8,%xmm13,%xmm9 DB 196,65,17,106,192 ; vpunpckhdq %xmm8,%xmm13,%xmm8 DB 72,133,201 ; test %rcx,%rcx - DB 117,31 ; jne 3112 <_sk_store_u16_be_hsw+0xfb> + DB 117,31 ; jne 3742 <_sk_store_u16_be_hsw+0xfb> DB 196,1,120,17,28,72 ; vmovups %xmm11,(%r8,%r9,2) DB 196,1,120,17,84,72,16 ; vmovups %xmm10,0x10(%r8,%r9,2) DB 196,1,120,17,76,72,32 ; vmovups %xmm9,0x20(%r8,%r9,2) @@ -3195,31 +3523,31 @@ _sk_store_u16_be_hsw LABEL PROC DB 255,224 ; jmpq *%rax DB 196,1,121,214,28,72 ; vmovq %xmm11,(%r8,%r9,2) DB 72,131,249,1 ; cmp $0x1,%rcx - DB 116,240 ; je 310e <_sk_store_u16_be_hsw+0xf7> + DB 116,240 ; je 373e <_sk_store_u16_be_hsw+0xf7> DB 196,1,121,23,92,72,8 ; vmovhpd %xmm11,0x8(%r8,%r9,2) DB 72,131,249,3 ; cmp $0x3,%rcx - DB 
114,227 ; jb 310e <_sk_store_u16_be_hsw+0xf7> + DB 114,227 ; jb 373e <_sk_store_u16_be_hsw+0xf7> DB 196,1,121,214,84,72,16 ; vmovq %xmm10,0x10(%r8,%r9,2) - DB 116,218 ; je 310e <_sk_store_u16_be_hsw+0xf7> + DB 116,218 ; je 373e <_sk_store_u16_be_hsw+0xf7> DB 196,1,121,23,84,72,24 ; vmovhpd %xmm10,0x18(%r8,%r9,2) DB 72,131,249,5 ; cmp $0x5,%rcx - DB 114,205 ; jb 310e <_sk_store_u16_be_hsw+0xf7> + DB 114,205 ; jb 373e <_sk_store_u16_be_hsw+0xf7> DB 196,1,121,214,76,72,32 ; vmovq %xmm9,0x20(%r8,%r9,2) - DB 116,196 ; je 310e <_sk_store_u16_be_hsw+0xf7> + DB 116,196 ; je 373e <_sk_store_u16_be_hsw+0xf7> DB 196,1,121,23,76,72,40 ; vmovhpd %xmm9,0x28(%r8,%r9,2) DB 72,131,249,7 ; cmp $0x7,%rcx - DB 114,183 ; jb 310e <_sk_store_u16_be_hsw+0xf7> + DB 114,183 ; jb 373e <_sk_store_u16_be_hsw+0xf7> DB 196,1,121,214,68,72,48 ; vmovq %xmm8,0x30(%r8,%r9,2) - DB 235,174 ; jmp 310e <_sk_store_u16_be_hsw+0xf7> + DB 235,174 ; jmp 373e <_sk_store_u16_be_hsw+0xf7> PUBLIC _sk_load_f32_hsw _sk_load_f32_hsw LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 72,131,249,7 ; cmp $0x7,%rcx - DB 119,110 ; ja 31d6 <_sk_load_f32_hsw+0x76> + DB 119,110 ; ja 3806 <_sk_load_f32_hsw+0x76> DB 76,139,0 ; mov (%rax),%r8 DB 76,141,12,189,0,0,0,0 ; lea 0x0(,%rdi,4),%r9 - DB 76,141,21,134,0,0,0 ; lea 0x86(%rip),%r10 # 3200 <_sk_load_f32_hsw+0xa0> + DB 76,141,21,134,0,0,0 ; lea 0x86(%rip),%r10 # 3830 <_sk_load_f32_hsw+0xa0> DB 73,99,4,138 ; movslq (%r10,%rcx,4),%rax DB 76,1,208 ; add %r10,%rax DB 255,224 ; jmpq *%rax @@ -3276,7 +3604,7 @@ _sk_store_f32_hsw LABEL PROC DB 196,65,37,20,196 ; vunpcklpd %ymm12,%ymm11,%ymm8 DB 196,65,37,21,220 ; vunpckhpd %ymm12,%ymm11,%ymm11 DB 72,133,201 ; test %rcx,%rcx - DB 117,55 ; jne 328d <_sk_store_f32_hsw+0x6d> + DB 117,55 ; jne 38bd <_sk_store_f32_hsw+0x6d> DB 196,67,45,24,225,1 ; vinsertf128 $0x1,%xmm9,%ymm10,%ymm12 DB 196,67,61,24,235,1 ; vinsertf128 $0x1,%xmm11,%ymm8,%ymm13 DB 196,67,45,6,201,49 ; vperm2f128 $0x31,%ymm9,%ymm10,%ymm9 @@ -3289,22 +3617,22 @@ 
_sk_store_f32_hsw LABEL PROC DB 255,224 ; jmpq *%rax DB 196,65,121,17,20,128 ; vmovupd %xmm10,(%r8,%rax,4) DB 72,131,249,1 ; cmp $0x1,%rcx - DB 116,240 ; je 3289 <_sk_store_f32_hsw+0x69> + DB 116,240 ; je 38b9 <_sk_store_f32_hsw+0x69> DB 196,65,121,17,76,128,16 ; vmovupd %xmm9,0x10(%r8,%rax,4) DB 72,131,249,3 ; cmp $0x3,%rcx - DB 114,227 ; jb 3289 <_sk_store_f32_hsw+0x69> + DB 114,227 ; jb 38b9 <_sk_store_f32_hsw+0x69> DB 196,65,121,17,68,128,32 ; vmovupd %xmm8,0x20(%r8,%rax,4) - DB 116,218 ; je 3289 <_sk_store_f32_hsw+0x69> + DB 116,218 ; je 38b9 <_sk_store_f32_hsw+0x69> DB 196,65,121,17,92,128,48 ; vmovupd %xmm11,0x30(%r8,%rax,4) DB 72,131,249,5 ; cmp $0x5,%rcx - DB 114,205 ; jb 3289 <_sk_store_f32_hsw+0x69> + DB 114,205 ; jb 38b9 <_sk_store_f32_hsw+0x69> DB 196,67,125,25,84,128,64,1 ; vextractf128 $0x1,%ymm10,0x40(%r8,%rax,4) - DB 116,195 ; je 3289 <_sk_store_f32_hsw+0x69> + DB 116,195 ; je 38b9 <_sk_store_f32_hsw+0x69> DB 196,67,125,25,76,128,80,1 ; vextractf128 $0x1,%ymm9,0x50(%r8,%rax,4) DB 72,131,249,7 ; cmp $0x7,%rcx - DB 114,181 ; jb 3289 <_sk_store_f32_hsw+0x69> + DB 114,181 ; jb 38b9 <_sk_store_f32_hsw+0x69> DB 196,67,125,25,68,128,96,1 ; vextractf128 $0x1,%ymm8,0x60(%r8,%rax,4) - DB 235,171 ; jmp 3289 <_sk_store_f32_hsw+0x69> + DB 235,171 ; jmp 38b9 <_sk_store_f32_hsw+0x69> PUBLIC _sk_clamp_x_hsw _sk_clamp_x_hsw LABEL PROC @@ -3545,7 +3873,7 @@ _sk_linear_gradient_hsw LABEL PROC DB 196,98,125,24,72,28 ; vbroadcastss 0x1c(%rax),%ymm9 DB 76,139,0 ; mov (%rax),%r8 DB 77,133,192 ; test %r8,%r8 - DB 15,132,143,0,0,0 ; je 3719 <_sk_linear_gradient_hsw+0xb5> + DB 15,132,143,0,0,0 ; je 3d49 <_sk_linear_gradient_hsw+0xb5> DB 72,139,64,8 ; mov 0x8(%rax),%rax DB 72,131,192,32 ; add $0x20,%rax DB 196,65,28,87,228 ; vxorps %ymm12,%ymm12,%ymm12 @@ -3572,8 +3900,8 @@ _sk_linear_gradient_hsw LABEL PROC DB 196,67,13,74,201,208 ; vblendvps %ymm13,%ymm9,%ymm14,%ymm9 DB 72,131,192,36 ; add $0x24,%rax DB 73,255,200 ; dec %r8 - DB 117,140 ; jne 36a3 
<_sk_linear_gradient_hsw+0x3f> - DB 235,17 ; jmp 372a <_sk_linear_gradient_hsw+0xc6> + DB 117,140 ; jne 3cd3 <_sk_linear_gradient_hsw+0x3f> + DB 235,17 ; jmp 3d5a <_sk_linear_gradient_hsw+0xc6> DB 197,244,87,201 ; vxorps %ymm1,%ymm1,%ymm1 DB 197,236,87,210 ; vxorps %ymm2,%ymm2,%ymm2 DB 197,228,87,219 ; vxorps %ymm3,%ymm3,%ymm3 @@ -6463,45 +6791,449 @@ _sk_table_a_avx LABEL PROC DB 65,95 ; pop %r15 DB 255,224 ; jmpq *%rax -PUBLIC _sk_load_a8_avx -_sk_load_a8_avx LABEL PROC - DB 73,137,200 ; mov %rcx,%r8 +PUBLIC _sk_parametric_r_avx +_sk_parametric_r_avx LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax - DB 72,139,0 ; mov (%rax),%rax - DB 72,1,248 ; add %rdi,%rax - DB 77,133,192 ; test %r8,%r8 - DB 117,74 ; jne 29ba <_sk_load_a8_avx+0x5a> - DB 197,250,126,0 ; vmovq (%rax),%xmm0 - DB 196,226,121,49,200 ; vpmovzxbd %xmm0,%xmm1 - DB 196,227,121,4,192,229 ; vpermilps $0xe5,%xmm0,%xmm0 - DB 196,226,121,49,192 ; vpmovzxbd %xmm0,%xmm0 - DB 196,227,117,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm1,%ymm0 - DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0 - DB 184,129,128,128,59 ; mov $0x3b808081,%eax - DB 197,249,110,200 ; vmovd %eax,%xmm1 - DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1 - DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1 - DB 197,252,89,217 ; vmulps %ymm1,%ymm0,%ymm3 + DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8 + DB 196,65,124,194,192,2 ; vcmpleps %ymm8,%ymm0,%ymm8 + DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9 + DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10 + DB 197,52,89,200 ; vmulps %ymm0,%ymm9,%ymm9 + DB 196,65,52,88,202 ; vaddps %ymm10,%ymm9,%ymm9 + DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10 + DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11 + DB 197,172,89,192 ; vmulps %ymm0,%ymm10,%ymm0 + DB 196,65,124,88,219 ; vaddps %ymm11,%ymm0,%ymm11 + DB 196,65,124,91,211 ; vcvtdq2ps %ymm11,%ymm10 + DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d + DB 196,193,121,110,192 ; vmovd %r8d,%xmm0 + DB 196,227,121,4,192,0 ; 
vpermilps $0x0,%xmm0,%xmm0 + DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0 + DB 197,44,89,208 ; vmulps %ymm0,%ymm10,%ymm10 + DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d + DB 196,193,121,110,192 ; vmovd %r8d,%xmm0 + DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0 + DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0 + DB 197,44,92,208 ; vsubps %ymm0,%ymm10,%ymm10 + DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d + DB 196,193,121,110,192 ; vmovd %r8d,%xmm0 + DB 197,249,112,192,0 ; vpshufd $0x0,%xmm0,%xmm0 + DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0 + DB 196,65,124,84,219 ; vandps %ymm11,%ymm0,%ymm11 + DB 65,184,0,0,0,63 ; mov $0x3f000000,%r8d + DB 196,193,121,110,192 ; vmovd %r8d,%xmm0 + DB 197,249,112,192,0 ; vpshufd $0x0,%xmm0,%xmm0 + DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0 + DB 197,36,86,216 ; vorps %ymm0,%ymm11,%ymm11 + DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d + DB 196,193,121,110,192 ; vmovd %r8d,%xmm0 + DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0 + DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0 + DB 197,44,88,208 ; vaddps %ymm0,%ymm10,%ymm10 + DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d + DB 196,193,121,110,192 ; vmovd %r8d,%xmm0 + DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0 + DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0 + DB 197,164,89,192 ; vmulps %ymm0,%ymm11,%ymm0 + DB 197,44,92,208 ; vsubps %ymm0,%ymm10,%ymm10 + DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d + DB 196,193,121,110,192 ; vmovd %r8d,%xmm0 + DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0 + DB 196,99,125,24,224,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm12 + DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d + DB 196,193,121,110,192 ; vmovd %r8d,%xmm0 + DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0 + DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0 + DB 197,164,88,192 ; vaddps %ymm0,%ymm11,%ymm0 + DB 197,156,94,192 ; vdivps %ymm0,%ymm12,%ymm0 + DB 197,172,92,192 ; 
vsubps %ymm0,%ymm10,%ymm0 + DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10 + DB 197,44,89,216 ; vmulps %ymm0,%ymm10,%ymm11 + DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10 + DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10 + DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d + DB 196,193,121,110,192 ; vmovd %r8d,%xmm0 + DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0 + DB 196,99,125,24,224,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm12 + DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d + DB 196,193,121,110,192 ; vmovd %r8d,%xmm0 + DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0 + DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0 + DB 196,65,124,88,219 ; vaddps %ymm11,%ymm0,%ymm11 + DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d + DB 196,193,121,110,192 ; vmovd %r8d,%xmm0 + DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0 + DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0 + DB 196,193,124,89,194 ; vmulps %ymm10,%ymm0,%ymm0 + DB 197,36,92,216 ; vsubps %ymm0,%ymm11,%ymm11 + DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d + DB 196,193,121,110,192 ; vmovd %r8d,%xmm0 + DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0 + DB 196,99,125,24,232,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm13 + DB 65,184,248,245,154,64 ; mov $0x409af5f8,%r8d + DB 196,193,121,110,192 ; vmovd %r8d,%xmm0 + DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0 + DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0 + DB 196,193,124,92,194 ; vsubps %ymm10,%ymm0,%ymm0 + DB 197,148,94,192 ; vdivps %ymm0,%ymm13,%ymm0 + DB 197,164,88,192 ; vaddps %ymm0,%ymm11,%ymm0 + DB 197,156,89,192 ; vmulps %ymm0,%ymm12,%ymm0 + DB 197,253,91,192 ; vcvtps2dq %ymm0,%ymm0 + DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10 + DB 196,193,124,88,194 ; vaddps %ymm10,%ymm0,%ymm0 + DB 196,195,125,74,193,128 ; vblendvps %ymm8,%ymm9,%ymm0,%ymm0 + DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8 + DB 196,65,124,95,192 ; vmaxps %ymm8,%ymm0,%ymm8 + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 
197,249,110,192 ; vmovd %eax,%xmm0 + DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0 + DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0 + DB 197,188,93,192 ; vminps %ymm0,%ymm8,%ymm0 DB 72,173 ; lods %ds:(%rsi),%rax - DB 197,252,87,192 ; vxorps %ymm0,%ymm0,%ymm0 - DB 197,244,87,201 ; vxorps %ymm1,%ymm1,%ymm1 - DB 197,236,87,210 ; vxorps %ymm2,%ymm2,%ymm2 - DB 76,137,193 ; mov %r8,%rcx DB 255,224 ; jmpq *%rax - DB 49,201 ; xor %ecx,%ecx - DB 77,137,194 ; mov %r8,%r10 - DB 69,49,201 ; xor %r9d,%r9d - DB 68,15,182,24 ; movzbl (%rax),%r11d - DB 72,255,192 ; inc %rax - DB 73,211,227 ; shl %cl,%r11 - DB 77,9,217 ; or %r11,%r9 - DB 72,131,193,8 ; add $0x8,%rcx - DB 73,255,202 ; dec %r10 - DB 117,234 ; jne 29c2 <_sk_load_a8_avx+0x62> - DB 196,193,249,110,193 ; vmovq %r9,%xmm0 - DB 235,149 ; jmp 2974 <_sk_load_a8_avx+0x14> -PUBLIC _sk_gather_a8_avx +PUBLIC _sk_parametric_g_avx +_sk_parametric_g_avx LABEL PROC + DB 72,173 ; lods %ds:(%rsi),%rax + DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8 + DB 196,65,116,194,192,2 ; vcmpleps %ymm8,%ymm1,%ymm8 + DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9 + DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10 + DB 197,52,89,201 ; vmulps %ymm1,%ymm9,%ymm9 + DB 196,65,52,88,202 ; vaddps %ymm10,%ymm9,%ymm9 + DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10 + DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11 + DB 197,172,89,201 ; vmulps %ymm1,%ymm10,%ymm1 + DB 196,65,116,88,219 ; vaddps %ymm11,%ymm1,%ymm11 + DB 196,65,124,91,211 ; vcvtdq2ps %ymm11,%ymm10 + DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d + DB 196,193,121,110,200 ; vmovd %r8d,%xmm1 + DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1 + DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1 + DB 197,44,89,209 ; vmulps %ymm1,%ymm10,%ymm10 + DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d + DB 196,193,121,110,200 ; vmovd %r8d,%xmm1 + DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1 + DB 196,227,117,24,201,1 ; vinsertf128 
$0x1,%xmm1,%ymm1,%ymm1 + DB 197,44,92,209 ; vsubps %ymm1,%ymm10,%ymm10 + DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d + DB 196,193,121,110,200 ; vmovd %r8d,%xmm1 + DB 197,249,112,201,0 ; vpshufd $0x0,%xmm1,%xmm1 + DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1 + DB 196,65,116,84,219 ; vandps %ymm11,%ymm1,%ymm11 + DB 65,184,0,0,0,63 ; mov $0x3f000000,%r8d + DB 196,193,121,110,200 ; vmovd %r8d,%xmm1 + DB 197,249,112,201,0 ; vpshufd $0x0,%xmm1,%xmm1 + DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1 + DB 197,36,86,217 ; vorps %ymm1,%ymm11,%ymm11 + DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d + DB 196,193,121,110,200 ; vmovd %r8d,%xmm1 + DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1 + DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1 + DB 197,44,88,209 ; vaddps %ymm1,%ymm10,%ymm10 + DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d + DB 196,193,121,110,200 ; vmovd %r8d,%xmm1 + DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1 + DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1 + DB 197,164,89,201 ; vmulps %ymm1,%ymm11,%ymm1 + DB 197,44,92,209 ; vsubps %ymm1,%ymm10,%ymm10 + DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d + DB 196,193,121,110,200 ; vmovd %r8d,%xmm1 + DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1 + DB 196,99,117,24,225,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm12 + DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d + DB 196,193,121,110,200 ; vmovd %r8d,%xmm1 + DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1 + DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1 + DB 197,164,88,201 ; vaddps %ymm1,%ymm11,%ymm1 + DB 197,156,94,201 ; vdivps %ymm1,%ymm12,%ymm1 + DB 197,172,92,201 ; vsubps %ymm1,%ymm10,%ymm1 + DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10 + DB 197,44,89,217 ; vmulps %ymm1,%ymm10,%ymm11 + DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10 + DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10 + DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d + DB 196,193,121,110,200 ; vmovd %r8d,%xmm1 + 
DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1 + DB 196,99,117,24,225,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm12 + DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d + DB 196,193,121,110,200 ; vmovd %r8d,%xmm1 + DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1 + DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1 + DB 196,65,116,88,219 ; vaddps %ymm11,%ymm1,%ymm11 + DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d + DB 196,193,121,110,200 ; vmovd %r8d,%xmm1 + DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1 + DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1 + DB 196,193,116,89,202 ; vmulps %ymm10,%ymm1,%ymm1 + DB 197,36,92,217 ; vsubps %ymm1,%ymm11,%ymm11 + DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d + DB 196,193,121,110,200 ; vmovd %r8d,%xmm1 + DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1 + DB 196,99,117,24,233,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm13 + DB 65,184,248,245,154,64 ; mov $0x409af5f8,%r8d + DB 196,193,121,110,200 ; vmovd %r8d,%xmm1 + DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1 + DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1 + DB 196,193,116,92,202 ; vsubps %ymm10,%ymm1,%ymm1 + DB 197,148,94,201 ; vdivps %ymm1,%ymm13,%ymm1 + DB 197,164,88,201 ; vaddps %ymm1,%ymm11,%ymm1 + DB 197,156,89,201 ; vmulps %ymm1,%ymm12,%ymm1 + DB 197,253,91,201 ; vcvtps2dq %ymm1,%ymm1 + DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10 + DB 196,193,116,88,202 ; vaddps %ymm10,%ymm1,%ymm1 + DB 196,195,117,74,201,128 ; vblendvps %ymm8,%ymm9,%ymm1,%ymm1 + DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8 + DB 196,65,116,95,192 ; vmaxps %ymm8,%ymm1,%ymm8 + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 197,249,110,200 ; vmovd %eax,%xmm1 + DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1 + DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1 + DB 197,188,93,201 ; vminps %ymm1,%ymm8,%ymm1 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_parametric_b_avx +_sk_parametric_b_avx LABEL PROC + DB 
72,173 ; lods %ds:(%rsi),%rax + DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8 + DB 196,65,108,194,192,2 ; vcmpleps %ymm8,%ymm2,%ymm8 + DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9 + DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10 + DB 197,52,89,202 ; vmulps %ymm2,%ymm9,%ymm9 + DB 196,65,52,88,202 ; vaddps %ymm10,%ymm9,%ymm9 + DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10 + DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11 + DB 197,172,89,210 ; vmulps %ymm2,%ymm10,%ymm2 + DB 196,65,108,88,219 ; vaddps %ymm11,%ymm2,%ymm11 + DB 196,65,124,91,211 ; vcvtdq2ps %ymm11,%ymm10 + DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d + DB 196,193,121,110,208 ; vmovd %r8d,%xmm2 + DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2 + DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2 + DB 197,44,89,210 ; vmulps %ymm2,%ymm10,%ymm10 + DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d + DB 196,193,121,110,208 ; vmovd %r8d,%xmm2 + DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2 + DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2 + DB 197,44,92,210 ; vsubps %ymm2,%ymm10,%ymm10 + DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d + DB 196,193,121,110,208 ; vmovd %r8d,%xmm2 + DB 197,249,112,210,0 ; vpshufd $0x0,%xmm2,%xmm2 + DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2 + DB 196,65,108,84,219 ; vandps %ymm11,%ymm2,%ymm11 + DB 65,184,0,0,0,63 ; mov $0x3f000000,%r8d + DB 196,193,121,110,208 ; vmovd %r8d,%xmm2 + DB 197,249,112,210,0 ; vpshufd $0x0,%xmm2,%xmm2 + DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2 + DB 197,36,86,218 ; vorps %ymm2,%ymm11,%ymm11 + DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d + DB 196,193,121,110,208 ; vmovd %r8d,%xmm2 + DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2 + DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2 + DB 197,44,88,210 ; vaddps %ymm2,%ymm10,%ymm10 + DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d + DB 196,193,121,110,208 ; vmovd %r8d,%xmm2 + DB 
196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2 + DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2 + DB 197,164,89,210 ; vmulps %ymm2,%ymm11,%ymm2 + DB 197,44,92,210 ; vsubps %ymm2,%ymm10,%ymm10 + DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d + DB 196,193,121,110,208 ; vmovd %r8d,%xmm2 + DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2 + DB 196,99,109,24,226,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm12 + DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d + DB 196,193,121,110,208 ; vmovd %r8d,%xmm2 + DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2 + DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2 + DB 197,164,88,210 ; vaddps %ymm2,%ymm11,%ymm2 + DB 197,156,94,210 ; vdivps %ymm2,%ymm12,%ymm2 + DB 197,172,92,210 ; vsubps %ymm2,%ymm10,%ymm2 + DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10 + DB 197,44,89,218 ; vmulps %ymm2,%ymm10,%ymm11 + DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10 + DB 196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10 + DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d + DB 196,193,121,110,208 ; vmovd %r8d,%xmm2 + DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2 + DB 196,99,109,24,226,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm12 + DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d + DB 196,193,121,110,208 ; vmovd %r8d,%xmm2 + DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2 + DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2 + DB 196,65,108,88,219 ; vaddps %ymm11,%ymm2,%ymm11 + DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d + DB 196,193,121,110,208 ; vmovd %r8d,%xmm2 + DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2 + DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2 + DB 196,193,108,89,210 ; vmulps %ymm10,%ymm2,%ymm2 + DB 197,36,92,218 ; vsubps %ymm2,%ymm11,%ymm11 + DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d + DB 196,193,121,110,208 ; vmovd %r8d,%xmm2 + DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2 + DB 196,99,109,24,234,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm13 + DB 65,184,248,245,154,64 ; mov 
$0x409af5f8,%r8d + DB 196,193,121,110,208 ; vmovd %r8d,%xmm2 + DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2 + DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2 + DB 196,193,108,92,210 ; vsubps %ymm10,%ymm2,%ymm2 + DB 197,148,94,210 ; vdivps %ymm2,%ymm13,%ymm2 + DB 197,164,88,210 ; vaddps %ymm2,%ymm11,%ymm2 + DB 197,156,89,210 ; vmulps %ymm2,%ymm12,%ymm2 + DB 197,253,91,210 ; vcvtps2dq %ymm2,%ymm2 + DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10 + DB 196,193,108,88,210 ; vaddps %ymm10,%ymm2,%ymm2 + DB 196,195,109,74,209,128 ; vblendvps %ymm8,%ymm9,%ymm2,%ymm2 + DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8 + DB 196,65,108,95,192 ; vmaxps %ymm8,%ymm2,%ymm8 + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 197,249,110,208 ; vmovd %eax,%xmm2 + DB 196,227,121,4,210,0 ; vpermilps $0x0,%xmm2,%xmm2 + DB 196,227,109,24,210,1 ; vinsertf128 $0x1,%xmm2,%ymm2,%ymm2 + DB 197,188,93,210 ; vminps %ymm2,%ymm8,%ymm2 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_parametric_a_avx +_sk_parametric_a_avx LABEL PROC + DB 72,173 ; lods %ds:(%rsi),%rax + DB 196,98,125,24,64,16 ; vbroadcastss 0x10(%rax),%ymm8 + DB 196,65,100,194,192,2 ; vcmpleps %ymm8,%ymm3,%ymm8 + DB 196,98,125,24,72,12 ; vbroadcastss 0xc(%rax),%ymm9 + DB 196,98,125,24,80,24 ; vbroadcastss 0x18(%rax),%ymm10 + DB 197,52,89,203 ; vmulps %ymm3,%ymm9,%ymm9 + DB 196,65,52,88,202 ; vaddps %ymm10,%ymm9,%ymm9 + DB 196,98,125,24,80,4 ; vbroadcastss 0x4(%rax),%ymm10 + DB 196,98,125,24,88,8 ; vbroadcastss 0x8(%rax),%ymm11 + DB 197,172,89,219 ; vmulps %ymm3,%ymm10,%ymm3 + DB 196,65,100,88,219 ; vaddps %ymm11,%ymm3,%ymm11 + DB 196,65,124,91,211 ; vcvtdq2ps %ymm11,%ymm10 + DB 65,184,0,0,0,52 ; mov $0x34000000,%r8d + DB 196,193,121,110,216 ; vmovd %r8d,%xmm3 + DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3 + DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3 + DB 197,44,89,211 ; vmulps %ymm3,%ymm10,%ymm10 + DB 65,184,0,0,254,66 ; mov $0x42fe0000,%r8d + DB 
196,193,121,110,216 ; vmovd %r8d,%xmm3 + DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3 + DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3 + DB 197,44,92,211 ; vsubps %ymm3,%ymm10,%ymm10 + DB 65,184,255,255,127,0 ; mov $0x7fffff,%r8d + DB 196,193,121,110,216 ; vmovd %r8d,%xmm3 + DB 197,249,112,219,0 ; vpshufd $0x0,%xmm3,%xmm3 + DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3 + DB 196,65,100,84,219 ; vandps %ymm11,%ymm3,%ymm11 + DB 65,184,0,0,0,63 ; mov $0x3f000000,%r8d + DB 196,193,121,110,216 ; vmovd %r8d,%xmm3 + DB 197,249,112,219,0 ; vpshufd $0x0,%xmm3,%xmm3 + DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3 + DB 197,36,86,219 ; vorps %ymm3,%ymm11,%ymm11 + DB 65,184,42,145,49,64 ; mov $0x4031912a,%r8d + DB 196,193,121,110,216 ; vmovd %r8d,%xmm3 + DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3 + DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3 + DB 197,44,88,211 ; vaddps %ymm3,%ymm10,%ymm10 + DB 65,184,117,191,191,63 ; mov $0x3fbfbf75,%r8d + DB 196,193,121,110,216 ; vmovd %r8d,%xmm3 + DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3 + DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3 + DB 197,164,89,219 ; vmulps %ymm3,%ymm11,%ymm3 + DB 197,44,92,211 ; vsubps %ymm3,%ymm10,%ymm10 + DB 65,184,163,233,220,63 ; mov $0x3fdce9a3,%r8d + DB 196,193,121,110,216 ; vmovd %r8d,%xmm3 + DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3 + DB 196,99,101,24,227,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm12 + DB 65,184,249,68,180,62 ; mov $0x3eb444f9,%r8d + DB 196,193,121,110,216 ; vmovd %r8d,%xmm3 + DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3 + DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3 + DB 197,164,88,219 ; vaddps %ymm3,%ymm11,%ymm3 + DB 197,156,94,219 ; vdivps %ymm3,%ymm12,%ymm3 + DB 197,172,92,219 ; vsubps %ymm3,%ymm10,%ymm3 + DB 196,98,125,24,16 ; vbroadcastss (%rax),%ymm10 + DB 197,44,89,219 ; vmulps %ymm3,%ymm10,%ymm11 + DB 196,67,125,8,211,1 ; vroundps $0x1,%ymm11,%ymm10 + DB 
196,65,36,92,210 ; vsubps %ymm10,%ymm11,%ymm10 + DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d + DB 196,193,121,110,216 ; vmovd %r8d,%xmm3 + DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3 + DB 196,99,101,24,227,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm12 + DB 65,184,81,140,242,66 ; mov $0x42f28c51,%r8d + DB 196,193,121,110,216 ; vmovd %r8d,%xmm3 + DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3 + DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3 + DB 196,65,100,88,219 ; vaddps %ymm11,%ymm3,%ymm11 + DB 65,184,141,188,190,63 ; mov $0x3fbebc8d,%r8d + DB 196,193,121,110,216 ; vmovd %r8d,%xmm3 + DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3 + DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3 + DB 196,193,100,89,218 ; vmulps %ymm10,%ymm3,%ymm3 + DB 197,36,92,219 ; vsubps %ymm3,%ymm11,%ymm11 + DB 65,184,254,210,221,65 ; mov $0x41ddd2fe,%r8d + DB 196,193,121,110,216 ; vmovd %r8d,%xmm3 + DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3 + DB 196,99,101,24,235,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm13 + DB 65,184,248,245,154,64 ; mov $0x409af5f8,%r8d + DB 196,193,121,110,216 ; vmovd %r8d,%xmm3 + DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3 + DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3 + DB 196,193,100,92,218 ; vsubps %ymm10,%ymm3,%ymm3 + DB 197,148,94,219 ; vdivps %ymm3,%ymm13,%ymm3 + DB 197,164,88,219 ; vaddps %ymm3,%ymm11,%ymm3 + DB 197,156,89,219 ; vmulps %ymm3,%ymm12,%ymm3 + DB 197,253,91,219 ; vcvtps2dq %ymm3,%ymm3 + DB 196,98,125,24,80,20 ; vbroadcastss 0x14(%rax),%ymm10 + DB 196,193,100,88,218 ; vaddps %ymm10,%ymm3,%ymm3 + DB 196,195,101,74,217,128 ; vblendvps %ymm8,%ymm9,%ymm3,%ymm3 + DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8 + DB 196,65,100,95,192 ; vmaxps %ymm8,%ymm3,%ymm8 + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 197,249,110,216 ; vmovd %eax,%xmm3 + DB 196,227,121,4,219,0 ; vpermilps $0x0,%xmm3,%xmm3 + DB 196,227,101,24,219,1 ; vinsertf128 $0x1,%xmm3,%ymm3,%ymm3 + DB 197,188,93,219 ; vminps 
%ymm3,%ymm8,%ymm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_load_a8_avx +_sk_load_a8_avx LABEL PROC + DB 73,137,200 ; mov %rcx,%r8 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 72,139,0 ; mov (%rax),%rax + DB 72,1,248 ; add %rdi,%rax + DB 77,133,192 ; test %r8,%r8 + DB 117,74 ; jne 31b2 <_sk_load_a8_avx+0x5a> + DB 197,250,126,0 ; vmovq (%rax),%xmm0 + DB 196,226,121,49,200 ; vpmovzxbd %xmm0,%xmm1 + DB 196,227,121,4,192,229 ; vpermilps $0xe5,%xmm0,%xmm0 + DB 196,226,121,49,192 ; vpmovzxbd %xmm0,%xmm0 + DB 196,227,117,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm1,%ymm0 + DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0 + DB 184,129,128,128,59 ; mov $0x3b808081,%eax + DB 197,249,110,200 ; vmovd %eax,%xmm1 + DB 196,227,121,4,201,0 ; vpermilps $0x0,%xmm1,%xmm1 + DB 196,227,117,24,201,1 ; vinsertf128 $0x1,%xmm1,%ymm1,%ymm1 + DB 197,252,89,217 ; vmulps %ymm1,%ymm0,%ymm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 197,252,87,192 ; vxorps %ymm0,%ymm0,%ymm0 + DB 197,244,87,201 ; vxorps %ymm1,%ymm1,%ymm1 + DB 197,236,87,210 ; vxorps %ymm2,%ymm2,%ymm2 + DB 76,137,193 ; mov %r8,%rcx + DB 255,224 ; jmpq *%rax + DB 49,201 ; xor %ecx,%ecx + DB 77,137,194 ; mov %r8,%r10 + DB 69,49,201 ; xor %r9d,%r9d + DB 68,15,182,24 ; movzbl (%rax),%r11d + DB 72,255,192 ; inc %rax + DB 73,211,227 ; shl %cl,%r11 + DB 77,9,217 ; or %r11,%r9 + DB 72,131,193,8 ; add $0x8,%rcx + DB 73,255,202 ; dec %r10 + DB 117,234 ; jne 31ba <_sk_load_a8_avx+0x62> + DB 196,193,249,110,193 ; vmovq %r9,%xmm0 + DB 235,149 ; jmp 316c <_sk_load_a8_avx+0x14> + +PUBLIC _sk_gather_a8_avx _sk_gather_a8_avx LABEL PROC DB 65,87 ; push %r15 DB 65,86 ; push %r14 @@ -6576,7 +7308,7 @@ _sk_store_a8_avx LABEL PROC DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8 DB 196,65,57,103,192 ; vpackuswb %xmm8,%xmm8,%xmm8 DB 72,133,201 ; test %rcx,%rcx - DB 117,10 ; jne 2b1b <_sk_store_a8_avx+0x42> + DB 117,10 ; jne 3313 <_sk_store_a8_avx+0x42> DB 196,65,123,17,4,57 ; vmovsd %xmm8,(%r9,%rdi,1) DB 72,173 ; lods %ds:(%rsi),%rax DB 
255,224 ; jmpq *%rax @@ -6584,10 +7316,10 @@ _sk_store_a8_avx LABEL PROC DB 65,128,224,7 ; and $0x7,%r8b DB 65,254,200 ; dec %r8b DB 65,128,248,6 ; cmp $0x6,%r8b - DB 119,236 ; ja 2b17 <_sk_store_a8_avx+0x3e> + DB 119,236 ; ja 330f <_sk_store_a8_avx+0x3e> DB 196,66,121,48,192 ; vpmovzxbw %xmm8,%xmm8 DB 65,15,182,192 ; movzbl %r8b,%eax - DB 76,141,5,69,0,0,0 ; lea 0x45(%rip),%r8 # 2b80 <_sk_store_a8_avx+0xa7> + DB 76,141,5,69,0,0,0 ; lea 0x45(%rip),%r8 # 3378 <_sk_store_a8_avx+0xa7> DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax DB 76,1,192 ; add %r8,%rax DB 255,224 ; jmpq *%rax @@ -6598,7 +7330,7 @@ _sk_store_a8_avx LABEL PROC DB 196,67,121,20,68,57,2,4 ; vpextrb $0x4,%xmm8,0x2(%r9,%rdi,1) DB 196,67,121,20,68,57,1,2 ; vpextrb $0x2,%xmm8,0x1(%r9,%rdi,1) DB 196,67,121,20,4,57,0 ; vpextrb $0x0,%xmm8,(%r9,%rdi,1) - DB 235,154 ; jmp 2b17 <_sk_store_a8_avx+0x3e> + DB 235,154 ; jmp 330f <_sk_store_a8_avx+0x3e> DB 15,31,0 ; nopl (%rax) DB 244 ; hlt DB 255 ; (bad) @@ -6631,7 +7363,7 @@ _sk_load_g8_avx LABEL PROC DB 72,139,0 ; mov (%rax),%rax DB 72,1,248 ; add %rdi,%rax DB 77,133,192 ; test %r8,%r8 - DB 117,91 ; jne 2c07 <_sk_load_g8_avx+0x6b> + DB 117,91 ; jne 33ff <_sk_load_g8_avx+0x6b> DB 197,250,126,0 ; vmovq (%rax),%xmm0 DB 196,226,121,49,200 ; vpmovzxbd %xmm0,%xmm1 DB 196,227,121,4,192,229 ; vpermilps $0xe5,%xmm0,%xmm0 @@ -6661,9 +7393,9 @@ _sk_load_g8_avx LABEL PROC DB 77,9,217 ; or %r11,%r9 DB 72,131,193,8 ; add $0x8,%rcx DB 73,255,202 ; dec %r10 - DB 117,234 ; jne 2c0f <_sk_load_g8_avx+0x73> + DB 117,234 ; jne 3407 <_sk_load_g8_avx+0x73> DB 196,193,249,110,193 ; vmovq %r9,%xmm0 - DB 235,132 ; jmp 2bb0 <_sk_load_g8_avx+0x14> + DB 235,132 ; jmp 33a8 <_sk_load_g8_avx+0x14> PUBLIC _sk_gather_g8_avx _sk_gather_g8_avx LABEL PROC @@ -6734,9 +7466,9 @@ _sk_gather_i8_avx LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 73,137,192 ; mov %rax,%r8 DB 77,133,192 ; test %r8,%r8 - DB 116,5 ; je 2d46 <_sk_gather_i8_avx+0xf> + DB 116,5 ; je 353e <_sk_gather_i8_avx+0xf> DB 76,137,192 ; mov 
%r8,%rax - DB 235,2 ; jmp 2d48 <_sk_gather_i8_avx+0x11> + DB 235,2 ; jmp 3540 <_sk_gather_i8_avx+0x11> DB 72,173 ; lods %ds:(%rsi),%rax DB 65,87 ; push %r15 DB 65,86 ; push %r14 @@ -6839,7 +7571,7 @@ _sk_load_565_avx LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 76,139,16 ; mov (%rax),%r10 DB 72,133,201 ; test %rcx,%rcx - DB 15,133,209,0,0,0 ; jne 2fe2 <_sk_load_565_avx+0xdf> + DB 15,133,209,0,0,0 ; jne 37da <_sk_load_565_avx+0xdf> DB 196,193,122,111,4,122 ; vmovdqu (%r10,%rdi,2),%xmm0 DB 197,241,239,201 ; vpxor %xmm1,%xmm1,%xmm1 DB 197,249,105,201 ; vpunpckhwd %xmm1,%xmm0,%xmm1 @@ -6889,9 +7621,9 @@ _sk_load_565_avx LABEL PROC DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0 DB 65,254,200 ; dec %r8b DB 65,128,248,6 ; cmp $0x6,%r8b - DB 15,135,29,255,255,255 ; ja 2f17 <_sk_load_565_avx+0x14> + DB 15,135,29,255,255,255 ; ja 370f <_sk_load_565_avx+0x14> DB 69,15,182,192 ; movzbl %r8b,%r8d - DB 76,141,13,75,0,0,0 ; lea 0x4b(%rip),%r9 # 3050 <_sk_load_565_avx+0x14d> + DB 76,141,13,75,0,0,0 ; lea 0x4b(%rip),%r9 # 3848 <_sk_load_565_avx+0x14d> DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax DB 76,1,200 ; add %r9,%rax DB 255,224 ; jmpq *%rax @@ -6903,7 +7635,7 @@ _sk_load_565_avx LABEL PROC DB 196,193,121,196,68,122,4,2 ; vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0 DB 196,193,121,196,68,122,2,1 ; vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0 DB 196,193,121,196,4,122,0 ; vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0 - DB 233,201,254,255,255 ; jmpq 2f17 <_sk_load_565_avx+0x14> + DB 233,201,254,255,255 ; jmpq 370f <_sk_load_565_avx+0x14> DB 102,144 ; xchg %ax,%ax DB 242,255 ; repnz (bad) DB 255 ; (bad) @@ -7056,7 +7788,7 @@ _sk_store_565_avx LABEL PROC DB 196,67,125,25,193,1 ; vextractf128 $0x1,%ymm8,%xmm9 DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8 DB 72,133,201 ; test %rcx,%rcx - DB 117,10 ; jne 329b <_sk_store_565_avx+0x9e> + DB 117,10 ; jne 3a93 <_sk_store_565_avx+0x9e> DB 196,65,122,127,4,121 ; vmovdqu %xmm8,(%r9,%rdi,2) DB 72,173 ; lods %ds:(%rsi),%rax DB 255,224 ; jmpq *%rax @@ 
-7064,9 +7796,9 @@ _sk_store_565_avx LABEL PROC DB 65,128,224,7 ; and $0x7,%r8b DB 65,254,200 ; dec %r8b DB 65,128,248,6 ; cmp $0x6,%r8b - DB 119,236 ; ja 3297 <_sk_store_565_avx+0x9a> + DB 119,236 ; ja 3a8f <_sk_store_565_avx+0x9a> DB 65,15,182,192 ; movzbl %r8b,%eax - DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 32f8 <_sk_store_565_avx+0xfb> + DB 76,141,5,66,0,0,0 ; lea 0x42(%rip),%r8 # 3af0 <_sk_store_565_avx+0xfb> DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax DB 76,1,192 ; add %r8,%rax DB 255,224 ; jmpq *%rax @@ -7077,7 +7809,7 @@ _sk_store_565_avx LABEL PROC DB 196,67,121,21,68,121,4,2 ; vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2) DB 196,67,121,21,68,121,2,1 ; vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2) DB 196,67,121,21,4,121,0 ; vpextrw $0x0,%xmm8,(%r9,%rdi,2) - DB 235,159 ; jmp 3297 <_sk_store_565_avx+0x9a> + DB 235,159 ; jmp 3a8f <_sk_store_565_avx+0x9a> DB 247,255 ; idiv %edi DB 255 ; (bad) DB 255 ; (bad) @@ -7106,7 +7838,7 @@ _sk_load_4444_avx LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 76,139,16 ; mov (%rax),%r10 DB 72,133,201 ; test %rcx,%rcx - DB 15,133,245,0,0,0 ; jne 3417 <_sk_load_4444_avx+0x103> + DB 15,133,245,0,0,0 ; jne 3c0f <_sk_load_4444_avx+0x103> DB 196,193,122,111,4,122 ; vmovdqu (%r10,%rdi,2),%xmm0 DB 197,241,239,201 ; vpxor %xmm1,%xmm1,%xmm1 DB 197,249,105,201 ; vpunpckhwd %xmm1,%xmm0,%xmm1 @@ -7163,9 +7895,9 @@ _sk_load_4444_avx LABEL PROC DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0 DB 65,254,200 ; dec %r8b DB 65,128,248,6 ; cmp $0x6,%r8b - DB 15,135,249,254,255,255 ; ja 3328 <_sk_load_4444_avx+0x14> + DB 15,135,249,254,255,255 ; ja 3b20 <_sk_load_4444_avx+0x14> DB 69,15,182,192 ; movzbl %r8b,%r8d - DB 76,141,13,74,0,0,0 ; lea 0x4a(%rip),%r9 # 3484 <_sk_load_4444_avx+0x170> + DB 76,141,13,74,0,0,0 ; lea 0x4a(%rip),%r9 # 3c7c <_sk_load_4444_avx+0x170> DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax DB 76,1,200 ; add %r9,%rax DB 255,224 ; jmpq *%rax @@ -7177,12 +7909,12 @@ _sk_load_4444_avx LABEL PROC DB 196,193,121,196,68,122,4,2 ; vpinsrw 
$0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0 DB 196,193,121,196,68,122,2,1 ; vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0 DB 196,193,121,196,4,122,0 ; vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0 - DB 233,165,254,255,255 ; jmpq 3328 <_sk_load_4444_avx+0x14> + DB 233,165,254,255,255 ; jmpq 3b20 <_sk_load_4444_avx+0x14> DB 144 ; nop DB 243,255 ; repz (bad) DB 255 ; (bad) DB 255 ; (bad) - DB 235,255 ; jmp 3489 <_sk_load_4444_avx+0x175> + DB 235,255 ; jmp 3c81 <_sk_load_4444_avx+0x175> DB 255 ; (bad) DB 255,227 ; jmpq *%rbx DB 255 ; (bad) @@ -7339,7 +8071,7 @@ _sk_store_4444_avx LABEL PROC DB 196,67,125,25,193,1 ; vextractf128 $0x1,%ymm8,%xmm9 DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8 DB 72,133,201 ; test %rcx,%rcx - DB 117,10 ; jne 3704 <_sk_store_4444_avx+0xaf> + DB 117,10 ; jne 3efc <_sk_store_4444_avx+0xaf> DB 196,65,122,127,4,121 ; vmovdqu %xmm8,(%r9,%rdi,2) DB 72,173 ; lods %ds:(%rsi),%rax DB 255,224 ; jmpq *%rax @@ -7347,9 +8079,9 @@ _sk_store_4444_avx LABEL PROC DB 65,128,224,7 ; and $0x7,%r8b DB 65,254,200 ; dec %r8b DB 65,128,248,6 ; cmp $0x6,%r8b - DB 119,236 ; ja 3700 <_sk_store_4444_avx+0xab> + DB 119,236 ; ja 3ef8 <_sk_store_4444_avx+0xab> DB 65,15,182,192 ; movzbl %r8b,%eax - DB 76,141,5,69,0,0,0 ; lea 0x45(%rip),%r8 # 3764 <_sk_store_4444_avx+0x10f> + DB 76,141,5,69,0,0,0 ; lea 0x45(%rip),%r8 # 3f5c <_sk_store_4444_avx+0x10f> DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax DB 76,1,192 ; add %r8,%rax DB 255,224 ; jmpq *%rax @@ -7360,7 +8092,7 @@ _sk_store_4444_avx LABEL PROC DB 196,67,121,21,68,121,4,2 ; vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2) DB 196,67,121,21,68,121,2,1 ; vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2) DB 196,67,121,21,4,121,0 ; vpextrw $0x0,%xmm8,(%r9,%rdi,2) - DB 235,159 ; jmp 3700 <_sk_store_4444_avx+0xab> + DB 235,159 ; jmp 3ef8 <_sk_store_4444_avx+0xab> DB 15,31,0 ; nopl (%rax) DB 244 ; hlt DB 255 ; (bad) @@ -7391,7 +8123,7 @@ _sk_load_8888_avx LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 76,139,16 ; mov (%rax),%r10 DB 72,133,201 ; test %rcx,%rcx - DB 
15,133,157,0,0,0 ; jne 382b <_sk_load_8888_avx+0xab> + DB 15,133,157,0,0,0 ; jne 4023 <_sk_load_8888_avx+0xab> DB 196,65,124,16,12,186 ; vmovups (%r10,%rdi,4),%ymm9 DB 184,255,0,0,0 ; mov $0xff,%eax DB 197,249,110,192 ; vmovd %eax,%xmm0 @@ -7429,9 +8161,9 @@ _sk_load_8888_avx LABEL PROC DB 196,65,52,87,201 ; vxorps %ymm9,%ymm9,%ymm9 DB 65,254,200 ; dec %r8b DB 65,128,248,6 ; cmp $0x6,%r8b - DB 15,135,80,255,255,255 ; ja 3794 <_sk_load_8888_avx+0x14> + DB 15,135,80,255,255,255 ; ja 3f8c <_sk_load_8888_avx+0x14> DB 69,15,182,192 ; movzbl %r8b,%r8d - DB 76,141,13,137,0,0,0 ; lea 0x89(%rip),%r9 # 38d8 <_sk_load_8888_avx+0x158> + DB 76,141,13,137,0,0,0 ; lea 0x89(%rip),%r9 # 40d0 <_sk_load_8888_avx+0x158> DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax DB 76,1,200 ; add %r9,%rax DB 255,224 ; jmpq *%rax @@ -7454,7 +8186,7 @@ _sk_load_8888_avx LABEL PROC DB 196,99,53,12,200,15 ; vblendps $0xf,%ymm0,%ymm9,%ymm9 DB 196,195,49,34,4,186,0 ; vpinsrd $0x0,(%r10,%rdi,4),%xmm9,%xmm0 DB 196,99,53,12,200,15 ; vblendps $0xf,%ymm0,%ymm9,%ymm9 - DB 233,188,254,255,255 ; jmpq 3794 <_sk_load_8888_avx+0x14> + DB 233,188,254,255,255 ; jmpq 3f8c <_sk_load_8888_avx+0x14> DB 238 ; out %al,(%dx) DB 255 ; (bad) DB 255 ; (bad) @@ -7580,7 +8312,7 @@ _sk_store_8888_avx LABEL PROC DB 196,65,45,86,192 ; vorpd %ymm8,%ymm10,%ymm8 DB 196,65,53,86,192 ; vorpd %ymm8,%ymm9,%ymm8 DB 72,133,201 ; test %rcx,%rcx - DB 117,10 ; jne 3ad9 <_sk_store_8888_avx+0xa4> + DB 117,10 ; jne 42d1 <_sk_store_8888_avx+0xa4> DB 196,65,124,17,4,185 ; vmovups %ymm8,(%r9,%rdi,4) DB 72,173 ; lods %ds:(%rsi),%rax DB 255,224 ; jmpq *%rax @@ -7588,9 +8320,9 @@ _sk_store_8888_avx LABEL PROC DB 65,128,224,7 ; and $0x7,%r8b DB 65,254,200 ; dec %r8b DB 65,128,248,6 ; cmp $0x6,%r8b - DB 119,236 ; ja 3ad5 <_sk_store_8888_avx+0xa0> + DB 119,236 ; ja 42cd <_sk_store_8888_avx+0xa0> DB 65,15,182,192 ; movzbl %r8b,%eax - DB 76,141,5,84,0,0,0 ; lea 0x54(%rip),%r8 # 3b48 <_sk_store_8888_avx+0x113> + DB 76,141,5,84,0,0,0 ; lea 0x54(%rip),%r8 # 4340 
<_sk_store_8888_avx+0x113> DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax DB 76,1,192 ; add %r8,%rax DB 255,224 ; jmpq *%rax @@ -7604,7 +8336,7 @@ _sk_store_8888_avx LABEL PROC DB 196,67,121,22,68,185,8,2 ; vpextrd $0x2,%xmm8,0x8(%r9,%rdi,4) DB 196,67,121,22,68,185,4,1 ; vpextrd $0x1,%xmm8,0x4(%r9,%rdi,4) DB 196,65,121,126,4,185 ; vmovd %xmm8,(%r9,%rdi,4) - DB 235,143 ; jmp 3ad5 <_sk_store_8888_avx+0xa0> + DB 235,143 ; jmp 42cd <_sk_store_8888_avx+0xa0> DB 102,144 ; xchg %ax,%ax DB 246,255 ; idiv %bh DB 255 ; (bad) @@ -7634,7 +8366,7 @@ _sk_load_f16_avx LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 72,139,0 ; mov (%rax),%rax DB 72,133,201 ; test %rcx,%rcx - DB 15,133,17,1,0,0 ; jne 3c83 <_sk_load_f16_avx+0x11f> + DB 15,133,17,1,0,0 ; jne 447b <_sk_load_f16_avx+0x11f> DB 197,121,16,4,248 ; vmovupd (%rax,%rdi,8),%xmm8 DB 197,249,16,84,248,16 ; vmovupd 0x10(%rax,%rdi,8),%xmm2 DB 197,249,16,92,248,32 ; vmovupd 0x20(%rax,%rdi,8),%xmm3 @@ -7696,29 +8428,29 @@ _sk_load_f16_avx LABEL PROC DB 197,123,16,4,248 ; vmovsd (%rax,%rdi,8),%xmm8 DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9 DB 72,131,249,1 ; cmp $0x1,%rcx - DB 116,79 ; je 3ce2 <_sk_load_f16_avx+0x17e> + DB 116,79 ; je 44da <_sk_load_f16_avx+0x17e> DB 197,57,22,68,248,8 ; vmovhpd 0x8(%rax,%rdi,8),%xmm8,%xmm8 DB 72,131,249,3 ; cmp $0x3,%rcx - DB 114,67 ; jb 3ce2 <_sk_load_f16_avx+0x17e> + DB 114,67 ; jb 44da <_sk_load_f16_avx+0x17e> DB 197,251,16,84,248,16 ; vmovsd 0x10(%rax,%rdi,8),%xmm2 DB 72,131,249,3 ; cmp $0x3,%rcx - DB 116,68 ; je 3cef <_sk_load_f16_avx+0x18b> + DB 116,68 ; je 44e7 <_sk_load_f16_avx+0x18b> DB 197,233,22,84,248,24 ; vmovhpd 0x18(%rax,%rdi,8),%xmm2,%xmm2 DB 72,131,249,5 ; cmp $0x5,%rcx - DB 114,56 ; jb 3cef <_sk_load_f16_avx+0x18b> + DB 114,56 ; jb 44e7 <_sk_load_f16_avx+0x18b> DB 197,251,16,92,248,32 ; vmovsd 0x20(%rax,%rdi,8),%xmm3 DB 72,131,249,5 ; cmp $0x5,%rcx - DB 15,132,194,254,255,255 ; je 3b89 <_sk_load_f16_avx+0x25> + DB 15,132,194,254,255,255 ; je 4381 <_sk_load_f16_avx+0x25> DB 
197,225,22,92,248,40 ; vmovhpd 0x28(%rax,%rdi,8),%xmm3,%xmm3 DB 72,131,249,7 ; cmp $0x7,%rcx - DB 15,130,178,254,255,255 ; jb 3b89 <_sk_load_f16_avx+0x25> + DB 15,130,178,254,255,255 ; jb 4381 <_sk_load_f16_avx+0x25> DB 197,122,126,76,248,48 ; vmovq 0x30(%rax,%rdi,8),%xmm9 - DB 233,167,254,255,255 ; jmpq 3b89 <_sk_load_f16_avx+0x25> + DB 233,167,254,255,255 ; jmpq 4381 <_sk_load_f16_avx+0x25> DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3 DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2 - DB 233,154,254,255,255 ; jmpq 3b89 <_sk_load_f16_avx+0x25> + DB 233,154,254,255,255 ; jmpq 4381 <_sk_load_f16_avx+0x25> DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3 - DB 233,145,254,255,255 ; jmpq 3b89 <_sk_load_f16_avx+0x25> + DB 233,145,254,255,255 ; jmpq 4381 <_sk_load_f16_avx+0x25> PUBLIC _sk_gather_f16_avx _sk_gather_f16_avx LABEL PROC @@ -7858,7 +8590,7 @@ _sk_store_f16_avx LABEL PROC DB 196,65,17,98,200 ; vpunpckldq %xmm8,%xmm13,%xmm9 DB 196,65,17,106,192 ; vpunpckhdq %xmm8,%xmm13,%xmm8 DB 72,133,201 ; test %rcx,%rcx - DB 117,31 ; jne 3f78 <_sk_store_f16_avx+0xd2> + DB 117,31 ; jne 4770 <_sk_store_f16_avx+0xd2> DB 196,65,120,17,28,248 ; vmovups %xmm11,(%r8,%rdi,8) DB 196,65,120,17,84,248,16 ; vmovups %xmm10,0x10(%r8,%rdi,8) DB 196,65,120,17,76,248,32 ; vmovups %xmm9,0x20(%r8,%rdi,8) @@ -7867,22 +8599,22 @@ _sk_store_f16_avx LABEL PROC DB 255,224 ; jmpq *%rax DB 196,65,121,214,28,248 ; vmovq %xmm11,(%r8,%rdi,8) DB 72,131,249,1 ; cmp $0x1,%rcx - DB 116,240 ; je 3f74 <_sk_store_f16_avx+0xce> + DB 116,240 ; je 476c <_sk_store_f16_avx+0xce> DB 196,65,121,23,92,248,8 ; vmovhpd %xmm11,0x8(%r8,%rdi,8) DB 72,131,249,3 ; cmp $0x3,%rcx - DB 114,227 ; jb 3f74 <_sk_store_f16_avx+0xce> + DB 114,227 ; jb 476c <_sk_store_f16_avx+0xce> DB 196,65,121,214,84,248,16 ; vmovq %xmm10,0x10(%r8,%rdi,8) - DB 116,218 ; je 3f74 <_sk_store_f16_avx+0xce> + DB 116,218 ; je 476c <_sk_store_f16_avx+0xce> DB 196,65,121,23,84,248,24 ; vmovhpd %xmm10,0x18(%r8,%rdi,8) DB 72,131,249,5 ; cmp $0x5,%rcx - DB 114,205 ; 
jb 3f74 <_sk_store_f16_avx+0xce> + DB 114,205 ; jb 476c <_sk_store_f16_avx+0xce> DB 196,65,121,214,76,248,32 ; vmovq %xmm9,0x20(%r8,%rdi,8) - DB 116,196 ; je 3f74 <_sk_store_f16_avx+0xce> + DB 116,196 ; je 476c <_sk_store_f16_avx+0xce> DB 196,65,121,23,76,248,40 ; vmovhpd %xmm9,0x28(%r8,%rdi,8) DB 72,131,249,7 ; cmp $0x7,%rcx - DB 114,183 ; jb 3f74 <_sk_store_f16_avx+0xce> + DB 114,183 ; jb 476c <_sk_store_f16_avx+0xce> DB 196,65,121,214,68,248,48 ; vmovq %xmm8,0x30(%r8,%rdi,8) - DB 235,174 ; jmp 3f74 <_sk_store_f16_avx+0xce> + DB 235,174 ; jmp 476c <_sk_store_f16_avx+0xce> PUBLIC _sk_load_u16_be_avx _sk_load_u16_be_avx LABEL PROC @@ -7890,7 +8622,7 @@ _sk_load_u16_be_avx LABEL PROC DB 76,139,0 ; mov (%rax),%r8 DB 72,141,4,189,0,0,0,0 ; lea 0x0(,%rdi,4),%rax DB 72,133,201 ; test %rcx,%rcx - DB 15,133,5,1,0,0 ; jne 40e1 <_sk_load_u16_be_avx+0x11b> + DB 15,133,5,1,0,0 ; jne 48d9 <_sk_load_u16_be_avx+0x11b> DB 196,65,121,16,4,64 ; vmovupd (%r8,%rax,2),%xmm8 DB 196,193,121,16,84,64,16 ; vmovupd 0x10(%r8,%rax,2),%xmm2 DB 196,193,121,16,92,64,32 ; vmovupd 0x20(%r8,%rax,2),%xmm3 @@ -7949,29 +8681,29 @@ _sk_load_u16_be_avx LABEL PROC DB 196,65,123,16,4,64 ; vmovsd (%r8,%rax,2),%xmm8 DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9 DB 72,131,249,1 ; cmp $0x1,%rcx - DB 116,85 ; je 4147 <_sk_load_u16_be_avx+0x181> + DB 116,85 ; je 493f <_sk_load_u16_be_avx+0x181> DB 196,65,57,22,68,64,8 ; vmovhpd 0x8(%r8,%rax,2),%xmm8,%xmm8 DB 72,131,249,3 ; cmp $0x3,%rcx - DB 114,72 ; jb 4147 <_sk_load_u16_be_avx+0x181> + DB 114,72 ; jb 493f <_sk_load_u16_be_avx+0x181> DB 196,193,123,16,84,64,16 ; vmovsd 0x10(%r8,%rax,2),%xmm2 DB 72,131,249,3 ; cmp $0x3,%rcx - DB 116,72 ; je 4154 <_sk_load_u16_be_avx+0x18e> + DB 116,72 ; je 494c <_sk_load_u16_be_avx+0x18e> DB 196,193,105,22,84,64,24 ; vmovhpd 0x18(%r8,%rax,2),%xmm2,%xmm2 DB 72,131,249,5 ; cmp $0x5,%rcx - DB 114,59 ; jb 4154 <_sk_load_u16_be_avx+0x18e> + DB 114,59 ; jb 494c <_sk_load_u16_be_avx+0x18e> DB 196,193,123,16,92,64,32 ; vmovsd 
0x20(%r8,%rax,2),%xmm3 DB 72,131,249,5 ; cmp $0x5,%rcx - DB 15,132,205,254,255,255 ; je 3ff7 <_sk_load_u16_be_avx+0x31> + DB 15,132,205,254,255,255 ; je 47ef <_sk_load_u16_be_avx+0x31> DB 196,193,97,22,92,64,40 ; vmovhpd 0x28(%r8,%rax,2),%xmm3,%xmm3 DB 72,131,249,7 ; cmp $0x7,%rcx - DB 15,130,188,254,255,255 ; jb 3ff7 <_sk_load_u16_be_avx+0x31> + DB 15,130,188,254,255,255 ; jb 47ef <_sk_load_u16_be_avx+0x31> DB 196,65,122,126,76,64,48 ; vmovq 0x30(%r8,%rax,2),%xmm9 - DB 233,176,254,255,255 ; jmpq 3ff7 <_sk_load_u16_be_avx+0x31> + DB 233,176,254,255,255 ; jmpq 47ef <_sk_load_u16_be_avx+0x31> DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3 DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2 - DB 233,163,254,255,255 ; jmpq 3ff7 <_sk_load_u16_be_avx+0x31> + DB 233,163,254,255,255 ; jmpq 47ef <_sk_load_u16_be_avx+0x31> DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3 - DB 233,154,254,255,255 ; jmpq 3ff7 <_sk_load_u16_be_avx+0x31> + DB 233,154,254,255,255 ; jmpq 47ef <_sk_load_u16_be_avx+0x31> PUBLIC _sk_load_rgb_u16_be_avx _sk_load_rgb_u16_be_avx LABEL PROC @@ -7979,7 +8711,7 @@ _sk_load_rgb_u16_be_avx LABEL PROC DB 76,139,0 ; mov (%rax),%r8 DB 72,141,4,127 ; lea (%rdi,%rdi,2),%rax DB 72,133,201 ; test %rcx,%rcx - DB 15,133,8,1,0,0 ; jne 4277 <_sk_load_rgb_u16_be_avx+0x11a> + DB 15,133,8,1,0,0 ; jne 4a6f <_sk_load_rgb_u16_be_avx+0x11a> DB 196,193,122,111,4,64 ; vmovdqu (%r8,%rax,2),%xmm0 DB 196,193,122,111,84,64,12 ; vmovdqu 0xc(%r8,%rax,2),%xmm2 DB 196,193,122,111,76,64,24 ; vmovdqu 0x18(%r8,%rax,2),%xmm1 @@ -8038,36 +8770,36 @@ _sk_load_rgb_u16_be_avx LABEL PROC DB 196,193,121,110,4,64 ; vmovd (%r8,%rax,2),%xmm0 DB 196,193,121,196,68,64,4,2 ; vpinsrw $0x2,0x4(%r8,%rax,2),%xmm0,%xmm0 DB 72,131,249,1 ; cmp $0x1,%rcx - DB 117,5 ; jne 4290 <_sk_load_rgb_u16_be_avx+0x133> - DB 233,19,255,255,255 ; jmpq 41a3 <_sk_load_rgb_u16_be_avx+0x46> + DB 117,5 ; jne 4a88 <_sk_load_rgb_u16_be_avx+0x133> + DB 233,19,255,255,255 ; jmpq 499b <_sk_load_rgb_u16_be_avx+0x46> DB 196,193,121,110,76,64,6 ; 
vmovd 0x6(%r8,%rax,2),%xmm1 DB 196,65,113,196,68,64,10,2 ; vpinsrw $0x2,0xa(%r8,%rax,2),%xmm1,%xmm8 DB 72,131,249,3 ; cmp $0x3,%rcx - DB 114,26 ; jb 42bf <_sk_load_rgb_u16_be_avx+0x162> + DB 114,26 ; jb 4ab7 <_sk_load_rgb_u16_be_avx+0x162> DB 196,193,121,110,76,64,12 ; vmovd 0xc(%r8,%rax,2),%xmm1 DB 196,193,113,196,84,64,16,2 ; vpinsrw $0x2,0x10(%r8,%rax,2),%xmm1,%xmm2 DB 72,131,249,3 ; cmp $0x3,%rcx - DB 117,10 ; jne 42c4 <_sk_load_rgb_u16_be_avx+0x167> - DB 233,228,254,255,255 ; jmpq 41a3 <_sk_load_rgb_u16_be_avx+0x46> - DB 233,223,254,255,255 ; jmpq 41a3 <_sk_load_rgb_u16_be_avx+0x46> + DB 117,10 ; jne 4abc <_sk_load_rgb_u16_be_avx+0x167> + DB 233,228,254,255,255 ; jmpq 499b <_sk_load_rgb_u16_be_avx+0x46> + DB 233,223,254,255,255 ; jmpq 499b <_sk_load_rgb_u16_be_avx+0x46> DB 196,193,121,110,76,64,18 ; vmovd 0x12(%r8,%rax,2),%xmm1 DB 196,65,113,196,76,64,22,2 ; vpinsrw $0x2,0x16(%r8,%rax,2),%xmm1,%xmm9 DB 72,131,249,5 ; cmp $0x5,%rcx - DB 114,26 ; jb 42f3 <_sk_load_rgb_u16_be_avx+0x196> + DB 114,26 ; jb 4aeb <_sk_load_rgb_u16_be_avx+0x196> DB 196,193,121,110,76,64,24 ; vmovd 0x18(%r8,%rax,2),%xmm1 DB 196,193,113,196,76,64,28,2 ; vpinsrw $0x2,0x1c(%r8,%rax,2),%xmm1,%xmm1 DB 72,131,249,5 ; cmp $0x5,%rcx - DB 117,10 ; jne 42f8 <_sk_load_rgb_u16_be_avx+0x19b> - DB 233,176,254,255,255 ; jmpq 41a3 <_sk_load_rgb_u16_be_avx+0x46> - DB 233,171,254,255,255 ; jmpq 41a3 <_sk_load_rgb_u16_be_avx+0x46> + DB 117,10 ; jne 4af0 <_sk_load_rgb_u16_be_avx+0x19b> + DB 233,176,254,255,255 ; jmpq 499b <_sk_load_rgb_u16_be_avx+0x46> + DB 233,171,254,255,255 ; jmpq 499b <_sk_load_rgb_u16_be_avx+0x46> DB 196,193,121,110,92,64,30 ; vmovd 0x1e(%r8,%rax,2),%xmm3 DB 196,65,97,196,92,64,34,2 ; vpinsrw $0x2,0x22(%r8,%rax,2),%xmm3,%xmm11 DB 72,131,249,7 ; cmp $0x7,%rcx - DB 114,20 ; jb 4321 <_sk_load_rgb_u16_be_avx+0x1c4> + DB 114,20 ; jb 4b19 <_sk_load_rgb_u16_be_avx+0x1c4> DB 196,193,121,110,92,64,36 ; vmovd 0x24(%r8,%rax,2),%xmm3 DB 196,193,97,196,92,64,40,2 ; vpinsrw 
$0x2,0x28(%r8,%rax,2),%xmm3,%xmm3 - DB 233,130,254,255,255 ; jmpq 41a3 <_sk_load_rgb_u16_be_avx+0x46> - DB 233,125,254,255,255 ; jmpq 41a3 <_sk_load_rgb_u16_be_avx+0x46> + DB 233,130,254,255,255 ; jmpq 499b <_sk_load_rgb_u16_be_avx+0x46> + DB 233,125,254,255,255 ; jmpq 499b <_sk_load_rgb_u16_be_avx+0x46> PUBLIC _sk_store_u16_be_avx _sk_store_u16_be_avx LABEL PROC @@ -8115,7 +8847,7 @@ _sk_store_u16_be_avx LABEL PROC DB 196,65,17,98,200 ; vpunpckldq %xmm8,%xmm13,%xmm9 DB 196,65,17,106,192 ; vpunpckhdq %xmm8,%xmm13,%xmm8 DB 72,133,201 ; test %rcx,%rcx - DB 117,31 ; jne 4428 <_sk_store_u16_be_avx+0x102> + DB 117,31 ; jne 4c20 <_sk_store_u16_be_avx+0x102> DB 196,1,120,17,28,72 ; vmovups %xmm11,(%r8,%r9,2) DB 196,1,120,17,84,72,16 ; vmovups %xmm10,0x10(%r8,%r9,2) DB 196,1,120,17,76,72,32 ; vmovups %xmm9,0x20(%r8,%r9,2) @@ -8124,31 +8856,31 @@ _sk_store_u16_be_avx LABEL PROC DB 255,224 ; jmpq *%rax DB 196,1,121,214,28,72 ; vmovq %xmm11,(%r8,%r9,2) DB 72,131,249,1 ; cmp $0x1,%rcx - DB 116,240 ; je 4424 <_sk_store_u16_be_avx+0xfe> + DB 116,240 ; je 4c1c <_sk_store_u16_be_avx+0xfe> DB 196,1,121,23,92,72,8 ; vmovhpd %xmm11,0x8(%r8,%r9,2) DB 72,131,249,3 ; cmp $0x3,%rcx - DB 114,227 ; jb 4424 <_sk_store_u16_be_avx+0xfe> + DB 114,227 ; jb 4c1c <_sk_store_u16_be_avx+0xfe> DB 196,1,121,214,84,72,16 ; vmovq %xmm10,0x10(%r8,%r9,2) - DB 116,218 ; je 4424 <_sk_store_u16_be_avx+0xfe> + DB 116,218 ; je 4c1c <_sk_store_u16_be_avx+0xfe> DB 196,1,121,23,84,72,24 ; vmovhpd %xmm10,0x18(%r8,%r9,2) DB 72,131,249,5 ; cmp $0x5,%rcx - DB 114,205 ; jb 4424 <_sk_store_u16_be_avx+0xfe> + DB 114,205 ; jb 4c1c <_sk_store_u16_be_avx+0xfe> DB 196,1,121,214,76,72,32 ; vmovq %xmm9,0x20(%r8,%r9,2) - DB 116,196 ; je 4424 <_sk_store_u16_be_avx+0xfe> + DB 116,196 ; je 4c1c <_sk_store_u16_be_avx+0xfe> DB 196,1,121,23,76,72,40 ; vmovhpd %xmm9,0x28(%r8,%r9,2) DB 72,131,249,7 ; cmp $0x7,%rcx - DB 114,183 ; jb 4424 <_sk_store_u16_be_avx+0xfe> + DB 114,183 ; jb 4c1c <_sk_store_u16_be_avx+0xfe> DB 
196,1,121,214,68,72,48 ; vmovq %xmm8,0x30(%r8,%r9,2) - DB 235,174 ; jmp 4424 <_sk_store_u16_be_avx+0xfe> + DB 235,174 ; jmp 4c1c <_sk_store_u16_be_avx+0xfe> PUBLIC _sk_load_f32_avx _sk_load_f32_avx LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 72,131,249,7 ; cmp $0x7,%rcx - DB 119,110 ; ja 44ec <_sk_load_f32_avx+0x76> + DB 119,110 ; ja 4ce4 <_sk_load_f32_avx+0x76> DB 76,139,0 ; mov (%rax),%r8 DB 76,141,12,189,0,0,0,0 ; lea 0x0(,%rdi,4),%r9 - DB 76,141,21,132,0,0,0 ; lea 0x84(%rip),%r10 # 4514 <_sk_load_f32_avx+0x9e> + DB 76,141,21,132,0,0,0 ; lea 0x84(%rip),%r10 # 4d0c <_sk_load_f32_avx+0x9e> DB 73,99,4,138 ; movslq (%r10,%rcx,4),%rax DB 76,1,208 ; add %r10,%rax DB 255,224 ; jmpq *%rax @@ -8205,7 +8937,7 @@ _sk_store_f32_avx LABEL PROC DB 196,65,37,20,196 ; vunpcklpd %ymm12,%ymm11,%ymm8 DB 196,65,37,21,220 ; vunpckhpd %ymm12,%ymm11,%ymm11 DB 72,133,201 ; test %rcx,%rcx - DB 117,55 ; jne 45a1 <_sk_store_f32_avx+0x6d> + DB 117,55 ; jne 4d99 <_sk_store_f32_avx+0x6d> DB 196,67,45,24,225,1 ; vinsertf128 $0x1,%xmm9,%ymm10,%ymm12 DB 196,67,61,24,235,1 ; vinsertf128 $0x1,%xmm11,%ymm8,%ymm13 DB 196,67,45,6,201,49 ; vperm2f128 $0x31,%ymm9,%ymm10,%ymm9 @@ -8218,22 +8950,22 @@ _sk_store_f32_avx LABEL PROC DB 255,224 ; jmpq *%rax DB 196,65,121,17,20,128 ; vmovupd %xmm10,(%r8,%rax,4) DB 72,131,249,1 ; cmp $0x1,%rcx - DB 116,240 ; je 459d <_sk_store_f32_avx+0x69> + DB 116,240 ; je 4d95 <_sk_store_f32_avx+0x69> DB 196,65,121,17,76,128,16 ; vmovupd %xmm9,0x10(%r8,%rax,4) DB 72,131,249,3 ; cmp $0x3,%rcx - DB 114,227 ; jb 459d <_sk_store_f32_avx+0x69> + DB 114,227 ; jb 4d95 <_sk_store_f32_avx+0x69> DB 196,65,121,17,68,128,32 ; vmovupd %xmm8,0x20(%r8,%rax,4) - DB 116,218 ; je 459d <_sk_store_f32_avx+0x69> + DB 116,218 ; je 4d95 <_sk_store_f32_avx+0x69> DB 196,65,121,17,92,128,48 ; vmovupd %xmm11,0x30(%r8,%rax,4) DB 72,131,249,5 ; cmp $0x5,%rcx - DB 114,205 ; jb 459d <_sk_store_f32_avx+0x69> + DB 114,205 ; jb 4d95 <_sk_store_f32_avx+0x69> DB 196,67,125,25,84,128,64,1 ; vextractf128 
$0x1,%ymm10,0x40(%r8,%rax,4) - DB 116,195 ; je 459d <_sk_store_f32_avx+0x69> + DB 116,195 ; je 4d95 <_sk_store_f32_avx+0x69> DB 196,67,125,25,76,128,80,1 ; vextractf128 $0x1,%ymm9,0x50(%r8,%rax,4) DB 72,131,249,7 ; cmp $0x7,%rcx - DB 114,181 ; jb 459d <_sk_store_f32_avx+0x69> + DB 114,181 ; jb 4d95 <_sk_store_f32_avx+0x69> DB 196,67,125,25,68,128,96,1 ; vextractf128 $0x1,%ymm8,0x60(%r8,%rax,4) - DB 235,171 ; jmp 459d <_sk_store_f32_avx+0x69> + DB 235,171 ; jmp 4d95 <_sk_store_f32_avx+0x69> PUBLIC _sk_clamp_x_avx _sk_clamp_x_avx LABEL PROC @@ -8537,7 +9269,7 @@ _sk_linear_gradient_avx LABEL PROC DB 196,226,125,24,88,28 ; vbroadcastss 0x1c(%rax),%ymm3 DB 76,139,0 ; mov (%rax),%r8 DB 77,133,192 ; test %r8,%r8 - DB 15,132,146,0,0,0 ; je 4b55 <_sk_linear_gradient_avx+0xb8> + DB 15,132,146,0,0,0 ; je 534d <_sk_linear_gradient_avx+0xb8> DB 72,139,64,8 ; mov 0x8(%rax),%rax DB 72,131,192,32 ; add $0x20,%rax DB 196,65,28,87,228 ; vxorps %ymm12,%ymm12,%ymm12 @@ -8564,8 +9296,8 @@ _sk_linear_gradient_avx LABEL PROC DB 196,227,13,74,219,208 ; vblendvps %ymm13,%ymm3,%ymm14,%ymm3 DB 72,131,192,36 ; add $0x24,%rax DB 73,255,200 ; dec %r8 - DB 117,140 ; jne 4adf <_sk_linear_gradient_avx+0x42> - DB 235,20 ; jmp 4b69 <_sk_linear_gradient_avx+0xcc> + DB 117,140 ; jne 52d7 <_sk_linear_gradient_avx+0x42> + DB 235,20 ; jmp 5361 <_sk_linear_gradient_avx+0xcc> DB 196,65,36,87,219 ; vxorps %ymm11,%ymm11,%ymm11 DB 196,65,44,87,210 ; vxorps %ymm10,%ymm10,%ymm10 DB 196,65,52,87,201 ; vxorps %ymm9,%ymm9,%ymm9 @@ -11142,34 +11874,420 @@ _sk_table_a_sse41 LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 255,224 ; jmpq *%rax -PUBLIC _sk_load_a8_sse41 -_sk_load_a8_sse41 LABEL PROC - DB 72,173 ; lods %ds:(%rsi),%rax - DB 72,139,0 ; mov (%rax),%rax - DB 102,15,56,49,4,56 ; pmovzxbd (%rax,%rdi,1),%xmm0 - DB 15,91,192 ; cvtdq2ps %xmm0,%xmm0 - DB 184,129,128,128,59 ; mov $0x3b808081,%eax - DB 102,15,110,216 ; movd %eax,%xmm3 - DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3 - DB 15,89,216 ; mulps %xmm0,%xmm3 - 
DB 72,173 ; lods %ds:(%rsi),%rax - DB 15,87,192 ; xorps %xmm0,%xmm0 - DB 15,87,201 ; xorps %xmm1,%xmm1 - DB 15,87,210 ; xorps %xmm2,%xmm2 - DB 255,224 ; jmpq *%rax - -PUBLIC _sk_gather_a8_sse41 -_sk_gather_a8_sse41 LABEL PROC +PUBLIC _sk_parametric_r_sse41 +_sk_parametric_r_sse41 LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax - DB 76,139,8 ; mov (%rax),%r9 - DB 243,15,91,201 ; cvttps2dq %xmm1,%xmm1 - DB 102,15,110,80,16 ; movd 0x10(%rax),%xmm2 - DB 102,15,112,210,0 ; pshufd $0x0,%xmm2,%xmm2 - DB 102,15,56,64,209 ; pmulld %xmm1,%xmm2 - DB 243,15,91,192 ; cvttps2dq %xmm0,%xmm0 - DB 102,15,254,194 ; paddd %xmm2,%xmm0 - DB 102,72,15,58,22,192,1 ; pextrq $0x1,%xmm0,%rax - DB 65,137,192 ; mov %eax,%r8d + DB 243,68,15,16,64,16 ; movss 0x10(%rax),%xmm8 + DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 + DB 243,68,15,16,72,12 ; movss 0xc(%rax),%xmm9 + DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 + DB 68,15,89,200 ; mulps %xmm0,%xmm9 + DB 243,68,15,16,80,4 ; movss 0x4(%rax),%xmm10 + DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 + DB 68,15,89,208 ; mulps %xmm0,%xmm10 + DB 65,15,194,192,2 ; cmpleps %xmm8,%xmm0 + DB 243,68,15,16,64,24 ; movss 0x18(%rax),%xmm8 + DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 + DB 69,15,88,200 ; addps %xmm8,%xmm9 + DB 243,68,15,16,0 ; movss (%rax),%xmm8 + DB 243,68,15,16,88,8 ; movss 0x8(%rax),%xmm11 + DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 + DB 69,15,88,211 ; addps %xmm11,%xmm10 + DB 69,15,91,218 ; cvtdq2ps %xmm10,%xmm11 + DB 185,0,0,0,52 ; mov $0x34000000,%ecx + DB 102,68,15,110,225 ; movd %ecx,%xmm12 + DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12 + DB 69,15,89,227 ; mulps %xmm11,%xmm12 + DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx + DB 102,68,15,110,217 ; movd %ecx,%xmm11 + DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 + DB 69,15,92,227 ; subps %xmm11,%xmm12 + DB 185,255,255,127,0 ; mov $0x7fffff,%ecx + DB 102,68,15,110,217 ; movd %ecx,%xmm11 + DB 102,69,15,112,219,0 ; pshufd $0x0,%xmm11,%xmm11 + DB 102,69,15,219,218 ; pand %xmm10,%xmm11 + DB 
185,0,0,0,63 ; mov $0x3f000000,%ecx + DB 102,68,15,110,209 ; movd %ecx,%xmm10 + DB 102,69,15,112,210,0 ; pshufd $0x0,%xmm10,%xmm10 + DB 102,69,15,235,211 ; por %xmm11,%xmm10 + DB 185,42,145,49,64 ; mov $0x4031912a,%ecx + DB 102,68,15,110,217 ; movd %ecx,%xmm11 + DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 + DB 69,15,88,220 ; addps %xmm12,%xmm11 + DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx + DB 102,68,15,110,225 ; movd %ecx,%xmm12 + DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12 + DB 69,15,89,226 ; mulps %xmm10,%xmm12 + DB 69,15,92,220 ; subps %xmm12,%xmm11 + DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx + DB 102,68,15,110,225 ; movd %ecx,%xmm12 + DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12 + DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx + DB 102,68,15,110,233 ; movd %ecx,%xmm13 + DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 + DB 69,15,88,234 ; addps %xmm10,%xmm13 + DB 69,15,94,229 ; divps %xmm13,%xmm12 + DB 69,15,92,220 ; subps %xmm12,%xmm11 + DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 + DB 69,15,89,195 ; mulps %xmm11,%xmm8 + DB 102,69,15,58,8,216,1 ; roundps $0x1,%xmm8,%xmm11 + DB 185,0,0,0,75 ; mov $0x4b000000,%ecx + DB 102,68,15,110,209 ; movd %ecx,%xmm10 + DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 + DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx + DB 102,68,15,110,225 ; movd %ecx,%xmm12 + DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12 + DB 69,15,88,224 ; addps %xmm8,%xmm12 + DB 69,15,92,195 ; subps %xmm11,%xmm8 + DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx + DB 102,68,15,110,217 ; movd %ecx,%xmm11 + DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 + DB 69,15,89,216 ; mulps %xmm8,%xmm11 + DB 69,15,92,227 ; subps %xmm11,%xmm12 + DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx + DB 102,68,15,110,217 ; movd %ecx,%xmm11 + DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 + DB 185,248,245,154,64 ; mov $0x409af5f8,%ecx + DB 102,68,15,110,233 ; movd %ecx,%xmm13 + DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 + DB 69,15,92,232 ; subps %xmm8,%xmm13 + DB 69,15,94,221 
; divps %xmm13,%xmm11 + DB 69,15,88,220 ; addps %xmm12,%xmm11 + DB 69,15,89,218 ; mulps %xmm10,%xmm11 + DB 102,69,15,91,211 ; cvtps2dq %xmm11,%xmm10 + DB 243,68,15,16,64,20 ; movss 0x14(%rax),%xmm8 + DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 + DB 69,15,88,194 ; addps %xmm10,%xmm8 + DB 102,69,15,56,20,193 ; blendvps %xmm0,%xmm9,%xmm8 + DB 15,87,192 ; xorps %xmm0,%xmm0 + DB 68,15,95,192 ; maxps %xmm0,%xmm8 + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 102,15,110,192 ; movd %eax,%xmm0 + DB 15,198,192,0 ; shufps $0x0,%xmm0,%xmm0 + DB 68,15,93,192 ; minps %xmm0,%xmm8 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 65,15,40,192 ; movaps %xmm8,%xmm0 + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_parametric_g_sse41 +_sk_parametric_g_sse41 LABEL PROC + DB 68,15,40,192 ; movaps %xmm0,%xmm8 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 243,68,15,16,80,16 ; movss 0x10(%rax),%xmm10 + DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 + DB 243,68,15,16,72,12 ; movss 0xc(%rax),%xmm9 + DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 + DB 68,15,89,201 ; mulps %xmm1,%xmm9 + DB 243,68,15,16,88,4 ; movss 0x4(%rax),%xmm11 + DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 + DB 68,15,89,217 ; mulps %xmm1,%xmm11 + DB 15,40,193 ; movaps %xmm1,%xmm0 + DB 65,15,194,194,2 ; cmpleps %xmm10,%xmm0 + DB 243,15,16,72,24 ; movss 0x18(%rax),%xmm1 + DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1 + DB 68,15,88,201 ; addps %xmm1,%xmm9 + DB 243,68,15,16,32 ; movss (%rax),%xmm12 + DB 243,15,16,72,8 ; movss 0x8(%rax),%xmm1 + DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1 + DB 68,15,88,217 ; addps %xmm1,%xmm11 + DB 69,15,91,211 ; cvtdq2ps %xmm11,%xmm10 + DB 185,0,0,0,52 ; mov $0x34000000,%ecx + DB 102,68,15,110,233 ; movd %ecx,%xmm13 + DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 + DB 69,15,89,234 ; mulps %xmm10,%xmm13 + DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx + DB 102,15,110,201 ; movd %ecx,%xmm1 + DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1 + DB 68,15,92,233 ; subps %xmm1,%xmm13 + DB 185,255,255,127,0 ; mov $0x7fffff,%ecx + DB 
102,15,110,201 ; movd %ecx,%xmm1 + DB 102,68,15,112,209,0 ; pshufd $0x0,%xmm1,%xmm10 + DB 102,69,15,219,211 ; pand %xmm11,%xmm10 + DB 185,0,0,0,63 ; mov $0x3f000000,%ecx + DB 102,15,110,201 ; movd %ecx,%xmm1 + DB 102,68,15,112,217,0 ; pshufd $0x0,%xmm1,%xmm11 + DB 102,69,15,235,218 ; por %xmm10,%xmm11 + DB 185,42,145,49,64 ; mov $0x4031912a,%ecx + DB 102,68,15,110,209 ; movd %ecx,%xmm10 + DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 + DB 69,15,88,213 ; addps %xmm13,%xmm10 + DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx + DB 102,15,110,201 ; movd %ecx,%xmm1 + DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1 + DB 65,15,89,203 ; mulps %xmm11,%xmm1 + DB 68,15,92,209 ; subps %xmm1,%xmm10 + DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx + DB 102,68,15,110,233 ; movd %ecx,%xmm13 + DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 + DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx + DB 102,15,110,201 ; movd %ecx,%xmm1 + DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1 + DB 65,15,88,203 ; addps %xmm11,%xmm1 + DB 68,15,94,233 ; divps %xmm1,%xmm13 + DB 69,15,92,213 ; subps %xmm13,%xmm10 + DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12 + DB 69,15,89,226 ; mulps %xmm10,%xmm12 + DB 102,69,15,58,8,220,1 ; roundps $0x1,%xmm12,%xmm11 + DB 185,0,0,0,75 ; mov $0x4b000000,%ecx + DB 102,68,15,110,209 ; movd %ecx,%xmm10 + DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 + DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx + DB 102,68,15,110,233 ; movd %ecx,%xmm13 + DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 + DB 69,15,88,236 ; addps %xmm12,%xmm13 + DB 69,15,92,227 ; subps %xmm11,%xmm12 + DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx + DB 102,15,110,201 ; movd %ecx,%xmm1 + DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1 + DB 65,15,89,204 ; mulps %xmm12,%xmm1 + DB 68,15,92,233 ; subps %xmm1,%xmm13 + DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx + DB 102,68,15,110,217 ; movd %ecx,%xmm11 + DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 + DB 185,248,245,154,64 ; mov $0x409af5f8,%ecx + DB 102,15,110,201 ; movd %ecx,%xmm1 + DB 
15,198,201,0 ; shufps $0x0,%xmm1,%xmm1 + DB 65,15,92,204 ; subps %xmm12,%xmm1 + DB 68,15,94,217 ; divps %xmm1,%xmm11 + DB 69,15,88,221 ; addps %xmm13,%xmm11 + DB 69,15,89,218 ; mulps %xmm10,%xmm11 + DB 102,69,15,91,211 ; cvtps2dq %xmm11,%xmm10 + DB 243,15,16,72,20 ; movss 0x14(%rax),%xmm1 + DB 15,198,201,0 ; shufps $0x0,%xmm1,%xmm1 + DB 65,15,88,202 ; addps %xmm10,%xmm1 + DB 102,65,15,56,20,201 ; blendvps %xmm0,%xmm9,%xmm1 + DB 15,87,192 ; xorps %xmm0,%xmm0 + DB 15,95,200 ; maxps %xmm0,%xmm1 + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 102,15,110,192 ; movd %eax,%xmm0 + DB 15,198,192,0 ; shufps $0x0,%xmm0,%xmm0 + DB 15,93,200 ; minps %xmm0,%xmm1 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 65,15,40,192 ; movaps %xmm8,%xmm0 + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_parametric_b_sse41 +_sk_parametric_b_sse41 LABEL PROC + DB 68,15,40,192 ; movaps %xmm0,%xmm8 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 243,68,15,16,80,16 ; movss 0x10(%rax),%xmm10 + DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 + DB 243,68,15,16,72,12 ; movss 0xc(%rax),%xmm9 + DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 + DB 68,15,89,202 ; mulps %xmm2,%xmm9 + DB 243,68,15,16,88,4 ; movss 0x4(%rax),%xmm11 + DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 + DB 68,15,89,218 ; mulps %xmm2,%xmm11 + DB 15,40,194 ; movaps %xmm2,%xmm0 + DB 65,15,194,194,2 ; cmpleps %xmm10,%xmm0 + DB 243,15,16,80,24 ; movss 0x18(%rax),%xmm2 + DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2 + DB 68,15,88,202 ; addps %xmm2,%xmm9 + DB 243,68,15,16,32 ; movss (%rax),%xmm12 + DB 243,15,16,80,8 ; movss 0x8(%rax),%xmm2 + DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2 + DB 68,15,88,218 ; addps %xmm2,%xmm11 + DB 69,15,91,211 ; cvtdq2ps %xmm11,%xmm10 + DB 185,0,0,0,52 ; mov $0x34000000,%ecx + DB 102,68,15,110,233 ; movd %ecx,%xmm13 + DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 + DB 69,15,89,234 ; mulps %xmm10,%xmm13 + DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx + DB 102,15,110,209 ; movd %ecx,%xmm2 + DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2 + DB 
68,15,92,234 ; subps %xmm2,%xmm13 + DB 185,255,255,127,0 ; mov $0x7fffff,%ecx + DB 102,15,110,209 ; movd %ecx,%xmm2 + DB 102,68,15,112,210,0 ; pshufd $0x0,%xmm2,%xmm10 + DB 102,69,15,219,211 ; pand %xmm11,%xmm10 + DB 185,0,0,0,63 ; mov $0x3f000000,%ecx + DB 102,15,110,209 ; movd %ecx,%xmm2 + DB 102,68,15,112,218,0 ; pshufd $0x0,%xmm2,%xmm11 + DB 102,69,15,235,218 ; por %xmm10,%xmm11 + DB 185,42,145,49,64 ; mov $0x4031912a,%ecx + DB 102,68,15,110,209 ; movd %ecx,%xmm10 + DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 + DB 69,15,88,213 ; addps %xmm13,%xmm10 + DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx + DB 102,15,110,209 ; movd %ecx,%xmm2 + DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2 + DB 65,15,89,211 ; mulps %xmm11,%xmm2 + DB 68,15,92,210 ; subps %xmm2,%xmm10 + DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx + DB 102,68,15,110,233 ; movd %ecx,%xmm13 + DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 + DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx + DB 102,15,110,209 ; movd %ecx,%xmm2 + DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2 + DB 65,15,88,211 ; addps %xmm11,%xmm2 + DB 68,15,94,234 ; divps %xmm2,%xmm13 + DB 69,15,92,213 ; subps %xmm13,%xmm10 + DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12 + DB 69,15,89,226 ; mulps %xmm10,%xmm12 + DB 102,69,15,58,8,220,1 ; roundps $0x1,%xmm12,%xmm11 + DB 185,0,0,0,75 ; mov $0x4b000000,%ecx + DB 102,68,15,110,209 ; movd %ecx,%xmm10 + DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 + DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx + DB 102,68,15,110,233 ; movd %ecx,%xmm13 + DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 + DB 69,15,88,236 ; addps %xmm12,%xmm13 + DB 69,15,92,227 ; subps %xmm11,%xmm12 + DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx + DB 102,15,110,209 ; movd %ecx,%xmm2 + DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2 + DB 65,15,89,212 ; mulps %xmm12,%xmm2 + DB 68,15,92,234 ; subps %xmm2,%xmm13 + DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx + DB 102,68,15,110,217 ; movd %ecx,%xmm11 + DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 + DB 
185,248,245,154,64 ; mov $0x409af5f8,%ecx + DB 102,15,110,209 ; movd %ecx,%xmm2 + DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2 + DB 65,15,92,212 ; subps %xmm12,%xmm2 + DB 68,15,94,218 ; divps %xmm2,%xmm11 + DB 69,15,88,221 ; addps %xmm13,%xmm11 + DB 69,15,89,218 ; mulps %xmm10,%xmm11 + DB 102,69,15,91,211 ; cvtps2dq %xmm11,%xmm10 + DB 243,15,16,80,20 ; movss 0x14(%rax),%xmm2 + DB 15,198,210,0 ; shufps $0x0,%xmm2,%xmm2 + DB 65,15,88,210 ; addps %xmm10,%xmm2 + DB 102,65,15,56,20,209 ; blendvps %xmm0,%xmm9,%xmm2 + DB 15,87,192 ; xorps %xmm0,%xmm0 + DB 15,95,208 ; maxps %xmm0,%xmm2 + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 102,15,110,192 ; movd %eax,%xmm0 + DB 15,198,192,0 ; shufps $0x0,%xmm0,%xmm0 + DB 15,93,208 ; minps %xmm0,%xmm2 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 65,15,40,192 ; movaps %xmm8,%xmm0 + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_parametric_a_sse41 +_sk_parametric_a_sse41 LABEL PROC + DB 68,15,40,192 ; movaps %xmm0,%xmm8 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 243,68,15,16,80,16 ; movss 0x10(%rax),%xmm10 + DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 + DB 243,68,15,16,72,12 ; movss 0xc(%rax),%xmm9 + DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 + DB 68,15,89,203 ; mulps %xmm3,%xmm9 + DB 243,68,15,16,88,4 ; movss 0x4(%rax),%xmm11 + DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 + DB 68,15,89,219 ; mulps %xmm3,%xmm11 + DB 15,40,195 ; movaps %xmm3,%xmm0 + DB 65,15,194,194,2 ; cmpleps %xmm10,%xmm0 + DB 243,15,16,88,24 ; movss 0x18(%rax),%xmm3 + DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3 + DB 68,15,88,203 ; addps %xmm3,%xmm9 + DB 243,68,15,16,32 ; movss (%rax),%xmm12 + DB 243,15,16,88,8 ; movss 0x8(%rax),%xmm3 + DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3 + DB 68,15,88,219 ; addps %xmm3,%xmm11 + DB 69,15,91,211 ; cvtdq2ps %xmm11,%xmm10 + DB 185,0,0,0,52 ; mov $0x34000000,%ecx + DB 102,68,15,110,233 ; movd %ecx,%xmm13 + DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 + DB 69,15,89,234 ; mulps %xmm10,%xmm13 + DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx + DB 
102,15,110,217 ; movd %ecx,%xmm3 + DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3 + DB 68,15,92,235 ; subps %xmm3,%xmm13 + DB 185,255,255,127,0 ; mov $0x7fffff,%ecx + DB 102,15,110,217 ; movd %ecx,%xmm3 + DB 102,68,15,112,211,0 ; pshufd $0x0,%xmm3,%xmm10 + DB 102,69,15,219,211 ; pand %xmm11,%xmm10 + DB 185,0,0,0,63 ; mov $0x3f000000,%ecx + DB 102,15,110,217 ; movd %ecx,%xmm3 + DB 102,68,15,112,219,0 ; pshufd $0x0,%xmm3,%xmm11 + DB 102,69,15,235,218 ; por %xmm10,%xmm11 + DB 185,42,145,49,64 ; mov $0x4031912a,%ecx + DB 102,68,15,110,209 ; movd %ecx,%xmm10 + DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 + DB 69,15,88,213 ; addps %xmm13,%xmm10 + DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx + DB 102,15,110,217 ; movd %ecx,%xmm3 + DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3 + DB 65,15,89,219 ; mulps %xmm11,%xmm3 + DB 68,15,92,211 ; subps %xmm3,%xmm10 + DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx + DB 102,68,15,110,233 ; movd %ecx,%xmm13 + DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 + DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx + DB 102,15,110,217 ; movd %ecx,%xmm3 + DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3 + DB 65,15,88,219 ; addps %xmm11,%xmm3 + DB 68,15,94,235 ; divps %xmm3,%xmm13 + DB 69,15,92,213 ; subps %xmm13,%xmm10 + DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12 + DB 69,15,89,226 ; mulps %xmm10,%xmm12 + DB 102,69,15,58,8,220,1 ; roundps $0x1,%xmm12,%xmm11 + DB 185,0,0,0,75 ; mov $0x4b000000,%ecx + DB 102,68,15,110,209 ; movd %ecx,%xmm10 + DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 + DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx + DB 102,68,15,110,233 ; movd %ecx,%xmm13 + DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 + DB 69,15,88,236 ; addps %xmm12,%xmm13 + DB 69,15,92,227 ; subps %xmm11,%xmm12 + DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx + DB 102,15,110,217 ; movd %ecx,%xmm3 + DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3 + DB 65,15,89,220 ; mulps %xmm12,%xmm3 + DB 68,15,92,235 ; subps %xmm3,%xmm13 + DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx + DB 
102,68,15,110,217 ; movd %ecx,%xmm11 + DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 + DB 185,248,245,154,64 ; mov $0x409af5f8,%ecx + DB 102,15,110,217 ; movd %ecx,%xmm3 + DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3 + DB 65,15,92,220 ; subps %xmm12,%xmm3 + DB 68,15,94,219 ; divps %xmm3,%xmm11 + DB 69,15,88,221 ; addps %xmm13,%xmm11 + DB 69,15,89,218 ; mulps %xmm10,%xmm11 + DB 102,69,15,91,211 ; cvtps2dq %xmm11,%xmm10 + DB 243,15,16,88,20 ; movss 0x14(%rax),%xmm3 + DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3 + DB 65,15,88,218 ; addps %xmm10,%xmm3 + DB 102,65,15,56,20,217 ; blendvps %xmm0,%xmm9,%xmm3 + DB 15,87,192 ; xorps %xmm0,%xmm0 + DB 15,95,216 ; maxps %xmm0,%xmm3 + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 102,15,110,192 ; movd %eax,%xmm0 + DB 15,198,192,0 ; shufps $0x0,%xmm0,%xmm0 + DB 15,93,216 ; minps %xmm0,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 65,15,40,192 ; movaps %xmm8,%xmm0 + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_load_a8_sse41 +_sk_load_a8_sse41 LABEL PROC + DB 72,173 ; lods %ds:(%rsi),%rax + DB 72,139,0 ; mov (%rax),%rax + DB 102,15,56,49,4,56 ; pmovzxbd (%rax,%rdi,1),%xmm0 + DB 15,91,192 ; cvtdq2ps %xmm0,%xmm0 + DB 184,129,128,128,59 ; mov $0x3b808081,%eax + DB 102,15,110,216 ; movd %eax,%xmm3 + DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3 + DB 15,89,216 ; mulps %xmm0,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 15,87,192 ; xorps %xmm0,%xmm0 + DB 15,87,201 ; xorps %xmm1,%xmm1 + DB 15,87,210 ; xorps %xmm2,%xmm2 + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_gather_a8_sse41 +_sk_gather_a8_sse41 LABEL PROC + DB 72,173 ; lods %ds:(%rsi),%rax + DB 76,139,8 ; mov (%rax),%r9 + DB 243,15,91,201 ; cvttps2dq %xmm1,%xmm1 + DB 102,15,110,80,16 ; movd 0x10(%rax),%xmm2 + DB 102,15,112,210,0 ; pshufd $0x0,%xmm2,%xmm2 + DB 102,15,56,64,209 ; pmulld %xmm1,%xmm2 + DB 243,15,91,192 ; cvttps2dq %xmm0,%xmm0 + DB 102,15,254,194 ; paddd %xmm2,%xmm0 + DB 102,72,15,58,22,192,1 ; pextrq $0x1,%xmm0,%rax + DB 65,137,192 ; mov %eax,%r8d DB 72,193,232,32 ; shr $0x20,%rax DB 
102,72,15,126,193 ; movq %xmm0,%rcx DB 65,137,202 ; mov %ecx,%r10d @@ -11266,9 +12384,9 @@ _sk_gather_i8_sse41 LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 73,137,192 ; mov %rax,%r8 DB 77,133,192 ; test %r8,%r8 - DB 116,5 ; je 210a <_sk_gather_i8_sse41+0xf> + DB 116,5 ; je 27a3 <_sk_gather_i8_sse41+0xf> DB 76,137,192 ; mov %r8,%rax - DB 235,2 ; jmp 210c <_sk_gather_i8_sse41+0x11> + DB 235,2 ; jmp 27a5 <_sk_gather_i8_sse41+0x11> DB 72,173 ; lods %ds:(%rsi),%rax DB 76,139,16 ; mov (%rax),%r10 DB 243,15,91,201 ; cvttps2dq %xmm1,%xmm1 @@ -12362,7 +13480,7 @@ _sk_linear_gradient_sse41 LABEL PROC DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 DB 72,139,8 ; mov (%rax),%rcx DB 72,133,201 ; test %rcx,%rcx - DB 15,132,4,1,0,0 ; je 3338 <_sk_linear_gradient_sse41+0x13e> + DB 15,132,4,1,0,0 ; je 39d1 <_sk_linear_gradient_sse41+0x13e> DB 72,131,236,88 ; sub $0x58,%rsp DB 15,41,36,36 ; movaps %xmm4,(%rsp) DB 15,41,108,36,16 ; movaps %xmm5,0x10(%rsp) @@ -12413,13 +13531,13 @@ _sk_linear_gradient_sse41 LABEL PROC DB 15,40,196 ; movaps %xmm4,%xmm0 DB 72,131,192,36 ; add $0x24,%rax DB 72,255,201 ; dec %rcx - DB 15,133,65,255,255,255 ; jne 3260 <_sk_linear_gradient_sse41+0x66> + DB 15,133,65,255,255,255 ; jne 38f9 <_sk_linear_gradient_sse41+0x66> DB 15,40,124,36,48 ; movaps 0x30(%rsp),%xmm7 DB 15,40,116,36,32 ; movaps 0x20(%rsp),%xmm6 DB 15,40,108,36,16 ; movaps 0x10(%rsp),%xmm5 DB 15,40,36,36 ; movaps (%rsp),%xmm4 DB 72,131,196,88 ; add $0x58,%rsp - DB 235,13 ; jmp 3345 <_sk_linear_gradient_sse41+0x14b> + DB 235,13 ; jmp 39de <_sk_linear_gradient_sse41+0x14b> DB 15,87,201 ; xorps %xmm1,%xmm1 DB 15,87,210 ; xorps %xmm2,%xmm2 DB 15,87,219 ; xorps %xmm3,%xmm3 @@ -15098,6 +16216,414 @@ _sk_table_a_sse2 LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 255,224 ; jmpq *%rax +PUBLIC _sk_parametric_r_sse2 +_sk_parametric_r_sse2 LABEL PROC + DB 72,173 ; lods %ds:(%rsi),%rax + DB 243,68,15,16,72,16 ; movss 0x10(%rax),%xmm9 + DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 + DB 243,68,15,16,64,12 
; movss 0xc(%rax),%xmm8 + DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 + DB 68,15,89,192 ; mulps %xmm0,%xmm8 + DB 243,68,15,16,80,4 ; movss 0x4(%rax),%xmm10 + DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 + DB 68,15,89,208 ; mulps %xmm0,%xmm10 + DB 65,15,194,193,2 ; cmpleps %xmm9,%xmm0 + DB 243,68,15,16,72,24 ; movss 0x18(%rax),%xmm9 + DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 + DB 69,15,88,193 ; addps %xmm9,%xmm8 + DB 243,68,15,16,8 ; movss (%rax),%xmm9 + DB 243,68,15,16,88,8 ; movss 0x8(%rax),%xmm11 + DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 + DB 69,15,88,211 ; addps %xmm11,%xmm10 + DB 69,15,91,218 ; cvtdq2ps %xmm10,%xmm11 + DB 185,0,0,0,52 ; mov $0x34000000,%ecx + DB 102,68,15,110,233 ; movd %ecx,%xmm13 + DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 + DB 69,15,89,235 ; mulps %xmm11,%xmm13 + DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx + DB 102,68,15,110,217 ; movd %ecx,%xmm11 + DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 + DB 69,15,92,235 ; subps %xmm11,%xmm13 + DB 185,255,255,127,0 ; mov $0x7fffff,%ecx + DB 102,68,15,110,217 ; movd %ecx,%xmm11 + DB 102,69,15,112,227,0 ; pshufd $0x0,%xmm11,%xmm12 + DB 102,69,15,219,226 ; pand %xmm10,%xmm12 + DB 185,0,0,0,63 ; mov $0x3f000000,%ecx + DB 102,68,15,110,209 ; movd %ecx,%xmm10 + DB 102,69,15,112,218,0 ; pshufd $0x0,%xmm10,%xmm11 + DB 102,69,15,235,220 ; por %xmm12,%xmm11 + DB 185,42,145,49,64 ; mov $0x4031912a,%ecx + DB 102,68,15,110,225 ; movd %ecx,%xmm12 + DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12 + DB 69,15,88,229 ; addps %xmm13,%xmm12 + DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx + DB 102,68,15,110,209 ; movd %ecx,%xmm10 + DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 + DB 69,15,89,211 ; mulps %xmm11,%xmm10 + DB 69,15,92,226 ; subps %xmm10,%xmm12 + DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx + DB 102,68,15,110,233 ; movd %ecx,%xmm13 + DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx + DB 102,68,15,110,241 ; movd %ecx,%xmm14 + DB 185,0,0,128,63 ; mov $0x3f800000,%ecx + DB 102,68,15,110,209 ; movd 
%ecx,%xmm10 + DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d + DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx + DB 102,68,15,110,249 ; movd %ecx,%xmm15 + DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 + DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14 + DB 69,15,88,243 ; addps %xmm11,%xmm14 + DB 69,15,94,238 ; divps %xmm14,%xmm13 + DB 69,15,92,229 ; subps %xmm13,%xmm12 + DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 + DB 69,15,89,204 ; mulps %xmm12,%xmm9 + DB 243,69,15,91,217 ; cvttps2dq %xmm9,%xmm11 + DB 69,15,91,219 ; cvtdq2ps %xmm11,%xmm11 + DB 69,15,40,225 ; movaps %xmm9,%xmm12 + DB 69,15,198,255,0 ; shufps $0x0,%xmm15,%xmm15 + DB 69,15,88,249 ; addps %xmm9,%xmm15 + DB 69,15,40,233 ; movaps %xmm9,%xmm13 + DB 69,15,194,235,1 ; cmpltps %xmm11,%xmm13 + DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 + DB 69,15,84,234 ; andps %xmm10,%xmm13 + DB 69,15,87,201 ; xorps %xmm9,%xmm9 + DB 69,15,92,221 ; subps %xmm13,%xmm11 + DB 69,15,92,227 ; subps %xmm11,%xmm12 + DB 102,69,15,110,216 ; movd %r8d,%xmm11 + DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 + DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx + DB 102,68,15,110,233 ; movd %ecx,%xmm13 + DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 + DB 69,15,89,236 ; mulps %xmm12,%xmm13 + DB 69,15,92,253 ; subps %xmm13,%xmm15 + DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx + DB 102,68,15,110,233 ; movd %ecx,%xmm13 + DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 + DB 185,248,245,154,64 ; mov $0x409af5f8,%ecx + DB 102,68,15,110,241 ; movd %ecx,%xmm14 + DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14 + DB 69,15,92,244 ; subps %xmm12,%xmm14 + DB 69,15,94,238 ; divps %xmm14,%xmm13 + DB 69,15,88,239 ; addps %xmm15,%xmm13 + DB 69,15,89,235 ; mulps %xmm11,%xmm13 + DB 102,69,15,91,221 ; cvtps2dq %xmm13,%xmm11 + DB 243,68,15,16,96,20 ; movss 0x14(%rax),%xmm12 + DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12 + DB 69,15,88,227 ; addps %xmm11,%xmm12 + DB 68,15,84,192 ; andps %xmm0,%xmm8 + DB 65,15,85,196 ; andnps %xmm12,%xmm0 + DB 65,15,86,192 ; orps 
%xmm8,%xmm0 + DB 65,15,95,193 ; maxps %xmm9,%xmm0 + DB 65,15,93,194 ; minps %xmm10,%xmm0 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_parametric_g_sse2 +_sk_parametric_g_sse2 LABEL PROC + DB 72,173 ; lods %ds:(%rsi),%rax + DB 243,68,15,16,72,16 ; movss 0x10(%rax),%xmm9 + DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 + DB 243,68,15,16,64,12 ; movss 0xc(%rax),%xmm8 + DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 + DB 68,15,89,193 ; mulps %xmm1,%xmm8 + DB 243,68,15,16,80,4 ; movss 0x4(%rax),%xmm10 + DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 + DB 68,15,89,209 ; mulps %xmm1,%xmm10 + DB 65,15,194,201,2 ; cmpleps %xmm9,%xmm1 + DB 243,68,15,16,72,24 ; movss 0x18(%rax),%xmm9 + DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 + DB 69,15,88,193 ; addps %xmm9,%xmm8 + DB 243,68,15,16,8 ; movss (%rax),%xmm9 + DB 243,68,15,16,88,8 ; movss 0x8(%rax),%xmm11 + DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 + DB 69,15,88,211 ; addps %xmm11,%xmm10 + DB 69,15,91,218 ; cvtdq2ps %xmm10,%xmm11 + DB 185,0,0,0,52 ; mov $0x34000000,%ecx + DB 102,68,15,110,233 ; movd %ecx,%xmm13 + DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 + DB 69,15,89,235 ; mulps %xmm11,%xmm13 + DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx + DB 102,68,15,110,217 ; movd %ecx,%xmm11 + DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 + DB 69,15,92,235 ; subps %xmm11,%xmm13 + DB 185,255,255,127,0 ; mov $0x7fffff,%ecx + DB 102,68,15,110,217 ; movd %ecx,%xmm11 + DB 102,69,15,112,227,0 ; pshufd $0x0,%xmm11,%xmm12 + DB 102,69,15,219,226 ; pand %xmm10,%xmm12 + DB 185,0,0,0,63 ; mov $0x3f000000,%ecx + DB 102,68,15,110,209 ; movd %ecx,%xmm10 + DB 102,69,15,112,218,0 ; pshufd $0x0,%xmm10,%xmm11 + DB 102,69,15,235,220 ; por %xmm12,%xmm11 + DB 185,42,145,49,64 ; mov $0x4031912a,%ecx + DB 102,68,15,110,225 ; movd %ecx,%xmm12 + DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12 + DB 69,15,88,229 ; addps %xmm13,%xmm12 + DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx + DB 102,68,15,110,209 ; movd %ecx,%xmm10 + DB 
69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 + DB 69,15,89,211 ; mulps %xmm11,%xmm10 + DB 69,15,92,226 ; subps %xmm10,%xmm12 + DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx + DB 102,68,15,110,233 ; movd %ecx,%xmm13 + DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx + DB 102,68,15,110,241 ; movd %ecx,%xmm14 + DB 185,0,0,128,63 ; mov $0x3f800000,%ecx + DB 102,68,15,110,209 ; movd %ecx,%xmm10 + DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d + DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx + DB 102,68,15,110,249 ; movd %ecx,%xmm15 + DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 + DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14 + DB 69,15,88,243 ; addps %xmm11,%xmm14 + DB 69,15,94,238 ; divps %xmm14,%xmm13 + DB 69,15,92,229 ; subps %xmm13,%xmm12 + DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 + DB 69,15,89,204 ; mulps %xmm12,%xmm9 + DB 243,69,15,91,217 ; cvttps2dq %xmm9,%xmm11 + DB 69,15,91,219 ; cvtdq2ps %xmm11,%xmm11 + DB 69,15,40,225 ; movaps %xmm9,%xmm12 + DB 69,15,198,255,0 ; shufps $0x0,%xmm15,%xmm15 + DB 69,15,88,249 ; addps %xmm9,%xmm15 + DB 69,15,40,233 ; movaps %xmm9,%xmm13 + DB 69,15,194,235,1 ; cmpltps %xmm11,%xmm13 + DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 + DB 69,15,84,234 ; andps %xmm10,%xmm13 + DB 69,15,87,201 ; xorps %xmm9,%xmm9 + DB 69,15,92,221 ; subps %xmm13,%xmm11 + DB 69,15,92,227 ; subps %xmm11,%xmm12 + DB 102,69,15,110,216 ; movd %r8d,%xmm11 + DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 + DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx + DB 102,68,15,110,233 ; movd %ecx,%xmm13 + DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 + DB 69,15,89,236 ; mulps %xmm12,%xmm13 + DB 69,15,92,253 ; subps %xmm13,%xmm15 + DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx + DB 102,68,15,110,233 ; movd %ecx,%xmm13 + DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 + DB 185,248,245,154,64 ; mov $0x409af5f8,%ecx + DB 102,68,15,110,241 ; movd %ecx,%xmm14 + DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14 + DB 69,15,92,244 ; subps %xmm12,%xmm14 + DB 69,15,94,238 ; divps %xmm14,%xmm13 + 
DB 69,15,88,239 ; addps %xmm15,%xmm13 + DB 69,15,89,235 ; mulps %xmm11,%xmm13 + DB 102,69,15,91,221 ; cvtps2dq %xmm13,%xmm11 + DB 243,68,15,16,96,20 ; movss 0x14(%rax),%xmm12 + DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12 + DB 69,15,88,227 ; addps %xmm11,%xmm12 + DB 68,15,84,193 ; andps %xmm1,%xmm8 + DB 65,15,85,204 ; andnps %xmm12,%xmm1 + DB 65,15,86,200 ; orps %xmm8,%xmm1 + DB 65,15,95,201 ; maxps %xmm9,%xmm1 + DB 65,15,93,202 ; minps %xmm10,%xmm1 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_parametric_b_sse2 +_sk_parametric_b_sse2 LABEL PROC + DB 72,173 ; lods %ds:(%rsi),%rax + DB 243,68,15,16,72,16 ; movss 0x10(%rax),%xmm9 + DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 + DB 243,68,15,16,64,12 ; movss 0xc(%rax),%xmm8 + DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 + DB 68,15,89,194 ; mulps %xmm2,%xmm8 + DB 243,68,15,16,80,4 ; movss 0x4(%rax),%xmm10 + DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 + DB 68,15,89,210 ; mulps %xmm2,%xmm10 + DB 65,15,194,209,2 ; cmpleps %xmm9,%xmm2 + DB 243,68,15,16,72,24 ; movss 0x18(%rax),%xmm9 + DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 + DB 69,15,88,193 ; addps %xmm9,%xmm8 + DB 243,68,15,16,8 ; movss (%rax),%xmm9 + DB 243,68,15,16,88,8 ; movss 0x8(%rax),%xmm11 + DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 + DB 69,15,88,211 ; addps %xmm11,%xmm10 + DB 69,15,91,218 ; cvtdq2ps %xmm10,%xmm11 + DB 185,0,0,0,52 ; mov $0x34000000,%ecx + DB 102,68,15,110,233 ; movd %ecx,%xmm13 + DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 + DB 69,15,89,235 ; mulps %xmm11,%xmm13 + DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx + DB 102,68,15,110,217 ; movd %ecx,%xmm11 + DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 + DB 69,15,92,235 ; subps %xmm11,%xmm13 + DB 185,255,255,127,0 ; mov $0x7fffff,%ecx + DB 102,68,15,110,217 ; movd %ecx,%xmm11 + DB 102,69,15,112,227,0 ; pshufd $0x0,%xmm11,%xmm12 + DB 102,69,15,219,226 ; pand %xmm10,%xmm12 + DB 185,0,0,0,63 ; mov $0x3f000000,%ecx + DB 102,68,15,110,209 ; movd %ecx,%xmm10 + DB 
102,69,15,112,218,0 ; pshufd $0x0,%xmm10,%xmm11 + DB 102,69,15,235,220 ; por %xmm12,%xmm11 + DB 185,42,145,49,64 ; mov $0x4031912a,%ecx + DB 102,68,15,110,225 ; movd %ecx,%xmm12 + DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12 + DB 69,15,88,229 ; addps %xmm13,%xmm12 + DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx + DB 102,68,15,110,209 ; movd %ecx,%xmm10 + DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 + DB 69,15,89,211 ; mulps %xmm11,%xmm10 + DB 69,15,92,226 ; subps %xmm10,%xmm12 + DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx + DB 102,68,15,110,233 ; movd %ecx,%xmm13 + DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx + DB 102,68,15,110,241 ; movd %ecx,%xmm14 + DB 185,0,0,128,63 ; mov $0x3f800000,%ecx + DB 102,68,15,110,209 ; movd %ecx,%xmm10 + DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d + DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx + DB 102,68,15,110,249 ; movd %ecx,%xmm15 + DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 + DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14 + DB 69,15,88,243 ; addps %xmm11,%xmm14 + DB 69,15,94,238 ; divps %xmm14,%xmm13 + DB 69,15,92,229 ; subps %xmm13,%xmm12 + DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 + DB 69,15,89,204 ; mulps %xmm12,%xmm9 + DB 243,69,15,91,217 ; cvttps2dq %xmm9,%xmm11 + DB 69,15,91,219 ; cvtdq2ps %xmm11,%xmm11 + DB 69,15,40,225 ; movaps %xmm9,%xmm12 + DB 69,15,198,255,0 ; shufps $0x0,%xmm15,%xmm15 + DB 69,15,88,249 ; addps %xmm9,%xmm15 + DB 69,15,40,233 ; movaps %xmm9,%xmm13 + DB 69,15,194,235,1 ; cmpltps %xmm11,%xmm13 + DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 + DB 69,15,84,234 ; andps %xmm10,%xmm13 + DB 69,15,87,201 ; xorps %xmm9,%xmm9 + DB 69,15,92,221 ; subps %xmm13,%xmm11 + DB 69,15,92,227 ; subps %xmm11,%xmm12 + DB 102,69,15,110,216 ; movd %r8d,%xmm11 + DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 + DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx + DB 102,68,15,110,233 ; movd %ecx,%xmm13 + DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 + DB 69,15,89,236 ; mulps %xmm12,%xmm13 + DB 69,15,92,253 ; subps 
%xmm13,%xmm15 + DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx + DB 102,68,15,110,233 ; movd %ecx,%xmm13 + DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 + DB 185,248,245,154,64 ; mov $0x409af5f8,%ecx + DB 102,68,15,110,241 ; movd %ecx,%xmm14 + DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14 + DB 69,15,92,244 ; subps %xmm12,%xmm14 + DB 69,15,94,238 ; divps %xmm14,%xmm13 + DB 69,15,88,239 ; addps %xmm15,%xmm13 + DB 69,15,89,235 ; mulps %xmm11,%xmm13 + DB 102,69,15,91,221 ; cvtps2dq %xmm13,%xmm11 + DB 243,68,15,16,96,20 ; movss 0x14(%rax),%xmm12 + DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12 + DB 69,15,88,227 ; addps %xmm11,%xmm12 + DB 68,15,84,194 ; andps %xmm2,%xmm8 + DB 65,15,85,212 ; andnps %xmm12,%xmm2 + DB 65,15,86,208 ; orps %xmm8,%xmm2 + DB 65,15,95,209 ; maxps %xmm9,%xmm2 + DB 65,15,93,210 ; minps %xmm10,%xmm2 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_parametric_a_sse2 +_sk_parametric_a_sse2 LABEL PROC + DB 72,173 ; lods %ds:(%rsi),%rax + DB 243,68,15,16,72,16 ; movss 0x10(%rax),%xmm9 + DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 + DB 243,68,15,16,64,12 ; movss 0xc(%rax),%xmm8 + DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 + DB 68,15,89,195 ; mulps %xmm3,%xmm8 + DB 243,68,15,16,80,4 ; movss 0x4(%rax),%xmm10 + DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 + DB 68,15,89,211 ; mulps %xmm3,%xmm10 + DB 65,15,194,217,2 ; cmpleps %xmm9,%xmm3 + DB 243,68,15,16,72,24 ; movss 0x18(%rax),%xmm9 + DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 + DB 69,15,88,193 ; addps %xmm9,%xmm8 + DB 243,68,15,16,8 ; movss (%rax),%xmm9 + DB 243,68,15,16,88,8 ; movss 0x8(%rax),%xmm11 + DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 + DB 69,15,88,211 ; addps %xmm11,%xmm10 + DB 69,15,91,218 ; cvtdq2ps %xmm10,%xmm11 + DB 185,0,0,0,52 ; mov $0x34000000,%ecx + DB 102,68,15,110,233 ; movd %ecx,%xmm13 + DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 + DB 69,15,89,235 ; mulps %xmm11,%xmm13 + DB 185,0,0,254,66 ; mov $0x42fe0000,%ecx + DB 102,68,15,110,217 ; movd 
%ecx,%xmm11 + DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 + DB 69,15,92,235 ; subps %xmm11,%xmm13 + DB 185,255,255,127,0 ; mov $0x7fffff,%ecx + DB 102,68,15,110,217 ; movd %ecx,%xmm11 + DB 102,69,15,112,227,0 ; pshufd $0x0,%xmm11,%xmm12 + DB 102,69,15,219,226 ; pand %xmm10,%xmm12 + DB 185,0,0,0,63 ; mov $0x3f000000,%ecx + DB 102,68,15,110,209 ; movd %ecx,%xmm10 + DB 102,69,15,112,218,0 ; pshufd $0x0,%xmm10,%xmm11 + DB 102,69,15,235,220 ; por %xmm12,%xmm11 + DB 185,42,145,49,64 ; mov $0x4031912a,%ecx + DB 102,68,15,110,225 ; movd %ecx,%xmm12 + DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12 + DB 69,15,88,229 ; addps %xmm13,%xmm12 + DB 185,117,191,191,63 ; mov $0x3fbfbf75,%ecx + DB 102,68,15,110,209 ; movd %ecx,%xmm10 + DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 + DB 69,15,89,211 ; mulps %xmm11,%xmm10 + DB 69,15,92,226 ; subps %xmm10,%xmm12 + DB 185,163,233,220,63 ; mov $0x3fdce9a3,%ecx + DB 102,68,15,110,233 ; movd %ecx,%xmm13 + DB 185,249,68,180,62 ; mov $0x3eb444f9,%ecx + DB 102,68,15,110,241 ; movd %ecx,%xmm14 + DB 185,0,0,128,63 ; mov $0x3f800000,%ecx + DB 102,68,15,110,209 ; movd %ecx,%xmm10 + DB 65,184,0,0,0,75 ; mov $0x4b000000,%r8d + DB 185,81,140,242,66 ; mov $0x42f28c51,%ecx + DB 102,68,15,110,249 ; movd %ecx,%xmm15 + DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 + DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14 + DB 69,15,88,243 ; addps %xmm11,%xmm14 + DB 69,15,94,238 ; divps %xmm14,%xmm13 + DB 69,15,92,229 ; subps %xmm13,%xmm12 + DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 + DB 69,15,89,204 ; mulps %xmm12,%xmm9 + DB 243,69,15,91,217 ; cvttps2dq %xmm9,%xmm11 + DB 69,15,91,219 ; cvtdq2ps %xmm11,%xmm11 + DB 69,15,40,225 ; movaps %xmm9,%xmm12 + DB 69,15,198,255,0 ; shufps $0x0,%xmm15,%xmm15 + DB 69,15,88,249 ; addps %xmm9,%xmm15 + DB 69,15,40,233 ; movaps %xmm9,%xmm13 + DB 69,15,194,235,1 ; cmpltps %xmm11,%xmm13 + DB 69,15,198,210,0 ; shufps $0x0,%xmm10,%xmm10 + DB 69,15,84,234 ; andps %xmm10,%xmm13 + DB 69,15,87,201 ; xorps %xmm9,%xmm9 + DB 
69,15,92,221 ; subps %xmm13,%xmm11 + DB 69,15,92,227 ; subps %xmm11,%xmm12 + DB 102,69,15,110,216 ; movd %r8d,%xmm11 + DB 69,15,198,219,0 ; shufps $0x0,%xmm11,%xmm11 + DB 185,141,188,190,63 ; mov $0x3fbebc8d,%ecx + DB 102,68,15,110,233 ; movd %ecx,%xmm13 + DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 + DB 69,15,89,236 ; mulps %xmm12,%xmm13 + DB 69,15,92,253 ; subps %xmm13,%xmm15 + DB 185,254,210,221,65 ; mov $0x41ddd2fe,%ecx + DB 102,68,15,110,233 ; movd %ecx,%xmm13 + DB 69,15,198,237,0 ; shufps $0x0,%xmm13,%xmm13 + DB 185,248,245,154,64 ; mov $0x409af5f8,%ecx + DB 102,68,15,110,241 ; movd %ecx,%xmm14 + DB 69,15,198,246,0 ; shufps $0x0,%xmm14,%xmm14 + DB 69,15,92,244 ; subps %xmm12,%xmm14 + DB 69,15,94,238 ; divps %xmm14,%xmm13 + DB 69,15,88,239 ; addps %xmm15,%xmm13 + DB 69,15,89,235 ; mulps %xmm11,%xmm13 + DB 102,69,15,91,221 ; cvtps2dq %xmm13,%xmm11 + DB 243,68,15,16,96,20 ; movss 0x14(%rax),%xmm12 + DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12 + DB 69,15,88,227 ; addps %xmm11,%xmm12 + DB 68,15,84,195 ; andps %xmm3,%xmm8 + DB 65,15,85,220 ; andnps %xmm12,%xmm3 + DB 65,15,86,216 ; orps %xmm8,%xmm3 + DB 65,15,95,217 ; maxps %xmm9,%xmm3 + DB 65,15,93,218 ; minps %xmm10,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + PUBLIC _sk_load_a8_sse2 _sk_load_a8_sse2 LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax @@ -15254,9 +16780,9 @@ _sk_gather_i8_sse2 LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 73,137,192 ; mov %rax,%r8 DB 77,133,192 ; test %r8,%r8 - DB 116,5 ; je 2285 <_sk_gather_i8_sse2+0xf> + DB 116,5 ; je 29b9 <_sk_gather_i8_sse2+0xf> DB 76,137,192 ; mov %r8,%rax - DB 235,2 ; jmp 2287 <_sk_gather_i8_sse2+0x11> + DB 235,2 ; jmp 29bb <_sk_gather_i8_sse2+0x11> DB 72,173 ; lods %ds:(%rsi),%rax DB 76,139,16 ; mov (%rax),%r10 DB 243,15,91,201 ; cvttps2dq %xmm1,%xmm1 @@ -16453,7 +17979,7 @@ _sk_linear_gradient_sse2 LABEL PROC DB 69,15,198,228,0 ; shufps $0x0,%xmm12,%xmm12 DB 72,139,8 ; mov (%rax),%rcx DB 72,133,201 ; test %rcx,%rcx - DB 
15,132,15,1,0,0 ; je 366f <_sk_linear_gradient_sse2+0x149> + DB 15,132,15,1,0,0 ; je 3da3 <_sk_linear_gradient_sse2+0x149> DB 72,139,64,8 ; mov 0x8(%rax),%rax DB 72,131,192,32 ; add $0x20,%rax DB 69,15,87,192 ; xorps %xmm8,%xmm8 @@ -16514,8 +18040,8 @@ _sk_linear_gradient_sse2 LABEL PROC DB 69,15,86,231 ; orps %xmm15,%xmm12 DB 72,131,192,36 ; add $0x24,%rax DB 72,255,201 ; dec %rcx - DB 15,133,8,255,255,255 ; jne 3575 <_sk_linear_gradient_sse2+0x4f> - DB 235,13 ; jmp 367c <_sk_linear_gradient_sse2+0x156> + DB 15,133,8,255,255,255 ; jne 3ca9 <_sk_linear_gradient_sse2+0x4f> + DB 235,13 ; jmp 3db0 <_sk_linear_gradient_sse2+0x156> DB 15,87,201 ; xorps %xmm1,%xmm1 DB 15,87,210 ; xorps %xmm2,%xmm2 DB 15,87,219 ; xorps %xmm3,%xmm3 -- cgit v1.2.3