From aaca1e44b15f205a9393580b697bfd8331741a17 Mon Sep 17 00:00:00 2001 From: Mike Klein Date: Fri, 31 Mar 2017 09:29:01 -0400 Subject: jumper, more blend modes Change-Id: I17ce08a7ec62ef8ffe8ae567079d669a87ef9a9c Reviewed-on: https://skia-review.googlesource.com/10921 Reviewed-by: Mike Klein Commit-Queue: Mike Klein --- src/jumper/SkJumper_generated_win.S | 1012 +++++++++++++++++++++++++++++------ 1 file changed, 858 insertions(+), 154 deletions(-) (limited to 'src/jumper/SkJumper_generated_win.S') diff --git a/src/jumper/SkJumper_generated_win.S b/src/jumper/SkJumper_generated_win.S index 1590b7b5e4..c4d144d474 100644 --- a/src/jumper/SkJumper_generated_win.S +++ b/src/jumper/SkJumper_generated_win.S @@ -143,12 +143,81 @@ _sk_clear_hsw LABEL PROC DB 197,228,87,219 ; vxorps %ymm3,%ymm3,%ymm3 DB 255,224 ; jmpq *%rax -PUBLIC _sk_plus__hsw -_sk_plus__hsw LABEL PROC - DB 197,252,88,196 ; vaddps %ymm4,%ymm0,%ymm0 - DB 197,244,88,205 ; vaddps %ymm5,%ymm1,%ymm1 - DB 197,236,88,214 ; vaddps %ymm6,%ymm2,%ymm2 - DB 197,228,88,223 ; vaddps %ymm7,%ymm3,%ymm3 +PUBLIC _sk_srcatop_hsw +_sk_srcatop_hsw LABEL PROC + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 197,121,110,192 ; vmovd %eax,%xmm8 + DB 196,66,125,88,192 ; vpbroadcastd %xmm8,%ymm8 + DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8 + DB 197,60,89,204 ; vmulps %ymm4,%ymm8,%ymm9 + DB 196,194,69,168,193 ; vfmadd213ps %ymm9,%ymm7,%ymm0 + DB 197,60,89,205 ; vmulps %ymm5,%ymm8,%ymm9 + DB 196,194,69,168,201 ; vfmadd213ps %ymm9,%ymm7,%ymm1 + DB 197,60,89,206 ; vmulps %ymm6,%ymm8,%ymm9 + DB 196,194,69,168,209 ; vfmadd213ps %ymm9,%ymm7,%ymm2 + DB 197,60,89,199 ; vmulps %ymm7,%ymm8,%ymm8 + DB 196,194,69,168,216 ; vfmadd213ps %ymm8,%ymm7,%ymm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_dstatop_hsw +_sk_dstatop_hsw LABEL PROC + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 197,121,110,192 ; vmovd %eax,%xmm8 + DB 196,66,125,88,192 ; vpbroadcastd %xmm8,%ymm8 + DB 197,60,92,199 ; vsubps %ymm7,%ymm8,%ymm8 + DB 197,188,89,192 ; vmulps %ymm0,%ymm8,%ymm0 + DB 196,226,101,184,196 ; vfmadd231ps %ymm4,%ymm3,%ymm0 + DB 197,188,89,201 ; vmulps %ymm1,%ymm8,%ymm1 + DB 196,226,101,184,205 ; vfmadd231ps %ymm5,%ymm3,%ymm1 + DB 197,188,89,210 ; vmulps %ymm2,%ymm8,%ymm2 + DB 196,226,101,184,214 ; vfmadd231ps %ymm6,%ymm3,%ymm2 + DB 197,60,89,195 ; vmulps %ymm3,%ymm8,%ymm8 + DB 196,194,69,168,216 ; vfmadd213ps %ymm8,%ymm7,%ymm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_srcin_hsw +_sk_srcin_hsw LABEL PROC + DB 197,252,89,199 ; vmulps %ymm7,%ymm0,%ymm0 + DB 197,244,89,207 ; vmulps %ymm7,%ymm1,%ymm1 + DB 197,236,89,215 ; vmulps %ymm7,%ymm2,%ymm2 + DB 197,228,89,223 ; vmulps %ymm7,%ymm3,%ymm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_dstin_hsw +_sk_dstin_hsw LABEL PROC + DB 197,228,89,196 ; vmulps %ymm4,%ymm3,%ymm0 + DB 197,228,89,205 ; vmulps %ymm5,%ymm3,%ymm1 + DB 197,228,89,214 ; vmulps %ymm6,%ymm3,%ymm2 + DB 197,228,89,223 ; vmulps %ymm7,%ymm3,%ymm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_srcout_hsw +_sk_srcout_hsw LABEL PROC + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 197,121,110,192 ; vmovd %eax,%xmm8 + DB 196,66,125,88,192 ; vpbroadcastd %xmm8,%ymm8 + DB 197,60,92,199 ; vsubps %ymm7,%ymm8,%ymm8 + DB 197,188,89,192 ; vmulps %ymm0,%ymm8,%ymm0 + DB 197,188,89,201 ; vmulps %ymm1,%ymm8,%ymm1 + DB 197,188,89,210 ; vmulps %ymm2,%ymm8,%ymm2 + DB 197,188,89,219 ; vmulps %ymm3,%ymm8,%ymm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_dstout_hsw +_sk_dstout_hsw LABEL PROC + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 197,249,110,192 ; vmovd %eax,%xmm0 + DB 196,226,125,88,192 ; vpbroadcastd %xmm0,%ymm0 + DB 197,252,92,219 ; vsubps %ymm3,%ymm0,%ymm3 + DB 197,228,89,196 ; vmulps %ymm4,%ymm3,%ymm0 + DB 197,228,89,205 ; vmulps %ymm5,%ymm3,%ymm1 + DB 197,228,89,214 ; vmulps %ymm6,%ymm3,%ymm2 + DB 197,228,89,223 ; vmulps %ymm7,%ymm3,%ymm3 DB 72,173 ; lods %ds:(%rsi),%rax DB 255,224 ; jmpq *%rax @@ -178,6 +247,78 @@ _sk_dstover_hsw LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 255,224 ; jmpq *%rax +PUBLIC _sk_modulate_hsw +_sk_modulate_hsw LABEL PROC + DB 197,252,89,196 ; vmulps %ymm4,%ymm0,%ymm0 + DB 197,244,89,205 ; vmulps %ymm5,%ymm1,%ymm1 + DB 197,236,89,214 ; vmulps %ymm6,%ymm2,%ymm2 + DB 197,228,89,223 ; vmulps %ymm7,%ymm3,%ymm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_multiply_hsw +_sk_multiply_hsw LABEL PROC + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 197,121,110,192 ; vmovd %eax,%xmm8 + DB 196,66,125,88,192 ; vpbroadcastd %xmm8,%ymm8 + DB 197,60,92,207 ; vsubps %ymm7,%ymm8,%ymm9 + DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8 + DB 197,60,89,212 ; vmulps %ymm4,%ymm8,%ymm10 + DB 196,98,53,184,208 ; vfmadd231ps %ymm0,%ymm9,%ymm10 + DB 196,194,93,168,194 ; vfmadd213ps %ymm10,%ymm4,%ymm0 + DB 197,52,89,209 ; vmulps %ymm1,%ymm9,%ymm10 + DB 196,98,61,184,213 ; vfmadd231ps %ymm5,%ymm8,%ymm10 + DB 196,194,85,168,202 ; vfmadd213ps %ymm10,%ymm5,%ymm1 + DB 197,52,89,210 ; vmulps %ymm2,%ymm9,%ymm10 + DB 196,98,61,184,214 ; vfmadd231ps %ymm6,%ymm8,%ymm10 + DB 196,194,77,168,210 ; vfmadd213ps %ymm10,%ymm6,%ymm2 + DB 197,52,89,203 ; vmulps %ymm3,%ymm9,%ymm9 + DB 196,66,69,168,193 ; vfmadd213ps %ymm9,%ymm7,%ymm8 + DB 196,194,69,168,216 ; vfmadd213ps %ymm8,%ymm7,%ymm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_plus__hsw +_sk_plus__hsw LABEL PROC + DB 197,252,88,196 ; vaddps %ymm4,%ymm0,%ymm0 + DB 197,244,88,205 ; vaddps %ymm5,%ymm1,%ymm1 + DB 197,236,88,214 ; vaddps %ymm6,%ymm2,%ymm2 + DB 197,228,88,223 ; vaddps %ymm7,%ymm3,%ymm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_screen_hsw +_sk_screen_hsw LABEL PROC + DB 197,124,88,196 ; vaddps %ymm4,%ymm0,%ymm8 + DB 196,194,93,172,192 ; vfnmadd213ps %ymm8,%ymm4,%ymm0 + DB 197,116,88,197 ; vaddps %ymm5,%ymm1,%ymm8 + DB 196,194,85,172,200 ; vfnmadd213ps %ymm8,%ymm5,%ymm1 + DB 197,108,88,198 ; vaddps %ymm6,%ymm2,%ymm8 + DB 196,194,77,172,208 ; vfnmadd213ps %ymm8,%ymm6,%ymm2 + DB 197,100,88,199 ; vaddps %ymm7,%ymm3,%ymm8 + DB 196,194,69,172,216 ; vfnmadd213ps %ymm8,%ymm7,%ymm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_xor__hsw +_sk_xor__hsw LABEL PROC + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 197,121,110,192 ; vmovd %eax,%xmm8 + DB 196,66,125,88,192 ; vpbroadcastd %xmm8,%ymm8 + DB 197,60,92,207 ; vsubps %ymm7,%ymm8,%ymm9 + DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8 + DB 197,60,89,212 ; vmulps %ymm4,%ymm8,%ymm10 + DB 196,194,53,168,194 ; vfmadd213ps %ymm10,%ymm9,%ymm0 + DB 197,180,89,201 ; vmulps %ymm1,%ymm9,%ymm1 + DB 196,226,61,184,205 ; vfmadd231ps %ymm5,%ymm8,%ymm1 + DB 197,180,89,210 ; vmulps %ymm2,%ymm9,%ymm2 + DB 196,226,61,184,214 ; vfmadd231ps %ymm6,%ymm8,%ymm2 + DB 197,180,89,219 ; vmulps %ymm3,%ymm9,%ymm3 + DB 196,98,69,168,195 ; vfmadd213ps %ymm3,%ymm7,%ymm8 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 197,124,41,195 ; vmovaps %ymm8,%ymm3 + DB 255,224 ; jmpq *%rax + PUBLIC _sk_clamp_0_hsw _sk_clamp_0_hsw LABEL PROC DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8 @@ -396,7 +537,7 @@ _sk_scale_u8_hsw LABEL PROC DB 72,139,0 ; mov (%rax),%rax DB 72,1,248 ; add %rdi,%rax DB 77,133,192 ; test %r8,%r8 - DB 117,56 ; jne 556 <_sk_scale_u8_hsw+0x48> + DB 117,56 ; jne 70e <_sk_scale_u8_hsw+0x48> DB 197,122,126,0 ; vmovq (%rax),%xmm8 DB 196,66,125,49,192 ; vpmovzxbd %xmm8,%ymm8 DB 196,65,124,91,192 ; vcvtdq2ps %ymm8,%ymm8 @@ -420,9 +561,9 @@ _sk_scale_u8_hsw LABEL PROC DB 77,9,217 ; or %r11,%r9 DB 72,131,193,8 ; add $0x8,%rcx DB 73,255,202 ; dec %r10 - DB 117,234 ; jne 55e <_sk_scale_u8_hsw+0x50> + DB 117,234 ; jne 716 <_sk_scale_u8_hsw+0x50> DB 196,65,249,110,193 ; vmovq %r9,%xmm8 - DB 235,167 ; jmp 522 <_sk_scale_u8_hsw+0x14> + DB 235,167 ; jmp 6da <_sk_scale_u8_hsw+0x14> PUBLIC _sk_lerp_1_float_hsw _sk_lerp_1_float_hsw LABEL PROC @@ -446,7 +587,7 @@ _sk_lerp_u8_hsw LABEL PROC DB 72,139,0 ; mov (%rax),%rax DB 72,1,248 ; add %rdi,%rax DB 77,133,192 ; test %r8,%r8 - DB 117,76 ; jne 606 <_sk_lerp_u8_hsw+0x5c> + DB 117,76 ; jne 7be <_sk_lerp_u8_hsw+0x5c> DB 197,122,126,0 ; vmovq (%rax),%xmm8 DB 196,66,125,49,192 ; vpmovzxbd %xmm8,%ymm8 DB 196,65,124,91,192 ; vcvtdq2ps %ymm8,%ymm8 @@ -474,16 +615,16 @@ _sk_lerp_u8_hsw LABEL PROC DB 77,9,217 ; or %r11,%r9 DB 72,131,193,8 ; add $0x8,%rcx DB 73,255,202 ; dec %r10 - DB 117,234 ; jne 60e <_sk_lerp_u8_hsw+0x64> + DB 117,234 ; jne 7c6 <_sk_lerp_u8_hsw+0x64> DB 196,65,249,110,193 ; vmovq %r9,%xmm8 - DB 235,147 ; jmp 5be <_sk_lerp_u8_hsw+0x14> + DB 235,147 ; jmp 776 <_sk_lerp_u8_hsw+0x14> PUBLIC _sk_lerp_565_hsw _sk_lerp_565_hsw LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 76,139,16 ; mov (%rax),%r10 DB 72,133,201 ; test %rcx,%rcx - DB 15,133,179,0,0,0 ; jne 6ec <_sk_lerp_565_hsw+0xc1> + DB 15,133,179,0,0,0 ; jne 8a4 <_sk_lerp_565_hsw+0xc1> DB 196,193,122,111,28,122 ; vmovdqu (%r10,%rdi,2),%xmm3 DB 196,98,125,51,195 ; vpmovzxwd %xmm3,%ymm8 DB 184,0,248,0,0 ; mov $0xf800,%eax @@ -529,9 +670,9 @@ _sk_lerp_565_hsw LABEL PROC DB 197,225,239,219 ; vpxor %xmm3,%xmm3,%xmm3 DB 65,254,200 ; dec %r8b DB 65,128,248,6 ; cmp $0x6,%r8b - DB 15,135,59,255,255,255 ; ja 63f <_sk_lerp_565_hsw+0x14> + DB 15,135,59,255,255,255 ; ja 7f7 <_sk_lerp_565_hsw+0x14> DB 69,15,182,192 ; movzbl %r8b,%r8d - DB 76,141,13,73,0,0,0 ; lea 0x49(%rip),%r9 # 758 <_sk_lerp_565_hsw+0x12d> + DB 76,141,13,73,0,0,0 ; lea 0x49(%rip),%r9 # 910 <_sk_lerp_565_hsw+0x12d> DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax DB 76,1,200 ; add %r9,%rax DB 255,224 ; jmpq *%rax @@ -543,7 +684,7 @@ _sk_lerp_565_hsw LABEL PROC DB 196,193,97,196,92,122,4,2 ; vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm3,%xmm3 DB 196,193,97,196,92,122,2,1 ; vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm3,%xmm3 DB 196,193,97,196,28,122,0 ; vpinsrw $0x0,(%r10,%rdi,2),%xmm3,%xmm3 - DB 233,231,254,255,255 ; jmpq 63f <_sk_lerp_565_hsw+0x14> + DB 233,231,254,255,255 ; jmpq 7f7 <_sk_lerp_565_hsw+0x14> DB 244 ; hlt DB 255 ; (bad) DB 255 ; (bad) @@ -575,7 +716,7 @@ _sk_load_tables_hsw LABEL PROC DB 76,141,12,189,0,0,0,0 ; lea 0x0(,%rdi,4),%r9 DB 76,3,8 ; add (%rax),%r9 DB 77,133,192 ; test %r8,%r8 - DB 117,121 ; jne 802 <_sk_load_tables_hsw+0x8e> + DB 117,121 ; jne 9ba <_sk_load_tables_hsw+0x8e> DB 196,193,126,111,25 ; vmovdqu (%r9),%ymm3 DB 185,255,0,0,0 ; mov $0xff,%ecx DB 197,249,110,193 ; vmovd %ecx,%xmm0 @@ -611,7 +752,7 @@ _sk_load_tables_hsw LABEL PROC DB 196,193,249,110,194 ; vmovq %r10,%xmm0 DB 196,226,125,33,192 ; vpmovsxbd %xmm0,%ymm0 DB 196,194,125,140,25 ; vpmaskmovd (%r9),%ymm0,%ymm3 - DB 233,99,255,255,255 ; jmpq 78e <_sk_load_tables_hsw+0x1a> + DB 233,99,255,255,255 ; jmpq 946 <_sk_load_tables_hsw+0x1a> PUBLIC _sk_load_a8_hsw _sk_load_a8_hsw LABEL PROC @@ -620,7 +761,7 @@ _sk_load_a8_hsw LABEL PROC DB 72,139,0 ; mov (%rax),%rax DB 72,1,248 ; add %rdi,%rax DB 77,133,192 ; test %r8,%r8 - DB 117,50 ; jne 86d <_sk_load_a8_hsw+0x42> + DB 117,50 ; jne a25 <_sk_load_a8_hsw+0x42> DB 197,250,126,0 ; vmovq (%rax),%xmm0 DB 196,226,125,49,192 ; vpmovzxbd %xmm0,%ymm0 DB 197,252,91,192 ; vcvtdq2ps %ymm0,%ymm0 @@ -643,9 +784,9 @@ _sk_load_a8_hsw LABEL PROC DB 77,9,217 ; or %r11,%r9 DB 72,131,193,8 ; add $0x8,%rcx DB 73,255,202 ; dec %r10 - DB 117,234 ; jne 875 <_sk_load_a8_hsw+0x4a> + DB 117,234 ; jne a2d <_sk_load_a8_hsw+0x4a> DB 196,193,249,110,193 ; vmovq %r9,%xmm0 - DB 235,173 ; jmp 83f <_sk_load_a8_hsw+0x14> + DB 235,173 ; jmp 9f7 <_sk_load_a8_hsw+0x14> PUBLIC _sk_store_a8_hsw _sk_store_a8_hsw LABEL PROC @@ -660,7 +801,7 @@ _sk_store_a8_hsw LABEL PROC DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8 DB 196,65,57,103,192 ; vpackuswb %xmm8,%xmm8,%xmm8 DB 72,133,201 ; test %rcx,%rcx - DB 117,10 ; jne 8cd <_sk_store_a8_hsw+0x3b> + DB 117,10 ; jne a85 <_sk_store_a8_hsw+0x3b> DB 196,65,123,17,4,57 ; vmovsd %xmm8,(%r9,%rdi,1) DB 72,173 ; lods %ds:(%rsi),%rax DB 255,224 ; jmpq *%rax @@ -668,10 +809,10 @@ _sk_store_a8_hsw LABEL PROC DB 65,128,224,7 ; and $0x7,%r8b DB 65,254,200 ; dec %r8b DB 65,128,248,6 ; cmp $0x6,%r8b - DB 119,236 ; ja 8c9 <_sk_store_a8_hsw+0x37> + DB 119,236 ; ja a81 <_sk_store_a8_hsw+0x37> DB 196,66,121,48,192 ; vpmovzxbw %xmm8,%xmm8 DB 65,15,182,192 ; movzbl %r8b,%eax - DB 76,141,5,67,0,0,0 ; lea 0x43(%rip),%r8 # 930 <_sk_store_a8_hsw+0x9e> + DB 76,141,5,67,0,0,0 ; lea 0x43(%rip),%r8 # ae8 <_sk_store_a8_hsw+0x9e> DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax DB 76,1,192 ; add %r8,%rax DB 255,224 ; jmpq *%rax @@ -682,7 +823,7 @@ _sk_store_a8_hsw LABEL PROC DB 196,67,121,20,68,57,2,4 ; vpextrb $0x4,%xmm8,0x2(%r9,%rdi,1) DB 196,67,121,20,68,57,1,2 ; vpextrb $0x2,%xmm8,0x1(%r9,%rdi,1) DB 196,67,121,20,4,57,0 ; vpextrb $0x0,%xmm8,(%r9,%rdi,1) - DB 235,154 ; jmp 8c9 <_sk_store_a8_hsw+0x37> + DB 235,154 ; jmp a81 <_sk_store_a8_hsw+0x37> DB 144 ; nop DB 246,255 ; idiv %bh DB 255 ; (bad) @@ -712,7 +853,7 @@ _sk_load_565_hsw LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 76,139,16 ; mov (%rax),%r10 DB 72,133,201 ; test %rcx,%rcx - DB 15,133,149,0,0,0 ; jne 9ef <_sk_load_565_hsw+0xa3> + DB 15,133,149,0,0,0 ; jne ba7 <_sk_load_565_hsw+0xa3> DB 196,193,122,111,4,122 ; vmovdqu (%r10,%rdi,2),%xmm0 DB 196,226,125,51,208 ; vpmovzxwd %xmm0,%ymm2 DB 184,0,248,0,0 ; mov $0xf800,%eax @@ -752,9 +893,9 @@ _sk_load_565_hsw LABEL PROC DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0 DB 65,254,200 ; dec %r8b DB 65,128,248,6 ; cmp $0x6,%r8b - DB 15,135,89,255,255,255 ; ja 960 <_sk_load_565_hsw+0x14> + DB 15,135,89,255,255,255 ; ja b18 <_sk_load_565_hsw+0x14> DB 69,15,182,192 ; movzbl %r8b,%r8d - DB 76,141,13,74,0,0,0 ; lea 0x4a(%rip),%r9 # a5c <_sk_load_565_hsw+0x110> + DB 76,141,13,74,0,0,0 ; lea 0x4a(%rip),%r9 # c14 <_sk_load_565_hsw+0x110> DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax DB 76,1,200 ; add %r9,%rax DB 255,224 ; jmpq *%rax @@ -766,12 +907,12 @@ _sk_load_565_hsw LABEL PROC DB 196,193,121,196,68,122,4,2 ; vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0 DB 196,193,121,196,68,122,2,1 ; vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0 DB 196,193,121,196,4,122,0 ; vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0 - DB 233,5,255,255,255 ; jmpq 960 <_sk_load_565_hsw+0x14> + DB 233,5,255,255,255 ; jmpq b18 <_sk_load_565_hsw+0x14> DB 144 ; nop DB 243,255 ; repz (bad) DB 255 ; (bad) DB 255 ; (bad) - DB 235,255 ; jmp a61 <_sk_load_565_hsw+0x115> + DB 235,255 ; jmp c19 <_sk_load_565_hsw+0x115> DB 255 ; (bad) DB 255,227 ; jmpq *%rbx DB 255 ; (bad) @@ -814,7 +955,7 @@ _sk_store_565_hsw LABEL PROC DB 196,67,125,57,193,1 ; vextracti128 $0x1,%ymm8,%xmm9 DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8 DB 72,133,201 ; test %rcx,%rcx - DB 117,10 ; jne ae4 <_sk_store_565_hsw+0x6c> + DB 117,10 ; jne c9c <_sk_store_565_hsw+0x6c> DB 196,65,122,127,4,121 ; vmovdqu %xmm8,(%r9,%rdi,2) DB 72,173 ; lods %ds:(%rsi),%rax DB 255,224 ; jmpq *%rax @@ -822,9 +963,9 @@ _sk_store_565_hsw LABEL PROC DB 65,128,224,7 ; and $0x7,%r8b DB 65,254,200 ; dec %r8b DB 65,128,248,6 ; cmp $0x6,%r8b - DB 119,236 ; ja ae0 <_sk_store_565_hsw+0x68> + DB 119,236 ; ja c98 <_sk_store_565_hsw+0x68> DB 65,15,182,192 ; movzbl %r8b,%eax - DB 76,141,5,69,0,0,0 ; lea 0x45(%rip),%r8 # b44 <_sk_store_565_hsw+0xcc> + DB 76,141,5,69,0,0,0 ; lea 0x45(%rip),%r8 # cfc <_sk_store_565_hsw+0xcc> DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax DB 76,1,192 ; add %r8,%rax DB 255,224 ; jmpq *%rax @@ -835,7 +976,7 @@ _sk_store_565_hsw LABEL PROC DB 196,67,121,21,68,121,4,2 ; vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2) DB 196,67,121,21,68,121,2,1 ; vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2) DB 196,67,121,21,4,121,0 ; vpextrw $0x0,%xmm8,(%r9,%rdi,2) - DB 235,159 ; jmp ae0 <_sk_store_565_hsw+0x68> + DB 235,159 ; jmp c98 <_sk_store_565_hsw+0x68> DB 15,31,0 ; nopl (%rax) DB 244 ; hlt DB 255 ; (bad) @@ -868,7 +1009,7 @@ _sk_load_8888_hsw LABEL PROC DB 76,141,12,189,0,0,0,0 ; lea 0x0(,%rdi,4),%r9 DB 76,3,8 ; add (%rax),%r9 DB 77,133,192 ; test %r8,%r8 - DB 117,104 ; jne bdd <_sk_load_8888_hsw+0x7d> + DB 117,104 ; jne d95 <_sk_load_8888_hsw+0x7d> DB 196,193,126,111,25 ; vmovdqu (%r9),%ymm3 DB 184,255,0,0,0 ; mov $0xff,%eax DB 197,249,110,192 ; vmovd %eax,%xmm0 @@ -901,7 +1042,7 @@ _sk_load_8888_hsw LABEL PROC DB 196,225,249,110,192 ; vmovq %rax,%xmm0 DB 196,226,125,33,192 ; vpmovsxbd %xmm0,%ymm0 DB 196,194,125,140,25 ; vpmaskmovd (%r9),%ymm0,%ymm3 - DB 233,116,255,255,255 ; jmpq b7a <_sk_load_8888_hsw+0x1a> + DB 233,116,255,255,255 ; jmpq d32 <_sk_load_8888_hsw+0x1a> PUBLIC _sk_store_8888_hsw _sk_store_8888_hsw LABEL PROC @@ -927,7 +1068,7 @@ _sk_store_8888_hsw LABEL PROC DB 196,65,45,235,192 ; vpor %ymm8,%ymm10,%ymm8 DB 196,65,53,235,192 ; vpor %ymm8,%ymm9,%ymm8 DB 77,133,192 ; test %r8,%r8 - DB 117,12 ; jne c7a <_sk_store_8888_hsw+0x74> + DB 117,12 ; jne e32 <_sk_store_8888_hsw+0x74> DB 196,65,126,127,1 ; vmovdqu %ymm8,(%r9) DB 72,173 ; lods %ds:(%rsi),%rax DB 76,137,193 ; mov %r8,%rcx @@ -940,14 +1081,14 @@ _sk_store_8888_hsw LABEL PROC DB 196,97,249,110,200 ; vmovq %rax,%xmm9 DB 196,66,125,33,201 ; vpmovsxbd %xmm9,%ymm9 DB 196,66,53,142,1 ; vpmaskmovd %ymm8,%ymm9,(%r9) - DB 235,211 ; jmp c73 <_sk_store_8888_hsw+0x6d> + DB 235,211 ; jmp e2b <_sk_store_8888_hsw+0x6d> PUBLIC _sk_load_f16_hsw _sk_load_f16_hsw LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 72,139,0 ; mov (%rax),%rax DB 72,133,201 ; test %rcx,%rcx - DB 117,97 ; jne d0b <_sk_load_f16_hsw+0x6b> + DB 117,97 ; jne ec3 <_sk_load_f16_hsw+0x6b> DB 197,121,16,4,248 ; vmovupd (%rax,%rdi,8),%xmm8 DB 197,249,16,84,248,16 ; vmovupd 0x10(%rax,%rdi,8),%xmm2 DB 197,249,16,92,248,32 ; vmovupd 0x20(%rax,%rdi,8),%xmm3 @@ -973,29 +1114,29 @@ _sk_load_f16_hsw LABEL PROC DB 197,123,16,4,248 ; vmovsd (%rax,%rdi,8),%xmm8 DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9 DB 72,131,249,1 ; cmp $0x1,%rcx - DB 116,79 ; je d6a <_sk_load_f16_hsw+0xca> + DB 116,79 ; je f22 <_sk_load_f16_hsw+0xca> DB 197,57,22,68,248,8 ; vmovhpd 0x8(%rax,%rdi,8),%xmm8,%xmm8 DB 72,131,249,3 ; cmp $0x3,%rcx - DB 114,67 ; jb d6a <_sk_load_f16_hsw+0xca> + DB 114,67 ; jb f22 <_sk_load_f16_hsw+0xca> DB 197,251,16,84,248,16 ; vmovsd 0x10(%rax,%rdi,8),%xmm2 DB 72,131,249,3 ; cmp $0x3,%rcx - DB 116,68 ; je d77 <_sk_load_f16_hsw+0xd7> + DB 116,68 ; je f2f <_sk_load_f16_hsw+0xd7> DB 197,233,22,84,248,24 ; vmovhpd 0x18(%rax,%rdi,8),%xmm2,%xmm2 DB 72,131,249,5 ; cmp $0x5,%rcx - DB 114,56 ; jb d77 <_sk_load_f16_hsw+0xd7> + DB 114,56 ; jb f2f <_sk_load_f16_hsw+0xd7> DB 197,251,16,92,248,32 ; vmovsd 0x20(%rax,%rdi,8),%xmm3 DB 72,131,249,5 ; cmp $0x5,%rcx - DB 15,132,114,255,255,255 ; je cc1 <_sk_load_f16_hsw+0x21> + DB 15,132,114,255,255,255 ; je e79 <_sk_load_f16_hsw+0x21> DB 197,225,22,92,248,40 ; vmovhpd 0x28(%rax,%rdi,8),%xmm3,%xmm3 DB 72,131,249,7 ; cmp $0x7,%rcx - DB 15,130,98,255,255,255 ; jb cc1 <_sk_load_f16_hsw+0x21> + DB 15,130,98,255,255,255 ; jb e79 <_sk_load_f16_hsw+0x21> DB 197,122,126,76,248,48 ; vmovq 0x30(%rax,%rdi,8),%xmm9 - DB 233,87,255,255,255 ; jmpq cc1 <_sk_load_f16_hsw+0x21> + DB 233,87,255,255,255 ; jmpq e79 <_sk_load_f16_hsw+0x21> DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3 DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2 - DB 233,74,255,255,255 ; jmpq cc1 <_sk_load_f16_hsw+0x21> + DB 233,74,255,255,255 ; jmpq e79 <_sk_load_f16_hsw+0x21> DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3 - DB 233,65,255,255,255 ; jmpq cc1 <_sk_load_f16_hsw+0x21> + DB 233,65,255,255,255 ; jmpq e79 <_sk_load_f16_hsw+0x21> PUBLIC _sk_store_f16_hsw _sk_store_f16_hsw LABEL PROC @@ -1014,7 +1155,7 @@ _sk_store_f16_hsw LABEL PROC DB 196,65,57,98,205 ; vpunpckldq %xmm13,%xmm8,%xmm9 DB 196,65,57,106,197 ; vpunpckhdq %xmm13,%xmm8,%xmm8 DB 72,133,201 ; test %rcx,%rcx - DB 117,27 ; jne de5 <_sk_store_f16_hsw+0x65> + DB 117,27 ; jne f9d <_sk_store_f16_hsw+0x65> DB 197,120,17,28,248 ; vmovups %xmm11,(%rax,%rdi,8) DB 197,120,17,84,248,16 ; vmovups %xmm10,0x10(%rax,%rdi,8) DB 197,120,17,76,248,32 ; vmovups %xmm9,0x20(%rax,%rdi,8) @@ -1023,22 +1164,22 @@ _sk_store_f16_hsw LABEL PROC DB 255,224 ; jmpq *%rax DB 197,121,214,28,248 ; vmovq %xmm11,(%rax,%rdi,8) DB 72,131,249,1 ; cmp $0x1,%rcx - DB 116,241 ; je de1 <_sk_store_f16_hsw+0x61> + DB 116,241 ; je f99 <_sk_store_f16_hsw+0x61> DB 197,121,23,92,248,8 ; vmovhpd %xmm11,0x8(%rax,%rdi,8) DB 72,131,249,3 ; cmp $0x3,%rcx - DB 114,229 ; jb de1 <_sk_store_f16_hsw+0x61> + DB 114,229 ; jb f99 <_sk_store_f16_hsw+0x61> DB 197,121,214,84,248,16 ; vmovq %xmm10,0x10(%rax,%rdi,8) - DB 116,221 ; je de1 <_sk_store_f16_hsw+0x61> + DB 116,221 ; je f99 <_sk_store_f16_hsw+0x61> DB 197,121,23,84,248,24 ; vmovhpd %xmm10,0x18(%rax,%rdi,8) DB 72,131,249,5 ; cmp $0x5,%rcx - DB 114,209 ; jb de1 <_sk_store_f16_hsw+0x61> + DB 114,209 ; jb f99 <_sk_store_f16_hsw+0x61> DB 197,121,214,76,248,32 ; vmovq %xmm9,0x20(%rax,%rdi,8) - DB 116,201 ; je de1 <_sk_store_f16_hsw+0x61> + DB 116,201 ; je f99 <_sk_store_f16_hsw+0x61> DB 197,121,23,76,248,40 ; vmovhpd %xmm9,0x28(%rax,%rdi,8) DB 72,131,249,7 ; cmp $0x7,%rcx - DB 114,189 ; jb de1 <_sk_store_f16_hsw+0x61> + DB 114,189 ; jb f99 <_sk_store_f16_hsw+0x61> DB 197,121,214,68,248,48 ; vmovq %xmm8,0x30(%rax,%rdi,8) - DB 235,181 ; jmp de1 <_sk_store_f16_hsw+0x61> + DB 235,181 ; jmp f99 <_sk_store_f16_hsw+0x61> PUBLIC _sk_store_f32_hsw _sk_store_f32_hsw LABEL PROC @@ -1054,7 +1195,7 @@ _sk_store_f32_hsw LABEL PROC DB 196,65,37,20,196 ; vunpcklpd %ymm12,%ymm11,%ymm8 DB 196,65,37,21,220 ; vunpckhpd %ymm12,%ymm11,%ymm11 DB 72,133,201 ; test %rcx,%rcx - DB 117,55 ; jne e99 <_sk_store_f32_hsw+0x6d> + DB 117,55 ; jne 1051 <_sk_store_f32_hsw+0x6d> DB 196,67,45,24,225,1 ; vinsertf128 $0x1,%xmm9,%ymm10,%ymm12 DB 196,67,61,24,235,1 ; vinsertf128 $0x1,%xmm11,%ymm8,%ymm13 DB 196,67,45,6,201,49 ; vperm2f128 $0x31,%ymm9,%ymm10,%ymm9 @@ -1067,22 +1208,22 @@ _sk_store_f32_hsw LABEL PROC DB 255,224 ; jmpq *%rax DB 196,65,121,17,20,128 ; vmovupd %xmm10,(%r8,%rax,4) DB 72,131,249,1 ; cmp $0x1,%rcx - DB 116,240 ; je e95 <_sk_store_f32_hsw+0x69> + DB 116,240 ; je 104d <_sk_store_f32_hsw+0x69> DB 196,65,121,17,76,128,16 ; vmovupd %xmm9,0x10(%r8,%rax,4) DB 72,131,249,3 ; cmp $0x3,%rcx - DB 114,227 ; jb e95 <_sk_store_f32_hsw+0x69> + DB 114,227 ; jb 104d <_sk_store_f32_hsw+0x69> DB 196,65,121,17,68,128,32 ; vmovupd %xmm8,0x20(%r8,%rax,4) - DB 116,218 ; je e95 <_sk_store_f32_hsw+0x69> + DB 116,218 ; je 104d <_sk_store_f32_hsw+0x69> DB 196,65,121,17,92,128,48 ; vmovupd %xmm11,0x30(%r8,%rax,4) DB 72,131,249,5 ; cmp $0x5,%rcx - DB 114,205 ; jb e95 <_sk_store_f32_hsw+0x69> + DB 114,205 ; jb 104d <_sk_store_f32_hsw+0x69> DB 196,67,125,25,84,128,64,1 ; vextractf128 $0x1,%ymm10,0x40(%r8,%rax,4) - DB 116,195 ; je e95 <_sk_store_f32_hsw+0x69> + DB 116,195 ; je 104d <_sk_store_f32_hsw+0x69> DB 196,67,125,25,76,128,80,1 ; vextractf128 $0x1,%ymm9,0x50(%r8,%rax,4) DB 72,131,249,7 ; cmp $0x7,%rcx - DB 114,181 ; jb e95 <_sk_store_f32_hsw+0x69> + DB 114,181 ; jb 104d <_sk_store_f32_hsw+0x69> DB 196,67,125,25,68,128,96,1 ; vextractf128 $0x1,%ymm8,0x60(%r8,%rax,4) - DB 235,171 ; jmp e95 <_sk_store_f32_hsw+0x69> + DB 235,171 ; jmp 104d <_sk_store_f32_hsw+0x69> PUBLIC _sk_clamp_x_hsw _sk_clamp_x_hsw LABEL PROC @@ -1470,12 +1611,93 @@ _sk_clear_avx LABEL PROC DB 197,228,87,219 ; vxorps %ymm3,%ymm3,%ymm3 DB 255,224 ; jmpq *%rax -PUBLIC _sk_plus__avx -_sk_plus__avx LABEL PROC - DB 197,252,88,196 ; vaddps %ymm4,%ymm0,%ymm0 - DB 197,244,88,205 ; vaddps %ymm5,%ymm1,%ymm1 - DB 197,236,88,214 ; vaddps %ymm6,%ymm2,%ymm2 - DB 197,228,88,223 ; vaddps %ymm7,%ymm3,%ymm3 +PUBLIC _sk_srcatop_avx +_sk_srcatop_avx LABEL PROC + DB 197,124,89,199 ; vmulps %ymm7,%ymm0,%ymm8 + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 197,249,110,192 ; vmovd %eax,%xmm0 + DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0 + DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0 + DB 197,124,92,203 ; vsubps %ymm3,%ymm0,%ymm9 + DB 197,180,89,196 ; vmulps %ymm4,%ymm9,%ymm0 + DB 197,188,88,192 ; vaddps %ymm0,%ymm8,%ymm0 + DB 197,244,89,207 ; vmulps %ymm7,%ymm1,%ymm1 + DB 197,52,89,197 ; vmulps %ymm5,%ymm9,%ymm8 + DB 196,193,116,88,200 ; vaddps %ymm8,%ymm1,%ymm1 + DB 197,236,89,215 ; vmulps %ymm7,%ymm2,%ymm2 + DB 197,52,89,198 ; vmulps %ymm6,%ymm9,%ymm8 + DB 196,193,108,88,208 ; vaddps %ymm8,%ymm2,%ymm2 + DB 197,228,89,223 ; vmulps %ymm7,%ymm3,%ymm3 + DB 197,52,89,199 ; vmulps %ymm7,%ymm9,%ymm8 + DB 196,193,100,88,216 ; vaddps %ymm8,%ymm3,%ymm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_dstatop_avx +_sk_dstatop_avx LABEL PROC + DB 197,100,89,196 ; vmulps %ymm4,%ymm3,%ymm8 + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 197,121,110,200 ; vmovd %eax,%xmm9 + DB 196,67,121,4,201,0 ; vpermilps $0x0,%xmm9,%xmm9 + DB 196,67,53,24,201,1 ; vinsertf128 $0x1,%xmm9,%ymm9,%ymm9 + DB 197,52,92,207 ; vsubps %ymm7,%ymm9,%ymm9 + DB 197,180,89,192 ; vmulps %ymm0,%ymm9,%ymm0 + DB 197,188,88,192 ; vaddps %ymm0,%ymm8,%ymm0 + DB 197,100,89,197 ; vmulps %ymm5,%ymm3,%ymm8 + DB 197,180,89,201 ; vmulps %ymm1,%ymm9,%ymm1 + DB 197,188,88,201 ; vaddps %ymm1,%ymm8,%ymm1 + DB 197,100,89,198 ; vmulps %ymm6,%ymm3,%ymm8 + DB 197,180,89,210 ; vmulps %ymm2,%ymm9,%ymm2 + DB 197,188,88,210 ; vaddps %ymm2,%ymm8,%ymm2 + DB 197,100,89,199 ; vmulps %ymm7,%ymm3,%ymm8 + DB 197,180,89,219 ; vmulps %ymm3,%ymm9,%ymm3 + DB 197,188,88,219 ; vaddps %ymm3,%ymm8,%ymm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_srcin_avx +_sk_srcin_avx LABEL PROC + DB 197,252,89,199 ; vmulps %ymm7,%ymm0,%ymm0 + DB 197,244,89,207 ; vmulps %ymm7,%ymm1,%ymm1 + DB 197,236,89,215 ; vmulps %ymm7,%ymm2,%ymm2 + DB 197,228,89,223 ; vmulps %ymm7,%ymm3,%ymm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_dstin_avx +_sk_dstin_avx LABEL PROC + DB 197,228,89,196 ; vmulps %ymm4,%ymm3,%ymm0 + DB 197,228,89,205 ; vmulps %ymm5,%ymm3,%ymm1 + DB 197,228,89,214 ; vmulps %ymm6,%ymm3,%ymm2 + DB 197,228,89,223 ; vmulps %ymm7,%ymm3,%ymm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_srcout_avx +_sk_srcout_avx LABEL PROC + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 197,121,110,192 ; vmovd %eax,%xmm8 + DB 196,67,121,4,192,0 ; vpermilps $0x0,%xmm8,%xmm8 + DB 196,67,61,24,192,1 ; vinsertf128 $0x1,%xmm8,%ymm8,%ymm8 + DB 197,60,92,199 ; vsubps %ymm7,%ymm8,%ymm8 + DB 197,188,89,192 ; vmulps %ymm0,%ymm8,%ymm0 + DB 197,188,89,201 ; vmulps %ymm1,%ymm8,%ymm1 + DB 197,188,89,210 ; vmulps %ymm2,%ymm8,%ymm2 + DB 197,188,89,219 ; vmulps %ymm3,%ymm8,%ymm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_dstout_avx +_sk_dstout_avx LABEL PROC + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 197,249,110,192 ; vmovd %eax,%xmm0 + DB 196,227,121,4,192,0 ; vpermilps $0x0,%xmm0,%xmm0 + DB 196,227,125,24,192,1 ; vinsertf128 $0x1,%xmm0,%ymm0,%ymm0 + DB 197,252,92,219 ; vsubps %ymm3,%ymm0,%ymm3 + DB 197,228,89,196 ; vmulps %ymm4,%ymm3,%ymm0 + DB 197,228,89,205 ; vmulps %ymm5,%ymm3,%ymm1 + DB 197,228,89,214 ; vmulps %ymm6,%ymm3,%ymm2 + DB 197,228,89,223 ; vmulps %ymm7,%ymm3,%ymm3 DB 72,173 ; lods %ds:(%rsi),%rax DB 255,224 ; jmpq *%rax @@ -1515,6 +1737,95 @@ _sk_dstover_avx LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 255,224 ; jmpq *%rax +PUBLIC _sk_modulate_avx +_sk_modulate_avx LABEL PROC + DB 197,252,89,196 ; vmulps %ymm4,%ymm0,%ymm0 + DB 197,244,89,205 ; vmulps %ymm5,%ymm1,%ymm1 + DB 197,236,89,214 ; vmulps %ymm6,%ymm2,%ymm2 + DB 197,228,89,223 ; vmulps %ymm7,%ymm3,%ymm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_multiply_avx +_sk_multiply_avx LABEL PROC + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 197,121,110,192 ; vmovd %eax,%xmm8 + DB 196,67,121,4,192,0 ; vpermilps $0x0,%xmm8,%xmm8 + DB 196,67,61,24,192,1 ; vinsertf128 $0x1,%xmm8,%ymm8,%ymm8 + DB 197,60,92,207 ; vsubps %ymm7,%ymm8,%ymm9 + DB 197,52,89,208 ; vmulps %ymm0,%ymm9,%ymm10 + DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8 + DB 197,60,89,220 ; vmulps %ymm4,%ymm8,%ymm11 + DB 196,65,44,88,211 ; vaddps %ymm11,%ymm10,%ymm10 + DB 197,252,89,196 ; vmulps %ymm4,%ymm0,%ymm0 + DB 196,193,124,88,194 ; vaddps %ymm10,%ymm0,%ymm0 + DB 197,52,89,209 ; vmulps %ymm1,%ymm9,%ymm10 + DB 197,60,89,221 ; vmulps %ymm5,%ymm8,%ymm11 + DB 196,65,36,88,210 ; vaddps %ymm10,%ymm11,%ymm10 + DB 197,244,89,205 ; vmulps %ymm5,%ymm1,%ymm1 + DB 196,193,116,88,202 ; vaddps %ymm10,%ymm1,%ymm1 + DB 197,52,89,210 ; vmulps %ymm2,%ymm9,%ymm10 + DB 197,60,89,222 ; vmulps %ymm6,%ymm8,%ymm11 + DB 196,65,36,88,210 ; vaddps %ymm10,%ymm11,%ymm10 + DB 197,236,89,214 ; vmulps %ymm6,%ymm2,%ymm2 + DB 196,193,108,88,210 ; vaddps %ymm10,%ymm2,%ymm2 + DB 197,52,89,203 ; vmulps %ymm3,%ymm9,%ymm9 + DB 197,60,89,199 ; vmulps %ymm7,%ymm8,%ymm8 + DB 196,65,60,88,193 ; vaddps %ymm9,%ymm8,%ymm8 + DB 197,228,89,223 ; vmulps %ymm7,%ymm3,%ymm3 + DB 196,193,100,88,216 ; vaddps %ymm8,%ymm3,%ymm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_plus__avx +_sk_plus__avx LABEL PROC + DB 197,252,88,196 ; vaddps %ymm4,%ymm0,%ymm0 + DB 197,244,88,205 ; vaddps %ymm5,%ymm1,%ymm1 + DB 197,236,88,214 ; vaddps %ymm6,%ymm2,%ymm2 + DB 197,228,88,223 ; vaddps %ymm7,%ymm3,%ymm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_screen_avx +_sk_screen_avx LABEL PROC + DB 197,124,88,196 ; vaddps %ymm4,%ymm0,%ymm8 + DB 197,252,89,196 ; vmulps %ymm4,%ymm0,%ymm0 + DB 197,188,92,192 ; vsubps %ymm0,%ymm8,%ymm0 + DB 197,116,88,197 ; vaddps %ymm5,%ymm1,%ymm8 + DB 197,244,89,205 ; vmulps %ymm5,%ymm1,%ymm1 + DB 197,188,92,201 ; vsubps %ymm1,%ymm8,%ymm1 + DB 197,108,88,198 ; vaddps %ymm6,%ymm2,%ymm8 + DB 197,236,89,214 ; vmulps %ymm6,%ymm2,%ymm2 + DB 197,188,92,210 ; vsubps %ymm2,%ymm8,%ymm2 + DB 197,100,88,199 ; vaddps %ymm7,%ymm3,%ymm8 + DB 197,228,89,223 ; vmulps %ymm7,%ymm3,%ymm3 + DB 197,188,92,219 ; vsubps %ymm3,%ymm8,%ymm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_xor__avx +_sk_xor__avx LABEL PROC + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 197,121,110,192 ; vmovd %eax,%xmm8 + DB 196,67,121,4,192,0 ; vpermilps $0x0,%xmm8,%xmm8 + DB 196,67,61,24,192,1 ; vinsertf128 $0x1,%xmm8,%ymm8,%ymm8 + DB 197,60,92,207 ; vsubps %ymm7,%ymm8,%ymm9 + DB 197,180,89,192 ; vmulps %ymm0,%ymm9,%ymm0 + DB 197,60,92,195 ; vsubps %ymm3,%ymm8,%ymm8 + DB 197,60,89,212 ; vmulps %ymm4,%ymm8,%ymm10 + DB 196,193,124,88,194 ; vaddps %ymm10,%ymm0,%ymm0 + DB 197,180,89,201 ; vmulps %ymm1,%ymm9,%ymm1 + DB 197,60,89,213 ; vmulps %ymm5,%ymm8,%ymm10 + DB 197,172,88,201 ; vaddps %ymm1,%ymm10,%ymm1 + DB 197,180,89,210 ; vmulps %ymm2,%ymm9,%ymm2 + DB 197,60,89,214 ; vmulps %ymm6,%ymm8,%ymm10 + DB 197,172,88,210 ; vaddps %ymm2,%ymm10,%ymm2 + DB 197,180,89,219 ; vmulps %ymm3,%ymm9,%ymm3 + DB 197,60,89,199 ; vmulps %ymm7,%ymm8,%ymm8 + DB 197,188,88,219 ; vaddps %ymm3,%ymm8,%ymm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + PUBLIC _sk_clamp_0_avx _sk_clamp_0_avx LABEL PROC DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8 @@ -1757,7 +2068,7 @@ _sk_scale_u8_avx LABEL PROC DB 72,139,0 ; mov (%rax),%rax DB 72,1,248 ; add %rdi,%rax DB 77,133,192 ; test %r8,%r8 - DB 117,80 ; jne 639 <_sk_scale_u8_avx+0x60> + DB 117,80 ; jne 86b <_sk_scale_u8_avx+0x60> DB 197,122,126,0 ; vmovq (%rax),%xmm8 DB 196,66,121,49,200 ; vpmovzxbd %xmm8,%xmm9 DB 196,67,121,4,192,229 ; vpermilps $0xe5,%xmm8,%xmm8 @@ -1785,9 +2096,9 @@ _sk_scale_u8_avx LABEL PROC DB 77,9,217 ; or %r11,%r9 DB 72,131,193,8 ; add $0x8,%rcx DB 73,255,202 ; dec %r10 - DB 117,234 ; jne 641 <_sk_scale_u8_avx+0x68> + DB 117,234 ; jne 873 <_sk_scale_u8_avx+0x68> DB 196,65,249,110,193 ; vmovq %r9,%xmm8 - DB 235,143 ; jmp 5ed <_sk_scale_u8_avx+0x14> + DB 235,143 ; jmp 81f <_sk_scale_u8_avx+0x14> PUBLIC _sk_lerp_1_float_avx _sk_lerp_1_float_avx LABEL PROC @@ -1815,7 +2126,7 @@ _sk_lerp_u8_avx LABEL PROC DB 72,139,0 ; mov (%rax),%rax DB 72,1,248 ; add %rdi,%rax DB 77,133,192 ; test %r8,%r8 - DB 117,116 ; jne 721 <_sk_lerp_u8_avx+0x84> + DB 117,116 ; jne 953 <_sk_lerp_u8_avx+0x84> DB 197,122,126,0 ; vmovq (%rax),%xmm8 DB 196,66,121,49,200 ; vpmovzxbd %xmm8,%xmm9 DB 196,67,121,4,192,229 ; vpermilps $0xe5,%xmm8,%xmm8 @@ -1851,16 +2162,16 @@ _sk_lerp_u8_avx LABEL PROC DB 77,9,217 ; or %r11,%r9 DB 72,131,193,8 ; add $0x8,%rcx DB 73,255,202 ; dec %r10 - DB 117,234 ; jne 729 <_sk_lerp_u8_avx+0x8c> + DB 117,234 ; jne 95b <_sk_lerp_u8_avx+0x8c> DB 196,65,249,110,193 ; vmovq %r9,%xmm8 - DB 233,104,255,255,255 ; jmpq 6b1 <_sk_lerp_u8_avx+0x14> + DB 233,104,255,255,255 ; jmpq 8e3 <_sk_lerp_u8_avx+0x14> PUBLIC _sk_lerp_565_avx _sk_lerp_565_avx LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 76,139,16 ; mov (%rax),%r10 DB 72,133,201 ; test %rcx,%rcx - DB 15,133,250,0,0,0 ; jne 851 <_sk_lerp_565_avx+0x108> + DB 15,133,250,0,0,0 ; jne a83 <_sk_lerp_565_avx+0x108> DB 196,65,122,111,4,122 ; vmovdqu (%r10,%rdi,2),%xmm8 DB 197,225,239,219 ; vpxor %xmm3,%xmm3,%xmm3 DB 197,185,105,219 ; vpunpckhwd %xmm3,%xmm8,%xmm3 @@ -1919,9 +2230,9 @@ _sk_lerp_565_avx LABEL PROC DB 196,65,57,239,192 ; vpxor %xmm8,%xmm8,%xmm8 DB 65,254,200 ; dec %r8b DB 65,128,248,6 ; cmp $0x6,%r8b - DB 15,135,243,254,255,255 ; ja 75d <_sk_lerp_565_avx+0x14> + DB 15,135,243,254,255,255 ; ja 98f <_sk_lerp_565_avx+0x14> DB 69,15,182,192 ; movzbl %r8b,%r8d - DB 76,141,13,75,0,0,0 ; lea 0x4b(%rip),%r9 # 8c0 <_sk_lerp_565_avx+0x177> + DB 76,141,13,73,0,0,0 ; lea 0x49(%rip),%r9 # af0 <_sk_lerp_565_avx+0x175> DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax DB 76,1,200 ; add %r9,%rax DB 255,224 ; jmpq *%rax @@ -1933,28 +2244,27 @@ _sk_lerp_565_avx LABEL PROC DB 196,65,57,196,68,122,4,2 ; vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm8,%xmm8 DB 196,65,57,196,68,122,2,1 ; vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm8,%xmm8 DB 196,65,57,196,4,122,0 ; vpinsrw $0x0,(%r10,%rdi,2),%xmm8,%xmm8 - DB 233,159,254,255,255 ; jmpq 75d <_sk_lerp_565_avx+0x14> - DB 102,144 ; xchg %ax,%ax - DB 242,255 ; repnz (bad) + DB 233,159,254,255,255 ; jmpq 98f <_sk_lerp_565_avx+0x14> + DB 244 ; hlt DB 255 ; (bad) DB 255 ; (bad) - DB 234 ; (bad) DB 255 ; (bad) + DB 236 ; in (%dx),%al DB 255 ; (bad) - DB 255,226 ; jmpq *%rdx DB 255 ; (bad) + DB 255,228 ; jmpq *%rsp DB 255 ; (bad) DB 255 ; (bad) - DB 218,255 ; (bad) DB 255 ; (bad) - DB 255,210 ; callq *%rdx + DB 220,255 ; fdivr %st,%st(7) DB 255 ; (bad) + DB 255,212 ; callq *%rsp DB 255 ; (bad) - DB 255,202 ; dec %edx DB 255 ; (bad) + DB 255,204 ; dec %esp DB 255 ; (bad) DB 255 ; (bad) - DB 190 ; .byte 0xbe + DB 255,192 ; inc %eax DB 255 ; (bad) DB 255 ; (bad) DB 255 ; .byte 0xff @@ -1970,7 +2280,7 @@ _sk_load_tables_avx LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 76,139,0 ; mov (%rax),%r8 DB 72,133,201 ; test %rcx,%rcx - DB 15,133,56,2,0,0 ; jne b2c <_sk_load_tables_avx+0x250> + DB 15,133,56,2,0,0 ; jne d5c <_sk_load_tables_avx+0x250> DB 196,65,124,16,4,184 ; vmovups (%r8,%rdi,4),%ymm8 DB 187,255,0,0,0 ; mov $0xff,%ebx DB 197,249,110,195 ; vmovd %ebx,%xmm0 @@ -2089,9 +2399,9 @@ _sk_load_tables_avx LABEL PROC DB 196,65,60,87,192 ; vxorps %ymm8,%ymm8,%ymm8 DB 254,203 ; dec %bl DB 128,251,6 ; cmp $0x6,%bl - DB 15,135,185,253,255,255 ; ja 8fa <_sk_load_tables_avx+0x1e> + DB 15,135,185,253,255,255 ; ja b2a <_sk_load_tables_avx+0x1e> DB 15,182,219 ; movzbl %bl,%ebx - DB 76,141,13,137,0,0,0 ; lea 0x89(%rip),%r9 # bd4 <_sk_load_tables_avx+0x2f8> + DB 76,141,13,137,0,0,0 ; lea 0x89(%rip),%r9 # e04 <_sk_load_tables_avx+0x2f8> DB 73,99,28,153 ; movslq (%r9,%rbx,4),%rbx DB 76,1,203 ; add %r9,%rbx DB 255,227 ; jmpq *%rbx @@ -2114,7 +2424,7 @@ _sk_load_tables_avx LABEL PROC DB 196,99,61,12,192,15 ; vblendps $0xf,%ymm0,%ymm8,%ymm8 DB 196,195,57,34,4,184,0 ; vpinsrd $0x0,(%r8,%rdi,4),%xmm8,%xmm0 DB 196,99,61,12,192,15 ; vblendps $0xf,%ymm0,%ymm8,%ymm8 - DB 233,38,253,255,255 ; jmpq 8fa <_sk_load_tables_avx+0x1e> + DB 233,38,253,255,255 ; jmpq b2a <_sk_load_tables_avx+0x1e> DB 238 ; out %al,(%dx) DB 255 ; (bad) DB 255 ; (bad) @@ -2141,7 +2451,7 @@ _sk_load_a8_avx LABEL PROC DB 72,139,0 ; mov (%rax),%rax DB 72,1,248 ; add %rdi,%rax DB 77,133,192 ; test %r8,%r8 - DB 117,74 ; jne c4a <_sk_load_a8_avx+0x5a> + DB 117,74 ; jne e7a <_sk_load_a8_avx+0x5a> DB 197,250,126,0 ; vmovq (%rax),%xmm0 DB 196,226,121,49,200 ; vpmovzxbd %xmm0,%xmm1 DB 196,227,121,4,192,229 ; vpermilps $0xe5,%xmm0,%xmm0 @@ -2168,9 +2478,9 @@ _sk_load_a8_avx LABEL PROC DB 77,9,217 ; or %r11,%r9 DB 72,131,193,8 ; add $0x8,%rcx DB 73,255,202 ; dec %r10 - DB 117,234 ; jne c52 <_sk_load_a8_avx+0x62> + DB 117,234 ; jne e82 <_sk_load_a8_avx+0x62> DB 196,193,249,110,193 ; vmovq %r9,%xmm0 - DB 235,149 ; jmp c04 <_sk_load_a8_avx+0x14> + DB 235,149 ; jmp e34 <_sk_load_a8_avx+0x14> PUBLIC _sk_store_a8_avx _sk_store_a8_avx LABEL PROC @@ -2186,7 +2496,7 @@ _sk_store_a8_avx LABEL PROC DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8 DB 196,65,57,103,192 ; vpackuswb %xmm8,%xmm8,%xmm8 DB 72,133,201 ; test %rcx,%rcx - DB 117,10 ; jne cb1 <_sk_store_a8_avx+0x42> + DB 117,10 ; jne ee1 <_sk_store_a8_avx+0x42> DB 196,65,123,17,4,57 ; vmovsd %xmm8,(%r9,%rdi,1) DB 72,173 ; lods %ds:(%rsi),%rax DB 255,224 ; jmpq *%rax @@ -2194,10 +2504,10 @@ _sk_store_a8_avx LABEL PROC DB 65,128,224,7 ; and $0x7,%r8b DB 65,254,200 ; dec %r8b DB 65,128,248,6 ; cmp $0x6,%r8b - DB 119,236 ; ja cad <_sk_store_a8_avx+0x3e> + DB 119,236 ; ja edd <_sk_store_a8_avx+0x3e> DB 196,66,121,48,192 ; vpmovzxbw %xmm8,%xmm8 DB 65,15,182,192 ; movzbl %r8b,%eax - DB 76,141,5,67,0,0,0 ; lea 0x43(%rip),%r8 # d14 <_sk_store_a8_avx+0xa5> + DB 76,141,5,67,0,0,0 ; lea 0x43(%rip),%r8 # f44 <_sk_store_a8_avx+0xa5> DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax DB 76,1,192 ; add %r8,%rax DB 255,224 ; jmpq *%rax @@ -2208,7 +2518,7 @@ _sk_store_a8_avx LABEL PROC DB 196,67,121,20,68,57,2,4 ; vpextrb $0x4,%xmm8,0x2(%r9,%rdi,1) DB 196,67,121,20,68,57,1,2 ; vpextrb $0x2,%xmm8,0x1(%r9,%rdi,1) DB 196,67,121,20,4,57,0 ; vpextrb $0x0,%xmm8,(%r9,%rdi,1) - DB 235,154 ; jmp cad <_sk_store_a8_avx+0x3e> + DB 235,154 ; jmp edd <_sk_store_a8_avx+0x3e> DB 144 ; nop DB 246,255 ; idiv %bh DB 255 ; (bad) @@ -2238,7 +2548,7 @@ _sk_load_565_avx LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 76,139,16 ; mov (%rax),%r10 DB 72,133,201 ; test %rcx,%rcx - DB 15,133,209,0,0,0 ; jne e0f <_sk_load_565_avx+0xdf> + DB 15,133,209,0,0,0 ; jne 103f <_sk_load_565_avx+0xdf> DB 196,193,122,111,4,122 ; vmovdqu (%r10,%rdi,2),%xmm0 DB 197,241,239,201 ; vpxor %xmm1,%xmm1,%xmm1 DB 197,249,105,201 ; vpunpckhwd %xmm1,%xmm0,%xmm1 @@ -2288,9 +2598,9 @@ _sk_load_565_avx LABEL PROC DB 197,249,239,192 ; vpxor %xmm0,%xmm0,%xmm0 DB 65,254,200 ; dec %r8b DB 65,128,248,6 ; cmp $0x6,%r8b - DB 15,135,29,255,255,255 ; ja d44 <_sk_load_565_avx+0x14> + DB 15,135,29,255,255,255 ; ja f74 <_sk_load_565_avx+0x14> DB 69,15,182,192 ; movzbl %r8b,%r8d - DB 76,141,13,74,0,0,0 ; lea 0x4a(%rip),%r9 # e7c <_sk_load_565_avx+0x14c> + DB 76,141,13,74,0,0,0 ; lea 0x4a(%rip),%r9 # 10ac <_sk_load_565_avx+0x14c> DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax DB 76,1,200 ; add %r9,%rax DB 255,224 ; jmpq *%rax @@ -2302,12 +2612,12 @@ _sk_load_565_avx LABEL PROC DB 196,193,121,196,68,122,4,2 ; vpinsrw $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0 DB 196,193,121,196,68,122,2,1 ; vpinsrw $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0 DB 196,193,121,196,4,122,0 ; vpinsrw $0x0,(%r10,%rdi,2),%xmm0,%xmm0 - DB 233,201,254,255,255 ; jmpq d44 <_sk_load_565_avx+0x14> + DB 233,201,254,255,255 ; jmpq f74 <_sk_load_565_avx+0x14> DB 144 ; nop DB 243,255 ; repz (bad) DB 255 ; (bad) DB 255 ; (bad) - DB 235,255 ; jmp e81 <_sk_load_565_avx+0x151> + DB 235,255 ; jmp 10b1 <_sk_load_565_avx+0x151> DB 255 ; (bad) DB 255,227 ; jmpq *%rbx DB 255 ; (bad) @@ -2358,7 +2668,7 @@ _sk_store_565_avx LABEL PROC DB 196,67,125,25,193,1 ; vextractf128 $0x1,%ymm8,%xmm9 DB 196,66,57,43,193 ; vpackusdw %xmm9,%xmm8,%xmm8 DB 72,133,201 ; test %rcx,%rcx - DB 117,10 ; jne f36 <_sk_store_565_avx+0x9e> + DB 117,10 ; jne 1166 <_sk_store_565_avx+0x9e> DB 196,65,122,127,4,121 ; vmovdqu %xmm8,(%r9,%rdi,2) DB 72,173 ; lods %ds:(%rsi),%rax DB 255,224 ; jmpq *%rax @@ -2366,9 +2676,9 @@ _sk_store_565_avx LABEL PROC DB 65,128,224,7 ; and $0x7,%r8b DB 65,254,200 ; dec %r8b DB 65,128,248,6 ; cmp $0x6,%r8b - DB 119,236 ; ja f32 <_sk_store_565_avx+0x9a> + DB 119,236 ; ja 1162 <_sk_store_565_avx+0x9a> DB 65,15,182,192 ; movzbl %r8b,%eax - DB 76,141,5,67,0,0,0 ; lea 0x43(%rip),%r8 # f94 <_sk_store_565_avx+0xfc> + DB 76,141,5,67,0,0,0 ; lea 0x43(%rip),%r8 # 11c4 <_sk_store_565_avx+0xfc> DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax DB 76,1,192 ; add %r8,%rax DB 255,224 ; jmpq *%rax @@ -2379,7 +2689,7 @@ _sk_store_565_avx LABEL PROC DB 196,67,121,21,68,121,4,2 ; vpextrw $0x2,%xmm8,0x4(%r9,%rdi,2) DB 196,67,121,21,68,121,2,1 ; vpextrw $0x1,%xmm8,0x2(%r9,%rdi,2) DB 196,67,121,21,4,121,0 ; vpextrw $0x0,%xmm8,(%r9,%rdi,2) - DB 235,159 ; jmp f32 <_sk_store_565_avx+0x9a> + DB 235,159 ; jmp 1162 <_sk_store_565_avx+0x9a> DB 144 ; nop DB 246,255 ; idiv %bh DB 255 ; (bad) @@ -2409,7 +2719,7 @@ _sk_load_8888_avx LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 76,139,16 ; mov (%rax),%r10 DB 72,133,201 ; test %rcx,%rcx - DB 15,133,157,0,0,0 ; jne 105b <_sk_load_8888_avx+0xab> + DB 15,133,157,0,0,0 ; jne 128b <_sk_load_8888_avx+0xab> DB 196,65,124,16,12,186 ; vmovups (%r10,%rdi,4),%ymm9 DB 184,255,0,0,0 ; mov $0xff,%eax DB 197,249,110,192 ; vmovd %eax,%xmm0 @@ -2447,9 +2757,9 @@ _sk_load_8888_avx LABEL PROC DB 196,65,52,87,201 ; vxorps %ymm9,%ymm9,%ymm9 DB 65,254,200 ; dec %r8b DB 65,128,248,6 ; cmp $0x6,%r8b - DB 15,135,80,255,255,255 ; ja fc4 <_sk_load_8888_avx+0x14> + DB 15,135,80,255,255,255 ; ja 11f4 <_sk_load_8888_avx+0x14> DB 69,15,182,192 ; movzbl %r8b,%r8d - DB 76,141,13,137,0,0,0 ; lea 0x89(%rip),%r9 # 1108 <_sk_load_8888_avx+0x158> + DB 76,141,13,137,0,0,0 ; lea 0x89(%rip),%r9 # 1338 <_sk_load_8888_avx+0x158> DB 75,99,4,129 ; movslq (%r9,%r8,4),%rax DB 76,1,200 ; add %r9,%rax DB 255,224 ; jmpq *%rax @@ -2472,7 +2782,7 @@ _sk_load_8888_avx LABEL PROC DB 196,99,53,12,200,15 ; vblendps $0xf,%ymm0,%ymm9,%ymm9 DB 196,195,49,34,4,186,0 ; vpinsrd $0x0,(%r10,%rdi,4),%xmm9,%xmm0 DB 196,99,53,12,200,15 ; vblendps $0xf,%ymm0,%ymm9,%ymm9 - DB 233,188,254,255,255 ; jmpq fc4 <_sk_load_8888_avx+0x14> + DB 233,188,254,255,255 ; jmpq 11f4 <_sk_load_8888_avx+0x14> DB 238 ; out %al,(%dx) DB 255 ; (bad) DB 255 ; (bad) @@ -2524,7 +2834,7 @@ _sk_store_8888_avx LABEL PROC DB 196,65,45,86,192 ; vorpd %ymm8,%ymm10,%ymm8 DB 196,65,53,86,192 ; vorpd %ymm8,%ymm9,%ymm8 DB 72,133,201 ; test %rcx,%rcx - DB 117,10 ; jne 11c8 <_sk_store_8888_avx+0xa4> + DB 117,10 ; jne 13f8 <_sk_store_8888_avx+0xa4> DB 196,65,124,17,4,185 ; vmovups %ymm8,(%r9,%rdi,4) DB 72,173 ; lods %ds:(%rsi),%rax DB 255,224 ; jmpq *%rax @@ -2532,9 +2842,9 @@ _sk_store_8888_avx LABEL PROC DB 65,128,224,7 ; and $0x7,%r8b DB 65,254,200 ; dec %r8b DB 65,128,248,6 ; cmp $0x6,%r8b - DB 119,236 ; ja 11c4 <_sk_store_8888_avx+0xa0> + DB 119,236 ; ja 13f4 <_sk_store_8888_avx+0xa0> DB 65,15,182,192 ; movzbl %r8b,%eax - DB 76,141,5,85,0,0,0 ; lea 0x55(%rip),%r8 # 1238 <_sk_store_8888_avx+0x114> + DB 76,141,5,85,0,0,0 ; lea 0x55(%rip),%r8 # 1468 <_sk_store_8888_avx+0x114> DB 73,99,4,128 ; movslq (%r8,%rax,4),%rax DB 76,1,192 ; add %r8,%rax DB 255,224 ; jmpq *%rax @@ -2548,7 +2858,7 @@ _sk_store_8888_avx LABEL PROC DB 196,67,121,22,68,185,8,2 ; vpextrd $0x2,%xmm8,0x8(%r9,%rdi,4) DB 196,67,121,22,68,185,4,1 ; vpextrd $0x1,%xmm8,0x4(%r9,%rdi,4) DB 196,65,121,126,4,185 ; vmovd %xmm8,(%r9,%rdi,4) - DB 235,143 ; jmp 11c4 <_sk_store_8888_avx+0xa0> + DB 235,143 ; jmp 13f4 <_sk_store_8888_avx+0xa0> DB 15,31,0 ; nopl (%rax) DB 245 ; cmc DB 255 ; (bad) @@ -2579,7 +2889,7 @@ _sk_load_f16_avx LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 72,139,0 ; mov (%rax),%rax DB 72,133,201 ; test %rcx,%rcx - DB 15,133,2,1,0,0 ; jne 1364 <_sk_load_f16_avx+0x110> + DB 15,133,2,1,0,0 ; jne 1594 <_sk_load_f16_avx+0x110> DB 197,121,16,4,248 ; vmovupd (%rax,%rdi,8),%xmm8 DB 197,249,16,84,248,16 ; vmovupd 0x10(%rax,%rdi,8),%xmm2 DB 197,249,16,92,248,32 ; vmovupd 0x20(%rax,%rdi,8),%xmm3 @@ -2637,29 +2947,29 @@ _sk_load_f16_avx LABEL PROC DB 197,123,16,4,248 ; vmovsd (%rax,%rdi,8),%xmm8 DB 196,65,49,239,201 ; vpxor %xmm9,%xmm9,%xmm9 DB 72,131,249,1 ; cmp $0x1,%rcx - DB 116,79 ; je 13c3 <_sk_load_f16_avx+0x16f> + DB 116,79 ; je 15f3 <_sk_load_f16_avx+0x16f> DB 197,57,22,68,248,8 ; vmovhpd 0x8(%rax,%rdi,8),%xmm8,%xmm8 DB 72,131,249,3 ; cmp $0x3,%rcx - DB 114,67 ; jb 13c3 <_sk_load_f16_avx+0x16f> + DB 114,67 ; jb 15f3 <_sk_load_f16_avx+0x16f> DB 197,251,16,84,248,16 ; vmovsd 0x10(%rax,%rdi,8),%xmm2 DB 72,131,249,3 ; cmp $0x3,%rcx - DB 116,68 ; je 13d0 <_sk_load_f16_avx+0x17c> + DB 116,68 ; je 1600 <_sk_load_f16_avx+0x17c> DB 197,233,22,84,248,24 ; vmovhpd 0x18(%rax,%rdi,8),%xmm2,%xmm2 DB 72,131,249,5 ; cmp $0x5,%rcx - DB 114,56 ; jb 13d0 <_sk_load_f16_avx+0x17c> + DB 114,56 ; jb 1600 <_sk_load_f16_avx+0x17c> DB 197,251,16,92,248,32 ; vmovsd 0x20(%rax,%rdi,8),%xmm3 DB 72,131,249,5 ; cmp $0x5,%rcx - DB 15,132,209,254,255,255 ; je 1279 <_sk_load_f16_avx+0x25> + DB 15,132,209,254,255,255 ; je 14a9 <_sk_load_f16_avx+0x25> DB 197,225,22,92,248,40 ; vmovhpd 0x28(%rax,%rdi,8),%xmm3,%xmm3 DB 72,131,249,7 ; cmp $0x7,%rcx - DB 15,130,193,254,255,255 ; jb 1279 <_sk_load_f16_avx+0x25> + DB 15,130,193,254,255,255 ; jb 14a9 <_sk_load_f16_avx+0x25> DB 197,122,126,76,248,48 ; vmovq 0x30(%rax,%rdi,8),%xmm9 - DB 233,182,254,255,255 ; jmpq 1279 <_sk_load_f16_avx+0x25> + DB 233,182,254,255,255 ; jmpq 14a9 <_sk_load_f16_avx+0x25> DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3 DB 197,233,87,210 ; vxorpd %xmm2,%xmm2,%xmm2 - DB 233,169,254,255,255 ; jmpq 1279 <_sk_load_f16_avx+0x25> + DB 233,169,254,255,255 ; jmpq 14a9 <_sk_load_f16_avx+0x25> DB 197,225,87,219 ; vxorpd %xmm3,%xmm3,%xmm3 - DB 233,160,254,255,255 ; jmpq 1279 <_sk_load_f16_avx+0x25> + DB 233,160,254,255,255 ; jmpq 14a9 <_sk_load_f16_avx+0x25> PUBLIC _sk_store_f16_avx _sk_store_f16_avx LABEL PROC @@ -2698,7 +3008,7 @@ _sk_store_f16_avx LABEL PROC DB 196,65,25,98,205 ; vpunpckldq %xmm13,%xmm12,%xmm9 DB 196,65,25,106,197 ; vpunpckhdq %xmm13,%xmm12,%xmm8 DB 72,133,201 ; test %rcx,%rcx - DB 117,31 ; jne 14af <_sk_store_f16_avx+0xd6> + DB 117,31 ; jne 16df <_sk_store_f16_avx+0xd6> DB 196,65,120,17,28,248 ; vmovups %xmm11,(%r8,%rdi,8) DB 196,65,120,17,84,248,16 ; vmovups %xmm10,0x10(%r8,%rdi,8) DB 196,65,120,17,76,248,32 ; vmovups %xmm9,0x20(%r8,%rdi,8) @@ -2707,22 +3017,22 @@ _sk_store_f16_avx LABEL PROC DB 255,224 ; jmpq *%rax DB 196,65,121,214,28,248 ; vmovq %xmm11,(%r8,%rdi,8) DB 72,131,249,1 ; cmp $0x1,%rcx - DB 116,240 ; je 14ab <_sk_store_f16_avx+0xd2> + DB 116,240 ; je 16db <_sk_store_f16_avx+0xd2> DB 196,65,121,23,92,248,8 ; vmovhpd %xmm11,0x8(%r8,%rdi,8) DB 72,131,249,3 ; cmp $0x3,%rcx - DB 114,227 ; jb 14ab <_sk_store_f16_avx+0xd2> + DB 114,227 ; jb 16db <_sk_store_f16_avx+0xd2> DB 196,65,121,214,84,248,16 ; vmovq %xmm10,0x10(%r8,%rdi,8) - DB 116,218 ; je 14ab <_sk_store_f16_avx+0xd2> + DB 116,218 ; je 16db <_sk_store_f16_avx+0xd2> DB 196,65,121,23,84,248,24 ; vmovhpd %xmm10,0x18(%r8,%rdi,8) DB 72,131,249,5 ; cmp $0x5,%rcx - DB 114,205 ; jb 14ab <_sk_store_f16_avx+0xd2> + DB 114,205 ; jb 16db <_sk_store_f16_avx+0xd2> DB 196,65,121,214,76,248,32 ; vmovq %xmm9,0x20(%r8,%rdi,8) - DB 116,196 ; je 14ab <_sk_store_f16_avx+0xd2> + DB 116,196 ; je 16db <_sk_store_f16_avx+0xd2> DB 196,65,121,23,76,248,40 ; vmovhpd %xmm9,0x28(%r8,%rdi,8) DB 72,131,249,7 ; cmp $0x7,%rcx - DB 114,183 ; jb 14ab <_sk_store_f16_avx+0xd2> + DB 114,183 ; jb 16db <_sk_store_f16_avx+0xd2> DB 196,65,121,214,68,248,48 ; vmovq %xmm8,0x30(%r8,%rdi,8) - DB 235,174 ; jmp 14ab <_sk_store_f16_avx+0xd2> + DB 235,174 ; jmp 16db <_sk_store_f16_avx+0xd2> PUBLIC _sk_store_f32_avx _sk_store_f32_avx LABEL PROC @@ -2738,7 +3048,7 @@ _sk_store_f32_avx LABEL PROC DB 196,65,37,20,196 ; vunpcklpd %ymm12,%ymm11,%ymm8 DB 196,65,37,21,220 ; vunpckhpd %ymm12,%ymm11,%ymm11 DB 72,133,201 ; test %rcx,%rcx - DB 117,55 ; jne 156a <_sk_store_f32_avx+0x6d> + DB 117,55 ; jne 179a <_sk_store_f32_avx+0x6d> DB 196,67,45,24,225,1 ; vinsertf128 $0x1,%xmm9,%ymm10,%ymm12 DB 196,67,61,24,235,1 ; vinsertf128 $0x1,%xmm11,%ymm8,%ymm13 DB 196,67,45,6,201,49 ; vperm2f128 $0x31,%ymm9,%ymm10,%ymm9 @@ -2751,22 +3061,22 @@ _sk_store_f32_avx LABEL PROC DB 255,224 ; jmpq *%rax DB 196,65,121,17,20,128 ; vmovupd %xmm10,(%r8,%rax,4) DB 72,131,249,1 ; cmp $0x1,%rcx - DB 116,240 ; je 1566 <_sk_store_f32_avx+0x69> + DB 116,240 ; je 1796 <_sk_store_f32_avx+0x69> DB 196,65,121,17,76,128,16 ; vmovupd %xmm9,0x10(%r8,%rax,4) DB 72,131,249,3 ; cmp $0x3,%rcx - DB 114,227 ; jb 1566 <_sk_store_f32_avx+0x69> + DB 114,227 ; jb 1796 <_sk_store_f32_avx+0x69> DB 196,65,121,17,68,128,32 ; vmovupd %xmm8,0x20(%r8,%rax,4) - DB 116,218 ; je 1566 <_sk_store_f32_avx+0x69> + DB 116,218 ; je 1796 <_sk_store_f32_avx+0x69> DB 196,65,121,17,92,128,48 ; vmovupd %xmm11,0x30(%r8,%rax,4) DB 72,131,249,5 ; cmp $0x5,%rcx - DB 114,205 ; jb 1566 <_sk_store_f32_avx+0x69> + DB 114,205 ; jb 1796 <_sk_store_f32_avx+0x69> DB 196,67,125,25,84,128,64,1 ; vextractf128 $0x1,%ymm10,0x40(%r8,%rax,4) - DB 116,195 ; je 1566 <_sk_store_f32_avx+0x69> + DB 116,195 ; je 1796 <_sk_store_f32_avx+0x69> DB 196,67,125,25,76,128,80,1 ; vextractf128 $0x1,%ymm9,0x50(%r8,%rax,4) DB 72,131,249,7 ; cmp $0x7,%rcx - DB 114,181 ; jb 1566 <_sk_store_f32_avx+0x69> + DB 114,181 ; jb 1796 <_sk_store_f32_avx+0x69> DB 196,67,125,25,68,128,96,1 ; vextractf128 $0x1,%ymm8,0x60(%r8,%rax,4) - DB 235,171 ; jmp 1566 <_sk_store_f32_avx+0x69> + DB 235,171 ; jmp 1796 <_sk_store_f32_avx+0x69> PUBLIC _sk_clamp_x_avx _sk_clamp_x_avx LABEL PROC @@ -3207,13 +3517,103 @@ _sk_clear_sse41 LABEL PROC DB 15,87,219 ; xorps %xmm3,%xmm3 DB 255,224 ; jmpq *%rax -PUBLIC _sk_plus__sse41 -_sk_plus__sse41 LABEL PROC - DB 15,88,196 ; addps %xmm4,%xmm0 - DB 15,88,205 ; addps %xmm5,%xmm1 - DB 15,88,214 ; addps %xmm6,%xmm2 - DB 15,88,223 ; addps %xmm7,%xmm3 +PUBLIC _sk_srcatop_sse41 +_sk_srcatop_sse41 LABEL PROC + DB 15,89,199 ; mulps %xmm7,%xmm0 + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 102,68,15,110,192 ; movd %eax,%xmm8 + DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 + DB 68,15,92,195 ; subps %xmm3,%xmm8 + DB 69,15,40,200 ; movaps %xmm8,%xmm9 + DB 68,15,89,204 ; mulps %xmm4,%xmm9 + DB 65,15,88,193 ; addps %xmm9,%xmm0 + DB 15,89,207 ; mulps %xmm7,%xmm1 + DB 69,15,40,200 ; movaps %xmm8,%xmm9 + DB 68,15,89,205 ; mulps %xmm5,%xmm9 + DB 65,15,88,201 ; addps %xmm9,%xmm1 + DB 15,89,215 ; mulps %xmm7,%xmm2 + DB 69,15,40,200 ; movaps %xmm8,%xmm9 + DB 68,15,89,206 ; mulps %xmm6,%xmm9 + DB 65,15,88,209 ; addps %xmm9,%xmm2 + DB 15,89,223 ; mulps %xmm7,%xmm3 + DB 68,15,89,199 ; mulps %xmm7,%xmm8 + DB 65,15,88,216 ; addps %xmm8,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_dstatop_sse41 +_sk_dstatop_sse41 LABEL PROC + DB 68,15,40,195 ; movaps %xmm3,%xmm8 + DB 68,15,89,196 ; mulps %xmm4,%xmm8 + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 102,68,15,110,200 ; movd %eax,%xmm9 + DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 + DB 68,15,92,207 ; subps %xmm7,%xmm9 + DB 65,15,89,193 ; mulps %xmm9,%xmm0 + DB 65,15,88,192 ; addps %xmm8,%xmm0 + DB 68,15,40,195 ; movaps %xmm3,%xmm8 + DB 68,15,89,197 ; mulps %xmm5,%xmm8 + DB 65,15,89,201 ; mulps %xmm9,%xmm1 + DB 65,15,88,200 ; addps %xmm8,%xmm1 + DB 68,15,40,195 ; movaps %xmm3,%xmm8 + DB 68,15,89,198 ; mulps %xmm6,%xmm8 + DB 65,15,89,209 ; mulps %xmm9,%xmm2 + DB 65,15,88,208 ; addps %xmm8,%xmm2 + DB 68,15,89,203 ; mulps %xmm3,%xmm9 + DB 15,89,223 ; mulps %xmm7,%xmm3 + DB 65,15,88,217 ; addps %xmm9,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_srcin_sse41 +_sk_srcin_sse41 LABEL PROC + DB 15,89,199 ; mulps %xmm7,%xmm0 + DB 15,89,207 ; mulps %xmm7,%xmm1 + DB 15,89,215 ; mulps %xmm7,%xmm2 + DB 15,89,223 ; mulps %xmm7,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_dstin_sse41 +_sk_dstin_sse41 LABEL PROC + DB 15,40,195 ; movaps %xmm3,%xmm0 + DB 15,89,196 ; mulps %xmm4,%xmm0 + DB 15,40,203 ; movaps %xmm3,%xmm1 + DB 15,89,205 ; mulps %xmm5,%xmm1 + DB 15,40,211 ; movaps %xmm3,%xmm2 + DB 15,89,214 ; mulps %xmm6,%xmm2 + DB 15,89,223 ; mulps %xmm7,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_srcout_sse41 +_sk_srcout_sse41 LABEL PROC + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 102,68,15,110,192 ; movd %eax,%xmm8 + DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 + DB 68,15,92,199 ; subps %xmm7,%xmm8 + DB 65,15,89,192 ; mulps %xmm8,%xmm0 + DB 65,15,89,200 ; mulps %xmm8,%xmm1 + DB 65,15,89,208 ; mulps %xmm8,%xmm2 + DB 65,15,89,216 ; mulps %xmm8,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_dstout_sse41 +_sk_dstout_sse41 LABEL PROC + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 102,68,15,110,192 ; movd %eax,%xmm8 + DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 + DB 68,15,92,195 ; subps %xmm3,%xmm8 + DB 65,15,40,192 ; movaps %xmm8,%xmm0 + DB 15,89,196 ; mulps %xmm4,%xmm0 + DB 65,15,40,200 ; movaps %xmm8,%xmm1 + DB 15,89,205 ; mulps %xmm5,%xmm1 + DB 65,15,40,208 ; movaps %xmm8,%xmm2 + DB 15,89,214 ; mulps %xmm6,%xmm2 + DB 68,15,89,199 ; mulps %xmm7,%xmm8 DB 72,173 ; lods %ds:(%rsi),%rax + DB 65,15,40,216 ; movaps %xmm8,%xmm3 DB 255,224 ; jmpq *%rax PUBLIC _sk_srcover_sse41 @@ -3253,6 +3653,113 @@ _sk_dstover_sse41 LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 255,224 ; jmpq *%rax +PUBLIC _sk_modulate_sse41 +_sk_modulate_sse41 LABEL PROC + DB 15,89,196 ; mulps %xmm4,%xmm0 + DB 15,89,205 ; mulps %xmm5,%xmm1 + DB 15,89,214 ; mulps %xmm6,%xmm2 + DB 15,89,223 ; mulps %xmm7,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_multiply_sse41 +_sk_multiply_sse41 LABEL PROC + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 102,68,15,110,192 ; movd %eax,%xmm8 + DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 + DB 69,15,40,200 ; movaps %xmm8,%xmm9 + DB 68,15,92,207 ; subps %xmm7,%xmm9 + DB 69,15,40,209 ; movaps %xmm9,%xmm10 + DB 68,15,89,208 ; mulps %xmm0,%xmm10 + DB 68,15,92,195 ; subps %xmm3,%xmm8 + DB 69,15,40,216 ; movaps %xmm8,%xmm11 + DB 68,15,89,220 ; mulps %xmm4,%xmm11 + DB 69,15,88,218 ; addps %xmm10,%xmm11 + DB 15,89,196 ; mulps %xmm4,%xmm0 + DB 65,15,88,195 ; addps %xmm11,%xmm0 + DB 69,15,40,209 ; movaps %xmm9,%xmm10 + DB 68,15,89,209 ; mulps %xmm1,%xmm10 + DB 69,15,40,216 ; movaps %xmm8,%xmm11 + DB 68,15,89,221 ; mulps %xmm5,%xmm11 + DB 69,15,88,218 ; addps %xmm10,%xmm11 + DB 15,89,205 ; mulps %xmm5,%xmm1 + DB 65,15,88,203 ; addps %xmm11,%xmm1 + DB 69,15,40,209 ; movaps %xmm9,%xmm10 + DB 68,15,89,210 ; mulps %xmm2,%xmm10 + DB 69,15,40,216 ; movaps %xmm8,%xmm11 + DB 68,15,89,222 ; mulps %xmm6,%xmm11 + DB 69,15,88,218 ; addps %xmm10,%xmm11 + DB 15,89,214 ; mulps %xmm6,%xmm2 + DB 65,15,88,211 ; addps %xmm11,%xmm2 + DB 68,15,89,203 ; mulps %xmm3,%xmm9 + DB 68,15,89,199 ; mulps %xmm7,%xmm8 + DB 69,15,88,193 ; addps %xmm9,%xmm8 + DB 15,89,223 ; mulps %xmm7,%xmm3 + DB 65,15,88,216 ; addps %xmm8,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_plus__sse41 +_sk_plus__sse41 LABEL PROC + DB 15,88,196 ; addps %xmm4,%xmm0 + DB 15,88,205 ; addps %xmm5,%xmm1 + DB 15,88,214 ; addps %xmm6,%xmm2 + DB 15,88,223 ; addps %xmm7,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_screen_sse41 +_sk_screen_sse41 LABEL PROC + DB 68,15,40,192 ; movaps %xmm0,%xmm8 + DB 68,15,88,196 ; addps %xmm4,%xmm8 + DB 15,89,196 ; mulps %xmm4,%xmm0 + DB 68,15,92,192 ; subps %xmm0,%xmm8 + DB 68,15,40,201 ; movaps %xmm1,%xmm9 + DB 68,15,88,205 ; addps %xmm5,%xmm9 + DB 15,89,205 ; mulps %xmm5,%xmm1 + DB 68,15,92,201 ; subps %xmm1,%xmm9 + DB 68,15,40,210 ; movaps %xmm2,%xmm10 + DB 68,15,88,214 ; addps %xmm6,%xmm10 + DB 15,89,214 ; mulps %xmm6,%xmm2 + DB 68,15,92,210 ; subps %xmm2,%xmm10 + DB 68,15,40,219 ; movaps %xmm3,%xmm11 + DB 68,15,88,223 ; addps %xmm7,%xmm11 + DB 15,89,223 ; mulps %xmm7,%xmm3 + DB 68,15,92,219 ; subps %xmm3,%xmm11 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 65,15,40,192 ; movaps %xmm8,%xmm0 + DB 65,15,40,201 ; movaps %xmm9,%xmm1 + DB 65,15,40,210 ; movaps %xmm10,%xmm2 + DB 65,15,40,219 ; movaps %xmm11,%xmm3 + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_xor__sse41 +_sk_xor__sse41 LABEL PROC + DB 68,15,40,195 ; movaps %xmm3,%xmm8 + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 102,15,110,216 ; movd %eax,%xmm3 + DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3 + DB 68,15,40,203 ; movaps %xmm3,%xmm9 + DB 68,15,92,207 ; subps %xmm7,%xmm9 + DB 65,15,89,193 ; mulps %xmm9,%xmm0 + DB 65,15,92,216 ; subps %xmm8,%xmm3 + DB 68,15,40,211 ; movaps %xmm3,%xmm10 + DB 68,15,89,212 ; mulps %xmm4,%xmm10 + DB 65,15,88,194 ; addps %xmm10,%xmm0 + DB 65,15,89,201 ; mulps %xmm9,%xmm1 + DB 68,15,40,211 ; movaps %xmm3,%xmm10 + DB 68,15,89,213 ; mulps %xmm5,%xmm10 + DB 65,15,88,202 ; addps %xmm10,%xmm1 + DB 65,15,89,209 ; mulps %xmm9,%xmm2 + DB 68,15,40,211 ; movaps %xmm3,%xmm10 + DB 68,15,89,214 ; mulps %xmm6,%xmm10 + DB 65,15,88,210 ; addps %xmm10,%xmm2 + DB 69,15,89,200 ; mulps %xmm8,%xmm9 + DB 15,89,223 ; mulps %xmm7,%xmm3 + DB 65,15,88,217 ; addps %xmm9,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + PUBLIC _sk_clamp_0_sse41 _sk_clamp_0_sse41 LABEL PROC DB 69,15,87,192 ; xorps %xmm8,%xmm8 @@ -4419,13 +4926,103 @@ _sk_clear_sse2 LABEL PROC DB 15,87,219 ; xorps %xmm3,%xmm3 DB 255,224 ; jmpq *%rax -PUBLIC _sk_plus__sse2 -_sk_plus__sse2 LABEL PROC - DB 15,88,196 ; addps %xmm4,%xmm0 - DB 15,88,205 ; addps %xmm5,%xmm1 - DB 15,88,214 ; addps %xmm6,%xmm2 - DB 15,88,223 ; addps %xmm7,%xmm3 +PUBLIC _sk_srcatop_sse2 +_sk_srcatop_sse2 LABEL PROC + DB 15,89,199 ; mulps %xmm7,%xmm0 + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 102,68,15,110,192 ; movd %eax,%xmm8 + DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 + DB 68,15,92,195 ; subps %xmm3,%xmm8 + DB 69,15,40,200 ; movaps %xmm8,%xmm9 + DB 68,15,89,204 ; mulps %xmm4,%xmm9 + DB 65,15,88,193 ; addps %xmm9,%xmm0 + DB 15,89,207 ; mulps %xmm7,%xmm1 + DB 69,15,40,200 ; movaps %xmm8,%xmm9 + DB 68,15,89,205 ; mulps %xmm5,%xmm9 + DB 65,15,88,201 ; addps %xmm9,%xmm1 + DB 15,89,215 ; mulps %xmm7,%xmm2 + DB 69,15,40,200 ; movaps %xmm8,%xmm9 + DB 68,15,89,206 ; mulps %xmm6,%xmm9 + DB 65,15,88,209 ; addps %xmm9,%xmm2 + DB 15,89,223 ; mulps %xmm7,%xmm3 + DB 68,15,89,199 ; mulps %xmm7,%xmm8 + DB 65,15,88,216 ; addps %xmm8,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_dstatop_sse2 +_sk_dstatop_sse2 LABEL PROC + DB 68,15,40,195 ; movaps %xmm3,%xmm8 + DB 68,15,89,196 ; mulps %xmm4,%xmm8 + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 102,68,15,110,200 ; movd %eax,%xmm9 + DB 69,15,198,201,0 ; shufps $0x0,%xmm9,%xmm9 + DB 68,15,92,207 ; subps %xmm7,%xmm9 + DB 65,15,89,193 ; mulps %xmm9,%xmm0 + DB 65,15,88,192 ; addps %xmm8,%xmm0 + DB 68,15,40,195 ; movaps %xmm3,%xmm8 + DB 68,15,89,197 ; mulps %xmm5,%xmm8 + DB 65,15,89,201 ; mulps %xmm9,%xmm1 + DB 65,15,88,200 ; addps %xmm8,%xmm1 + DB 68,15,40,195 ; movaps %xmm3,%xmm8 + DB 68,15,89,198 ; mulps %xmm6,%xmm8 + DB 65,15,89,209 ; mulps %xmm9,%xmm2 + DB 65,15,88,208 ; addps %xmm8,%xmm2 + DB 68,15,89,203 ; mulps %xmm3,%xmm9 + DB 15,89,223 ; mulps %xmm7,%xmm3 + DB 65,15,88,217 ; addps %xmm9,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_srcin_sse2 +_sk_srcin_sse2 LABEL PROC + DB 15,89,199 ; mulps %xmm7,%xmm0 + DB 15,89,207 ; mulps %xmm7,%xmm1 + DB 15,89,215 ; mulps %xmm7,%xmm2 + DB 15,89,223 ; mulps %xmm7,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_dstin_sse2 +_sk_dstin_sse2 LABEL PROC + DB 15,40,195 ; movaps %xmm3,%xmm0 + DB 15,89,196 ; mulps %xmm4,%xmm0 + DB 15,40,203 ; movaps %xmm3,%xmm1 + DB 15,89,205 ; mulps %xmm5,%xmm1 + DB 15,40,211 ; movaps %xmm3,%xmm2 + DB 15,89,214 ; mulps %xmm6,%xmm2 + DB 15,89,223 ; mulps %xmm7,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_srcout_sse2 +_sk_srcout_sse2 LABEL PROC + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 102,68,15,110,192 ; movd %eax,%xmm8 + DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 + DB 68,15,92,199 ; subps %xmm7,%xmm8 + DB 65,15,89,192 ; mulps %xmm8,%xmm0 + DB 65,15,89,200 ; mulps %xmm8,%xmm1 + DB 65,15,89,208 ; mulps %xmm8,%xmm2 + DB 65,15,89,216 ; mulps %xmm8,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_dstout_sse2 +_sk_dstout_sse2 LABEL PROC + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 102,68,15,110,192 ; movd %eax,%xmm8 + DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 + DB 68,15,92,195 ; subps %xmm3,%xmm8 + DB 65,15,40,192 ; movaps %xmm8,%xmm0 + DB 15,89,196 ; mulps %xmm4,%xmm0 + DB 65,15,40,200 ; movaps %xmm8,%xmm1 + DB 15,89,205 ; mulps %xmm5,%xmm1 + DB 65,15,40,208 ; movaps %xmm8,%xmm2 + DB 15,89,214 ; mulps %xmm6,%xmm2 + DB 68,15,89,199 ; mulps %xmm7,%xmm8 DB 72,173 ; lods %ds:(%rsi),%rax + DB 65,15,40,216 ; movaps %xmm8,%xmm3 DB 255,224 ; jmpq *%rax PUBLIC _sk_srcover_sse2 @@ -4465,6 +5062,113 @@ _sk_dstover_sse2 LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 255,224 ; jmpq *%rax +PUBLIC _sk_modulate_sse2 +_sk_modulate_sse2 LABEL PROC + DB 15,89,196 ; mulps %xmm4,%xmm0 + DB 15,89,205 ; mulps %xmm5,%xmm1 + DB 15,89,214 ; mulps %xmm6,%xmm2 + DB 15,89,223 ; mulps %xmm7,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_multiply_sse2 +_sk_multiply_sse2 LABEL PROC + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 102,68,15,110,192 ; movd %eax,%xmm8 + DB 69,15,198,192,0 ; shufps $0x0,%xmm8,%xmm8 + DB 69,15,40,200 ; movaps %xmm8,%xmm9 + DB 68,15,92,207 ; subps %xmm7,%xmm9 + DB 69,15,40,209 ; movaps %xmm9,%xmm10 + DB 68,15,89,208 ; mulps %xmm0,%xmm10 + DB 68,15,92,195 ; subps %xmm3,%xmm8 + DB 69,15,40,216 ; movaps %xmm8,%xmm11 + DB 68,15,89,220 ; mulps %xmm4,%xmm11 + DB 69,15,88,218 ; addps %xmm10,%xmm11 + DB 15,89,196 ; mulps %xmm4,%xmm0 + DB 65,15,88,195 ; addps %xmm11,%xmm0 + DB 69,15,40,209 ; movaps %xmm9,%xmm10 + DB 68,15,89,209 ; mulps %xmm1,%xmm10 + DB 69,15,40,216 ; movaps %xmm8,%xmm11 + DB 68,15,89,221 ; mulps %xmm5,%xmm11 + DB 69,15,88,218 ; addps %xmm10,%xmm11 + DB 15,89,205 ; mulps %xmm5,%xmm1 + DB 65,15,88,203 ; addps %xmm11,%xmm1 + DB 69,15,40,209 ; movaps %xmm9,%xmm10 + DB 68,15,89,210 ; mulps %xmm2,%xmm10 + DB 69,15,40,216 ; movaps %xmm8,%xmm11 + DB 68,15,89,222 ; mulps %xmm6,%xmm11 + DB 69,15,88,218 ; addps %xmm10,%xmm11 + DB 15,89,214 ; mulps %xmm6,%xmm2 + DB 65,15,88,211 ; addps %xmm11,%xmm2 + DB 68,15,89,203 ; mulps %xmm3,%xmm9 + DB 68,15,89,199 ; mulps %xmm7,%xmm8 + DB 69,15,88,193 ; addps %xmm9,%xmm8 + DB 15,89,223 ; mulps %xmm7,%xmm3 + DB 65,15,88,216 ; addps %xmm8,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_plus__sse2 +_sk_plus__sse2 LABEL PROC + DB 15,88,196 ; addps %xmm4,%xmm0 + DB 15,88,205 ; addps %xmm5,%xmm1 + DB 15,88,214 ; addps %xmm6,%xmm2 + DB 15,88,223 ; addps %xmm7,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_screen_sse2 +_sk_screen_sse2 LABEL PROC + DB 68,15,40,192 ; movaps %xmm0,%xmm8 + DB 68,15,88,196 ; addps %xmm4,%xmm8 + DB 15,89,196 ; mulps %xmm4,%xmm0 + DB 68,15,92,192 ; subps %xmm0,%xmm8 + DB 68,15,40,201 ; movaps %xmm1,%xmm9 + DB 68,15,88,205 ; addps %xmm5,%xmm9 + DB 15,89,205 ; mulps %xmm5,%xmm1 + DB 68,15,92,201 ; subps %xmm1,%xmm9 + DB 68,15,40,210 ; movaps %xmm2,%xmm10 + DB 68,15,88,214 ; addps %xmm6,%xmm10 + DB 15,89,214 ; mulps %xmm6,%xmm2 + DB 68,15,92,210 ; subps %xmm2,%xmm10 + DB 68,15,40,219 ; movaps %xmm3,%xmm11 + DB 68,15,88,223 ; addps %xmm7,%xmm11 + DB 15,89,223 ; mulps %xmm7,%xmm3 + DB 68,15,92,219 ; subps %xmm3,%xmm11 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 65,15,40,192 ; movaps %xmm8,%xmm0 + DB 65,15,40,201 ; movaps %xmm9,%xmm1 + DB 65,15,40,210 ; movaps %xmm10,%xmm2 + DB 65,15,40,219 ; movaps %xmm11,%xmm3 + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_xor__sse2 +_sk_xor__sse2 LABEL PROC + DB 68,15,40,195 ; movaps %xmm3,%xmm8 + DB 184,0,0,128,63 ; mov $0x3f800000,%eax + DB 102,15,110,216 ; movd %eax,%xmm3 + DB 15,198,219,0 ; shufps $0x0,%xmm3,%xmm3 + DB 68,15,40,203 ; movaps %xmm3,%xmm9 + DB 68,15,92,207 ; subps %xmm7,%xmm9 + DB 65,15,89,193 ; mulps %xmm9,%xmm0 + DB 65,15,92,216 ; subps %xmm8,%xmm3 + DB 68,15,40,211 ; movaps %xmm3,%xmm10 + DB 68,15,89,212 ; mulps %xmm4,%xmm10 + DB 65,15,88,194 ; addps %xmm10,%xmm0 + DB 65,15,89,201 ; mulps %xmm9,%xmm1 + DB 68,15,40,211 ; movaps %xmm3,%xmm10 + DB 68,15,89,213 ; mulps %xmm5,%xmm10 + DB 65,15,88,202 ; addps %xmm10,%xmm1 + DB 65,15,89,209 ; mulps %xmm9,%xmm2 + DB 68,15,40,211 ; movaps %xmm3,%xmm10 + DB 68,15,89,214 ; mulps %xmm6,%xmm10 + DB 65,15,88,210 ; addps %xmm10,%xmm2 + DB 69,15,89,200 ; mulps %xmm8,%xmm9 + DB 15,89,223 ; mulps %xmm7,%xmm3 + DB 65,15,88,217 ; addps %xmm9,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + PUBLIC _sk_clamp_0_sse2 _sk_clamp_0_sse2 LABEL PROC DB 69,15,87,192 ; xorps %xmm8,%xmm8 -- cgit v1.2.3