author     Mike Klein <mtklein@chromium.org>  2017-08-14 08:49:54 -0400
committer  Skia Commit-Bot <skia-commit-bot@chromium.org>  2017-08-14 14:22:46 +0000
commit     228f761111ea4c914dc11fc5523bdabc5eca7ffc (patch)
tree       121824c2d6a6642dea51747d97310721b6d448a8 /src/jumper
parent     d0803a4e1dd2a54c3fb03766eace5be93a1add00 (diff)
Simplify difference and exclusion.
We can fold through some math in these two modes.

$ out/ok bench:samples=100 rp filter:search="Difference|Exclusion" serial

Before:
  [blendmode_rect_Exclusion]   4.94ms @0   6.13ms @99   6.25ms @100
  [blendmode_mask_Exclusion]   10.9ms @0   12.8ms @99   12.9ms @100
  [blendmode_rect_Difference]  5.56ms @0   6.79ms @99    6.8ms @100
  [blendmode_mask_Difference]  11.4ms @0   13.8ms @99   14.1ms @100

After:
  [blendmode_rect_Exclusion]    3.5ms @0   4.12ms @99   4.59ms @100
  [blendmode_mask_Exclusion]   9.27ms @0   11.2ms @99   11.6ms @100
  [blendmode_rect_Difference]  5.37ms @0   6.58ms @99    6.6ms @100
  [blendmode_mask_Difference]    11ms @0   12.1ms @99   12.6ms @100

Change-Id: I03f32368244d4f979cfee83723fd78dfbc7d5fc1
Reviewed-on: https://skia-review.googlesource.com/33980
Commit-Queue: Florin Malita <fmalita@chromium.org>
Reviewed-by: Florin Malita <fmalita@chromium.org>
Diffstat (limited to 'src/jumper')
-rw-r--r--  src/jumper/SkJumper_generated.S      958
-rw-r--r--  src/jumper/SkJumper_generated_win.S  944
-rw-r--r--  src/jumper/SkJumper_stages_8bit.cpp   20
3 files changed, 858 insertions, 1064 deletions
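
For readers of the generated diff below, the fold the message refers to is roughly this: difference and exclusion want color = s + d - 2*p but alpha = s + d - p, where p is the per-channel product term. The removed code produced both variants separately and merged them per byte with vpblendvb (exclusion even ran a second multiply pass for the alpha lane). The new code subtracts p once in every lane and then subtracts a color-masked copy of p a second time (vpand against the new 255,255,255,0 constant), so the doubling and the final blend go away. The following is a minimal scalar sketch of that folded form, with assumed names (Px, div255) rather than the actual SkJumper_stages_8bit.cpp helpers:

    // Scalar sketch of the folded math (illustrative only; Px and div255 are
    // assumed names, not the real SkJumper_stages_8bit.cpp helpers).
    #include <algorithm>
    #include <cstdint>

    struct Px { uint8_t r, g, b, a; };                 // premultiplied 8-bit RGBA

    static uint8_t div255(unsigned x) { return uint8_t((x + 127) / 255); }

    // Exclusion: color = s + d - 2*s*d/255, alpha = sa + da - sa*da/255.
    // Folded: subtract p once in every lane, then once more in the color lanes
    // only (the vector code does this with p & 0x00ffffff per pixel), so no
    // per-lane blend is needed to fix up alpha.
    static Px exclusion(Px s, Px d) {
        Px p = { div255(s.r*d.r), div255(s.g*d.g), div255(s.b*d.b), div255(s.a*d.a) };
        return { uint8_t(s.r + d.r - 2*p.r),
                 uint8_t(s.g + d.g - 2*p.g),
                 uint8_t(s.b + d.b - 2*p.b),
                 uint8_t(s.a + d.a -   p.a) };         // alpha subtracts p just once
    }

    // Difference uses p = min(s*da, d*sa)/255 per color lane; in the alpha lane
    // min(sa*da, da*sa) is simply sa*da, so the same folded subtraction works.
    static Px difference(Px s, Px d) {
        Px p = { div255(std::min(s.r*d.a, d.r*s.a)),
                 div255(std::min(s.g*d.a, d.g*s.a)),
                 div255(std::min(s.b*d.a, d.b*s.a)),
                 div255(s.a*d.a) };
        return { uint8_t(s.r + d.r - 2*p.r),
                 uint8_t(s.g + d.g - 2*p.g),
                 uint8_t(s.b + d.b - 2*p.b),
                 uint8_t(s.a + d.a -   p.a) };
    }

The HSW and SSE4.1 bodies in the patch below are the machine-generated encodings of this same arithmetic.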
diff --git a/src/jumper/SkJumper_generated.S b/src/jumper/SkJumper_generated.S
index 2f0417a5c2..a6b1cbdbfe 100644
--- a/src/jumper/SkJumper_generated.S
+++ b/src/jumper/SkJumper_generated.S
@@ -56656,7 +56656,7 @@ HIDDEN _sk_set_rgb_hsw_8bit
FUNCTION(_sk_set_rgb_hsw_8bit)
_sk_set_rgb_hsw_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 197,250,16,37,66,45,0,0 // vmovss 0x2d42(%rip),%xmm4 # 2e04 <_sk_exclusion_hsw_8bit+0x16a>
+ .byte 197,250,16,37,142,44,0,0 // vmovss 0x2c8e(%rip),%xmm4 # 2d50 <_sk_difference_hsw_8bit+0x182>
.byte 197,218,89,40 // vmulss (%rax),%xmm4,%xmm5
.byte 196,225,250,44,205 // vcvttss2si %xmm5,%rcx
.byte 197,218,89,104,4 // vmulss 0x4(%rax),%xmm4,%xmm5
@@ -56669,7 +56669,7 @@ _sk_set_rgb_hsw_8bit:
.byte 9,208 // or %edx,%eax
.byte 197,249,110,224 // vmovd %eax,%xmm4
.byte 196,226,125,88,228 // vpbroadcastd %xmm4,%ymm4
- .byte 197,253,111,45,70,45,0,0 // vmovdqa 0x2d46(%rip),%ymm5 # 2e40 <_sk_exclusion_hsw_8bit+0x1a6>
+ .byte 197,253,111,45,134,44,0,0 // vmovdqa 0x2c86(%rip),%ymm5 # 2d80 <_sk_difference_hsw_8bit+0x1b2>
.byte 197,245,219,205 // vpand %ymm5,%ymm1,%ymm1
.byte 197,253,219,197 // vpand %ymm5,%ymm0,%ymm0
.byte 197,221,235,192 // vpor %ymm0,%ymm4,%ymm0
@@ -56681,10 +56681,10 @@ HIDDEN _sk_premul_hsw_8bit
.globl _sk_premul_hsw_8bit
FUNCTION(_sk_premul_hsw_8bit)
_sk_premul_hsw_8bit:
- .byte 197,253,111,37,74,45,0,0 // vmovdqa 0x2d4a(%rip),%ymm4 # 2e60 <_sk_exclusion_hsw_8bit+0x1c6>
+ .byte 197,253,111,37,138,44,0,0 // vmovdqa 0x2c8a(%rip),%ymm4 # 2da0 <_sk_difference_hsw_8bit+0x1d2>
.byte 196,226,125,0,236 // vpshufb %ymm4,%ymm0,%ymm5
.byte 196,226,117,0,228 // vpshufb %ymm4,%ymm1,%ymm4
- .byte 197,253,111,53,88,45,0,0 // vmovdqa 0x2d58(%rip),%ymm6 # 2e80 <_sk_exclusion_hsw_8bit+0x1e6>
+ .byte 197,253,111,53,152,44,0,0 // vmovdqa 0x2c98(%rip),%ymm6 # 2dc0 <_sk_difference_hsw_8bit+0x1f2>
.byte 197,221,235,230 // vpor %ymm6,%ymm4,%ymm4
.byte 197,213,235,238 // vpor %ymm6,%ymm5,%ymm5
.byte 196,226,125,48,240 // vpmovzxbw %xmm0,%ymm6
@@ -56724,7 +56724,7 @@ HIDDEN _sk_swap_rb_hsw_8bit
.globl _sk_swap_rb_hsw_8bit
FUNCTION(_sk_swap_rb_hsw_8bit)
_sk_swap_rb_hsw_8bit:
- .byte 197,253,111,37,208,44,0,0 // vmovdqa 0x2cd0(%rip),%ymm4 # 2ea0 <_sk_exclusion_hsw_8bit+0x206>
+ .byte 197,253,111,37,16,44,0,0 // vmovdqa 0x2c10(%rip),%ymm4 # 2de0 <_sk_difference_hsw_8bit+0x212>
.byte 196,226,125,0,196 // vpshufb %ymm4,%ymm0,%ymm0
.byte 196,226,117,0,204 // vpshufb %ymm4,%ymm1,%ymm1
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -57074,7 +57074,7 @@ _sk_load_bgra_hsw_8bit:
.byte 117,35 // jne 6b4 <_sk_load_bgra_hsw_8bit+0x44>
.byte 196,161,126,111,76,130,32 // vmovdqu 0x20(%rdx,%r8,4),%ymm1
.byte 196,161,126,111,4,130 // vmovdqu (%rdx,%r8,4),%ymm0
- .byte 197,253,111,37,26,40,0,0 // vmovdqa 0x281a(%rip),%ymm4 # 2ec0 <_sk_exclusion_hsw_8bit+0x226>
+ .byte 197,253,111,37,90,39,0,0 // vmovdqa 0x275a(%rip),%ymm4 # 2e00 <_sk_difference_hsw_8bit+0x232>
.byte 196,226,125,0,196 // vpshufb %ymm4,%ymm0,%ymm0
.byte 196,226,117,0,204 // vpshufb %ymm4,%ymm1,%ymm1
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -57189,7 +57189,7 @@ _sk_load_bgra_dst_hsw_8bit:
.byte 117,35 // jne 86c <_sk_load_bgra_dst_hsw_8bit+0x44>
.byte 196,161,126,111,92,130,32 // vmovdqu 0x20(%rdx,%r8,4),%ymm3
.byte 196,161,126,111,20,130 // vmovdqu (%rdx,%r8,4),%ymm2
- .byte 197,253,111,37,130,38,0,0 // vmovdqa 0x2682(%rip),%ymm4 # 2ee0 <_sk_exclusion_hsw_8bit+0x246>
+ .byte 197,253,111,37,194,37,0,0 // vmovdqa 0x25c2(%rip),%ymm4 # 2e20 <_sk_difference_hsw_8bit+0x252>
.byte 196,226,109,0,212 // vpshufb %ymm4,%ymm2,%ymm2
.byte 196,226,101,0,220 // vpshufb %ymm4,%ymm3,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -57300,7 +57300,7 @@ _sk_store_bgra_hsw_8bit:
.byte 72,15,175,209 // imul %rcx,%rdx
.byte 72,193,226,2 // shl $0x2,%rdx
.byte 72,3,16 // add (%rax),%rdx
- .byte 197,253,111,37,252,36,0,0 // vmovdqa 0x24fc(%rip),%ymm4 # 2f00 <_sk_exclusion_hsw_8bit+0x266>
+ .byte 197,253,111,37,60,36,0,0 // vmovdqa 0x243c(%rip),%ymm4 # 2e40 <_sk_difference_hsw_8bit+0x272>
.byte 196,226,117,0,236 // vpshufb %ymm4,%ymm1,%ymm5
.byte 196,226,125,0,228 // vpshufb %ymm4,%ymm0,%ymm4
.byte 77,133,201 // test %r9,%r9
@@ -57586,10 +57586,10 @@ _sk_store_a8_hsw_8bit:
.byte 72,99,87,8 // movslq 0x8(%rdi),%rdx
.byte 72,15,175,209 // imul %rcx,%rdx
.byte 72,3,16 // add (%rax),%rdx
- .byte 197,253,111,37,40,33,0,0 // vmovdqa 0x2128(%rip),%ymm4 # 2f20 <_sk_exclusion_hsw_8bit+0x286>
+ .byte 197,253,111,37,104,32,0,0 // vmovdqa 0x2068(%rip),%ymm4 # 2e60 <_sk_difference_hsw_8bit+0x292>
.byte 196,226,117,0,236 // vpshufb %ymm4,%ymm1,%ymm5
.byte 196,227,253,0,237,232 // vpermq $0xe8,%ymm5,%ymm5
- .byte 197,249,111,53,149,35,0,0 // vmovdqa 0x2395(%rip),%xmm6 # 31a0 <_sk_exclusion_hsw_8bit+0x506>
+ .byte 197,249,111,53,245,34,0,0 // vmovdqa 0x22f5(%rip),%xmm6 # 3100 <_sk_difference_hsw_8bit+0x532>
.byte 196,226,81,0,238 // vpshufb %xmm6,%xmm5,%xmm5
.byte 196,226,125,0,228 // vpshufb %ymm4,%ymm0,%ymm4
.byte 196,227,253,0,228,232 // vpermq $0xe8,%ymm4,%ymm4
@@ -57681,10 +57681,10 @@ _sk_load_g8_hsw_8bit:
.byte 196,226,125,49,200 // vpmovzxbd %xmm0,%ymm1
.byte 197,249,112,192,78 // vpshufd $0x4e,%xmm0,%xmm0
.byte 196,226,125,49,192 // vpmovzxbd %xmm0,%ymm0
- .byte 196,226,125,88,37,189,30,0,0 // vpbroadcastd 0x1ebd(%rip),%ymm4 # 2e08 <_sk_exclusion_hsw_8bit+0x16e>
+ .byte 196,226,125,88,37,9,30,0,0 // vpbroadcastd 0x1e09(%rip),%ymm4 # 2d54 <_sk_difference_hsw_8bit+0x186>
.byte 196,226,125,64,236 // vpmulld %ymm4,%ymm0,%ymm5
.byte 196,226,117,64,196 // vpmulld %ymm4,%ymm1,%ymm0
- .byte 196,226,125,88,13,174,30,0,0 // vpbroadcastd 0x1eae(%rip),%ymm1 # 2e0c <_sk_exclusion_hsw_8bit+0x172>
+ .byte 196,226,125,88,13,250,29,0,0 // vpbroadcastd 0x1dfa(%rip),%ymm1 # 2d58 <_sk_difference_hsw_8bit+0x18a>
.byte 197,253,235,193 // vpor %ymm1,%ymm0,%ymm0
.byte 197,213,235,201 // vpor %ymm1,%ymm5,%ymm1
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -57782,10 +57782,10 @@ _sk_load_g8_dst_hsw_8bit:
.byte 196,226,125,49,218 // vpmovzxbd %xmm2,%ymm3
.byte 197,249,112,210,78 // vpshufd $0x4e,%xmm2,%xmm2
.byte 196,226,125,49,210 // vpmovzxbd %xmm2,%ymm2
- .byte 196,226,125,88,37,77,29,0,0 // vpbroadcastd 0x1d4d(%rip),%ymm4 # 2e10 <_sk_exclusion_hsw_8bit+0x176>
+ .byte 196,226,125,88,37,153,28,0,0 // vpbroadcastd 0x1c99(%rip),%ymm4 # 2d5c <_sk_difference_hsw_8bit+0x18e>
.byte 196,226,109,64,236 // vpmulld %ymm4,%ymm2,%ymm5
.byte 196,226,101,64,212 // vpmulld %ymm4,%ymm3,%ymm2
- .byte 196,226,125,88,29,62,29,0,0 // vpbroadcastd 0x1d3e(%rip),%ymm3 # 2e14 <_sk_exclusion_hsw_8bit+0x17a>
+ .byte 196,226,125,88,29,138,28,0,0 // vpbroadcastd 0x1c8a(%rip),%ymm3 # 2d60 <_sk_difference_hsw_8bit+0x192>
.byte 197,237,235,211 // vpor %ymm3,%ymm2,%ymm2
.byte 197,213,235,219 // vpor %ymm3,%ymm5,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -57882,7 +57882,7 @@ _sk_srcover_rgba_8888_hsw_8bit:
.byte 15,133,222,0,0,0 // jne 1303 <_sk_srcover_rgba_8888_hsw_8bit+0x103>
.byte 196,33,126,111,76,138,32 // vmovdqu 0x20(%rdx,%r9,4),%ymm9
.byte 196,33,126,111,28,138 // vmovdqu (%rdx,%r9,4),%ymm11
- .byte 197,253,111,53,6,29,0,0 // vmovdqa 0x1d06(%rip),%ymm6 # 2f40 <_sk_exclusion_hsw_8bit+0x2a6>
+ .byte 197,253,111,53,70,28,0,0 // vmovdqa 0x1c46(%rip),%ymm6 # 2e80 <_sk_difference_hsw_8bit+0x2b2>
.byte 196,226,117,0,254 // vpshufb %ymm6,%ymm1,%ymm7
.byte 196,226,125,0,246 // vpshufb %ymm6,%ymm0,%ymm6
.byte 196,66,125,48,195 // vpmovzxbw %xmm11,%ymm8
@@ -58090,7 +58090,7 @@ FUNCTION(_sk_scale_1_float_hsw_8bit)
_sk_scale_1_float_hsw_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 197,250,16,32 // vmovss (%rax),%xmm4
- .byte 197,218,89,37,130,24,0,0 // vmulss 0x1882(%rip),%xmm4,%xmm4 # 2e18 <_sk_exclusion_hsw_8bit+0x17e>
+ .byte 197,218,89,37,206,23,0,0 // vmulss 0x17ce(%rip),%xmm4,%xmm4 # 2d64 <_sk_difference_hsw_8bit+0x196>
.byte 197,250,44,196 // vcvttss2si %xmm4,%eax
.byte 197,249,110,224 // vmovd %eax,%xmm4
.byte 196,226,125,120,228 // vpbroadcastb %xmm4,%ymm4
@@ -58100,7 +58100,7 @@ _sk_scale_1_float_hsw_8bit:
.byte 196,226,125,48,241 // vpmovzxbw %xmm1,%ymm6
.byte 196,227,125,57,201,1 // vextracti128 $0x1,%ymm1,%xmm1
.byte 196,226,125,48,201 // vpmovzxbw %xmm1,%ymm1
- .byte 197,221,219,37,149,25,0,0 // vpand 0x1995(%rip),%ymm4,%ymm4 # 2f60 <_sk_exclusion_hsw_8bit+0x2c6>
+ .byte 197,221,219,37,213,24,0,0 // vpand 0x18d5(%rip),%ymm4,%ymm4 # 2ea0 <_sk_difference_hsw_8bit+0x2d2>
.byte 197,221,213,249 // vpmullw %ymm1,%ymm4,%ymm7
.byte 197,93,213,198 // vpmullw %ymm6,%ymm4,%ymm8
.byte 197,93,213,200 // vpmullw %ymm0,%ymm4,%ymm9
@@ -58139,7 +58139,7 @@ _sk_scale_u8_hsw_8bit:
.byte 196,226,125,49,236 // vpmovzxbd %xmm4,%ymm5
.byte 197,249,112,228,78 // vpshufd $0x4e,%xmm4,%xmm4
.byte 196,226,125,49,228 // vpmovzxbd %xmm4,%ymm4
- .byte 197,253,111,53,31,25,0,0 // vmovdqa 0x191f(%rip),%ymm6 # 2f80 <_sk_exclusion_hsw_8bit+0x2e6>
+ .byte 197,253,111,53,95,24,0,0 // vmovdqa 0x185f(%rip),%ymm6 # 2ec0 <_sk_difference_hsw_8bit+0x2f2>
.byte 196,226,93,0,230 // vpshufb %ymm6,%ymm4,%ymm4
.byte 196,226,85,0,238 // vpshufb %ymm6,%ymm5,%ymm5
.byte 196,226,125,48,240 // vpmovzxbw %xmm0,%ymm6
@@ -58256,7 +58256,7 @@ FUNCTION(_sk_lerp_1_float_hsw_8bit)
_sk_lerp_1_float_hsw_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 197,250,16,32 // vmovss (%rax),%xmm4
- .byte 197,218,89,37,230,21,0,0 // vmulss 0x15e6(%rip),%xmm4,%xmm4 # 2e1c <_sk_exclusion_hsw_8bit+0x182>
+ .byte 197,218,89,37,50,21,0,0 // vmulss 0x1532(%rip),%xmm4,%xmm4 # 2d68 <_sk_difference_hsw_8bit+0x19a>
.byte 197,250,44,196 // vcvttss2si %xmm4,%eax
.byte 197,249,110,224 // vmovd %eax,%xmm4
.byte 196,226,125,120,228 // vpbroadcastb %xmm4,%ymm4
@@ -58266,7 +58266,7 @@ _sk_lerp_1_float_hsw_8bit:
.byte 196,226,125,48,241 // vpmovzxbw %xmm1,%ymm6
.byte 196,227,125,57,201,1 // vextracti128 $0x1,%ymm1,%xmm1
.byte 196,226,125,48,201 // vpmovzxbw %xmm1,%ymm1
- .byte 197,221,219,61,53,23,0,0 // vpand 0x1735(%rip),%ymm4,%ymm7 # 2fa0 <_sk_exclusion_hsw_8bit+0x306>
+ .byte 197,221,219,61,117,22,0,0 // vpand 0x1675(%rip),%ymm4,%ymm7 # 2ee0 <_sk_difference_hsw_8bit+0x312>
.byte 197,69,213,193 // vpmullw %ymm1,%ymm7,%ymm8
.byte 197,69,213,206 // vpmullw %ymm6,%ymm7,%ymm9
.byte 197,69,213,208 // vpmullw %ymm0,%ymm7,%ymm10
@@ -58336,7 +58336,7 @@ _sk_lerp_u8_hsw_8bit:
.byte 196,226,125,49,236 // vpmovzxbd %xmm4,%ymm5
.byte 197,249,112,228,78 // vpshufd $0x4e,%xmm4,%xmm4
.byte 196,226,125,49,228 // vpmovzxbd %xmm4,%ymm4
- .byte 197,253,111,53,42,22,0,0 // vmovdqa 0x162a(%rip),%ymm6 # 2fc0 <_sk_exclusion_hsw_8bit+0x326>
+ .byte 197,253,111,53,106,21,0,0 // vmovdqa 0x156a(%rip),%ymm6 # 2f00 <_sk_difference_hsw_8bit+0x332>
.byte 196,98,93,0,206 // vpshufb %ymm6,%ymm4,%ymm9
.byte 196,98,85,0,222 // vpshufb %ymm6,%ymm5,%ymm11
.byte 196,226,125,48,240 // vpmovzxbw %xmm0,%ymm6
@@ -58505,7 +58505,7 @@ HIDDEN _sk_black_color_hsw_8bit
FUNCTION(_sk_black_color_hsw_8bit)
_sk_black_color_hsw_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,5,245,17,0,0 // vbroadcastss 0x11f5(%rip),%ymm0 # 2e20 <_sk_exclusion_hsw_8bit+0x186>
+ .byte 196,226,125,24,5,65,17,0,0 // vbroadcastss 0x1141(%rip),%ymm0 # 2d6c <_sk_difference_hsw_8bit+0x19e>
.byte 197,252,40,200 // vmovaps %ymm0,%ymm1
.byte 255,224 // jmpq *%rax
@@ -58531,7 +58531,7 @@ HIDDEN _sk_srcatop_hsw_8bit
.globl _sk_srcatop_hsw_8bit
FUNCTION(_sk_srcatop_hsw_8bit)
_sk_srcatop_hsw_8bit:
- .byte 197,125,111,5,143,19,0,0 // vmovdqa 0x138f(%rip),%ymm8 # 2fe0 <_sk_exclusion_hsw_8bit+0x346>
+ .byte 197,125,111,5,207,18,0,0 // vmovdqa 0x12cf(%rip),%ymm8 # 2f20 <_sk_difference_hsw_8bit+0x352>
.byte 196,194,101,0,224 // vpshufb %ymm8,%ymm3,%ymm4
.byte 196,194,109,0,232 // vpshufb %ymm8,%ymm2,%ymm5
.byte 196,98,125,48,208 // vpmovzxbw %xmm0,%ymm10
@@ -58608,7 +58608,7 @@ HIDDEN _sk_dstatop_hsw_8bit
.globl _sk_dstatop_hsw_8bit
FUNCTION(_sk_dstatop_hsw_8bit)
_sk_dstatop_hsw_8bit:
- .byte 197,125,111,5,74,18,0,0 // vmovdqa 0x124a(%rip),%ymm8 # 3000 <_sk_exclusion_hsw_8bit+0x366>
+ .byte 197,125,111,5,138,17,0,0 // vmovdqa 0x118a(%rip),%ymm8 # 2f40 <_sk_difference_hsw_8bit+0x372>
.byte 196,194,117,0,224 // vpshufb %ymm8,%ymm1,%ymm4
.byte 196,194,125,0,232 // vpshufb %ymm8,%ymm0,%ymm5
.byte 196,98,125,48,210 // vpmovzxbw %xmm2,%ymm10
@@ -58685,7 +58685,7 @@ HIDDEN _sk_srcin_hsw_8bit
.globl _sk_srcin_hsw_8bit
FUNCTION(_sk_srcin_hsw_8bit)
_sk_srcin_hsw_8bit:
- .byte 197,253,111,37,1,17,0,0 // vmovdqa 0x1101(%rip),%ymm4 # 3020 <_sk_exclusion_hsw_8bit+0x386>
+ .byte 197,253,111,37,65,16,0,0 // vmovdqa 0x1041(%rip),%ymm4 # 2f60 <_sk_difference_hsw_8bit+0x392>
.byte 196,226,101,0,236 // vpshufb %ymm4,%ymm3,%ymm5
.byte 196,226,109,0,228 // vpshufb %ymm4,%ymm2,%ymm4
.byte 196,226,125,48,240 // vpmovzxbw %xmm0,%ymm6
@@ -58725,7 +58725,7 @@ HIDDEN _sk_dstin_hsw_8bit
.globl _sk_dstin_hsw_8bit
FUNCTION(_sk_dstin_hsw_8bit)
_sk_dstin_hsw_8bit:
- .byte 197,253,111,37,119,16,0,0 // vmovdqa 0x1077(%rip),%ymm4 # 3040 <_sk_exclusion_hsw_8bit+0x3a6>
+ .byte 197,253,111,37,183,15,0,0 // vmovdqa 0xfb7(%rip),%ymm4 # 2f80 <_sk_difference_hsw_8bit+0x3b2>
.byte 196,226,117,0,204 // vpshufb %ymm4,%ymm1,%ymm1
.byte 196,226,125,0,196 // vpshufb %ymm4,%ymm0,%ymm0
.byte 196,226,125,48,226 // vpmovzxbw %xmm2,%ymm4
@@ -58765,7 +58765,7 @@ HIDDEN _sk_srcout_hsw_8bit
.globl _sk_srcout_hsw_8bit
FUNCTION(_sk_srcout_hsw_8bit)
_sk_srcout_hsw_8bit:
- .byte 197,253,111,37,235,15,0,0 // vmovdqa 0xfeb(%rip),%ymm4 # 3060 <_sk_exclusion_hsw_8bit+0x3c6>
+ .byte 197,253,111,37,43,15,0,0 // vmovdqa 0xf2b(%rip),%ymm4 # 2fa0 <_sk_difference_hsw_8bit+0x3d2>
.byte 196,226,109,0,236 // vpshufb %ymm4,%ymm2,%ymm5
.byte 196,226,101,0,228 // vpshufb %ymm4,%ymm3,%ymm4
.byte 197,205,118,246 // vpcmpeqd %ymm6,%ymm6,%ymm6
@@ -58808,7 +58808,7 @@ HIDDEN _sk_dstout_hsw_8bit
.globl _sk_dstout_hsw_8bit
FUNCTION(_sk_dstout_hsw_8bit)
_sk_dstout_hsw_8bit:
- .byte 197,253,111,37,85,15,0,0 // vmovdqa 0xf55(%rip),%ymm4 # 3080 <_sk_exclusion_hsw_8bit+0x3e6>
+ .byte 197,253,111,37,149,14,0,0 // vmovdqa 0xe95(%rip),%ymm4 # 2fc0 <_sk_difference_hsw_8bit+0x3f2>
.byte 196,226,125,0,196 // vpshufb %ymm4,%ymm0,%ymm0
.byte 196,226,117,0,204 // vpshufb %ymm4,%ymm1,%ymm1
.byte 197,221,118,228 // vpcmpeqd %ymm4,%ymm4,%ymm4
@@ -58851,7 +58851,7 @@ HIDDEN _sk_srcover_hsw_8bit
.globl _sk_srcover_hsw_8bit
FUNCTION(_sk_srcover_hsw_8bit)
_sk_srcover_hsw_8bit:
- .byte 197,253,111,37,189,14,0,0 // vmovdqa 0xebd(%rip),%ymm4 # 30a0 <_sk_exclusion_hsw_8bit+0x406>
+ .byte 197,253,111,37,253,13,0,0 // vmovdqa 0xdfd(%rip),%ymm4 # 2fe0 <_sk_difference_hsw_8bit+0x412>
.byte 196,226,117,0,236 // vpshufb %ymm4,%ymm1,%ymm5
.byte 196,226,125,0,228 // vpshufb %ymm4,%ymm0,%ymm4
.byte 196,98,125,48,202 // vpmovzxbw %xmm2,%ymm9
@@ -58895,7 +58895,7 @@ HIDDEN _sk_dstover_hsw_8bit
.globl _sk_dstover_hsw_8bit
FUNCTION(_sk_dstover_hsw_8bit)
_sk_dstover_hsw_8bit:
- .byte 197,253,111,37,30,14,0,0 // vmovdqa 0xe1e(%rip),%ymm4 # 30c0 <_sk_exclusion_hsw_8bit+0x426>
+ .byte 197,253,111,37,94,13,0,0 // vmovdqa 0xd5e(%rip),%ymm4 # 3000 <_sk_difference_hsw_8bit+0x432>
.byte 196,226,101,0,236 // vpshufb %ymm4,%ymm3,%ymm5
.byte 196,226,109,0,228 // vpshufb %ymm4,%ymm2,%ymm4
.byte 196,98,125,48,200 // vpmovzxbw %xmm0,%ymm9
@@ -58978,7 +58978,7 @@ FUNCTION(_sk_multiply_hsw_8bit)
_sk_multiply_hsw_8bit:
.byte 197,253,111,243 // vmovdqa %ymm3,%ymm6
.byte 197,253,111,218 // vmovdqa %ymm2,%ymm3
- .byte 197,125,111,13,223,12,0,0 // vmovdqa 0xcdf(%rip),%ymm9 # 30e0 <_sk_exclusion_hsw_8bit+0x446>
+ .byte 197,125,111,13,31,12,0,0 // vmovdqa 0xc1f(%rip),%ymm9 # 3020 <_sk_difference_hsw_8bit+0x452>
.byte 196,194,101,0,225 // vpshufb %ymm9,%ymm3,%ymm4
.byte 196,194,77,0,233 // vpshufb %ymm9,%ymm6,%ymm5
.byte 196,65,45,118,210 // vpcmpeqd %ymm10,%ymm10,%ymm10
@@ -59122,7 +59122,7 @@ HIDDEN _sk_xor__hsw_8bit
.globl _sk_xor__hsw_8bit
FUNCTION(_sk_xor__hsw_8bit)
_sk_xor__hsw_8bit:
- .byte 197,125,111,13,109,10,0,0 // vmovdqa 0xa6d(%rip),%ymm9 # 3100 <_sk_exclusion_hsw_8bit+0x466>
+ .byte 197,125,111,13,173,9,0,0 // vmovdqa 0x9ad(%rip),%ymm9 # 3040 <_sk_difference_hsw_8bit+0x472>
.byte 196,194,109,0,225 // vpshufb %ymm9,%ymm2,%ymm4
.byte 196,194,101,0,249 // vpshufb %ymm9,%ymm3,%ymm7
.byte 196,65,37,118,219 // vpcmpeqd %ymm11,%ymm11,%ymm11
@@ -59201,7 +59201,7 @@ HIDDEN _sk_darken_hsw_8bit
.globl _sk_darken_hsw_8bit
FUNCTION(_sk_darken_hsw_8bit)
_sk_darken_hsw_8bit:
- .byte 197,125,111,5,27,9,0,0 // vmovdqa 0x91b(%rip),%ymm8 # 3120 <_sk_exclusion_hsw_8bit+0x486>
+ .byte 197,125,111,5,91,8,0,0 // vmovdqa 0x85b(%rip),%ymm8 # 3060 <_sk_difference_hsw_8bit+0x492>
.byte 196,194,101,0,224 // vpshufb %ymm8,%ymm3,%ymm4
.byte 196,194,109,0,240 // vpshufb %ymm8,%ymm2,%ymm6
.byte 196,98,125,48,208 // vpmovzxbw %xmm0,%ymm10
@@ -59274,7 +59274,7 @@ _sk_darken_hsw_8bit:
.byte 197,253,248,246 // vpsubb %ymm6,%ymm0,%ymm6
.byte 197,245,248,205 // vpsubb %ymm5,%ymm1,%ymm1
.byte 197,253,248,196 // vpsubb %ymm4,%ymm0,%ymm0
- .byte 196,226,125,88,37,176,4,0,0 // vpbroadcastd 0x4b0(%rip),%ymm4 # 2e24 <_sk_exclusion_hsw_8bit+0x18a>
+ .byte 196,226,125,88,37,252,3,0,0 // vpbroadcastd 0x3fc(%rip),%ymm4 # 2d70 <_sk_difference_hsw_8bit+0x1a2>
.byte 196,227,125,76,198,64 // vpblendvb %ymm4,%ymm6,%ymm0,%ymm0
.byte 196,227,117,76,207,64 // vpblendvb %ymm4,%ymm7,%ymm1,%ymm1
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -59284,7 +59284,7 @@ HIDDEN _sk_lighten_hsw_8bit
.globl _sk_lighten_hsw_8bit
FUNCTION(_sk_lighten_hsw_8bit)
_sk_lighten_hsw_8bit:
- .byte 197,125,111,5,180,7,0,0 // vmovdqa 0x7b4(%rip),%ymm8 # 3140 <_sk_exclusion_hsw_8bit+0x4a6>
+ .byte 197,125,111,5,244,6,0,0 // vmovdqa 0x6f4(%rip),%ymm8 # 3080 <_sk_difference_hsw_8bit+0x4b2>
.byte 196,194,101,0,224 // vpshufb %ymm8,%ymm3,%ymm4
.byte 196,194,109,0,240 // vpshufb %ymm8,%ymm2,%ymm6
.byte 196,98,125,48,208 // vpmovzxbw %xmm0,%ymm10
@@ -59357,17 +59357,63 @@ _sk_lighten_hsw_8bit:
.byte 197,253,248,246 // vpsubb %ymm6,%ymm0,%ymm6
.byte 197,245,248,205 // vpsubb %ymm5,%ymm1,%ymm1
.byte 197,253,248,196 // vpsubb %ymm4,%ymm0,%ymm0
- .byte 196,226,125,88,37,45,3,0,0 // vpbroadcastd 0x32d(%rip),%ymm4 # 2e28 <_sk_exclusion_hsw_8bit+0x18e>
+ .byte 196,226,125,88,37,121,2,0,0 // vpbroadcastd 0x279(%rip),%ymm4 # 2d74 <_sk_difference_hsw_8bit+0x1a6>
.byte 196,227,125,76,198,64 // vpblendvb %ymm4,%ymm6,%ymm0,%ymm0
.byte 196,227,117,76,207,64 // vpblendvb %ymm4,%ymm7,%ymm1,%ymm1
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
+HIDDEN _sk_exclusion_hsw_8bit
+.globl _sk_exclusion_hsw_8bit
+FUNCTION(_sk_exclusion_hsw_8bit)
+_sk_exclusion_hsw_8bit:
+ .byte 196,98,125,48,200 // vpmovzxbw %xmm0,%ymm9
+ .byte 196,227,125,57,197,1 // vextracti128 $0x1,%ymm0,%xmm5
+ .byte 196,226,125,48,237 // vpmovzxbw %xmm5,%ymm5
+ .byte 196,226,125,48,241 // vpmovzxbw %xmm1,%ymm6
+ .byte 196,227,125,57,207,1 // vextracti128 $0x1,%ymm1,%xmm7
+ .byte 196,226,125,48,255 // vpmovzxbw %xmm7,%ymm7
+ .byte 196,98,125,48,194 // vpmovzxbw %xmm2,%ymm8
+ .byte 196,227,125,57,212,1 // vextracti128 $0x1,%ymm2,%xmm4
+ .byte 196,98,125,48,220 // vpmovzxbw %xmm4,%ymm11
+ .byte 196,98,125,48,211 // vpmovzxbw %xmm3,%ymm10
+ .byte 196,227,125,57,220,1 // vextracti128 $0x1,%ymm3,%xmm4
+ .byte 196,226,125,48,228 // vpmovzxbw %xmm4,%ymm4
+ .byte 197,221,213,231 // vpmullw %ymm7,%ymm4,%ymm4
+ .byte 197,45,213,214 // vpmullw %ymm6,%ymm10,%ymm10
+ .byte 197,37,213,221 // vpmullw %ymm5,%ymm11,%ymm11
+ .byte 196,65,61,213,193 // vpmullw %ymm9,%ymm8,%ymm8
+ .byte 196,65,61,253,193 // vpaddw %ymm9,%ymm8,%ymm8
+ .byte 197,165,253,237 // vpaddw %ymm5,%ymm11,%ymm5
+ .byte 197,173,253,246 // vpaddw %ymm6,%ymm10,%ymm6
+ .byte 197,221,253,231 // vpaddw %ymm7,%ymm4,%ymm4
+ .byte 197,221,113,212,8 // vpsrlw $0x8,%ymm4,%ymm4
+ .byte 197,205,113,214,8 // vpsrlw $0x8,%ymm6,%ymm6
+ .byte 197,213,113,213,8 // vpsrlw $0x8,%ymm5,%ymm5
+ .byte 196,193,69,113,208,8 // vpsrlw $0x8,%ymm8,%ymm7
+ .byte 196,99,69,56,197,1 // vinserti128 $0x1,%xmm5,%ymm7,%ymm8
+ .byte 196,227,69,70,237,49 // vperm2i128 $0x31,%ymm5,%ymm7,%ymm5
+ .byte 197,189,103,237 // vpackuswb %ymm5,%ymm8,%ymm5
+ .byte 196,227,77,56,252,1 // vinserti128 $0x1,%xmm4,%ymm6,%ymm7
+ .byte 196,227,77,70,228,49 // vperm2i128 $0x31,%ymm4,%ymm6,%ymm4
+ .byte 197,197,103,228 // vpackuswb %ymm4,%ymm7,%ymm4
+ .byte 197,253,111,53,246,4,0,0 // vmovdqa 0x4f6(%rip),%ymm6 # 30a0 <_sk_difference_hsw_8bit+0x4d2>
+ .byte 197,221,219,254 // vpand %ymm6,%ymm4,%ymm7
+ .byte 197,213,219,246 // vpand %ymm6,%ymm5,%ymm6
+ .byte 197,237,252,192 // vpaddb %ymm0,%ymm2,%ymm0
+ .byte 197,229,252,201 // vpaddb %ymm1,%ymm3,%ymm1
+ .byte 197,245,248,204 // vpsubb %ymm4,%ymm1,%ymm1
+ .byte 197,253,248,197 // vpsubb %ymm5,%ymm0,%ymm0
+ .byte 197,253,248,198 // vpsubb %ymm6,%ymm0,%ymm0
+ .byte 197,245,248,207 // vpsubb %ymm7,%ymm1,%ymm1
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 255,224 // jmpq *%rax
+
HIDDEN _sk_difference_hsw_8bit
.globl _sk_difference_hsw_8bit
FUNCTION(_sk_difference_hsw_8bit)
_sk_difference_hsw_8bit:
- .byte 197,125,111,5,77,6,0,0 // vmovdqa 0x64d(%rip),%ymm8 # 3160 <_sk_exclusion_hsw_8bit+0x4c6>
+ .byte 197,125,111,5,234,4,0,0 // vmovdqa 0x4ea(%rip),%ymm8 # 30c0 <_sk_difference_hsw_8bit+0x4f2>
.byte 196,194,101,0,224 // vpshufb %ymm8,%ymm3,%ymm4
.byte 196,194,109,0,240 // vpshufb %ymm8,%ymm2,%ymm6
.byte 196,98,125,48,208 // vpmovzxbw %xmm0,%ymm10
@@ -59432,100 +59478,23 @@ _sk_difference_hsw_8bit:
.byte 196,227,77,56,253,1 // vinserti128 $0x1,%xmm5,%ymm6,%ymm7
.byte 196,227,77,70,237,49 // vperm2i128 $0x31,%ymm5,%ymm6,%ymm5
.byte 197,197,103,237 // vpackuswb %ymm5,%ymm7,%ymm5
- .byte 197,165,218,245 // vpminub %ymm5,%ymm11,%ymm6
- .byte 197,181,218,252 // vpminub %ymm4,%ymm9,%ymm7
- .byte 197,197,252,255 // vpaddb %ymm7,%ymm7,%ymm7
- .byte 197,205,252,246 // vpaddb %ymm6,%ymm6,%ymm6
+ .byte 197,181,218,228 // vpminub %ymm4,%ymm9,%ymm4
+ .byte 197,165,218,237 // vpminub %ymm5,%ymm11,%ymm5
+ .byte 197,253,111,53,180,3,0,0 // vmovdqa 0x3b4(%rip),%ymm6 # 30e0 <_sk_difference_hsw_8bit+0x512>
+ .byte 197,213,219,254 // vpand %ymm6,%ymm5,%ymm7
+ .byte 197,221,219,246 // vpand %ymm6,%ymm4,%ymm6
.byte 197,237,252,192 // vpaddb %ymm0,%ymm2,%ymm0
.byte 197,229,252,201 // vpaddb %ymm1,%ymm3,%ymm1
- .byte 197,245,248,246 // vpsubb %ymm6,%ymm1,%ymm6
- .byte 197,253,248,255 // vpsubb %ymm7,%ymm0,%ymm7
.byte 197,245,248,205 // vpsubb %ymm5,%ymm1,%ymm1
.byte 197,253,248,196 // vpsubb %ymm4,%ymm0,%ymm0
- .byte 196,226,125,88,37,162,1,0,0 // vpbroadcastd 0x1a2(%rip),%ymm4 # 2e2c <_sk_exclusion_hsw_8bit+0x192>
- .byte 196,227,125,76,199,64 // vpblendvb %ymm4,%ymm7,%ymm0,%ymm0
- .byte 196,227,117,76,206,64 // vpblendvb %ymm4,%ymm6,%ymm1,%ymm1
- .byte 72,173 // lods %ds:(%rsi),%rax
- .byte 255,224 // jmpq *%rax
-
-HIDDEN _sk_exclusion_hsw_8bit
-.globl _sk_exclusion_hsw_8bit
-FUNCTION(_sk_exclusion_hsw_8bit)
-_sk_exclusion_hsw_8bit:
- .byte 196,98,125,48,192 // vpmovzxbw %xmm0,%ymm8
- .byte 196,227,125,57,196,1 // vextracti128 $0x1,%ymm0,%xmm4
- .byte 196,98,125,48,204 // vpmovzxbw %xmm4,%ymm9
- .byte 196,98,125,48,209 // vpmovzxbw %xmm1,%ymm10
- .byte 196,227,125,57,204,1 // vextracti128 $0x1,%ymm1,%xmm4
- .byte 196,98,125,48,220 // vpmovzxbw %xmm4,%ymm11
- .byte 196,226,125,48,226 // vpmovzxbw %xmm2,%ymm4
- .byte 196,227,125,57,213,1 // vextracti128 $0x1,%ymm2,%xmm5
- .byte 196,226,125,48,237 // vpmovzxbw %xmm5,%ymm5
- .byte 196,226,125,48,243 // vpmovzxbw %xmm3,%ymm6
- .byte 196,227,125,57,223,1 // vextracti128 $0x1,%ymm3,%xmm7
- .byte 196,226,125,48,255 // vpmovzxbw %xmm7,%ymm7
- .byte 196,65,69,213,227 // vpmullw %ymm11,%ymm7,%ymm12
- .byte 196,65,77,213,234 // vpmullw %ymm10,%ymm6,%ymm13
- .byte 196,65,85,213,241 // vpmullw %ymm9,%ymm5,%ymm14
- .byte 196,65,93,213,248 // vpmullw %ymm8,%ymm4,%ymm15
- .byte 196,65,5,253,192 // vpaddw %ymm8,%ymm15,%ymm8
- .byte 196,65,13,253,201 // vpaddw %ymm9,%ymm14,%ymm9
- .byte 196,65,21,253,210 // vpaddw %ymm10,%ymm13,%ymm10
- .byte 196,65,29,253,219 // vpaddw %ymm11,%ymm12,%ymm11
- .byte 196,193,37,113,211,8 // vpsrlw $0x8,%ymm11,%ymm11
- .byte 196,193,45,113,210,8 // vpsrlw $0x8,%ymm10,%ymm10
- .byte 196,193,53,113,209,8 // vpsrlw $0x8,%ymm9,%ymm9
- .byte 196,193,61,113,208,8 // vpsrlw $0x8,%ymm8,%ymm8
- .byte 196,67,61,56,225,1 // vinserti128 $0x1,%xmm9,%ymm8,%ymm12
- .byte 196,67,61,70,193,49 // vperm2i128 $0x31,%ymm9,%ymm8,%ymm8
- .byte 196,65,29,103,192 // vpackuswb %ymm8,%ymm12,%ymm8
- .byte 196,67,45,56,203,1 // vinserti128 $0x1,%xmm11,%ymm10,%ymm9
- .byte 196,67,45,70,211,49 // vperm2i128 $0x31,%ymm11,%ymm10,%ymm10
- .byte 196,65,53,103,202 // vpackuswb %ymm10,%ymm9,%ymm9
- .byte 196,65,61,252,208 // vpaddb %ymm8,%ymm8,%ymm10
- .byte 196,65,53,252,193 // vpaddb %ymm9,%ymm9,%ymm8
- .byte 197,109,252,200 // vpaddb %ymm0,%ymm2,%ymm9
- .byte 197,101,252,217 // vpaddb %ymm1,%ymm3,%ymm11
- .byte 196,65,37,248,192 // vpsubb %ymm8,%ymm11,%ymm8
- .byte 196,65,53,248,210 // vpsubb %ymm10,%ymm9,%ymm10
- .byte 197,125,111,37,32,4,0,0 // vmovdqa 0x420(%rip),%ymm12 # 3180 <_sk_exclusion_hsw_8bit+0x4e6>
- .byte 196,194,117,0,204 // vpshufb %ymm12,%ymm1,%ymm1
- .byte 196,194,125,0,196 // vpshufb %ymm12,%ymm0,%ymm0
- .byte 196,98,125,48,224 // vpmovzxbw %xmm0,%ymm12
- .byte 196,227,125,57,192,1 // vextracti128 $0x1,%ymm0,%xmm0
- .byte 196,226,125,48,192 // vpmovzxbw %xmm0,%ymm0
- .byte 196,98,125,48,233 // vpmovzxbw %xmm1,%ymm13
- .byte 196,227,125,57,201,1 // vextracti128 $0x1,%ymm1,%xmm1
- .byte 196,226,125,48,201 // vpmovzxbw %xmm1,%ymm1
- .byte 197,197,213,201 // vpmullw %ymm1,%ymm7,%ymm1
- .byte 196,65,77,213,237 // vpmullw %ymm13,%ymm6,%ymm13
- .byte 197,213,213,192 // vpmullw %ymm0,%ymm5,%ymm0
- .byte 196,65,93,213,228 // vpmullw %ymm12,%ymm4,%ymm12
- .byte 197,157,253,228 // vpaddw %ymm4,%ymm12,%ymm4
- .byte 197,253,253,197 // vpaddw %ymm5,%ymm0,%ymm0
- .byte 197,149,253,238 // vpaddw %ymm6,%ymm13,%ymm5
- .byte 197,245,253,207 // vpaddw %ymm7,%ymm1,%ymm1
- .byte 197,245,113,209,8 // vpsrlw $0x8,%ymm1,%ymm1
- .byte 197,213,113,213,8 // vpsrlw $0x8,%ymm5,%ymm5
- .byte 197,253,113,208,8 // vpsrlw $0x8,%ymm0,%ymm0
- .byte 197,221,113,212,8 // vpsrlw $0x8,%ymm4,%ymm4
- .byte 196,227,93,56,240,1 // vinserti128 $0x1,%xmm0,%ymm4,%ymm6
- .byte 196,227,93,70,192,49 // vperm2i128 $0x31,%ymm0,%ymm4,%ymm0
- .byte 197,205,103,192 // vpackuswb %ymm0,%ymm6,%ymm0
- .byte 196,227,85,56,225,1 // vinserti128 $0x1,%xmm1,%ymm5,%ymm4
- .byte 196,227,85,70,201,49 // vperm2i128 $0x31,%ymm1,%ymm5,%ymm1
- .byte 197,221,103,201 // vpackuswb %ymm1,%ymm4,%ymm1
- .byte 197,165,248,201 // vpsubb %ymm1,%ymm11,%ymm1
- .byte 197,181,248,192 // vpsubb %ymm0,%ymm9,%ymm0
- .byte 196,226,125,88,37,63,0,0,0 // vpbroadcastd 0x3f(%rip),%ymm4 # 2e30 <_sk_exclusion_hsw_8bit+0x196>
- .byte 196,195,125,76,194,64 // vpblendvb %ymm4,%ymm10,%ymm0,%ymm0
- .byte 196,195,117,76,200,64 // vpblendvb %ymm4,%ymm8,%ymm1,%ymm1
+ .byte 197,253,248,198 // vpsubb %ymm6,%ymm0,%ymm0
+ .byte 197,245,248,207 // vpsubb %ymm7,%ymm1,%ymm1
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
BALIGN4
.byte 0,0 // add %al,(%rax)
- .byte 127,67 // jg 2e4b <_sk_exclusion_hsw_8bit+0x1b1>
+ .byte 127,67 // jg 2d97 <_sk_difference_hsw_8bit+0x1c9>
.byte 1,1 // add %eax,(%rcx)
.byte 1,0 // add %eax,(%rax)
.byte 0,0 // add %al,(%rax)
@@ -59535,9 +59504,9 @@ BALIGN4
.byte 0,0 // add %al,(%rax)
.byte 0,255 // add %bh,%bh
.byte 0,0 // add %al,(%rax)
- .byte 127,67 // jg 2e5f <_sk_exclusion_hsw_8bit+0x1c5>
+ .byte 127,67 // jg 2dab <_sk_difference_hsw_8bit+0x1dd>
.byte 0,0 // add %al,(%rax)
- .byte 127,67 // jg 2e63 <_sk_exclusion_hsw_8bit+0x1c9>
+ .byte 127,67 // jg 2daf <_sk_difference_hsw_8bit+0x1e1>
.byte 0,0 // add %al,(%rax)
.byte 0,255 // add %bh,%bh
.byte 255 // (bad)
@@ -59546,12 +59515,6 @@ BALIGN4
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,0 // incl (%rax)
- .byte 255 // (bad)
- .byte 255 // (bad)
- .byte 255,0 // incl (%rax)
- .byte 255 // (bad)
- .byte 255 // (bad)
- .byte 255,0 // incl (%rax)
BALIGN32
.byte 0,0 // add %al,(%rax)
@@ -60030,18 +59993,31 @@ BALIGN32
.byte 15 // (bad)
.byte 15 // (bad)
.byte 15 // (bad)
- .byte 15,3,3 // lsl (%rbx),%eax
+ .byte 15,255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,0 // incl (%rax)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,0 // incl (%rax)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,0 // incl (%rax)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,0 // incl (%rax)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,0 // incl (%rax)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,0 // incl (%rax)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,0 // incl (%rax)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,0 // incl (%rax)
.byte 3,3 // add (%rbx),%eax
- .byte 7 // (bad)
- .byte 7 // (bad)
- .byte 7 // (bad)
- .byte 7 // (bad)
- .byte 11,11 // or (%rbx),%ecx
- .byte 11,11 // or (%rbx),%ecx
- .byte 15 // (bad)
- .byte 15 // (bad)
- .byte 15 // (bad)
- .byte 15,3,3 // lsl (%rbx),%eax
.byte 3,3 // add (%rbx),%eax
.byte 7 // (bad)
.byte 7 // (bad)
@@ -60063,18 +60039,30 @@ BALIGN32
.byte 15 // (bad)
.byte 15 // (bad)
.byte 15 // (bad)
- .byte 15,3,3 // lsl (%rbx),%eax
- .byte 3,3 // add (%rbx),%eax
- .byte 7 // (bad)
- .byte 7 // (bad)
- .byte 7 // (bad)
- .byte 7 // (bad)
- .byte 11,11 // or (%rbx),%ecx
- .byte 11,11 // or (%rbx),%ecx
- .byte 15 // (bad)
- .byte 15 // .byte 0xf
- .byte 15 // .byte 0xf
- .byte 15 // .byte 0xf
+ .byte 15,255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,0 // incl (%rax)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,0 // incl (%rax)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,0 // incl (%rax)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,0 // incl (%rax)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,0 // incl (%rax)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,0 // incl (%rax)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,0 // incl (%rax)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,0 // incl (%rax)
BALIGN16
.byte 0,2 // add %al,(%rdx)
@@ -60169,7 +60157,7 @@ HIDDEN _sk_set_rgb_sse41_8bit
FUNCTION(_sk_set_rgb_sse41_8bit)
_sk_set_rgb_sse41_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 243,15,16,37,237,33,0,0 // movss 0x21ed(%rip),%xmm4 # 22b0 <_sk_exclusion_sse41_8bit+0x137>
+ .byte 243,15,16,37,89,33,0,0 // movss 0x2159(%rip),%xmm4 # 221c <_sk_difference_sse41_8bit+0x169>
.byte 243,15,16,40 // movss (%rax),%xmm5
.byte 243,15,89,236 // mulss %xmm4,%xmm5
.byte 243,72,15,44,205 // cvttss2si %xmm5,%rcx
@@ -60184,7 +60172,7 @@ _sk_set_rgb_sse41_8bit:
.byte 9,208 // or %edx,%eax
.byte 102,15,110,224 // movd %eax,%xmm4
.byte 102,15,112,228,0 // pshufd $0x0,%xmm4,%xmm4
- .byte 102,15,111,45,189,33,0,0 // movdqa 0x21bd(%rip),%xmm5 # 22c0 <_sk_exclusion_sse41_8bit+0x147>
+ .byte 102,15,111,45,45,33,0,0 // movdqa 0x212d(%rip),%xmm5 # 2230 <_sk_difference_sse41_8bit+0x17d>
.byte 102,15,219,205 // pand %xmm5,%xmm1
.byte 102,15,219,197 // pand %xmm5,%xmm0
.byte 102,15,235,196 // por %xmm4,%xmm0
@@ -60198,12 +60186,12 @@ FUNCTION(_sk_premul_sse41_8bit)
_sk_premul_sse41_8bit:
.byte 102,15,111,225 // movdqa %xmm1,%xmm4
.byte 102,15,111,232 // movdqa %xmm0,%xmm5
- .byte 102,15,111,5,169,33,0,0 // movdqa 0x21a9(%rip),%xmm0 # 22d0 <_sk_exclusion_sse41_8bit+0x157>
+ .byte 102,15,111,5,25,33,0,0 // movdqa 0x2119(%rip),%xmm0 # 2240 <_sk_difference_sse41_8bit+0x18d>
.byte 102,15,111,253 // movdqa %xmm5,%xmm7
.byte 102,15,56,0,248 // pshufb %xmm0,%xmm7
.byte 102,15,111,244 // movdqa %xmm4,%xmm6
.byte 102,15,56,0,240 // pshufb %xmm0,%xmm6
- .byte 102,15,111,5,159,33,0,0 // movdqa 0x219f(%rip),%xmm0 # 22e0 <_sk_exclusion_sse41_8bit+0x167>
+ .byte 102,15,111,5,15,33,0,0 // movdqa 0x210f(%rip),%xmm0 # 2250 <_sk_difference_sse41_8bit+0x19d>
.byte 102,15,235,240 // por %xmm0,%xmm6
.byte 102,15,235,248 // por %xmm0,%xmm7
.byte 102,69,15,239,192 // pxor %xmm8,%xmm8
@@ -60236,7 +60224,7 @@ HIDDEN _sk_swap_rb_sse41_8bit
.globl _sk_swap_rb_sse41_8bit
FUNCTION(_sk_swap_rb_sse41_8bit)
_sk_swap_rb_sse41_8bit:
- .byte 102,15,111,37,44,33,0,0 // movdqa 0x212c(%rip),%xmm4 # 22f0 <_sk_exclusion_sse41_8bit+0x177>
+ .byte 102,15,111,37,156,32,0,0 // movdqa 0x209c(%rip),%xmm4 # 2260 <_sk_difference_sse41_8bit+0x1ad>
.byte 102,15,56,0,196 // pshufb %xmm4,%xmm0
.byte 102,15,56,0,204 // pshufb %xmm4,%xmm1
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -60363,7 +60351,7 @@ _sk_load_8888_dst_sse41_8bit:
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 233,255,255,255,221 // jmpq ffffffffde000354 <_sk_exclusion_sse41_8bit+0xffffffffddffe1db>
+ .byte 233,255,255,255,221 // jmpq ffffffffde000354 <_sk_difference_sse41_8bit+0xffffffffddffe2a1>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // .byte 0xff
@@ -60446,7 +60434,7 @@ _sk_load_bgra_sse41_8bit:
.byte 117,35 // jne 448 <_sk_load_bgra_sse41_8bit+0x44>
.byte 243,66,15,111,76,130,16 // movdqu 0x10(%rdx,%r8,4),%xmm1
.byte 243,66,15,111,4,130 // movdqu (%rdx,%r8,4),%xmm0
- .byte 102,15,111,37,198,30,0,0 // movdqa 0x1ec6(%rip),%xmm4 # 2300 <_sk_exclusion_sse41_8bit+0x187>
+ .byte 102,15,111,37,54,30,0,0 // movdqa 0x1e36(%rip),%xmm4 # 2270 <_sk_difference_sse41_8bit+0x1bd>
.byte 102,15,56,0,196 // pshufb %xmm4,%xmm0
.byte 102,15,56,0,204 // pshufb %xmm4,%xmm1
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -60509,7 +60497,7 @@ _sk_load_bgra_dst_sse41_8bit:
.byte 117,35 // jne 518 <_sk_load_bgra_dst_sse41_8bit+0x44>
.byte 243,66,15,111,92,130,16 // movdqu 0x10(%rdx,%r8,4),%xmm3
.byte 243,66,15,111,20,130 // movdqu (%rdx,%r8,4),%xmm2
- .byte 102,15,111,37,6,30,0,0 // movdqa 0x1e06(%rip),%xmm4 # 2310 <_sk_exclusion_sse41_8bit+0x197>
+ .byte 102,15,111,37,118,29,0,0 // movdqa 0x1d76(%rip),%xmm4 # 2280 <_sk_difference_sse41_8bit+0x1cd>
.byte 102,15,56,0,212 // pshufb %xmm4,%xmm2
.byte 102,15,56,0,220 // pshufb %xmm4,%xmm3
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -60568,7 +60556,7 @@ _sk_store_bgra_sse41_8bit:
.byte 72,15,175,209 // imul %rcx,%rdx
.byte 72,193,226,2 // shl $0x2,%rdx
.byte 72,3,16 // add (%rax),%rdx
- .byte 102,15,111,53,88,29,0,0 // movdqa 0x1d58(%rip),%xmm6 # 2320 <_sk_exclusion_sse41_8bit+0x1a7>
+ .byte 102,15,111,53,200,28,0,0 // movdqa 0x1cc8(%rip),%xmm6 # 2290 <_sk_difference_sse41_8bit+0x1dd>
.byte 102,15,111,233 // movdqa %xmm1,%xmm5
.byte 102,15,56,0,238 // pshufb %xmm6,%xmm5
.byte 102,15,111,224 // movdqa %xmm0,%xmm4
@@ -60634,7 +60622,7 @@ _sk_load_a8_sse41_8bit:
.byte 77,133,201 // test %r9,%r9
.byte 117,42 // jne 6af <_sk_load_a8_sse41_8bit+0x47>
.byte 102,66,15,56,48,12,2 // pmovzxbw (%rdx,%r8,1),%xmm1
- .byte 102,15,219,13,156,28,0,0 // pand 0x1c9c(%rip),%xmm1 # 2330 <_sk_exclusion_sse41_8bit+0x1b7>
+ .byte 102,15,219,13,12,28,0,0 // pand 0x1c0c(%rip),%xmm1 # 22a0 <_sk_difference_sse41_8bit+0x1ed>
.byte 102,15,239,228 // pxor %xmm4,%xmm4
.byte 102,15,56,51,193 // pmovzxwd %xmm1,%xmm0
.byte 102,15,105,204 // punpckhwd %xmm4,%xmm1
@@ -60682,7 +60670,7 @@ _sk_load_a8_sse41_8bit:
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 233,255,255,255,222 // jmpq ffffffffdf000750 <_sk_exclusion_sse41_8bit+0xffffffffdeffe5d7>
+ .byte 233,255,255,255,222 // jmpq ffffffffdf000750 <_sk_difference_sse41_8bit+0xffffffffdeffe69d>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,211 // callq *%rbx
@@ -60707,7 +60695,7 @@ _sk_load_a8_dst_sse41_8bit:
.byte 77,133,201 // test %r9,%r9
.byte 117,42 // jne 7a3 <_sk_load_a8_dst_sse41_8bit+0x47>
.byte 102,66,15,56,48,28,2 // pmovzxbw (%rdx,%r8,1),%xmm3
- .byte 102,15,219,29,184,27,0,0 // pand 0x1bb8(%rip),%xmm3 # 2340 <_sk_exclusion_sse41_8bit+0x1c7>
+ .byte 102,15,219,29,40,27,0,0 // pand 0x1b28(%rip),%xmm3 # 22b0 <_sk_difference_sse41_8bit+0x1fd>
.byte 102,15,239,228 // pxor %xmm4,%xmm4
.byte 102,15,56,51,211 // pmovzxwd %xmm3,%xmm2
.byte 102,15,105,220 // punpckhwd %xmm4,%xmm3
@@ -60755,7 +60743,7 @@ _sk_load_a8_dst_sse41_8bit:
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 233,255,255,255,222 // jmpq ffffffffdf000844 <_sk_exclusion_sse41_8bit+0xffffffffdeffe6cb>
+ .byte 233,255,255,255,222 // jmpq ffffffffdf000844 <_sk_difference_sse41_8bit+0xffffffffdeffe791>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,211 // callq *%rbx
@@ -60777,7 +60765,7 @@ _sk_store_a8_sse41_8bit:
.byte 72,99,87,8 // movslq 0x8(%rdi),%rdx
.byte 72,15,175,209 // imul %rcx,%rdx
.byte 72,3,16 // add (%rax),%rdx
- .byte 102,15,111,45,224,26,0,0 // movdqa 0x1ae0(%rip),%xmm5 # 2350 <_sk_exclusion_sse41_8bit+0x1d7>
+ .byte 102,15,111,45,80,26,0,0 // movdqa 0x1a50(%rip),%xmm5 # 22c0 <_sk_difference_sse41_8bit+0x20d>
.byte 102,15,111,241 // movdqa %xmm1,%xmm6
.byte 102,15,56,0,245 // pshufb %xmm5,%xmm6
.byte 102,15,111,224 // movdqa %xmm0,%xmm4
@@ -60785,7 +60773,7 @@ _sk_store_a8_sse41_8bit:
.byte 102,15,108,230 // punpcklqdq %xmm6,%xmm4
.byte 77,133,201 // test %r9,%r9
.byte 117,19 // jne 89e <_sk_store_a8_sse41_8bit+0x4e>
- .byte 102,15,56,0,37,236,26,0,0 // pshufb 0x1aec(%rip),%xmm4 # 2380 <_sk_exclusion_sse41_8bit+0x207>
+ .byte 102,15,56,0,37,92,26,0,0 // pshufb 0x1a5c(%rip),%xmm4 # 22f0 <_sk_difference_sse41_8bit+0x23d>
.byte 102,66,15,214,36,2 // movq %xmm4,(%rdx,%r8,1)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -60801,13 +60789,13 @@ _sk_store_a8_sse41_8bit:
.byte 102,66,15,58,20,36,2,0 // pextrb $0x0,%xmm4,(%rdx,%r8,1)
.byte 235,209 // jmp 89a <_sk_store_a8_sse41_8bit+0x4a>
.byte 102,66,15,58,20,100,2,2,4 // pextrb $0x4,%xmm4,0x2(%rdx,%r8,1)
- .byte 102,15,56,0,37,149,26,0,0 // pshufb 0x1a95(%rip),%xmm4 # 2370 <_sk_exclusion_sse41_8bit+0x1f7>
+ .byte 102,15,56,0,37,5,26,0,0 // pshufb 0x1a05(%rip),%xmm4 # 22e0 <_sk_difference_sse41_8bit+0x22d>
.byte 102,66,15,58,21,36,2,0 // pextrw $0x0,%xmm4,(%rdx,%r8,1)
.byte 235,181 // jmp 89a <_sk_store_a8_sse41_8bit+0x4a>
.byte 102,66,15,58,20,100,2,6,12 // pextrb $0xc,%xmm4,0x6(%rdx,%r8,1)
.byte 102,66,15,58,20,100,2,5,10 // pextrb $0xa,%xmm4,0x5(%rdx,%r8,1)
.byte 102,66,15,58,20,100,2,4,8 // pextrb $0x8,%xmm4,0x4(%rdx,%r8,1)
- .byte 102,15,56,0,37,87,26,0,0 // pshufb 0x1a57(%rip),%xmm4 # 2360 <_sk_exclusion_sse41_8bit+0x1e7>
+ .byte 102,15,56,0,37,199,25,0,0 // pshufb 0x19c7(%rip),%xmm4 # 22d0 <_sk_difference_sse41_8bit+0x21d>
.byte 102,66,15,126,36,2 // movd %xmm4,(%rdx,%r8,1)
.byte 235,137 // jmp 89a <_sk_store_a8_sse41_8bit+0x4a>
.byte 15,31,0 // nopl (%rax)
@@ -60847,14 +60835,14 @@ _sk_load_g8_sse41_8bit:
.byte 77,133,201 // test %r9,%r9
.byte 117,66 // jne 98f <_sk_load_g8_sse41_8bit+0x5f>
.byte 102,66,15,56,48,12,2 // pmovzxbw (%rdx,%r8,1),%xmm1
- .byte 102,15,219,13,52,26,0,0 // pand 0x1a34(%rip),%xmm1 # 2390 <_sk_exclusion_sse41_8bit+0x217>
+ .byte 102,15,219,13,164,25,0,0 // pand 0x19a4(%rip),%xmm1 # 2300 <_sk_difference_sse41_8bit+0x24d>
.byte 102,15,239,228 // pxor %xmm4,%xmm4
.byte 102,15,56,51,193 // pmovzxwd %xmm1,%xmm0
.byte 102,15,105,204 // punpckhwd %xmm4,%xmm1
- .byte 102,15,111,37,47,26,0,0 // movdqa 0x1a2f(%rip),%xmm4 # 23a0 <_sk_exclusion_sse41_8bit+0x227>
+ .byte 102,15,111,37,159,25,0,0 // movdqa 0x199f(%rip),%xmm4 # 2310 <_sk_difference_sse41_8bit+0x25d>
.byte 102,15,56,64,204 // pmulld %xmm4,%xmm1
.byte 102,15,56,64,196 // pmulld %xmm4,%xmm0
- .byte 102,15,111,37,45,26,0,0 // movdqa 0x1a2d(%rip),%xmm4 # 23b0 <_sk_exclusion_sse41_8bit+0x237>
+ .byte 102,15,111,37,157,25,0,0 // movdqa 0x199d(%rip),%xmm4 # 2320 <_sk_difference_sse41_8bit+0x26d>
.byte 102,15,235,196 // por %xmm4,%xmm0
.byte 102,15,235,204 // por %xmm4,%xmm1
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -60899,7 +60887,7 @@ _sk_load_g8_sse41_8bit:
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 232,255,255,255,221 // callq ffffffffde000a34 <_sk_exclusion_sse41_8bit+0xffffffffddffe8bb>
+ .byte 232,255,255,255,221 // callq ffffffffde000a34 <_sk_difference_sse41_8bit+0xffffffffddffe981>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,210 // callq *%rdx
@@ -60924,14 +60912,14 @@ _sk_load_g8_dst_sse41_8bit:
.byte 77,133,201 // test %r9,%r9
.byte 117,66 // jne a9f <_sk_load_g8_dst_sse41_8bit+0x5f>
.byte 102,66,15,56,48,28,2 // pmovzxbw (%rdx,%r8,1),%xmm3
- .byte 102,15,219,29,84,25,0,0 // pand 0x1954(%rip),%xmm3 # 23c0 <_sk_exclusion_sse41_8bit+0x247>
+ .byte 102,15,219,29,196,24,0,0 // pand 0x18c4(%rip),%xmm3 # 2330 <_sk_difference_sse41_8bit+0x27d>
.byte 102,15,239,228 // pxor %xmm4,%xmm4
.byte 102,15,56,51,211 // pmovzxwd %xmm3,%xmm2
.byte 102,15,105,220 // punpckhwd %xmm4,%xmm3
- .byte 102,15,111,37,79,25,0,0 // movdqa 0x194f(%rip),%xmm4 # 23d0 <_sk_exclusion_sse41_8bit+0x257>
+ .byte 102,15,111,37,191,24,0,0 // movdqa 0x18bf(%rip),%xmm4 # 2340 <_sk_difference_sse41_8bit+0x28d>
.byte 102,15,56,64,220 // pmulld %xmm4,%xmm3
.byte 102,15,56,64,212 // pmulld %xmm4,%xmm2
- .byte 102,15,111,37,77,25,0,0 // movdqa 0x194d(%rip),%xmm4 # 23e0 <_sk_exclusion_sse41_8bit+0x267>
+ .byte 102,15,111,37,189,24,0,0 // movdqa 0x18bd(%rip),%xmm4 # 2350 <_sk_difference_sse41_8bit+0x29d>
.byte 102,15,235,212 // por %xmm4,%xmm2
.byte 102,15,235,220 // por %xmm4,%xmm3
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -60976,7 +60964,7 @@ _sk_load_g8_dst_sse41_8bit:
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 232,255,255,255,221 // callq ffffffffde000b44 <_sk_exclusion_sse41_8bit+0xffffffffddffe9cb>
+ .byte 232,255,255,255,221 // callq ffffffffde000b44 <_sk_difference_sse41_8bit+0xffffffffddffea91>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,210 // callq *%rdx
@@ -61004,7 +60992,7 @@ _sk_srcover_rgba_8888_sse41_8bit:
.byte 243,70,15,111,68,138,16 // movdqu 0x10(%rdx,%r9,4),%xmm8
.byte 243,70,15,111,12,138 // movdqu (%rdx,%r9,4),%xmm9
.byte 77,133,192 // test %r8,%r8
- .byte 102,15,111,37,99,24,0,0 // movdqa 0x1863(%rip),%xmm4 # 23f0 <_sk_exclusion_sse41_8bit+0x277>
+ .byte 102,15,111,37,211,23,0,0 // movdqa 0x17d3(%rip),%xmm4 # 2360 <_sk_difference_sse41_8bit+0x2ad>
.byte 102,15,111,241 // movdqa %xmm1,%xmm6
.byte 102,15,56,0,244 // pshufb %xmm4,%xmm6
.byte 102,15,111,248 // movdqa %xmm0,%xmm7
@@ -61126,7 +61114,7 @@ _sk_scale_1_float_sse41_8bit:
.byte 102,15,111,232 // movdqa %xmm0,%xmm5
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 243,15,16,0 // movss (%rax),%xmm0
- .byte 243,15,89,5,70,21,0,0 // mulss 0x1546(%rip),%xmm0 # 22b4 <_sk_exclusion_sse41_8bit+0x13b>
+ .byte 243,15,89,5,178,20,0,0 // mulss 0x14b2(%rip),%xmm0 # 2220 <_sk_difference_sse41_8bit+0x16d>
.byte 243,15,44,192 // cvttss2si %xmm0,%eax
.byte 15,87,192 // xorps %xmm0,%xmm0
.byte 102,68,15,56,48,197 // pmovzxbw %xmm5,%xmm8
@@ -61134,7 +61122,7 @@ _sk_scale_1_float_sse41_8bit:
.byte 102,68,15,56,48,204 // pmovzxbw %xmm4,%xmm9
.byte 102,15,104,224 // punpckhbw %xmm0,%xmm4
.byte 102,15,110,240 // movd %eax,%xmm6
- .byte 102,15,56,0,53,106,22,0,0 // pshufb 0x166a(%rip),%xmm6 # 2400 <_sk_exclusion_sse41_8bit+0x287>
+ .byte 102,15,56,0,53,218,21,0,0 // pshufb 0x15da(%rip),%xmm6 # 2370 <_sk_difference_sse41_8bit+0x2bd>
.byte 102,15,111,206 // movdqa %xmm6,%xmm1
.byte 102,65,15,213,201 // pmullw %xmm9,%xmm1
.byte 102,15,111,198 // movdqa %xmm6,%xmm0
@@ -61169,11 +61157,11 @@ _sk_scale_u8_sse41_8bit:
.byte 77,133,201 // test %r9,%r9
.byte 15,133,160,0,0,0 // jne ea7 <_sk_scale_u8_sse41_8bit+0xc1>
.byte 102,66,15,56,48,52,2 // pmovzxbw (%rdx,%r8,1),%xmm6
- .byte 102,15,219,53,250,21,0,0 // pand 0x15fa(%rip),%xmm6 # 2410 <_sk_exclusion_sse41_8bit+0x297>
+ .byte 102,15,219,53,106,21,0,0 // pand 0x156a(%rip),%xmm6 # 2380 <_sk_difference_sse41_8bit+0x2cd>
.byte 102,69,15,239,192 // pxor %xmm8,%xmm8
.byte 102,15,111,254 // movdqa %xmm6,%xmm7
- .byte 102,15,56,0,61,248,21,0,0 // pshufb 0x15f8(%rip),%xmm7 # 2420 <_sk_exclusion_sse41_8bit+0x2a7>
- .byte 102,15,56,0,53,255,21,0,0 // pshufb 0x15ff(%rip),%xmm6 # 2430 <_sk_exclusion_sse41_8bit+0x2b7>
+ .byte 102,15,56,0,61,104,21,0,0 // pshufb 0x1568(%rip),%xmm7 # 2390 <_sk_difference_sse41_8bit+0x2dd>
+ .byte 102,15,56,0,53,111,21,0,0 // pshufb 0x156f(%rip),%xmm6 # 23a0 <_sk_difference_sse41_8bit+0x2ed>
.byte 102,68,15,56,48,200 // pmovzxbw %xmm0,%xmm9
.byte 102,65,15,104,192 // punpckhbw %xmm8,%xmm0
.byte 102,68,15,56,48,209 // pmovzxbw %xmm1,%xmm10
@@ -61258,7 +61246,7 @@ FUNCTION(_sk_lerp_1_float_sse41_8bit)
_sk_lerp_1_float_sse41_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 243,15,16,32 // movss (%rax),%xmm4
- .byte 243,15,89,37,74,19,0,0 // mulss 0x134a(%rip),%xmm4 # 22b8 <_sk_exclusion_sse41_8bit+0x13f>
+ .byte 243,15,89,37,182,18,0,0 // mulss 0x12b6(%rip),%xmm4 # 2224 <_sk_difference_sse41_8bit+0x171>
.byte 243,15,44,196 // cvttss2si %xmm4,%eax
.byte 102,15,110,224 // movd %eax,%xmm4
.byte 102,15,96,228 // punpcklbw %xmm4,%xmm4
@@ -61269,7 +61257,7 @@ _sk_lerp_1_float_sse41_8bit:
.byte 102,65,15,104,193 // punpckhbw %xmm9,%xmm0
.byte 102,68,15,56,48,217 // pmovzxbw %xmm1,%xmm11
.byte 102,65,15,104,201 // punpckhbw %xmm9,%xmm1
- .byte 102,15,56,0,61,151,20,0,0 // pshufb 0x1497(%rip),%xmm7 # 2440 <_sk_exclusion_sse41_8bit+0x2c7>
+ .byte 102,15,56,0,61,7,20,0,0 // pshufb 0x1407(%rip),%xmm7 # 23b0 <_sk_difference_sse41_8bit+0x2fd>
.byte 102,68,15,111,231 // movdqa %xmm7,%xmm12
.byte 102,69,15,213,227 // pmullw %xmm11,%xmm12
.byte 102,68,15,111,239 // movdqa %xmm7,%xmm13
@@ -61332,11 +61320,11 @@ _sk_lerp_u8_sse41_8bit:
.byte 77,133,201 // test %r9,%r9
.byte 15,133,46,1,0,0 // jne 11d1 <_sk_lerp_u8_sse41_8bit+0x14f>
.byte 102,66,15,56,48,60,2 // pmovzxbw (%rdx,%r8,1),%xmm7
- .byte 102,15,219,61,158,19,0,0 // pand 0x139e(%rip),%xmm7 # 2450 <_sk_exclusion_sse41_8bit+0x2d7>
+ .byte 102,15,219,61,14,19,0,0 // pand 0x130e(%rip),%xmm7 # 23c0 <_sk_difference_sse41_8bit+0x30d>
.byte 102,69,15,239,192 // pxor %xmm8,%xmm8
.byte 102,15,111,247 // movdqa %xmm7,%xmm6
- .byte 102,15,56,0,53,156,19,0,0 // pshufb 0x139c(%rip),%xmm6 # 2460 <_sk_exclusion_sse41_8bit+0x2e7>
- .byte 102,15,56,0,61,163,19,0,0 // pshufb 0x13a3(%rip),%xmm7 # 2470 <_sk_exclusion_sse41_8bit+0x2f7>
+ .byte 102,15,56,0,53,12,19,0,0 // pshufb 0x130c(%rip),%xmm6 # 23d0 <_sk_difference_sse41_8bit+0x31d>
+ .byte 102,15,56,0,61,19,19,0,0 // pshufb 0x1313(%rip),%xmm7 # 23e0 <_sk_difference_sse41_8bit+0x32d>
.byte 102,68,15,56,48,200 // pmovzxbw %xmm0,%xmm9
.byte 102,65,15,104,192 // punpckhbw %xmm8,%xmm0
.byte 102,68,15,56,48,209 // pmovzxbw %xmm1,%xmm10
@@ -61431,7 +61419,7 @@ _sk_lerp_u8_sse41_8bit:
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 233,255,255,255,222 // jmpq ffffffffdf00127c <_sk_exclusion_sse41_8bit+0xffffffffdefff103>
+ .byte 233,255,255,255,222 // jmpq ffffffffdf00127c <_sk_difference_sse41_8bit+0xffffffffdefff1c9>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,211 // callq *%rbx
@@ -61465,7 +61453,7 @@ HIDDEN _sk_black_color_sse41_8bit
FUNCTION(_sk_black_color_sse41_8bit)
_sk_black_color_sse41_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 15,40,5,219,17,0,0 // movaps 0x11db(%rip),%xmm0 # 2480 <_sk_exclusion_sse41_8bit+0x307>
+ .byte 15,40,5,75,17,0,0 // movaps 0x114b(%rip),%xmm0 # 23f0 <_sk_difference_sse41_8bit+0x33d>
.byte 15,40,200 // movaps %xmm0,%xmm1
.byte 255,224 // jmpq *%rax
@@ -61491,7 +61479,7 @@ HIDDEN _sk_srcatop_sse41_8bit
.globl _sk_srcatop_sse41_8bit
FUNCTION(_sk_srcatop_sse41_8bit)
_sk_srcatop_sse41_8bit:
- .byte 102,68,15,111,21,199,17,0,0 // movdqa 0x11c7(%rip),%xmm10 # 2490 <_sk_exclusion_sse41_8bit+0x317>
+ .byte 102,68,15,111,21,55,17,0,0 // movdqa 0x1137(%rip),%xmm10 # 2400 <_sk_difference_sse41_8bit+0x34d>
.byte 102,68,15,111,219 // movdqa %xmm3,%xmm11
.byte 102,68,15,56,48,195 // pmovzxbw %xmm3,%xmm8
.byte 102,15,111,235 // movdqa %xmm3,%xmm5
@@ -61561,7 +61549,7 @@ HIDDEN _sk_dstatop_sse41_8bit
.globl _sk_dstatop_sse41_8bit
FUNCTION(_sk_dstatop_sse41_8bit)
_sk_dstatop_sse41_8bit:
- .byte 102,68,15,111,29,156,16,0,0 // movdqa 0x109c(%rip),%xmm11 # 24a0 <_sk_exclusion_sse41_8bit+0x327>
+ .byte 102,68,15,111,29,12,16,0,0 // movdqa 0x100c(%rip),%xmm11 # 2410 <_sk_difference_sse41_8bit+0x35d>
.byte 102,68,15,111,233 // movdqa %xmm1,%xmm13
.byte 102,69,15,56,0,235 // pshufb %xmm11,%xmm13
.byte 102,68,15,111,248 // movdqa %xmm0,%xmm15
@@ -61633,7 +61621,7 @@ FUNCTION(_sk_srcin_sse41_8bit)
_sk_srcin_sse41_8bit:
.byte 102,15,111,225 // movdqa %xmm1,%xmm4
.byte 102,15,111,232 // movdqa %xmm0,%xmm5
- .byte 102,15,111,5,99,15,0,0 // movdqa 0xf63(%rip),%xmm0 # 24b0 <_sk_exclusion_sse41_8bit+0x337>
+ .byte 102,15,111,5,211,14,0,0 // movdqa 0xed3(%rip),%xmm0 # 2420 <_sk_difference_sse41_8bit+0x36d>
.byte 102,15,111,243 // movdqa %xmm3,%xmm6
.byte 102,15,56,0,240 // pshufb %xmm0,%xmm6
.byte 102,15,111,250 // movdqa %xmm2,%xmm7
@@ -61668,7 +61656,7 @@ HIDDEN _sk_dstin_sse41_8bit
.globl _sk_dstin_sse41_8bit
FUNCTION(_sk_dstin_sse41_8bit)
_sk_dstin_sse41_8bit:
- .byte 102,15,111,37,230,14,0,0 // movdqa 0xee6(%rip),%xmm4 # 24c0 <_sk_exclusion_sse41_8bit+0x347>
+ .byte 102,15,111,37,86,14,0,0 // movdqa 0xe56(%rip),%xmm4 # 2430 <_sk_difference_sse41_8bit+0x37d>
.byte 102,15,56,0,204 // pshufb %xmm4,%xmm1
.byte 102,15,56,0,196 // pshufb %xmm4,%xmm0
.byte 102,69,15,239,210 // pxor %xmm10,%xmm10
@@ -61707,7 +61695,7 @@ FUNCTION(_sk_srcout_sse41_8bit)
_sk_srcout_sse41_8bit:
.byte 102,15,111,225 // movdqa %xmm1,%xmm4
.byte 102,15,111,232 // movdqa %xmm0,%xmm5
- .byte 102,15,111,5,89,14,0,0 // movdqa 0xe59(%rip),%xmm0 # 24d0 <_sk_exclusion_sse41_8bit+0x357>
+ .byte 102,15,111,5,201,13,0,0 // movdqa 0xdc9(%rip),%xmm0 # 2440 <_sk_difference_sse41_8bit+0x38d>
.byte 102,15,111,250 // movdqa %xmm2,%xmm7
.byte 102,15,56,0,248 // pshufb %xmm0,%xmm7
.byte 102,15,111,243 // movdqa %xmm3,%xmm6
@@ -61745,7 +61733,7 @@ HIDDEN _sk_dstout_sse41_8bit
.globl _sk_dstout_sse41_8bit
FUNCTION(_sk_dstout_sse41_8bit)
_sk_dstout_sse41_8bit:
- .byte 102,15,111,37,208,13,0,0 // movdqa 0xdd0(%rip),%xmm4 # 24e0 <_sk_exclusion_sse41_8bit+0x367>
+ .byte 102,15,111,37,64,13,0,0 // movdqa 0xd40(%rip),%xmm4 # 2450 <_sk_difference_sse41_8bit+0x39d>
.byte 102,15,56,0,196 // pshufb %xmm4,%xmm0
.byte 102,15,56,0,204 // pshufb %xmm4,%xmm1
.byte 102,15,118,228 // pcmpeqd %xmm4,%xmm4
@@ -61785,7 +61773,7 @@ HIDDEN _sk_srcover_sse41_8bit
.globl _sk_srcover_sse41_8bit
FUNCTION(_sk_srcover_sse41_8bit)
_sk_srcover_sse41_8bit:
- .byte 102,15,111,53,63,13,0,0 // movdqa 0xd3f(%rip),%xmm6 # 24f0 <_sk_exclusion_sse41_8bit+0x377>
+ .byte 102,15,111,53,175,12,0,0 // movdqa 0xcaf(%rip),%xmm6 # 2460 <_sk_difference_sse41_8bit+0x3ad>
.byte 102,68,15,111,217 // movdqa %xmm1,%xmm11
.byte 102,68,15,56,0,222 // pshufb %xmm6,%xmm11
.byte 102,15,111,232 // movdqa %xmm0,%xmm5
@@ -61826,7 +61814,7 @@ HIDDEN _sk_dstover_sse41_8bit
.globl _sk_dstover_sse41_8bit
FUNCTION(_sk_dstover_sse41_8bit)
_sk_dstover_sse41_8bit:
- .byte 102,68,15,111,5,159,12,0,0 // movdqa 0xc9f(%rip),%xmm8 # 2500 <_sk_exclusion_sse41_8bit+0x387>
+ .byte 102,68,15,111,5,15,12,0,0 // movdqa 0xc0f(%rip),%xmm8 # 2470 <_sk_difference_sse41_8bit+0x3bd>
.byte 102,68,15,111,209 // movdqa %xmm1,%xmm10
.byte 102,68,15,56,48,201 // pmovzxbw %xmm1,%xmm9
.byte 102,15,252,203 // paddb %xmm3,%xmm1
@@ -61905,7 +61893,7 @@ _sk_multiply_sse41_8bit:
.byte 102,15,111,218 // movdqa %xmm2,%xmm3
.byte 102,15,111,209 // movdqa %xmm1,%xmm2
.byte 102,15,111,200 // movdqa %xmm0,%xmm1
- .byte 102,68,15,111,53,109,11,0,0 // movdqa 0xb6d(%rip),%xmm14 # 2510 <_sk_exclusion_sse41_8bit+0x397>
+ .byte 102,68,15,111,53,221,10,0,0 // movdqa 0xadd(%rip),%xmm14 # 2480 <_sk_difference_sse41_8bit+0x3cd>
.byte 102,68,15,111,195 // movdqa %xmm3,%xmm8
.byte 102,15,111,235 // movdqa %xmm3,%xmm5
.byte 102,65,15,56,0,238 // pshufb %xmm14,%xmm5
@@ -62034,7 +62022,7 @@ HIDDEN _sk_xor__sse41_8bit
.globl _sk_xor__sse41_8bit
FUNCTION(_sk_xor__sse41_8bit)
_sk_xor__sse41_8bit:
- .byte 102,68,15,111,21,59,9,0,0 // movdqa 0x93b(%rip),%xmm10 # 2520 <_sk_exclusion_sse41_8bit+0x3a7>
+ .byte 102,68,15,111,21,171,8,0,0 // movdqa 0x8ab(%rip),%xmm10 # 2490 <_sk_difference_sse41_8bit+0x3dd>
.byte 102,68,15,111,226 // movdqa %xmm2,%xmm12
.byte 102,68,15,56,48,194 // pmovzxbw %xmm2,%xmm8
.byte 102,15,111,234 // movdqa %xmm2,%xmm5
@@ -62107,7 +62095,7 @@ HIDDEN _sk_darken_sse41_8bit
FUNCTION(_sk_darken_sse41_8bit)
_sk_darken_sse41_8bit:
.byte 102,68,15,111,240 // movdqa %xmm0,%xmm14
- .byte 102,68,15,111,5,254,7,0,0 // movdqa 0x7fe(%rip),%xmm8 # 2530 <_sk_exclusion_sse41_8bit+0x3b7>
+ .byte 102,68,15,111,5,110,7,0,0 // movdqa 0x76e(%rip),%xmm8 # 24a0 <_sk_difference_sse41_8bit+0x3ed>
.byte 102,68,15,111,219 // movdqa %xmm3,%xmm11
.byte 102,68,15,56,48,203 // pmovzxbw %xmm3,%xmm9
.byte 102,15,111,243 // movdqa %xmm3,%xmm6
@@ -62174,7 +62162,7 @@ _sk_darken_sse41_8bit:
.byte 102,15,248,231 // psubb %xmm7,%xmm4
.byte 102,15,248,206 // psubb %xmm6,%xmm1
.byte 102,68,15,248,245 // psubb %xmm5,%xmm14
- .byte 15,40,5,194,6,0,0 // movaps 0x6c2(%rip),%xmm0 # 2540 <_sk_exclusion_sse41_8bit+0x3c7>
+ .byte 15,40,5,50,6,0,0 // movaps 0x632(%rip),%xmm0 # 24b0 <_sk_difference_sse41_8bit+0x3fd>
.byte 102,68,15,56,16,244 // pblendvb %xmm0,%xmm4,%xmm14
.byte 102,65,15,56,16,200 // pblendvb %xmm0,%xmm8,%xmm1
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -62186,7 +62174,7 @@ HIDDEN _sk_lighten_sse41_8bit
FUNCTION(_sk_lighten_sse41_8bit)
_sk_lighten_sse41_8bit:
.byte 102,68,15,111,240 // movdqa %xmm0,%xmm14
- .byte 102,68,15,111,5,175,6,0,0 // movdqa 0x6af(%rip),%xmm8 # 2550 <_sk_exclusion_sse41_8bit+0x3d7>
+ .byte 102,68,15,111,5,31,6,0,0 // movdqa 0x61f(%rip),%xmm8 # 24c0 <_sk_difference_sse41_8bit+0x40d>
.byte 102,68,15,111,219 // movdqa %xmm3,%xmm11
.byte 102,68,15,56,48,203 // pmovzxbw %xmm3,%xmm9
.byte 102,15,111,243 // movdqa %xmm3,%xmm6
@@ -62253,173 +62241,142 @@ _sk_lighten_sse41_8bit:
.byte 102,15,248,231 // psubb %xmm7,%xmm4
.byte 102,15,248,206 // psubb %xmm6,%xmm1
.byte 102,68,15,248,245 // psubb %xmm5,%xmm14
- .byte 15,40,5,115,5,0,0 // movaps 0x573(%rip),%xmm0 # 2560 <_sk_exclusion_sse41_8bit+0x3e7>
+ .byte 15,40,5,227,4,0,0 // movaps 0x4e3(%rip),%xmm0 # 24d0 <_sk_difference_sse41_8bit+0x41d>
.byte 102,68,15,56,16,244 // pblendvb %xmm0,%xmm4,%xmm14
.byte 102,65,15,56,16,200 // pblendvb %xmm0,%xmm8,%xmm1
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 102,65,15,111,198 // movdqa %xmm14,%xmm0
.byte 255,224 // jmpq *%rax
+HIDDEN _sk_exclusion_sse41_8bit
+.globl _sk_exclusion_sse41_8bit
+FUNCTION(_sk_exclusion_sse41_8bit)
+_sk_exclusion_sse41_8bit:
+ .byte 102,69,15,239,192 // pxor %xmm8,%xmm8
+ .byte 102,68,15,111,216 // movdqa %xmm0,%xmm11
+ .byte 102,69,15,104,216 // punpckhbw %xmm8,%xmm11
+ .byte 102,68,15,111,225 // movdqa %xmm1,%xmm12
+ .byte 102,69,15,104,224 // punpckhbw %xmm8,%xmm12
+ .byte 102,68,15,56,48,200 // pmovzxbw %xmm0,%xmm9
+ .byte 102,68,15,56,48,209 // pmovzxbw %xmm1,%xmm10
+ .byte 102,15,56,48,250 // pmovzxbw %xmm2,%xmm7
+ .byte 102,15,252,194 // paddb %xmm2,%xmm0
+ .byte 102,15,111,234 // movdqa %xmm2,%xmm5
+ .byte 102,65,15,104,232 // punpckhbw %xmm8,%xmm5
+ .byte 102,15,56,48,227 // pmovzxbw %xmm3,%xmm4
+ .byte 102,15,252,203 // paddb %xmm3,%xmm1
+ .byte 102,15,111,243 // movdqa %xmm3,%xmm6
+ .byte 102,65,15,104,240 // punpckhbw %xmm8,%xmm6
+ .byte 102,65,15,213,244 // pmullw %xmm12,%xmm6
+ .byte 102,65,15,213,235 // pmullw %xmm11,%xmm5
+ .byte 102,65,15,213,226 // pmullw %xmm10,%xmm4
+ .byte 102,65,15,213,249 // pmullw %xmm9,%xmm7
+ .byte 102,65,15,253,235 // paddw %xmm11,%xmm5
+ .byte 102,65,15,253,244 // paddw %xmm12,%xmm6
+ .byte 102,65,15,253,249 // paddw %xmm9,%xmm7
+ .byte 102,65,15,253,226 // paddw %xmm10,%xmm4
+ .byte 102,15,113,214,8 // psrlw $0x8,%xmm6
+ .byte 102,15,113,213,8 // psrlw $0x8,%xmm5
+ .byte 102,15,113,212,8 // psrlw $0x8,%xmm4
+ .byte 102,15,113,215,8 // psrlw $0x8,%xmm7
+ .byte 102,15,103,253 // packuswb %xmm5,%xmm7
+ .byte 102,15,103,230 // packuswb %xmm6,%xmm4
+ .byte 102,15,111,45,73,4,0,0 // movdqa 0x449(%rip),%xmm5 # 24e0 <_sk_difference_sse41_8bit+0x42d>
+ .byte 102,15,248,204 // psubb %xmm4,%xmm1
+ .byte 102,15,219,229 // pand %xmm5,%xmm4
+ .byte 102,15,219,239 // pand %xmm7,%xmm5
+ .byte 102,15,248,199 // psubb %xmm7,%xmm0
+ .byte 102,15,248,197 // psubb %xmm5,%xmm0
+ .byte 102,15,248,204 // psubb %xmm4,%xmm1
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 255,224 // jmpq *%rax
+
HIDDEN _sk_difference_sse41_8bit
.globl _sk_difference_sse41_8bit
FUNCTION(_sk_difference_sse41_8bit)
_sk_difference_sse41_8bit:
- .byte 102,68,15,111,240 // movdqa %xmm0,%xmm14
- .byte 102,68,15,111,5,96,5,0,0 // movdqa 0x560(%rip),%xmm8 # 2570 <_sk_exclusion_sse41_8bit+0x3f7>
+ .byte 102,68,15,111,193 // movdqa %xmm1,%xmm8
+ .byte 102,15,111,13,48,4,0,0 // movdqa 0x430(%rip),%xmm1 # 24f0 <_sk_difference_sse41_8bit+0x43d>
.byte 102,68,15,111,219 // movdqa %xmm3,%xmm11
.byte 102,68,15,56,48,203 // pmovzxbw %xmm3,%xmm9
- .byte 102,15,111,251 // movdqa %xmm3,%xmm7
- .byte 102,65,15,56,0,248 // pshufb %xmm8,%xmm7
+ .byte 102,15,111,243 // movdqa %xmm3,%xmm6
+ .byte 102,15,56,0,241 // pshufb %xmm1,%xmm6
.byte 102,68,15,111,234 // movdqa %xmm2,%xmm13
.byte 102,68,15,56,48,210 // pmovzxbw %xmm2,%xmm10
- .byte 102,15,111,234 // movdqa %xmm2,%xmm5
- .byte 102,65,15,56,0,232 // pshufb %xmm8,%xmm5
+ .byte 102,15,111,226 // movdqa %xmm2,%xmm4
+ .byte 102,15,56,0,225 // pshufb %xmm1,%xmm4
.byte 102,69,15,239,228 // pxor %xmm12,%xmm12
- .byte 102,65,15,104,196 // punpckhbw %xmm12,%xmm0
- .byte 102,15,111,225 // movdqa %xmm1,%xmm4
- .byte 102,65,15,104,228 // punpckhbw %xmm12,%xmm4
- .byte 102,69,15,56,48,254 // pmovzxbw %xmm14,%xmm15
- .byte 102,15,56,48,245 // pmovzxbw %xmm5,%xmm6
+ .byte 102,15,111,232 // movdqa %xmm0,%xmm5
.byte 102,65,15,104,236 // punpckhbw %xmm12,%xmm5
- .byte 102,15,213,232 // pmullw %xmm0,%xmm5
- .byte 102,15,253,232 // paddw %xmm0,%xmm5
- .byte 102,15,56,48,199 // pmovzxbw %xmm7,%xmm0
- .byte 102,65,15,104,252 // punpckhbw %xmm12,%xmm7
- .byte 102,15,213,252 // pmullw %xmm4,%xmm7
- .byte 102,15,253,252 // paddw %xmm4,%xmm7
- .byte 102,65,15,111,230 // movdqa %xmm14,%xmm4
- .byte 102,68,15,252,242 // paddb %xmm2,%xmm14
- .byte 102,65,15,213,247 // pmullw %xmm15,%xmm6
- .byte 102,65,15,253,247 // paddw %xmm15,%xmm6
- .byte 102,68,15,56,48,249 // pmovzxbw %xmm1,%xmm15
- .byte 102,65,15,213,199 // pmullw %xmm15,%xmm0
- .byte 102,65,15,253,199 // paddw %xmm15,%xmm0
- .byte 102,68,15,111,249 // movdqa %xmm1,%xmm15
- .byte 102,15,252,203 // paddb %xmm3,%xmm1
- .byte 102,15,113,213,8 // psrlw $0x8,%xmm5
+ .byte 102,65,15,111,200 // movdqa %xmm8,%xmm1
+ .byte 102,65,15,104,204 // punpckhbw %xmm12,%xmm1
+ .byte 102,68,15,56,48,248 // pmovzxbw %xmm0,%xmm15
+ .byte 102,68,15,56,48,244 // pmovzxbw %xmm4,%xmm14
+ .byte 102,65,15,104,228 // punpckhbw %xmm12,%xmm4
+ .byte 102,15,213,229 // pmullw %xmm5,%xmm4
+ .byte 102,15,253,229 // paddw %xmm5,%xmm4
+ .byte 102,15,56,48,254 // pmovzxbw %xmm6,%xmm7
+ .byte 102,65,15,104,244 // punpckhbw %xmm12,%xmm6
+ .byte 102,15,213,241 // pmullw %xmm1,%xmm6
+ .byte 102,15,253,241 // paddw %xmm1,%xmm6
+ .byte 102,15,111,232 // movdqa %xmm0,%xmm5
+ .byte 102,15,252,194 // paddb %xmm2,%xmm0
+ .byte 102,69,15,213,247 // pmullw %xmm15,%xmm14
+ .byte 102,69,15,253,247 // paddw %xmm15,%xmm14
+ .byte 102,65,15,56,48,200 // pmovzxbw %xmm8,%xmm1
+ .byte 102,15,213,249 // pmullw %xmm1,%xmm7
+ .byte 102,15,253,249 // paddw %xmm1,%xmm7
+ .byte 102,65,15,111,200 // movdqa %xmm8,%xmm1
+ .byte 102,68,15,252,195 // paddb %xmm3,%xmm8
+ .byte 102,15,113,212,8 // psrlw $0x8,%xmm4
+ .byte 102,65,15,113,214,8 // psrlw $0x8,%xmm14
+ .byte 102,68,15,103,244 // packuswb %xmm4,%xmm14
.byte 102,15,113,214,8 // psrlw $0x8,%xmm6
- .byte 102,15,103,245 // packuswb %xmm5,%xmm6
.byte 102,15,113,215,8 // psrlw $0x8,%xmm7
- .byte 102,15,113,208,8 // psrlw $0x8,%xmm0
- .byte 102,15,103,199 // packuswb %xmm7,%xmm0
- .byte 102,69,15,56,0,248 // pshufb %xmm8,%xmm15
- .byte 102,65,15,56,0,224 // pshufb %xmm8,%xmm4
+ .byte 102,15,103,254 // packuswb %xmm6,%xmm7
+ .byte 102,15,111,37,117,3,0,0 // movdqa 0x375(%rip),%xmm4 # 24f0 <_sk_difference_sse41_8bit+0x43d>
+ .byte 102,15,56,0,204 // pshufb %xmm4,%xmm1
+ .byte 102,15,56,0,236 // pshufb %xmm4,%xmm5
.byte 102,69,15,104,236 // punpckhbw %xmm12,%xmm13
.byte 102,69,15,104,220 // punpckhbw %xmm12,%xmm11
- .byte 102,15,56,48,236 // pmovzxbw %xmm4,%xmm5
- .byte 102,65,15,56,48,255 // pmovzxbw %xmm15,%xmm7
- .byte 102,65,15,104,228 // punpckhbw %xmm12,%xmm4
- .byte 102,69,15,104,252 // punpckhbw %xmm12,%xmm15
- .byte 102,69,15,213,251 // pmullw %xmm11,%xmm15
- .byte 102,65,15,213,229 // pmullw %xmm13,%xmm4
- .byte 102,65,15,213,249 // pmullw %xmm9,%xmm7
- .byte 102,65,15,213,234 // pmullw %xmm10,%xmm5
- .byte 102,65,15,253,229 // paddw %xmm13,%xmm4
- .byte 102,69,15,253,251 // paddw %xmm11,%xmm15
- .byte 102,65,15,253,234 // paddw %xmm10,%xmm5
- .byte 102,65,15,253,249 // paddw %xmm9,%xmm7
- .byte 102,65,15,113,215,8 // psrlw $0x8,%xmm15
- .byte 102,15,113,212,8 // psrlw $0x8,%xmm4
- .byte 102,15,113,215,8 // psrlw $0x8,%xmm7
- .byte 102,15,113,213,8 // psrlw $0x8,%xmm5
- .byte 102,15,103,236 // packuswb %xmm4,%xmm5
- .byte 102,65,15,103,255 // packuswb %xmm15,%xmm7
- .byte 102,15,218,199 // pminub %xmm7,%xmm0
- .byte 102,15,218,245 // pminub %xmm5,%xmm6
- .byte 102,15,252,246 // paddb %xmm6,%xmm6
- .byte 102,15,252,192 // paddb %xmm0,%xmm0
- .byte 102,68,15,111,193 // movdqa %xmm1,%xmm8
- .byte 102,68,15,248,192 // psubb %xmm0,%xmm8
- .byte 102,65,15,111,230 // movdqa %xmm14,%xmm4
- .byte 102,15,248,230 // psubb %xmm6,%xmm4
- .byte 102,15,248,207 // psubb %xmm7,%xmm1
- .byte 102,68,15,248,245 // psubb %xmm5,%xmm14
- .byte 15,40,5,28,4,0,0 // movaps 0x41c(%rip),%xmm0 # 2580 <_sk_exclusion_sse41_8bit+0x407>
- .byte 102,68,15,56,16,244 // pblendvb %xmm0,%xmm4,%xmm14
- .byte 102,65,15,56,16,200 // pblendvb %xmm0,%xmm8,%xmm1
- .byte 72,173 // lods %ds:(%rsi),%rax
- .byte 102,65,15,111,198 // movdqa %xmm14,%xmm0
- .byte 255,224 // jmpq *%rax
-
-HIDDEN _sk_exclusion_sse41_8bit
-.globl _sk_exclusion_sse41_8bit
-FUNCTION(_sk_exclusion_sse41_8bit)
-_sk_exclusion_sse41_8bit:
- .byte 102,69,15,239,201 // pxor %xmm9,%xmm9
- .byte 102,15,111,248 // movdqa %xmm0,%xmm7
- .byte 102,65,15,104,249 // punpckhbw %xmm9,%xmm7
- .byte 102,68,15,56,48,208 // pmovzxbw %xmm0,%xmm10
- .byte 102,68,15,111,218 // movdqa %xmm2,%xmm11
- .byte 102,15,111,234 // movdqa %xmm2,%xmm5
- .byte 102,65,15,104,233 // punpckhbw %xmm9,%xmm5
- .byte 102,68,15,252,216 // paddb %xmm0,%xmm11
- .byte 102,68,15,111,5,231,3,0,0 // movdqa 0x3e7(%rip),%xmm8 # 2590 <_sk_exclusion_sse41_8bit+0x417>
- .byte 102,65,15,56,0,192 // pshufb %xmm8,%xmm0
- .byte 102,15,56,48,240 // pmovzxbw %xmm0,%xmm6
- .byte 102,65,15,104,193 // punpckhbw %xmm9,%xmm0
- .byte 102,15,213,197 // pmullw %xmm5,%xmm0
- .byte 102,15,253,197 // paddw %xmm5,%xmm0
- .byte 102,15,213,239 // pmullw %xmm7,%xmm5
- .byte 102,15,253,239 // paddw %xmm7,%xmm5
- .byte 102,15,56,48,250 // pmovzxbw %xmm2,%xmm7
- .byte 102,15,213,247 // pmullw %xmm7,%xmm6
- .byte 102,15,253,247 // paddw %xmm7,%xmm6
- .byte 102,65,15,213,250 // pmullw %xmm10,%xmm7
- .byte 102,65,15,253,250 // paddw %xmm10,%xmm7
- .byte 102,15,111,225 // movdqa %xmm1,%xmm4
- .byte 102,65,15,104,225 // punpckhbw %xmm9,%xmm4
- .byte 102,15,113,213,8 // psrlw $0x8,%xmm5
- .byte 102,15,113,215,8 // psrlw $0x8,%xmm7
- .byte 102,15,103,253 // packuswb %xmm5,%xmm7
- .byte 102,15,252,255 // paddb %xmm7,%xmm7
- .byte 102,65,15,111,235 // movdqa %xmm11,%xmm5
- .byte 102,15,248,239 // psubb %xmm7,%xmm5
- .byte 102,68,15,56,48,209 // pmovzxbw %xmm1,%xmm10
- .byte 102,15,113,208,8 // psrlw $0x8,%xmm0
- .byte 102,15,113,214,8 // psrlw $0x8,%xmm6
- .byte 102,15,103,240 // packuswb %xmm0,%xmm6
- .byte 102,68,15,248,222 // psubb %xmm6,%xmm11
- .byte 15,40,5,124,3,0,0 // movaps 0x37c(%rip),%xmm0 # 25a0 <_sk_exclusion_sse41_8bit+0x427>
- .byte 102,68,15,56,16,221 // pblendvb %xmm0,%xmm5,%xmm11
- .byte 102,15,111,235 // movdqa %xmm3,%xmm5
- .byte 102,15,111,243 // movdqa %xmm3,%xmm6
- .byte 102,65,15,104,241 // punpckhbw %xmm9,%xmm6
- .byte 102,15,252,233 // paddb %xmm1,%xmm5
- .byte 102,65,15,56,0,200 // pshufb %xmm8,%xmm1
- .byte 102,15,56,48,249 // pmovzxbw %xmm1,%xmm7
- .byte 102,65,15,104,201 // punpckhbw %xmm9,%xmm1
- .byte 102,15,213,206 // pmullw %xmm6,%xmm1
- .byte 102,15,253,206 // paddw %xmm6,%xmm1
- .byte 102,15,213,244 // pmullw %xmm4,%xmm6
- .byte 102,15,253,244 // paddw %xmm4,%xmm6
- .byte 102,15,56,48,227 // pmovzxbw %xmm3,%xmm4
- .byte 102,15,213,252 // pmullw %xmm4,%xmm7
- .byte 102,15,253,252 // paddw %xmm4,%xmm7
+ .byte 102,15,56,48,229 // pmovzxbw %xmm5,%xmm4
+ .byte 102,15,56,48,241 // pmovzxbw %xmm1,%xmm6
+ .byte 102,65,15,104,236 // punpckhbw %xmm12,%xmm5
+ .byte 102,65,15,104,204 // punpckhbw %xmm12,%xmm1
+ .byte 102,65,15,213,203 // pmullw %xmm11,%xmm1
+ .byte 102,65,15,213,237 // pmullw %xmm13,%xmm5
+ .byte 102,65,15,213,241 // pmullw %xmm9,%xmm6
.byte 102,65,15,213,226 // pmullw %xmm10,%xmm4
+ .byte 102,65,15,253,237 // paddw %xmm13,%xmm5
+ .byte 102,65,15,253,203 // paddw %xmm11,%xmm1
.byte 102,65,15,253,226 // paddw %xmm10,%xmm4
+ .byte 102,65,15,253,241 // paddw %xmm9,%xmm6
+ .byte 102,15,113,209,8 // psrlw $0x8,%xmm1
+ .byte 102,15,113,213,8 // psrlw $0x8,%xmm5
.byte 102,15,113,214,8 // psrlw $0x8,%xmm6
.byte 102,15,113,212,8 // psrlw $0x8,%xmm4
- .byte 102,15,103,230 // packuswb %xmm6,%xmm4
- .byte 102,15,252,228 // paddb %xmm4,%xmm4
- .byte 102,15,111,245 // movdqa %xmm5,%xmm6
- .byte 102,15,248,244 // psubb %xmm4,%xmm6
- .byte 102,15,113,209,8 // psrlw $0x8,%xmm1
- .byte 102,15,113,215,8 // psrlw $0x8,%xmm7
- .byte 102,15,103,249 // packuswb %xmm1,%xmm7
- .byte 102,15,248,239 // psubb %xmm7,%xmm5
- .byte 102,15,56,16,238 // pblendvb %xmm0,%xmm6,%xmm5
+ .byte 102,15,103,229 // packuswb %xmm5,%xmm4
+ .byte 102,15,103,241 // packuswb %xmm1,%xmm6
+ .byte 102,65,15,218,230 // pminub %xmm14,%xmm4
+ .byte 102,15,218,247 // pminub %xmm7,%xmm6
+ .byte 102,15,111,13,8,3,0,0 // movdqa 0x308(%rip),%xmm1 # 2500 <_sk_difference_sse41_8bit+0x44d>
+ .byte 102,68,15,248,198 // psubb %xmm6,%xmm8
+ .byte 102,15,219,241 // pand %xmm1,%xmm6
+ .byte 102,15,219,204 // pand %xmm4,%xmm1
+ .byte 102,15,248,196 // psubb %xmm4,%xmm0
+ .byte 102,15,248,193 // psubb %xmm1,%xmm0
+ .byte 102,68,15,248,198 // psubb %xmm6,%xmm8
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 102,65,15,111,195 // movdqa %xmm11,%xmm0
- .byte 102,15,111,205 // movdqa %xmm5,%xmm1
+ .byte 102,65,15,111,200 // movdqa %xmm8,%xmm1
.byte 255,224 // jmpq *%rax
BALIGN4
.byte 0,0 // add %al,(%rax)
- .byte 127,67 // jg 22f7 <_sk_exclusion_sse41_8bit+0x17e>
+ .byte 127,67 // jg 2263 <_sk_difference_sse41_8bit+0x1b0>
.byte 0,0 // add %al,(%rax)
- .byte 127,67 // jg 22fb <_sk_exclusion_sse41_8bit+0x182>
+ .byte 127,67 // jg 2267 <_sk_difference_sse41_8bit+0x1b4>
.byte 0,0 // add %al,(%rax)
- .byte 127,67 // jg 22ff <_sk_exclusion_sse41_8bit+0x186>
+ .byte 127,67 // jg 226b <_sk_difference_sse41_8bit+0x1b8>
BALIGN16
.byte 0,0 // add %al,(%rax)
@@ -62815,18 +62772,7 @@ BALIGN16
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,0 // incl (%rax)
- .byte 3,3 // add (%rbx),%eax
- .byte 3,3 // add (%rbx),%eax
- .byte 7 // (bad)
- .byte 7 // (bad)
- .byte 7 // (bad)
- .byte 7 // (bad)
- .byte 11,11 // or (%rbx),%ecx
- .byte 11,11 // or (%rbx),%ecx
- .byte 15 // (bad)
- .byte 15 // (bad)
- .byte 15 // (bad)
- .byte 15,255 // (bad)
+ .byte 255 // (bad)
.byte 255 // (bad)
.byte 255,0 // incl (%rax)
.byte 255 // (bad)
@@ -62945,7 +62891,7 @@ HIDDEN _sk_set_rgb_sse2_8bit
FUNCTION(_sk_set_rgb_sse2_8bit)
_sk_set_rgb_sse2_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 243,15,16,37,97,41,0,0 // movss 0x2961(%rip),%xmm4 # 2a24 <_sk_exclusion_sse2_8bit+0x197>
+ .byte 243,15,16,37,89,40,0,0 // movss 0x2859(%rip),%xmm4 # 291c <_sk_difference_sse2_8bit+0x1c5>
.byte 243,15,16,40 // movss (%rax),%xmm5
.byte 243,15,89,236 // mulss %xmm4,%xmm5
.byte 243,72,15,44,205 // cvttss2si %xmm5,%rcx
@@ -62960,7 +62906,7 @@ _sk_set_rgb_sse2_8bit:
.byte 9,208 // or %edx,%eax
.byte 102,15,110,224 // movd %eax,%xmm4
.byte 102,15,112,228,0 // pshufd $0x0,%xmm4,%xmm4
- .byte 102,15,111,45,45,41,0,0 // movdqa 0x292d(%rip),%xmm5 # 2a30 <_sk_exclusion_sse2_8bit+0x1a3>
+ .byte 102,15,111,45,45,40,0,0 // movdqa 0x282d(%rip),%xmm5 # 2930 <_sk_difference_sse2_8bit+0x1d9>
.byte 102,15,219,205 // pand %xmm5,%xmm1
.byte 102,15,219,197 // pand %xmm5,%xmm0
.byte 102,15,235,196 // por %xmm4,%xmm0
@@ -62985,7 +62931,7 @@ _sk_premul_sse2_8bit:
.byte 102,15,96,192 // punpcklbw %xmm0,%xmm0
.byte 242,15,112,192,95 // pshuflw $0x5f,%xmm0,%xmm0
.byte 243,15,112,248,95 // pshufhw $0x5f,%xmm0,%xmm7
- .byte 102,15,111,5,225,40,0,0 // movdqa 0x28e1(%rip),%xmm0 # 2a40 <_sk_exclusion_sse2_8bit+0x1b3>
+ .byte 102,15,111,5,225,39,0,0 // movdqa 0x27e1(%rip),%xmm0 # 2940 <_sk_difference_sse2_8bit+0x1e9>
.byte 102,15,235,248 // por %xmm0,%xmm7
.byte 102,15,235,240 // por %xmm0,%xmm6
.byte 102,69,15,239,201 // pxor %xmm9,%xmm9
@@ -63506,7 +63452,7 @@ _sk_load_a8_sse2_8bit:
.byte 117,48 // jne 7fd <_sk_load_a8_sse2_8bit+0x4d>
.byte 243,66,15,126,4,2 // movq (%rdx,%r8,1),%xmm0
.byte 102,15,96,192 // punpcklbw %xmm0,%xmm0
- .byte 102,15,84,5,113,34,0,0 // andpd 0x2271(%rip),%xmm0 # 2a50 <_sk_exclusion_sse2_8bit+0x1c3>
+ .byte 102,15,84,5,113,33,0,0 // andpd 0x2171(%rip),%xmm0 # 2950 <_sk_difference_sse2_8bit+0x1f9>
.byte 102,15,239,228 // pxor %xmm4,%xmm4
.byte 102,15,40,200 // movapd %xmm0,%xmm1
.byte 102,15,105,204 // punpckhwd %xmm4,%xmm1
@@ -63583,7 +63529,7 @@ _sk_load_a8_dst_sse2_8bit:
.byte 117,48 // jne 8f1 <_sk_load_a8_dst_sse2_8bit+0x4d>
.byte 243,66,15,126,20,2 // movq (%rdx,%r8,1),%xmm2
.byte 102,15,96,208 // punpcklbw %xmm0,%xmm2
- .byte 102,15,84,21,141,33,0,0 // andpd 0x218d(%rip),%xmm2 # 2a60 <_sk_exclusion_sse2_8bit+0x1d3>
+ .byte 102,15,84,21,141,32,0,0 // andpd 0x208d(%rip),%xmm2 # 2960 <_sk_difference_sse2_8bit+0x209>
.byte 102,15,239,228 // pxor %xmm4,%xmm4
.byte 102,15,40,218 // movapd %xmm2,%xmm3
.byte 102,15,105,220 // punpckhwd %xmm4,%xmm3
@@ -63667,7 +63613,7 @@ _sk_store_a8_sse2_8bit:
.byte 102,15,107,229 // packssdw %xmm5,%xmm4
.byte 77,133,201 // test %r9,%r9
.byte 117,22 // jne 9f5 <_sk_store_a8_sse2_8bit+0x5d>
- .byte 102,15,219,37,137,32,0,0 // pand 0x2089(%rip),%xmm4 # 2a70 <_sk_exclusion_sse2_8bit+0x1e3>
+ .byte 102,15,219,37,137,31,0,0 // pand 0x1f89(%rip),%xmm4 # 2970 <_sk_difference_sse2_8bit+0x219>
.byte 102,15,103,228 // packuswb %xmm4,%xmm4
.byte 102,66,15,214,36,2 // movq %xmm4,(%rdx,%r8,1)
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -63688,7 +63634,7 @@ _sk_store_a8_sse2_8bit:
.byte 102,15,127,100,36,184 // movdqa %xmm4,-0x48(%rsp)
.byte 138,68,36,188 // mov -0x44(%rsp),%al
.byte 66,136,68,2,2 // mov %al,0x2(%rdx,%r8,1)
- .byte 102,15,219,37,51,32,0,0 // pand 0x2033(%rip),%xmm4 # 2a70 <_sk_exclusion_sse2_8bit+0x1e3>
+ .byte 102,15,219,37,51,31,0,0 // pand 0x1f33(%rip),%xmm4 # 2970 <_sk_difference_sse2_8bit+0x219>
.byte 102,15,103,228 // packuswb %xmm4,%xmm4
.byte 102,15,126,224 // movd %xmm4,%eax
.byte 102,66,137,4,2 // mov %ax,(%rdx,%r8,1)
@@ -63702,7 +63648,7 @@ _sk_store_a8_sse2_8bit:
.byte 102,15,127,100,36,200 // movdqa %xmm4,-0x38(%rsp)
.byte 138,68,36,208 // mov -0x30(%rsp),%al
.byte 66,136,68,2,4 // mov %al,0x4(%rdx,%r8,1)
- .byte 102,15,219,37,239,31,0,0 // pand 0x1fef(%rip),%xmm4 # 2a70 <_sk_exclusion_sse2_8bit+0x1e3>
+ .byte 102,15,219,37,239,30,0,0 // pand 0x1eef(%rip),%xmm4 # 2970 <_sk_difference_sse2_8bit+0x219>
.byte 102,15,103,228 // packuswb %xmm4,%xmm4
.byte 102,66,15,126,36,2 // movd %xmm4,(%rdx,%r8,1)
.byte 233,97,255,255,255 // jmpq 9f1 <_sk_store_a8_sse2_8bit+0x59>
@@ -63712,7 +63658,7 @@ _sk_store_a8_sse2_8bit:
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 233,255,255,255,218 // jmpq ffffffffdb000aa0 <_sk_exclusion_sse2_8bit+0xffffffffdaffe213>
+ .byte 233,255,255,255,218 // jmpq ffffffffdb000aa0 <_sk_difference_sse2_8bit+0xffffffffdaffe349>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,203 // dec %ebx
@@ -63739,12 +63685,12 @@ _sk_load_g8_sse2_8bit:
.byte 117,116 // jne b3d <_sk_load_g8_sse2_8bit+0x91>
.byte 243,66,15,126,4,2 // movq (%rdx,%r8,1),%xmm0
.byte 102,15,96,192 // punpcklbw %xmm0,%xmm0
- .byte 102,15,84,5,165,31,0,0 // andpd 0x1fa5(%rip),%xmm0 # 2a80 <_sk_exclusion_sse2_8bit+0x1f3>
+ .byte 102,15,84,5,165,30,0,0 // andpd 0x1ea5(%rip),%xmm0 # 2980 <_sk_difference_sse2_8bit+0x229>
.byte 102,15,239,201 // pxor %xmm1,%xmm1
.byte 102,15,40,224 // movapd %xmm0,%xmm4
.byte 102,15,97,225 // punpcklwd %xmm1,%xmm4
.byte 102,15,105,193 // punpckhwd %xmm1,%xmm0
- .byte 102,15,111,45,157,31,0,0 // movdqa 0x1f9d(%rip),%xmm5 # 2a90 <_sk_exclusion_sse2_8bit+0x203>
+ .byte 102,15,111,45,157,30,0,0 // movdqa 0x1e9d(%rip),%xmm5 # 2990 <_sk_difference_sse2_8bit+0x239>
.byte 102,15,112,240,245 // pshufd $0xf5,%xmm0,%xmm6
.byte 102,15,244,197 // pmuludq %xmm5,%xmm0
.byte 102,15,112,200,232 // pshufd $0xe8,%xmm0,%xmm1
@@ -63757,7 +63703,7 @@ _sk_load_g8_sse2_8bit:
.byte 102,15,244,245 // pmuludq %xmm5,%xmm6
.byte 102,15,112,230,232 // pshufd $0xe8,%xmm6,%xmm4
.byte 102,15,98,196 // punpckldq %xmm4,%xmm0
- .byte 102,15,111,37,111,31,0,0 // movdqa 0x1f6f(%rip),%xmm4 # 2aa0 <_sk_exclusion_sse2_8bit+0x213>
+ .byte 102,15,111,37,111,30,0,0 // movdqa 0x1e6f(%rip),%xmm4 # 29a0 <_sk_difference_sse2_8bit+0x249>
.byte 102,15,235,196 // por %xmm4,%xmm0
.byte 102,15,235,204 // por %xmm4,%xmm1
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -63831,12 +63777,12 @@ _sk_load_g8_dst_sse2_8bit:
.byte 117,116 // jne c7d <_sk_load_g8_dst_sse2_8bit+0x91>
.byte 243,66,15,126,20,2 // movq (%rdx,%r8,1),%xmm2
.byte 102,15,96,208 // punpcklbw %xmm0,%xmm2
- .byte 102,15,84,21,149,30,0,0 // andpd 0x1e95(%rip),%xmm2 # 2ab0 <_sk_exclusion_sse2_8bit+0x223>
+ .byte 102,15,84,21,149,29,0,0 // andpd 0x1d95(%rip),%xmm2 # 29b0 <_sk_difference_sse2_8bit+0x259>
.byte 102,15,239,219 // pxor %xmm3,%xmm3
.byte 102,15,40,226 // movapd %xmm2,%xmm4
.byte 102,15,97,227 // punpcklwd %xmm3,%xmm4
.byte 102,15,105,211 // punpckhwd %xmm3,%xmm2
- .byte 102,15,111,45,141,30,0,0 // movdqa 0x1e8d(%rip),%xmm5 # 2ac0 <_sk_exclusion_sse2_8bit+0x233>
+ .byte 102,15,111,45,141,29,0,0 // movdqa 0x1d8d(%rip),%xmm5 # 29c0 <_sk_difference_sse2_8bit+0x269>
.byte 102,15,112,242,245 // pshufd $0xf5,%xmm2,%xmm6
.byte 102,15,244,213 // pmuludq %xmm5,%xmm2
.byte 102,15,112,218,232 // pshufd $0xe8,%xmm2,%xmm3
@@ -63849,7 +63795,7 @@ _sk_load_g8_dst_sse2_8bit:
.byte 102,15,244,245 // pmuludq %xmm5,%xmm6
.byte 102,15,112,230,232 // pshufd $0xe8,%xmm6,%xmm4
.byte 102,15,98,212 // punpckldq %xmm4,%xmm2
- .byte 102,15,111,37,95,30,0,0 // movdqa 0x1e5f(%rip),%xmm4 # 2ad0 <_sk_exclusion_sse2_8bit+0x243>
+ .byte 102,15,111,37,95,29,0,0 // movdqa 0x1d5f(%rip),%xmm4 # 29d0 <_sk_difference_sse2_8bit+0x279>
.byte 102,15,235,212 // por %xmm4,%xmm2
.byte 102,15,235,220 // por %xmm4,%xmm3
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -64066,7 +64012,7 @@ _sk_scale_1_float_sse2_8bit:
.byte 102,68,15,111,200 // movdqa %xmm0,%xmm9
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 243,15,16,0 // movss (%rax),%xmm0
- .byte 243,15,89,5,152,26,0,0 // mulss 0x1a98(%rip),%xmm0 # 2a28 <_sk_exclusion_sse2_8bit+0x19b>
+ .byte 243,15,89,5,144,25,0,0 // mulss 0x1990(%rip),%xmm0 # 2920 <_sk_difference_sse2_8bit+0x1c9>
.byte 243,15,44,192 // cvttss2si %xmm0,%eax
.byte 102,15,239,246 // pxor %xmm6,%xmm6
.byte 102,65,15,111,193 // movdqa %xmm9,%xmm0
@@ -64078,7 +64024,7 @@ _sk_scale_1_float_sse2_8bit:
.byte 102,15,96,246 // punpcklbw %xmm6,%xmm6
.byte 242,15,112,246,0 // pshuflw $0x0,%xmm6,%xmm6
.byte 102,15,112,246,80 // pshufd $0x50,%xmm6,%xmm6
- .byte 102,15,219,53,23,27,0,0 // pand 0x1b17(%rip),%xmm6 # 2ae0 <_sk_exclusion_sse2_8bit+0x253>
+ .byte 102,15,219,53,23,26,0,0 // pand 0x1a17(%rip),%xmm6 # 29e0 <_sk_difference_sse2_8bit+0x289>
.byte 102,15,111,254 // movdqa %xmm6,%xmm7
.byte 102,65,15,213,248 // pmullw %xmm8,%xmm7
.byte 102,15,111,230 // movdqa %xmm6,%xmm4
@@ -64114,7 +64060,7 @@ _sk_scale_u8_sse2_8bit:
.byte 15,133,239,0,0,0 // jne 1129 <_sk_scale_u8_sse2_8bit+0x110>
.byte 243,66,15,126,36,2 // movq (%rdx,%r8,1),%xmm4
.byte 102,15,96,224 // punpcklbw %xmm0,%xmm4
- .byte 102,15,84,37,164,26,0,0 // andpd 0x1aa4(%rip),%xmm4 # 2af0 <_sk_exclusion_sse2_8bit+0x263>
+ .byte 102,15,84,37,164,25,0,0 // andpd 0x19a4(%rip),%xmm4 # 29f0 <_sk_difference_sse2_8bit+0x299>
.byte 102,69,15,239,192 // pxor %xmm8,%xmm8
.byte 102,15,40,236 // movapd %xmm4,%xmm5
.byte 102,65,15,105,232 // punpckhwd %xmm8,%xmm5
@@ -64223,7 +64169,7 @@ FUNCTION(_sk_lerp_1_float_sse2_8bit)
_sk_lerp_1_float_sse2_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 243,15,16,32 // movss (%rax),%xmm4
- .byte 243,15,89,37,66,24,0,0 // mulss 0x1842(%rip),%xmm4 # 2a2c <_sk_exclusion_sse2_8bit+0x19f>
+ .byte 243,15,89,37,58,23,0,0 // mulss 0x173a(%rip),%xmm4 # 2924 <_sk_difference_sse2_8bit+0x1cd>
.byte 243,15,44,196 // cvttss2si %xmm4,%eax
.byte 102,15,110,224 // movd %eax,%xmm4
.byte 102,15,96,228 // punpcklbw %xmm4,%xmm4
@@ -64236,7 +64182,7 @@ _sk_lerp_1_float_sse2_8bit:
.byte 102,68,15,111,217 // movdqa %xmm1,%xmm11
.byte 102,69,15,96,217 // punpcklbw %xmm9,%xmm11
.byte 102,65,15,104,201 // punpckhbw %xmm9,%xmm1
- .byte 102,15,111,53,213,24,0,0 // movdqa 0x18d5(%rip),%xmm6 # 2b00 <_sk_exclusion_sse2_8bit+0x273>
+ .byte 102,15,111,53,213,23,0,0 // movdqa 0x17d5(%rip),%xmm6 # 2a00 <_sk_difference_sse2_8bit+0x2a9>
.byte 102,65,15,219,240 // pand %xmm8,%xmm6
.byte 102,15,111,230 // movdqa %xmm6,%xmm4
.byte 102,15,213,225 // pmullw %xmm1,%xmm4
@@ -64304,7 +64250,7 @@ _sk_lerp_u8_sse2_8bit:
.byte 15,133,141,1,0,0 // jne 14c0 <_sk_lerp_u8_sse2_8bit+0x1ae>
.byte 243,66,15,126,44,2 // movq (%rdx,%r8,1),%xmm5
.byte 102,15,96,232 // punpcklbw %xmm0,%xmm5
- .byte 102,15,84,45,203,23,0,0 // andpd 0x17cb(%rip),%xmm5 # 2b10 <_sk_exclusion_sse2_8bit+0x283>
+ .byte 102,15,84,45,203,22,0,0 // andpd 0x16cb(%rip),%xmm5 # 2a10 <_sk_difference_sse2_8bit+0x2b9>
.byte 102,69,15,239,192 // pxor %xmm8,%xmm8
.byte 102,15,40,229 // movapd %xmm5,%xmm4
.byte 102,65,15,105,224 // punpckhwd %xmm8,%xmm4
@@ -64462,7 +64408,7 @@ HIDDEN _sk_black_color_sse2_8bit
FUNCTION(_sk_black_color_sse2_8bit)
_sk_black_color_sse2_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 15,40,5,147,21,0,0 // movaps 0x1593(%rip),%xmm0 # 2b20 <_sk_exclusion_sse2_8bit+0x293>
+ .byte 15,40,5,147,20,0,0 // movaps 0x1493(%rip),%xmm0 # 2a20 <_sk_difference_sse2_8bit+0x2c9>
.byte 15,40,200 // movaps %xmm0,%xmm1
.byte 255,224 // jmpq *%rax
@@ -65354,7 +65300,7 @@ _sk_darken_sse2_8bit:
.byte 102,65,15,248,234 // psubb %xmm10,%xmm5
.byte 102,15,248,207 // psubb %xmm7,%xmm1
.byte 102,15,248,196 // psubb %xmm4,%xmm0
- .byte 102,15,111,37,174,6,0,0 // movdqa 0x6ae(%rip),%xmm4 # 2b30 <_sk_exclusion_sse2_8bit+0x2a3>
+ .byte 102,15,111,37,174,5,0,0 // movdqa 0x5ae(%rip),%xmm4 # 2a30 <_sk_difference_sse2_8bit+0x2d9>
.byte 102,15,219,236 // pand %xmm4,%xmm5
.byte 102,15,111,252 // movdqa %xmm4,%xmm7
.byte 102,15,223,248 // pandn %xmm0,%xmm7
@@ -65462,7 +65408,7 @@ _sk_lighten_sse2_8bit:
.byte 102,65,15,248,234 // psubb %xmm10,%xmm5
.byte 102,15,248,207 // psubb %xmm7,%xmm1
.byte 102,15,248,196 // psubb %xmm4,%xmm0
- .byte 102,15,111,37,210,4,0,0 // movdqa 0x4d2(%rip),%xmm4 # 2b40 <_sk_exclusion_sse2_8bit+0x2b3>
+ .byte 102,15,111,37,210,3,0,0 // movdqa 0x3d2(%rip),%xmm4 # 2a40 <_sk_difference_sse2_8bit+0x2e9>
.byte 102,15,219,236 // pand %xmm4,%xmm5
.byte 102,15,111,252 // movdqa %xmm4,%xmm7
.byte 102,15,223,248 // pandn %xmm0,%xmm7
@@ -65475,6 +65421,53 @@ _sk_lighten_sse2_8bit:
.byte 102,15,111,206 // movdqa %xmm6,%xmm1
.byte 255,224 // jmpq *%rax
+HIDDEN _sk_exclusion_sse2_8bit
+.globl _sk_exclusion_sse2_8bit
+FUNCTION(_sk_exclusion_sse2_8bit)
+_sk_exclusion_sse2_8bit:
+ .byte 102,69,15,239,192 // pxor %xmm8,%xmm8
+ .byte 102,68,15,111,200 // movdqa %xmm0,%xmm9
+ .byte 102,69,15,96,200 // punpcklbw %xmm8,%xmm9
+ .byte 102,68,15,111,208 // movdqa %xmm0,%xmm10
+ .byte 102,69,15,104,208 // punpckhbw %xmm8,%xmm10
+ .byte 102,68,15,111,217 // movdqa %xmm1,%xmm11
+ .byte 102,69,15,96,216 // punpcklbw %xmm8,%xmm11
+ .byte 102,68,15,111,225 // movdqa %xmm1,%xmm12
+ .byte 102,69,15,104,224 // punpckhbw %xmm8,%xmm12
+ .byte 102,15,111,226 // movdqa %xmm2,%xmm4
+ .byte 102,15,252,194 // paddb %xmm2,%xmm0
+ .byte 102,15,111,242 // movdqa %xmm2,%xmm6
+ .byte 102,65,15,96,240 // punpcklbw %xmm8,%xmm6
+ .byte 102,65,15,104,224 // punpckhbw %xmm8,%xmm4
+ .byte 102,15,111,251 // movdqa %xmm3,%xmm7
+ .byte 102,15,252,203 // paddb %xmm3,%xmm1
+ .byte 102,15,111,235 // movdqa %xmm3,%xmm5
+ .byte 102,65,15,96,232 // punpcklbw %xmm8,%xmm5
+ .byte 102,65,15,104,248 // punpckhbw %xmm8,%xmm7
+ .byte 102,65,15,213,252 // pmullw %xmm12,%xmm7
+ .byte 102,65,15,213,235 // pmullw %xmm11,%xmm5
+ .byte 102,65,15,213,226 // pmullw %xmm10,%xmm4
+ .byte 102,65,15,213,241 // pmullw %xmm9,%xmm6
+ .byte 102,65,15,253,241 // paddw %xmm9,%xmm6
+ .byte 102,65,15,253,226 // paddw %xmm10,%xmm4
+ .byte 102,65,15,253,235 // paddw %xmm11,%xmm5
+ .byte 102,65,15,253,252 // paddw %xmm12,%xmm7
+ .byte 102,15,113,215,8 // psrlw $0x8,%xmm7
+ .byte 102,15,113,213,8 // psrlw $0x8,%xmm5
+ .byte 102,15,113,212,8 // psrlw $0x8,%xmm4
+ .byte 102,15,113,214,8 // psrlw $0x8,%xmm6
+ .byte 102,15,103,244 // packuswb %xmm4,%xmm6
+ .byte 102,15,103,239 // packuswb %xmm7,%xmm5
+ .byte 102,15,111,37,21,3,0,0 // movdqa 0x315(%rip),%xmm4 # 2a50 <_sk_difference_sse2_8bit+0x2f9>
+ .byte 102,15,248,205 // psubb %xmm5,%xmm1
+ .byte 102,15,219,236 // pand %xmm4,%xmm5
+ .byte 102,15,219,230 // pand %xmm6,%xmm4
+ .byte 102,15,248,198 // psubb %xmm6,%xmm0
+ .byte 102,15,248,196 // psubb %xmm4,%xmm0
+ .byte 102,15,248,205 // psubb %xmm5,%xmm1
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 255,224 // jmpq *%rax
+
HIDDEN _sk_difference_sse2_8bit
.globl _sk_difference_sse2_8bit
FUNCTION(_sk_difference_sse2_8bit)
@@ -65492,196 +65485,95 @@ _sk_difference_sse2_8bit:
.byte 242,15,112,237,95 // pshuflw $0x5f,%xmm5,%xmm5
.byte 243,15,112,253,95 // pshufhw $0x5f,%xmm5,%xmm7
.byte 102,69,15,239,192 // pxor %xmm8,%xmm8
- .byte 102,68,15,111,200 // movdqa %xmm0,%xmm9
+ .byte 102,15,111,232 // movdqa %xmm0,%xmm5
+ .byte 102,65,15,96,232 // punpcklbw %xmm8,%xmm5
+ .byte 102,15,111,240 // movdqa %xmm0,%xmm6
+ .byte 102,65,15,104,240 // punpckhbw %xmm8,%xmm6
+ .byte 102,68,15,111,201 // movdqa %xmm1,%xmm9
.byte 102,69,15,96,200 // punpcklbw %xmm8,%xmm9
- .byte 102,68,15,111,208 // movdqa %xmm0,%xmm10
- .byte 102,69,15,104,208 // punpckhbw %xmm8,%xmm10
- .byte 102,68,15,111,217 // movdqa %xmm1,%xmm11
- .byte 102,69,15,96,216 // punpcklbw %xmm8,%xmm11
.byte 102,68,15,111,225 // movdqa %xmm1,%xmm12
.byte 102,69,15,104,224 // punpckhbw %xmm8,%xmm12
- .byte 102,68,15,111,239 // movdqa %xmm7,%xmm13
- .byte 102,69,15,96,232 // punpcklbw %xmm8,%xmm13
+ .byte 102,68,15,111,215 // movdqa %xmm7,%xmm10
+ .byte 102,69,15,96,208 // punpcklbw %xmm8,%xmm10
.byte 102,65,15,104,248 // punpckhbw %xmm8,%xmm7
- .byte 102,68,15,111,244 // movdqa %xmm4,%xmm14
- .byte 102,69,15,96,240 // punpcklbw %xmm8,%xmm14
+ .byte 102,68,15,111,220 // movdqa %xmm4,%xmm11
+ .byte 102,69,15,96,216 // punpcklbw %xmm8,%xmm11
.byte 102,65,15,104,224 // punpckhbw %xmm8,%xmm4
.byte 102,65,15,213,228 // pmullw %xmm12,%xmm4
- .byte 102,69,15,213,243 // pmullw %xmm11,%xmm14
- .byte 102,65,15,213,250 // pmullw %xmm10,%xmm7
- .byte 102,69,15,213,233 // pmullw %xmm9,%xmm13
- .byte 102,69,15,253,233 // paddw %xmm9,%xmm13
- .byte 102,65,15,253,250 // paddw %xmm10,%xmm7
- .byte 102,69,15,253,243 // paddw %xmm11,%xmm14
+ .byte 102,69,15,213,217 // pmullw %xmm9,%xmm11
+ .byte 102,15,213,254 // pmullw %xmm6,%xmm7
+ .byte 102,68,15,213,213 // pmullw %xmm5,%xmm10
+ .byte 102,68,15,253,213 // paddw %xmm5,%xmm10
+ .byte 102,15,253,254 // paddw %xmm6,%xmm7
+ .byte 102,69,15,253,217 // paddw %xmm9,%xmm11
.byte 102,65,15,253,228 // paddw %xmm12,%xmm4
.byte 102,15,113,212,8 // psrlw $0x8,%xmm4
- .byte 102,65,15,113,214,8 // psrlw $0x8,%xmm14
+ .byte 102,65,15,113,211,8 // psrlw $0x8,%xmm11
.byte 102,15,113,215,8 // psrlw $0x8,%xmm7
- .byte 102,65,15,113,213,8 // psrlw $0x8,%xmm13
- .byte 102,68,15,103,239 // packuswb %xmm7,%xmm13
- .byte 102,68,15,103,244 // packuswb %xmm4,%xmm14
+ .byte 102,65,15,113,210,8 // psrlw $0x8,%xmm10
+ .byte 102,68,15,103,215 // packuswb %xmm7,%xmm10
+ .byte 102,68,15,103,220 // packuswb %xmm4,%xmm11
.byte 242,15,112,225,231 // pshuflw $0xe7,%xmm1,%xmm4
.byte 243,15,112,228,231 // pshufhw $0xe7,%xmm4,%xmm4
.byte 102,15,112,228,232 // pshufd $0xe8,%xmm4,%xmm4
.byte 102,15,96,228 // punpcklbw %xmm4,%xmm4
.byte 242,15,112,228,95 // pshuflw $0x5f,%xmm4,%xmm4
- .byte 243,68,15,112,212,95 // pshufhw $0x5f,%xmm4,%xmm10
+ .byte 243,68,15,112,228,95 // pshufhw $0x5f,%xmm4,%xmm12
.byte 242,15,112,224,231 // pshuflw $0xe7,%xmm0,%xmm4
.byte 243,15,112,228,231 // pshufhw $0xe7,%xmm4,%xmm4
.byte 102,15,112,228,232 // pshufd $0xe8,%xmm4,%xmm4
.byte 102,15,96,228 // punpcklbw %xmm4,%xmm4
.byte 242,15,112,228,95 // pshuflw $0x5f,%xmm4,%xmm4
- .byte 243,68,15,112,220,95 // pshufhw $0x5f,%xmm4,%xmm11
+ .byte 243,68,15,112,236,95 // pshufhw $0x5f,%xmm4,%xmm13
.byte 102,68,15,111,202 // movdqa %xmm2,%xmm9
.byte 102,15,252,194 // paddb %xmm2,%xmm0
- .byte 102,15,111,242 // movdqa %xmm2,%xmm6
- .byte 102,65,15,96,240 // punpcklbw %xmm8,%xmm6
+ .byte 102,15,111,234 // movdqa %xmm2,%xmm5
+ .byte 102,65,15,96,232 // punpcklbw %xmm8,%xmm5
.byte 102,69,15,104,200 // punpckhbw %xmm8,%xmm9
- .byte 102,68,15,111,227 // movdqa %xmm3,%xmm12
+ .byte 102,68,15,111,243 // movdqa %xmm3,%xmm14
.byte 102,15,252,203 // paddb %xmm3,%xmm1
- .byte 102,15,111,235 // movdqa %xmm3,%xmm5
- .byte 102,65,15,96,232 // punpcklbw %xmm8,%xmm5
- .byte 102,69,15,104,224 // punpckhbw %xmm8,%xmm12
- .byte 102,65,15,111,227 // movdqa %xmm11,%xmm4
- .byte 102,65,15,96,224 // punpcklbw %xmm8,%xmm4
- .byte 102,69,15,104,216 // punpckhbw %xmm8,%xmm11
- .byte 102,65,15,111,250 // movdqa %xmm10,%xmm7
+ .byte 102,15,111,251 // movdqa %xmm3,%xmm7
.byte 102,65,15,96,248 // punpcklbw %xmm8,%xmm7
- .byte 102,69,15,104,208 // punpckhbw %xmm8,%xmm10
- .byte 102,69,15,213,212 // pmullw %xmm12,%xmm10
- .byte 102,15,213,253 // pmullw %xmm5,%xmm7
- .byte 102,69,15,213,217 // pmullw %xmm9,%xmm11
- .byte 102,15,213,230 // pmullw %xmm6,%xmm4
- .byte 102,15,253,230 // paddw %xmm6,%xmm4
- .byte 102,69,15,253,217 // paddw %xmm9,%xmm11
- .byte 102,15,253,253 // paddw %xmm5,%xmm7
- .byte 102,69,15,253,212 // paddw %xmm12,%xmm10
- .byte 102,65,15,113,210,8 // psrlw $0x8,%xmm10
- .byte 102,15,113,215,8 // psrlw $0x8,%xmm7
- .byte 102,65,15,113,211,8 // psrlw $0x8,%xmm11
+ .byte 102,69,15,104,240 // punpckhbw %xmm8,%xmm14
+ .byte 102,65,15,111,229 // movdqa %xmm13,%xmm4
+ .byte 102,65,15,96,224 // punpcklbw %xmm8,%xmm4
+ .byte 102,69,15,104,232 // punpckhbw %xmm8,%xmm13
+ .byte 102,65,15,111,244 // movdqa %xmm12,%xmm6
+ .byte 102,65,15,96,240 // punpcklbw %xmm8,%xmm6
+ .byte 102,69,15,104,224 // punpckhbw %xmm8,%xmm12
+ .byte 102,69,15,213,230 // pmullw %xmm14,%xmm12
+ .byte 102,15,213,247 // pmullw %xmm7,%xmm6
+ .byte 102,69,15,213,233 // pmullw %xmm9,%xmm13
+ .byte 102,15,213,229 // pmullw %xmm5,%xmm4
+ .byte 102,15,253,229 // paddw %xmm5,%xmm4
+ .byte 102,69,15,253,233 // paddw %xmm9,%xmm13
+ .byte 102,15,253,247 // paddw %xmm7,%xmm6
+ .byte 102,69,15,253,230 // paddw %xmm14,%xmm12
+ .byte 102,65,15,113,212,8 // psrlw $0x8,%xmm12
+ .byte 102,15,113,214,8 // psrlw $0x8,%xmm6
+ .byte 102,65,15,113,213,8 // psrlw $0x8,%xmm13
.byte 102,15,113,212,8 // psrlw $0x8,%xmm4
- .byte 102,65,15,103,227 // packuswb %xmm11,%xmm4
- .byte 102,65,15,103,250 // packuswb %xmm10,%xmm7
- .byte 102,68,15,218,247 // pminub %xmm7,%xmm14
- .byte 102,68,15,218,236 // pminub %xmm4,%xmm13
- .byte 102,69,15,252,237 // paddb %xmm13,%xmm13
- .byte 102,69,15,252,246 // paddb %xmm14,%xmm14
- .byte 102,15,111,241 // movdqa %xmm1,%xmm6
- .byte 102,65,15,248,246 // psubb %xmm14,%xmm6
- .byte 102,15,111,232 // movdqa %xmm0,%xmm5
- .byte 102,65,15,248,237 // psubb %xmm13,%xmm5
- .byte 102,15,248,207 // psubb %xmm7,%xmm1
- .byte 102,15,248,196 // psubb %xmm4,%xmm0
- .byte 102,15,111,37,235,2,0,0 // movdqa 0x2eb(%rip),%xmm4 # 2b50 <_sk_exclusion_sse2_8bit+0x2c3>
+ .byte 102,65,15,103,229 // packuswb %xmm13,%xmm4
+ .byte 102,65,15,103,244 // packuswb %xmm12,%xmm6
+ .byte 102,65,15,218,226 // pminub %xmm10,%xmm4
+ .byte 102,65,15,218,243 // pminub %xmm11,%xmm6
+ .byte 102,15,111,45,98,1,0,0 // movdqa 0x162(%rip),%xmm5 # 2a60 <_sk_difference_sse2_8bit+0x309>
+ .byte 102,15,248,206 // psubb %xmm6,%xmm1
+ .byte 102,15,219,245 // pand %xmm5,%xmm6
.byte 102,15,219,236 // pand %xmm4,%xmm5
- .byte 102,15,111,252 // movdqa %xmm4,%xmm7
- .byte 102,15,223,248 // pandn %xmm0,%xmm7
- .byte 102,15,235,239 // por %xmm7,%xmm5
- .byte 102,15,219,244 // pand %xmm4,%xmm6
- .byte 102,15,223,225 // pandn %xmm1,%xmm4
- .byte 102,15,235,244 // por %xmm4,%xmm6
- .byte 72,173 // lods %ds:(%rsi),%rax
- .byte 102,15,111,197 // movdqa %xmm5,%xmm0
- .byte 102,15,111,206 // movdqa %xmm6,%xmm1
- .byte 255,224 // jmpq *%rax
-
-HIDDEN _sk_exclusion_sse2_8bit
-.globl _sk_exclusion_sse2_8bit
-FUNCTION(_sk_exclusion_sse2_8bit)
-_sk_exclusion_sse2_8bit:
- .byte 102,69,15,239,201 // pxor %xmm9,%xmm9
- .byte 102,68,15,111,192 // movdqa %xmm0,%xmm8
- .byte 102,68,15,111,226 // movdqa %xmm2,%xmm12
- .byte 102,68,15,252,224 // paddb %xmm0,%xmm12
- .byte 242,68,15,112,208,231 // pshuflw $0xe7,%xmm0,%xmm10
- .byte 102,15,111,224 // movdqa %xmm0,%xmm4
- .byte 102,65,15,96,225 // punpcklbw %xmm9,%xmm4
- .byte 102,69,15,104,193 // punpckhbw %xmm9,%xmm8
- .byte 102,68,15,111,233 // movdqa %xmm1,%xmm13
- .byte 102,68,15,111,243 // movdqa %xmm3,%xmm14
- .byte 102,68,15,252,241 // paddb %xmm1,%xmm14
- .byte 242,68,15,112,217,231 // pshuflw $0xe7,%xmm1,%xmm11
- .byte 102,68,15,111,249 // movdqa %xmm1,%xmm15
- .byte 102,69,15,96,249 // punpcklbw %xmm9,%xmm15
- .byte 102,15,111,234 // movdqa %xmm2,%xmm5
- .byte 102,15,111,250 // movdqa %xmm2,%xmm7
- .byte 102,65,15,96,249 // punpcklbw %xmm9,%xmm7
- .byte 102,65,15,104,233 // punpckhbw %xmm9,%xmm5
- .byte 243,65,15,112,194,231 // pshufhw $0xe7,%xmm10,%xmm0
- .byte 102,15,112,192,232 // pshufd $0xe8,%xmm0,%xmm0
- .byte 102,15,96,192 // punpcklbw %xmm0,%xmm0
- .byte 242,15,112,192,95 // pshuflw $0x5f,%xmm0,%xmm0
- .byte 243,68,15,112,208,95 // pshufhw $0x5f,%xmm0,%xmm10
- .byte 102,65,15,111,194 // movdqa %xmm10,%xmm0
- .byte 102,65,15,96,193 // punpcklbw %xmm9,%xmm0
- .byte 102,15,213,199 // pmullw %xmm7,%xmm0
- .byte 102,15,253,199 // paddw %xmm7,%xmm0
- .byte 102,15,213,252 // pmullw %xmm4,%xmm7
- .byte 102,15,253,252 // paddw %xmm4,%xmm7
- .byte 102,15,111,227 // movdqa %xmm3,%xmm4
- .byte 102,65,15,96,225 // punpcklbw %xmm9,%xmm4
- .byte 102,69,15,104,209 // punpckhbw %xmm9,%xmm10
- .byte 102,68,15,213,213 // pmullw %xmm5,%xmm10
- .byte 102,68,15,253,213 // paddw %xmm5,%xmm10
- .byte 102,65,15,213,232 // pmullw %xmm8,%xmm5
- .byte 102,65,15,253,232 // paddw %xmm8,%xmm5
- .byte 243,65,15,112,243,231 // pshufhw $0xe7,%xmm11,%xmm6
- .byte 102,15,112,246,232 // pshufd $0xe8,%xmm6,%xmm6
- .byte 102,15,96,246 // punpcklbw %xmm6,%xmm6
- .byte 242,15,112,246,95 // pshuflw $0x5f,%xmm6,%xmm6
- .byte 243,15,112,206,95 // pshufhw $0x5f,%xmm6,%xmm1
- .byte 102,15,111,241 // movdqa %xmm1,%xmm6
- .byte 102,65,15,96,241 // punpcklbw %xmm9,%xmm6
- .byte 102,15,213,244 // pmullw %xmm4,%xmm6
- .byte 102,15,253,244 // paddw %xmm4,%xmm6
- .byte 102,65,15,213,231 // pmullw %xmm15,%xmm4
- .byte 102,65,15,253,231 // paddw %xmm15,%xmm4
- .byte 102,68,15,111,195 // movdqa %xmm3,%xmm8
- .byte 102,69,15,104,233 // punpckhbw %xmm9,%xmm13
- .byte 102,69,15,104,193 // punpckhbw %xmm9,%xmm8
- .byte 102,65,15,104,201 // punpckhbw %xmm9,%xmm1
- .byte 102,65,15,213,200 // pmullw %xmm8,%xmm1
- .byte 102,65,15,253,200 // paddw %xmm8,%xmm1
- .byte 102,69,15,213,197 // pmullw %xmm13,%xmm8
- .byte 102,69,15,253,197 // paddw %xmm13,%xmm8
- .byte 102,15,113,213,8 // psrlw $0x8,%xmm5
- .byte 102,15,113,215,8 // psrlw $0x8,%xmm7
- .byte 102,15,103,253 // packuswb %xmm5,%xmm7
- .byte 102,65,15,113,208,8 // psrlw $0x8,%xmm8
- .byte 102,15,113,212,8 // psrlw $0x8,%xmm4
- .byte 102,65,15,103,224 // packuswb %xmm8,%xmm4
- .byte 102,15,252,255 // paddb %xmm7,%xmm7
- .byte 102,15,252,228 // paddb %xmm4,%xmm4
- .byte 102,65,15,111,238 // movdqa %xmm14,%xmm5
- .byte 102,15,248,236 // psubb %xmm4,%xmm5
- .byte 102,65,15,111,228 // movdqa %xmm12,%xmm4
- .byte 102,15,248,231 // psubb %xmm7,%xmm4
- .byte 102,65,15,113,210,8 // psrlw $0x8,%xmm10
- .byte 102,15,113,208,8 // psrlw $0x8,%xmm0
- .byte 102,65,15,103,194 // packuswb %xmm10,%xmm0
- .byte 102,15,113,209,8 // psrlw $0x8,%xmm1
- .byte 102,15,113,214,8 // psrlw $0x8,%xmm6
- .byte 102,15,103,241 // packuswb %xmm1,%xmm6
- .byte 102,68,15,248,246 // psubb %xmm6,%xmm14
- .byte 102,68,15,248,224 // psubb %xmm0,%xmm12
- .byte 102,15,111,13,96,1,0,0 // movdqa 0x160(%rip),%xmm1 # 2b60 <_sk_exclusion_sse2_8bit+0x2d3>
- .byte 102,15,111,193 // movdqa %xmm1,%xmm0
- .byte 102,65,15,223,196 // pandn %xmm12,%xmm0
- .byte 102,15,219,225 // pand %xmm1,%xmm4
- .byte 102,15,235,196 // por %xmm4,%xmm0
- .byte 102,15,219,233 // pand %xmm1,%xmm5
- .byte 102,65,15,223,206 // pandn %xmm14,%xmm1
- .byte 102,15,235,205 // por %xmm5,%xmm1
+ .byte 102,15,248,196 // psubb %xmm4,%xmm0
+ .byte 102,15,248,197 // psubb %xmm5,%xmm0
+ .byte 102,15,248,206 // psubb %xmm6,%xmm1
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
BALIGN4
.byte 0,0 // add %al,(%rax)
- .byte 127,67 // jg 2a6b <_sk_exclusion_sse2_8bit+0x1de>
+ .byte 127,67 // jg 2963 <_sk_difference_sse2_8bit+0x20c>
.byte 0,0 // add %al,(%rax)
- .byte 127,67 // jg 2a6f <_sk_exclusion_sse2_8bit+0x1e2>
+ .byte 127,67 // jg 2967 <_sk_difference_sse2_8bit+0x210>
.byte 0,0 // add %al,(%rax)
- .byte 127,67 // jg 2a73 <_sk_exclusion_sse2_8bit+0x1e6>
+ .byte 127,67 // jg 296b <_sk_difference_sse2_8bit+0x214>
BALIGN16
.byte 0,0 // add %al,(%rax)
diff --git a/src/jumper/SkJumper_generated_win.S b/src/jumper/SkJumper_generated_win.S
index b65679526e..02d1015c03 100644
--- a/src/jumper/SkJumper_generated_win.S
+++ b/src/jumper/SkJumper_generated_win.S
@@ -39059,7 +39059,7 @@ _sk_uniform_color_hsw_8bit LABEL PROC
PUBLIC _sk_set_rgb_hsw_8bit
_sk_set_rgb_hsw_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 197,250,16,37,69,45,0,0 ; vmovss 0x2d45(%rip),%xmm4 # 2ec8 <_sk_exclusion_hsw_8bit+0x168>
+ DB 197,250,16,37,149,44,0,0 ; vmovss 0x2c95(%rip),%xmm4 # 2e18 <_sk_difference_hsw_8bit+0x184>
DB 197,218,89,40 ; vmulss (%rax),%xmm4,%xmm5
DB 196,225,250,44,205 ; vcvttss2si %xmm5,%rcx
DB 197,218,89,104,4 ; vmulss 0x4(%rax),%xmm4,%xmm5
@@ -39072,7 +39072,7 @@ _sk_set_rgb_hsw_8bit LABEL PROC
DB 9,208 ; or %edx,%eax
DB 197,249,110,224 ; vmovd %eax,%xmm4
DB 196,226,125,88,228 ; vpbroadcastd %xmm4,%ymm4
- DB 197,253,111,45,69,45,0,0 ; vmovdqa 0x2d45(%rip),%ymm5 # 2f00 <_sk_exclusion_hsw_8bit+0x1a0>
+ DB 197,253,111,45,133,44,0,0 ; vmovdqa 0x2c85(%rip),%ymm5 # 2e40 <_sk_difference_hsw_8bit+0x1ac>
DB 197,245,219,205 ; vpand %ymm5,%ymm1,%ymm1
DB 197,253,219,197 ; vpand %ymm5,%ymm0,%ymm0
DB 197,221,235,192 ; vpor %ymm0,%ymm4,%ymm0
@@ -39082,10 +39082,10 @@ _sk_set_rgb_hsw_8bit LABEL PROC
PUBLIC _sk_premul_hsw_8bit
_sk_premul_hsw_8bit LABEL PROC
- DB 197,253,111,37,73,45,0,0 ; vmovdqa 0x2d49(%rip),%ymm4 # 2f20 <_sk_exclusion_hsw_8bit+0x1c0>
+ DB 197,253,111,37,137,44,0,0 ; vmovdqa 0x2c89(%rip),%ymm4 # 2e60 <_sk_difference_hsw_8bit+0x1cc>
DB 196,226,125,0,236 ; vpshufb %ymm4,%ymm0,%ymm5
DB 196,226,117,0,228 ; vpshufb %ymm4,%ymm1,%ymm4
- DB 197,253,111,53,87,45,0,0 ; vmovdqa 0x2d57(%rip),%ymm6 # 2f40 <_sk_exclusion_hsw_8bit+0x1e0>
+ DB 197,253,111,53,151,44,0,0 ; vmovdqa 0x2c97(%rip),%ymm6 # 2e80 <_sk_difference_hsw_8bit+0x1ec>
DB 197,221,235,230 ; vpor %ymm6,%ymm4,%ymm4
DB 197,213,235,238 ; vpor %ymm6,%ymm5,%ymm5
DB 196,226,125,48,240 ; vpmovzxbw %xmm0,%ymm6
@@ -39123,7 +39123,7 @@ _sk_premul_hsw_8bit LABEL PROC
PUBLIC _sk_swap_rb_hsw_8bit
_sk_swap_rb_hsw_8bit LABEL PROC
- DB 197,253,111,37,207,44,0,0 ; vmovdqa 0x2ccf(%rip),%ymm4 # 2f60 <_sk_exclusion_hsw_8bit+0x200>
+ DB 197,253,111,37,15,44,0,0 ; vmovdqa 0x2c0f(%rip),%ymm4 # 2ea0 <_sk_difference_hsw_8bit+0x20c>
DB 196,226,125,0,196 ; vpshufb %ymm4,%ymm0,%ymm0
DB 196,226,117,0,204 ; vpshufb %ymm4,%ymm1,%ymm1
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -39463,7 +39463,7 @@ _sk_load_bgra_hsw_8bit LABEL PROC
DB 117,35 ; jne 774 <_sk_load_bgra_hsw_8bit+0x44>
DB 196,161,126,111,76,130,32 ; vmovdqu 0x20(%rdx,%r8,4),%ymm1
DB 196,161,126,111,4,130 ; vmovdqu (%rdx,%r8,4),%ymm0
- DB 197,253,111,37,26,40,0,0 ; vmovdqa 0x281a(%rip),%ymm4 # 2f80 <_sk_exclusion_hsw_8bit+0x220>
+ DB 197,253,111,37,90,39,0,0 ; vmovdqa 0x275a(%rip),%ymm4 # 2ec0 <_sk_difference_hsw_8bit+0x22c>
DB 196,226,125,0,196 ; vpshufb %ymm4,%ymm0,%ymm0
DB 196,226,117,0,204 ; vpshufb %ymm4,%ymm1,%ymm1
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -39576,7 +39576,7 @@ _sk_load_bgra_dst_hsw_8bit LABEL PROC
DB 117,35 ; jne 92c <_sk_load_bgra_dst_hsw_8bit+0x44>
DB 196,161,126,111,92,130,32 ; vmovdqu 0x20(%rdx,%r8,4),%ymm3
DB 196,161,126,111,20,130 ; vmovdqu (%rdx,%r8,4),%ymm2
- DB 197,253,111,37,130,38,0,0 ; vmovdqa 0x2682(%rip),%ymm4 # 2fa0 <_sk_exclusion_hsw_8bit+0x240>
+ DB 197,253,111,37,194,37,0,0 ; vmovdqa 0x25c2(%rip),%ymm4 # 2ee0 <_sk_difference_hsw_8bit+0x24c>
DB 196,226,109,0,212 ; vpshufb %ymm4,%ymm2,%ymm2
DB 196,226,101,0,220 ; vpshufb %ymm4,%ymm3,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -39685,7 +39685,7 @@ _sk_store_bgra_hsw_8bit LABEL PROC
DB 72,15,175,209 ; imul %rcx,%rdx
DB 72,193,226,2 ; shl $0x2,%rdx
DB 72,3,16 ; add (%rax),%rdx
- DB 197,253,111,37,252,36,0,0 ; vmovdqa 0x24fc(%rip),%ymm4 # 2fc0 <_sk_exclusion_hsw_8bit+0x260>
+ DB 197,253,111,37,60,36,0,0 ; vmovdqa 0x243c(%rip),%ymm4 # 2f00 <_sk_difference_hsw_8bit+0x26c>
DB 196,226,117,0,236 ; vpshufb %ymm4,%ymm1,%ymm5
DB 196,226,125,0,228 ; vpshufb %ymm4,%ymm0,%ymm4
DB 77,133,201 ; test %r9,%r9
@@ -39965,10 +39965,10 @@ _sk_store_a8_hsw_8bit LABEL PROC
DB 72,99,87,8 ; movslq 0x8(%rdi),%rdx
DB 72,15,175,209 ; imul %rcx,%rdx
DB 72,3,16 ; add (%rax),%rdx
- DB 197,253,111,37,40,33,0,0 ; vmovdqa 0x2128(%rip),%ymm4 # 2fe0 <_sk_exclusion_hsw_8bit+0x280>
+ DB 197,253,111,37,104,32,0,0 ; vmovdqa 0x2068(%rip),%ymm4 # 2f20 <_sk_difference_hsw_8bit+0x28c>
DB 196,226,117,0,236 ; vpshufb %ymm4,%ymm1,%ymm5
DB 196,227,253,0,237,232 ; vpermq $0xe8,%ymm5,%ymm5
- DB 197,249,111,53,149,35,0,0 ; vmovdqa 0x2395(%rip),%xmm6 # 3260 <_sk_exclusion_hsw_8bit+0x500>
+ DB 197,249,111,53,245,34,0,0 ; vmovdqa 0x22f5(%rip),%xmm6 # 31c0 <_sk_difference_hsw_8bit+0x52c>
DB 196,226,81,0,238 ; vpshufb %xmm6,%xmm5,%xmm5
DB 196,226,125,0,228 ; vpshufb %ymm4,%ymm0,%ymm4
DB 196,227,253,0,228,232 ; vpermq $0xe8,%ymm4,%ymm4
@@ -40058,10 +40058,10 @@ _sk_load_g8_hsw_8bit LABEL PROC
DB 196,226,125,49,200 ; vpmovzxbd %xmm0,%ymm1
DB 197,249,112,192,78 ; vpshufd $0x4e,%xmm0,%xmm0
DB 196,226,125,49,192 ; vpmovzxbd %xmm0,%ymm0
- DB 196,226,125,88,37,193,30,0,0 ; vpbroadcastd 0x1ec1(%rip),%ymm4 # 2ecc <_sk_exclusion_hsw_8bit+0x16c>
+ DB 196,226,125,88,37,17,30,0,0 ; vpbroadcastd 0x1e11(%rip),%ymm4 # 2e1c <_sk_difference_hsw_8bit+0x188>
DB 196,226,125,64,236 ; vpmulld %ymm4,%ymm0,%ymm5
DB 196,226,117,64,196 ; vpmulld %ymm4,%ymm1,%ymm0
- DB 196,226,125,88,13,178,30,0,0 ; vpbroadcastd 0x1eb2(%rip),%ymm1 # 2ed0 <_sk_exclusion_hsw_8bit+0x170>
+ DB 196,226,125,88,13,2,30,0,0 ; vpbroadcastd 0x1e02(%rip),%ymm1 # 2e20 <_sk_difference_hsw_8bit+0x18c>
DB 197,253,235,193 ; vpor %ymm1,%ymm0,%ymm0
DB 197,213,235,201 ; vpor %ymm1,%ymm5,%ymm1
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -40157,10 +40157,10 @@ _sk_load_g8_dst_hsw_8bit LABEL PROC
DB 196,226,125,49,218 ; vpmovzxbd %xmm2,%ymm3
DB 197,249,112,210,78 ; vpshufd $0x4e,%xmm2,%xmm2
DB 196,226,125,49,210 ; vpmovzxbd %xmm2,%ymm2
- DB 196,226,125,88,37,81,29,0,0 ; vpbroadcastd 0x1d51(%rip),%ymm4 # 2ed4 <_sk_exclusion_hsw_8bit+0x174>
+ DB 196,226,125,88,37,161,28,0,0 ; vpbroadcastd 0x1ca1(%rip),%ymm4 # 2e24 <_sk_difference_hsw_8bit+0x190>
DB 196,226,109,64,236 ; vpmulld %ymm4,%ymm2,%ymm5
DB 196,226,101,64,212 ; vpmulld %ymm4,%ymm3,%ymm2
- DB 196,226,125,88,29,66,29,0,0 ; vpbroadcastd 0x1d42(%rip),%ymm3 # 2ed8 <_sk_exclusion_hsw_8bit+0x178>
+ DB 196,226,125,88,29,146,28,0,0 ; vpbroadcastd 0x1c92(%rip),%ymm3 # 2e28 <_sk_difference_hsw_8bit+0x194>
DB 197,237,235,211 ; vpor %ymm3,%ymm2,%ymm2
DB 197,213,235,219 ; vpor %ymm3,%ymm5,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -40255,7 +40255,7 @@ _sk_srcover_rgba_8888_hsw_8bit LABEL PROC
DB 15,133,222,0,0,0 ; jne 13c3 <_sk_srcover_rgba_8888_hsw_8bit+0x103>
DB 196,33,126,111,76,138,32 ; vmovdqu 0x20(%rdx,%r9,4),%ymm9
DB 196,33,126,111,28,138 ; vmovdqu (%rdx,%r9,4),%ymm11
- DB 197,253,111,53,6,29,0,0 ; vmovdqa 0x1d06(%rip),%ymm6 # 3000 <_sk_exclusion_hsw_8bit+0x2a0>
+ DB 197,253,111,53,70,28,0,0 ; vmovdqa 0x1c46(%rip),%ymm6 # 2f40 <_sk_difference_hsw_8bit+0x2ac>
DB 196,226,117,0,254 ; vpshufb %ymm6,%ymm1,%ymm7
DB 196,226,125,0,246 ; vpshufb %ymm6,%ymm0,%ymm6
DB 196,66,125,48,195 ; vpmovzxbw %xmm11,%ymm8
@@ -40461,7 +40461,7 @@ PUBLIC _sk_scale_1_float_hsw_8bit
_sk_scale_1_float_hsw_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 197,250,16,32 ; vmovss (%rax),%xmm4
- DB 197,218,89,37,134,24,0,0 ; vmulss 0x1886(%rip),%xmm4,%xmm4 # 2edc <_sk_exclusion_hsw_8bit+0x17c>
+ DB 197,218,89,37,214,23,0,0 ; vmulss 0x17d6(%rip),%xmm4,%xmm4 # 2e2c <_sk_difference_hsw_8bit+0x198>
DB 197,250,44,196 ; vcvttss2si %xmm4,%eax
DB 197,249,110,224 ; vmovd %eax,%xmm4
DB 196,226,125,120,228 ; vpbroadcastb %xmm4,%ymm4
@@ -40471,7 +40471,7 @@ _sk_scale_1_float_hsw_8bit LABEL PROC
DB 196,226,125,48,241 ; vpmovzxbw %xmm1,%ymm6
DB 196,227,125,57,201,1 ; vextracti128 $0x1,%ymm1,%xmm1
DB 196,226,125,48,201 ; vpmovzxbw %xmm1,%ymm1
- DB 197,221,219,37,149,25,0,0 ; vpand 0x1995(%rip),%ymm4,%ymm4 # 3020 <_sk_exclusion_hsw_8bit+0x2c0>
+ DB 197,221,219,37,213,24,0,0 ; vpand 0x18d5(%rip),%ymm4,%ymm4 # 2f60 <_sk_difference_hsw_8bit+0x2cc>
DB 197,221,213,249 ; vpmullw %ymm1,%ymm4,%ymm7
DB 197,93,213,198 ; vpmullw %ymm6,%ymm4,%ymm8
DB 197,93,213,200 ; vpmullw %ymm0,%ymm4,%ymm9
@@ -40508,7 +40508,7 @@ _sk_scale_u8_hsw_8bit LABEL PROC
DB 196,226,125,49,236 ; vpmovzxbd %xmm4,%ymm5
DB 197,249,112,228,78 ; vpshufd $0x4e,%xmm4,%xmm4
DB 196,226,125,49,228 ; vpmovzxbd %xmm4,%ymm4
- DB 197,253,111,53,31,25,0,0 ; vmovdqa 0x191f(%rip),%ymm6 # 3040 <_sk_exclusion_hsw_8bit+0x2e0>
+ DB 197,253,111,53,95,24,0,0 ; vmovdqa 0x185f(%rip),%ymm6 # 2f80 <_sk_difference_hsw_8bit+0x2ec>
DB 196,226,93,0,230 ; vpshufb %ymm6,%ymm4,%ymm4
DB 196,226,85,0,238 ; vpshufb %ymm6,%ymm5,%ymm5
DB 196,226,125,48,240 ; vpmovzxbw %xmm0,%ymm6
@@ -40623,7 +40623,7 @@ PUBLIC _sk_lerp_1_float_hsw_8bit
_sk_lerp_1_float_hsw_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 197,250,16,32 ; vmovss (%rax),%xmm4
- DB 197,218,89,37,234,21,0,0 ; vmulss 0x15ea(%rip),%xmm4,%xmm4 # 2ee0 <_sk_exclusion_hsw_8bit+0x180>
+ DB 197,218,89,37,58,21,0,0 ; vmulss 0x153a(%rip),%xmm4,%xmm4 # 2e30 <_sk_difference_hsw_8bit+0x19c>
DB 197,250,44,196 ; vcvttss2si %xmm4,%eax
DB 197,249,110,224 ; vmovd %eax,%xmm4
DB 196,226,125,120,228 ; vpbroadcastb %xmm4,%ymm4
@@ -40633,7 +40633,7 @@ _sk_lerp_1_float_hsw_8bit LABEL PROC
DB 196,226,125,48,241 ; vpmovzxbw %xmm1,%ymm6
DB 196,227,125,57,201,1 ; vextracti128 $0x1,%ymm1,%xmm1
DB 196,226,125,48,201 ; vpmovzxbw %xmm1,%ymm1
- DB 197,221,219,61,53,23,0,0 ; vpand 0x1735(%rip),%ymm4,%ymm7 # 3060 <_sk_exclusion_hsw_8bit+0x300>
+ DB 197,221,219,61,117,22,0,0 ; vpand 0x1675(%rip),%ymm4,%ymm7 # 2fa0 <_sk_difference_hsw_8bit+0x30c>
DB 197,69,213,193 ; vpmullw %ymm1,%ymm7,%ymm8
DB 197,69,213,206 ; vpmullw %ymm6,%ymm7,%ymm9
DB 197,69,213,208 ; vpmullw %ymm0,%ymm7,%ymm10
@@ -40701,7 +40701,7 @@ _sk_lerp_u8_hsw_8bit LABEL PROC
DB 196,226,125,49,236 ; vpmovzxbd %xmm4,%ymm5
DB 197,249,112,228,78 ; vpshufd $0x4e,%xmm4,%xmm4
DB 196,226,125,49,228 ; vpmovzxbd %xmm4,%ymm4
- DB 197,253,111,53,42,22,0,0 ; vmovdqa 0x162a(%rip),%ymm6 # 3080 <_sk_exclusion_hsw_8bit+0x320>
+ DB 197,253,111,53,106,21,0,0 ; vmovdqa 0x156a(%rip),%ymm6 # 2fc0 <_sk_difference_hsw_8bit+0x32c>
DB 196,98,93,0,206 ; vpshufb %ymm6,%ymm4,%ymm9
DB 196,98,85,0,222 ; vpshufb %ymm6,%ymm5,%ymm11
DB 196,226,125,48,240 ; vpmovzxbw %xmm0,%ymm6
@@ -40864,7 +40864,7 @@ _sk_move_dst_src_hsw_8bit LABEL PROC
PUBLIC _sk_black_color_hsw_8bit
_sk_black_color_hsw_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,5,249,17,0,0 ; vbroadcastss 0x11f9(%rip),%ymm0 # 2ee4 <_sk_exclusion_hsw_8bit+0x184>
+ DB 196,226,125,24,5,73,17,0,0 ; vbroadcastss 0x1149(%rip),%ymm0 # 2e34 <_sk_difference_hsw_8bit+0x1a0>
DB 197,252,40,200 ; vmovaps %ymm0,%ymm1
DB 255,224 ; jmpq *%rax
@@ -40884,7 +40884,7 @@ _sk_clear_hsw_8bit LABEL PROC
PUBLIC _sk_srcatop_hsw_8bit
_sk_srcatop_hsw_8bit LABEL PROC
- DB 197,125,111,5,143,19,0,0 ; vmovdqa 0x138f(%rip),%ymm8 # 30a0 <_sk_exclusion_hsw_8bit+0x340>
+ DB 197,125,111,5,207,18,0,0 ; vmovdqa 0x12cf(%rip),%ymm8 # 2fe0 <_sk_difference_hsw_8bit+0x34c>
DB 196,194,101,0,224 ; vpshufb %ymm8,%ymm3,%ymm4
DB 196,194,109,0,232 ; vpshufb %ymm8,%ymm2,%ymm5
DB 196,98,125,48,208 ; vpmovzxbw %xmm0,%ymm10
@@ -40959,7 +40959,7 @@ _sk_srcatop_hsw_8bit LABEL PROC
PUBLIC _sk_dstatop_hsw_8bit
_sk_dstatop_hsw_8bit LABEL PROC
- DB 197,125,111,5,74,18,0,0 ; vmovdqa 0x124a(%rip),%ymm8 # 30c0 <_sk_exclusion_hsw_8bit+0x360>
+ DB 197,125,111,5,138,17,0,0 ; vmovdqa 0x118a(%rip),%ymm8 # 3000 <_sk_difference_hsw_8bit+0x36c>
DB 196,194,117,0,224 ; vpshufb %ymm8,%ymm1,%ymm4
DB 196,194,125,0,232 ; vpshufb %ymm8,%ymm0,%ymm5
DB 196,98,125,48,210 ; vpmovzxbw %xmm2,%ymm10
@@ -41034,7 +41034,7 @@ _sk_dstatop_hsw_8bit LABEL PROC
PUBLIC _sk_srcin_hsw_8bit
_sk_srcin_hsw_8bit LABEL PROC
- DB 197,253,111,37,1,17,0,0 ; vmovdqa 0x1101(%rip),%ymm4 # 30e0 <_sk_exclusion_hsw_8bit+0x380>
+ DB 197,253,111,37,65,16,0,0 ; vmovdqa 0x1041(%rip),%ymm4 # 3020 <_sk_difference_hsw_8bit+0x38c>
DB 196,226,101,0,236 ; vpshufb %ymm4,%ymm3,%ymm5
DB 196,226,109,0,228 ; vpshufb %ymm4,%ymm2,%ymm4
DB 196,226,125,48,240 ; vpmovzxbw %xmm0,%ymm6
@@ -41072,7 +41072,7 @@ _sk_srcin_hsw_8bit LABEL PROC
PUBLIC _sk_dstin_hsw_8bit
_sk_dstin_hsw_8bit LABEL PROC
- DB 197,253,111,37,119,16,0,0 ; vmovdqa 0x1077(%rip),%ymm4 # 3100 <_sk_exclusion_hsw_8bit+0x3a0>
+ DB 197,253,111,37,183,15,0,0 ; vmovdqa 0xfb7(%rip),%ymm4 # 3040 <_sk_difference_hsw_8bit+0x3ac>
DB 196,226,117,0,204 ; vpshufb %ymm4,%ymm1,%ymm1
DB 196,226,125,0,196 ; vpshufb %ymm4,%ymm0,%ymm0
DB 196,226,125,48,226 ; vpmovzxbw %xmm2,%ymm4
@@ -41110,7 +41110,7 @@ _sk_dstin_hsw_8bit LABEL PROC
PUBLIC _sk_srcout_hsw_8bit
_sk_srcout_hsw_8bit LABEL PROC
- DB 197,253,111,37,235,15,0,0 ; vmovdqa 0xfeb(%rip),%ymm4 # 3120 <_sk_exclusion_hsw_8bit+0x3c0>
+ DB 197,253,111,37,43,15,0,0 ; vmovdqa 0xf2b(%rip),%ymm4 # 3060 <_sk_difference_hsw_8bit+0x3cc>
DB 196,226,109,0,236 ; vpshufb %ymm4,%ymm2,%ymm5
DB 196,226,101,0,228 ; vpshufb %ymm4,%ymm3,%ymm4
DB 197,205,118,246 ; vpcmpeqd %ymm6,%ymm6,%ymm6
@@ -41151,7 +41151,7 @@ _sk_srcout_hsw_8bit LABEL PROC
PUBLIC _sk_dstout_hsw_8bit
_sk_dstout_hsw_8bit LABEL PROC
- DB 197,253,111,37,85,15,0,0 ; vmovdqa 0xf55(%rip),%ymm4 # 3140 <_sk_exclusion_hsw_8bit+0x3e0>
+ DB 197,253,111,37,149,14,0,0 ; vmovdqa 0xe95(%rip),%ymm4 # 3080 <_sk_difference_hsw_8bit+0x3ec>
DB 196,226,125,0,196 ; vpshufb %ymm4,%ymm0,%ymm0
DB 196,226,117,0,204 ; vpshufb %ymm4,%ymm1,%ymm1
DB 197,221,118,228 ; vpcmpeqd %ymm4,%ymm4,%ymm4
@@ -41192,7 +41192,7 @@ _sk_dstout_hsw_8bit LABEL PROC
PUBLIC _sk_srcover_hsw_8bit
_sk_srcover_hsw_8bit LABEL PROC
- DB 197,253,111,37,189,14,0,0 ; vmovdqa 0xebd(%rip),%ymm4 # 3160 <_sk_exclusion_hsw_8bit+0x400>
+ DB 197,253,111,37,253,13,0,0 ; vmovdqa 0xdfd(%rip),%ymm4 # 30a0 <_sk_difference_hsw_8bit+0x40c>
DB 196,226,117,0,236 ; vpshufb %ymm4,%ymm1,%ymm5
DB 196,226,125,0,228 ; vpshufb %ymm4,%ymm0,%ymm4
DB 196,98,125,48,202 ; vpmovzxbw %xmm2,%ymm9
@@ -41234,7 +41234,7 @@ _sk_srcover_hsw_8bit LABEL PROC
PUBLIC _sk_dstover_hsw_8bit
_sk_dstover_hsw_8bit LABEL PROC
- DB 197,253,111,37,30,14,0,0 ; vmovdqa 0xe1e(%rip),%ymm4 # 3180 <_sk_exclusion_hsw_8bit+0x420>
+ DB 197,253,111,37,94,13,0,0 ; vmovdqa 0xd5e(%rip),%ymm4 # 30c0 <_sk_difference_hsw_8bit+0x42c>
DB 196,226,101,0,236 ; vpshufb %ymm4,%ymm3,%ymm5
DB 196,226,109,0,228 ; vpshufb %ymm4,%ymm2,%ymm4
DB 196,98,125,48,200 ; vpmovzxbw %xmm0,%ymm9
@@ -41314,7 +41314,7 @@ _sk_multiply_hsw_8bit LABEL PROC
DB 72,131,236,56 ; sub $0x38,%rsp
DB 197,253,111,243 ; vmovdqa %ymm3,%ymm6
DB 197,253,111,218 ; vmovdqa %ymm2,%ymm3
- DB 197,125,111,13,219,12,0,0 ; vmovdqa 0xcdb(%rip),%ymm9 # 31a0 <_sk_exclusion_hsw_8bit+0x440>
+ DB 197,125,111,13,27,12,0,0 ; vmovdqa 0xc1b(%rip),%ymm9 # 30e0 <_sk_difference_hsw_8bit+0x44c>
DB 196,194,101,0,225 ; vpshufb %ymm9,%ymm3,%ymm4
DB 196,194,77,0,233 ; vpshufb %ymm9,%ymm6,%ymm5
DB 196,65,45,118,210 ; vpcmpeqd %ymm10,%ymm10,%ymm10
@@ -41455,7 +41455,7 @@ _sk_screen_hsw_8bit LABEL PROC
PUBLIC _sk_xor__hsw_8bit
_sk_xor__hsw_8bit LABEL PROC
- DB 197,125,111,13,103,10,0,0 ; vmovdqa 0xa67(%rip),%ymm9 # 31c0 <_sk_exclusion_hsw_8bit+0x460>
+ DB 197,125,111,13,167,9,0,0 ; vmovdqa 0x9a7(%rip),%ymm9 # 3100 <_sk_difference_hsw_8bit+0x46c>
DB 196,194,109,0,225 ; vpshufb %ymm9,%ymm2,%ymm4
DB 196,194,101,0,249 ; vpshufb %ymm9,%ymm3,%ymm7
DB 196,65,37,118,219 ; vpcmpeqd %ymm11,%ymm11,%ymm11
@@ -41532,7 +41532,7 @@ _sk_xor__hsw_8bit LABEL PROC
PUBLIC _sk_darken_hsw_8bit
_sk_darken_hsw_8bit LABEL PROC
- DB 197,125,111,5,21,9,0,0 ; vmovdqa 0x915(%rip),%ymm8 # 31e0 <_sk_exclusion_hsw_8bit+0x480>
+ DB 197,125,111,5,85,8,0,0 ; vmovdqa 0x855(%rip),%ymm8 # 3120 <_sk_difference_hsw_8bit+0x48c>
DB 196,194,101,0,224 ; vpshufb %ymm8,%ymm3,%ymm4
DB 196,194,109,0,240 ; vpshufb %ymm8,%ymm2,%ymm6
DB 196,98,125,48,208 ; vpmovzxbw %xmm0,%ymm10
@@ -41605,7 +41605,7 @@ _sk_darken_hsw_8bit LABEL PROC
DB 197,253,248,246 ; vpsubb %ymm6,%ymm0,%ymm6
DB 197,245,248,205 ; vpsubb %ymm5,%ymm1,%ymm1
DB 197,253,248,196 ; vpsubb %ymm4,%ymm0,%ymm0
- DB 196,226,125,88,37,174,4,0,0 ; vpbroadcastd 0x4ae(%rip),%ymm4 # 2ee8 <_sk_exclusion_hsw_8bit+0x188>
+ DB 196,226,125,88,37,254,3,0,0 ; vpbroadcastd 0x3fe(%rip),%ymm4 # 2e38 <_sk_difference_hsw_8bit+0x1a4>
DB 196,227,125,76,198,64 ; vpblendvb %ymm4,%ymm6,%ymm0,%ymm0
DB 196,227,117,76,207,64 ; vpblendvb %ymm4,%ymm7,%ymm1,%ymm1
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -41613,7 +41613,7 @@ _sk_darken_hsw_8bit LABEL PROC
PUBLIC _sk_lighten_hsw_8bit
_sk_lighten_hsw_8bit LABEL PROC
- DB 197,125,111,5,174,7,0,0 ; vmovdqa 0x7ae(%rip),%ymm8 # 3200 <_sk_exclusion_hsw_8bit+0x4a0>
+ DB 197,125,111,5,238,6,0,0 ; vmovdqa 0x6ee(%rip),%ymm8 # 3140 <_sk_difference_hsw_8bit+0x4ac>
DB 196,194,101,0,224 ; vpshufb %ymm8,%ymm3,%ymm4
DB 196,194,109,0,240 ; vpshufb %ymm8,%ymm2,%ymm6
DB 196,98,125,48,208 ; vpmovzxbw %xmm0,%ymm10
@@ -41686,15 +41686,59 @@ _sk_lighten_hsw_8bit LABEL PROC
DB 197,253,248,246 ; vpsubb %ymm6,%ymm0,%ymm6
DB 197,245,248,205 ; vpsubb %ymm5,%ymm1,%ymm1
DB 197,253,248,196 ; vpsubb %ymm4,%ymm0,%ymm0
- DB 196,226,125,88,37,43,3,0,0 ; vpbroadcastd 0x32b(%rip),%ymm4 # 2eec <_sk_exclusion_hsw_8bit+0x18c>
+ DB 196,226,125,88,37,123,2,0,0 ; vpbroadcastd 0x27b(%rip),%ymm4 # 2e3c <_sk_difference_hsw_8bit+0x1a8>
DB 196,227,125,76,198,64 ; vpblendvb %ymm4,%ymm6,%ymm0,%ymm0
DB 196,227,117,76,207,64 ; vpblendvb %ymm4,%ymm7,%ymm1,%ymm1
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
+PUBLIC _sk_exclusion_hsw_8bit
+_sk_exclusion_hsw_8bit LABEL PROC
+ DB 196,98,125,48,200 ; vpmovzxbw %xmm0,%ymm9
+ DB 196,227,125,57,197,1 ; vextracti128 $0x1,%ymm0,%xmm5
+ DB 196,226,125,48,237 ; vpmovzxbw %xmm5,%ymm5
+ DB 196,226,125,48,241 ; vpmovzxbw %xmm1,%ymm6
+ DB 196,227,125,57,207,1 ; vextracti128 $0x1,%ymm1,%xmm7
+ DB 196,226,125,48,255 ; vpmovzxbw %xmm7,%ymm7
+ DB 196,98,125,48,194 ; vpmovzxbw %xmm2,%ymm8
+ DB 196,227,125,57,212,1 ; vextracti128 $0x1,%ymm2,%xmm4
+ DB 196,98,125,48,220 ; vpmovzxbw %xmm4,%ymm11
+ DB 196,98,125,48,211 ; vpmovzxbw %xmm3,%ymm10
+ DB 196,227,125,57,220,1 ; vextracti128 $0x1,%ymm3,%xmm4
+ DB 196,226,125,48,228 ; vpmovzxbw %xmm4,%ymm4
+ DB 197,221,213,231 ; vpmullw %ymm7,%ymm4,%ymm4
+ DB 197,45,213,214 ; vpmullw %ymm6,%ymm10,%ymm10
+ DB 197,37,213,221 ; vpmullw %ymm5,%ymm11,%ymm11
+ DB 196,65,61,213,193 ; vpmullw %ymm9,%ymm8,%ymm8
+ DB 196,65,61,253,193 ; vpaddw %ymm9,%ymm8,%ymm8
+ DB 197,165,253,237 ; vpaddw %ymm5,%ymm11,%ymm5
+ DB 197,173,253,246 ; vpaddw %ymm6,%ymm10,%ymm6
+ DB 197,221,253,231 ; vpaddw %ymm7,%ymm4,%ymm4
+ DB 197,221,113,212,8 ; vpsrlw $0x8,%ymm4,%ymm4
+ DB 197,205,113,214,8 ; vpsrlw $0x8,%ymm6,%ymm6
+ DB 197,213,113,213,8 ; vpsrlw $0x8,%ymm5,%ymm5
+ DB 196,193,69,113,208,8 ; vpsrlw $0x8,%ymm8,%ymm7
+ DB 196,99,69,56,197,1 ; vinserti128 $0x1,%xmm5,%ymm7,%ymm8
+ DB 196,227,69,70,237,49 ; vperm2i128 $0x31,%ymm5,%ymm7,%ymm5
+ DB 197,189,103,237 ; vpackuswb %ymm5,%ymm8,%ymm5
+ DB 196,227,77,56,252,1 ; vinserti128 $0x1,%xmm4,%ymm6,%ymm7
+ DB 196,227,77,70,228,49 ; vperm2i128 $0x31,%ymm4,%ymm6,%ymm4
+ DB 197,197,103,228 ; vpackuswb %ymm4,%ymm7,%ymm4
+ DB 197,253,111,53,240,4,0,0 ; vmovdqa 0x4f0(%rip),%ymm6 # 3160 <_sk_difference_hsw_8bit+0x4cc>
+ DB 197,221,219,254 ; vpand %ymm6,%ymm4,%ymm7
+ DB 197,213,219,246 ; vpand %ymm6,%ymm5,%ymm6
+ DB 197,237,252,192 ; vpaddb %ymm0,%ymm2,%ymm0
+ DB 197,229,252,201 ; vpaddb %ymm1,%ymm3,%ymm1
+ DB 197,245,248,204 ; vpsubb %ymm4,%ymm1,%ymm1
+ DB 197,253,248,197 ; vpsubb %ymm5,%ymm0,%ymm0
+ DB 197,253,248,198 ; vpsubb %ymm6,%ymm0,%ymm0
+ DB 197,245,248,207 ; vpsubb %ymm7,%ymm1,%ymm1
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 255,224 ; jmpq *%rax
+
PUBLIC _sk_difference_hsw_8bit
_sk_difference_hsw_8bit LABEL PROC
- DB 197,125,111,5,71,6,0,0 ; vmovdqa 0x647(%rip),%ymm8 # 3220 <_sk_exclusion_hsw_8bit+0x4c0>
+ DB 197,125,111,5,228,4,0,0 ; vmovdqa 0x4e4(%rip),%ymm8 # 3180 <_sk_difference_hsw_8bit+0x4ec>
DB 196,194,101,0,224 ; vpshufb %ymm8,%ymm3,%ymm4
DB 196,194,109,0,240 ; vpshufb %ymm8,%ymm2,%ymm6
DB 196,98,125,48,208 ; vpmovzxbw %xmm0,%ymm10
@@ -41759,98 +41803,23 @@ _sk_difference_hsw_8bit LABEL PROC
DB 196,227,77,56,253,1 ; vinserti128 $0x1,%xmm5,%ymm6,%ymm7
DB 196,227,77,70,237,49 ; vperm2i128 $0x31,%ymm5,%ymm6,%ymm5
DB 197,197,103,237 ; vpackuswb %ymm5,%ymm7,%ymm5
- DB 197,165,218,245 ; vpminub %ymm5,%ymm11,%ymm6
- DB 197,181,218,252 ; vpminub %ymm4,%ymm9,%ymm7
- DB 197,197,252,255 ; vpaddb %ymm7,%ymm7,%ymm7
- DB 197,205,252,246 ; vpaddb %ymm6,%ymm6,%ymm6
+ DB 197,181,218,228 ; vpminub %ymm4,%ymm9,%ymm4
+ DB 197,165,218,237 ; vpminub %ymm5,%ymm11,%ymm5
+ DB 197,253,111,53,174,3,0,0 ; vmovdqa 0x3ae(%rip),%ymm6 # 31a0 <_sk_difference_hsw_8bit+0x50c>
+ DB 197,213,219,254 ; vpand %ymm6,%ymm5,%ymm7
+ DB 197,221,219,246 ; vpand %ymm6,%ymm4,%ymm6
DB 197,237,252,192 ; vpaddb %ymm0,%ymm2,%ymm0
DB 197,229,252,201 ; vpaddb %ymm1,%ymm3,%ymm1
- DB 197,245,248,246 ; vpsubb %ymm6,%ymm1,%ymm6
- DB 197,253,248,255 ; vpsubb %ymm7,%ymm0,%ymm7
DB 197,245,248,205 ; vpsubb %ymm5,%ymm1,%ymm1
DB 197,253,248,196 ; vpsubb %ymm4,%ymm0,%ymm0
- DB 196,226,125,88,37,160,1,0,0 ; vpbroadcastd 0x1a0(%rip),%ymm4 # 2ef0 <_sk_exclusion_hsw_8bit+0x190>
- DB 196,227,125,76,199,64 ; vpblendvb %ymm4,%ymm7,%ymm0,%ymm0
- DB 196,227,117,76,206,64 ; vpblendvb %ymm4,%ymm6,%ymm1,%ymm1
- DB 72,173 ; lods %ds:(%rsi),%rax
- DB 255,224 ; jmpq *%rax
-
-PUBLIC _sk_exclusion_hsw_8bit
-_sk_exclusion_hsw_8bit LABEL PROC
- DB 196,98,125,48,192 ; vpmovzxbw %xmm0,%ymm8
- DB 196,227,125,57,196,1 ; vextracti128 $0x1,%ymm0,%xmm4
- DB 196,98,125,48,204 ; vpmovzxbw %xmm4,%ymm9
- DB 196,98,125,48,209 ; vpmovzxbw %xmm1,%ymm10
- DB 196,227,125,57,204,1 ; vextracti128 $0x1,%ymm1,%xmm4
- DB 196,98,125,48,220 ; vpmovzxbw %xmm4,%ymm11
- DB 196,226,125,48,226 ; vpmovzxbw %xmm2,%ymm4
- DB 196,227,125,57,213,1 ; vextracti128 $0x1,%ymm2,%xmm5
- DB 196,226,125,48,237 ; vpmovzxbw %xmm5,%ymm5
- DB 196,226,125,48,243 ; vpmovzxbw %xmm3,%ymm6
- DB 196,227,125,57,223,1 ; vextracti128 $0x1,%ymm3,%xmm7
- DB 196,226,125,48,255 ; vpmovzxbw %xmm7,%ymm7
- DB 196,65,69,213,227 ; vpmullw %ymm11,%ymm7,%ymm12
- DB 196,65,77,213,234 ; vpmullw %ymm10,%ymm6,%ymm13
- DB 196,65,85,213,241 ; vpmullw %ymm9,%ymm5,%ymm14
- DB 196,65,93,213,248 ; vpmullw %ymm8,%ymm4,%ymm15
- DB 196,65,5,253,192 ; vpaddw %ymm8,%ymm15,%ymm8
- DB 196,65,13,253,201 ; vpaddw %ymm9,%ymm14,%ymm9
- DB 196,65,21,253,210 ; vpaddw %ymm10,%ymm13,%ymm10
- DB 196,65,29,253,219 ; vpaddw %ymm11,%ymm12,%ymm11
- DB 196,193,37,113,211,8 ; vpsrlw $0x8,%ymm11,%ymm11
- DB 196,193,45,113,210,8 ; vpsrlw $0x8,%ymm10,%ymm10
- DB 196,193,53,113,209,8 ; vpsrlw $0x8,%ymm9,%ymm9
- DB 196,193,61,113,208,8 ; vpsrlw $0x8,%ymm8,%ymm8
- DB 196,67,61,56,225,1 ; vinserti128 $0x1,%xmm9,%ymm8,%ymm12
- DB 196,67,61,70,193,49 ; vperm2i128 $0x31,%ymm9,%ymm8,%ymm8
- DB 196,65,29,103,192 ; vpackuswb %ymm8,%ymm12,%ymm8
- DB 196,67,45,56,203,1 ; vinserti128 $0x1,%xmm11,%ymm10,%ymm9
- DB 196,67,45,70,211,49 ; vperm2i128 $0x31,%ymm11,%ymm10,%ymm10
- DB 196,65,53,103,202 ; vpackuswb %ymm10,%ymm9,%ymm9
- DB 196,65,61,252,208 ; vpaddb %ymm8,%ymm8,%ymm10
- DB 196,65,53,252,193 ; vpaddb %ymm9,%ymm9,%ymm8
- DB 197,109,252,200 ; vpaddb %ymm0,%ymm2,%ymm9
- DB 197,101,252,217 ; vpaddb %ymm1,%ymm3,%ymm11
- DB 196,65,37,248,192 ; vpsubb %ymm8,%ymm11,%ymm8
- DB 196,65,53,248,210 ; vpsubb %ymm10,%ymm9,%ymm10
- DB 197,125,111,37,26,4,0,0 ; vmovdqa 0x41a(%rip),%ymm12 # 3240 <_sk_exclusion_hsw_8bit+0x4e0>
- DB 196,194,117,0,204 ; vpshufb %ymm12,%ymm1,%ymm1
- DB 196,194,125,0,196 ; vpshufb %ymm12,%ymm0,%ymm0
- DB 196,98,125,48,224 ; vpmovzxbw %xmm0,%ymm12
- DB 196,227,125,57,192,1 ; vextracti128 $0x1,%ymm0,%xmm0
- DB 196,226,125,48,192 ; vpmovzxbw %xmm0,%ymm0
- DB 196,98,125,48,233 ; vpmovzxbw %xmm1,%ymm13
- DB 196,227,125,57,201,1 ; vextracti128 $0x1,%ymm1,%xmm1
- DB 196,226,125,48,201 ; vpmovzxbw %xmm1,%ymm1
- DB 197,197,213,201 ; vpmullw %ymm1,%ymm7,%ymm1
- DB 196,65,77,213,237 ; vpmullw %ymm13,%ymm6,%ymm13
- DB 197,213,213,192 ; vpmullw %ymm0,%ymm5,%ymm0
- DB 196,65,93,213,228 ; vpmullw %ymm12,%ymm4,%ymm12
- DB 197,157,253,228 ; vpaddw %ymm4,%ymm12,%ymm4
- DB 197,253,253,197 ; vpaddw %ymm5,%ymm0,%ymm0
- DB 197,149,253,238 ; vpaddw %ymm6,%ymm13,%ymm5
- DB 197,245,253,207 ; vpaddw %ymm7,%ymm1,%ymm1
- DB 197,245,113,209,8 ; vpsrlw $0x8,%ymm1,%ymm1
- DB 197,213,113,213,8 ; vpsrlw $0x8,%ymm5,%ymm5
- DB 197,253,113,208,8 ; vpsrlw $0x8,%ymm0,%ymm0
- DB 197,221,113,212,8 ; vpsrlw $0x8,%ymm4,%ymm4
- DB 196,227,93,56,240,1 ; vinserti128 $0x1,%xmm0,%ymm4,%ymm6
- DB 196,227,93,70,192,49 ; vperm2i128 $0x31,%ymm0,%ymm4,%ymm0
- DB 197,205,103,192 ; vpackuswb %ymm0,%ymm6,%ymm0
- DB 196,227,85,56,225,1 ; vinserti128 $0x1,%xmm1,%ymm5,%ymm4
- DB 196,227,85,70,201,49 ; vperm2i128 $0x31,%ymm1,%ymm5,%ymm1
- DB 197,221,103,201 ; vpackuswb %ymm1,%ymm4,%ymm1
- DB 197,165,248,201 ; vpsubb %ymm1,%ymm11,%ymm1
- DB 197,181,248,192 ; vpsubb %ymm0,%ymm9,%ymm0
- DB 196,226,125,88,37,61,0,0,0 ; vpbroadcastd 0x3d(%rip),%ymm4 # 2ef4 <_sk_exclusion_hsw_8bit+0x194>
- DB 196,195,125,76,194,64 ; vpblendvb %ymm4,%ymm10,%ymm0,%ymm0
- DB 196,195,117,76,200,64 ; vpblendvb %ymm4,%ymm8,%ymm1,%ymm1
+ DB 197,253,248,198 ; vpsubb %ymm6,%ymm0,%ymm0
+ DB 197,245,248,207 ; vpsubb %ymm7,%ymm1,%ymm1
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
ALIGN 4
DB 0,0 ; add %al,(%rax)
- DB 127,67 ; jg 2f0f <_sk_exclusion_hsw_8bit+0x1af>
+ DB 127,67 ; jg 2e5f <_sk_difference_hsw_8bit+0x1cb>
DB 1,1 ; add %eax,(%rcx)
DB 1,0 ; add %eax,(%rax)
DB 0,0 ; add %al,(%rax)
@@ -41860,9 +41829,9 @@ ALIGN 4
DB 0,0 ; add %al,(%rax)
DB 0,255 ; add %bh,%bh
DB 0,0 ; add %al,(%rax)
- DB 127,67 ; jg 2f23 <_sk_exclusion_hsw_8bit+0x1c3>
+ DB 127,67 ; jg 2e73 <_sk_difference_hsw_8bit+0x1df>
DB 0,0 ; add %al,(%rax)
- DB 127,67 ; jg 2f27 <_sk_exclusion_hsw_8bit+0x1c7>
+ DB 127,67 ; jg 2e77 <_sk_difference_hsw_8bit+0x1e3>
DB 0,0 ; add %al,(%rax)
DB 0,255 ; add %bh,%bh
DB 255 ; (bad)
@@ -41871,12 +41840,6 @@ ALIGN 4
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,0 ; incl (%rax)
- DB 255 ; (bad)
- DB 255 ; (bad)
- DB 255,0 ; incl (%rax)
- DB 255 ; (bad)
- DB 255 ; (bad)
- DB 255,0 ; incl (%rax)
ALIGN 32
DB 0,0 ; add %al,(%rax)
@@ -42355,18 +42318,31 @@ ALIGN 32
DB 15 ; (bad)
DB 15 ; (bad)
DB 15 ; (bad)
- DB 15,3,3 ; lsl (%rbx),%eax
+ DB 15,255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,0 ; incl (%rax)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,0 ; incl (%rax)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,0 ; incl (%rax)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,0 ; incl (%rax)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,0 ; incl (%rax)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,0 ; incl (%rax)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,0 ; incl (%rax)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,0 ; incl (%rax)
DB 3,3 ; add (%rbx),%eax
- DB 7 ; (bad)
- DB 7 ; (bad)
- DB 7 ; (bad)
- DB 7 ; (bad)
- DB 11,11 ; or (%rbx),%ecx
- DB 11,11 ; or (%rbx),%ecx
- DB 15 ; (bad)
- DB 15 ; (bad)
- DB 15 ; (bad)
- DB 15,3,3 ; lsl (%rbx),%eax
DB 3,3 ; add (%rbx),%eax
DB 7 ; (bad)
DB 7 ; (bad)
@@ -42388,18 +42364,30 @@ ALIGN 32
DB 15 ; (bad)
DB 15 ; (bad)
DB 15 ; (bad)
- DB 15,3,3 ; lsl (%rbx),%eax
- DB 3,3 ; add (%rbx),%eax
- DB 7 ; (bad)
- DB 7 ; (bad)
- DB 7 ; (bad)
- DB 7 ; (bad)
- DB 11,11 ; or (%rbx),%ecx
- DB 11,11 ; or (%rbx),%ecx
- DB 15 ; (bad)
- DB 15 ; .byte 0xf
- DB 15 ; .byte 0xf
- DB 15 ; .byte 0xf
+ DB 15,255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,0 ; incl (%rax)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,0 ; incl (%rax)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,0 ; incl (%rax)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,0 ; incl (%rax)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,0 ; incl (%rax)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,0 ; incl (%rax)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,0 ; incl (%rax)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,0 ; incl (%rax)
ALIGN 16
DB 0,2 ; add %al,(%rdx)
@@ -42510,7 +42498,7 @@ _sk_uniform_color_sse41_8bit LABEL PROC
PUBLIC _sk_set_rgb_sse41_8bit
_sk_set_rgb_sse41_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 243,15,16,37,236,33,0,0 ; movss 0x21ec(%rip),%xmm4 # 236c <_sk_exclusion_sse41_8bit+0x137>
+ DB 243,15,16,37,88,33,0,0 ; movss 0x2158(%rip),%xmm4 # 22d8 <_sk_difference_sse41_8bit+0x169>
DB 243,15,16,40 ; movss (%rax),%xmm5
DB 243,15,89,236 ; mulss %xmm4,%xmm5
DB 243,72,15,44,205 ; cvttss2si %xmm5,%rcx
@@ -42525,7 +42513,7 @@ _sk_set_rgb_sse41_8bit LABEL PROC
DB 9,208 ; or %edx,%eax
DB 102,15,110,224 ; movd %eax,%xmm4
DB 102,15,112,228,0 ; pshufd $0x0,%xmm4,%xmm4
- DB 102,15,111,45,192,33,0,0 ; movdqa 0x21c0(%rip),%xmm5 # 2380 <_sk_exclusion_sse41_8bit+0x14b>
+ DB 102,15,111,45,48,33,0,0 ; movdqa 0x2130(%rip),%xmm5 # 22f0 <_sk_difference_sse41_8bit+0x181>
DB 102,15,219,205 ; pand %xmm5,%xmm1
DB 102,15,219,197 ; pand %xmm5,%xmm0
DB 102,15,235,196 ; por %xmm4,%xmm0
@@ -42537,12 +42525,12 @@ PUBLIC _sk_premul_sse41_8bit
_sk_premul_sse41_8bit LABEL PROC
DB 102,15,111,225 ; movdqa %xmm1,%xmm4
DB 102,15,111,232 ; movdqa %xmm0,%xmm5
- DB 102,15,111,5,172,33,0,0 ; movdqa 0x21ac(%rip),%xmm0 # 2390 <_sk_exclusion_sse41_8bit+0x15b>
+ DB 102,15,111,5,28,33,0,0 ; movdqa 0x211c(%rip),%xmm0 # 2300 <_sk_difference_sse41_8bit+0x191>
DB 102,15,111,253 ; movdqa %xmm5,%xmm7
DB 102,15,56,0,248 ; pshufb %xmm0,%xmm7
DB 102,15,111,244 ; movdqa %xmm4,%xmm6
DB 102,15,56,0,240 ; pshufb %xmm0,%xmm6
- DB 102,15,111,5,162,33,0,0 ; movdqa 0x21a2(%rip),%xmm0 # 23a0 <_sk_exclusion_sse41_8bit+0x16b>
+ DB 102,15,111,5,18,33,0,0 ; movdqa 0x2112(%rip),%xmm0 # 2310 <_sk_difference_sse41_8bit+0x1a1>
DB 102,15,235,240 ; por %xmm0,%xmm6
DB 102,15,235,248 ; por %xmm0,%xmm7
DB 102,69,15,239,192 ; pxor %xmm8,%xmm8
@@ -42573,7 +42561,7 @@ _sk_premul_sse41_8bit LABEL PROC
PUBLIC _sk_swap_rb_sse41_8bit
_sk_swap_rb_sse41_8bit LABEL PROC
- DB 102,15,111,37,47,33,0,0 ; movdqa 0x212f(%rip),%xmm4 # 23b0 <_sk_exclusion_sse41_8bit+0x17b>
+ DB 102,15,111,37,159,32,0,0 ; movdqa 0x209f(%rip),%xmm4 # 2320 <_sk_difference_sse41_8bit+0x1b1>
DB 102,15,56,0,196 ; pshufb %xmm4,%xmm0
DB 102,15,56,0,204 ; pshufb %xmm4,%xmm1
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -42694,7 +42682,7 @@ _sk_load_8888_dst_sse41_8bit LABEL PROC
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 233,255,255,255,221 ; jmpq ffffffffde000410 <_sk_exclusion_sse41_8bit+0xffffffffddffe1db>
+ DB 233,255,255,255,221 ; jmpq ffffffffde000410 <_sk_difference_sse41_8bit+0xffffffffddffe2a1>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; .byte 0xff
@@ -42773,7 +42761,7 @@ _sk_load_bgra_sse41_8bit LABEL PROC
DB 117,35 ; jne 504 <_sk_load_bgra_sse41_8bit+0x44>
DB 243,66,15,111,76,130,16 ; movdqu 0x10(%rdx,%r8,4),%xmm1
DB 243,66,15,111,4,130 ; movdqu (%rdx,%r8,4),%xmm0
- DB 102,15,111,37,202,30,0,0 ; movdqa 0x1eca(%rip),%xmm4 # 23c0 <_sk_exclusion_sse41_8bit+0x18b>
+ DB 102,15,111,37,58,30,0,0 ; movdqa 0x1e3a(%rip),%xmm4 # 2330 <_sk_difference_sse41_8bit+0x1c1>
DB 102,15,56,0,196 ; pshufb %xmm4,%xmm0
DB 102,15,56,0,204 ; pshufb %xmm4,%xmm1
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -42834,7 +42822,7 @@ _sk_load_bgra_dst_sse41_8bit LABEL PROC
DB 117,35 ; jne 5d4 <_sk_load_bgra_dst_sse41_8bit+0x44>
DB 243,66,15,111,92,130,16 ; movdqu 0x10(%rdx,%r8,4),%xmm3
DB 243,66,15,111,20,130 ; movdqu (%rdx,%r8,4),%xmm2
- DB 102,15,111,37,10,30,0,0 ; movdqa 0x1e0a(%rip),%xmm4 # 23d0 <_sk_exclusion_sse41_8bit+0x19b>
+ DB 102,15,111,37,122,29,0,0 ; movdqa 0x1d7a(%rip),%xmm4 # 2340 <_sk_difference_sse41_8bit+0x1d1>
DB 102,15,56,0,212 ; pshufb %xmm4,%xmm2
DB 102,15,56,0,220 ; pshufb %xmm4,%xmm3
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -42891,7 +42879,7 @@ _sk_store_bgra_sse41_8bit LABEL PROC
DB 72,15,175,209 ; imul %rcx,%rdx
DB 72,193,226,2 ; shl $0x2,%rdx
DB 72,3,16 ; add (%rax),%rdx
- DB 102,15,111,53,92,29,0,0 ; movdqa 0x1d5c(%rip),%xmm6 # 23e0 <_sk_exclusion_sse41_8bit+0x1ab>
+ DB 102,15,111,53,204,28,0,0 ; movdqa 0x1ccc(%rip),%xmm6 # 2350 <_sk_difference_sse41_8bit+0x1e1>
DB 102,15,111,233 ; movdqa %xmm1,%xmm5
DB 102,15,56,0,238 ; pshufb %xmm6,%xmm5
DB 102,15,111,224 ; movdqa %xmm0,%xmm4
@@ -42955,7 +42943,7 @@ _sk_load_a8_sse41_8bit LABEL PROC
DB 77,133,201 ; test %r9,%r9
DB 117,42 ; jne 76b <_sk_load_a8_sse41_8bit+0x47>
DB 102,66,15,56,48,12,2 ; pmovzxbw (%rdx,%r8,1),%xmm1
- DB 102,15,219,13,160,28,0,0 ; pand 0x1ca0(%rip),%xmm1 # 23f0 <_sk_exclusion_sse41_8bit+0x1bb>
+ DB 102,15,219,13,16,28,0,0 ; pand 0x1c10(%rip),%xmm1 # 2360 <_sk_difference_sse41_8bit+0x1f1>
DB 102,15,239,228 ; pxor %xmm4,%xmm4
DB 102,15,56,51,193 ; pmovzxwd %xmm1,%xmm0
DB 102,15,105,204 ; punpckhwd %xmm4,%xmm1
@@ -43003,7 +42991,7 @@ _sk_load_a8_sse41_8bit LABEL PROC
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 233,255,255,255,222 ; jmpq ffffffffdf00080c <_sk_exclusion_sse41_8bit+0xffffffffdeffe5d7>
+ DB 233,255,255,255,222 ; jmpq ffffffffdf00080c <_sk_difference_sse41_8bit+0xffffffffdeffe69d>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,211 ; callq *%rbx
@@ -43026,7 +43014,7 @@ _sk_load_a8_dst_sse41_8bit LABEL PROC
DB 77,133,201 ; test %r9,%r9
DB 117,42 ; jne 85f <_sk_load_a8_dst_sse41_8bit+0x47>
DB 102,66,15,56,48,28,2 ; pmovzxbw (%rdx,%r8,1),%xmm3
- DB 102,15,219,29,188,27,0,0 ; pand 0x1bbc(%rip),%xmm3 # 2400 <_sk_exclusion_sse41_8bit+0x1cb>
+ DB 102,15,219,29,44,27,0,0 ; pand 0x1b2c(%rip),%xmm3 # 2370 <_sk_difference_sse41_8bit+0x201>
DB 102,15,239,228 ; pxor %xmm4,%xmm4
DB 102,15,56,51,211 ; pmovzxwd %xmm3,%xmm2
DB 102,15,105,220 ; punpckhwd %xmm4,%xmm3
@@ -43074,7 +43062,7 @@ _sk_load_a8_dst_sse41_8bit LABEL PROC
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 233,255,255,255,222 ; jmpq ffffffffdf000900 <_sk_exclusion_sse41_8bit+0xffffffffdeffe6cb>
+ DB 233,255,255,255,222 ; jmpq ffffffffdf000900 <_sk_difference_sse41_8bit+0xffffffffdeffe791>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,211 ; callq *%rbx
@@ -43094,7 +43082,7 @@ _sk_store_a8_sse41_8bit LABEL PROC
DB 72,99,87,8 ; movslq 0x8(%rdi),%rdx
DB 72,15,175,209 ; imul %rcx,%rdx
DB 72,3,16 ; add (%rax),%rdx
- DB 102,15,111,45,228,26,0,0 ; movdqa 0x1ae4(%rip),%xmm5 # 2410 <_sk_exclusion_sse41_8bit+0x1db>
+ DB 102,15,111,45,84,26,0,0 ; movdqa 0x1a54(%rip),%xmm5 # 2380 <_sk_difference_sse41_8bit+0x211>
DB 102,15,111,241 ; movdqa %xmm1,%xmm6
DB 102,15,56,0,245 ; pshufb %xmm5,%xmm6
DB 102,15,111,224 ; movdqa %xmm0,%xmm4
@@ -43102,7 +43090,7 @@ _sk_store_a8_sse41_8bit LABEL PROC
DB 102,15,108,230 ; punpcklqdq %xmm6,%xmm4
DB 77,133,201 ; test %r9,%r9
DB 117,19 ; jne 95a <_sk_store_a8_sse41_8bit+0x4e>
- DB 102,15,56,0,37,240,26,0,0 ; pshufb 0x1af0(%rip),%xmm4 # 2440 <_sk_exclusion_sse41_8bit+0x20b>
+ DB 102,15,56,0,37,96,26,0,0 ; pshufb 0x1a60(%rip),%xmm4 # 23b0 <_sk_difference_sse41_8bit+0x241>
DB 102,66,15,214,36,2 ; movq %xmm4,(%rdx,%r8,1)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -43118,13 +43106,13 @@ _sk_store_a8_sse41_8bit LABEL PROC
DB 102,66,15,58,20,36,2,0 ; pextrb $0x0,%xmm4,(%rdx,%r8,1)
DB 235,209 ; jmp 956 <_sk_store_a8_sse41_8bit+0x4a>
DB 102,66,15,58,20,100,2,2,4 ; pextrb $0x4,%xmm4,0x2(%rdx,%r8,1)
- DB 102,15,56,0,37,153,26,0,0 ; pshufb 0x1a99(%rip),%xmm4 # 2430 <_sk_exclusion_sse41_8bit+0x1fb>
+ DB 102,15,56,0,37,9,26,0,0 ; pshufb 0x1a09(%rip),%xmm4 # 23a0 <_sk_difference_sse41_8bit+0x231>
DB 102,66,15,58,21,36,2,0 ; pextrw $0x0,%xmm4,(%rdx,%r8,1)
DB 235,181 ; jmp 956 <_sk_store_a8_sse41_8bit+0x4a>
DB 102,66,15,58,20,100,2,6,12 ; pextrb $0xc,%xmm4,0x6(%rdx,%r8,1)
DB 102,66,15,58,20,100,2,5,10 ; pextrb $0xa,%xmm4,0x5(%rdx,%r8,1)
DB 102,66,15,58,20,100,2,4,8 ; pextrb $0x8,%xmm4,0x4(%rdx,%r8,1)
- DB 102,15,56,0,37,91,26,0,0 ; pshufb 0x1a5b(%rip),%xmm4 # 2420 <_sk_exclusion_sse41_8bit+0x1eb>
+ DB 102,15,56,0,37,203,25,0,0 ; pshufb 0x19cb(%rip),%xmm4 # 2390 <_sk_difference_sse41_8bit+0x221>
DB 102,66,15,126,36,2 ; movd %xmm4,(%rdx,%r8,1)
DB 235,137 ; jmp 956 <_sk_store_a8_sse41_8bit+0x4a>
DB 15,31,0 ; nopl (%rax)
@@ -43162,14 +43150,14 @@ _sk_load_g8_sse41_8bit LABEL PROC
DB 77,133,201 ; test %r9,%r9
DB 117,66 ; jne a4b <_sk_load_g8_sse41_8bit+0x5f>
DB 102,66,15,56,48,12,2 ; pmovzxbw (%rdx,%r8,1),%xmm1
- DB 102,15,219,13,56,26,0,0 ; pand 0x1a38(%rip),%xmm1 # 2450 <_sk_exclusion_sse41_8bit+0x21b>
+ DB 102,15,219,13,168,25,0,0 ; pand 0x19a8(%rip),%xmm1 # 23c0 <_sk_difference_sse41_8bit+0x251>
DB 102,15,239,228 ; pxor %xmm4,%xmm4
DB 102,15,56,51,193 ; pmovzxwd %xmm1,%xmm0
DB 102,15,105,204 ; punpckhwd %xmm4,%xmm1
- DB 102,15,111,37,51,26,0,0 ; movdqa 0x1a33(%rip),%xmm4 # 2460 <_sk_exclusion_sse41_8bit+0x22b>
+ DB 102,15,111,37,163,25,0,0 ; movdqa 0x19a3(%rip),%xmm4 # 23d0 <_sk_difference_sse41_8bit+0x261>
DB 102,15,56,64,204 ; pmulld %xmm4,%xmm1
DB 102,15,56,64,196 ; pmulld %xmm4,%xmm0
- DB 102,15,111,37,49,26,0,0 ; movdqa 0x1a31(%rip),%xmm4 # 2470 <_sk_exclusion_sse41_8bit+0x23b>
+ DB 102,15,111,37,161,25,0,0 ; movdqa 0x19a1(%rip),%xmm4 # 23e0 <_sk_difference_sse41_8bit+0x271>
DB 102,15,235,196 ; por %xmm4,%xmm0
DB 102,15,235,204 ; por %xmm4,%xmm1
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -43214,7 +43202,7 @@ _sk_load_g8_sse41_8bit LABEL PROC
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 232,255,255,255,221 ; callq ffffffffde000af0 <_sk_exclusion_sse41_8bit+0xffffffffddffe8bb>
+ DB 232,255,255,255,221 ; callq ffffffffde000af0 <_sk_difference_sse41_8bit+0xffffffffddffe981>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,210 ; callq *%rdx
@@ -43237,14 +43225,14 @@ _sk_load_g8_dst_sse41_8bit LABEL PROC
DB 77,133,201 ; test %r9,%r9
DB 117,66 ; jne b5b <_sk_load_g8_dst_sse41_8bit+0x5f>
DB 102,66,15,56,48,28,2 ; pmovzxbw (%rdx,%r8,1),%xmm3
- DB 102,15,219,29,88,25,0,0 ; pand 0x1958(%rip),%xmm3 # 2480 <_sk_exclusion_sse41_8bit+0x24b>
+ DB 102,15,219,29,200,24,0,0 ; pand 0x18c8(%rip),%xmm3 # 23f0 <_sk_difference_sse41_8bit+0x281>
DB 102,15,239,228 ; pxor %xmm4,%xmm4
DB 102,15,56,51,211 ; pmovzxwd %xmm3,%xmm2
DB 102,15,105,220 ; punpckhwd %xmm4,%xmm3
- DB 102,15,111,37,83,25,0,0 ; movdqa 0x1953(%rip),%xmm4 # 2490 <_sk_exclusion_sse41_8bit+0x25b>
+ DB 102,15,111,37,195,24,0,0 ; movdqa 0x18c3(%rip),%xmm4 # 2400 <_sk_difference_sse41_8bit+0x291>
DB 102,15,56,64,220 ; pmulld %xmm4,%xmm3
DB 102,15,56,64,212 ; pmulld %xmm4,%xmm2
- DB 102,15,111,37,81,25,0,0 ; movdqa 0x1951(%rip),%xmm4 # 24a0 <_sk_exclusion_sse41_8bit+0x26b>
+ DB 102,15,111,37,193,24,0,0 ; movdqa 0x18c1(%rip),%xmm4 # 2410 <_sk_difference_sse41_8bit+0x2a1>
DB 102,15,235,212 ; por %xmm4,%xmm2
DB 102,15,235,220 ; por %xmm4,%xmm3
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -43289,7 +43277,7 @@ _sk_load_g8_dst_sse41_8bit LABEL PROC
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 232,255,255,255,221 ; callq ffffffffde000c00 <_sk_exclusion_sse41_8bit+0xffffffffddffe9cb>
+ DB 232,255,255,255,221 ; callq ffffffffde000c00 <_sk_difference_sse41_8bit+0xffffffffddffea91>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,210 ; callq *%rdx
@@ -43315,7 +43303,7 @@ _sk_srcover_rgba_8888_sse41_8bit LABEL PROC
DB 243,70,15,111,68,138,16 ; movdqu 0x10(%rdx,%r9,4),%xmm8
DB 243,70,15,111,12,138 ; movdqu (%rdx,%r9,4),%xmm9
DB 77,133,192 ; test %r8,%r8
- DB 102,15,111,37,103,24,0,0 ; movdqa 0x1867(%rip),%xmm4 # 24b0 <_sk_exclusion_sse41_8bit+0x27b>
+ DB 102,15,111,37,215,23,0,0 ; movdqa 0x17d7(%rip),%xmm4 # 2420 <_sk_difference_sse41_8bit+0x2b1>
DB 102,15,111,241 ; movdqa %xmm1,%xmm6
DB 102,15,56,0,244 ; pshufb %xmm4,%xmm6
DB 102,15,111,248 ; movdqa %xmm0,%xmm7
@@ -43435,7 +43423,7 @@ _sk_scale_1_float_sse41_8bit LABEL PROC
DB 102,15,111,232 ; movdqa %xmm0,%xmm5
DB 72,173 ; lods %ds:(%rsi),%rax
DB 243,15,16,0 ; movss (%rax),%xmm0
- DB 243,15,89,5,70,21,0,0 ; mulss 0x1546(%rip),%xmm0 # 2370 <_sk_exclusion_sse41_8bit+0x13b>
+ DB 243,15,89,5,178,20,0,0 ; mulss 0x14b2(%rip),%xmm0 # 22dc <_sk_difference_sse41_8bit+0x16d>
DB 243,15,44,192 ; cvttss2si %xmm0,%eax
DB 15,87,192 ; xorps %xmm0,%xmm0
DB 102,68,15,56,48,197 ; pmovzxbw %xmm5,%xmm8
@@ -43443,7 +43431,7 @@ _sk_scale_1_float_sse41_8bit LABEL PROC
DB 102,68,15,56,48,204 ; pmovzxbw %xmm4,%xmm9
DB 102,15,104,224 ; punpckhbw %xmm0,%xmm4
DB 102,15,110,240 ; movd %eax,%xmm6
- DB 102,15,56,0,53,110,22,0,0 ; pshufb 0x166e(%rip),%xmm6 # 24c0 <_sk_exclusion_sse41_8bit+0x28b>
+ DB 102,15,56,0,53,222,21,0,0 ; pshufb 0x15de(%rip),%xmm6 # 2430 <_sk_difference_sse41_8bit+0x2c1>
DB 102,15,111,206 ; movdqa %xmm6,%xmm1
DB 102,65,15,213,201 ; pmullw %xmm9,%xmm1
DB 102,15,111,198 ; movdqa %xmm6,%xmm0
@@ -43476,11 +43464,11 @@ _sk_scale_u8_sse41_8bit LABEL PROC
DB 77,133,201 ; test %r9,%r9
DB 15,133,160,0,0,0 ; jne f63 <_sk_scale_u8_sse41_8bit+0xc1>
DB 102,66,15,56,48,52,2 ; pmovzxbw (%rdx,%r8,1),%xmm6
- DB 102,15,219,53,254,21,0,0 ; pand 0x15fe(%rip),%xmm6 # 24d0 <_sk_exclusion_sse41_8bit+0x29b>
+ DB 102,15,219,53,110,21,0,0 ; pand 0x156e(%rip),%xmm6 # 2440 <_sk_difference_sse41_8bit+0x2d1>
DB 102,69,15,239,192 ; pxor %xmm8,%xmm8
DB 102,15,111,254 ; movdqa %xmm6,%xmm7
- DB 102,15,56,0,61,252,21,0,0 ; pshufb 0x15fc(%rip),%xmm7 # 24e0 <_sk_exclusion_sse41_8bit+0x2ab>
- DB 102,15,56,0,53,3,22,0,0 ; pshufb 0x1603(%rip),%xmm6 # 24f0 <_sk_exclusion_sse41_8bit+0x2bb>
+ DB 102,15,56,0,61,108,21,0,0 ; pshufb 0x156c(%rip),%xmm7 # 2450 <_sk_difference_sse41_8bit+0x2e1>
+ DB 102,15,56,0,53,115,21,0,0 ; pshufb 0x1573(%rip),%xmm6 # 2460 <_sk_difference_sse41_8bit+0x2f1>
DB 102,68,15,56,48,200 ; pmovzxbw %xmm0,%xmm9
DB 102,65,15,104,192 ; punpckhbw %xmm8,%xmm0
DB 102,68,15,56,48,209 ; pmovzxbw %xmm1,%xmm10
@@ -43563,7 +43551,7 @@ PUBLIC _sk_lerp_1_float_sse41_8bit
_sk_lerp_1_float_sse41_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 243,15,16,32 ; movss (%rax),%xmm4
- DB 243,15,89,37,74,19,0,0 ; mulss 0x134a(%rip),%xmm4 # 2374 <_sk_exclusion_sse41_8bit+0x13f>
+ DB 243,15,89,37,182,18,0,0 ; mulss 0x12b6(%rip),%xmm4 # 22e0 <_sk_difference_sse41_8bit+0x171>
DB 243,15,44,196 ; cvttss2si %xmm4,%eax
DB 102,15,110,224 ; movd %eax,%xmm4
DB 102,15,96,228 ; punpcklbw %xmm4,%xmm4
@@ -43574,7 +43562,7 @@ _sk_lerp_1_float_sse41_8bit LABEL PROC
DB 102,65,15,104,193 ; punpckhbw %xmm9,%xmm0
DB 102,68,15,56,48,217 ; pmovzxbw %xmm1,%xmm11
DB 102,65,15,104,201 ; punpckhbw %xmm9,%xmm1
- DB 102,15,56,0,61,155,20,0,0 ; pshufb 0x149b(%rip),%xmm7 # 2500 <_sk_exclusion_sse41_8bit+0x2cb>
+ DB 102,15,56,0,61,11,20,0,0 ; pshufb 0x140b(%rip),%xmm7 # 2470 <_sk_difference_sse41_8bit+0x301>
DB 102,68,15,111,231 ; movdqa %xmm7,%xmm12
DB 102,69,15,213,227 ; pmullw %xmm11,%xmm12
DB 102,68,15,111,239 ; movdqa %xmm7,%xmm13
@@ -43635,11 +43623,11 @@ _sk_lerp_u8_sse41_8bit LABEL PROC
DB 77,133,201 ; test %r9,%r9
DB 15,133,46,1,0,0 ; jne 128d <_sk_lerp_u8_sse41_8bit+0x14f>
DB 102,66,15,56,48,60,2 ; pmovzxbw (%rdx,%r8,1),%xmm7
- DB 102,15,219,61,162,19,0,0 ; pand 0x13a2(%rip),%xmm7 # 2510 <_sk_exclusion_sse41_8bit+0x2db>
+ DB 102,15,219,61,18,19,0,0 ; pand 0x1312(%rip),%xmm7 # 2480 <_sk_difference_sse41_8bit+0x311>
DB 102,69,15,239,192 ; pxor %xmm8,%xmm8
DB 102,15,111,247 ; movdqa %xmm7,%xmm6
- DB 102,15,56,0,53,160,19,0,0 ; pshufb 0x13a0(%rip),%xmm6 # 2520 <_sk_exclusion_sse41_8bit+0x2eb>
- DB 102,15,56,0,61,167,19,0,0 ; pshufb 0x13a7(%rip),%xmm7 # 2530 <_sk_exclusion_sse41_8bit+0x2fb>
+ DB 102,15,56,0,53,16,19,0,0 ; pshufb 0x1310(%rip),%xmm6 # 2490 <_sk_difference_sse41_8bit+0x321>
+ DB 102,15,56,0,61,23,19,0,0 ; pshufb 0x1317(%rip),%xmm7 # 24a0 <_sk_difference_sse41_8bit+0x331>
DB 102,68,15,56,48,200 ; pmovzxbw %xmm0,%xmm9
DB 102,65,15,104,192 ; punpckhbw %xmm8,%xmm0
DB 102,68,15,56,48,209 ; pmovzxbw %xmm1,%xmm10
@@ -43734,7 +43722,7 @@ _sk_lerp_u8_sse41_8bit LABEL PROC
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 233,255,255,255,222 ; jmpq ffffffffdf001338 <_sk_exclusion_sse41_8bit+0xffffffffdefff103>
+ DB 233,255,255,255,222 ; jmpq ffffffffdf001338 <_sk_difference_sse41_8bit+0xffffffffdefff1c9>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,211 ; callq *%rbx
@@ -43762,7 +43750,7 @@ _sk_move_dst_src_sse41_8bit LABEL PROC
PUBLIC _sk_black_color_sse41_8bit
_sk_black_color_sse41_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 15,40,5,223,17,0,0 ; movaps 0x11df(%rip),%xmm0 # 2540 <_sk_exclusion_sse41_8bit+0x30b>
+ DB 15,40,5,79,17,0,0 ; movaps 0x114f(%rip),%xmm0 # 24b0 <_sk_difference_sse41_8bit+0x341>
DB 15,40,200 ; movaps %xmm0,%xmm1
DB 255,224 ; jmpq *%rax
@@ -43782,7 +43770,7 @@ _sk_clear_sse41_8bit LABEL PROC
PUBLIC _sk_srcatop_sse41_8bit
_sk_srcatop_sse41_8bit LABEL PROC
- DB 102,68,15,111,21,203,17,0,0 ; movdqa 0x11cb(%rip),%xmm10 # 2550 <_sk_exclusion_sse41_8bit+0x31b>
+ DB 102,68,15,111,21,59,17,0,0 ; movdqa 0x113b(%rip),%xmm10 # 24c0 <_sk_difference_sse41_8bit+0x351>
DB 102,68,15,111,219 ; movdqa %xmm3,%xmm11
DB 102,68,15,56,48,195 ; pmovzxbw %xmm3,%xmm8
DB 102,15,111,235 ; movdqa %xmm3,%xmm5
@@ -43850,7 +43838,7 @@ _sk_srcatop_sse41_8bit LABEL PROC
PUBLIC _sk_dstatop_sse41_8bit
_sk_dstatop_sse41_8bit LABEL PROC
- DB 102,68,15,111,29,160,16,0,0 ; movdqa 0x10a0(%rip),%xmm11 # 2560 <_sk_exclusion_sse41_8bit+0x32b>
+ DB 102,68,15,111,29,16,16,0,0 ; movdqa 0x1010(%rip),%xmm11 # 24d0 <_sk_difference_sse41_8bit+0x361>
DB 102,68,15,111,233 ; movdqa %xmm1,%xmm13
DB 102,69,15,56,0,235 ; pshufb %xmm11,%xmm13
DB 102,68,15,111,248 ; movdqa %xmm0,%xmm15
@@ -43920,7 +43908,7 @@ PUBLIC _sk_srcin_sse41_8bit
_sk_srcin_sse41_8bit LABEL PROC
DB 102,15,111,225 ; movdqa %xmm1,%xmm4
DB 102,15,111,232 ; movdqa %xmm0,%xmm5
- DB 102,15,111,5,103,15,0,0 ; movdqa 0xf67(%rip),%xmm0 # 2570 <_sk_exclusion_sse41_8bit+0x33b>
+ DB 102,15,111,5,215,14,0,0 ; movdqa 0xed7(%rip),%xmm0 # 24e0 <_sk_difference_sse41_8bit+0x371>
DB 102,15,111,243 ; movdqa %xmm3,%xmm6
DB 102,15,56,0,240 ; pshufb %xmm0,%xmm6
DB 102,15,111,250 ; movdqa %xmm2,%xmm7
@@ -43953,7 +43941,7 @@ _sk_srcin_sse41_8bit LABEL PROC
PUBLIC _sk_dstin_sse41_8bit
_sk_dstin_sse41_8bit LABEL PROC
- DB 102,15,111,37,234,14,0,0 ; movdqa 0xeea(%rip),%xmm4 # 2580 <_sk_exclusion_sse41_8bit+0x34b>
+ DB 102,15,111,37,90,14,0,0 ; movdqa 0xe5a(%rip),%xmm4 # 24f0 <_sk_difference_sse41_8bit+0x381>
DB 102,15,56,0,204 ; pshufb %xmm4,%xmm1
DB 102,15,56,0,196 ; pshufb %xmm4,%xmm0
DB 102,69,15,239,210 ; pxor %xmm10,%xmm10
@@ -43990,7 +43978,7 @@ PUBLIC _sk_srcout_sse41_8bit
_sk_srcout_sse41_8bit LABEL PROC
DB 102,15,111,225 ; movdqa %xmm1,%xmm4
DB 102,15,111,232 ; movdqa %xmm0,%xmm5
- DB 102,15,111,5,93,14,0,0 ; movdqa 0xe5d(%rip),%xmm0 # 2590 <_sk_exclusion_sse41_8bit+0x35b>
+ DB 102,15,111,5,205,13,0,0 ; movdqa 0xdcd(%rip),%xmm0 # 2500 <_sk_difference_sse41_8bit+0x391>
DB 102,15,111,250 ; movdqa %xmm2,%xmm7
DB 102,15,56,0,248 ; pshufb %xmm0,%xmm7
DB 102,15,111,243 ; movdqa %xmm3,%xmm6
@@ -44026,7 +44014,7 @@ _sk_srcout_sse41_8bit LABEL PROC
PUBLIC _sk_dstout_sse41_8bit
_sk_dstout_sse41_8bit LABEL PROC
- DB 102,15,111,37,212,13,0,0 ; movdqa 0xdd4(%rip),%xmm4 # 25a0 <_sk_exclusion_sse41_8bit+0x36b>
+ DB 102,15,111,37,68,13,0,0 ; movdqa 0xd44(%rip),%xmm4 # 2510 <_sk_difference_sse41_8bit+0x3a1>
DB 102,15,56,0,196 ; pshufb %xmm4,%xmm0
DB 102,15,56,0,204 ; pshufb %xmm4,%xmm1
DB 102,15,118,228 ; pcmpeqd %xmm4,%xmm4
@@ -44064,7 +44052,7 @@ _sk_dstout_sse41_8bit LABEL PROC
PUBLIC _sk_srcover_sse41_8bit
_sk_srcover_sse41_8bit LABEL PROC
- DB 102,15,111,53,67,13,0,0 ; movdqa 0xd43(%rip),%xmm6 # 25b0 <_sk_exclusion_sse41_8bit+0x37b>
+ DB 102,15,111,53,179,12,0,0 ; movdqa 0xcb3(%rip),%xmm6 # 2520 <_sk_difference_sse41_8bit+0x3b1>
DB 102,68,15,111,217 ; movdqa %xmm1,%xmm11
DB 102,68,15,56,0,222 ; pshufb %xmm6,%xmm11
DB 102,15,111,232 ; movdqa %xmm0,%xmm5
@@ -44103,7 +44091,7 @@ _sk_srcover_sse41_8bit LABEL PROC
PUBLIC _sk_dstover_sse41_8bit
_sk_dstover_sse41_8bit LABEL PROC
- DB 102,68,15,111,5,163,12,0,0 ; movdqa 0xca3(%rip),%xmm8 # 25c0 <_sk_exclusion_sse41_8bit+0x38b>
+ DB 102,68,15,111,5,19,12,0,0 ; movdqa 0xc13(%rip),%xmm8 # 2530 <_sk_difference_sse41_8bit+0x3c1>
DB 102,68,15,111,209 ; movdqa %xmm1,%xmm10
DB 102,68,15,56,48,201 ; pmovzxbw %xmm1,%xmm9
DB 102,15,252,203 ; paddb %xmm3,%xmm1
@@ -44178,7 +44166,7 @@ _sk_multiply_sse41_8bit LABEL PROC
DB 102,15,111,218 ; movdqa %xmm2,%xmm3
DB 102,15,111,209 ; movdqa %xmm1,%xmm2
DB 102,15,111,200 ; movdqa %xmm0,%xmm1
- DB 102,68,15,111,53,113,11,0,0 ; movdqa 0xb71(%rip),%xmm14 # 25d0 <_sk_exclusion_sse41_8bit+0x39b>
+ DB 102,68,15,111,53,225,10,0,0 ; movdqa 0xae1(%rip),%xmm14 # 2540 <_sk_difference_sse41_8bit+0x3d1>
DB 102,68,15,111,195 ; movdqa %xmm3,%xmm8
DB 102,15,111,235 ; movdqa %xmm3,%xmm5
DB 102,65,15,56,0,238 ; pshufb %xmm14,%xmm5
@@ -44303,7 +44291,7 @@ _sk_screen_sse41_8bit LABEL PROC
PUBLIC _sk_xor__sse41_8bit
_sk_xor__sse41_8bit LABEL PROC
- DB 102,68,15,111,21,63,9,0,0 ; movdqa 0x93f(%rip),%xmm10 # 25e0 <_sk_exclusion_sse41_8bit+0x3ab>
+ DB 102,68,15,111,21,175,8,0,0 ; movdqa 0x8af(%rip),%xmm10 # 2550 <_sk_difference_sse41_8bit+0x3e1>
DB 102,68,15,111,226 ; movdqa %xmm2,%xmm12
DB 102,68,15,56,48,194 ; pmovzxbw %xmm2,%xmm8
DB 102,15,111,234 ; movdqa %xmm2,%xmm5
@@ -44374,7 +44362,7 @@ _sk_xor__sse41_8bit LABEL PROC
PUBLIC _sk_darken_sse41_8bit
_sk_darken_sse41_8bit LABEL PROC
DB 102,68,15,111,240 ; movdqa %xmm0,%xmm14
- DB 102,68,15,111,5,2,8,0,0 ; movdqa 0x802(%rip),%xmm8 # 25f0 <_sk_exclusion_sse41_8bit+0x3bb>
+ DB 102,68,15,111,5,114,7,0,0 ; movdqa 0x772(%rip),%xmm8 # 2560 <_sk_difference_sse41_8bit+0x3f1>
DB 102,68,15,111,219 ; movdqa %xmm3,%xmm11
DB 102,68,15,56,48,203 ; pmovzxbw %xmm3,%xmm9
DB 102,15,111,243 ; movdqa %xmm3,%xmm6
@@ -44441,7 +44429,7 @@ _sk_darken_sse41_8bit LABEL PROC
DB 102,15,248,231 ; psubb %xmm7,%xmm4
DB 102,15,248,206 ; psubb %xmm6,%xmm1
DB 102,68,15,248,245 ; psubb %xmm5,%xmm14
- DB 15,40,5,198,6,0,0 ; movaps 0x6c6(%rip),%xmm0 # 2600 <_sk_exclusion_sse41_8bit+0x3cb>
+ DB 15,40,5,54,6,0,0 ; movaps 0x636(%rip),%xmm0 # 2570 <_sk_difference_sse41_8bit+0x401>
DB 102,68,15,56,16,244 ; pblendvb %xmm0,%xmm4,%xmm14
DB 102,65,15,56,16,200 ; pblendvb %xmm0,%xmm8,%xmm1
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -44451,7 +44439,7 @@ _sk_darken_sse41_8bit LABEL PROC
PUBLIC _sk_lighten_sse41_8bit
_sk_lighten_sse41_8bit LABEL PROC
DB 102,68,15,111,240 ; movdqa %xmm0,%xmm14
- DB 102,68,15,111,5,179,6,0,0 ; movdqa 0x6b3(%rip),%xmm8 # 2610 <_sk_exclusion_sse41_8bit+0x3db>
+ DB 102,68,15,111,5,35,6,0,0 ; movdqa 0x623(%rip),%xmm8 # 2580 <_sk_difference_sse41_8bit+0x411>
DB 102,68,15,111,219 ; movdqa %xmm3,%xmm11
DB 102,68,15,56,48,203 ; pmovzxbw %xmm3,%xmm9
DB 102,15,111,243 ; movdqa %xmm3,%xmm6
@@ -44518,169 +44506,138 @@ _sk_lighten_sse41_8bit LABEL PROC
DB 102,15,248,231 ; psubb %xmm7,%xmm4
DB 102,15,248,206 ; psubb %xmm6,%xmm1
DB 102,68,15,248,245 ; psubb %xmm5,%xmm14
- DB 15,40,5,119,5,0,0 ; movaps 0x577(%rip),%xmm0 # 2620 <_sk_exclusion_sse41_8bit+0x3eb>
+ DB 15,40,5,231,4,0,0 ; movaps 0x4e7(%rip),%xmm0 # 2590 <_sk_difference_sse41_8bit+0x421>
DB 102,68,15,56,16,244 ; pblendvb %xmm0,%xmm4,%xmm14
DB 102,65,15,56,16,200 ; pblendvb %xmm0,%xmm8,%xmm1
DB 72,173 ; lods %ds:(%rsi),%rax
DB 102,65,15,111,198 ; movdqa %xmm14,%xmm0
DB 255,224 ; jmpq *%rax
+PUBLIC _sk_exclusion_sse41_8bit
+_sk_exclusion_sse41_8bit LABEL PROC
+ DB 102,69,15,239,192 ; pxor %xmm8,%xmm8
+ DB 102,68,15,111,216 ; movdqa %xmm0,%xmm11
+ DB 102,69,15,104,216 ; punpckhbw %xmm8,%xmm11
+ DB 102,68,15,111,225 ; movdqa %xmm1,%xmm12
+ DB 102,69,15,104,224 ; punpckhbw %xmm8,%xmm12
+ DB 102,68,15,56,48,200 ; pmovzxbw %xmm0,%xmm9
+ DB 102,68,15,56,48,209 ; pmovzxbw %xmm1,%xmm10
+ DB 102,15,56,48,250 ; pmovzxbw %xmm2,%xmm7
+ DB 102,15,252,194 ; paddb %xmm2,%xmm0
+ DB 102,15,111,234 ; movdqa %xmm2,%xmm5
+ DB 102,65,15,104,232 ; punpckhbw %xmm8,%xmm5
+ DB 102,15,56,48,227 ; pmovzxbw %xmm3,%xmm4
+ DB 102,15,252,203 ; paddb %xmm3,%xmm1
+ DB 102,15,111,243 ; movdqa %xmm3,%xmm6
+ DB 102,65,15,104,240 ; punpckhbw %xmm8,%xmm6
+ DB 102,65,15,213,244 ; pmullw %xmm12,%xmm6
+ DB 102,65,15,213,235 ; pmullw %xmm11,%xmm5
+ DB 102,65,15,213,226 ; pmullw %xmm10,%xmm4
+ DB 102,65,15,213,249 ; pmullw %xmm9,%xmm7
+ DB 102,65,15,253,235 ; paddw %xmm11,%xmm5
+ DB 102,65,15,253,244 ; paddw %xmm12,%xmm6
+ DB 102,65,15,253,249 ; paddw %xmm9,%xmm7
+ DB 102,65,15,253,226 ; paddw %xmm10,%xmm4
+ DB 102,15,113,214,8 ; psrlw $0x8,%xmm6
+ DB 102,15,113,213,8 ; psrlw $0x8,%xmm5
+ DB 102,15,113,212,8 ; psrlw $0x8,%xmm4
+ DB 102,15,113,215,8 ; psrlw $0x8,%xmm7
+ DB 102,15,103,253 ; packuswb %xmm5,%xmm7
+ DB 102,15,103,230 ; packuswb %xmm6,%xmm4
+ DB 102,15,111,45,77,4,0,0 ; movdqa 0x44d(%rip),%xmm5 # 25a0 <_sk_difference_sse41_8bit+0x431>
+ DB 102,15,248,204 ; psubb %xmm4,%xmm1
+ DB 102,15,219,229 ; pand %xmm5,%xmm4
+ DB 102,15,219,239 ; pand %xmm7,%xmm5
+ DB 102,15,248,199 ; psubb %xmm7,%xmm0
+ DB 102,15,248,197 ; psubb %xmm5,%xmm0
+ DB 102,15,248,204 ; psubb %xmm4,%xmm1
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 255,224 ; jmpq *%rax
+
PUBLIC _sk_difference_sse41_8bit
_sk_difference_sse41_8bit LABEL PROC
- DB 102,68,15,111,240 ; movdqa %xmm0,%xmm14
- DB 102,68,15,111,5,100,5,0,0 ; movdqa 0x564(%rip),%xmm8 # 2630 <_sk_exclusion_sse41_8bit+0x3fb>
+ DB 102,68,15,111,193 ; movdqa %xmm1,%xmm8
+ DB 102,15,111,13,52,4,0,0 ; movdqa 0x434(%rip),%xmm1 # 25b0 <_sk_difference_sse41_8bit+0x441>
DB 102,68,15,111,219 ; movdqa %xmm3,%xmm11
DB 102,68,15,56,48,203 ; pmovzxbw %xmm3,%xmm9
- DB 102,15,111,251 ; movdqa %xmm3,%xmm7
- DB 102,65,15,56,0,248 ; pshufb %xmm8,%xmm7
+ DB 102,15,111,243 ; movdqa %xmm3,%xmm6
+ DB 102,15,56,0,241 ; pshufb %xmm1,%xmm6
DB 102,68,15,111,234 ; movdqa %xmm2,%xmm13
DB 102,68,15,56,48,210 ; pmovzxbw %xmm2,%xmm10
- DB 102,15,111,234 ; movdqa %xmm2,%xmm5
- DB 102,65,15,56,0,232 ; pshufb %xmm8,%xmm5
+ DB 102,15,111,226 ; movdqa %xmm2,%xmm4
+ DB 102,15,56,0,225 ; pshufb %xmm1,%xmm4
DB 102,69,15,239,228 ; pxor %xmm12,%xmm12
- DB 102,65,15,104,196 ; punpckhbw %xmm12,%xmm0
- DB 102,15,111,225 ; movdqa %xmm1,%xmm4
- DB 102,65,15,104,228 ; punpckhbw %xmm12,%xmm4
- DB 102,69,15,56,48,254 ; pmovzxbw %xmm14,%xmm15
- DB 102,15,56,48,245 ; pmovzxbw %xmm5,%xmm6
+ DB 102,15,111,232 ; movdqa %xmm0,%xmm5
DB 102,65,15,104,236 ; punpckhbw %xmm12,%xmm5
- DB 102,15,213,232 ; pmullw %xmm0,%xmm5
- DB 102,15,253,232 ; paddw %xmm0,%xmm5
- DB 102,15,56,48,199 ; pmovzxbw %xmm7,%xmm0
- DB 102,65,15,104,252 ; punpckhbw %xmm12,%xmm7
- DB 102,15,213,252 ; pmullw %xmm4,%xmm7
- DB 102,15,253,252 ; paddw %xmm4,%xmm7
- DB 102,65,15,111,230 ; movdqa %xmm14,%xmm4
- DB 102,68,15,252,242 ; paddb %xmm2,%xmm14
- DB 102,65,15,213,247 ; pmullw %xmm15,%xmm6
- DB 102,65,15,253,247 ; paddw %xmm15,%xmm6
- DB 102,68,15,56,48,249 ; pmovzxbw %xmm1,%xmm15
- DB 102,65,15,213,199 ; pmullw %xmm15,%xmm0
- DB 102,65,15,253,199 ; paddw %xmm15,%xmm0
- DB 102,68,15,111,249 ; movdqa %xmm1,%xmm15
- DB 102,15,252,203 ; paddb %xmm3,%xmm1
- DB 102,15,113,213,8 ; psrlw $0x8,%xmm5
+ DB 102,65,15,111,200 ; movdqa %xmm8,%xmm1
+ DB 102,65,15,104,204 ; punpckhbw %xmm12,%xmm1
+ DB 102,68,15,56,48,248 ; pmovzxbw %xmm0,%xmm15
+ DB 102,68,15,56,48,244 ; pmovzxbw %xmm4,%xmm14
+ DB 102,65,15,104,228 ; punpckhbw %xmm12,%xmm4
+ DB 102,15,213,229 ; pmullw %xmm5,%xmm4
+ DB 102,15,253,229 ; paddw %xmm5,%xmm4
+ DB 102,15,56,48,254 ; pmovzxbw %xmm6,%xmm7
+ DB 102,65,15,104,244 ; punpckhbw %xmm12,%xmm6
+ DB 102,15,213,241 ; pmullw %xmm1,%xmm6
+ DB 102,15,253,241 ; paddw %xmm1,%xmm6
+ DB 102,15,111,232 ; movdqa %xmm0,%xmm5
+ DB 102,15,252,194 ; paddb %xmm2,%xmm0
+ DB 102,69,15,213,247 ; pmullw %xmm15,%xmm14
+ DB 102,69,15,253,247 ; paddw %xmm15,%xmm14
+ DB 102,65,15,56,48,200 ; pmovzxbw %xmm8,%xmm1
+ DB 102,15,213,249 ; pmullw %xmm1,%xmm7
+ DB 102,15,253,249 ; paddw %xmm1,%xmm7
+ DB 102,65,15,111,200 ; movdqa %xmm8,%xmm1
+ DB 102,68,15,252,195 ; paddb %xmm3,%xmm8
+ DB 102,15,113,212,8 ; psrlw $0x8,%xmm4
+ DB 102,65,15,113,214,8 ; psrlw $0x8,%xmm14
+ DB 102,68,15,103,244 ; packuswb %xmm4,%xmm14
DB 102,15,113,214,8 ; psrlw $0x8,%xmm6
- DB 102,15,103,245 ; packuswb %xmm5,%xmm6
DB 102,15,113,215,8 ; psrlw $0x8,%xmm7
- DB 102,15,113,208,8 ; psrlw $0x8,%xmm0
- DB 102,15,103,199 ; packuswb %xmm7,%xmm0
- DB 102,69,15,56,0,248 ; pshufb %xmm8,%xmm15
- DB 102,65,15,56,0,224 ; pshufb %xmm8,%xmm4
+ DB 102,15,103,254 ; packuswb %xmm6,%xmm7
+ DB 102,15,111,37,121,3,0,0 ; movdqa 0x379(%rip),%xmm4 # 25b0 <_sk_difference_sse41_8bit+0x441>
+ DB 102,15,56,0,204 ; pshufb %xmm4,%xmm1
+ DB 102,15,56,0,236 ; pshufb %xmm4,%xmm5
DB 102,69,15,104,236 ; punpckhbw %xmm12,%xmm13
DB 102,69,15,104,220 ; punpckhbw %xmm12,%xmm11
- DB 102,15,56,48,236 ; pmovzxbw %xmm4,%xmm5
- DB 102,65,15,56,48,255 ; pmovzxbw %xmm15,%xmm7
- DB 102,65,15,104,228 ; punpckhbw %xmm12,%xmm4
- DB 102,69,15,104,252 ; punpckhbw %xmm12,%xmm15
- DB 102,69,15,213,251 ; pmullw %xmm11,%xmm15
- DB 102,65,15,213,229 ; pmullw %xmm13,%xmm4
- DB 102,65,15,213,249 ; pmullw %xmm9,%xmm7
- DB 102,65,15,213,234 ; pmullw %xmm10,%xmm5
- DB 102,65,15,253,229 ; paddw %xmm13,%xmm4
- DB 102,69,15,253,251 ; paddw %xmm11,%xmm15
- DB 102,65,15,253,234 ; paddw %xmm10,%xmm5
- DB 102,65,15,253,249 ; paddw %xmm9,%xmm7
- DB 102,65,15,113,215,8 ; psrlw $0x8,%xmm15
- DB 102,15,113,212,8 ; psrlw $0x8,%xmm4
- DB 102,15,113,215,8 ; psrlw $0x8,%xmm7
- DB 102,15,113,213,8 ; psrlw $0x8,%xmm5
- DB 102,15,103,236 ; packuswb %xmm4,%xmm5
- DB 102,65,15,103,255 ; packuswb %xmm15,%xmm7
- DB 102,15,218,199 ; pminub %xmm7,%xmm0
- DB 102,15,218,245 ; pminub %xmm5,%xmm6
- DB 102,15,252,246 ; paddb %xmm6,%xmm6
- DB 102,15,252,192 ; paddb %xmm0,%xmm0
- DB 102,68,15,111,193 ; movdqa %xmm1,%xmm8
- DB 102,68,15,248,192 ; psubb %xmm0,%xmm8
- DB 102,65,15,111,230 ; movdqa %xmm14,%xmm4
- DB 102,15,248,230 ; psubb %xmm6,%xmm4
- DB 102,15,248,207 ; psubb %xmm7,%xmm1
- DB 102,68,15,248,245 ; psubb %xmm5,%xmm14
- DB 15,40,5,32,4,0,0 ; movaps 0x420(%rip),%xmm0 # 2640 <_sk_exclusion_sse41_8bit+0x40b>
- DB 102,68,15,56,16,244 ; pblendvb %xmm0,%xmm4,%xmm14
- DB 102,65,15,56,16,200 ; pblendvb %xmm0,%xmm8,%xmm1
- DB 72,173 ; lods %ds:(%rsi),%rax
- DB 102,65,15,111,198 ; movdqa %xmm14,%xmm0
- DB 255,224 ; jmpq *%rax
-
-PUBLIC _sk_exclusion_sse41_8bit
-_sk_exclusion_sse41_8bit LABEL PROC
- DB 102,69,15,239,201 ; pxor %xmm9,%xmm9
- DB 102,15,111,248 ; movdqa %xmm0,%xmm7
- DB 102,65,15,104,249 ; punpckhbw %xmm9,%xmm7
- DB 102,68,15,56,48,208 ; pmovzxbw %xmm0,%xmm10
- DB 102,68,15,111,218 ; movdqa %xmm2,%xmm11
- DB 102,15,111,234 ; movdqa %xmm2,%xmm5
- DB 102,65,15,104,233 ; punpckhbw %xmm9,%xmm5
- DB 102,68,15,252,216 ; paddb %xmm0,%xmm11
- DB 102,68,15,111,5,235,3,0,0 ; movdqa 0x3eb(%rip),%xmm8 # 2650 <_sk_exclusion_sse41_8bit+0x41b>
- DB 102,65,15,56,0,192 ; pshufb %xmm8,%xmm0
- DB 102,15,56,48,240 ; pmovzxbw %xmm0,%xmm6
- DB 102,65,15,104,193 ; punpckhbw %xmm9,%xmm0
- DB 102,15,213,197 ; pmullw %xmm5,%xmm0
- DB 102,15,253,197 ; paddw %xmm5,%xmm0
- DB 102,15,213,239 ; pmullw %xmm7,%xmm5
- DB 102,15,253,239 ; paddw %xmm7,%xmm5
- DB 102,15,56,48,250 ; pmovzxbw %xmm2,%xmm7
- DB 102,15,213,247 ; pmullw %xmm7,%xmm6
- DB 102,15,253,247 ; paddw %xmm7,%xmm6
- DB 102,65,15,213,250 ; pmullw %xmm10,%xmm7
- DB 102,65,15,253,250 ; paddw %xmm10,%xmm7
- DB 102,15,111,225 ; movdqa %xmm1,%xmm4
- DB 102,65,15,104,225 ; punpckhbw %xmm9,%xmm4
- DB 102,15,113,213,8 ; psrlw $0x8,%xmm5
- DB 102,15,113,215,8 ; psrlw $0x8,%xmm7
- DB 102,15,103,253 ; packuswb %xmm5,%xmm7
- DB 102,15,252,255 ; paddb %xmm7,%xmm7
- DB 102,65,15,111,235 ; movdqa %xmm11,%xmm5
- DB 102,15,248,239 ; psubb %xmm7,%xmm5
- DB 102,68,15,56,48,209 ; pmovzxbw %xmm1,%xmm10
- DB 102,15,113,208,8 ; psrlw $0x8,%xmm0
- DB 102,15,113,214,8 ; psrlw $0x8,%xmm6
- DB 102,15,103,240 ; packuswb %xmm0,%xmm6
- DB 102,68,15,248,222 ; psubb %xmm6,%xmm11
- DB 15,40,5,128,3,0,0 ; movaps 0x380(%rip),%xmm0 # 2660 <_sk_exclusion_sse41_8bit+0x42b>
- DB 102,68,15,56,16,221 ; pblendvb %xmm0,%xmm5,%xmm11
- DB 102,15,111,235 ; movdqa %xmm3,%xmm5
- DB 102,15,111,243 ; movdqa %xmm3,%xmm6
- DB 102,65,15,104,241 ; punpckhbw %xmm9,%xmm6
- DB 102,15,252,233 ; paddb %xmm1,%xmm5
- DB 102,65,15,56,0,200 ; pshufb %xmm8,%xmm1
- DB 102,15,56,48,249 ; pmovzxbw %xmm1,%xmm7
- DB 102,65,15,104,201 ; punpckhbw %xmm9,%xmm1
- DB 102,15,213,206 ; pmullw %xmm6,%xmm1
- DB 102,15,253,206 ; paddw %xmm6,%xmm1
- DB 102,15,213,244 ; pmullw %xmm4,%xmm6
- DB 102,15,253,244 ; paddw %xmm4,%xmm6
- DB 102,15,56,48,227 ; pmovzxbw %xmm3,%xmm4
- DB 102,15,213,252 ; pmullw %xmm4,%xmm7
- DB 102,15,253,252 ; paddw %xmm4,%xmm7
+ DB 102,15,56,48,229 ; pmovzxbw %xmm5,%xmm4
+ DB 102,15,56,48,241 ; pmovzxbw %xmm1,%xmm6
+ DB 102,65,15,104,236 ; punpckhbw %xmm12,%xmm5
+ DB 102,65,15,104,204 ; punpckhbw %xmm12,%xmm1
+ DB 102,65,15,213,203 ; pmullw %xmm11,%xmm1
+ DB 102,65,15,213,237 ; pmullw %xmm13,%xmm5
+ DB 102,65,15,213,241 ; pmullw %xmm9,%xmm6
DB 102,65,15,213,226 ; pmullw %xmm10,%xmm4
+ DB 102,65,15,253,237 ; paddw %xmm13,%xmm5
+ DB 102,65,15,253,203 ; paddw %xmm11,%xmm1
DB 102,65,15,253,226 ; paddw %xmm10,%xmm4
+ DB 102,65,15,253,241 ; paddw %xmm9,%xmm6
+ DB 102,15,113,209,8 ; psrlw $0x8,%xmm1
+ DB 102,15,113,213,8 ; psrlw $0x8,%xmm5
DB 102,15,113,214,8 ; psrlw $0x8,%xmm6
DB 102,15,113,212,8 ; psrlw $0x8,%xmm4
- DB 102,15,103,230 ; packuswb %xmm6,%xmm4
- DB 102,15,252,228 ; paddb %xmm4,%xmm4
- DB 102,15,111,245 ; movdqa %xmm5,%xmm6
- DB 102,15,248,244 ; psubb %xmm4,%xmm6
- DB 102,15,113,209,8 ; psrlw $0x8,%xmm1
- DB 102,15,113,215,8 ; psrlw $0x8,%xmm7
- DB 102,15,103,249 ; packuswb %xmm1,%xmm7
- DB 102,15,248,239 ; psubb %xmm7,%xmm5
- DB 102,15,56,16,238 ; pblendvb %xmm0,%xmm6,%xmm5
+ DB 102,15,103,229 ; packuswb %xmm5,%xmm4
+ DB 102,15,103,241 ; packuswb %xmm1,%xmm6
+ DB 102,65,15,218,230 ; pminub %xmm14,%xmm4
+ DB 102,15,218,247 ; pminub %xmm7,%xmm6
+ DB 102,15,111,13,12,3,0,0 ; movdqa 0x30c(%rip),%xmm1 # 25c0 <_sk_difference_sse41_8bit+0x451>
+ DB 102,68,15,248,198 ; psubb %xmm6,%xmm8
+ DB 102,15,219,241 ; pand %xmm1,%xmm6
+ DB 102,15,219,204 ; pand %xmm4,%xmm1
+ DB 102,15,248,196 ; psubb %xmm4,%xmm0
+ DB 102,15,248,193 ; psubb %xmm1,%xmm0
+ DB 102,68,15,248,198 ; psubb %xmm6,%xmm8
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 102,65,15,111,195 ; movdqa %xmm11,%xmm0
- DB 102,15,111,205 ; movdqa %xmm5,%xmm1
+ DB 102,65,15,111,200 ; movdqa %xmm8,%xmm1
DB 255,224 ; jmpq *%rax
ALIGN 4
DB 0,0 ; add %al,(%rax)
- DB 127,67 ; jg 23b3 <_sk_exclusion_sse41_8bit+0x17e>
+ DB 127,67 ; jg 231f <_sk_difference_sse41_8bit+0x1b0>
DB 0,0 ; add %al,(%rax)
- DB 127,67 ; jg 23b7 <_sk_exclusion_sse41_8bit+0x182>
+ DB 127,67 ; jg 2323 <_sk_difference_sse41_8bit+0x1b4>
DB 0,0 ; add %al,(%rax)
- DB 127,67 ; jg 23bb <_sk_exclusion_sse41_8bit+0x186>
+ DB 127,67 ; jg 2327 <_sk_difference_sse41_8bit+0x1b8>
ALIGN 16
DB 0,0 ; add %al,(%rax)
@@ -45076,18 +45033,7 @@ ALIGN 16
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,0 ; incl (%rax)
- DB 3,3 ; add (%rbx),%eax
- DB 3,3 ; add (%rbx),%eax
- DB 7 ; (bad)
- DB 7 ; (bad)
- DB 7 ; (bad)
- DB 7 ; (bad)
- DB 11,11 ; or (%rbx),%ecx
- DB 11,11 ; or (%rbx),%ecx
- DB 15 ; (bad)
- DB 15 ; (bad)
- DB 15 ; (bad)
- DB 15,255 ; (bad)
+ DB 255 ; (bad)
DB 255 ; (bad)
DB 255,0 ; incl (%rax)
DB 255 ; (bad)
@@ -45222,7 +45168,7 @@ _sk_uniform_color_sse2_8bit LABEL PROC
PUBLIC _sk_set_rgb_sse2_8bit
_sk_set_rgb_sse2_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 243,15,16,37,108,41,0,0 ; movss 0x296c(%rip),%xmm4 # 2aec <_sk_exclusion_sse2_8bit+0x195>
+ DB 243,15,16,37,100,40,0,0 ; movss 0x2864(%rip),%xmm4 # 29e4 <_sk_difference_sse2_8bit+0x1c3>
DB 243,15,16,40 ; movss (%rax),%xmm5
DB 243,15,89,236 ; mulss %xmm4,%xmm5
DB 243,72,15,44,205 ; cvttss2si %xmm5,%rcx
@@ -45237,7 +45183,7 @@ _sk_set_rgb_sse2_8bit LABEL PROC
DB 9,208 ; or %edx,%eax
DB 102,15,110,224 ; movd %eax,%xmm4
DB 102,15,112,228,0 ; pshufd $0x0,%xmm4,%xmm4
- DB 102,15,111,45,64,41,0,0 ; movdqa 0x2940(%rip),%xmm5 # 2b00 <_sk_exclusion_sse2_8bit+0x1a9>
+ DB 102,15,111,45,48,40,0,0 ; movdqa 0x2830(%rip),%xmm5 # 29f0 <_sk_difference_sse2_8bit+0x1cf>
DB 102,15,219,205 ; pand %xmm5,%xmm1
DB 102,15,219,197 ; pand %xmm5,%xmm0
DB 102,15,235,196 ; por %xmm4,%xmm0
@@ -45260,7 +45206,7 @@ _sk_premul_sse2_8bit LABEL PROC
DB 102,15,96,192 ; punpcklbw %xmm0,%xmm0
DB 242,15,112,192,95 ; pshuflw $0x5f,%xmm0,%xmm0
DB 243,15,112,248,95 ; pshufhw $0x5f,%xmm0,%xmm7
- DB 102,15,111,5,244,40,0,0 ; movdqa 0x28f4(%rip),%xmm0 # 2b10 <_sk_exclusion_sse2_8bit+0x1b9>
+ DB 102,15,111,5,228,39,0,0 ; movdqa 0x27e4(%rip),%xmm0 # 2a00 <_sk_difference_sse2_8bit+0x1df>
DB 102,15,235,248 ; por %xmm0,%xmm7
DB 102,15,235,240 ; por %xmm0,%xmm6
DB 102,69,15,239,201 ; pxor %xmm9,%xmm9
@@ -45762,7 +45708,7 @@ _sk_load_a8_sse2_8bit LABEL PROC
DB 117,48 ; jne 8b9 <_sk_load_a8_sse2_8bit+0x4d>
DB 243,66,15,126,4,2 ; movq (%rdx,%r8,1),%xmm0
DB 102,15,96,192 ; punpcklbw %xmm0,%xmm0
- DB 102,15,84,5,133,34,0,0 ; andpd 0x2285(%rip),%xmm0 # 2b20 <_sk_exclusion_sse2_8bit+0x1c9>
+ DB 102,15,84,5,117,33,0,0 ; andpd 0x2175(%rip),%xmm0 # 2a10 <_sk_difference_sse2_8bit+0x1ef>
DB 102,15,239,228 ; pxor %xmm4,%xmm4
DB 102,15,40,200 ; movapd %xmm0,%xmm1
DB 102,15,105,204 ; punpckhwd %xmm4,%xmm1
@@ -45837,7 +45783,7 @@ _sk_load_a8_dst_sse2_8bit LABEL PROC
DB 117,48 ; jne 9ad <_sk_load_a8_dst_sse2_8bit+0x4d>
DB 243,66,15,126,20,2 ; movq (%rdx,%r8,1),%xmm2
DB 102,15,96,208 ; punpcklbw %xmm0,%xmm2
- DB 102,15,84,21,161,33,0,0 ; andpd 0x21a1(%rip),%xmm2 # 2b30 <_sk_exclusion_sse2_8bit+0x1d9>
+ DB 102,15,84,21,145,32,0,0 ; andpd 0x2091(%rip),%xmm2 # 2a20 <_sk_difference_sse2_8bit+0x1ff>
DB 102,15,239,228 ; pxor %xmm4,%xmm4
DB 102,15,40,218 ; movapd %xmm2,%xmm3
DB 102,15,105,220 ; punpckhwd %xmm4,%xmm3
@@ -45920,7 +45866,7 @@ _sk_store_a8_sse2_8bit LABEL PROC
DB 102,15,107,229 ; packssdw %xmm5,%xmm4
DB 77,133,201 ; test %r9,%r9
DB 117,26 ; jne ab9 <_sk_store_a8_sse2_8bit+0x65>
- DB 102,15,219,37,153,32,0,0 ; pand 0x2099(%rip),%xmm4 # 2b40 <_sk_exclusion_sse2_8bit+0x1e9>
+ DB 102,15,219,37,137,31,0,0 ; pand 0x1f89(%rip),%xmm4 # 2a30 <_sk_difference_sse2_8bit+0x20f>
DB 102,15,103,228 ; packuswb %xmm4,%xmm4
DB 102,66,15,214,36,2 ; movq %xmm4,(%rdx,%r8,1)
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -45942,7 +45888,7 @@ _sk_store_a8_sse2_8bit LABEL PROC
DB 102,15,127,100,36,16 ; movdqa %xmm4,0x10(%rsp)
DB 138,68,36,20 ; mov 0x14(%rsp),%al
DB 66,136,68,2,2 ; mov %al,0x2(%rdx,%r8,1)
- DB 102,15,219,37,65,32,0,0 ; pand 0x2041(%rip),%xmm4 # 2b40 <_sk_exclusion_sse2_8bit+0x1e9>
+ DB 102,15,219,37,49,31,0,0 ; pand 0x1f31(%rip),%xmm4 # 2a30 <_sk_difference_sse2_8bit+0x20f>
DB 102,15,103,228 ; packuswb %xmm4,%xmm4
DB 102,15,126,224 ; movd %xmm4,%eax
DB 102,66,137,4,2 ; mov %ax,(%rdx,%r8,1)
@@ -45956,7 +45902,7 @@ _sk_store_a8_sse2_8bit LABEL PROC
DB 102,15,127,100,36,32 ; movdqa %xmm4,0x20(%rsp)
DB 138,68,36,40 ; mov 0x28(%rsp),%al
DB 66,136,68,2,4 ; mov %al,0x4(%rdx,%r8,1)
- DB 102,15,219,37,253,31,0,0 ; pand 0x1ffd(%rip),%xmm4 # 2b40 <_sk_exclusion_sse2_8bit+0x1e9>
+ DB 102,15,219,37,237,30,0,0 ; pand 0x1eed(%rip),%xmm4 # 2a30 <_sk_difference_sse2_8bit+0x20f>
DB 102,15,103,228 ; packuswb %xmm4,%xmm4
DB 102,66,15,126,36,2 ; movd %xmm4,(%rdx,%r8,1)
DB 233,95,255,255,255 ; jmpq ab1 <_sk_store_a8_sse2_8bit+0x5d>
@@ -45994,12 +45940,12 @@ _sk_load_g8_sse2_8bit LABEL PROC
DB 117,116 ; jne c01 <_sk_load_g8_sse2_8bit+0x91>
DB 243,66,15,126,4,2 ; movq (%rdx,%r8,1),%xmm0
DB 102,15,96,192 ; punpcklbw %xmm0,%xmm0
- DB 102,15,84,5,177,31,0,0 ; andpd 0x1fb1(%rip),%xmm0 # 2b50 <_sk_exclusion_sse2_8bit+0x1f9>
+ DB 102,15,84,5,161,30,0,0 ; andpd 0x1ea1(%rip),%xmm0 # 2a40 <_sk_difference_sse2_8bit+0x21f>
DB 102,15,239,201 ; pxor %xmm1,%xmm1
DB 102,15,40,224 ; movapd %xmm0,%xmm4
DB 102,15,97,225 ; punpcklwd %xmm1,%xmm4
DB 102,15,105,193 ; punpckhwd %xmm1,%xmm0
- DB 102,15,111,45,169,31,0,0 ; movdqa 0x1fa9(%rip),%xmm5 # 2b60 <_sk_exclusion_sse2_8bit+0x209>
+ DB 102,15,111,45,153,30,0,0 ; movdqa 0x1e99(%rip),%xmm5 # 2a50 <_sk_difference_sse2_8bit+0x22f>
DB 102,15,112,240,245 ; pshufd $0xf5,%xmm0,%xmm6
DB 102,15,244,197 ; pmuludq %xmm5,%xmm0
DB 102,15,112,200,232 ; pshufd $0xe8,%xmm0,%xmm1
@@ -46012,7 +45958,7 @@ _sk_load_g8_sse2_8bit LABEL PROC
DB 102,15,244,245 ; pmuludq %xmm5,%xmm6
DB 102,15,112,230,232 ; pshufd $0xe8,%xmm6,%xmm4
DB 102,15,98,196 ; punpckldq %xmm4,%xmm0
- DB 102,15,111,37,123,31,0,0 ; movdqa 0x1f7b(%rip),%xmm4 # 2b70 <_sk_exclusion_sse2_8bit+0x219>
+ DB 102,15,111,37,107,30,0,0 ; movdqa 0x1e6b(%rip),%xmm4 # 2a60 <_sk_difference_sse2_8bit+0x23f>
DB 102,15,235,196 ; por %xmm4,%xmm0
DB 102,15,235,204 ; por %xmm4,%xmm1
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -46084,12 +46030,12 @@ _sk_load_g8_dst_sse2_8bit LABEL PROC
DB 117,116 ; jne d41 <_sk_load_g8_dst_sse2_8bit+0x91>
DB 243,66,15,126,20,2 ; movq (%rdx,%r8,1),%xmm2
DB 102,15,96,208 ; punpcklbw %xmm0,%xmm2
- DB 102,15,84,21,161,30,0,0 ; andpd 0x1ea1(%rip),%xmm2 # 2b80 <_sk_exclusion_sse2_8bit+0x229>
+ DB 102,15,84,21,145,29,0,0 ; andpd 0x1d91(%rip),%xmm2 # 2a70 <_sk_difference_sse2_8bit+0x24f>
DB 102,15,239,219 ; pxor %xmm3,%xmm3
DB 102,15,40,226 ; movapd %xmm2,%xmm4
DB 102,15,97,227 ; punpcklwd %xmm3,%xmm4
DB 102,15,105,211 ; punpckhwd %xmm3,%xmm2
- DB 102,15,111,45,153,30,0,0 ; movdqa 0x1e99(%rip),%xmm5 # 2b90 <_sk_exclusion_sse2_8bit+0x239>
+ DB 102,15,111,45,137,29,0,0 ; movdqa 0x1d89(%rip),%xmm5 # 2a80 <_sk_difference_sse2_8bit+0x25f>
DB 102,15,112,242,245 ; pshufd $0xf5,%xmm2,%xmm6
DB 102,15,244,213 ; pmuludq %xmm5,%xmm2
DB 102,15,112,218,232 ; pshufd $0xe8,%xmm2,%xmm3
@@ -46102,7 +46048,7 @@ _sk_load_g8_dst_sse2_8bit LABEL PROC
DB 102,15,244,245 ; pmuludq %xmm5,%xmm6
DB 102,15,112,230,232 ; pshufd $0xe8,%xmm6,%xmm4
DB 102,15,98,212 ; punpckldq %xmm4,%xmm2
- DB 102,15,111,37,107,30,0,0 ; movdqa 0x1e6b(%rip),%xmm4 # 2ba0 <_sk_exclusion_sse2_8bit+0x249>
+ DB 102,15,111,37,91,29,0,0 ; movdqa 0x1d5b(%rip),%xmm4 # 2a90 <_sk_difference_sse2_8bit+0x26f>
DB 102,15,235,212 ; por %xmm4,%xmm2
DB 102,15,235,220 ; por %xmm4,%xmm3
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -46315,7 +46261,7 @@ _sk_scale_1_float_sse2_8bit LABEL PROC
DB 102,68,15,111,200 ; movdqa %xmm0,%xmm9
DB 72,173 ; lods %ds:(%rsi),%rax
DB 243,15,16,0 ; movss (%rax),%xmm0
- DB 243,15,89,5,156,26,0,0 ; mulss 0x1a9c(%rip),%xmm0 # 2af0 <_sk_exclusion_sse2_8bit+0x199>
+ DB 243,15,89,5,148,25,0,0 ; mulss 0x1994(%rip),%xmm0 # 29e8 <_sk_difference_sse2_8bit+0x1c7>
DB 243,15,44,192 ; cvttss2si %xmm0,%eax
DB 102,15,239,246 ; pxor %xmm6,%xmm6
DB 102,65,15,111,193 ; movdqa %xmm9,%xmm0
@@ -46327,7 +46273,7 @@ _sk_scale_1_float_sse2_8bit LABEL PROC
DB 102,15,96,246 ; punpcklbw %xmm6,%xmm6
DB 242,15,112,246,0 ; pshuflw $0x0,%xmm6,%xmm6
DB 102,15,112,246,80 ; pshufd $0x50,%xmm6,%xmm6
- DB 102,15,219,53,35,27,0,0 ; pand 0x1b23(%rip),%xmm6 # 2bb0 <_sk_exclusion_sse2_8bit+0x259>
+ DB 102,15,219,53,19,26,0,0 ; pand 0x1a13(%rip),%xmm6 # 2aa0 <_sk_difference_sse2_8bit+0x27f>
DB 102,15,111,254 ; movdqa %xmm6,%xmm7
DB 102,65,15,213,248 ; pmullw %xmm8,%xmm7
DB 102,15,111,230 ; movdqa %xmm6,%xmm4
@@ -46361,7 +46307,7 @@ _sk_scale_u8_sse2_8bit LABEL PROC
DB 15,133,239,0,0,0 ; jne 11ed <_sk_scale_u8_sse2_8bit+0x110>
DB 243,66,15,126,36,2 ; movq (%rdx,%r8,1),%xmm4
DB 102,15,96,224 ; punpcklbw %xmm0,%xmm4
- DB 102,15,84,37,176,26,0,0 ; andpd 0x1ab0(%rip),%xmm4 # 2bc0 <_sk_exclusion_sse2_8bit+0x269>
+ DB 102,15,84,37,160,25,0,0 ; andpd 0x19a0(%rip),%xmm4 # 2ab0 <_sk_difference_sse2_8bit+0x28f>
DB 102,69,15,239,192 ; pxor %xmm8,%xmm8
DB 102,15,40,236 ; movapd %xmm4,%xmm5
DB 102,65,15,105,232 ; punpckhwd %xmm8,%xmm5
@@ -46468,7 +46414,7 @@ PUBLIC _sk_lerp_1_float_sse2_8bit
_sk_lerp_1_float_sse2_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 243,15,16,32 ; movss (%rax),%xmm4
- DB 243,15,89,37,70,24,0,0 ; mulss 0x1846(%rip),%xmm4 # 2af4 <_sk_exclusion_sse2_8bit+0x19d>
+ DB 243,15,89,37,62,23,0,0 ; mulss 0x173e(%rip),%xmm4 # 29ec <_sk_difference_sse2_8bit+0x1cb>
DB 243,15,44,196 ; cvttss2si %xmm4,%eax
DB 102,15,110,224 ; movd %eax,%xmm4
DB 102,15,96,228 ; punpcklbw %xmm4,%xmm4
@@ -46481,7 +46427,7 @@ _sk_lerp_1_float_sse2_8bit LABEL PROC
DB 102,68,15,111,217 ; movdqa %xmm1,%xmm11
DB 102,69,15,96,217 ; punpcklbw %xmm9,%xmm11
DB 102,65,15,104,201 ; punpckhbw %xmm9,%xmm1
- DB 102,15,111,53,225,24,0,0 ; movdqa 0x18e1(%rip),%xmm6 # 2bd0 <_sk_exclusion_sse2_8bit+0x279>
+ DB 102,15,111,53,209,23,0,0 ; movdqa 0x17d1(%rip),%xmm6 # 2ac0 <_sk_difference_sse2_8bit+0x29f>
DB 102,65,15,219,240 ; pand %xmm8,%xmm6
DB 102,15,111,230 ; movdqa %xmm6,%xmm4
DB 102,15,213,225 ; pmullw %xmm1,%xmm4
@@ -46547,7 +46493,7 @@ _sk_lerp_u8_sse2_8bit LABEL PROC
DB 15,133,141,1,0,0 ; jne 1584 <_sk_lerp_u8_sse2_8bit+0x1ae>
DB 243,66,15,126,44,2 ; movq (%rdx,%r8,1),%xmm5
DB 102,15,96,232 ; punpcklbw %xmm0,%xmm5
- DB 102,15,84,45,215,23,0,0 ; andpd 0x17d7(%rip),%xmm5 # 2be0 <_sk_exclusion_sse2_8bit+0x289>
+ DB 102,15,84,45,199,22,0,0 ; andpd 0x16c7(%rip),%xmm5 # 2ad0 <_sk_difference_sse2_8bit+0x2af>
DB 102,69,15,239,192 ; pxor %xmm8,%xmm8
DB 102,15,40,229 ; movapd %xmm5,%xmm4
DB 102,65,15,105,224 ; punpckhwd %xmm8,%xmm4
@@ -46699,7 +46645,7 @@ _sk_move_dst_src_sse2_8bit LABEL PROC
PUBLIC _sk_black_color_sse2_8bit
_sk_black_color_sse2_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 15,40,5,159,21,0,0 ; movaps 0x159f(%rip),%xmm0 # 2bf0 <_sk_exclusion_sse2_8bit+0x299>
+ DB 15,40,5,143,20,0,0 ; movaps 0x148f(%rip),%xmm0 # 2ae0 <_sk_difference_sse2_8bit+0x2bf>
DB 15,40,200 ; movaps %xmm0,%xmm1
DB 255,224 ; jmpq *%rax
@@ -47563,7 +47509,7 @@ _sk_darken_sse2_8bit LABEL PROC
DB 102,65,15,248,234 ; psubb %xmm10,%xmm5
DB 102,15,248,207 ; psubb %xmm7,%xmm1
DB 102,15,248,196 ; psubb %xmm4,%xmm0
- DB 102,15,111,37,180,6,0,0 ; movdqa 0x6b4(%rip),%xmm4 # 2c00 <_sk_exclusion_sse2_8bit+0x2a9>
+ DB 102,15,111,37,164,5,0,0 ; movdqa 0x5a4(%rip),%xmm4 # 2af0 <_sk_difference_sse2_8bit+0x2cf>
DB 102,15,219,236 ; pand %xmm4,%xmm5
DB 102,15,111,252 ; movdqa %xmm4,%xmm7
DB 102,15,223,248 ; pandn %xmm0,%xmm7
@@ -47669,7 +47615,7 @@ _sk_lighten_sse2_8bit LABEL PROC
DB 102,65,15,248,234 ; psubb %xmm10,%xmm5
DB 102,15,248,207 ; psubb %xmm7,%xmm1
DB 102,15,248,196 ; psubb %xmm4,%xmm0
- DB 102,15,111,37,216,4,0,0 ; movdqa 0x4d8(%rip),%xmm4 # 2c10 <_sk_exclusion_sse2_8bit+0x2b9>
+ DB 102,15,111,37,200,3,0,0 ; movdqa 0x3c8(%rip),%xmm4 # 2b00 <_sk_difference_sse2_8bit+0x2df>
DB 102,15,219,236 ; pand %xmm4,%xmm5
DB 102,15,111,252 ; movdqa %xmm4,%xmm7
DB 102,15,223,248 ; pandn %xmm0,%xmm7
@@ -47682,6 +47628,51 @@ _sk_lighten_sse2_8bit LABEL PROC
DB 102,15,111,206 ; movdqa %xmm6,%xmm1
DB 255,224 ; jmpq *%rax
+PUBLIC _sk_exclusion_sse2_8bit
+_sk_exclusion_sse2_8bit LABEL PROC
+ DB 102,69,15,239,192 ; pxor %xmm8,%xmm8
+ DB 102,68,15,111,200 ; movdqa %xmm0,%xmm9
+ DB 102,69,15,96,200 ; punpcklbw %xmm8,%xmm9
+ DB 102,68,15,111,208 ; movdqa %xmm0,%xmm10
+ DB 102,69,15,104,208 ; punpckhbw %xmm8,%xmm10
+ DB 102,68,15,111,217 ; movdqa %xmm1,%xmm11
+ DB 102,69,15,96,216 ; punpcklbw %xmm8,%xmm11
+ DB 102,68,15,111,225 ; movdqa %xmm1,%xmm12
+ DB 102,69,15,104,224 ; punpckhbw %xmm8,%xmm12
+ DB 102,15,111,226 ; movdqa %xmm2,%xmm4
+ DB 102,15,252,194 ; paddb %xmm2,%xmm0
+ DB 102,15,111,242 ; movdqa %xmm2,%xmm6
+ DB 102,65,15,96,240 ; punpcklbw %xmm8,%xmm6
+ DB 102,65,15,104,224 ; punpckhbw %xmm8,%xmm4
+ DB 102,15,111,251 ; movdqa %xmm3,%xmm7
+ DB 102,15,252,203 ; paddb %xmm3,%xmm1
+ DB 102,15,111,235 ; movdqa %xmm3,%xmm5
+ DB 102,65,15,96,232 ; punpcklbw %xmm8,%xmm5
+ DB 102,65,15,104,248 ; punpckhbw %xmm8,%xmm7
+ DB 102,65,15,213,252 ; pmullw %xmm12,%xmm7
+ DB 102,65,15,213,235 ; pmullw %xmm11,%xmm5
+ DB 102,65,15,213,226 ; pmullw %xmm10,%xmm4
+ DB 102,65,15,213,241 ; pmullw %xmm9,%xmm6
+ DB 102,65,15,253,241 ; paddw %xmm9,%xmm6
+ DB 102,65,15,253,226 ; paddw %xmm10,%xmm4
+ DB 102,65,15,253,235 ; paddw %xmm11,%xmm5
+ DB 102,65,15,253,252 ; paddw %xmm12,%xmm7
+ DB 102,15,113,215,8 ; psrlw $0x8,%xmm7
+ DB 102,15,113,213,8 ; psrlw $0x8,%xmm5
+ DB 102,15,113,212,8 ; psrlw $0x8,%xmm4
+ DB 102,15,113,214,8 ; psrlw $0x8,%xmm6
+ DB 102,15,103,244 ; packuswb %xmm4,%xmm6
+ DB 102,15,103,239 ; packuswb %xmm7,%xmm5
+ DB 102,15,111,37,11,3,0,0 ; movdqa 0x30b(%rip),%xmm4 # 2b10 <_sk_difference_sse2_8bit+0x2ef>
+ DB 102,15,248,205 ; psubb %xmm5,%xmm1
+ DB 102,15,219,236 ; pand %xmm4,%xmm5
+ DB 102,15,219,230 ; pand %xmm6,%xmm4
+ DB 102,15,248,198 ; psubb %xmm6,%xmm0
+ DB 102,15,248,196 ; psubb %xmm4,%xmm0
+ DB 102,15,248,205 ; psubb %xmm5,%xmm1
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 255,224 ; jmpq *%rax
+
PUBLIC _sk_difference_sse2_8bit
_sk_difference_sse2_8bit LABEL PROC
DB 242,15,112,227,231 ; pshuflw $0xe7,%xmm3,%xmm4
@@ -47697,194 +47688,95 @@ _sk_difference_sse2_8bit LABEL PROC
DB 242,15,112,237,95 ; pshuflw $0x5f,%xmm5,%xmm5
DB 243,15,112,253,95 ; pshufhw $0x5f,%xmm5,%xmm7
DB 102,69,15,239,192 ; pxor %xmm8,%xmm8
- DB 102,68,15,111,200 ; movdqa %xmm0,%xmm9
+ DB 102,15,111,232 ; movdqa %xmm0,%xmm5
+ DB 102,65,15,96,232 ; punpcklbw %xmm8,%xmm5
+ DB 102,15,111,240 ; movdqa %xmm0,%xmm6
+ DB 102,65,15,104,240 ; punpckhbw %xmm8,%xmm6
+ DB 102,68,15,111,201 ; movdqa %xmm1,%xmm9
DB 102,69,15,96,200 ; punpcklbw %xmm8,%xmm9
- DB 102,68,15,111,208 ; movdqa %xmm0,%xmm10
- DB 102,69,15,104,208 ; punpckhbw %xmm8,%xmm10
- DB 102,68,15,111,217 ; movdqa %xmm1,%xmm11
- DB 102,69,15,96,216 ; punpcklbw %xmm8,%xmm11
DB 102,68,15,111,225 ; movdqa %xmm1,%xmm12
DB 102,69,15,104,224 ; punpckhbw %xmm8,%xmm12
- DB 102,68,15,111,239 ; movdqa %xmm7,%xmm13
- DB 102,69,15,96,232 ; punpcklbw %xmm8,%xmm13
+ DB 102,68,15,111,215 ; movdqa %xmm7,%xmm10
+ DB 102,69,15,96,208 ; punpcklbw %xmm8,%xmm10
DB 102,65,15,104,248 ; punpckhbw %xmm8,%xmm7
- DB 102,68,15,111,244 ; movdqa %xmm4,%xmm14
- DB 102,69,15,96,240 ; punpcklbw %xmm8,%xmm14
+ DB 102,68,15,111,220 ; movdqa %xmm4,%xmm11
+ DB 102,69,15,96,216 ; punpcklbw %xmm8,%xmm11
DB 102,65,15,104,224 ; punpckhbw %xmm8,%xmm4
DB 102,65,15,213,228 ; pmullw %xmm12,%xmm4
- DB 102,69,15,213,243 ; pmullw %xmm11,%xmm14
- DB 102,65,15,213,250 ; pmullw %xmm10,%xmm7
- DB 102,69,15,213,233 ; pmullw %xmm9,%xmm13
- DB 102,69,15,253,233 ; paddw %xmm9,%xmm13
- DB 102,65,15,253,250 ; paddw %xmm10,%xmm7
- DB 102,69,15,253,243 ; paddw %xmm11,%xmm14
+ DB 102,69,15,213,217 ; pmullw %xmm9,%xmm11
+ DB 102,15,213,254 ; pmullw %xmm6,%xmm7
+ DB 102,68,15,213,213 ; pmullw %xmm5,%xmm10
+ DB 102,68,15,253,213 ; paddw %xmm5,%xmm10
+ DB 102,15,253,254 ; paddw %xmm6,%xmm7
+ DB 102,69,15,253,217 ; paddw %xmm9,%xmm11
DB 102,65,15,253,228 ; paddw %xmm12,%xmm4
DB 102,15,113,212,8 ; psrlw $0x8,%xmm4
- DB 102,65,15,113,214,8 ; psrlw $0x8,%xmm14
+ DB 102,65,15,113,211,8 ; psrlw $0x8,%xmm11
DB 102,15,113,215,8 ; psrlw $0x8,%xmm7
- DB 102,65,15,113,213,8 ; psrlw $0x8,%xmm13
- DB 102,68,15,103,239 ; packuswb %xmm7,%xmm13
- DB 102,68,15,103,244 ; packuswb %xmm4,%xmm14
+ DB 102,65,15,113,210,8 ; psrlw $0x8,%xmm10
+ DB 102,68,15,103,215 ; packuswb %xmm7,%xmm10
+ DB 102,68,15,103,220 ; packuswb %xmm4,%xmm11
DB 242,15,112,225,231 ; pshuflw $0xe7,%xmm1,%xmm4
DB 243,15,112,228,231 ; pshufhw $0xe7,%xmm4,%xmm4
DB 102,15,112,228,232 ; pshufd $0xe8,%xmm4,%xmm4
DB 102,15,96,228 ; punpcklbw %xmm4,%xmm4
DB 242,15,112,228,95 ; pshuflw $0x5f,%xmm4,%xmm4
- DB 243,68,15,112,212,95 ; pshufhw $0x5f,%xmm4,%xmm10
+ DB 243,68,15,112,228,95 ; pshufhw $0x5f,%xmm4,%xmm12
DB 242,15,112,224,231 ; pshuflw $0xe7,%xmm0,%xmm4
DB 243,15,112,228,231 ; pshufhw $0xe7,%xmm4,%xmm4
DB 102,15,112,228,232 ; pshufd $0xe8,%xmm4,%xmm4
DB 102,15,96,228 ; punpcklbw %xmm4,%xmm4
DB 242,15,112,228,95 ; pshuflw $0x5f,%xmm4,%xmm4
- DB 243,68,15,112,220,95 ; pshufhw $0x5f,%xmm4,%xmm11
+ DB 243,68,15,112,236,95 ; pshufhw $0x5f,%xmm4,%xmm13
DB 102,68,15,111,202 ; movdqa %xmm2,%xmm9
DB 102,15,252,194 ; paddb %xmm2,%xmm0
- DB 102,15,111,242 ; movdqa %xmm2,%xmm6
- DB 102,65,15,96,240 ; punpcklbw %xmm8,%xmm6
+ DB 102,15,111,234 ; movdqa %xmm2,%xmm5
+ DB 102,65,15,96,232 ; punpcklbw %xmm8,%xmm5
DB 102,69,15,104,200 ; punpckhbw %xmm8,%xmm9
- DB 102,68,15,111,227 ; movdqa %xmm3,%xmm12
+ DB 102,68,15,111,243 ; movdqa %xmm3,%xmm14
DB 102,15,252,203 ; paddb %xmm3,%xmm1
- DB 102,15,111,235 ; movdqa %xmm3,%xmm5
- DB 102,65,15,96,232 ; punpcklbw %xmm8,%xmm5
- DB 102,69,15,104,224 ; punpckhbw %xmm8,%xmm12
- DB 102,65,15,111,227 ; movdqa %xmm11,%xmm4
- DB 102,65,15,96,224 ; punpcklbw %xmm8,%xmm4
- DB 102,69,15,104,216 ; punpckhbw %xmm8,%xmm11
- DB 102,65,15,111,250 ; movdqa %xmm10,%xmm7
+ DB 102,15,111,251 ; movdqa %xmm3,%xmm7
DB 102,65,15,96,248 ; punpcklbw %xmm8,%xmm7
- DB 102,69,15,104,208 ; punpckhbw %xmm8,%xmm10
- DB 102,69,15,213,212 ; pmullw %xmm12,%xmm10
- DB 102,15,213,253 ; pmullw %xmm5,%xmm7
- DB 102,69,15,213,217 ; pmullw %xmm9,%xmm11
- DB 102,15,213,230 ; pmullw %xmm6,%xmm4
- DB 102,15,253,230 ; paddw %xmm6,%xmm4
- DB 102,69,15,253,217 ; paddw %xmm9,%xmm11
- DB 102,15,253,253 ; paddw %xmm5,%xmm7
- DB 102,69,15,253,212 ; paddw %xmm12,%xmm10
- DB 102,65,15,113,210,8 ; psrlw $0x8,%xmm10
- DB 102,15,113,215,8 ; psrlw $0x8,%xmm7
- DB 102,65,15,113,211,8 ; psrlw $0x8,%xmm11
+ DB 102,69,15,104,240 ; punpckhbw %xmm8,%xmm14
+ DB 102,65,15,111,229 ; movdqa %xmm13,%xmm4
+ DB 102,65,15,96,224 ; punpcklbw %xmm8,%xmm4
+ DB 102,69,15,104,232 ; punpckhbw %xmm8,%xmm13
+ DB 102,65,15,111,244 ; movdqa %xmm12,%xmm6
+ DB 102,65,15,96,240 ; punpcklbw %xmm8,%xmm6
+ DB 102,69,15,104,224 ; punpckhbw %xmm8,%xmm12
+ DB 102,69,15,213,230 ; pmullw %xmm14,%xmm12
+ DB 102,15,213,247 ; pmullw %xmm7,%xmm6
+ DB 102,69,15,213,233 ; pmullw %xmm9,%xmm13
+ DB 102,15,213,229 ; pmullw %xmm5,%xmm4
+ DB 102,15,253,229 ; paddw %xmm5,%xmm4
+ DB 102,69,15,253,233 ; paddw %xmm9,%xmm13
+ DB 102,15,253,247 ; paddw %xmm7,%xmm6
+ DB 102,69,15,253,230 ; paddw %xmm14,%xmm12
+ DB 102,65,15,113,212,8 ; psrlw $0x8,%xmm12
+ DB 102,15,113,214,8 ; psrlw $0x8,%xmm6
+ DB 102,65,15,113,213,8 ; psrlw $0x8,%xmm13
DB 102,15,113,212,8 ; psrlw $0x8,%xmm4
- DB 102,65,15,103,227 ; packuswb %xmm11,%xmm4
- DB 102,65,15,103,250 ; packuswb %xmm10,%xmm7
- DB 102,68,15,218,247 ; pminub %xmm7,%xmm14
- DB 102,68,15,218,236 ; pminub %xmm4,%xmm13
- DB 102,69,15,252,237 ; paddb %xmm13,%xmm13
- DB 102,69,15,252,246 ; paddb %xmm14,%xmm14
- DB 102,15,111,241 ; movdqa %xmm1,%xmm6
- DB 102,65,15,248,246 ; psubb %xmm14,%xmm6
- DB 102,15,111,232 ; movdqa %xmm0,%xmm5
- DB 102,65,15,248,237 ; psubb %xmm13,%xmm5
- DB 102,15,248,207 ; psubb %xmm7,%xmm1
- DB 102,15,248,196 ; psubb %xmm4,%xmm0
- DB 102,15,111,37,241,2,0,0 ; movdqa 0x2f1(%rip),%xmm4 # 2c20 <_sk_exclusion_sse2_8bit+0x2c9>
+ DB 102,65,15,103,229 ; packuswb %xmm13,%xmm4
+ DB 102,65,15,103,244 ; packuswb %xmm12,%xmm6
+ DB 102,65,15,218,226 ; pminub %xmm10,%xmm4
+ DB 102,65,15,218,243 ; pminub %xmm11,%xmm6
+ DB 102,15,111,45,88,1,0,0 ; movdqa 0x158(%rip),%xmm5 # 2b20 <_sk_difference_sse2_8bit+0x2ff>
+ DB 102,15,248,206 ; psubb %xmm6,%xmm1
+ DB 102,15,219,245 ; pand %xmm5,%xmm6
DB 102,15,219,236 ; pand %xmm4,%xmm5
- DB 102,15,111,252 ; movdqa %xmm4,%xmm7
- DB 102,15,223,248 ; pandn %xmm0,%xmm7
- DB 102,15,235,239 ; por %xmm7,%xmm5
- DB 102,15,219,244 ; pand %xmm4,%xmm6
- DB 102,15,223,225 ; pandn %xmm1,%xmm4
- DB 102,15,235,244 ; por %xmm4,%xmm6
- DB 72,173 ; lods %ds:(%rsi),%rax
- DB 102,15,111,197 ; movdqa %xmm5,%xmm0
- DB 102,15,111,206 ; movdqa %xmm6,%xmm1
- DB 255,224 ; jmpq *%rax
-
-PUBLIC _sk_exclusion_sse2_8bit
-_sk_exclusion_sse2_8bit LABEL PROC
- DB 102,69,15,239,201 ; pxor %xmm9,%xmm9
- DB 102,68,15,111,192 ; movdqa %xmm0,%xmm8
- DB 102,68,15,111,226 ; movdqa %xmm2,%xmm12
- DB 102,68,15,252,224 ; paddb %xmm0,%xmm12
- DB 242,68,15,112,208,231 ; pshuflw $0xe7,%xmm0,%xmm10
- DB 102,15,111,224 ; movdqa %xmm0,%xmm4
- DB 102,65,15,96,225 ; punpcklbw %xmm9,%xmm4
- DB 102,69,15,104,193 ; punpckhbw %xmm9,%xmm8
- DB 102,68,15,111,233 ; movdqa %xmm1,%xmm13
- DB 102,68,15,111,243 ; movdqa %xmm3,%xmm14
- DB 102,68,15,252,241 ; paddb %xmm1,%xmm14
- DB 242,68,15,112,217,231 ; pshuflw $0xe7,%xmm1,%xmm11
- DB 102,68,15,111,249 ; movdqa %xmm1,%xmm15
- DB 102,69,15,96,249 ; punpcklbw %xmm9,%xmm15
- DB 102,15,111,234 ; movdqa %xmm2,%xmm5
- DB 102,15,111,250 ; movdqa %xmm2,%xmm7
- DB 102,65,15,96,249 ; punpcklbw %xmm9,%xmm7
- DB 102,65,15,104,233 ; punpckhbw %xmm9,%xmm5
- DB 243,65,15,112,194,231 ; pshufhw $0xe7,%xmm10,%xmm0
- DB 102,15,112,192,232 ; pshufd $0xe8,%xmm0,%xmm0
- DB 102,15,96,192 ; punpcklbw %xmm0,%xmm0
- DB 242,15,112,192,95 ; pshuflw $0x5f,%xmm0,%xmm0
- DB 243,68,15,112,208,95 ; pshufhw $0x5f,%xmm0,%xmm10
- DB 102,65,15,111,194 ; movdqa %xmm10,%xmm0
- DB 102,65,15,96,193 ; punpcklbw %xmm9,%xmm0
- DB 102,15,213,199 ; pmullw %xmm7,%xmm0
- DB 102,15,253,199 ; paddw %xmm7,%xmm0
- DB 102,15,213,252 ; pmullw %xmm4,%xmm7
- DB 102,15,253,252 ; paddw %xmm4,%xmm7
- DB 102,15,111,227 ; movdqa %xmm3,%xmm4
- DB 102,65,15,96,225 ; punpcklbw %xmm9,%xmm4
- DB 102,69,15,104,209 ; punpckhbw %xmm9,%xmm10
- DB 102,68,15,213,213 ; pmullw %xmm5,%xmm10
- DB 102,68,15,253,213 ; paddw %xmm5,%xmm10
- DB 102,65,15,213,232 ; pmullw %xmm8,%xmm5
- DB 102,65,15,253,232 ; paddw %xmm8,%xmm5
- DB 243,65,15,112,243,231 ; pshufhw $0xe7,%xmm11,%xmm6
- DB 102,15,112,246,232 ; pshufd $0xe8,%xmm6,%xmm6
- DB 102,15,96,246 ; punpcklbw %xmm6,%xmm6
- DB 242,15,112,246,95 ; pshuflw $0x5f,%xmm6,%xmm6
- DB 243,15,112,206,95 ; pshufhw $0x5f,%xmm6,%xmm1
- DB 102,15,111,241 ; movdqa %xmm1,%xmm6
- DB 102,65,15,96,241 ; punpcklbw %xmm9,%xmm6
- DB 102,15,213,244 ; pmullw %xmm4,%xmm6
- DB 102,15,253,244 ; paddw %xmm4,%xmm6
- DB 102,65,15,213,231 ; pmullw %xmm15,%xmm4
- DB 102,65,15,253,231 ; paddw %xmm15,%xmm4
- DB 102,68,15,111,195 ; movdqa %xmm3,%xmm8
- DB 102,69,15,104,233 ; punpckhbw %xmm9,%xmm13
- DB 102,69,15,104,193 ; punpckhbw %xmm9,%xmm8
- DB 102,65,15,104,201 ; punpckhbw %xmm9,%xmm1
- DB 102,65,15,213,200 ; pmullw %xmm8,%xmm1
- DB 102,65,15,253,200 ; paddw %xmm8,%xmm1
- DB 102,69,15,213,197 ; pmullw %xmm13,%xmm8
- DB 102,69,15,253,197 ; paddw %xmm13,%xmm8
- DB 102,15,113,213,8 ; psrlw $0x8,%xmm5
- DB 102,15,113,215,8 ; psrlw $0x8,%xmm7
- DB 102,15,103,253 ; packuswb %xmm5,%xmm7
- DB 102,65,15,113,208,8 ; psrlw $0x8,%xmm8
- DB 102,15,113,212,8 ; psrlw $0x8,%xmm4
- DB 102,65,15,103,224 ; packuswb %xmm8,%xmm4
- DB 102,15,252,255 ; paddb %xmm7,%xmm7
- DB 102,15,252,228 ; paddb %xmm4,%xmm4
- DB 102,65,15,111,238 ; movdqa %xmm14,%xmm5
- DB 102,15,248,236 ; psubb %xmm4,%xmm5
- DB 102,65,15,111,228 ; movdqa %xmm12,%xmm4
- DB 102,15,248,231 ; psubb %xmm7,%xmm4
- DB 102,65,15,113,210,8 ; psrlw $0x8,%xmm10
- DB 102,15,113,208,8 ; psrlw $0x8,%xmm0
- DB 102,65,15,103,194 ; packuswb %xmm10,%xmm0
- DB 102,15,113,209,8 ; psrlw $0x8,%xmm1
- DB 102,15,113,214,8 ; psrlw $0x8,%xmm6
- DB 102,15,103,241 ; packuswb %xmm1,%xmm6
- DB 102,68,15,248,246 ; psubb %xmm6,%xmm14
- DB 102,68,15,248,224 ; psubb %xmm0,%xmm12
- DB 102,15,111,13,102,1,0,0 ; movdqa 0x166(%rip),%xmm1 # 2c30 <_sk_exclusion_sse2_8bit+0x2d9>
- DB 102,15,111,193 ; movdqa %xmm1,%xmm0
- DB 102,65,15,223,196 ; pandn %xmm12,%xmm0
- DB 102,15,219,225 ; pand %xmm1,%xmm4
- DB 102,15,235,196 ; por %xmm4,%xmm0
- DB 102,15,219,233 ; pand %xmm1,%xmm5
- DB 102,65,15,223,206 ; pandn %xmm14,%xmm1
- DB 102,15,235,205 ; por %xmm5,%xmm1
+ DB 102,15,248,196 ; psubb %xmm4,%xmm0
+ DB 102,15,248,197 ; psubb %xmm5,%xmm0
+ DB 102,15,248,206 ; psubb %xmm6,%xmm1
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
ALIGN 4
DB 0,0 ; add %al,(%rax)
- DB 127,67 ; jg 2b33 <_sk_exclusion_sse2_8bit+0x1dc>
+ DB 127,67 ; jg 2a2b <_sk_difference_sse2_8bit+0x20a>
DB 0,0 ; add %al,(%rax)
- DB 127,67 ; jg 2b37 <_sk_exclusion_sse2_8bit+0x1e0>
+ DB 127,67 ; jg 2a2f <_sk_difference_sse2_8bit+0x20e>
DB 0,0 ; add %al,(%rax)
- DB 127,67 ; jg 2b3b <_sk_exclusion_sse2_8bit+0x1e4>
+ DB 127,67 ; jg 2a33 <_sk_difference_sse2_8bit+0x212>
ALIGN 16
DB 0,0 ; add %al,(%rax)
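
Reading note (not part of the patch): the win.S listing above is machine-generated, so the clearest way to read the new _sk_exclusion_sse2_8bit body is as bytes for "unpack to 16-bit lanes, approximate s*d as (s*d + s) >> 8, repack, subtract that product once from every channel and a second time from the color channels only". The sketch below is an illustrative SSE2-intrinsics rendering of that sequence for one XMM register of premultiplied RGBA pixels; it is not SkJumper source, the helper names are invented here, and it assumes the 16-byte constant the stage loads is 0x00ffffff per pixel (the same mask zero_alpha() applies in the C++ stage further down).

    // Illustrative only: an intrinsics reading of the regenerated
    // _sk_exclusion_sse2_8bit body for one register of 8-bit RGBA pixels.
    #include <emmintrin.h>

    // (s*d + s) >> 8 in 16-bit lanes: the pmullw/paddw/psrlw/packuswb
    // pattern above, an approximation of s*d/255.
    static __m128i approx_mul_255(__m128i s, __m128i d) {
        __m128i zero = _mm_setzero_si128();
        __m128i slo = _mm_unpacklo_epi8(s, zero), shi = _mm_unpackhi_epi8(s, zero);
        __m128i dlo = _mm_unpacklo_epi8(d, zero), dhi = _mm_unpackhi_epi8(d, zero);
        __m128i lo = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(slo, dlo), slo), 8);
        __m128i hi = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shi, dhi), shi), 8);
        return _mm_packus_epi16(lo, hi);
    }

    static __m128i exclusion_sse2_8bit(__m128i src, __m128i dst) {
        __m128i sd       = approx_mul_255(src, dst);
        __m128i sum      = _mm_add_epi8(src, dst);      // Sc+Dc and Sa+Da (the paddb)
        __m128i rgb_mask = _mm_set1_epi32(0x00ffffff);  // assumed value of the loaded constant
        sum = _mm_sub_epi8(sum, sd);                           // every channel loses one s*d
        sum = _mm_sub_epi8(sum, _mm_and_si128(sd, rgb_mask));  // color channels lose a second one
        return sum;   // rgb: Sc+Dc-2*Sc*Dc,  alpha: Sa+Da-Sa*Da
    }
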
diff --git a/src/jumper/SkJumper_stages_8bit.cpp b/src/jumper/SkJumper_stages_8bit.cpp
index cf037b0209..3d9fec0dbb 100644
--- a/src/jumper/SkJumper_stages_8bit.cpp
+++ b/src/jumper/SkJumper_stages_8bit.cpp
@@ -421,14 +421,24 @@ SI V srcover_alpha(V src, V dst, V rgb) {
STAGE(darken) { src = srcover_alpha(src, dst, src + (dst - max(src*alpha(dst), dst*alpha(src)))); }
STAGE(lighten) { src = srcover_alpha(src, dst, src + (dst - min(src*alpha(dst), dst*alpha(src)))); }
-STAGE(difference) {
- V min_ = min(src*alpha(dst), dst*alpha(src));
- src = srcover_alpha(src, dst, (src - min_) + (dst - min_));
-}
+SI V zero_alpha(V rgba) { return rgba.u32 & 0x00ffffff; }
STAGE(exclusion) {
+ // We could do exclusion with srcover_alpha(), but can fold a little more math through:
+ // rgb = Sc + Dc - 2*Sc*Dc
+ // alpha = Sa + Da - Sa*Da
+ // So we just subtract two sd from rgb, and one from alpha.
V sd = src*dst;
- src = srcover_alpha(src, dst, (src - sd) + (dst - sd));
+ src = (src - sd) + (dst - zero_alpha(sd));
+}
+STAGE(difference) {
+ // Like exclusion, we can fold math through with the same trick:
+ // rgb = Sc + Dc - 2*min(Sc*Da, Dc*Sa)
+ // alpha = Sa + Da - Sa*Da
+ // Here notice (Sa*Da) == min(Sa*Da, Da*Sa) for alpha,
+ // so again we subtract two from rgb, one from alpha.
+ V min_ = min(src*alpha(dst), dst*alpha(src));
+ src = (src - min_) + (dst - zero_alpha(min_));
}
#undef BLEND_MODE
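
Reading note (not part of the patch): the fold in the two stages above is easiest to sanity-check with exact scalar math. The sketch below is a float model of the folded difference and exclusion formulas, not SkJumper code; Pixel and the helper names are invented for the example, and it ignores the stages' fixed-point rounding.

    // Scalar float model of the folded difference/exclusion math above.
    // Pixel and these helpers are hypothetical, not SkJumper types.
    #include <algorithm>
    #include <cassert>
    #include <cmath>

    struct Pixel { float r, g, b, a; };   // premultiplied, channels in [0,1]

    static Pixel exclusion_folded(Pixel s, Pixel d) {
        Pixel sd = { s.r*d.r, s.g*d.g, s.b*d.b, s.a*d.a };
        return { (s.r - sd.r) + (d.r - sd.r),   // Sc + Dc - 2*Sc*Dc
                 (s.g - sd.g) + (d.g - sd.g),
                 (s.b - sd.b) + (d.b - sd.b),
                 (s.a - sd.a) +  d.a };         // Sa + Da - Sa*Da (one copy only)
    }

    static Pixel difference_folded(Pixel s, Pixel d) {
        Pixel m = { std::min(s.r*d.a, d.r*s.a),
                    std::min(s.g*d.a, d.g*s.a),
                    std::min(s.b*d.a, d.b*s.a),
                    s.a*d.a };                  // min(Sa*Da, Da*Sa) == Sa*Da
        return { (s.r - m.r) + (d.r - m.r),     // Sc + Dc - 2*min(Sc*Da, Dc*Sa)
                 (s.g - m.g) + (d.g - m.g),
                 (s.b - m.b) + (d.b - m.b),
                 (s.a - m.a) +  d.a };          // Sa + Da - Sa*Da
    }

    int main() {
        Pixel s = {0.25f, 0.50f, 0.10f, 0.80f},
              d = {0.40f, 0.30f, 0.90f, 0.60f};
        Pixel e = exclusion_folded(s, d);
        // Matches the unfolded definitions exactly.
        assert(std::fabs(e.r - (s.r + d.r - 2*s.r*d.r)) < 1e-6f);
        assert(std::fabs(e.a - (s.a + d.a - s.a*d.a))   < 1e-6f);
        Pixel f = difference_folded(s, d);
        assert(std::fabs(f.r - (s.r + d.r - 2*std::min(s.r*d.a, d.r*s.a))) < 1e-6f);
        assert(std::fabs(f.a - (s.a + d.a - s.a*d.a))   < 1e-6f);
        return 0;
    }

The point of the fold is that subtracting the product twice (once masked to the color lanes) gives the same result as the previous srcover_alpha() formulation while skipping its extra blend/select work, which is where the benchmark wins in the commit message come from.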