diff options
author | Florin Malita <fmalita@chromium.org> | 2017-08-14 16:49:32 -0400 |
---|---|---|
committer | Skia Commit-Bot <skia-commit-bot@chromium.org> | 2017-08-14 21:31:25 +0000 |
commit | 5bfc85a8b3cb7ec181a1d1a690ad3514ab1d3056 (patch) | |
tree | 5aaf6906d1195090ea5d2e43137a3fe132acedd9 /src/jumper | |
parent | 93ba0a4fc85f04bc1be9429983df1e57473b49a7 (diff) |
Lowp overlay, hardlight stages
Before:
micros bench
7669.09 ? blendmode_rect_HardLight 8888
8707.13 ? blendmode_rect_Overlay 8888
After:
micros bench
6679.60 ? blendmode_rect_HardLight 8888
6789.57 ? blendmode_rect_Overlay 8888
Change-Id: I52f389253fa07dafe18e572af550af7387264a16
Reviewed-on: https://skia-review.googlesource.com/34280
Commit-Queue: Florin Malita <fmalita@chromium.org>
Reviewed-by: Mike Klein <mtklein@google.com>
Diffstat (limited to 'src/jumper')
-rw-r--r-- | src/jumper/SkJumper.cpp | 4 | ||||
-rw-r--r-- | src/jumper/SkJumper_generated.S | 1613 | ||||
-rw-r--r-- | src/jumper/SkJumper_generated_win.S | 1599 | ||||
-rw-r--r-- | src/jumper/SkJumper_stages_8bit.cpp | 67 |
4 files changed, 3006 insertions, 277 deletions
diff --git a/src/jumper/SkJumper.cpp b/src/jumper/SkJumper.cpp index 8baa1890b6..e1888a8647 100644 --- a/src/jumper/SkJumper.cpp +++ b/src/jumper/SkJumper.cpp @@ -134,7 +134,9 @@ using StartPipelineFn = void(size_t,size_t,size_t,size_t, void**,K*); M(darken) \ M(lighten) \ M(difference) \ - M(exclusion) + M(exclusion) \ + M(hardlight) \ + M(overlay) #endif extern "C" { diff --git a/src/jumper/SkJumper_generated.S b/src/jumper/SkJumper_generated.S index a6b1cbdbfe..d616ed93e9 100644 --- a/src/jumper/SkJumper_generated.S +++ b/src/jumper/SkJumper_generated.S @@ -56656,7 +56656,7 @@ HIDDEN _sk_set_rgb_hsw_8bit FUNCTION(_sk_set_rgb_hsw_8bit) _sk_set_rgb_hsw_8bit: .byte 72,173 // lods %ds:(%rsi),%rax - .byte 197,250,16,37,142,44,0,0 // vmovss 0x2c8e(%rip),%xmm4 # 2d50 <_sk_difference_hsw_8bit+0x182> + .byte 197,250,16,37,110,51,0,0 // vmovss 0x336e(%rip),%xmm4 # 3430 <_sk_overlay_hsw_8bit+0x366> .byte 197,218,89,40 // vmulss (%rax),%xmm4,%xmm5 .byte 196,225,250,44,205 // vcvttss2si %xmm5,%rcx .byte 197,218,89,104,4 // vmulss 0x4(%rax),%xmm4,%xmm5 @@ -56669,7 +56669,7 @@ _sk_set_rgb_hsw_8bit: .byte 9,208 // or %edx,%eax .byte 197,249,110,224 // vmovd %eax,%xmm4 .byte 196,226,125,88,228 // vpbroadcastd %xmm4,%ymm4 - .byte 197,253,111,45,134,44,0,0 // vmovdqa 0x2c86(%rip),%ymm5 # 2d80 <_sk_difference_hsw_8bit+0x1b2> + .byte 197,253,111,45,102,51,0,0 // vmovdqa 0x3366(%rip),%ymm5 # 3460 <_sk_overlay_hsw_8bit+0x396> .byte 197,245,219,205 // vpand %ymm5,%ymm1,%ymm1 .byte 197,253,219,197 // vpand %ymm5,%ymm0,%ymm0 .byte 197,221,235,192 // vpor %ymm0,%ymm4,%ymm0 @@ -56681,10 +56681,10 @@ HIDDEN _sk_premul_hsw_8bit .globl _sk_premul_hsw_8bit FUNCTION(_sk_premul_hsw_8bit) _sk_premul_hsw_8bit: - .byte 197,253,111,37,138,44,0,0 // vmovdqa 0x2c8a(%rip),%ymm4 # 2da0 <_sk_difference_hsw_8bit+0x1d2> + .byte 197,253,111,37,106,51,0,0 // vmovdqa 0x336a(%rip),%ymm4 # 3480 <_sk_overlay_hsw_8bit+0x3b6> .byte 196,226,125,0,236 // vpshufb %ymm4,%ymm0,%ymm5 .byte 196,226,117,0,228 // vpshufb 
%ymm4,%ymm1,%ymm4 - .byte 197,253,111,53,152,44,0,0 // vmovdqa 0x2c98(%rip),%ymm6 # 2dc0 <_sk_difference_hsw_8bit+0x1f2> + .byte 197,253,111,53,120,51,0,0 // vmovdqa 0x3378(%rip),%ymm6 # 34a0 <_sk_overlay_hsw_8bit+0x3d6> .byte 197,221,235,230 // vpor %ymm6,%ymm4,%ymm4 .byte 197,213,235,238 // vpor %ymm6,%ymm5,%ymm5 .byte 196,226,125,48,240 // vpmovzxbw %xmm0,%ymm6 @@ -56724,7 +56724,7 @@ HIDDEN _sk_swap_rb_hsw_8bit .globl _sk_swap_rb_hsw_8bit FUNCTION(_sk_swap_rb_hsw_8bit) _sk_swap_rb_hsw_8bit: - .byte 197,253,111,37,16,44,0,0 // vmovdqa 0x2c10(%rip),%ymm4 # 2de0 <_sk_difference_hsw_8bit+0x212> + .byte 197,253,111,37,240,50,0,0 // vmovdqa 0x32f0(%rip),%ymm4 # 34c0 <_sk_overlay_hsw_8bit+0x3f6> .byte 196,226,125,0,196 // vpshufb %ymm4,%ymm0,%ymm0 .byte 196,226,117,0,204 // vpshufb %ymm4,%ymm1,%ymm1 .byte 72,173 // lods %ds:(%rsi),%rax @@ -57074,7 +57074,7 @@ _sk_load_bgra_hsw_8bit: .byte 117,35 // jne 6b4 <_sk_load_bgra_hsw_8bit+0x44> .byte 196,161,126,111,76,130,32 // vmovdqu 0x20(%rdx,%r8,4),%ymm1 .byte 196,161,126,111,4,130 // vmovdqu (%rdx,%r8,4),%ymm0 - .byte 197,253,111,37,90,39,0,0 // vmovdqa 0x275a(%rip),%ymm4 # 2e00 <_sk_difference_hsw_8bit+0x232> + .byte 197,253,111,37,58,46,0,0 // vmovdqa 0x2e3a(%rip),%ymm4 # 34e0 <_sk_overlay_hsw_8bit+0x416> .byte 196,226,125,0,196 // vpshufb %ymm4,%ymm0,%ymm0 .byte 196,226,117,0,204 // vpshufb %ymm4,%ymm1,%ymm1 .byte 72,173 // lods %ds:(%rsi),%rax @@ -57189,7 +57189,7 @@ _sk_load_bgra_dst_hsw_8bit: .byte 117,35 // jne 86c <_sk_load_bgra_dst_hsw_8bit+0x44> .byte 196,161,126,111,92,130,32 // vmovdqu 0x20(%rdx,%r8,4),%ymm3 .byte 196,161,126,111,20,130 // vmovdqu (%rdx,%r8,4),%ymm2 - .byte 197,253,111,37,194,37,0,0 // vmovdqa 0x25c2(%rip),%ymm4 # 2e20 <_sk_difference_hsw_8bit+0x252> + .byte 197,253,111,37,162,44,0,0 // vmovdqa 0x2ca2(%rip),%ymm4 # 3500 <_sk_overlay_hsw_8bit+0x436> .byte 196,226,109,0,212 // vpshufb %ymm4,%ymm2,%ymm2 .byte 196,226,101,0,220 // vpshufb %ymm4,%ymm3,%ymm3 .byte 72,173 // lods %ds:(%rsi),%rax @@ 
-57300,7 +57300,7 @@ _sk_store_bgra_hsw_8bit: .byte 72,15,175,209 // imul %rcx,%rdx .byte 72,193,226,2 // shl $0x2,%rdx .byte 72,3,16 // add (%rax),%rdx - .byte 197,253,111,37,60,36,0,0 // vmovdqa 0x243c(%rip),%ymm4 # 2e40 <_sk_difference_hsw_8bit+0x272> + .byte 197,253,111,37,28,43,0,0 // vmovdqa 0x2b1c(%rip),%ymm4 # 3520 <_sk_overlay_hsw_8bit+0x456> .byte 196,226,117,0,236 // vpshufb %ymm4,%ymm1,%ymm5 .byte 196,226,125,0,228 // vpshufb %ymm4,%ymm0,%ymm4 .byte 77,133,201 // test %r9,%r9 @@ -57586,10 +57586,10 @@ _sk_store_a8_hsw_8bit: .byte 72,99,87,8 // movslq 0x8(%rdi),%rdx .byte 72,15,175,209 // imul %rcx,%rdx .byte 72,3,16 // add (%rax),%rdx - .byte 197,253,111,37,104,32,0,0 // vmovdqa 0x2068(%rip),%ymm4 # 2e60 <_sk_difference_hsw_8bit+0x292> + .byte 197,253,111,37,72,39,0,0 // vmovdqa 0x2748(%rip),%ymm4 # 3540 <_sk_overlay_hsw_8bit+0x476> .byte 196,226,117,0,236 // vpshufb %ymm4,%ymm1,%ymm5 .byte 196,227,253,0,237,232 // vpermq $0xe8,%ymm5,%ymm5 - .byte 197,249,111,53,245,34,0,0 // vmovdqa 0x22f5(%rip),%xmm6 # 3100 <_sk_difference_hsw_8bit+0x532> + .byte 197,249,111,53,69,42,0,0 // vmovdqa 0x2a45(%rip),%xmm6 # 3850 <_sk_overlay_hsw_8bit+0x786> .byte 196,226,81,0,238 // vpshufb %xmm6,%xmm5,%xmm5 .byte 196,226,125,0,228 // vpshufb %ymm4,%ymm0,%ymm4 .byte 196,227,253,0,228,232 // vpermq $0xe8,%ymm4,%ymm4 @@ -57681,10 +57681,10 @@ _sk_load_g8_hsw_8bit: .byte 196,226,125,49,200 // vpmovzxbd %xmm0,%ymm1 .byte 197,249,112,192,78 // vpshufd $0x4e,%xmm0,%xmm0 .byte 196,226,125,49,192 // vpmovzxbd %xmm0,%ymm0 - .byte 196,226,125,88,37,9,30,0,0 // vpbroadcastd 0x1e09(%rip),%ymm4 # 2d54 <_sk_difference_hsw_8bit+0x186> + .byte 196,226,125,88,37,233,36,0,0 // vpbroadcastd 0x24e9(%rip),%ymm4 # 3434 <_sk_overlay_hsw_8bit+0x36a> .byte 196,226,125,64,236 // vpmulld %ymm4,%ymm0,%ymm5 .byte 196,226,117,64,196 // vpmulld %ymm4,%ymm1,%ymm0 - .byte 196,226,125,88,13,250,29,0,0 // vpbroadcastd 0x1dfa(%rip),%ymm1 # 2d58 <_sk_difference_hsw_8bit+0x18a> + .byte 
196,226,125,88,13,218,36,0,0 // vpbroadcastd 0x24da(%rip),%ymm1 # 3438 <_sk_overlay_hsw_8bit+0x36e> .byte 197,253,235,193 // vpor %ymm1,%ymm0,%ymm0 .byte 197,213,235,201 // vpor %ymm1,%ymm5,%ymm1 .byte 72,173 // lods %ds:(%rsi),%rax @@ -57782,10 +57782,10 @@ _sk_load_g8_dst_hsw_8bit: .byte 196,226,125,49,218 // vpmovzxbd %xmm2,%ymm3 .byte 197,249,112,210,78 // vpshufd $0x4e,%xmm2,%xmm2 .byte 196,226,125,49,210 // vpmovzxbd %xmm2,%ymm2 - .byte 196,226,125,88,37,153,28,0,0 // vpbroadcastd 0x1c99(%rip),%ymm4 # 2d5c <_sk_difference_hsw_8bit+0x18e> + .byte 196,226,125,88,37,121,35,0,0 // vpbroadcastd 0x2379(%rip),%ymm4 # 343c <_sk_overlay_hsw_8bit+0x372> .byte 196,226,109,64,236 // vpmulld %ymm4,%ymm2,%ymm5 .byte 196,226,101,64,212 // vpmulld %ymm4,%ymm3,%ymm2 - .byte 196,226,125,88,29,138,28,0,0 // vpbroadcastd 0x1c8a(%rip),%ymm3 # 2d60 <_sk_difference_hsw_8bit+0x192> + .byte 196,226,125,88,29,106,35,0,0 // vpbroadcastd 0x236a(%rip),%ymm3 # 3440 <_sk_overlay_hsw_8bit+0x376> .byte 197,237,235,211 // vpor %ymm3,%ymm2,%ymm2 .byte 197,213,235,219 // vpor %ymm3,%ymm5,%ymm3 .byte 72,173 // lods %ds:(%rsi),%rax @@ -57882,7 +57882,7 @@ _sk_srcover_rgba_8888_hsw_8bit: .byte 15,133,222,0,0,0 // jne 1303 <_sk_srcover_rgba_8888_hsw_8bit+0x103> .byte 196,33,126,111,76,138,32 // vmovdqu 0x20(%rdx,%r9,4),%ymm9 .byte 196,33,126,111,28,138 // vmovdqu (%rdx,%r9,4),%ymm11 - .byte 197,253,111,53,70,28,0,0 // vmovdqa 0x1c46(%rip),%ymm6 # 2e80 <_sk_difference_hsw_8bit+0x2b2> + .byte 197,253,111,53,38,35,0,0 // vmovdqa 0x2326(%rip),%ymm6 # 3560 <_sk_overlay_hsw_8bit+0x496> .byte 196,226,117,0,254 // vpshufb %ymm6,%ymm1,%ymm7 .byte 196,226,125,0,246 // vpshufb %ymm6,%ymm0,%ymm6 .byte 196,66,125,48,195 // vpmovzxbw %xmm11,%ymm8 @@ -58090,7 +58090,7 @@ FUNCTION(_sk_scale_1_float_hsw_8bit) _sk_scale_1_float_hsw_8bit: .byte 72,173 // lods %ds:(%rsi),%rax .byte 197,250,16,32 // vmovss (%rax),%xmm4 - .byte 197,218,89,37,206,23,0,0 // vmulss 0x17ce(%rip),%xmm4,%xmm4 # 2d64 
<_sk_difference_hsw_8bit+0x196> + .byte 197,218,89,37,174,30,0,0 // vmulss 0x1eae(%rip),%xmm4,%xmm4 # 3444 <_sk_overlay_hsw_8bit+0x37a> .byte 197,250,44,196 // vcvttss2si %xmm4,%eax .byte 197,249,110,224 // vmovd %eax,%xmm4 .byte 196,226,125,120,228 // vpbroadcastb %xmm4,%ymm4 @@ -58100,7 +58100,7 @@ _sk_scale_1_float_hsw_8bit: .byte 196,226,125,48,241 // vpmovzxbw %xmm1,%ymm6 .byte 196,227,125,57,201,1 // vextracti128 $0x1,%ymm1,%xmm1 .byte 196,226,125,48,201 // vpmovzxbw %xmm1,%ymm1 - .byte 197,221,219,37,213,24,0,0 // vpand 0x18d5(%rip),%ymm4,%ymm4 # 2ea0 <_sk_difference_hsw_8bit+0x2d2> + .byte 197,221,219,37,181,31,0,0 // vpand 0x1fb5(%rip),%ymm4,%ymm4 # 3580 <_sk_overlay_hsw_8bit+0x4b6> .byte 197,221,213,249 // vpmullw %ymm1,%ymm4,%ymm7 .byte 197,93,213,198 // vpmullw %ymm6,%ymm4,%ymm8 .byte 197,93,213,200 // vpmullw %ymm0,%ymm4,%ymm9 @@ -58139,7 +58139,7 @@ _sk_scale_u8_hsw_8bit: .byte 196,226,125,49,236 // vpmovzxbd %xmm4,%ymm5 .byte 197,249,112,228,78 // vpshufd $0x4e,%xmm4,%xmm4 .byte 196,226,125,49,228 // vpmovzxbd %xmm4,%ymm4 - .byte 197,253,111,53,95,24,0,0 // vmovdqa 0x185f(%rip),%ymm6 # 2ec0 <_sk_difference_hsw_8bit+0x2f2> + .byte 197,253,111,53,63,31,0,0 // vmovdqa 0x1f3f(%rip),%ymm6 # 35a0 <_sk_overlay_hsw_8bit+0x4d6> .byte 196,226,93,0,230 // vpshufb %ymm6,%ymm4,%ymm4 .byte 196,226,85,0,238 // vpshufb %ymm6,%ymm5,%ymm5 .byte 196,226,125,48,240 // vpmovzxbw %xmm0,%ymm6 @@ -58256,7 +58256,7 @@ FUNCTION(_sk_lerp_1_float_hsw_8bit) _sk_lerp_1_float_hsw_8bit: .byte 72,173 // lods %ds:(%rsi),%rax .byte 197,250,16,32 // vmovss (%rax),%xmm4 - .byte 197,218,89,37,50,21,0,0 // vmulss 0x1532(%rip),%xmm4,%xmm4 # 2d68 <_sk_difference_hsw_8bit+0x19a> + .byte 197,218,89,37,18,28,0,0 // vmulss 0x1c12(%rip),%xmm4,%xmm4 # 3448 <_sk_overlay_hsw_8bit+0x37e> .byte 197,250,44,196 // vcvttss2si %xmm4,%eax .byte 197,249,110,224 // vmovd %eax,%xmm4 .byte 196,226,125,120,228 // vpbroadcastb %xmm4,%ymm4 @@ -58266,7 +58266,7 @@ _sk_lerp_1_float_hsw_8bit: .byte 
196,226,125,48,241 // vpmovzxbw %xmm1,%ymm6 .byte 196,227,125,57,201,1 // vextracti128 $0x1,%ymm1,%xmm1 .byte 196,226,125,48,201 // vpmovzxbw %xmm1,%ymm1 - .byte 197,221,219,61,117,22,0,0 // vpand 0x1675(%rip),%ymm4,%ymm7 # 2ee0 <_sk_difference_hsw_8bit+0x312> + .byte 197,221,219,61,85,29,0,0 // vpand 0x1d55(%rip),%ymm4,%ymm7 # 35c0 <_sk_overlay_hsw_8bit+0x4f6> .byte 197,69,213,193 // vpmullw %ymm1,%ymm7,%ymm8 .byte 197,69,213,206 // vpmullw %ymm6,%ymm7,%ymm9 .byte 197,69,213,208 // vpmullw %ymm0,%ymm7,%ymm10 @@ -58336,7 +58336,7 @@ _sk_lerp_u8_hsw_8bit: .byte 196,226,125,49,236 // vpmovzxbd %xmm4,%ymm5 .byte 197,249,112,228,78 // vpshufd $0x4e,%xmm4,%xmm4 .byte 196,226,125,49,228 // vpmovzxbd %xmm4,%ymm4 - .byte 197,253,111,53,106,21,0,0 // vmovdqa 0x156a(%rip),%ymm6 # 2f00 <_sk_difference_hsw_8bit+0x332> + .byte 197,253,111,53,74,28,0,0 // vmovdqa 0x1c4a(%rip),%ymm6 # 35e0 <_sk_overlay_hsw_8bit+0x516> .byte 196,98,93,0,206 // vpshufb %ymm6,%ymm4,%ymm9 .byte 196,98,85,0,222 // vpshufb %ymm6,%ymm5,%ymm11 .byte 196,226,125,48,240 // vpmovzxbw %xmm0,%ymm6 @@ -58505,7 +58505,7 @@ HIDDEN _sk_black_color_hsw_8bit FUNCTION(_sk_black_color_hsw_8bit) _sk_black_color_hsw_8bit: .byte 72,173 // lods %ds:(%rsi),%rax - .byte 196,226,125,24,5,65,17,0,0 // vbroadcastss 0x1141(%rip),%ymm0 # 2d6c <_sk_difference_hsw_8bit+0x19e> + .byte 196,226,125,24,5,33,24,0,0 // vbroadcastss 0x1821(%rip),%ymm0 # 344c <_sk_overlay_hsw_8bit+0x382> .byte 197,252,40,200 // vmovaps %ymm0,%ymm1 .byte 255,224 // jmpq *%rax @@ -58531,7 +58531,7 @@ HIDDEN _sk_srcatop_hsw_8bit .globl _sk_srcatop_hsw_8bit FUNCTION(_sk_srcatop_hsw_8bit) _sk_srcatop_hsw_8bit: - .byte 197,125,111,5,207,18,0,0 // vmovdqa 0x12cf(%rip),%ymm8 # 2f20 <_sk_difference_hsw_8bit+0x352> + .byte 197,125,111,5,175,25,0,0 // vmovdqa 0x19af(%rip),%ymm8 # 3600 <_sk_overlay_hsw_8bit+0x536> .byte 196,194,101,0,224 // vpshufb %ymm8,%ymm3,%ymm4 .byte 196,194,109,0,232 // vpshufb %ymm8,%ymm2,%ymm5 .byte 196,98,125,48,208 // vpmovzxbw 
%xmm0,%ymm10 @@ -58608,7 +58608,7 @@ HIDDEN _sk_dstatop_hsw_8bit .globl _sk_dstatop_hsw_8bit FUNCTION(_sk_dstatop_hsw_8bit) _sk_dstatop_hsw_8bit: - .byte 197,125,111,5,138,17,0,0 // vmovdqa 0x118a(%rip),%ymm8 # 2f40 <_sk_difference_hsw_8bit+0x372> + .byte 197,125,111,5,106,24,0,0 // vmovdqa 0x186a(%rip),%ymm8 # 3620 <_sk_overlay_hsw_8bit+0x556> .byte 196,194,117,0,224 // vpshufb %ymm8,%ymm1,%ymm4 .byte 196,194,125,0,232 // vpshufb %ymm8,%ymm0,%ymm5 .byte 196,98,125,48,210 // vpmovzxbw %xmm2,%ymm10 @@ -58685,7 +58685,7 @@ HIDDEN _sk_srcin_hsw_8bit .globl _sk_srcin_hsw_8bit FUNCTION(_sk_srcin_hsw_8bit) _sk_srcin_hsw_8bit: - .byte 197,253,111,37,65,16,0,0 // vmovdqa 0x1041(%rip),%ymm4 # 2f60 <_sk_difference_hsw_8bit+0x392> + .byte 197,253,111,37,33,23,0,0 // vmovdqa 0x1721(%rip),%ymm4 # 3640 <_sk_overlay_hsw_8bit+0x576> .byte 196,226,101,0,236 // vpshufb %ymm4,%ymm3,%ymm5 .byte 196,226,109,0,228 // vpshufb %ymm4,%ymm2,%ymm4 .byte 196,226,125,48,240 // vpmovzxbw %xmm0,%ymm6 @@ -58725,7 +58725,7 @@ HIDDEN _sk_dstin_hsw_8bit .globl _sk_dstin_hsw_8bit FUNCTION(_sk_dstin_hsw_8bit) _sk_dstin_hsw_8bit: - .byte 197,253,111,37,183,15,0,0 // vmovdqa 0xfb7(%rip),%ymm4 # 2f80 <_sk_difference_hsw_8bit+0x3b2> + .byte 197,253,111,37,151,22,0,0 // vmovdqa 0x1697(%rip),%ymm4 # 3660 <_sk_overlay_hsw_8bit+0x596> .byte 196,226,117,0,204 // vpshufb %ymm4,%ymm1,%ymm1 .byte 196,226,125,0,196 // vpshufb %ymm4,%ymm0,%ymm0 .byte 196,226,125,48,226 // vpmovzxbw %xmm2,%ymm4 @@ -58765,7 +58765,7 @@ HIDDEN _sk_srcout_hsw_8bit .globl _sk_srcout_hsw_8bit FUNCTION(_sk_srcout_hsw_8bit) _sk_srcout_hsw_8bit: - .byte 197,253,111,37,43,15,0,0 // vmovdqa 0xf2b(%rip),%ymm4 # 2fa0 <_sk_difference_hsw_8bit+0x3d2> + .byte 197,253,111,37,11,22,0,0 // vmovdqa 0x160b(%rip),%ymm4 # 3680 <_sk_overlay_hsw_8bit+0x5b6> .byte 196,226,109,0,236 // vpshufb %ymm4,%ymm2,%ymm5 .byte 196,226,101,0,228 // vpshufb %ymm4,%ymm3,%ymm4 .byte 197,205,118,246 // vpcmpeqd %ymm6,%ymm6,%ymm6 @@ -58808,7 +58808,7 @@ HIDDEN 
_sk_dstout_hsw_8bit .globl _sk_dstout_hsw_8bit FUNCTION(_sk_dstout_hsw_8bit) _sk_dstout_hsw_8bit: - .byte 197,253,111,37,149,14,0,0 // vmovdqa 0xe95(%rip),%ymm4 # 2fc0 <_sk_difference_hsw_8bit+0x3f2> + .byte 197,253,111,37,117,21,0,0 // vmovdqa 0x1575(%rip),%ymm4 # 36a0 <_sk_overlay_hsw_8bit+0x5d6> .byte 196,226,125,0,196 // vpshufb %ymm4,%ymm0,%ymm0 .byte 196,226,117,0,204 // vpshufb %ymm4,%ymm1,%ymm1 .byte 197,221,118,228 // vpcmpeqd %ymm4,%ymm4,%ymm4 @@ -58851,7 +58851,7 @@ HIDDEN _sk_srcover_hsw_8bit .globl _sk_srcover_hsw_8bit FUNCTION(_sk_srcover_hsw_8bit) _sk_srcover_hsw_8bit: - .byte 197,253,111,37,253,13,0,0 // vmovdqa 0xdfd(%rip),%ymm4 # 2fe0 <_sk_difference_hsw_8bit+0x412> + .byte 197,253,111,37,221,20,0,0 // vmovdqa 0x14dd(%rip),%ymm4 # 36c0 <_sk_overlay_hsw_8bit+0x5f6> .byte 196,226,117,0,236 // vpshufb %ymm4,%ymm1,%ymm5 .byte 196,226,125,0,228 // vpshufb %ymm4,%ymm0,%ymm4 .byte 196,98,125,48,202 // vpmovzxbw %xmm2,%ymm9 @@ -58895,7 +58895,7 @@ HIDDEN _sk_dstover_hsw_8bit .globl _sk_dstover_hsw_8bit FUNCTION(_sk_dstover_hsw_8bit) _sk_dstover_hsw_8bit: - .byte 197,253,111,37,94,13,0,0 // vmovdqa 0xd5e(%rip),%ymm4 # 3000 <_sk_difference_hsw_8bit+0x432> + .byte 197,253,111,37,62,20,0,0 // vmovdqa 0x143e(%rip),%ymm4 # 36e0 <_sk_overlay_hsw_8bit+0x616> .byte 196,226,101,0,236 // vpshufb %ymm4,%ymm3,%ymm5 .byte 196,226,109,0,228 // vpshufb %ymm4,%ymm2,%ymm4 .byte 196,98,125,48,200 // vpmovzxbw %xmm0,%ymm9 @@ -58978,7 +58978,7 @@ FUNCTION(_sk_multiply_hsw_8bit) _sk_multiply_hsw_8bit: .byte 197,253,111,243 // vmovdqa %ymm3,%ymm6 .byte 197,253,111,218 // vmovdqa %ymm2,%ymm3 - .byte 197,125,111,13,31,12,0,0 // vmovdqa 0xc1f(%rip),%ymm9 # 3020 <_sk_difference_hsw_8bit+0x452> + .byte 197,125,111,13,255,18,0,0 // vmovdqa 0x12ff(%rip),%ymm9 # 3700 <_sk_overlay_hsw_8bit+0x636> .byte 196,194,101,0,225 // vpshufb %ymm9,%ymm3,%ymm4 .byte 196,194,77,0,233 // vpshufb %ymm9,%ymm6,%ymm5 .byte 196,65,45,118,210 // vpcmpeqd %ymm10,%ymm10,%ymm10 @@ -59122,7 +59122,7 @@ HIDDEN 
_sk_xor__hsw_8bit .globl _sk_xor__hsw_8bit FUNCTION(_sk_xor__hsw_8bit) _sk_xor__hsw_8bit: - .byte 197,125,111,13,173,9,0,0 // vmovdqa 0x9ad(%rip),%ymm9 # 3040 <_sk_difference_hsw_8bit+0x472> + .byte 197,125,111,13,141,16,0,0 // vmovdqa 0x108d(%rip),%ymm9 # 3720 <_sk_overlay_hsw_8bit+0x656> .byte 196,194,109,0,225 // vpshufb %ymm9,%ymm2,%ymm4 .byte 196,194,101,0,249 // vpshufb %ymm9,%ymm3,%ymm7 .byte 196,65,37,118,219 // vpcmpeqd %ymm11,%ymm11,%ymm11 @@ -59201,7 +59201,7 @@ HIDDEN _sk_darken_hsw_8bit .globl _sk_darken_hsw_8bit FUNCTION(_sk_darken_hsw_8bit) _sk_darken_hsw_8bit: - .byte 197,125,111,5,91,8,0,0 // vmovdqa 0x85b(%rip),%ymm8 # 3060 <_sk_difference_hsw_8bit+0x492> + .byte 197,125,111,5,59,15,0,0 // vmovdqa 0xf3b(%rip),%ymm8 # 3740 <_sk_overlay_hsw_8bit+0x676> .byte 196,194,101,0,224 // vpshufb %ymm8,%ymm3,%ymm4 .byte 196,194,109,0,240 // vpshufb %ymm8,%ymm2,%ymm6 .byte 196,98,125,48,208 // vpmovzxbw %xmm0,%ymm10 @@ -59274,7 +59274,7 @@ _sk_darken_hsw_8bit: .byte 197,253,248,246 // vpsubb %ymm6,%ymm0,%ymm6 .byte 197,245,248,205 // vpsubb %ymm5,%ymm1,%ymm1 .byte 197,253,248,196 // vpsubb %ymm4,%ymm0,%ymm0 - .byte 196,226,125,88,37,252,3,0,0 // vpbroadcastd 0x3fc(%rip),%ymm4 # 2d70 <_sk_difference_hsw_8bit+0x1a2> + .byte 196,226,125,88,37,220,10,0,0 // vpbroadcastd 0xadc(%rip),%ymm4 # 3450 <_sk_overlay_hsw_8bit+0x386> .byte 196,227,125,76,198,64 // vpblendvb %ymm4,%ymm6,%ymm0,%ymm0 .byte 196,227,117,76,207,64 // vpblendvb %ymm4,%ymm7,%ymm1,%ymm1 .byte 72,173 // lods %ds:(%rsi),%rax @@ -59284,7 +59284,7 @@ HIDDEN _sk_lighten_hsw_8bit .globl _sk_lighten_hsw_8bit FUNCTION(_sk_lighten_hsw_8bit) _sk_lighten_hsw_8bit: - .byte 197,125,111,5,244,6,0,0 // vmovdqa 0x6f4(%rip),%ymm8 # 3080 <_sk_difference_hsw_8bit+0x4b2> + .byte 197,125,111,5,212,13,0,0 // vmovdqa 0xdd4(%rip),%ymm8 # 3760 <_sk_overlay_hsw_8bit+0x696> .byte 196,194,101,0,224 // vpshufb %ymm8,%ymm3,%ymm4 .byte 196,194,109,0,240 // vpshufb %ymm8,%ymm2,%ymm6 .byte 196,98,125,48,208 // vpmovzxbw %xmm0,%ymm10 
@@ -59357,7 +59357,7 @@ _sk_lighten_hsw_8bit: .byte 197,253,248,246 // vpsubb %ymm6,%ymm0,%ymm6 .byte 197,245,248,205 // vpsubb %ymm5,%ymm1,%ymm1 .byte 197,253,248,196 // vpsubb %ymm4,%ymm0,%ymm0 - .byte 196,226,125,88,37,121,2,0,0 // vpbroadcastd 0x279(%rip),%ymm4 # 2d74 <_sk_difference_hsw_8bit+0x1a6> + .byte 196,226,125,88,37,89,9,0,0 // vpbroadcastd 0x959(%rip),%ymm4 # 3454 <_sk_overlay_hsw_8bit+0x38a> .byte 196,227,125,76,198,64 // vpblendvb %ymm4,%ymm6,%ymm0,%ymm0 .byte 196,227,117,76,207,64 // vpblendvb %ymm4,%ymm7,%ymm1,%ymm1 .byte 72,173 // lods %ds:(%rsi),%rax @@ -59397,7 +59397,7 @@ _sk_exclusion_hsw_8bit: .byte 196,227,77,56,252,1 // vinserti128 $0x1,%xmm4,%ymm6,%ymm7 .byte 196,227,77,70,228,49 // vperm2i128 $0x31,%ymm4,%ymm6,%ymm4 .byte 197,197,103,228 // vpackuswb %ymm4,%ymm7,%ymm4 - .byte 197,253,111,53,246,4,0,0 // vmovdqa 0x4f6(%rip),%ymm6 # 30a0 <_sk_difference_hsw_8bit+0x4d2> + .byte 197,253,111,53,214,11,0,0 // vmovdqa 0xbd6(%rip),%ymm6 # 3780 <_sk_overlay_hsw_8bit+0x6b6> .byte 197,221,219,254 // vpand %ymm6,%ymm4,%ymm7 .byte 197,213,219,246 // vpand %ymm6,%ymm5,%ymm6 .byte 197,237,252,192 // vpaddb %ymm0,%ymm2,%ymm0 @@ -59413,7 +59413,7 @@ HIDDEN _sk_difference_hsw_8bit .globl _sk_difference_hsw_8bit FUNCTION(_sk_difference_hsw_8bit) _sk_difference_hsw_8bit: - .byte 197,125,111,5,234,4,0,0 // vmovdqa 0x4ea(%rip),%ymm8 # 30c0 <_sk_difference_hsw_8bit+0x4f2> + .byte 197,125,111,5,202,11,0,0 // vmovdqa 0xbca(%rip),%ymm8 # 37a0 <_sk_overlay_hsw_8bit+0x6d6> .byte 196,194,101,0,224 // vpshufb %ymm8,%ymm3,%ymm4 .byte 196,194,109,0,240 // vpshufb %ymm8,%ymm2,%ymm6 .byte 196,98,125,48,208 // vpmovzxbw %xmm0,%ymm10 @@ -59480,7 +59480,7 @@ _sk_difference_hsw_8bit: .byte 197,197,103,237 // vpackuswb %ymm5,%ymm7,%ymm5 .byte 197,181,218,228 // vpminub %ymm4,%ymm9,%ymm4 .byte 197,165,218,237 // vpminub %ymm5,%ymm11,%ymm5 - .byte 197,253,111,53,180,3,0,0 // vmovdqa 0x3b4(%rip),%ymm6 # 30e0 <_sk_difference_hsw_8bit+0x512> + .byte 197,253,111,53,148,10,0,0 // 
vmovdqa 0xa94(%rip),%ymm6 # 37c0 <_sk_overlay_hsw_8bit+0x6f6> .byte 197,213,219,254 // vpand %ymm6,%ymm5,%ymm7 .byte 197,221,219,246 // vpand %ymm6,%ymm4,%ymm6 .byte 197,237,252,192 // vpaddb %ymm0,%ymm2,%ymm0 @@ -59492,9 +59492,354 @@ _sk_difference_hsw_8bit: .byte 72,173 // lods %ds:(%rsi),%rax .byte 255,224 // jmpq *%rax +HIDDEN _sk_hardlight_hsw_8bit +.globl _sk_hardlight_hsw_8bit +FUNCTION(_sk_hardlight_hsw_8bit) +_sk_hardlight_hsw_8bit: + .byte 72,129,236,184,1,0,0 // sub $0x1b8,%rsp + .byte 197,252,17,92,36,128 // vmovups %ymm3,-0x80(%rsp) + .byte 196,226,125,48,248 // vpmovzxbw %xmm0,%ymm7 + .byte 196,227,125,57,195,1 // vextracti128 $0x1,%ymm0,%xmm3 + .byte 196,226,125,48,243 // vpmovzxbw %xmm3,%ymm6 + .byte 196,98,125,48,217 // vpmovzxbw %xmm1,%ymm11 + .byte 197,126,127,156,36,128,0,0,0 // vmovdqu %ymm11,0x80(%rsp) + .byte 196,227,125,57,203,1 // vextracti128 $0x1,%ymm1,%xmm3 + .byte 196,98,125,48,195 // vpmovzxbw %xmm3,%ymm8 + .byte 197,126,127,132,36,192,0,0,0 // vmovdqu %ymm8,0xc0(%rsp) + .byte 197,253,111,29,73,10,0,0 // vmovdqa 0xa49(%rip),%ymm3 # 37e0 <_sk_overlay_hsw_8bit+0x716> + .byte 196,226,125,0,227 // vpshufb %ymm3,%ymm0,%ymm4 + .byte 196,226,117,0,203 // vpshufb %ymm3,%ymm1,%ymm1 + .byte 197,125,111,251 // vmovdqa %ymm3,%ymm15 + .byte 196,227,125,57,200,1 // vextracti128 $0x1,%ymm1,%xmm0 + .byte 196,98,125,48,232 // vpmovzxbw %xmm0,%ymm13 + .byte 196,226,125,48,233 // vpmovzxbw %xmm1,%ymm5 + .byte 196,227,125,57,227,1 // vextracti128 $0x1,%ymm4,%xmm3 + .byte 196,98,125,48,203 // vpmovzxbw %xmm3,%ymm9 + .byte 196,98,125,48,212 // vpmovzxbw %xmm4,%ymm10 + .byte 197,197,253,199 // vpaddw %ymm7,%ymm7,%ymm0 + .byte 197,205,253,222 // vpaddw %ymm6,%ymm6,%ymm3 + .byte 196,65,37,253,227 // vpaddw %ymm11,%ymm11,%ymm12 + .byte 196,65,61,253,216 // vpaddw %ymm8,%ymm8,%ymm11 + .byte 196,66,37,58,245 // vpminuw %ymm13,%ymm11,%ymm14 + .byte 196,193,37,117,206 // vpcmpeqw %ymm14,%ymm11,%ymm1 + .byte 197,254,127,140,36,96,1,0,0 // vmovdqu %ymm1,0x160(%rsp) 
+ .byte 196,98,29,58,245 // vpminuw %ymm5,%ymm12,%ymm14 + .byte 196,193,29,117,206 // vpcmpeqw %ymm14,%ymm12,%ymm1 + .byte 197,254,127,140,36,64,1,0,0 // vmovdqu %ymm1,0x140(%rsp) + .byte 196,66,101,58,241 // vpminuw %ymm9,%ymm3,%ymm14 + .byte 196,193,101,117,206 // vpcmpeqw %ymm14,%ymm3,%ymm1 + .byte 197,254,127,140,36,0,1,0,0 // vmovdqu %ymm1,0x100(%rsp) + .byte 196,194,125,58,218 // vpminuw %ymm10,%ymm0,%ymm3 + .byte 197,125,117,243 // vpcmpeqw %ymm3,%ymm0,%ymm14 + .byte 197,253,111,226 // vmovdqa %ymm2,%ymm4 + .byte 196,65,125,111,231 // vmovdqa %ymm15,%ymm12 + .byte 196,194,93,0,220 // vpshufb %ymm12,%ymm4,%ymm3 + .byte 196,226,125,48,211 // vpmovzxbw %xmm3,%ymm2 + .byte 196,226,125,121,5,203,9,0,0 // vpbroadcastw 0x9cb(%rip),%ymm0 # 3800 <_sk_overlay_hsw_8bit+0x736> + .byte 197,109,239,248 // vpxor %ymm0,%ymm2,%ymm15 + .byte 197,133,213,207 // vpmullw %ymm7,%ymm15,%ymm1 + .byte 197,254,127,140,36,32,1,0,0 // vmovdqu %ymm1,0x120(%rsp) + .byte 196,226,125,48,204 // vpmovzxbw %xmm4,%ymm1 + .byte 197,254,127,164,36,128,1,0,0 // vmovdqu %ymm4,0x180(%rsp) + .byte 197,45,239,248 // vpxor %ymm0,%ymm10,%ymm15 + .byte 197,5,213,193 // vpmullw %ymm1,%ymm15,%ymm8 + .byte 197,126,127,132,36,224,0,0,0 // vmovdqu %ymm8,0xe0(%rsp) + .byte 197,117,213,199 // vpmullw %ymm7,%ymm1,%ymm8 + .byte 197,126,127,132,36,160,0,0,0 // vmovdqu %ymm8,0xa0(%rsp) + .byte 197,237,249,201 // vpsubw %ymm1,%ymm2,%ymm1 + .byte 197,254,127,76,36,192 // vmovdqu %ymm1,-0x40(%rsp) + .byte 196,193,109,213,202 // vpmullw %ymm10,%ymm2,%ymm1 + .byte 197,254,127,76,36,32 // vmovdqu %ymm1,0x20(%rsp) + .byte 197,45,249,223 // vpsubw %ymm7,%ymm10,%ymm11 + .byte 196,227,125,57,226,1 // vextracti128 $0x1,%ymm4,%xmm2 + .byte 196,226,125,48,210 // vpmovzxbw %xmm2,%ymm2 + .byte 196,227,125,57,219,1 // vextracti128 $0x1,%ymm3,%xmm3 + .byte 196,226,125,48,219 // vpmovzxbw %xmm3,%ymm3 + .byte 197,101,239,208 // vpxor %ymm0,%ymm3,%ymm10 + .byte 197,173,213,206 // vpmullw %ymm6,%ymm10,%ymm1 + .byte 
197,254,127,76,36,96 // vmovdqu %ymm1,0x60(%rsp) + .byte 197,53,239,208 // vpxor %ymm0,%ymm9,%ymm10 + .byte 197,173,213,202 // vpmullw %ymm2,%ymm10,%ymm1 + .byte 197,254,127,76,36,64 // vmovdqu %ymm1,0x40(%rsp) + .byte 197,237,213,206 // vpmullw %ymm6,%ymm2,%ymm1 + .byte 197,254,127,76,36,160 // vmovdqu %ymm1,-0x60(%rsp) + .byte 197,229,249,226 // vpsubw %ymm2,%ymm3,%ymm4 + .byte 196,65,101,213,193 // vpmullw %ymm9,%ymm3,%ymm8 + .byte 197,181,249,246 // vpsubw %ymm6,%ymm9,%ymm6 + .byte 197,254,111,124,36,128 // vmovdqu -0x80(%rsp),%ymm7 + .byte 196,66,69,0,204 // vpshufb %ymm12,%ymm7,%ymm9 + .byte 196,194,125,48,209 // vpmovzxbw %xmm9,%ymm2 + .byte 197,109,239,248 // vpxor %ymm0,%ymm2,%ymm15 + .byte 197,126,111,164,36,128,0,0,0 // vmovdqu 0x80(%rsp),%ymm12 + .byte 196,193,5,213,204 // vpmullw %ymm12,%ymm15,%ymm1 + .byte 197,254,127,12,36 // vmovdqu %ymm1,(%rsp) + .byte 196,226,125,48,207 // vpmovzxbw %xmm7,%ymm1 + .byte 197,85,239,248 // vpxor %ymm0,%ymm5,%ymm15 + .byte 197,133,213,217 // vpmullw %ymm1,%ymm15,%ymm3 + .byte 197,254,127,92,36,224 // vmovdqu %ymm3,-0x20(%rsp) + .byte 196,65,117,213,212 // vpmullw %ymm12,%ymm1,%ymm10 + .byte 197,237,249,217 // vpsubw %ymm1,%ymm2,%ymm3 + .byte 197,237,213,213 // vpmullw %ymm5,%ymm2,%ymm2 + .byte 196,193,85,249,236 // vpsubw %ymm12,%ymm5,%ymm5 + .byte 196,195,125,57,255,1 // vextracti128 $0x1,%ymm7,%xmm15 + .byte 196,66,125,48,255 // vpmovzxbw %xmm15,%ymm15 + .byte 196,99,125,57,207,1 // vextracti128 $0x1,%ymm9,%xmm7 + .byte 196,226,125,48,255 // vpmovzxbw %xmm7,%ymm7 + .byte 197,69,239,200 // vpxor %ymm0,%ymm7,%ymm9 + .byte 197,254,111,140,36,192,0,0,0 // vmovdqu 0xc0(%rsp),%ymm1 + .byte 197,53,213,225 // vpmullw %ymm1,%ymm9,%ymm12 + .byte 197,149,239,192 // vpxor %ymm0,%ymm13,%ymm0 + .byte 196,65,125,213,207 // vpmullw %ymm15,%ymm0,%ymm9 + .byte 197,133,213,193 // vpmullw %ymm1,%ymm15,%ymm0 + .byte 196,65,69,249,255 // vpsubw %ymm15,%ymm7,%ymm15 + .byte 196,193,69,213,253 // vpmullw %ymm13,%ymm7,%ymm7 + .byte 
197,21,249,233 // vpsubw %ymm1,%ymm13,%ymm13 + .byte 196,65,5,213,237 // vpmullw %ymm13,%ymm15,%ymm13 + .byte 197,229,213,205 // vpmullw %ymm5,%ymm3,%ymm1 + .byte 197,221,213,230 // vpmullw %ymm6,%ymm4,%ymm4 + .byte 197,165,213,108,36,192 // vpmullw -0x40(%rsp),%ymm11,%ymm5 + .byte 196,193,21,253,245 // vpaddw %ymm13,%ymm13,%ymm6 + .byte 197,197,249,246 // vpsubw %ymm6,%ymm7,%ymm6 + .byte 197,245,253,201 // vpaddw %ymm1,%ymm1,%ymm1 + .byte 197,237,249,201 // vpsubw %ymm1,%ymm2,%ymm1 + .byte 197,221,253,212 // vpaddw %ymm4,%ymm4,%ymm2 + .byte 197,189,249,210 // vpsubw %ymm2,%ymm8,%ymm2 + .byte 197,213,253,221 // vpaddw %ymm5,%ymm5,%ymm3 + .byte 197,254,111,100,36,32 // vmovdqu 0x20(%rsp),%ymm4 + .byte 197,221,249,219 // vpsubw %ymm3,%ymm4,%ymm3 + .byte 197,254,111,164,36,160,0,0,0 // vmovdqu 0xa0(%rsp),%ymm4 + .byte 197,221,253,228 // vpaddw %ymm4,%ymm4,%ymm4 + .byte 197,141,223,219 // vpandn %ymm3,%ymm14,%ymm3 + .byte 196,193,93,219,230 // vpand %ymm14,%ymm4,%ymm4 + .byte 197,221,235,219 // vpor %ymm3,%ymm4,%ymm3 + .byte 197,254,111,100,36,160 // vmovdqu -0x60(%rsp),%ymm4 + .byte 197,221,253,228 // vpaddw %ymm4,%ymm4,%ymm4 + .byte 197,254,111,172,36,0,1,0,0 // vmovdqu 0x100(%rsp),%ymm5 + .byte 197,213,223,210 // vpandn %ymm2,%ymm5,%ymm2 + .byte 197,221,219,229 // vpand %ymm5,%ymm4,%ymm4 + .byte 197,221,235,210 // vpor %ymm2,%ymm4,%ymm2 + .byte 196,193,45,253,226 // vpaddw %ymm10,%ymm10,%ymm4 + .byte 197,254,111,172,36,64,1,0,0 // vmovdqu 0x140(%rsp),%ymm5 + .byte 197,213,223,201 // vpandn %ymm1,%ymm5,%ymm1 + .byte 197,221,219,229 // vpand %ymm5,%ymm4,%ymm4 + .byte 197,221,235,201 // vpor %ymm1,%ymm4,%ymm1 + .byte 197,253,253,192 // vpaddw %ymm0,%ymm0,%ymm0 + .byte 197,254,111,172,36,96,1,0,0 // vmovdqu 0x160(%rsp),%ymm5 + .byte 197,213,223,230 // vpandn %ymm6,%ymm5,%ymm4 + .byte 197,253,219,197 // vpand %ymm5,%ymm0,%ymm0 + .byte 197,253,235,196 // vpor %ymm4,%ymm0,%ymm0 + .byte 197,254,111,164,36,224,0,0,0 // vmovdqu 0xe0(%rsp),%ymm4 + .byte 
197,221,253,164,36,32,1,0,0 // vpaddw 0x120(%rsp),%ymm4,%ymm4 + .byte 197,254,111,108,36,64 // vmovdqu 0x40(%rsp),%ymm5 + .byte 197,213,253,108,36,96 // vpaddw 0x60(%rsp),%ymm5,%ymm5 + .byte 197,254,111,116,36,224 // vmovdqu -0x20(%rsp),%ymm6 + .byte 197,205,253,52,36 // vpaddw (%rsp),%ymm6,%ymm6 + .byte 196,193,53,253,252 // vpaddw %ymm12,%ymm9,%ymm7 + .byte 196,98,125,121,5,195,7,0,0 // vpbroadcastw 0x7c3(%rip),%ymm8 # 3802 <_sk_overlay_hsw_8bit+0x738> + .byte 196,193,93,253,224 // vpaddw %ymm8,%ymm4,%ymm4 + .byte 197,229,253,220 // vpaddw %ymm4,%ymm3,%ymm3 + .byte 196,193,85,253,224 // vpaddw %ymm8,%ymm5,%ymm4 + .byte 197,237,253,212 // vpaddw %ymm4,%ymm2,%ymm2 + .byte 196,193,77,253,224 // vpaddw %ymm8,%ymm6,%ymm4 + .byte 197,245,253,204 // vpaddw %ymm4,%ymm1,%ymm1 + .byte 196,193,69,253,224 // vpaddw %ymm8,%ymm7,%ymm4 + .byte 197,253,253,196 // vpaddw %ymm4,%ymm0,%ymm0 + .byte 196,226,125,121,37,152,7,0,0 // vpbroadcastw 0x798(%rip),%ymm4 # 3804 <_sk_overlay_hsw_8bit+0x73a> + .byte 197,253,228,196 // vpmulhuw %ymm4,%ymm0,%ymm0 + .byte 197,245,228,204 // vpmulhuw %ymm4,%ymm1,%ymm1 + .byte 197,237,228,212 // vpmulhuw %ymm4,%ymm2,%ymm2 + .byte 197,237,113,210,7 // vpsrlw $0x7,%ymm2,%ymm2 + .byte 197,229,228,220 // vpmulhuw %ymm4,%ymm3,%ymm3 + .byte 197,229,113,211,7 // vpsrlw $0x7,%ymm3,%ymm3 + .byte 196,227,101,56,226,1 // vinserti128 $0x1,%xmm2,%ymm3,%ymm4 + .byte 196,227,101,70,210,49 // vperm2i128 $0x31,%ymm2,%ymm3,%ymm2 + .byte 197,229,113,208,7 // vpsrlw $0x7,%ymm0,%ymm3 + .byte 197,245,113,209,7 // vpsrlw $0x7,%ymm1,%ymm1 + .byte 197,221,103,194 // vpackuswb %ymm2,%ymm4,%ymm0 + .byte 196,227,117,56,211,1 // vinserti128 $0x1,%xmm3,%ymm1,%ymm2 + .byte 196,227,117,70,203,49 // vperm2i128 $0x31,%ymm3,%ymm1,%ymm1 + .byte 197,237,103,201 // vpackuswb %ymm1,%ymm2,%ymm1 + .byte 72,173 // lods %ds:(%rsi),%rax + .byte 197,252,16,148,36,128,1,0,0 // vmovups 0x180(%rsp),%ymm2 + .byte 197,252,16,92,36,128 // vmovups -0x80(%rsp),%ymm3 + .byte 72,129,196,184,1,0,0 // add 
$0x1b8,%rsp + .byte 255,224 // jmpq *%rax + +HIDDEN _sk_overlay_hsw_8bit +.globl _sk_overlay_hsw_8bit +FUNCTION(_sk_overlay_hsw_8bit) +_sk_overlay_hsw_8bit: + .byte 72,129,236,152,1,0,0 // sub $0x198,%rsp + .byte 197,252,17,140,36,64,1,0,0 // vmovups %ymm1,0x140(%rsp) + .byte 197,253,111,45,62,7,0,0 // vmovdqa 0x73e(%rip),%ymm5 # 3820 <_sk_overlay_hsw_8bit+0x756> + .byte 197,253,111,226 // vmovdqa %ymm2,%ymm4 + .byte 196,227,125,57,225,1 // vextracti128 $0x1,%ymm4,%xmm1 + .byte 196,98,125,48,193 // vpmovzxbw %xmm1,%ymm8 + .byte 197,254,127,156,36,96,1,0,0 // vmovdqu %ymm3,0x160(%rsp) + .byte 196,226,125,48,251 // vpmovzxbw %xmm3,%ymm7 + .byte 196,227,125,57,217,1 // vextracti128 $0x1,%ymm3,%xmm1 + .byte 196,98,125,48,241 // vpmovzxbw %xmm1,%ymm14 + .byte 196,226,93,0,205 // vpshufb %ymm5,%ymm4,%ymm1 + .byte 197,254,127,164,36,32,1,0,0 // vmovdqu %ymm4,0x120(%rsp) + .byte 196,226,101,0,221 // vpshufb %ymm5,%ymm3,%ymm3 + .byte 196,227,125,57,222,1 // vextracti128 $0x1,%ymm3,%xmm6 + .byte 196,226,125,48,246 // vpmovzxbw %xmm6,%ymm6 + .byte 196,98,125,48,203 // vpmovzxbw %xmm3,%ymm9 + .byte 196,227,125,57,203,1 // vextracti128 $0x1,%ymm1,%xmm3 + .byte 196,98,125,48,219 // vpmovzxbw %xmm3,%ymm11 + .byte 196,193,61,253,216 // vpaddw %ymm8,%ymm8,%ymm3 + .byte 197,69,253,231 // vpaddw %ymm7,%ymm7,%ymm12 + .byte 196,65,13,253,214 // vpaddw %ymm14,%ymm14,%ymm10 + .byte 196,98,45,58,238 // vpminuw %ymm6,%ymm10,%ymm13 + .byte 196,193,45,117,213 // vpcmpeqw %ymm13,%ymm10,%ymm2 + .byte 197,254,127,148,36,0,1,0,0 // vmovdqu %ymm2,0x100(%rsp) + .byte 196,66,29,58,233 // vpminuw %ymm9,%ymm12,%ymm13 + .byte 196,193,29,117,213 // vpcmpeqw %ymm13,%ymm12,%ymm2 + .byte 197,254,127,148,36,224,0,0,0 // vmovdqu %ymm2,0xe0(%rsp) + .byte 196,66,101,58,235 // vpminuw %ymm11,%ymm3,%ymm13 + .byte 196,193,101,117,213 // vpcmpeqw %ymm13,%ymm3,%ymm2 + .byte 197,254,127,148,36,160,0,0,0 // vmovdqu %ymm2,0xa0(%rsp) + .byte 196,226,125,48,212 // vpmovzxbw %xmm4,%ymm2 + .byte 196,226,125,48,201 // 
vpmovzxbw %xmm1,%ymm1 + .byte 197,237,253,226 // vpaddw %ymm2,%ymm2,%ymm4 + .byte 196,226,93,58,217 // vpminuw %ymm1,%ymm4,%ymm3 + .byte 197,93,117,251 // vpcmpeqw %ymm3,%ymm4,%ymm15 + .byte 197,253,111,216 // vmovdqa %ymm0,%ymm3 + .byte 196,226,125,48,227 // vpmovzxbw %xmm3,%ymm4 + .byte 196,226,125,121,5,152,6,0,0 // vpbroadcastw 0x698(%rip),%ymm0 # 3840 <_sk_overlay_hsw_8bit+0x776> + .byte 197,117,239,232 // vpxor %ymm0,%ymm1,%ymm13 + .byte 197,21,213,212 // vpmullw %ymm4,%ymm13,%ymm10 + .byte 197,126,127,148,36,192,0,0,0 // vmovdqu %ymm10,0xc0(%rsp) + .byte 197,125,111,229 // vmovdqa %ymm5,%ymm12 + .byte 196,194,101,0,236 // vpshufb %ymm12,%ymm3,%ymm5 + .byte 197,125,111,211 // vmovdqa %ymm3,%ymm10 + .byte 196,226,125,48,221 // vpmovzxbw %xmm5,%ymm3 + .byte 197,101,239,232 // vpxor %ymm0,%ymm3,%ymm13 + .byte 197,21,213,234 // vpmullw %ymm2,%ymm13,%ymm13 + .byte 197,126,127,172,36,128,0,0,0 // vmovdqu %ymm13,0x80(%rsp) + .byte 197,109,213,236 // vpmullw %ymm4,%ymm2,%ymm13 + .byte 197,126,127,108,36,96 // vmovdqu %ymm13,0x60(%rsp) + .byte 197,245,249,210 // vpsubw %ymm2,%ymm1,%ymm2 + .byte 197,254,127,84,36,160 // vmovdqu %ymm2,-0x60(%rsp) + .byte 197,245,213,203 // vpmullw %ymm3,%ymm1,%ymm1 + .byte 197,254,127,12,36 // vmovdqu %ymm1,(%rsp) + .byte 197,229,249,220 // vpsubw %ymm4,%ymm3,%ymm3 + .byte 196,99,125,57,212,1 // vextracti128 $0x1,%ymm10,%xmm4 + .byte 196,226,125,48,228 // vpmovzxbw %xmm4,%ymm4 + .byte 196,227,125,57,237,1 // vextracti128 $0x1,%ymm5,%xmm5 + .byte 196,226,125,48,237 // vpmovzxbw %xmm5,%ymm5 + .byte 197,37,239,232 // vpxor %ymm0,%ymm11,%ymm13 + .byte 197,149,213,204 // vpmullw %ymm4,%ymm13,%ymm1 + .byte 197,254,127,76,36,64 // vmovdqu %ymm1,0x40(%rsp) + .byte 197,85,239,232 // vpxor %ymm0,%ymm5,%ymm13 + .byte 196,193,21,213,200 // vpmullw %ymm8,%ymm13,%ymm1 + .byte 197,254,127,76,36,32 // vmovdqu %ymm1,0x20(%rsp) + .byte 197,189,213,204 // vpmullw %ymm4,%ymm8,%ymm1 + .byte 197,254,127,76,36,128 // vmovdqu %ymm1,-0x80(%rsp) + .byte 
196,65,37,249,192 // vpsubw %ymm8,%ymm11,%ymm8 + .byte 197,37,213,221 // vpmullw %ymm5,%ymm11,%ymm11 + .byte 197,213,249,228 // vpsubw %ymm4,%ymm5,%ymm4 + .byte 197,254,111,140,36,64,1,0,0 // vmovdqu 0x140(%rsp),%ymm1 + .byte 196,194,117,0,236 // vpshufb %ymm12,%ymm1,%ymm5 + .byte 196,226,125,48,209 // vpmovzxbw %xmm1,%ymm2 + .byte 197,125,111,225 // vmovdqa %ymm1,%ymm12 + .byte 197,53,239,232 // vpxor %ymm0,%ymm9,%ymm13 + .byte 197,149,213,202 // vpmullw %ymm2,%ymm13,%ymm1 + .byte 197,254,127,76,36,224 // vmovdqu %ymm1,-0x20(%rsp) + .byte 196,226,125,48,205 // vpmovzxbw %xmm5,%ymm1 + .byte 197,117,239,232 // vpxor %ymm0,%ymm1,%ymm13 + .byte 197,21,213,215 // vpmullw %ymm7,%ymm13,%ymm10 + .byte 197,126,127,84,36,192 // vmovdqu %ymm10,-0x40(%rsp) + .byte 197,69,213,210 // vpmullw %ymm2,%ymm7,%ymm10 + .byte 197,181,249,255 // vpsubw %ymm7,%ymm9,%ymm7 + .byte 197,53,213,201 // vpmullw %ymm1,%ymm9,%ymm9 + .byte 197,245,249,202 // vpsubw %ymm2,%ymm1,%ymm1 + .byte 196,99,125,57,226,1 // vextracti128 $0x1,%ymm12,%xmm2 + .byte 196,226,125,48,210 // vpmovzxbw %xmm2,%ymm2 + .byte 196,227,125,57,237,1 // vextracti128 $0x1,%ymm5,%xmm5 + .byte 196,226,125,48,237 // vpmovzxbw %xmm5,%ymm5 + .byte 197,77,239,232 // vpxor %ymm0,%ymm6,%ymm13 + .byte 197,21,213,234 // vpmullw %ymm2,%ymm13,%ymm13 + .byte 197,213,239,192 // vpxor %ymm0,%ymm5,%ymm0 + .byte 196,65,125,213,230 // vpmullw %ymm14,%ymm0,%ymm12 + .byte 197,141,213,194 // vpmullw %ymm2,%ymm14,%ymm0 + .byte 196,65,77,249,246 // vpsubw %ymm14,%ymm6,%ymm14 + .byte 197,205,213,245 // vpmullw %ymm5,%ymm6,%ymm6 + .byte 197,213,249,210 // vpsubw %ymm2,%ymm5,%ymm2 + .byte 197,141,213,210 // vpmullw %ymm2,%ymm14,%ymm2 + .byte 197,197,213,201 // vpmullw %ymm1,%ymm7,%ymm1 + .byte 197,189,213,228 // vpmullw %ymm4,%ymm8,%ymm4 + .byte 197,229,213,92,36,160 // vpmullw -0x60(%rsp),%ymm3,%ymm3 + .byte 197,237,253,210 // vpaddw %ymm2,%ymm2,%ymm2 + .byte 197,205,249,210 // vpsubw %ymm2,%ymm6,%ymm2 + .byte 197,245,253,201 // vpaddw 
%ymm1,%ymm1,%ymm1 + .byte 197,181,249,201 // vpsubw %ymm1,%ymm9,%ymm1 + .byte 197,221,253,228 // vpaddw %ymm4,%ymm4,%ymm4 + .byte 197,165,249,228 // vpsubw %ymm4,%ymm11,%ymm4 + .byte 197,229,253,219 // vpaddw %ymm3,%ymm3,%ymm3 + .byte 197,254,111,44,36 // vmovdqu (%rsp),%ymm5 + .byte 197,213,249,219 // vpsubw %ymm3,%ymm5,%ymm3 + .byte 197,254,111,108,36,96 // vmovdqu 0x60(%rsp),%ymm5 + .byte 197,213,253,237 // vpaddw %ymm5,%ymm5,%ymm5 + .byte 197,133,223,219 // vpandn %ymm3,%ymm15,%ymm3 + .byte 196,193,85,219,239 // vpand %ymm15,%ymm5,%ymm5 + .byte 197,213,235,219 // vpor %ymm3,%ymm5,%ymm3 + .byte 197,254,111,108,36,128 // vmovdqu -0x80(%rsp),%ymm5 + .byte 197,213,253,237 // vpaddw %ymm5,%ymm5,%ymm5 + .byte 197,254,111,180,36,160,0,0,0 // vmovdqu 0xa0(%rsp),%ymm6 + .byte 197,205,223,228 // vpandn %ymm4,%ymm6,%ymm4 + .byte 197,213,219,238 // vpand %ymm6,%ymm5,%ymm5 + .byte 197,213,235,228 // vpor %ymm4,%ymm5,%ymm4 + .byte 196,193,45,253,234 // vpaddw %ymm10,%ymm10,%ymm5 + .byte 197,254,111,180,36,224,0,0,0 // vmovdqu 0xe0(%rsp),%ymm6 + .byte 197,205,223,201 // vpandn %ymm1,%ymm6,%ymm1 + .byte 197,213,219,238 // vpand %ymm6,%ymm5,%ymm5 + .byte 197,213,235,201 // vpor %ymm1,%ymm5,%ymm1 + .byte 197,253,253,192 // vpaddw %ymm0,%ymm0,%ymm0 + .byte 197,254,111,172,36,0,1,0,0 // vmovdqu 0x100(%rsp),%ymm5 + .byte 197,213,223,210 // vpandn %ymm2,%ymm5,%ymm2 + .byte 197,253,219,197 // vpand %ymm5,%ymm0,%ymm0 + .byte 197,253,235,194 // vpor %ymm2,%ymm0,%ymm0 + .byte 197,254,111,148,36,128,0,0,0 // vmovdqu 0x80(%rsp),%ymm2 + .byte 197,237,253,148,36,192,0,0,0 // vpaddw 0xc0(%rsp),%ymm2,%ymm2 + .byte 197,254,111,108,36,32 // vmovdqu 0x20(%rsp),%ymm5 + .byte 197,213,253,108,36,64 // vpaddw 0x40(%rsp),%ymm5,%ymm5 + .byte 197,254,111,116,36,192 // vmovdqu -0x40(%rsp),%ymm6 + .byte 197,205,253,116,36,224 // vpaddw -0x20(%rsp),%ymm6,%ymm6 + .byte 196,193,29,253,253 // vpaddw %ymm13,%ymm12,%ymm7 + .byte 196,98,125,121,5,163,4,0,0 // vpbroadcastw 0x4a3(%rip),%ymm8 # 3842 
<_sk_overlay_hsw_8bit+0x778> + .byte 196,193,109,253,208 // vpaddw %ymm8,%ymm2,%ymm2 + .byte 197,229,253,210 // vpaddw %ymm2,%ymm3,%ymm2 + .byte 196,193,85,253,216 // vpaddw %ymm8,%ymm5,%ymm3 + .byte 197,221,253,219 // vpaddw %ymm3,%ymm4,%ymm3 + .byte 196,193,77,253,224 // vpaddw %ymm8,%ymm6,%ymm4 + .byte 197,245,253,204 // vpaddw %ymm4,%ymm1,%ymm1 + .byte 196,193,69,253,224 // vpaddw %ymm8,%ymm7,%ymm4 + .byte 197,253,253,196 // vpaddw %ymm4,%ymm0,%ymm0 + .byte 196,226,125,121,37,120,4,0,0 // vpbroadcastw 0x478(%rip),%ymm4 # 3844 <_sk_overlay_hsw_8bit+0x77a> + .byte 197,253,228,196 // vpmulhuw %ymm4,%ymm0,%ymm0 + .byte 197,245,228,204 // vpmulhuw %ymm4,%ymm1,%ymm1 + .byte 197,229,228,220 // vpmulhuw %ymm4,%ymm3,%ymm3 + .byte 197,229,113,211,7 // vpsrlw $0x7,%ymm3,%ymm3 + .byte 197,237,228,212 // vpmulhuw %ymm4,%ymm2,%ymm2 + .byte 197,237,113,210,7 // vpsrlw $0x7,%ymm2,%ymm2 + .byte 196,227,109,56,227,1 // vinserti128 $0x1,%xmm3,%ymm2,%ymm4 + .byte 196,227,109,70,211,49 // vperm2i128 $0x31,%ymm3,%ymm2,%ymm2 + .byte 197,229,113,208,7 // vpsrlw $0x7,%ymm0,%ymm3 + .byte 197,245,113,209,7 // vpsrlw $0x7,%ymm1,%ymm1 + .byte 197,221,103,194 // vpackuswb %ymm2,%ymm4,%ymm0 + .byte 196,227,117,56,211,1 // vinserti128 $0x1,%xmm3,%ymm1,%ymm2 + .byte 196,227,117,70,203,49 // vperm2i128 $0x31,%ymm3,%ymm1,%ymm1 + .byte 197,237,103,201 // vpackuswb %ymm1,%ymm2,%ymm1 + .byte 72,173 // lods %ds:(%rsi),%rax + .byte 197,252,16,148,36,32,1,0,0 // vmovups 0x120(%rsp),%ymm2 + .byte 197,252,16,156,36,96,1,0,0 // vmovups 0x160(%rsp),%ymm3 + .byte 72,129,196,152,1,0,0 // add $0x198,%rsp + .byte 255,224 // jmpq *%rax + BALIGN4 .byte 0,0 // add %al,(%rax) - .byte 127,67 // jg 2d97 <_sk_difference_hsw_8bit+0x1c9> + .byte 127,67 // jg 3477 <_sk_overlay_hsw_8bit+0x3ad> .byte 1,1 // add %eax,(%rcx) .byte 1,0 // add %eax,(%rax) .byte 0,0 // add %al,(%rax) @@ -59504,9 +59849,9 @@ BALIGN4 .byte 0,0 // add %al,(%rax) .byte 0,255 // add %bh,%bh .byte 0,0 // add %al,(%rax) - .byte 127,67 // jg 2dab 
<_sk_difference_hsw_8bit+0x1dd> + .byte 127,67 // jg 348b <_sk_overlay_hsw_8bit+0x3c1> .byte 0,0 // add %al,(%rax) - .byte 127,67 // jg 2daf <_sk_difference_hsw_8bit+0x1e1> + .byte 127,67 // jg 348f <_sk_overlay_hsw_8bit+0x3c5> .byte 0,0 // add %al,(%rax) .byte 0,255 // add %bh,%bh .byte 255 // (bad) @@ -60063,6 +60408,66 @@ BALIGN32 .byte 255 // (bad) .byte 255 // (bad) .byte 255,0 // incl (%rax) + .byte 3,3 // add (%rbx),%eax + .byte 3,3 // add (%rbx),%eax + .byte 7 // (bad) + .byte 7 // (bad) + .byte 7 // (bad) + .byte 7 // (bad) + .byte 11,11 // or (%rbx),%ecx + .byte 11,11 // or (%rbx),%ecx + .byte 15 // (bad) + .byte 15 // (bad) + .byte 15 // (bad) + .byte 15,3,3 // lsl (%rbx),%eax + .byte 3,3 // add (%rbx),%eax + .byte 7 // (bad) + .byte 7 // (bad) + .byte 7 // (bad) + .byte 7 // (bad) + .byte 11,11 // or (%rbx),%ecx + .byte 11,11 // or (%rbx),%ecx + .byte 15 // (bad) + .byte 15 // (bad) + .byte 15 // (bad) + .byte 15,255 // (bad) + .byte 0,127,0 // add %bh,0x0(%rdi) + .byte 129,128,0,0,0,0,0,0,0,0 // addl $0x0,0x0(%rax) + .byte 0,0 // add %al,(%rax) + .byte 0,0 // add %al,(%rax) + .byte 0,0 // add %al,(%rax) + .byte 0,0 // add %al,(%rax) + .byte 0,0 // add %al,(%rax) + .byte 0,0 // add %al,(%rax) + .byte 0,0 // add %al,(%rax) + .byte 0,0 // add %al,(%rax) + .byte 0,0 // add %al,(%rax) + .byte 3,3 // add (%rbx),%eax + .byte 3,3 // add (%rbx),%eax + .byte 7 // (bad) + .byte 7 // (bad) + .byte 7 // (bad) + .byte 7 // (bad) + .byte 11,11 // or (%rbx),%ecx + .byte 11,11 // or (%rbx),%ecx + .byte 15 // (bad) + .byte 15 // (bad) + .byte 15 // (bad) + .byte 15,3,3 // lsl (%rbx),%eax + .byte 3,3 // add (%rbx),%eax + .byte 7 // (bad) + .byte 7 // (bad) + .byte 7 // (bad) + .byte 7 // (bad) + .byte 11,11 // or (%rbx),%ecx + .byte 11,11 // or (%rbx),%ecx + .byte 15 // (bad) + .byte 15 // (bad) + .byte 15 // (bad) + .byte 15,255 // (bad) + .byte 0,127,0 // add %bh,0x0(%rdi) + .byte 129 // .byte 0x81 + .byte 128 // .byte 0x80 BALIGN16 .byte 0,2 // add %al,(%rdx) @@ 
-60157,7 +60562,7 @@ HIDDEN _sk_set_rgb_sse41_8bit FUNCTION(_sk_set_rgb_sse41_8bit) _sk_set_rgb_sse41_8bit: .byte 72,173 // lods %ds:(%rsi),%rax - .byte 243,15,16,37,89,33,0,0 // movss 0x2159(%rip),%xmm4 # 221c <_sk_difference_sse41_8bit+0x169> + .byte 243,15,16,37,61,41,0,0 // movss 0x293d(%rip),%xmm4 # 2a00 <_sk_overlay_sse41_8bit+0x400> .byte 243,15,16,40 // movss (%rax),%xmm5 .byte 243,15,89,236 // mulss %xmm4,%xmm5 .byte 243,72,15,44,205 // cvttss2si %xmm5,%rcx @@ -60172,7 +60577,7 @@ _sk_set_rgb_sse41_8bit: .byte 9,208 // or %edx,%eax .byte 102,15,110,224 // movd %eax,%xmm4 .byte 102,15,112,228,0 // pshufd $0x0,%xmm4,%xmm4 - .byte 102,15,111,45,45,33,0,0 // movdqa 0x212d(%rip),%xmm5 # 2230 <_sk_difference_sse41_8bit+0x17d> + .byte 102,15,111,45,13,41,0,0 // movdqa 0x290d(%rip),%xmm5 # 2a10 <_sk_overlay_sse41_8bit+0x410> .byte 102,15,219,205 // pand %xmm5,%xmm1 .byte 102,15,219,197 // pand %xmm5,%xmm0 .byte 102,15,235,196 // por %xmm4,%xmm0 @@ -60186,12 +60591,12 @@ FUNCTION(_sk_premul_sse41_8bit) _sk_premul_sse41_8bit: .byte 102,15,111,225 // movdqa %xmm1,%xmm4 .byte 102,15,111,232 // movdqa %xmm0,%xmm5 - .byte 102,15,111,5,25,33,0,0 // movdqa 0x2119(%rip),%xmm0 # 2240 <_sk_difference_sse41_8bit+0x18d> + .byte 102,15,111,5,249,40,0,0 // movdqa 0x28f9(%rip),%xmm0 # 2a20 <_sk_overlay_sse41_8bit+0x420> .byte 102,15,111,253 // movdqa %xmm5,%xmm7 .byte 102,15,56,0,248 // pshufb %xmm0,%xmm7 .byte 102,15,111,244 // movdqa %xmm4,%xmm6 .byte 102,15,56,0,240 // pshufb %xmm0,%xmm6 - .byte 102,15,111,5,15,33,0,0 // movdqa 0x210f(%rip),%xmm0 # 2250 <_sk_difference_sse41_8bit+0x19d> + .byte 102,15,111,5,239,40,0,0 // movdqa 0x28ef(%rip),%xmm0 # 2a30 <_sk_overlay_sse41_8bit+0x430> .byte 102,15,235,240 // por %xmm0,%xmm6 .byte 102,15,235,248 // por %xmm0,%xmm7 .byte 102,69,15,239,192 // pxor %xmm8,%xmm8 @@ -60224,7 +60629,7 @@ HIDDEN _sk_swap_rb_sse41_8bit .globl _sk_swap_rb_sse41_8bit FUNCTION(_sk_swap_rb_sse41_8bit) _sk_swap_rb_sse41_8bit: - .byte 102,15,111,37,156,32,0,0 
// movdqa 0x209c(%rip),%xmm4 # 2260 <_sk_difference_sse41_8bit+0x1ad> + .byte 102,15,111,37,124,40,0,0 // movdqa 0x287c(%rip),%xmm4 # 2a40 <_sk_overlay_sse41_8bit+0x440> .byte 102,15,56,0,196 // pshufb %xmm4,%xmm0 .byte 102,15,56,0,204 // pshufb %xmm4,%xmm1 .byte 72,173 // lods %ds:(%rsi),%rax @@ -60351,7 +60756,7 @@ _sk_load_8888_dst_sse41_8bit: .byte 255 // (bad) .byte 255 // (bad) .byte 255 // (bad) - .byte 233,255,255,255,221 // jmpq ffffffffde000354 <_sk_difference_sse41_8bit+0xffffffffddffe2a1> + .byte 233,255,255,255,221 // jmpq ffffffffde000354 <_sk_overlay_sse41_8bit+0xffffffffddffdd54> .byte 255 // (bad) .byte 255 // (bad) .byte 255 // .byte 0xff @@ -60434,7 +60839,7 @@ _sk_load_bgra_sse41_8bit: .byte 117,35 // jne 448 <_sk_load_bgra_sse41_8bit+0x44> .byte 243,66,15,111,76,130,16 // movdqu 0x10(%rdx,%r8,4),%xmm1 .byte 243,66,15,111,4,130 // movdqu (%rdx,%r8,4),%xmm0 - .byte 102,15,111,37,54,30,0,0 // movdqa 0x1e36(%rip),%xmm4 # 2270 <_sk_difference_sse41_8bit+0x1bd> + .byte 102,15,111,37,22,38,0,0 // movdqa 0x2616(%rip),%xmm4 # 2a50 <_sk_overlay_sse41_8bit+0x450> .byte 102,15,56,0,196 // pshufb %xmm4,%xmm0 .byte 102,15,56,0,204 // pshufb %xmm4,%xmm1 .byte 72,173 // lods %ds:(%rsi),%rax @@ -60497,7 +60902,7 @@ _sk_load_bgra_dst_sse41_8bit: .byte 117,35 // jne 518 <_sk_load_bgra_dst_sse41_8bit+0x44> .byte 243,66,15,111,92,130,16 // movdqu 0x10(%rdx,%r8,4),%xmm3 .byte 243,66,15,111,20,130 // movdqu (%rdx,%r8,4),%xmm2 - .byte 102,15,111,37,118,29,0,0 // movdqa 0x1d76(%rip),%xmm4 # 2280 <_sk_difference_sse41_8bit+0x1cd> + .byte 102,15,111,37,86,37,0,0 // movdqa 0x2556(%rip),%xmm4 # 2a60 <_sk_overlay_sse41_8bit+0x460> .byte 102,15,56,0,212 // pshufb %xmm4,%xmm2 .byte 102,15,56,0,220 // pshufb %xmm4,%xmm3 .byte 72,173 // lods %ds:(%rsi),%rax @@ -60556,7 +60961,7 @@ _sk_store_bgra_sse41_8bit: .byte 72,15,175,209 // imul %rcx,%rdx .byte 72,193,226,2 // shl $0x2,%rdx .byte 72,3,16 // add (%rax),%rdx - .byte 102,15,111,53,200,28,0,0 // movdqa 0x1cc8(%rip),%xmm6 # 
2290 <_sk_difference_sse41_8bit+0x1dd> + .byte 102,15,111,53,168,36,0,0 // movdqa 0x24a8(%rip),%xmm6 # 2a70 <_sk_overlay_sse41_8bit+0x470> .byte 102,15,111,233 // movdqa %xmm1,%xmm5 .byte 102,15,56,0,238 // pshufb %xmm6,%xmm5 .byte 102,15,111,224 // movdqa %xmm0,%xmm4 @@ -60622,7 +61027,7 @@ _sk_load_a8_sse41_8bit: .byte 77,133,201 // test %r9,%r9 .byte 117,42 // jne 6af <_sk_load_a8_sse41_8bit+0x47> .byte 102,66,15,56,48,12,2 // pmovzxbw (%rdx,%r8,1),%xmm1 - .byte 102,15,219,13,12,28,0,0 // pand 0x1c0c(%rip),%xmm1 # 22a0 <_sk_difference_sse41_8bit+0x1ed> + .byte 102,15,219,13,236,35,0,0 // pand 0x23ec(%rip),%xmm1 # 2a80 <_sk_overlay_sse41_8bit+0x480> .byte 102,15,239,228 // pxor %xmm4,%xmm4 .byte 102,15,56,51,193 // pmovzxwd %xmm1,%xmm0 .byte 102,15,105,204 // punpckhwd %xmm4,%xmm1 @@ -60670,7 +61075,7 @@ _sk_load_a8_sse41_8bit: .byte 255 // (bad) .byte 255 // (bad) .byte 255 // (bad) - .byte 233,255,255,255,222 // jmpq ffffffffdf000750 <_sk_difference_sse41_8bit+0xffffffffdeffe69d> + .byte 233,255,255,255,222 // jmpq ffffffffdf000750 <_sk_overlay_sse41_8bit+0xffffffffdeffe150> .byte 255 // (bad) .byte 255 // (bad) .byte 255,211 // callq *%rbx @@ -60695,7 +61100,7 @@ _sk_load_a8_dst_sse41_8bit: .byte 77,133,201 // test %r9,%r9 .byte 117,42 // jne 7a3 <_sk_load_a8_dst_sse41_8bit+0x47> .byte 102,66,15,56,48,28,2 // pmovzxbw (%rdx,%r8,1),%xmm3 - .byte 102,15,219,29,40,27,0,0 // pand 0x1b28(%rip),%xmm3 # 22b0 <_sk_difference_sse41_8bit+0x1fd> + .byte 102,15,219,29,8,35,0,0 // pand 0x2308(%rip),%xmm3 # 2a90 <_sk_overlay_sse41_8bit+0x490> .byte 102,15,239,228 // pxor %xmm4,%xmm4 .byte 102,15,56,51,211 // pmovzxwd %xmm3,%xmm2 .byte 102,15,105,220 // punpckhwd %xmm4,%xmm3 @@ -60743,7 +61148,7 @@ _sk_load_a8_dst_sse41_8bit: .byte 255 // (bad) .byte 255 // (bad) .byte 255 // (bad) - .byte 233,255,255,255,222 // jmpq ffffffffdf000844 <_sk_difference_sse41_8bit+0xffffffffdeffe791> + .byte 233,255,255,255,222 // jmpq ffffffffdf000844 <_sk_overlay_sse41_8bit+0xffffffffdeffe244> 
.byte 255 // (bad) .byte 255 // (bad) .byte 255,211 // callq *%rbx @@ -60765,7 +61170,7 @@ _sk_store_a8_sse41_8bit: .byte 72,99,87,8 // movslq 0x8(%rdi),%rdx .byte 72,15,175,209 // imul %rcx,%rdx .byte 72,3,16 // add (%rax),%rdx - .byte 102,15,111,45,80,26,0,0 // movdqa 0x1a50(%rip),%xmm5 # 22c0 <_sk_difference_sse41_8bit+0x20d> + .byte 102,15,111,45,48,34,0,0 // movdqa 0x2230(%rip),%xmm5 # 2aa0 <_sk_overlay_sse41_8bit+0x4a0> .byte 102,15,111,241 // movdqa %xmm1,%xmm6 .byte 102,15,56,0,245 // pshufb %xmm5,%xmm6 .byte 102,15,111,224 // movdqa %xmm0,%xmm4 @@ -60773,7 +61178,7 @@ _sk_store_a8_sse41_8bit: .byte 102,15,108,230 // punpcklqdq %xmm6,%xmm4 .byte 77,133,201 // test %r9,%r9 .byte 117,19 // jne 89e <_sk_store_a8_sse41_8bit+0x4e> - .byte 102,15,56,0,37,92,26,0,0 // pshufb 0x1a5c(%rip),%xmm4 # 22f0 <_sk_difference_sse41_8bit+0x23d> + .byte 102,15,56,0,37,60,34,0,0 // pshufb 0x223c(%rip),%xmm4 # 2ad0 <_sk_overlay_sse41_8bit+0x4d0> .byte 102,66,15,214,36,2 // movq %xmm4,(%rdx,%r8,1) .byte 72,173 // lods %ds:(%rsi),%rax .byte 255,224 // jmpq *%rax @@ -60789,13 +61194,13 @@ _sk_store_a8_sse41_8bit: .byte 102,66,15,58,20,36,2,0 // pextrb $0x0,%xmm4,(%rdx,%r8,1) .byte 235,209 // jmp 89a <_sk_store_a8_sse41_8bit+0x4a> .byte 102,66,15,58,20,100,2,2,4 // pextrb $0x4,%xmm4,0x2(%rdx,%r8,1) - .byte 102,15,56,0,37,5,26,0,0 // pshufb 0x1a05(%rip),%xmm4 # 22e0 <_sk_difference_sse41_8bit+0x22d> + .byte 102,15,56,0,37,229,33,0,0 // pshufb 0x21e5(%rip),%xmm4 # 2ac0 <_sk_overlay_sse41_8bit+0x4c0> .byte 102,66,15,58,21,36,2,0 // pextrw $0x0,%xmm4,(%rdx,%r8,1) .byte 235,181 // jmp 89a <_sk_store_a8_sse41_8bit+0x4a> .byte 102,66,15,58,20,100,2,6,12 // pextrb $0xc,%xmm4,0x6(%rdx,%r8,1) .byte 102,66,15,58,20,100,2,5,10 // pextrb $0xa,%xmm4,0x5(%rdx,%r8,1) .byte 102,66,15,58,20,100,2,4,8 // pextrb $0x8,%xmm4,0x4(%rdx,%r8,1) - .byte 102,15,56,0,37,199,25,0,0 // pshufb 0x19c7(%rip),%xmm4 # 22d0 <_sk_difference_sse41_8bit+0x21d> + .byte 102,15,56,0,37,167,33,0,0 // pshufb 
0x21a7(%rip),%xmm4 # 2ab0 <_sk_overlay_sse41_8bit+0x4b0> .byte 102,66,15,126,36,2 // movd %xmm4,(%rdx,%r8,1) .byte 235,137 // jmp 89a <_sk_store_a8_sse41_8bit+0x4a> .byte 15,31,0 // nopl (%rax) @@ -60835,14 +61240,14 @@ _sk_load_g8_sse41_8bit: .byte 77,133,201 // test %r9,%r9 .byte 117,66 // jne 98f <_sk_load_g8_sse41_8bit+0x5f> .byte 102,66,15,56,48,12,2 // pmovzxbw (%rdx,%r8,1),%xmm1 - .byte 102,15,219,13,164,25,0,0 // pand 0x19a4(%rip),%xmm1 # 2300 <_sk_difference_sse41_8bit+0x24d> + .byte 102,15,219,13,132,33,0,0 // pand 0x2184(%rip),%xmm1 # 2ae0 <_sk_overlay_sse41_8bit+0x4e0> .byte 102,15,239,228 // pxor %xmm4,%xmm4 .byte 102,15,56,51,193 // pmovzxwd %xmm1,%xmm0 .byte 102,15,105,204 // punpckhwd %xmm4,%xmm1 - .byte 102,15,111,37,159,25,0,0 // movdqa 0x199f(%rip),%xmm4 # 2310 <_sk_difference_sse41_8bit+0x25d> + .byte 102,15,111,37,127,33,0,0 // movdqa 0x217f(%rip),%xmm4 # 2af0 <_sk_overlay_sse41_8bit+0x4f0> .byte 102,15,56,64,204 // pmulld %xmm4,%xmm1 .byte 102,15,56,64,196 // pmulld %xmm4,%xmm0 - .byte 102,15,111,37,157,25,0,0 // movdqa 0x199d(%rip),%xmm4 # 2320 <_sk_difference_sse41_8bit+0x26d> + .byte 102,15,111,37,125,33,0,0 // movdqa 0x217d(%rip),%xmm4 # 2b00 <_sk_overlay_sse41_8bit+0x500> .byte 102,15,235,196 // por %xmm4,%xmm0 .byte 102,15,235,204 // por %xmm4,%xmm1 .byte 72,173 // lods %ds:(%rsi),%rax @@ -60887,7 +61292,7 @@ _sk_load_g8_sse41_8bit: .byte 255 // (bad) .byte 255 // (bad) .byte 255 // (bad) - .byte 232,255,255,255,221 // callq ffffffffde000a34 <_sk_difference_sse41_8bit+0xffffffffddffe981> + .byte 232,255,255,255,221 // callq ffffffffde000a34 <_sk_overlay_sse41_8bit+0xffffffffddffe434> .byte 255 // (bad) .byte 255 // (bad) .byte 255,210 // callq *%rdx @@ -60912,14 +61317,14 @@ _sk_load_g8_dst_sse41_8bit: .byte 77,133,201 // test %r9,%r9 .byte 117,66 // jne a9f <_sk_load_g8_dst_sse41_8bit+0x5f> .byte 102,66,15,56,48,28,2 // pmovzxbw (%rdx,%r8,1),%xmm3 - .byte 102,15,219,29,196,24,0,0 // pand 0x18c4(%rip),%xmm3 # 2330 
<_sk_difference_sse41_8bit+0x27d> + .byte 102,15,219,29,164,32,0,0 // pand 0x20a4(%rip),%xmm3 # 2b10 <_sk_overlay_sse41_8bit+0x510> .byte 102,15,239,228 // pxor %xmm4,%xmm4 .byte 102,15,56,51,211 // pmovzxwd %xmm3,%xmm2 .byte 102,15,105,220 // punpckhwd %xmm4,%xmm3 - .byte 102,15,111,37,191,24,0,0 // movdqa 0x18bf(%rip),%xmm4 # 2340 <_sk_difference_sse41_8bit+0x28d> + .byte 102,15,111,37,159,32,0,0 // movdqa 0x209f(%rip),%xmm4 # 2b20 <_sk_overlay_sse41_8bit+0x520> .byte 102,15,56,64,220 // pmulld %xmm4,%xmm3 .byte 102,15,56,64,212 // pmulld %xmm4,%xmm2 - .byte 102,15,111,37,189,24,0,0 // movdqa 0x18bd(%rip),%xmm4 # 2350 <_sk_difference_sse41_8bit+0x29d> + .byte 102,15,111,37,157,32,0,0 // movdqa 0x209d(%rip),%xmm4 # 2b30 <_sk_overlay_sse41_8bit+0x530> .byte 102,15,235,212 // por %xmm4,%xmm2 .byte 102,15,235,220 // por %xmm4,%xmm3 .byte 72,173 // lods %ds:(%rsi),%rax @@ -60964,7 +61369,7 @@ _sk_load_g8_dst_sse41_8bit: .byte 255 // (bad) .byte 255 // (bad) .byte 255 // (bad) - .byte 232,255,255,255,221 // callq ffffffffde000b44 <_sk_difference_sse41_8bit+0xffffffffddffea91> + .byte 232,255,255,255,221 // callq ffffffffde000b44 <_sk_overlay_sse41_8bit+0xffffffffddffe544> .byte 255 // (bad) .byte 255 // (bad) .byte 255,210 // callq *%rdx @@ -60992,7 +61397,7 @@ _sk_srcover_rgba_8888_sse41_8bit: .byte 243,70,15,111,68,138,16 // movdqu 0x10(%rdx,%r9,4),%xmm8 .byte 243,70,15,111,12,138 // movdqu (%rdx,%r9,4),%xmm9 .byte 77,133,192 // test %r8,%r8 - .byte 102,15,111,37,211,23,0,0 // movdqa 0x17d3(%rip),%xmm4 # 2360 <_sk_difference_sse41_8bit+0x2ad> + .byte 102,15,111,37,179,31,0,0 // movdqa 0x1fb3(%rip),%xmm4 # 2b40 <_sk_overlay_sse41_8bit+0x540> .byte 102,15,111,241 // movdqa %xmm1,%xmm6 .byte 102,15,56,0,244 // pshufb %xmm4,%xmm6 .byte 102,15,111,248 // movdqa %xmm0,%xmm7 @@ -61114,7 +61519,7 @@ _sk_scale_1_float_sse41_8bit: .byte 102,15,111,232 // movdqa %xmm0,%xmm5 .byte 72,173 // lods %ds:(%rsi),%rax .byte 243,15,16,0 // movss (%rax),%xmm0 - .byte 
243,15,89,5,178,20,0,0 // mulss 0x14b2(%rip),%xmm0 # 2220 <_sk_difference_sse41_8bit+0x16d> + .byte 243,15,89,5,150,28,0,0 // mulss 0x1c96(%rip),%xmm0 # 2a04 <_sk_overlay_sse41_8bit+0x404> .byte 243,15,44,192 // cvttss2si %xmm0,%eax .byte 15,87,192 // xorps %xmm0,%xmm0 .byte 102,68,15,56,48,197 // pmovzxbw %xmm5,%xmm8 @@ -61122,7 +61527,7 @@ _sk_scale_1_float_sse41_8bit: .byte 102,68,15,56,48,204 // pmovzxbw %xmm4,%xmm9 .byte 102,15,104,224 // punpckhbw %xmm0,%xmm4 .byte 102,15,110,240 // movd %eax,%xmm6 - .byte 102,15,56,0,53,218,21,0,0 // pshufb 0x15da(%rip),%xmm6 # 2370 <_sk_difference_sse41_8bit+0x2bd> + .byte 102,15,56,0,53,186,29,0,0 // pshufb 0x1dba(%rip),%xmm6 # 2b50 <_sk_overlay_sse41_8bit+0x550> .byte 102,15,111,206 // movdqa %xmm6,%xmm1 .byte 102,65,15,213,201 // pmullw %xmm9,%xmm1 .byte 102,15,111,198 // movdqa %xmm6,%xmm0 @@ -61157,11 +61562,11 @@ _sk_scale_u8_sse41_8bit: .byte 77,133,201 // test %r9,%r9 .byte 15,133,160,0,0,0 // jne ea7 <_sk_scale_u8_sse41_8bit+0xc1> .byte 102,66,15,56,48,52,2 // pmovzxbw (%rdx,%r8,1),%xmm6 - .byte 102,15,219,53,106,21,0,0 // pand 0x156a(%rip),%xmm6 # 2380 <_sk_difference_sse41_8bit+0x2cd> + .byte 102,15,219,53,74,29,0,0 // pand 0x1d4a(%rip),%xmm6 # 2b60 <_sk_overlay_sse41_8bit+0x560> .byte 102,69,15,239,192 // pxor %xmm8,%xmm8 .byte 102,15,111,254 // movdqa %xmm6,%xmm7 - .byte 102,15,56,0,61,104,21,0,0 // pshufb 0x1568(%rip),%xmm7 # 2390 <_sk_difference_sse41_8bit+0x2dd> - .byte 102,15,56,0,53,111,21,0,0 // pshufb 0x156f(%rip),%xmm6 # 23a0 <_sk_difference_sse41_8bit+0x2ed> + .byte 102,15,56,0,61,72,29,0,0 // pshufb 0x1d48(%rip),%xmm7 # 2b70 <_sk_overlay_sse41_8bit+0x570> + .byte 102,15,56,0,53,79,29,0,0 // pshufb 0x1d4f(%rip),%xmm6 # 2b80 <_sk_overlay_sse41_8bit+0x580> .byte 102,68,15,56,48,200 // pmovzxbw %xmm0,%xmm9 .byte 102,65,15,104,192 // punpckhbw %xmm8,%xmm0 .byte 102,68,15,56,48,209 // pmovzxbw %xmm1,%xmm10 @@ -61246,7 +61651,7 @@ FUNCTION(_sk_lerp_1_float_sse41_8bit) _sk_lerp_1_float_sse41_8bit: .byte 
72,173 // lods %ds:(%rsi),%rax .byte 243,15,16,32 // movss (%rax),%xmm4 - .byte 243,15,89,37,182,18,0,0 // mulss 0x12b6(%rip),%xmm4 # 2224 <_sk_difference_sse41_8bit+0x171> + .byte 243,15,89,37,154,26,0,0 // mulss 0x1a9a(%rip),%xmm4 # 2a08 <_sk_overlay_sse41_8bit+0x408> .byte 243,15,44,196 // cvttss2si %xmm4,%eax .byte 102,15,110,224 // movd %eax,%xmm4 .byte 102,15,96,228 // punpcklbw %xmm4,%xmm4 @@ -61257,7 +61662,7 @@ _sk_lerp_1_float_sse41_8bit: .byte 102,65,15,104,193 // punpckhbw %xmm9,%xmm0 .byte 102,68,15,56,48,217 // pmovzxbw %xmm1,%xmm11 .byte 102,65,15,104,201 // punpckhbw %xmm9,%xmm1 - .byte 102,15,56,0,61,7,20,0,0 // pshufb 0x1407(%rip),%xmm7 # 23b0 <_sk_difference_sse41_8bit+0x2fd> + .byte 102,15,56,0,61,231,27,0,0 // pshufb 0x1be7(%rip),%xmm7 # 2b90 <_sk_overlay_sse41_8bit+0x590> .byte 102,68,15,111,231 // movdqa %xmm7,%xmm12 .byte 102,69,15,213,227 // pmullw %xmm11,%xmm12 .byte 102,68,15,111,239 // movdqa %xmm7,%xmm13 @@ -61320,11 +61725,11 @@ _sk_lerp_u8_sse41_8bit: .byte 77,133,201 // test %r9,%r9 .byte 15,133,46,1,0,0 // jne 11d1 <_sk_lerp_u8_sse41_8bit+0x14f> .byte 102,66,15,56,48,60,2 // pmovzxbw (%rdx,%r8,1),%xmm7 - .byte 102,15,219,61,14,19,0,0 // pand 0x130e(%rip),%xmm7 # 23c0 <_sk_difference_sse41_8bit+0x30d> + .byte 102,15,219,61,238,26,0,0 // pand 0x1aee(%rip),%xmm7 # 2ba0 <_sk_overlay_sse41_8bit+0x5a0> .byte 102,69,15,239,192 // pxor %xmm8,%xmm8 .byte 102,15,111,247 // movdqa %xmm7,%xmm6 - .byte 102,15,56,0,53,12,19,0,0 // pshufb 0x130c(%rip),%xmm6 # 23d0 <_sk_difference_sse41_8bit+0x31d> - .byte 102,15,56,0,61,19,19,0,0 // pshufb 0x1313(%rip),%xmm7 # 23e0 <_sk_difference_sse41_8bit+0x32d> + .byte 102,15,56,0,53,236,26,0,0 // pshufb 0x1aec(%rip),%xmm6 # 2bb0 <_sk_overlay_sse41_8bit+0x5b0> + .byte 102,15,56,0,61,243,26,0,0 // pshufb 0x1af3(%rip),%xmm7 # 2bc0 <_sk_overlay_sse41_8bit+0x5c0> .byte 102,68,15,56,48,200 // pmovzxbw %xmm0,%xmm9 .byte 102,65,15,104,192 // punpckhbw %xmm8,%xmm0 .byte 102,68,15,56,48,209 // pmovzxbw %xmm1,%xmm10 @@ 
-61419,7 +61824,7 @@ _sk_lerp_u8_sse41_8bit: .byte 255 // (bad) .byte 255 // (bad) .byte 255 // (bad) - .byte 233,255,255,255,222 // jmpq ffffffffdf00127c <_sk_difference_sse41_8bit+0xffffffffdefff1c9> + .byte 233,255,255,255,222 // jmpq ffffffffdf00127c <_sk_overlay_sse41_8bit+0xffffffffdeffec7c> .byte 255 // (bad) .byte 255 // (bad) .byte 255,211 // callq *%rbx @@ -61453,7 +61858,7 @@ HIDDEN _sk_black_color_sse41_8bit FUNCTION(_sk_black_color_sse41_8bit) _sk_black_color_sse41_8bit: .byte 72,173 // lods %ds:(%rsi),%rax - .byte 15,40,5,75,17,0,0 // movaps 0x114b(%rip),%xmm0 # 23f0 <_sk_difference_sse41_8bit+0x33d> + .byte 15,40,5,43,25,0,0 // movaps 0x192b(%rip),%xmm0 # 2bd0 <_sk_overlay_sse41_8bit+0x5d0> .byte 15,40,200 // movaps %xmm0,%xmm1 .byte 255,224 // jmpq *%rax @@ -61479,7 +61884,7 @@ HIDDEN _sk_srcatop_sse41_8bit .globl _sk_srcatop_sse41_8bit FUNCTION(_sk_srcatop_sse41_8bit) _sk_srcatop_sse41_8bit: - .byte 102,68,15,111,21,55,17,0,0 // movdqa 0x1137(%rip),%xmm10 # 2400 <_sk_difference_sse41_8bit+0x34d> + .byte 102,68,15,111,21,23,25,0,0 // movdqa 0x1917(%rip),%xmm10 # 2be0 <_sk_overlay_sse41_8bit+0x5e0> .byte 102,68,15,111,219 // movdqa %xmm3,%xmm11 .byte 102,68,15,56,48,195 // pmovzxbw %xmm3,%xmm8 .byte 102,15,111,235 // movdqa %xmm3,%xmm5 @@ -61549,7 +61954,7 @@ HIDDEN _sk_dstatop_sse41_8bit .globl _sk_dstatop_sse41_8bit FUNCTION(_sk_dstatop_sse41_8bit) _sk_dstatop_sse41_8bit: - .byte 102,68,15,111,29,12,16,0,0 // movdqa 0x100c(%rip),%xmm11 # 2410 <_sk_difference_sse41_8bit+0x35d> + .byte 102,68,15,111,29,236,23,0,0 // movdqa 0x17ec(%rip),%xmm11 # 2bf0 <_sk_overlay_sse41_8bit+0x5f0> .byte 102,68,15,111,233 // movdqa %xmm1,%xmm13 .byte 102,69,15,56,0,235 // pshufb %xmm11,%xmm13 .byte 102,68,15,111,248 // movdqa %xmm0,%xmm15 @@ -61621,7 +62026,7 @@ FUNCTION(_sk_srcin_sse41_8bit) _sk_srcin_sse41_8bit: .byte 102,15,111,225 // movdqa %xmm1,%xmm4 .byte 102,15,111,232 // movdqa %xmm0,%xmm5 - .byte 102,15,111,5,211,14,0,0 // movdqa 0xed3(%rip),%xmm0 # 2420 
<_sk_difference_sse41_8bit+0x36d> + .byte 102,15,111,5,179,22,0,0 // movdqa 0x16b3(%rip),%xmm0 # 2c00 <_sk_overlay_sse41_8bit+0x600> .byte 102,15,111,243 // movdqa %xmm3,%xmm6 .byte 102,15,56,0,240 // pshufb %xmm0,%xmm6 .byte 102,15,111,250 // movdqa %xmm2,%xmm7 @@ -61656,7 +62061,7 @@ HIDDEN _sk_dstin_sse41_8bit .globl _sk_dstin_sse41_8bit FUNCTION(_sk_dstin_sse41_8bit) _sk_dstin_sse41_8bit: - .byte 102,15,111,37,86,14,0,0 // movdqa 0xe56(%rip),%xmm4 # 2430 <_sk_difference_sse41_8bit+0x37d> + .byte 102,15,111,37,54,22,0,0 // movdqa 0x1636(%rip),%xmm4 # 2c10 <_sk_overlay_sse41_8bit+0x610> .byte 102,15,56,0,204 // pshufb %xmm4,%xmm1 .byte 102,15,56,0,196 // pshufb %xmm4,%xmm0 .byte 102,69,15,239,210 // pxor %xmm10,%xmm10 @@ -61695,7 +62100,7 @@ FUNCTION(_sk_srcout_sse41_8bit) _sk_srcout_sse41_8bit: .byte 102,15,111,225 // movdqa %xmm1,%xmm4 .byte 102,15,111,232 // movdqa %xmm0,%xmm5 - .byte 102,15,111,5,201,13,0,0 // movdqa 0xdc9(%rip),%xmm0 # 2440 <_sk_difference_sse41_8bit+0x38d> + .byte 102,15,111,5,169,21,0,0 // movdqa 0x15a9(%rip),%xmm0 # 2c20 <_sk_overlay_sse41_8bit+0x620> .byte 102,15,111,250 // movdqa %xmm2,%xmm7 .byte 102,15,56,0,248 // pshufb %xmm0,%xmm7 .byte 102,15,111,243 // movdqa %xmm3,%xmm6 @@ -61733,7 +62138,7 @@ HIDDEN _sk_dstout_sse41_8bit .globl _sk_dstout_sse41_8bit FUNCTION(_sk_dstout_sse41_8bit) _sk_dstout_sse41_8bit: - .byte 102,15,111,37,64,13,0,0 // movdqa 0xd40(%rip),%xmm4 # 2450 <_sk_difference_sse41_8bit+0x39d> + .byte 102,15,111,37,32,21,0,0 // movdqa 0x1520(%rip),%xmm4 # 2c30 <_sk_overlay_sse41_8bit+0x630> .byte 102,15,56,0,196 // pshufb %xmm4,%xmm0 .byte 102,15,56,0,204 // pshufb %xmm4,%xmm1 .byte 102,15,118,228 // pcmpeqd %xmm4,%xmm4 @@ -61773,7 +62178,7 @@ HIDDEN _sk_srcover_sse41_8bit .globl _sk_srcover_sse41_8bit FUNCTION(_sk_srcover_sse41_8bit) _sk_srcover_sse41_8bit: - .byte 102,15,111,53,175,12,0,0 // movdqa 0xcaf(%rip),%xmm6 # 2460 <_sk_difference_sse41_8bit+0x3ad> + .byte 102,15,111,53,143,20,0,0 // movdqa 0x148f(%rip),%xmm6 
# 2c40 <_sk_overlay_sse41_8bit+0x640> .byte 102,68,15,111,217 // movdqa %xmm1,%xmm11 .byte 102,68,15,56,0,222 // pshufb %xmm6,%xmm11 .byte 102,15,111,232 // movdqa %xmm0,%xmm5 @@ -61814,7 +62219,7 @@ HIDDEN _sk_dstover_sse41_8bit .globl _sk_dstover_sse41_8bit FUNCTION(_sk_dstover_sse41_8bit) _sk_dstover_sse41_8bit: - .byte 102,68,15,111,5,15,12,0,0 // movdqa 0xc0f(%rip),%xmm8 # 2470 <_sk_difference_sse41_8bit+0x3bd> + .byte 102,68,15,111,5,239,19,0,0 // movdqa 0x13ef(%rip),%xmm8 # 2c50 <_sk_overlay_sse41_8bit+0x650> .byte 102,68,15,111,209 // movdqa %xmm1,%xmm10 .byte 102,68,15,56,48,201 // pmovzxbw %xmm1,%xmm9 .byte 102,15,252,203 // paddb %xmm3,%xmm1 @@ -61893,7 +62298,7 @@ _sk_multiply_sse41_8bit: .byte 102,15,111,218 // movdqa %xmm2,%xmm3 .byte 102,15,111,209 // movdqa %xmm1,%xmm2 .byte 102,15,111,200 // movdqa %xmm0,%xmm1 - .byte 102,68,15,111,53,221,10,0,0 // movdqa 0xadd(%rip),%xmm14 # 2480 <_sk_difference_sse41_8bit+0x3cd> + .byte 102,68,15,111,53,189,18,0,0 // movdqa 0x12bd(%rip),%xmm14 # 2c60 <_sk_overlay_sse41_8bit+0x660> .byte 102,68,15,111,195 // movdqa %xmm3,%xmm8 .byte 102,15,111,235 // movdqa %xmm3,%xmm5 .byte 102,65,15,56,0,238 // pshufb %xmm14,%xmm5 @@ -62022,7 +62427,7 @@ HIDDEN _sk_xor__sse41_8bit .globl _sk_xor__sse41_8bit FUNCTION(_sk_xor__sse41_8bit) _sk_xor__sse41_8bit: - .byte 102,68,15,111,21,171,8,0,0 // movdqa 0x8ab(%rip),%xmm10 # 2490 <_sk_difference_sse41_8bit+0x3dd> + .byte 102,68,15,111,21,139,16,0,0 // movdqa 0x108b(%rip),%xmm10 # 2c70 <_sk_overlay_sse41_8bit+0x670> .byte 102,68,15,111,226 // movdqa %xmm2,%xmm12 .byte 102,68,15,56,48,194 // pmovzxbw %xmm2,%xmm8 .byte 102,15,111,234 // movdqa %xmm2,%xmm5 @@ -62095,7 +62500,7 @@ HIDDEN _sk_darken_sse41_8bit FUNCTION(_sk_darken_sse41_8bit) _sk_darken_sse41_8bit: .byte 102,68,15,111,240 // movdqa %xmm0,%xmm14 - .byte 102,68,15,111,5,110,7,0,0 // movdqa 0x76e(%rip),%xmm8 # 24a0 <_sk_difference_sse41_8bit+0x3ed> + .byte 102,68,15,111,5,78,15,0,0 // movdqa 0xf4e(%rip),%xmm8 # 2c80 
<_sk_overlay_sse41_8bit+0x680> .byte 102,68,15,111,219 // movdqa %xmm3,%xmm11 .byte 102,68,15,56,48,203 // pmovzxbw %xmm3,%xmm9 .byte 102,15,111,243 // movdqa %xmm3,%xmm6 @@ -62162,7 +62567,7 @@ _sk_darken_sse41_8bit: .byte 102,15,248,231 // psubb %xmm7,%xmm4 .byte 102,15,248,206 // psubb %xmm6,%xmm1 .byte 102,68,15,248,245 // psubb %xmm5,%xmm14 - .byte 15,40,5,50,6,0,0 // movaps 0x632(%rip),%xmm0 # 24b0 <_sk_difference_sse41_8bit+0x3fd> + .byte 15,40,5,18,14,0,0 // movaps 0xe12(%rip),%xmm0 # 2c90 <_sk_overlay_sse41_8bit+0x690> .byte 102,68,15,56,16,244 // pblendvb %xmm0,%xmm4,%xmm14 .byte 102,65,15,56,16,200 // pblendvb %xmm0,%xmm8,%xmm1 .byte 72,173 // lods %ds:(%rsi),%rax @@ -62174,7 +62579,7 @@ HIDDEN _sk_lighten_sse41_8bit FUNCTION(_sk_lighten_sse41_8bit) _sk_lighten_sse41_8bit: .byte 102,68,15,111,240 // movdqa %xmm0,%xmm14 - .byte 102,68,15,111,5,31,6,0,0 // movdqa 0x61f(%rip),%xmm8 # 24c0 <_sk_difference_sse41_8bit+0x40d> + .byte 102,68,15,111,5,255,13,0,0 // movdqa 0xdff(%rip),%xmm8 # 2ca0 <_sk_overlay_sse41_8bit+0x6a0> .byte 102,68,15,111,219 // movdqa %xmm3,%xmm11 .byte 102,68,15,56,48,203 // pmovzxbw %xmm3,%xmm9 .byte 102,15,111,243 // movdqa %xmm3,%xmm6 @@ -62241,7 +62646,7 @@ _sk_lighten_sse41_8bit: .byte 102,15,248,231 // psubb %xmm7,%xmm4 .byte 102,15,248,206 // psubb %xmm6,%xmm1 .byte 102,68,15,248,245 // psubb %xmm5,%xmm14 - .byte 15,40,5,227,4,0,0 // movaps 0x4e3(%rip),%xmm0 # 24d0 <_sk_difference_sse41_8bit+0x41d> + .byte 15,40,5,195,12,0,0 // movaps 0xcc3(%rip),%xmm0 # 2cb0 <_sk_overlay_sse41_8bit+0x6b0> .byte 102,68,15,56,16,244 // pblendvb %xmm0,%xmm4,%xmm14 .byte 102,65,15,56,16,200 // pblendvb %xmm0,%xmm8,%xmm1 .byte 72,173 // lods %ds:(%rsi),%rax @@ -62281,7 +62686,7 @@ _sk_exclusion_sse41_8bit: .byte 102,15,113,215,8 // psrlw $0x8,%xmm7 .byte 102,15,103,253 // packuswb %xmm5,%xmm7 .byte 102,15,103,230 // packuswb %xmm6,%xmm4 - .byte 102,15,111,45,73,4,0,0 // movdqa 0x449(%rip),%xmm5 # 24e0 <_sk_difference_sse41_8bit+0x42d> + .byte 
102,15,111,45,41,12,0,0 // movdqa 0xc29(%rip),%xmm5 # 2cc0 <_sk_overlay_sse41_8bit+0x6c0> .byte 102,15,248,204 // psubb %xmm4,%xmm1 .byte 102,15,219,229 // pand %xmm5,%xmm4 .byte 102,15,219,239 // pand %xmm7,%xmm5 @@ -62296,7 +62701,7 @@ HIDDEN _sk_difference_sse41_8bit FUNCTION(_sk_difference_sse41_8bit) _sk_difference_sse41_8bit: .byte 102,68,15,111,193 // movdqa %xmm1,%xmm8 - .byte 102,15,111,13,48,4,0,0 // movdqa 0x430(%rip),%xmm1 # 24f0 <_sk_difference_sse41_8bit+0x43d> + .byte 102,15,111,13,16,12,0,0 // movdqa 0xc10(%rip),%xmm1 # 2cd0 <_sk_overlay_sse41_8bit+0x6d0> .byte 102,68,15,111,219 // movdqa %xmm3,%xmm11 .byte 102,68,15,56,48,203 // pmovzxbw %xmm3,%xmm9 .byte 102,15,111,243 // movdqa %xmm3,%xmm6 @@ -62334,7 +62739,7 @@ _sk_difference_sse41_8bit: .byte 102,15,113,214,8 // psrlw $0x8,%xmm6 .byte 102,15,113,215,8 // psrlw $0x8,%xmm7 .byte 102,15,103,254 // packuswb %xmm6,%xmm7 - .byte 102,15,111,37,117,3,0,0 // movdqa 0x375(%rip),%xmm4 # 24f0 <_sk_difference_sse41_8bit+0x43d> + .byte 102,15,111,37,85,11,0,0 // movdqa 0xb55(%rip),%xmm4 # 2cd0 <_sk_overlay_sse41_8bit+0x6d0> .byte 102,15,56,0,204 // pshufb %xmm4,%xmm1 .byte 102,15,56,0,236 // pshufb %xmm4,%xmm5 .byte 102,69,15,104,236 // punpckhbw %xmm12,%xmm13 @@ -62359,7 +62764,7 @@ _sk_difference_sse41_8bit: .byte 102,15,103,241 // packuswb %xmm1,%xmm6 .byte 102,65,15,218,230 // pminub %xmm14,%xmm4 .byte 102,15,218,247 // pminub %xmm7,%xmm6 - .byte 102,15,111,13,8,3,0,0 // movdqa 0x308(%rip),%xmm1 # 2500 <_sk_difference_sse41_8bit+0x44d> + .byte 102,15,111,13,232,10,0,0 // movdqa 0xae8(%rip),%xmm1 # 2ce0 <_sk_overlay_sse41_8bit+0x6e0> .byte 102,68,15,248,198 // psubb %xmm6,%xmm8 .byte 102,15,219,241 // pand %xmm1,%xmm6 .byte 102,15,219,204 // pand %xmm4,%xmm1 @@ -62370,13 +62775,420 @@ _sk_difference_sse41_8bit: .byte 102,65,15,111,200 // movdqa %xmm8,%xmm1 .byte 255,224 // jmpq *%rax +HIDDEN _sk_hardlight_sse41_8bit +.globl _sk_hardlight_sse41_8bit +FUNCTION(_sk_hardlight_sse41_8bit) 
+_sk_hardlight_sse41_8bit: + .byte 72,129,236,152,0,0,0 // sub $0x98,%rsp + .byte 102,68,15,111,251 // movdqa %xmm3,%xmm15 + .byte 102,68,15,111,233 // movdqa %xmm1,%xmm13 + .byte 102,68,15,111,240 // movdqa %xmm0,%xmm14 + .byte 102,15,239,192 // pxor %xmm0,%xmm0 + .byte 102,69,15,111,198 // movdqa %xmm14,%xmm8 + .byte 102,68,15,104,192 // punpckhbw %xmm0,%xmm8 + .byte 102,15,104,200 // punpckhbw %xmm0,%xmm1 + .byte 102,15,111,225 // movdqa %xmm1,%xmm4 + .byte 102,15,127,100,36,144 // movdqa %xmm4,-0x70(%rsp) + .byte 102,65,15,56,48,206 // pmovzxbw %xmm14,%xmm1 + .byte 102,15,127,76,36,128 // movdqa %xmm1,-0x80(%rsp) + .byte 102,69,15,56,48,221 // pmovzxbw %xmm13,%xmm11 + .byte 102,68,15,127,92,36,48 // movdqa %xmm11,0x30(%rsp) + .byte 102,15,111,13,130,10,0,0 // movdqa 0xa82(%rip),%xmm1 # 2cf0 <_sk_overlay_sse41_8bit+0x6f0> + .byte 102,68,15,56,0,241 // pshufb %xmm1,%xmm14 + .byte 102,68,15,56,0,233 // pshufb %xmm1,%xmm13 + .byte 102,69,15,56,48,205 // pmovzxbw %xmm13,%xmm9 + .byte 102,68,15,104,232 // punpckhbw %xmm0,%xmm13 + .byte 102,69,15,56,48,214 // pmovzxbw %xmm14,%xmm10 + .byte 102,68,15,104,240 // punpckhbw %xmm0,%xmm14 + .byte 102,15,127,84,36,192 // movdqa %xmm2,-0x40(%rsp) + .byte 102,15,111,242 // movdqa %xmm2,%xmm6 + .byte 102,15,111,234 // movdqa %xmm2,%xmm5 + .byte 102,15,104,232 // punpckhbw %xmm0,%xmm5 + .byte 102,68,15,127,124,36,208 // movdqa %xmm15,-0x30(%rsp) + .byte 102,65,15,111,215 // movdqa %xmm15,%xmm2 + .byte 102,68,15,104,248 // punpckhbw %xmm0,%xmm15 + .byte 102,15,56,0,241 // pshufb %xmm1,%xmm6 + .byte 102,15,56,0,209 // pshufb %xmm1,%xmm2 + .byte 102,68,15,56,48,226 // pmovzxbw %xmm2,%xmm12 + .byte 102,68,15,127,100,36,160 // movdqa %xmm12,-0x60(%rsp) + .byte 102,15,104,208 // punpckhbw %xmm0,%xmm2 + .byte 102,15,127,84,36,176 // movdqa %xmm2,-0x50(%rsp) + .byte 102,15,56,48,222 // pmovzxbw %xmm6,%xmm3 + .byte 102,15,104,240 // punpckhbw %xmm0,%xmm6 + .byte 102,15,111,254 // movdqa %xmm6,%xmm7 + .byte 102,15,111,196 // movdqa 
%xmm4,%xmm0 + .byte 102,15,253,192 // paddw %xmm0,%xmm0 + .byte 102,15,111,200 // movdqa %xmm0,%xmm1 + .byte 102,65,15,56,58,205 // pminuw %xmm13,%xmm1 + .byte 102,15,117,200 // pcmpeqw %xmm0,%xmm1 + .byte 102,15,127,76,36,112 // movdqa %xmm1,0x70(%rsp) + .byte 102,65,15,111,192 // movdqa %xmm8,%xmm0 + .byte 102,15,253,192 // paddw %xmm0,%xmm0 + .byte 102,15,111,200 // movdqa %xmm0,%xmm1 + .byte 102,65,15,56,58,206 // pminuw %xmm14,%xmm1 + .byte 102,15,117,200 // pcmpeqw %xmm0,%xmm1 + .byte 102,15,127,76,36,80 // movdqa %xmm1,0x50(%rsp) + .byte 102,65,15,111,195 // movdqa %xmm11,%xmm0 + .byte 102,15,253,192 // paddw %xmm0,%xmm0 + .byte 102,15,111,208 // movdqa %xmm0,%xmm2 + .byte 102,65,15,111,225 // movdqa %xmm9,%xmm4 + .byte 102,15,127,100,36,224 // movdqa %xmm4,-0x20(%rsp) + .byte 102,15,56,58,212 // pminuw %xmm4,%xmm2 + .byte 102,15,117,208 // pcmpeqw %xmm0,%xmm2 + .byte 102,15,127,148,36,128,0,0,0 // movdqa %xmm2,0x80(%rsp) + .byte 102,15,111,68,36,128 // movdqa -0x80(%rsp),%xmm0 + .byte 102,15,253,192 // paddw %xmm0,%xmm0 + .byte 102,15,111,240 // movdqa %xmm0,%xmm6 + .byte 102,65,15,56,58,242 // pminuw %xmm10,%xmm6 + .byte 102,15,117,240 // pcmpeqw %xmm0,%xmm6 + .byte 102,15,127,116,36,64 // movdqa %xmm6,0x40(%rsp) + .byte 102,15,111,53,150,9,0,0 // movdqa 0x996(%rip),%xmm6 # 2d00 <_sk_overlay_sse41_8bit+0x700> + .byte 102,15,111,199 // movdqa %xmm7,%xmm0 + .byte 102,15,239,198 // pxor %xmm6,%xmm0 + .byte 102,65,15,213,192 // pmullw %xmm8,%xmm0 + .byte 102,15,127,68,36,96 // movdqa %xmm0,0x60(%rsp) + .byte 102,69,15,111,222 // movdqa %xmm14,%xmm11 + .byte 102,68,15,239,222 // pxor %xmm6,%xmm11 + .byte 102,68,15,213,221 // pmullw %xmm5,%xmm11 + .byte 102,15,111,199 // movdqa %xmm7,%xmm0 + .byte 102,15,249,253 // psubw %xmm5,%xmm7 + .byte 102,15,127,60,36 // movdqa %xmm7,(%rsp) + .byte 102,68,15,111,205 // movdqa %xmm5,%xmm9 + .byte 102,69,15,213,200 // pmullw %xmm8,%xmm9 + .byte 102,65,15,213,198 // pmullw %xmm14,%xmm0 + .byte 102,15,127,68,36,16 // movdqa 
%xmm0,0x10(%rsp) + .byte 102,69,15,249,240 // psubw %xmm8,%xmm14 + .byte 102,15,111,195 // movdqa %xmm3,%xmm0 + .byte 102,15,239,198 // pxor %xmm6,%xmm0 + .byte 102,68,15,111,192 // movdqa %xmm0,%xmm8 + .byte 102,65,15,111,212 // movdqa %xmm12,%xmm2 + .byte 102,15,239,214 // pxor %xmm6,%xmm2 + .byte 102,15,111,76,36,176 // movdqa -0x50(%rsp),%xmm1 + .byte 102,15,111,193 // movdqa %xmm1,%xmm0 + .byte 102,15,239,198 // pxor %xmm6,%xmm0 + .byte 102,68,15,111,100,36,144 // movdqa -0x70(%rsp),%xmm12 + .byte 102,65,15,213,196 // pmullw %xmm12,%xmm0 + .byte 102,15,127,68,36,32 // movdqa %xmm0,0x20(%rsp) + .byte 102,65,15,111,194 // movdqa %xmm10,%xmm0 + .byte 102,15,111,248 // movdqa %xmm0,%xmm7 + .byte 102,15,239,254 // pxor %xmm6,%xmm7 + .byte 102,15,111,236 // movdqa %xmm4,%xmm5 + .byte 102,15,239,238 // pxor %xmm6,%xmm5 + .byte 102,65,15,239,245 // pxor %xmm13,%xmm6 + .byte 102,65,15,111,231 // movdqa %xmm15,%xmm4 + .byte 102,15,213,244 // pmullw %xmm4,%xmm6 + .byte 102,68,15,111,249 // movdqa %xmm1,%xmm15 + .byte 102,15,249,204 // psubw %xmm4,%xmm1 + .byte 102,15,127,76,36,176 // movdqa %xmm1,-0x50(%rsp) + .byte 102,65,15,213,228 // pmullw %xmm12,%xmm4 + .byte 102,69,15,213,253 // pmullw %xmm13,%xmm15 + .byte 102,69,15,249,236 // psubw %xmm12,%xmm13 + .byte 102,68,15,111,84,36,128 // movdqa -0x80(%rsp),%xmm10 + .byte 102,69,15,213,194 // pmullw %xmm10,%xmm8 + .byte 102,68,15,127,68,36,144 // movdqa %xmm8,-0x70(%rsp) + .byte 102,68,15,56,48,68,36,192 // pmovzxbw -0x40(%rsp),%xmm8 + .byte 102,65,15,213,248 // pmullw %xmm8,%xmm7 + .byte 102,68,15,111,227 // movdqa %xmm3,%xmm12 + .byte 102,65,15,249,216 // psubw %xmm8,%xmm3 + .byte 102,15,127,92,36,240 // movdqa %xmm3,-0x10(%rsp) + .byte 102,69,15,213,194 // pmullw %xmm10,%xmm8 + .byte 102,68,15,213,224 // pmullw %xmm0,%xmm12 + .byte 102,65,15,249,194 // psubw %xmm10,%xmm0 + .byte 102,15,111,200 // movdqa %xmm0,%xmm1 + .byte 102,15,111,92,36,48 // movdqa 0x30(%rsp),%xmm3 + .byte 102,15,213,211 // pmullw %xmm3,%xmm2 + 
.byte 102,15,127,84,36,128 // movdqa %xmm2,-0x80(%rsp) + .byte 102,15,56,48,84,36,208 // pmovzxbw -0x30(%rsp),%xmm2 + .byte 102,15,213,234 // pmullw %xmm2,%xmm5 + .byte 102,15,111,68,36,160 // movdqa -0x60(%rsp),%xmm0 + .byte 102,68,15,111,208 // movdqa %xmm0,%xmm10 + .byte 102,15,249,194 // psubw %xmm2,%xmm0 + .byte 102,15,127,68,36,160 // movdqa %xmm0,-0x60(%rsp) + .byte 102,15,213,211 // pmullw %xmm3,%xmm2 + .byte 102,15,111,68,36,224 // movdqa -0x20(%rsp),%xmm0 + .byte 102,68,15,213,208 // pmullw %xmm0,%xmm10 + .byte 102,15,249,195 // psubw %xmm3,%xmm0 + .byte 102,68,15,213,108,36,176 // pmullw -0x50(%rsp),%xmm13 + .byte 102,68,15,213,52,36 // pmullw (%rsp),%xmm14 + .byte 102,15,213,68,36,160 // pmullw -0x60(%rsp),%xmm0 + .byte 102,15,213,76,36,240 // pmullw -0x10(%rsp),%xmm1 + .byte 102,15,253,192 // paddw %xmm0,%xmm0 + .byte 102,68,15,249,208 // psubw %xmm0,%xmm10 + .byte 102,15,253,201 // paddw %xmm1,%xmm1 + .byte 102,68,15,249,225 // psubw %xmm1,%xmm12 + .byte 102,69,15,253,237 // paddw %xmm13,%xmm13 + .byte 102,69,15,249,253 // psubw %xmm13,%xmm15 + .byte 102,69,15,253,246 // paddw %xmm14,%xmm14 + .byte 102,15,111,76,36,16 // movdqa 0x10(%rsp),%xmm1 + .byte 102,65,15,249,206 // psubw %xmm14,%xmm1 + .byte 102,69,15,253,201 // paddw %xmm9,%xmm9 + .byte 102,15,111,68,36,80 // movdqa 0x50(%rsp),%xmm0 + .byte 102,68,15,219,200 // pand %xmm0,%xmm9 + .byte 102,15,223,193 // pandn %xmm1,%xmm0 + .byte 102,65,15,235,193 // por %xmm9,%xmm0 + .byte 102,15,111,216 // movdqa %xmm0,%xmm3 + .byte 102,15,253,228 // paddw %xmm4,%xmm4 + .byte 102,15,111,68,36,112 // movdqa 0x70(%rsp),%xmm0 + .byte 102,15,219,224 // pand %xmm0,%xmm4 + .byte 102,65,15,223,199 // pandn %xmm15,%xmm0 + .byte 102,15,235,196 // por %xmm4,%xmm0 + .byte 102,15,111,200 // movdqa %xmm0,%xmm1 + .byte 102,69,15,253,192 // paddw %xmm8,%xmm8 + .byte 102,15,111,68,36,64 // movdqa 0x40(%rsp),%xmm0 + .byte 102,68,15,219,192 // pand %xmm0,%xmm8 + .byte 102,65,15,223,196 // pandn %xmm12,%xmm0 + .byte 
102,65,15,235,192 // por %xmm8,%xmm0 + .byte 102,15,111,224 // movdqa %xmm0,%xmm4 + .byte 102,15,253,210 // paddw %xmm2,%xmm2 + .byte 102,15,111,132,36,128,0,0,0 // movdqa 0x80(%rsp),%xmm0 + .byte 102,15,219,208 // pand %xmm0,%xmm2 + .byte 102,65,15,223,194 // pandn %xmm10,%xmm0 + .byte 102,15,235,194 // por %xmm2,%xmm0 + .byte 102,15,111,208 // movdqa %xmm0,%xmm2 + .byte 102,68,15,253,92,36,96 // paddw 0x60(%rsp),%xmm11 + .byte 102,15,253,116,36,32 // paddw 0x20(%rsp),%xmm6 + .byte 102,15,253,124,36,144 // paddw -0x70(%rsp),%xmm7 + .byte 102,15,253,108,36,128 // paddw -0x80(%rsp),%xmm5 + .byte 102,15,111,5,134,7,0,0 // movdqa 0x786(%rip),%xmm0 # 2d10 <_sk_overlay_sse41_8bit+0x710> + .byte 102,68,15,253,216 // paddw %xmm0,%xmm11 + .byte 102,68,15,253,219 // paddw %xmm3,%xmm11 + .byte 102,15,253,240 // paddw %xmm0,%xmm6 + .byte 102,15,253,241 // paddw %xmm1,%xmm6 + .byte 102,15,253,248 // paddw %xmm0,%xmm7 + .byte 102,15,253,252 // paddw %xmm4,%xmm7 + .byte 102,15,253,232 // paddw %xmm0,%xmm5 + .byte 102,15,253,234 // paddw %xmm2,%xmm5 + .byte 102,15,111,5,108,7,0,0 // movdqa 0x76c(%rip),%xmm0 # 2d20 <_sk_overlay_sse41_8bit+0x720> + .byte 102,15,228,240 // pmulhuw %xmm0,%xmm6 + .byte 102,68,15,228,216 // pmulhuw %xmm0,%xmm11 + .byte 102,15,228,232 // pmulhuw %xmm0,%xmm5 + .byte 102,15,228,248 // pmulhuw %xmm0,%xmm7 + .byte 102,65,15,113,211,7 // psrlw $0x7,%xmm11 + .byte 102,15,113,215,7 // psrlw $0x7,%xmm7 + .byte 102,65,15,103,251 // packuswb %xmm11,%xmm7 + .byte 102,15,113,214,7 // psrlw $0x7,%xmm6 + .byte 102,15,113,213,7 // psrlw $0x7,%xmm5 + .byte 102,15,103,238 // packuswb %xmm6,%xmm5 + .byte 72,173 // lods %ds:(%rsi),%rax + .byte 15,40,84,36,192 // movaps -0x40(%rsp),%xmm2 + .byte 15,40,92,36,208 // movaps -0x30(%rsp),%xmm3 + .byte 102,15,111,199 // movdqa %xmm7,%xmm0 + .byte 102,15,111,205 // movdqa %xmm5,%xmm1 + .byte 72,129,196,152,0,0,0 // add $0x98,%rsp + .byte 255,224 // jmpq *%rax + +HIDDEN _sk_overlay_sse41_8bit +.globl _sk_overlay_sse41_8bit 
+FUNCTION(_sk_overlay_sse41_8bit) +_sk_overlay_sse41_8bit: + .byte 72,129,236,200,0,0,0 // sub $0xc8,%rsp + .byte 102,68,15,111,241 // movdqa %xmm1,%xmm14 + .byte 102,68,15,111,248 // movdqa %xmm0,%xmm15 + .byte 102,15,239,192 // pxor %xmm0,%xmm0 + .byte 102,65,15,111,231 // movdqa %xmm15,%xmm4 + .byte 102,15,104,224 // punpckhbw %xmm0,%xmm4 + .byte 102,69,15,111,230 // movdqa %xmm14,%xmm12 + .byte 102,68,15,104,224 // punpckhbw %xmm0,%xmm12 + .byte 102,65,15,56,48,207 // pmovzxbw %xmm15,%xmm1 + .byte 102,15,127,76,36,128 // movdqa %xmm1,-0x80(%rsp) + .byte 102,65,15,56,48,206 // pmovzxbw %xmm14,%xmm1 + .byte 102,15,127,140,36,144,0,0,0 // movdqa %xmm1,0x90(%rsp) + .byte 102,15,111,13,229,6,0,0 // movdqa 0x6e5(%rip),%xmm1 # 2d30 <_sk_overlay_sse41_8bit+0x730> + .byte 102,68,15,56,0,249 // pshufb %xmm1,%xmm15 + .byte 102,68,15,56,0,241 // pshufb %xmm1,%xmm14 + .byte 102,65,15,56,48,238 // pmovzxbw %xmm14,%xmm5 + .byte 102,15,127,108,36,160 // movdqa %xmm5,-0x60(%rsp) + .byte 102,68,15,104,240 // punpckhbw %xmm0,%xmm14 + .byte 102,65,15,56,48,239 // pmovzxbw %xmm15,%xmm5 + .byte 102,15,127,172,36,128,0,0,0 // movdqa %xmm5,0x80(%rsp) + .byte 102,68,15,104,248 // punpckhbw %xmm0,%xmm15 + .byte 102,15,111,242 // movdqa %xmm2,%xmm6 + .byte 102,15,111,234 // movdqa %xmm2,%xmm5 + .byte 102,68,15,111,218 // movdqa %xmm2,%xmm11 + .byte 102,68,15,127,156,36,160,0,0,0 // movdqa %xmm11,0xa0(%rsp) + .byte 102,15,104,232 // punpckhbw %xmm0,%xmm5 + .byte 102,15,111,251 // movdqa %xmm3,%xmm7 + .byte 102,15,111,211 // movdqa %xmm3,%xmm2 + .byte 102,15,127,156,36,176,0,0,0 // movdqa %xmm3,0xb0(%rsp) + .byte 102,15,104,208 // punpckhbw %xmm0,%xmm2 + .byte 102,15,56,0,241 // pshufb %xmm1,%xmm6 + .byte 102,15,56,0,249 // pshufb %xmm1,%xmm7 + .byte 102,15,56,48,207 // pmovzxbw %xmm7,%xmm1 + .byte 102,15,104,248 // punpckhbw %xmm0,%xmm7 + .byte 102,68,15,56,48,238 // pmovzxbw %xmm6,%xmm13 + .byte 102,15,104,240 // punpckhbw %xmm0,%xmm6 + .byte 102,68,15,111,206 // movdqa %xmm6,%xmm9 + 
.byte 102,15,111,194 // movdqa %xmm2,%xmm0 + .byte 102,15,253,192 // paddw %xmm0,%xmm0 + .byte 102,15,111,240 // movdqa %xmm0,%xmm6 + .byte 102,15,56,58,247 // pminuw %xmm7,%xmm6 + .byte 102,15,117,240 // pcmpeqw %xmm0,%xmm6 + .byte 102,15,127,116,36,80 // movdqa %xmm6,0x50(%rsp) + .byte 102,15,111,197 // movdqa %xmm5,%xmm0 + .byte 102,15,253,192 // paddw %xmm0,%xmm0 + .byte 102,15,111,240 // movdqa %xmm0,%xmm6 + .byte 102,65,15,56,58,241 // pminuw %xmm9,%xmm6 + .byte 102,15,117,240 // pcmpeqw %xmm0,%xmm6 + .byte 102,15,127,116,36,64 // movdqa %xmm6,0x40(%rsp) + .byte 102,15,56,48,195 // pmovzxbw %xmm3,%xmm0 + .byte 102,15,127,68,36,16 // movdqa %xmm0,0x10(%rsp) + .byte 102,15,253,192 // paddw %xmm0,%xmm0 + .byte 102,15,111,216 // movdqa %xmm0,%xmm3 + .byte 102,15,56,58,217 // pminuw %xmm1,%xmm3 + .byte 102,15,127,76,36,144 // movdqa %xmm1,-0x70(%rsp) + .byte 102,15,117,216 // pcmpeqw %xmm0,%xmm3 + .byte 102,15,127,92,36,96 // movdqa %xmm3,0x60(%rsp) + .byte 102,65,15,56,48,195 // pmovzxbw %xmm11,%xmm0 + .byte 102,15,127,68,36,224 // movdqa %xmm0,-0x20(%rsp) + .byte 102,15,253,192 // paddw %xmm0,%xmm0 + .byte 102,15,111,216 // movdqa %xmm0,%xmm3 + .byte 102,65,15,56,58,221 // pminuw %xmm13,%xmm3 + .byte 102,15,117,216 // pcmpeqw %xmm0,%xmm3 + .byte 102,15,127,92,36,48 // movdqa %xmm3,0x30(%rsp) + .byte 102,15,111,53,231,5,0,0 // movdqa 0x5e7(%rip),%xmm6 # 2d40 <_sk_overlay_sse41_8bit+0x740> + .byte 102,65,15,111,193 // movdqa %xmm9,%xmm0 + .byte 102,15,239,198 // pxor %xmm6,%xmm0 + .byte 102,15,213,196 // pmullw %xmm4,%xmm0 + .byte 102,15,127,68,36,112 // movdqa %xmm0,0x70(%rsp) + .byte 102,69,15,111,223 // movdqa %xmm15,%xmm11 + .byte 102,68,15,239,222 // pxor %xmm6,%xmm11 + .byte 102,68,15,213,221 // pmullw %xmm5,%xmm11 + .byte 102,65,15,111,193 // movdqa %xmm9,%xmm0 + .byte 102,68,15,249,205 // psubw %xmm5,%xmm9 + .byte 102,68,15,127,76,36,208 // movdqa %xmm9,-0x30(%rsp) + .byte 102,68,15,111,205 // movdqa %xmm5,%xmm9 + .byte 102,68,15,213,204 // pmullw 
%xmm4,%xmm9 + .byte 102,65,15,213,199 // pmullw %xmm15,%xmm0 + .byte 102,15,127,68,36,240 // movdqa %xmm0,-0x10(%rsp) + .byte 102,68,15,249,252 // psubw %xmm4,%xmm15 + .byte 102,65,15,111,197 // movdqa %xmm13,%xmm0 + .byte 102,15,239,198 // pxor %xmm6,%xmm0 + .byte 102,15,111,216 // movdqa %xmm0,%xmm3 + .byte 102,15,239,206 // pxor %xmm6,%xmm1 + .byte 102,68,15,111,209 // movdqa %xmm1,%xmm10 + .byte 102,15,111,207 // movdqa %xmm7,%xmm1 + .byte 102,15,111,193 // movdqa %xmm1,%xmm0 + .byte 102,15,239,198 // pxor %xmm6,%xmm0 + .byte 102,65,15,111,252 // movdqa %xmm12,%xmm7 + .byte 102,15,213,199 // pmullw %xmm7,%xmm0 + .byte 102,15,127,68,36,32 // movdqa %xmm0,0x20(%rsp) + .byte 102,15,111,132,36,128,0,0,0 // movdqa 0x80(%rsp),%xmm0 + .byte 102,68,15,111,192 // movdqa %xmm0,%xmm8 + .byte 102,68,15,239,198 // pxor %xmm6,%xmm8 + .byte 102,15,111,108,36,160 // movdqa -0x60(%rsp),%xmm5 + .byte 102,15,239,238 // pxor %xmm6,%xmm5 + .byte 102,65,15,239,246 // pxor %xmm14,%xmm6 + .byte 102,15,213,242 // pmullw %xmm2,%xmm6 + .byte 102,68,15,111,225 // movdqa %xmm1,%xmm12 + .byte 102,15,249,202 // psubw %xmm2,%xmm1 + .byte 102,15,127,76,36,192 // movdqa %xmm1,-0x40(%rsp) + .byte 102,15,111,226 // movdqa %xmm2,%xmm4 + .byte 102,15,213,231 // pmullw %xmm7,%xmm4 + .byte 102,69,15,213,230 // pmullw %xmm14,%xmm12 + .byte 102,68,15,249,247 // psubw %xmm7,%xmm14 + .byte 102,15,111,76,36,128 // movdqa -0x80(%rsp),%xmm1 + .byte 102,15,213,217 // pmullw %xmm1,%xmm3 + .byte 102,15,127,28,36 // movdqa %xmm3,(%rsp) + .byte 102,15,111,84,36,224 // movdqa -0x20(%rsp),%xmm2 + .byte 102,68,15,213,194 // pmullw %xmm2,%xmm8 + .byte 102,65,15,111,221 // movdqa %xmm13,%xmm3 + .byte 102,15,249,218 // psubw %xmm2,%xmm3 + .byte 102,15,127,92,36,176 // movdqa %xmm3,-0x50(%rsp) + .byte 102,15,111,218 // movdqa %xmm2,%xmm3 + .byte 102,15,213,217 // pmullw %xmm1,%xmm3 + .byte 102,68,15,213,232 // pmullw %xmm0,%xmm13 + .byte 102,15,249,193 // psubw %xmm1,%xmm0 + .byte 102,15,111,200 // movdqa %xmm0,%xmm1 + 
.byte 102,15,111,148,36,144,0,0,0 // movdqa 0x90(%rsp),%xmm2 + .byte 102,68,15,213,210 // pmullw %xmm2,%xmm10 + .byte 102,68,15,127,84,36,128 // movdqa %xmm10,-0x80(%rsp) + .byte 102,15,111,68,36,16 // movdqa 0x10(%rsp),%xmm0 + .byte 102,15,213,232 // pmullw %xmm0,%xmm5 + .byte 102,15,111,124,36,144 // movdqa -0x70(%rsp),%xmm7 + .byte 102,68,15,111,215 // movdqa %xmm7,%xmm10 + .byte 102,15,249,248 // psubw %xmm0,%xmm7 + .byte 102,15,127,124,36,144 // movdqa %xmm7,-0x70(%rsp) + .byte 102,15,111,248 // movdqa %xmm0,%xmm7 + .byte 102,15,213,250 // pmullw %xmm2,%xmm7 + .byte 102,15,111,68,36,160 // movdqa -0x60(%rsp),%xmm0 + .byte 102,68,15,213,208 // pmullw %xmm0,%xmm10 + .byte 102,15,249,194 // psubw %xmm2,%xmm0 + .byte 102,68,15,213,116,36,192 // pmullw -0x40(%rsp),%xmm14 + .byte 102,68,15,213,124,36,208 // pmullw -0x30(%rsp),%xmm15 + .byte 102,15,213,68,36,144 // pmullw -0x70(%rsp),%xmm0 + .byte 102,15,213,76,36,176 // pmullw -0x50(%rsp),%xmm1 + .byte 102,15,253,192 // paddw %xmm0,%xmm0 + .byte 102,68,15,249,208 // psubw %xmm0,%xmm10 + .byte 102,15,253,201 // paddw %xmm1,%xmm1 + .byte 102,68,15,249,233 // psubw %xmm1,%xmm13 + .byte 102,69,15,253,246 // paddw %xmm14,%xmm14 + .byte 102,69,15,249,230 // psubw %xmm14,%xmm12 + .byte 102,69,15,253,255 // paddw %xmm15,%xmm15 + .byte 102,15,111,76,36,240 // movdqa -0x10(%rsp),%xmm1 + .byte 102,65,15,249,207 // psubw %xmm15,%xmm1 + .byte 102,69,15,253,201 // paddw %xmm9,%xmm9 + .byte 102,15,111,68,36,64 // movdqa 0x40(%rsp),%xmm0 + .byte 102,68,15,219,200 // pand %xmm0,%xmm9 + .byte 102,15,223,193 // pandn %xmm1,%xmm0 + .byte 102,65,15,235,193 // por %xmm9,%xmm0 + .byte 102,68,15,111,200 // movdqa %xmm0,%xmm9 + .byte 102,15,253,228 // paddw %xmm4,%xmm4 + .byte 102,15,111,68,36,80 // movdqa 0x50(%rsp),%xmm0 + .byte 102,15,219,224 // pand %xmm0,%xmm4 + .byte 102,65,15,223,196 // pandn %xmm12,%xmm0 + .byte 102,15,235,196 // por %xmm4,%xmm0 + .byte 102,15,111,200 // movdqa %xmm0,%xmm1 + .byte 102,15,253,219 // paddw %xmm3,%xmm3 
+ .byte 102,15,111,68,36,48 // movdqa 0x30(%rsp),%xmm0 + .byte 102,15,219,216 // pand %xmm0,%xmm3 + .byte 102,65,15,223,197 // pandn %xmm13,%xmm0 + .byte 102,15,235,195 // por %xmm3,%xmm0 + .byte 102,15,111,216 // movdqa %xmm0,%xmm3 + .byte 102,15,253,255 // paddw %xmm7,%xmm7 + .byte 102,15,111,68,36,96 // movdqa 0x60(%rsp),%xmm0 + .byte 102,15,219,248 // pand %xmm0,%xmm7 + .byte 102,65,15,223,194 // pandn %xmm10,%xmm0 + .byte 102,15,235,199 // por %xmm7,%xmm0 + .byte 102,15,111,208 // movdqa %xmm0,%xmm2 + .byte 102,68,15,253,92,36,112 // paddw 0x70(%rsp),%xmm11 + .byte 102,15,253,116,36,32 // paddw 0x20(%rsp),%xmm6 + .byte 102,68,15,253,4,36 // paddw (%rsp),%xmm8 + .byte 102,15,253,108,36,128 // paddw -0x80(%rsp),%xmm5 + .byte 102,15,111,5,211,3,0,0 // movdqa 0x3d3(%rip),%xmm0 # 2d50 <_sk_overlay_sse41_8bit+0x750> + .byte 102,68,15,253,216 // paddw %xmm0,%xmm11 + .byte 102,69,15,253,217 // paddw %xmm9,%xmm11 + .byte 102,15,253,240 // paddw %xmm0,%xmm6 + .byte 102,15,253,241 // paddw %xmm1,%xmm6 + .byte 102,68,15,253,192 // paddw %xmm0,%xmm8 + .byte 102,68,15,253,195 // paddw %xmm3,%xmm8 + .byte 102,15,253,232 // paddw %xmm0,%xmm5 + .byte 102,15,253,234 // paddw %xmm2,%xmm5 + .byte 102,15,111,5,183,3,0,0 // movdqa 0x3b7(%rip),%xmm0 # 2d60 <_sk_overlay_sse41_8bit+0x760> + .byte 102,15,228,240 // pmulhuw %xmm0,%xmm6 + .byte 102,68,15,228,216 // pmulhuw %xmm0,%xmm11 + .byte 102,15,228,232 // pmulhuw %xmm0,%xmm5 + .byte 102,68,15,228,192 // pmulhuw %xmm0,%xmm8 + .byte 102,65,15,113,211,7 // psrlw $0x7,%xmm11 + .byte 102,65,15,113,208,7 // psrlw $0x7,%xmm8 + .byte 102,69,15,103,195 // packuswb %xmm11,%xmm8 + .byte 102,15,113,214,7 // psrlw $0x7,%xmm6 + .byte 102,15,113,213,7 // psrlw $0x7,%xmm5 + .byte 102,15,103,238 // packuswb %xmm6,%xmm5 + .byte 72,173 // lods %ds:(%rsi),%rax + .byte 15,40,148,36,160,0,0,0 // movaps 0xa0(%rsp),%xmm2 + .byte 15,40,156,36,176,0,0,0 // movaps 0xb0(%rsp),%xmm3 + .byte 102,65,15,111,192 // movdqa %xmm8,%xmm0 + .byte 102,15,111,205 // 
movdqa %xmm5,%xmm1 + .byte 72,129,196,200,0,0,0 // add $0xc8,%rsp + .byte 255,224 // jmpq *%rax + BALIGN4 .byte 0,0 // add %al,(%rax) - .byte 127,67 // jg 2263 <_sk_difference_sse41_8bit+0x1b0> + .byte 127,67 // jg 2a47 <_sk_overlay_sse41_8bit+0x447> .byte 0,0 // add %al,(%rax) - .byte 127,67 // jg 2267 <_sk_difference_sse41_8bit+0x1b4> + .byte 127,67 // jg 2a4b <_sk_overlay_sse41_8bit+0x44b> .byte 0,0 // add %al,(%rax) - .byte 127,67 // jg 226b <_sk_difference_sse41_8bit+0x1b8> + .byte 127,67 // jg 2a4f <_sk_overlay_sse41_8bit+0x44f> BALIGN16 .byte 0,0 // add %al,(%rax) @@ -62807,6 +63619,67 @@ BALIGN16 .byte 255 // (bad) .byte 255 // (bad) .byte 255,0 // incl (%rax) + .byte 3,3 // add (%rbx),%eax + .byte 3,3 // add (%rbx),%eax + .byte 7 // (bad) + .byte 7 // (bad) + .byte 7 // (bad) + .byte 7 // (bad) + .byte 11,11 // or (%rbx),%ecx + .byte 11,11 // or (%rbx),%ecx + .byte 15 // (bad) + .byte 15 // (bad) + .byte 15 // (bad) + .byte 15,255 // (bad) + .byte 0,255 // add %bh,%bh + .byte 0,255 // add %bh,%bh + .byte 0,255 // add %bh,%bh + .byte 0,255 // add %bh,%bh + .byte 0,255 // add %bh,%bh + .byte 0,255 // add %bh,%bh + .byte 0,255 // add %bh,%bh + .byte 0,127,0 // add %bh,0x0(%rdi) + .byte 127,0 // jg 2d14 <.literal16+0x304> + .byte 127,0 // jg 2d16 <.literal16+0x306> + .byte 127,0 // jg 2d18 <.literal16+0x308> + .byte 127,0 // jg 2d1a <.literal16+0x30a> + .byte 127,0 // jg 2d1c <.literal16+0x30c> + .byte 127,0 // jg 2d1e <.literal16+0x30e> + .byte 127,0 // jg 2d20 <.literal16+0x310> + .byte 129,128,129,128,129,128,129,128,129,128// addl $0x80818081,-0x7f7e7f7f(%rax) + .byte 129,128,129,128,129,128,3,3,3,3 // addl $0x3030303,-0x7f7e7f7f(%rax) + .byte 7 // (bad) + .byte 7 // (bad) + .byte 7 // (bad) + .byte 7 // (bad) + .byte 11,11 // or (%rbx),%ecx + .byte 11,11 // or (%rbx),%ecx + .byte 15 // (bad) + .byte 15 // (bad) + .byte 15 // (bad) + .byte 15,255 // (bad) + .byte 0,255 // add %bh,%bh + .byte 0,255 // add %bh,%bh + .byte 0,255 // add %bh,%bh + .byte 0,255 
// add %bh,%bh + .byte 0,255 // add %bh,%bh + .byte 0,255 // add %bh,%bh + .byte 0,255 // add %bh,%bh + .byte 0,127,0 // add %bh,0x0(%rdi) + .byte 127,0 // jg 2d54 <.literal16+0x344> + .byte 127,0 // jg 2d56 <.literal16+0x346> + .byte 127,0 // jg 2d58 <.literal16+0x348> + .byte 127,0 // jg 2d5a <.literal16+0x34a> + .byte 127,0 // jg 2d5c <.literal16+0x34c> + .byte 127,0 // jg 2d5e <.literal16+0x34e> + .byte 127,0 // jg 2d60 <.literal16+0x350> + .byte 129,128,129,128,129,128,129,128,129,128// addl $0x80818081,-0x7f7e7f7f(%rax) + .byte 129 // .byte 0x81 + .byte 128 // .byte 0x80 + .byte 129 // .byte 0x81 + .byte 128 // .byte 0x80 + .byte 129 // .byte 0x81 + .byte 128 // .byte 0x80 BALIGN32 HIDDEN _sk_start_pipeline_sse2_8bit @@ -62891,7 +63764,7 @@ HIDDEN _sk_set_rgb_sse2_8bit FUNCTION(_sk_set_rgb_sse2_8bit) _sk_set_rgb_sse2_8bit: .byte 72,173 // lods %ds:(%rsi),%rax - .byte 243,15,16,37,89,40,0,0 // movss 0x2859(%rip),%xmm4 # 291c <_sk_difference_sse2_8bit+0x1c5> + .byte 243,15,16,37,209,48,0,0 // movss 0x30d1(%rip),%xmm4 # 3194 <_sk_overlay_sse2_8bit+0x43f> .byte 243,15,16,40 // movss (%rax),%xmm5 .byte 243,15,89,236 // mulss %xmm4,%xmm5 .byte 243,72,15,44,205 // cvttss2si %xmm5,%rcx @@ -62906,7 +63779,7 @@ _sk_set_rgb_sse2_8bit: .byte 9,208 // or %edx,%eax .byte 102,15,110,224 // movd %eax,%xmm4 .byte 102,15,112,228,0 // pshufd $0x0,%xmm4,%xmm4 - .byte 102,15,111,45,45,40,0,0 // movdqa 0x282d(%rip),%xmm5 # 2930 <_sk_difference_sse2_8bit+0x1d9> + .byte 102,15,111,45,157,48,0,0 // movdqa 0x309d(%rip),%xmm5 # 31a0 <_sk_overlay_sse2_8bit+0x44b> .byte 102,15,219,205 // pand %xmm5,%xmm1 .byte 102,15,219,197 // pand %xmm5,%xmm0 .byte 102,15,235,196 // por %xmm4,%xmm0 @@ -62931,7 +63804,7 @@ _sk_premul_sse2_8bit: .byte 102,15,96,192 // punpcklbw %xmm0,%xmm0 .byte 242,15,112,192,95 // pshuflw $0x5f,%xmm0,%xmm0 .byte 243,15,112,248,95 // pshufhw $0x5f,%xmm0,%xmm7 - .byte 102,15,111,5,225,39,0,0 // movdqa 0x27e1(%rip),%xmm0 # 2940 <_sk_difference_sse2_8bit+0x1e9> + .byte 
102,15,111,5,81,48,0,0 // movdqa 0x3051(%rip),%xmm0 # 31b0 <_sk_overlay_sse2_8bit+0x45b> .byte 102,15,235,248 // por %xmm0,%xmm7 .byte 102,15,235,240 // por %xmm0,%xmm6 .byte 102,69,15,239,201 // pxor %xmm9,%xmm9 @@ -63452,7 +64325,7 @@ _sk_load_a8_sse2_8bit: .byte 117,48 // jne 7fd <_sk_load_a8_sse2_8bit+0x4d> .byte 243,66,15,126,4,2 // movq (%rdx,%r8,1),%xmm0 .byte 102,15,96,192 // punpcklbw %xmm0,%xmm0 - .byte 102,15,84,5,113,33,0,0 // andpd 0x2171(%rip),%xmm0 # 2950 <_sk_difference_sse2_8bit+0x1f9> + .byte 102,15,84,5,225,41,0,0 // andpd 0x29e1(%rip),%xmm0 # 31c0 <_sk_overlay_sse2_8bit+0x46b> .byte 102,15,239,228 // pxor %xmm4,%xmm4 .byte 102,15,40,200 // movapd %xmm0,%xmm1 .byte 102,15,105,204 // punpckhwd %xmm4,%xmm1 @@ -63529,7 +64402,7 @@ _sk_load_a8_dst_sse2_8bit: .byte 117,48 // jne 8f1 <_sk_load_a8_dst_sse2_8bit+0x4d> .byte 243,66,15,126,20,2 // movq (%rdx,%r8,1),%xmm2 .byte 102,15,96,208 // punpcklbw %xmm0,%xmm2 - .byte 102,15,84,21,141,32,0,0 // andpd 0x208d(%rip),%xmm2 # 2960 <_sk_difference_sse2_8bit+0x209> + .byte 102,15,84,21,253,40,0,0 // andpd 0x28fd(%rip),%xmm2 # 31d0 <_sk_overlay_sse2_8bit+0x47b> .byte 102,15,239,228 // pxor %xmm4,%xmm4 .byte 102,15,40,218 // movapd %xmm2,%xmm3 .byte 102,15,105,220 // punpckhwd %xmm4,%xmm3 @@ -63613,7 +64486,7 @@ _sk_store_a8_sse2_8bit: .byte 102,15,107,229 // packssdw %xmm5,%xmm4 .byte 77,133,201 // test %r9,%r9 .byte 117,22 // jne 9f5 <_sk_store_a8_sse2_8bit+0x5d> - .byte 102,15,219,37,137,31,0,0 // pand 0x1f89(%rip),%xmm4 # 2970 <_sk_difference_sse2_8bit+0x219> + .byte 102,15,219,37,249,39,0,0 // pand 0x27f9(%rip),%xmm4 # 31e0 <_sk_overlay_sse2_8bit+0x48b> .byte 102,15,103,228 // packuswb %xmm4,%xmm4 .byte 102,66,15,214,36,2 // movq %xmm4,(%rdx,%r8,1) .byte 72,173 // lods %ds:(%rsi),%rax @@ -63634,7 +64507,7 @@ _sk_store_a8_sse2_8bit: .byte 102,15,127,100,36,184 // movdqa %xmm4,-0x48(%rsp) .byte 138,68,36,188 // mov -0x44(%rsp),%al .byte 66,136,68,2,2 // mov %al,0x2(%rdx,%r8,1) - .byte 
102,15,219,37,51,31,0,0 // pand 0x1f33(%rip),%xmm4 # 2970 <_sk_difference_sse2_8bit+0x219> + .byte 102,15,219,37,163,39,0,0 // pand 0x27a3(%rip),%xmm4 # 31e0 <_sk_overlay_sse2_8bit+0x48b> .byte 102,15,103,228 // packuswb %xmm4,%xmm4 .byte 102,15,126,224 // movd %xmm4,%eax .byte 102,66,137,4,2 // mov %ax,(%rdx,%r8,1) @@ -63648,7 +64521,7 @@ _sk_store_a8_sse2_8bit: .byte 102,15,127,100,36,200 // movdqa %xmm4,-0x38(%rsp) .byte 138,68,36,208 // mov -0x30(%rsp),%al .byte 66,136,68,2,4 // mov %al,0x4(%rdx,%r8,1) - .byte 102,15,219,37,239,30,0,0 // pand 0x1eef(%rip),%xmm4 # 2970 <_sk_difference_sse2_8bit+0x219> + .byte 102,15,219,37,95,39,0,0 // pand 0x275f(%rip),%xmm4 # 31e0 <_sk_overlay_sse2_8bit+0x48b> .byte 102,15,103,228 // packuswb %xmm4,%xmm4 .byte 102,66,15,126,36,2 // movd %xmm4,(%rdx,%r8,1) .byte 233,97,255,255,255 // jmpq 9f1 <_sk_store_a8_sse2_8bit+0x59> @@ -63658,7 +64531,7 @@ _sk_store_a8_sse2_8bit: .byte 255 // (bad) .byte 255 // (bad) .byte 255 // (bad) - .byte 233,255,255,255,218 // jmpq ffffffffdb000aa0 <_sk_difference_sse2_8bit+0xffffffffdaffe349> + .byte 233,255,255,255,218 // jmpq ffffffffdb000aa0 <_sk_overlay_sse2_8bit+0xffffffffdaffdd4b> .byte 255 // (bad) .byte 255 // (bad) .byte 255,203 // dec %ebx @@ -63685,12 +64558,12 @@ _sk_load_g8_sse2_8bit: .byte 117,116 // jne b3d <_sk_load_g8_sse2_8bit+0x91> .byte 243,66,15,126,4,2 // movq (%rdx,%r8,1),%xmm0 .byte 102,15,96,192 // punpcklbw %xmm0,%xmm0 - .byte 102,15,84,5,165,30,0,0 // andpd 0x1ea5(%rip),%xmm0 # 2980 <_sk_difference_sse2_8bit+0x229> + .byte 102,15,84,5,21,39,0,0 // andpd 0x2715(%rip),%xmm0 # 31f0 <_sk_overlay_sse2_8bit+0x49b> .byte 102,15,239,201 // pxor %xmm1,%xmm1 .byte 102,15,40,224 // movapd %xmm0,%xmm4 .byte 102,15,97,225 // punpcklwd %xmm1,%xmm4 .byte 102,15,105,193 // punpckhwd %xmm1,%xmm0 - .byte 102,15,111,45,157,30,0,0 // movdqa 0x1e9d(%rip),%xmm5 # 2990 <_sk_difference_sse2_8bit+0x239> + .byte 102,15,111,45,13,39,0,0 // movdqa 0x270d(%rip),%xmm5 # 3200 
<_sk_overlay_sse2_8bit+0x4ab> .byte 102,15,112,240,245 // pshufd $0xf5,%xmm0,%xmm6 .byte 102,15,244,197 // pmuludq %xmm5,%xmm0 .byte 102,15,112,200,232 // pshufd $0xe8,%xmm0,%xmm1 @@ -63703,7 +64576,7 @@ _sk_load_g8_sse2_8bit: .byte 102,15,244,245 // pmuludq %xmm5,%xmm6 .byte 102,15,112,230,232 // pshufd $0xe8,%xmm6,%xmm4 .byte 102,15,98,196 // punpckldq %xmm4,%xmm0 - .byte 102,15,111,37,111,30,0,0 // movdqa 0x1e6f(%rip),%xmm4 # 29a0 <_sk_difference_sse2_8bit+0x249> + .byte 102,15,111,37,223,38,0,0 // movdqa 0x26df(%rip),%xmm4 # 3210 <_sk_overlay_sse2_8bit+0x4bb> .byte 102,15,235,196 // por %xmm4,%xmm0 .byte 102,15,235,204 // por %xmm4,%xmm1 .byte 72,173 // lods %ds:(%rsi),%rax @@ -63777,12 +64650,12 @@ _sk_load_g8_dst_sse2_8bit: .byte 117,116 // jne c7d <_sk_load_g8_dst_sse2_8bit+0x91> .byte 243,66,15,126,20,2 // movq (%rdx,%r8,1),%xmm2 .byte 102,15,96,208 // punpcklbw %xmm0,%xmm2 - .byte 102,15,84,21,149,29,0,0 // andpd 0x1d95(%rip),%xmm2 # 29b0 <_sk_difference_sse2_8bit+0x259> + .byte 102,15,84,21,5,38,0,0 // andpd 0x2605(%rip),%xmm2 # 3220 <_sk_overlay_sse2_8bit+0x4cb> .byte 102,15,239,219 // pxor %xmm3,%xmm3 .byte 102,15,40,226 // movapd %xmm2,%xmm4 .byte 102,15,97,227 // punpcklwd %xmm3,%xmm4 .byte 102,15,105,211 // punpckhwd %xmm3,%xmm2 - .byte 102,15,111,45,141,29,0,0 // movdqa 0x1d8d(%rip),%xmm5 # 29c0 <_sk_difference_sse2_8bit+0x269> + .byte 102,15,111,45,253,37,0,0 // movdqa 0x25fd(%rip),%xmm5 # 3230 <_sk_overlay_sse2_8bit+0x4db> .byte 102,15,112,242,245 // pshufd $0xf5,%xmm2,%xmm6 .byte 102,15,244,213 // pmuludq %xmm5,%xmm2 .byte 102,15,112,218,232 // pshufd $0xe8,%xmm2,%xmm3 @@ -63795,7 +64668,7 @@ _sk_load_g8_dst_sse2_8bit: .byte 102,15,244,245 // pmuludq %xmm5,%xmm6 .byte 102,15,112,230,232 // pshufd $0xe8,%xmm6,%xmm4 .byte 102,15,98,212 // punpckldq %xmm4,%xmm2 - .byte 102,15,111,37,95,29,0,0 // movdqa 0x1d5f(%rip),%xmm4 # 29d0 <_sk_difference_sse2_8bit+0x279> + .byte 102,15,111,37,207,37,0,0 // movdqa 0x25cf(%rip),%xmm4 # 3240 
<_sk_overlay_sse2_8bit+0x4eb> .byte 102,15,235,212 // por %xmm4,%xmm2 .byte 102,15,235,220 // por %xmm4,%xmm3 .byte 72,173 // lods %ds:(%rsi),%rax @@ -64012,7 +64885,7 @@ _sk_scale_1_float_sse2_8bit: .byte 102,68,15,111,200 // movdqa %xmm0,%xmm9 .byte 72,173 // lods %ds:(%rsi),%rax .byte 243,15,16,0 // movss (%rax),%xmm0 - .byte 243,15,89,5,144,25,0,0 // mulss 0x1990(%rip),%xmm0 # 2920 <_sk_difference_sse2_8bit+0x1c9> + .byte 243,15,89,5,8,34,0,0 // mulss 0x2208(%rip),%xmm0 # 3198 <_sk_overlay_sse2_8bit+0x443> .byte 243,15,44,192 // cvttss2si %xmm0,%eax .byte 102,15,239,246 // pxor %xmm6,%xmm6 .byte 102,65,15,111,193 // movdqa %xmm9,%xmm0 @@ -64024,7 +64897,7 @@ _sk_scale_1_float_sse2_8bit: .byte 102,15,96,246 // punpcklbw %xmm6,%xmm6 .byte 242,15,112,246,0 // pshuflw $0x0,%xmm6,%xmm6 .byte 102,15,112,246,80 // pshufd $0x50,%xmm6,%xmm6 - .byte 102,15,219,53,23,26,0,0 // pand 0x1a17(%rip),%xmm6 # 29e0 <_sk_difference_sse2_8bit+0x289> + .byte 102,15,219,53,135,34,0,0 // pand 0x2287(%rip),%xmm6 # 3250 <_sk_overlay_sse2_8bit+0x4fb> .byte 102,15,111,254 // movdqa %xmm6,%xmm7 .byte 102,65,15,213,248 // pmullw %xmm8,%xmm7 .byte 102,15,111,230 // movdqa %xmm6,%xmm4 @@ -64060,7 +64933,7 @@ _sk_scale_u8_sse2_8bit: .byte 15,133,239,0,0,0 // jne 1129 <_sk_scale_u8_sse2_8bit+0x110> .byte 243,66,15,126,36,2 // movq (%rdx,%r8,1),%xmm4 .byte 102,15,96,224 // punpcklbw %xmm0,%xmm4 - .byte 102,15,84,37,164,25,0,0 // andpd 0x19a4(%rip),%xmm4 # 29f0 <_sk_difference_sse2_8bit+0x299> + .byte 102,15,84,37,20,34,0,0 // andpd 0x2214(%rip),%xmm4 # 3260 <_sk_overlay_sse2_8bit+0x50b> .byte 102,69,15,239,192 // pxor %xmm8,%xmm8 .byte 102,15,40,236 // movapd %xmm4,%xmm5 .byte 102,65,15,105,232 // punpckhwd %xmm8,%xmm5 @@ -64169,7 +65042,7 @@ FUNCTION(_sk_lerp_1_float_sse2_8bit) _sk_lerp_1_float_sse2_8bit: .byte 72,173 // lods %ds:(%rsi),%rax .byte 243,15,16,32 // movss (%rax),%xmm4 - .byte 243,15,89,37,58,23,0,0 // mulss 0x173a(%rip),%xmm4 # 2924 <_sk_difference_sse2_8bit+0x1cd> + .byte 
243,15,89,37,178,31,0,0 // mulss 0x1fb2(%rip),%xmm4 # 319c <_sk_overlay_sse2_8bit+0x447> .byte 243,15,44,196 // cvttss2si %xmm4,%eax .byte 102,15,110,224 // movd %eax,%xmm4 .byte 102,15,96,228 // punpcklbw %xmm4,%xmm4 @@ -64182,7 +65055,7 @@ _sk_lerp_1_float_sse2_8bit: .byte 102,68,15,111,217 // movdqa %xmm1,%xmm11 .byte 102,69,15,96,217 // punpcklbw %xmm9,%xmm11 .byte 102,65,15,104,201 // punpckhbw %xmm9,%xmm1 - .byte 102,15,111,53,213,23,0,0 // movdqa 0x17d5(%rip),%xmm6 # 2a00 <_sk_difference_sse2_8bit+0x2a9> + .byte 102,15,111,53,69,32,0,0 // movdqa 0x2045(%rip),%xmm6 # 3270 <_sk_overlay_sse2_8bit+0x51b> .byte 102,65,15,219,240 // pand %xmm8,%xmm6 .byte 102,15,111,230 // movdqa %xmm6,%xmm4 .byte 102,15,213,225 // pmullw %xmm1,%xmm4 @@ -64250,7 +65123,7 @@ _sk_lerp_u8_sse2_8bit: .byte 15,133,141,1,0,0 // jne 14c0 <_sk_lerp_u8_sse2_8bit+0x1ae> .byte 243,66,15,126,44,2 // movq (%rdx,%r8,1),%xmm5 .byte 102,15,96,232 // punpcklbw %xmm0,%xmm5 - .byte 102,15,84,45,203,22,0,0 // andpd 0x16cb(%rip),%xmm5 # 2a10 <_sk_difference_sse2_8bit+0x2b9> + .byte 102,15,84,45,59,31,0,0 // andpd 0x1f3b(%rip),%xmm5 # 3280 <_sk_overlay_sse2_8bit+0x52b> .byte 102,69,15,239,192 // pxor %xmm8,%xmm8 .byte 102,15,40,229 // movapd %xmm5,%xmm4 .byte 102,65,15,105,224 // punpckhwd %xmm8,%xmm4 @@ -64408,7 +65281,7 @@ HIDDEN _sk_black_color_sse2_8bit FUNCTION(_sk_black_color_sse2_8bit) _sk_black_color_sse2_8bit: .byte 72,173 // lods %ds:(%rsi),%rax - .byte 15,40,5,147,20,0,0 // movaps 0x1493(%rip),%xmm0 # 2a20 <_sk_difference_sse2_8bit+0x2c9> + .byte 15,40,5,3,29,0,0 // movaps 0x1d03(%rip),%xmm0 # 3290 <_sk_overlay_sse2_8bit+0x53b> .byte 15,40,200 // movaps %xmm0,%xmm1 .byte 255,224 // jmpq *%rax @@ -65300,7 +66173,7 @@ _sk_darken_sse2_8bit: .byte 102,65,15,248,234 // psubb %xmm10,%xmm5 .byte 102,15,248,207 // psubb %xmm7,%xmm1 .byte 102,15,248,196 // psubb %xmm4,%xmm0 - .byte 102,15,111,37,174,5,0,0 // movdqa 0x5ae(%rip),%xmm4 # 2a30 <_sk_difference_sse2_8bit+0x2d9> + .byte 
102,15,111,37,30,14,0,0 // movdqa 0xe1e(%rip),%xmm4 # 32a0 <_sk_overlay_sse2_8bit+0x54b> .byte 102,15,219,236 // pand %xmm4,%xmm5 .byte 102,15,111,252 // movdqa %xmm4,%xmm7 .byte 102,15,223,248 // pandn %xmm0,%xmm7 @@ -65408,7 +66281,7 @@ _sk_lighten_sse2_8bit: .byte 102,65,15,248,234 // psubb %xmm10,%xmm5 .byte 102,15,248,207 // psubb %xmm7,%xmm1 .byte 102,15,248,196 // psubb %xmm4,%xmm0 - .byte 102,15,111,37,210,3,0,0 // movdqa 0x3d2(%rip),%xmm4 # 2a40 <_sk_difference_sse2_8bit+0x2e9> + .byte 102,15,111,37,66,12,0,0 // movdqa 0xc42(%rip),%xmm4 # 32b0 <_sk_overlay_sse2_8bit+0x55b> .byte 102,15,219,236 // pand %xmm4,%xmm5 .byte 102,15,111,252 // movdqa %xmm4,%xmm7 .byte 102,15,223,248 // pandn %xmm0,%xmm7 @@ -65458,7 +66331,7 @@ _sk_exclusion_sse2_8bit: .byte 102,15,113,214,8 // psrlw $0x8,%xmm6 .byte 102,15,103,244 // packuswb %xmm4,%xmm6 .byte 102,15,103,239 // packuswb %xmm7,%xmm5 - .byte 102,15,111,37,21,3,0,0 // movdqa 0x315(%rip),%xmm4 # 2a50 <_sk_difference_sse2_8bit+0x2f9> + .byte 102,15,111,37,133,11,0,0 // movdqa 0xb85(%rip),%xmm4 # 32c0 <_sk_overlay_sse2_8bit+0x56b> .byte 102,15,248,205 // psubb %xmm5,%xmm1 .byte 102,15,219,236 // pand %xmm4,%xmm5 .byte 102,15,219,230 // pand %xmm6,%xmm4 @@ -65557,7 +66430,7 @@ _sk_difference_sse2_8bit: .byte 102,65,15,103,244 // packuswb %xmm12,%xmm6 .byte 102,65,15,218,226 // pminub %xmm10,%xmm4 .byte 102,65,15,218,243 // pminub %xmm11,%xmm6 - .byte 102,15,111,45,98,1,0,0 // movdqa 0x162(%rip),%xmm5 # 2a60 <_sk_difference_sse2_8bit+0x309> + .byte 102,15,111,45,210,9,0,0 // movdqa 0x9d2(%rip),%xmm5 # 32d0 <_sk_overlay_sse2_8bit+0x57b> .byte 102,15,248,206 // psubb %xmm6,%xmm1 .byte 102,15,219,245 // pand %xmm5,%xmm6 .byte 102,15,219,236 // pand %xmm4,%xmm5 @@ -65567,13 +66440,454 @@ _sk_difference_sse2_8bit: .byte 72,173 // lods %ds:(%rsi),%rax .byte 255,224 // jmpq *%rax +HIDDEN _sk_hardlight_sse2_8bit +.globl _sk_hardlight_sse2_8bit +FUNCTION(_sk_hardlight_sse2_8bit) +_sk_hardlight_sse2_8bit: + .byte 
72,129,236,184,0,0,0 // sub $0xb8,%rsp + .byte 102,68,15,111,250 // movdqa %xmm2,%xmm15 + .byte 102,68,15,111,200 // movdqa %xmm0,%xmm9 + .byte 102,15,239,237 // pxor %xmm5,%xmm5 + .byte 102,69,15,111,241 // movdqa %xmm9,%xmm14 + .byte 102,68,15,96,245 // punpcklbw %xmm5,%xmm14 + .byte 102,15,111,193 // movdqa %xmm1,%xmm0 + .byte 102,15,96,197 // punpcklbw %xmm5,%xmm0 + .byte 102,68,15,111,192 // movdqa %xmm0,%xmm8 + .byte 242,65,15,112,193,231 // pshuflw $0xe7,%xmm9,%xmm0 + .byte 102,68,15,104,205 // punpckhbw %xmm5,%xmm9 + .byte 243,15,112,192,231 // pshufhw $0xe7,%xmm0,%xmm0 + .byte 102,15,112,192,232 // pshufd $0xe8,%xmm0,%xmm0 + .byte 102,15,96,192 // punpcklbw %xmm0,%xmm0 + .byte 242,15,112,192,95 // pshuflw $0x5f,%xmm0,%xmm0 + .byte 243,15,112,224,95 // pshufhw $0x5f,%xmm0,%xmm4 + .byte 242,15,112,193,231 // pshuflw $0xe7,%xmm1,%xmm0 + .byte 102,15,104,205 // punpckhbw %xmm5,%xmm1 + .byte 102,15,127,76,36,192 // movdqa %xmm1,-0x40(%rsp) + .byte 243,15,112,192,231 // pshufhw $0xe7,%xmm0,%xmm0 + .byte 102,15,112,192,232 // pshufd $0xe8,%xmm0,%xmm0 + .byte 102,15,96,192 // punpcklbw %xmm0,%xmm0 + .byte 242,15,112,192,95 // pshuflw $0x5f,%xmm0,%xmm0 + .byte 243,68,15,112,224,95 // pshufhw $0x5f,%xmm0,%xmm12 + .byte 102,65,15,111,252 // movdqa %xmm12,%xmm7 + .byte 102,15,104,253 // punpckhbw %xmm5,%xmm7 + .byte 102,15,127,124,36,48 // movdqa %xmm7,0x30(%rsp) + .byte 102,68,15,96,229 // punpcklbw %xmm5,%xmm12 + .byte 102,15,111,196 // movdqa %xmm4,%xmm0 + .byte 102,15,104,197 // punpckhbw %xmm5,%xmm0 + .byte 102,15,127,68,36,160 // movdqa %xmm0,-0x60(%rsp) + .byte 102,15,96,229 // punpcklbw %xmm5,%xmm4 + .byte 102,68,15,127,188,36,160,0,0,0 // movdqa %xmm15,0xa0(%rsp) + .byte 102,65,15,111,207 // movdqa %xmm15,%xmm1 + .byte 242,65,15,112,199,231 // pshuflw $0xe7,%xmm15,%xmm0 + .byte 102,68,15,96,253 // punpcklbw %xmm5,%xmm15 + .byte 102,15,104,205 // punpckhbw %xmm5,%xmm1 + .byte 102,15,127,76,36,144 // movdqa %xmm1,-0x70(%rsp) + .byte 102,15,127,156,36,144,0,0,0 
// movdqa %xmm3,0x90(%rsp) + .byte 102,15,111,211 // movdqa %xmm3,%xmm2 + .byte 102,15,111,203 // movdqa %xmm3,%xmm1 + .byte 102,15,96,205 // punpcklbw %xmm5,%xmm1 + .byte 102,15,127,76,36,128 // movdqa %xmm1,-0x80(%rsp) + .byte 102,15,104,213 // punpckhbw %xmm5,%xmm2 + .byte 102,15,127,148,36,128,0,0,0 // movdqa %xmm2,0x80(%rsp) + .byte 243,15,112,192,231 // pshufhw $0xe7,%xmm0,%xmm0 + .byte 102,15,112,192,232 // pshufd $0xe8,%xmm0,%xmm0 + .byte 102,15,96,192 // punpcklbw %xmm0,%xmm0 + .byte 242,15,112,192,95 // pshuflw $0x5f,%xmm0,%xmm0 + .byte 243,15,112,240,95 // pshufhw $0x5f,%xmm0,%xmm6 + .byte 242,15,112,195,231 // pshuflw $0xe7,%xmm3,%xmm0 + .byte 243,15,112,192,231 // pshufhw $0xe7,%xmm0,%xmm0 + .byte 102,15,112,192,232 // pshufd $0xe8,%xmm0,%xmm0 + .byte 102,15,96,192 // punpcklbw %xmm0,%xmm0 + .byte 242,15,112,192,95 // pshuflw $0x5f,%xmm0,%xmm0 + .byte 243,68,15,112,232,95 // pshufhw $0x5f,%xmm0,%xmm13 + .byte 102,65,15,111,205 // movdqa %xmm13,%xmm1 + .byte 102,15,104,205 // punpckhbw %xmm5,%xmm1 + .byte 102,15,127,76,36,176 // movdqa %xmm1,-0x50(%rsp) + .byte 102,68,15,96,237 // punpcklbw %xmm5,%xmm13 + .byte 102,15,111,206 // movdqa %xmm6,%xmm1 + .byte 102,15,104,205 // punpckhbw %xmm5,%xmm1 + .byte 102,15,96,245 // punpcklbw %xmm5,%xmm6 + .byte 102,69,15,111,214 // movdqa %xmm14,%xmm10 + .byte 102,69,15,253,210 // paddw %xmm10,%xmm10 + .byte 102,65,15,111,193 // movdqa %xmm9,%xmm0 + .byte 102,15,253,192 // paddw %xmm0,%xmm0 + .byte 102,65,15,111,216 // movdqa %xmm8,%xmm3 + .byte 102,69,15,253,192 // paddw %xmm8,%xmm8 + .byte 102,15,111,84,36,192 // movdqa -0x40(%rsp),%xmm2 + .byte 102,15,253,210 // paddw %xmm2,%xmm2 + .byte 102,15,217,215 // psubusw %xmm7,%xmm2 + .byte 102,15,117,213 // pcmpeqw %xmm5,%xmm2 + .byte 102,15,127,84,36,112 // movdqa %xmm2,0x70(%rsp) + .byte 102,69,15,217,196 // psubusw %xmm12,%xmm8 + .byte 102,68,15,117,197 // pcmpeqw %xmm5,%xmm8 + .byte 102,15,111,84,36,160 // movdqa -0x60(%rsp),%xmm2 + .byte 102,15,217,194 // psubusw 
%xmm2,%xmm0 + .byte 102,15,117,197 // pcmpeqw %xmm5,%xmm0 + .byte 102,15,127,68,36,64 // movdqa %xmm0,0x40(%rsp) + .byte 102,68,15,111,220 // movdqa %xmm4,%xmm11 + .byte 102,69,15,217,211 // psubusw %xmm11,%xmm10 + .byte 102,68,15,117,213 // pcmpeqw %xmm5,%xmm10 + .byte 102,15,111,45,29,8,0,0 // movdqa 0x81d(%rip),%xmm5 # 32e0 <_sk_overlay_sse2_8bit+0x58b> + .byte 102,15,111,198 // movdqa %xmm6,%xmm0 + .byte 102,15,239,197 // pxor %xmm5,%xmm0 + .byte 102,65,15,213,198 // pmullw %xmm14,%xmm0 + .byte 102,15,127,68,36,96 // movdqa %xmm0,0x60(%rsp) + .byte 102,65,15,111,195 // movdqa %xmm11,%xmm0 + .byte 102,15,239,197 // pxor %xmm5,%xmm0 + .byte 102,65,15,213,199 // pmullw %xmm15,%xmm0 + .byte 102,15,127,68,36,80 // movdqa %xmm0,0x50(%rsp) + .byte 102,15,111,198 // movdqa %xmm6,%xmm0 + .byte 102,65,15,249,247 // psubw %xmm15,%xmm6 + .byte 102,15,127,52,36 // movdqa %xmm6,(%rsp) + .byte 102,65,15,111,255 // movdqa %xmm15,%xmm7 + .byte 102,65,15,213,254 // pmullw %xmm14,%xmm7 + .byte 102,65,15,213,195 // pmullw %xmm11,%xmm0 + .byte 102,15,127,68,36,16 // movdqa %xmm0,0x10(%rsp) + .byte 102,69,15,249,222 // psubw %xmm14,%xmm11 + .byte 102,15,111,193 // movdqa %xmm1,%xmm0 + .byte 102,15,239,197 // pxor %xmm5,%xmm0 + .byte 102,65,15,213,193 // pmullw %xmm9,%xmm0 + .byte 102,15,127,68,36,32 // movdqa %xmm0,0x20(%rsp) + .byte 102,68,15,111,250 // movdqa %xmm2,%xmm15 + .byte 102,68,15,239,253 // pxor %xmm5,%xmm15 + .byte 102,15,111,116,36,144 // movdqa -0x70(%rsp),%xmm6 + .byte 102,68,15,213,254 // pmullw %xmm6,%xmm15 + .byte 102,15,111,193 // movdqa %xmm1,%xmm0 + .byte 102,15,249,206 // psubw %xmm6,%xmm1 + .byte 102,15,127,76,36,224 // movdqa %xmm1,-0x20(%rsp) + .byte 102,65,15,213,241 // pmullw %xmm9,%xmm6 + .byte 102,15,213,194 // pmullw %xmm2,%xmm0 + .byte 102,15,127,68,36,240 // movdqa %xmm0,-0x10(%rsp) + .byte 102,65,15,249,209 // psubw %xmm9,%xmm2 + .byte 102,15,127,84,36,160 // movdqa %xmm2,-0x60(%rsp) + .byte 102,65,15,111,197 // movdqa %xmm13,%xmm0 + .byte 
102,15,111,200 // movdqa %xmm0,%xmm1 + .byte 102,15,239,205 // pxor %xmm5,%xmm1 + .byte 102,15,213,203 // pmullw %xmm3,%xmm1 + .byte 102,15,127,76,36,144 // movdqa %xmm1,-0x70(%rsp) + .byte 102,69,15,111,204 // movdqa %xmm12,%xmm9 + .byte 102,68,15,239,205 // pxor %xmm5,%xmm9 + .byte 102,15,111,100,36,128 // movdqa -0x80(%rsp),%xmm4 + .byte 102,68,15,213,204 // pmullw %xmm4,%xmm9 + .byte 102,68,15,111,232 // movdqa %xmm0,%xmm13 + .byte 102,15,249,196 // psubw %xmm4,%xmm0 + .byte 102,15,127,68,36,208 // movdqa %xmm0,-0x30(%rsp) + .byte 102,15,213,227 // pmullw %xmm3,%xmm4 + .byte 102,69,15,213,236 // pmullw %xmm12,%xmm13 + .byte 102,68,15,249,227 // psubw %xmm3,%xmm12 + .byte 102,15,111,76,36,176 // movdqa -0x50(%rsp),%xmm1 + .byte 102,15,111,193 // movdqa %xmm1,%xmm0 + .byte 102,15,239,197 // pxor %xmm5,%xmm0 + .byte 102,15,111,92,36,192 // movdqa -0x40(%rsp),%xmm3 + .byte 102,15,213,195 // pmullw %xmm3,%xmm0 + .byte 102,15,127,68,36,128 // movdqa %xmm0,-0x80(%rsp) + .byte 102,15,111,68,36,48 // movdqa 0x30(%rsp),%xmm0 + .byte 102,15,239,232 // pxor %xmm0,%xmm5 + .byte 102,15,111,148,36,128,0,0,0 // movdqa 0x80(%rsp),%xmm2 + .byte 102,15,213,234 // pmullw %xmm2,%xmm5 + .byte 102,68,15,111,241 // movdqa %xmm1,%xmm14 + .byte 102,15,249,202 // psubw %xmm2,%xmm1 + .byte 102,15,127,76,36,176 // movdqa %xmm1,-0x50(%rsp) + .byte 102,15,213,211 // pmullw %xmm3,%xmm2 + .byte 102,68,15,213,240 // pmullw %xmm0,%xmm14 + .byte 102,15,249,195 // psubw %xmm3,%xmm0 + .byte 102,15,213,68,36,176 // pmullw -0x50(%rsp),%xmm0 + .byte 102,68,15,213,100,36,208 // pmullw -0x30(%rsp),%xmm12 + .byte 102,15,111,76,36,160 // movdqa -0x60(%rsp),%xmm1 + .byte 102,15,213,76,36,224 // pmullw -0x20(%rsp),%xmm1 + .byte 102,68,15,213,28,36 // pmullw (%rsp),%xmm11 + .byte 102,15,253,192 // paddw %xmm0,%xmm0 + .byte 102,68,15,249,240 // psubw %xmm0,%xmm14 + .byte 102,69,15,253,228 // paddw %xmm12,%xmm12 + .byte 102,69,15,249,236 // psubw %xmm12,%xmm13 + .byte 102,15,253,201 // paddw %xmm1,%xmm1 + 
.byte 102,15,111,92,36,240 // movdqa -0x10(%rsp),%xmm3 + .byte 102,15,249,217 // psubw %xmm1,%xmm3 + .byte 102,69,15,253,219 // paddw %xmm11,%xmm11 + .byte 102,15,111,68,36,16 // movdqa 0x10(%rsp),%xmm0 + .byte 102,65,15,249,195 // psubw %xmm11,%xmm0 + .byte 102,15,253,255 // paddw %xmm7,%xmm7 + .byte 102,65,15,219,250 // pand %xmm10,%xmm7 + .byte 102,68,15,223,208 // pandn %xmm0,%xmm10 + .byte 102,68,15,235,215 // por %xmm7,%xmm10 + .byte 102,15,253,246 // paddw %xmm6,%xmm6 + .byte 102,15,111,68,36,64 // movdqa 0x40(%rsp),%xmm0 + .byte 102,15,219,240 // pand %xmm0,%xmm6 + .byte 102,15,223,195 // pandn %xmm3,%xmm0 + .byte 102,15,235,198 // por %xmm6,%xmm0 + .byte 102,15,111,216 // movdqa %xmm0,%xmm3 + .byte 102,15,253,228 // paddw %xmm4,%xmm4 + .byte 102,65,15,219,224 // pand %xmm8,%xmm4 + .byte 102,69,15,223,197 // pandn %xmm13,%xmm8 + .byte 102,68,15,235,196 // por %xmm4,%xmm8 + .byte 102,15,253,210 // paddw %xmm2,%xmm2 + .byte 102,15,111,68,36,112 // movdqa 0x70(%rsp),%xmm0 + .byte 102,15,219,208 // pand %xmm0,%xmm2 + .byte 102,65,15,223,198 // pandn %xmm14,%xmm0 + .byte 102,15,235,194 // por %xmm2,%xmm0 + .byte 102,15,111,200 // movdqa %xmm0,%xmm1 + .byte 102,15,111,84,36,80 // movdqa 0x50(%rsp),%xmm2 + .byte 102,15,253,84,36,96 // paddw 0x60(%rsp),%xmm2 + .byte 102,68,15,253,124,36,32 // paddw 0x20(%rsp),%xmm15 + .byte 102,68,15,253,76,36,144 // paddw -0x70(%rsp),%xmm9 + .byte 102,15,253,108,36,128 // paddw -0x80(%rsp),%xmm5 + .byte 102,15,111,5,33,6,0,0 // movdqa 0x621(%rip),%xmm0 # 32f0 <_sk_overlay_sse2_8bit+0x59b> + .byte 102,15,253,208 // paddw %xmm0,%xmm2 + .byte 102,68,15,253,210 // paddw %xmm2,%xmm10 + .byte 102,68,15,253,248 // paddw %xmm0,%xmm15 + .byte 102,68,15,253,251 // paddw %xmm3,%xmm15 + .byte 102,68,15,253,200 // paddw %xmm0,%xmm9 + .byte 102,69,15,253,193 // paddw %xmm9,%xmm8 + .byte 102,15,253,232 // paddw %xmm0,%xmm5 + .byte 102,15,253,233 // paddw %xmm1,%xmm5 + .byte 102,15,111,5,4,6,0,0 // movdqa 0x604(%rip),%xmm0 # 3300 
<_sk_overlay_sse2_8bit+0x5ab> + .byte 102,15,228,232 // pmulhuw %xmm0,%xmm5 + .byte 102,68,15,228,192 // pmulhuw %xmm0,%xmm8 + .byte 102,68,15,228,248 // pmulhuw %xmm0,%xmm15 + .byte 102,68,15,228,208 // pmulhuw %xmm0,%xmm10 + .byte 102,65,15,113,215,7 // psrlw $0x7,%xmm15 + .byte 102,65,15,113,210,7 // psrlw $0x7,%xmm10 + .byte 102,69,15,103,215 // packuswb %xmm15,%xmm10 + .byte 102,15,113,213,7 // psrlw $0x7,%xmm5 + .byte 102,65,15,113,208,7 // psrlw $0x7,%xmm8 + .byte 102,68,15,103,197 // packuswb %xmm5,%xmm8 + .byte 72,173 // lods %ds:(%rsi),%rax + .byte 15,40,148,36,160,0,0,0 // movaps 0xa0(%rsp),%xmm2 + .byte 15,40,156,36,144,0,0,0 // movaps 0x90(%rsp),%xmm3 + .byte 102,65,15,111,194 // movdqa %xmm10,%xmm0 + .byte 102,65,15,111,200 // movdqa %xmm8,%xmm1 + .byte 72,129,196,184,0,0,0 // add $0xb8,%rsp + .byte 255,224 // jmpq *%rax + +HIDDEN _sk_overlay_sse2_8bit +.globl _sk_overlay_sse2_8bit +FUNCTION(_sk_overlay_sse2_8bit) +_sk_overlay_sse2_8bit: + .byte 72,129,236,200,0,0,0 // sub $0xc8,%rsp + .byte 102,15,239,228 // pxor %xmm4,%xmm4 + .byte 102,68,15,111,248 // movdqa %xmm0,%xmm15 + .byte 102,68,15,96,252 // punpcklbw %xmm4,%xmm15 + .byte 102,15,111,233 // movdqa %xmm1,%xmm5 + .byte 102,15,96,236 // punpcklbw %xmm4,%xmm5 + .byte 102,15,127,108,36,144 // movdqa %xmm5,-0x70(%rsp) + .byte 242,15,112,232,231 // pshuflw $0xe7,%xmm0,%xmm5 + .byte 102,15,104,196 // punpckhbw %xmm4,%xmm0 + .byte 102,15,127,68,36,128 // movdqa %xmm0,-0x80(%rsp) + .byte 243,15,112,197,231 // pshufhw $0xe7,%xmm5,%xmm0 + .byte 102,15,112,192,232 // pshufd $0xe8,%xmm0,%xmm0 + .byte 102,15,96,192 // punpcklbw %xmm0,%xmm0 + .byte 242,15,112,192,95 // pshuflw $0x5f,%xmm0,%xmm0 + .byte 243,15,112,232,95 // pshufhw $0x5f,%xmm0,%xmm5 + .byte 242,15,112,193,231 // pshuflw $0xe7,%xmm1,%xmm0 + .byte 102,15,104,204 // punpckhbw %xmm4,%xmm1 + .byte 102,15,127,140,36,144,0,0,0 // movdqa %xmm1,0x90(%rsp) + .byte 243,15,112,192,231 // pshufhw $0xe7,%xmm0,%xmm0 + .byte 102,15,112,192,232 // pshufd 
$0xe8,%xmm0,%xmm0 + .byte 102,15,96,192 // punpcklbw %xmm0,%xmm0 + .byte 242,15,112,192,95 // pshuflw $0x5f,%xmm0,%xmm0 + .byte 243,15,112,200,95 // pshufhw $0x5f,%xmm0,%xmm1 + .byte 102,15,111,193 // movdqa %xmm1,%xmm0 + .byte 102,15,104,196 // punpckhbw %xmm4,%xmm0 + .byte 102,15,127,132,36,128,0,0,0 // movdqa %xmm0,0x80(%rsp) + .byte 102,15,96,204 // punpcklbw %xmm4,%xmm1 + .byte 102,15,127,76,36,96 // movdqa %xmm1,0x60(%rsp) + .byte 102,68,15,111,221 // movdqa %xmm5,%xmm11 + .byte 102,68,15,104,220 // punpckhbw %xmm4,%xmm11 + .byte 102,15,96,236 // punpcklbw %xmm4,%xmm5 + .byte 102,68,15,111,213 // movdqa %xmm5,%xmm10 + .byte 102,15,111,202 // movdqa %xmm2,%xmm1 + .byte 102,15,127,140,36,176,0,0,0 // movdqa %xmm1,0xb0(%rsp) + .byte 102,68,15,111,193 // movdqa %xmm1,%xmm8 + .byte 242,15,112,193,231 // pshuflw $0xe7,%xmm1,%xmm0 + .byte 102,15,96,204 // punpcklbw %xmm4,%xmm1 + .byte 102,68,15,104,196 // punpckhbw %xmm4,%xmm8 + .byte 102,15,127,156,36,160,0,0,0 // movdqa %xmm3,0xa0(%rsp) + .byte 102,15,111,211 // movdqa %xmm3,%xmm2 + .byte 102,68,15,111,243 // movdqa %xmm3,%xmm14 + .byte 102,68,15,96,244 // punpcklbw %xmm4,%xmm14 + .byte 102,15,104,212 // punpckhbw %xmm4,%xmm2 + .byte 102,15,111,242 // movdqa %xmm2,%xmm6 + .byte 102,15,127,116,36,16 // movdqa %xmm6,0x10(%rsp) + .byte 243,15,112,192,231 // pshufhw $0xe7,%xmm0,%xmm0 + .byte 102,15,112,192,232 // pshufd $0xe8,%xmm0,%xmm0 + .byte 102,15,96,192 // punpcklbw %xmm0,%xmm0 + .byte 242,15,112,192,95 // pshuflw $0x5f,%xmm0,%xmm0 + .byte 243,15,112,208,95 // pshufhw $0x5f,%xmm0,%xmm2 + .byte 242,15,112,195,231 // pshuflw $0xe7,%xmm3,%xmm0 + .byte 243,15,112,192,231 // pshufhw $0xe7,%xmm0,%xmm0 + .byte 102,15,112,192,232 // pshufd $0xe8,%xmm0,%xmm0 + .byte 102,15,96,192 // punpcklbw %xmm0,%xmm0 + .byte 242,15,112,192,95 // pshuflw $0x5f,%xmm0,%xmm0 + .byte 243,15,112,216,95 // pshufhw $0x5f,%xmm0,%xmm3 + .byte 102,15,111,195 // movdqa %xmm3,%xmm0 + .byte 102,15,104,196 // punpckhbw %xmm4,%xmm0 + .byte 
102,15,127,4,36 // movdqa %xmm0,(%rsp) + .byte 102,15,96,220 // punpcklbw %xmm4,%xmm3 + .byte 102,68,15,111,202 // movdqa %xmm2,%xmm9 + .byte 102,68,15,104,204 // punpckhbw %xmm4,%xmm9 + .byte 102,15,96,212 // punpcklbw %xmm4,%xmm2 + .byte 102,15,111,233 // movdqa %xmm1,%xmm5 + .byte 102,15,253,237 // paddw %xmm5,%xmm5 + .byte 102,65,15,111,248 // movdqa %xmm8,%xmm7 + .byte 102,15,253,255 // paddw %xmm7,%xmm7 + .byte 102,69,15,111,238 // movdqa %xmm14,%xmm13 + .byte 102,69,15,253,237 // paddw %xmm13,%xmm13 + .byte 102,15,253,246 // paddw %xmm6,%xmm6 + .byte 102,15,217,240 // psubusw %xmm0,%xmm6 + .byte 102,15,117,244 // pcmpeqw %xmm4,%xmm6 + .byte 102,15,127,116,36,112 // movdqa %xmm6,0x70(%rsp) + .byte 102,68,15,217,235 // psubusw %xmm3,%xmm13 + .byte 102,68,15,117,236 // pcmpeqw %xmm4,%xmm13 + .byte 102,68,15,127,108,36,80 // movdqa %xmm13,0x50(%rsp) + .byte 102,65,15,217,249 // psubusw %xmm9,%xmm7 + .byte 102,15,117,252 // pcmpeqw %xmm4,%xmm7 + .byte 102,15,127,124,36,48 // movdqa %xmm7,0x30(%rsp) + .byte 102,15,217,234 // psubusw %xmm2,%xmm5 + .byte 102,15,117,236 // pcmpeqw %xmm4,%xmm5 + .byte 102,15,127,108,36,32 // movdqa %xmm5,0x20(%rsp) + .byte 102,15,111,53,29,4,0,0 // movdqa 0x41d(%rip),%xmm6 # 3310 <_sk_overlay_sse2_8bit+0x5bb> + .byte 102,15,111,194 // movdqa %xmm2,%xmm0 + .byte 102,15,239,198 // pxor %xmm6,%xmm0 + .byte 102,65,15,213,199 // pmullw %xmm15,%xmm0 + .byte 102,15,127,68,36,64 // movdqa %xmm0,0x40(%rsp) + .byte 102,65,15,111,194 // movdqa %xmm10,%xmm0 + .byte 102,68,15,111,224 // movdqa %xmm0,%xmm12 + .byte 102,68,15,239,230 // pxor %xmm6,%xmm12 + .byte 102,68,15,213,225 // pmullw %xmm1,%xmm12 + .byte 102,15,111,226 // movdqa %xmm2,%xmm4 + .byte 102,15,249,209 // psubw %xmm1,%xmm2 + .byte 102,15,127,84,36,208 // movdqa %xmm2,-0x30(%rsp) + .byte 102,68,15,111,209 // movdqa %xmm1,%xmm10 + .byte 102,69,15,213,215 // pmullw %xmm15,%xmm10 + .byte 102,15,213,224 // pmullw %xmm0,%xmm4 + .byte 102,15,127,100,36,224 // movdqa %xmm4,-0x20(%rsp) + 
.byte 102,65,15,249,199 // psubw %xmm15,%xmm0 + .byte 102,15,127,68,36,176 // movdqa %xmm0,-0x50(%rsp) + .byte 102,65,15,111,193 // movdqa %xmm9,%xmm0 + .byte 102,15,239,198 // pxor %xmm6,%xmm0 + .byte 102,15,111,84,36,128 // movdqa -0x80(%rsp),%xmm2 + .byte 102,15,213,194 // pmullw %xmm2,%xmm0 + .byte 102,15,127,68,36,240 // movdqa %xmm0,-0x10(%rsp) + .byte 102,65,15,111,195 // movdqa %xmm11,%xmm0 + .byte 102,68,15,239,222 // pxor %xmm6,%xmm11 + .byte 102,69,15,213,216 // pmullw %xmm8,%xmm11 + .byte 102,69,15,111,249 // movdqa %xmm9,%xmm15 + .byte 102,69,15,249,200 // psubw %xmm8,%xmm9 + .byte 102,68,15,127,76,36,192 // movdqa %xmm9,-0x40(%rsp) + .byte 102,68,15,213,194 // pmullw %xmm2,%xmm8 + .byte 102,68,15,213,248 // pmullw %xmm0,%xmm15 + .byte 102,15,249,194 // psubw %xmm2,%xmm0 + .byte 102,68,15,111,200 // movdqa %xmm0,%xmm9 + .byte 102,15,111,195 // movdqa %xmm3,%xmm0 + .byte 102,15,239,198 // pxor %xmm6,%xmm0 + .byte 102,15,111,84,36,144 // movdqa -0x70(%rsp),%xmm2 + .byte 102,15,213,194 // pmullw %xmm2,%xmm0 + .byte 102,15,127,68,36,128 // movdqa %xmm0,-0x80(%rsp) + .byte 102,15,111,68,36,96 // movdqa 0x60(%rsp),%xmm0 + .byte 102,15,111,232 // movdqa %xmm0,%xmm5 + .byte 102,15,239,238 // pxor %xmm6,%xmm5 + .byte 102,65,15,213,238 // pmullw %xmm14,%xmm5 + .byte 102,68,15,111,235 // movdqa %xmm3,%xmm13 + .byte 102,65,15,249,222 // psubw %xmm14,%xmm3 + .byte 102,15,127,92,36,160 // movdqa %xmm3,-0x60(%rsp) + .byte 102,65,15,111,254 // movdqa %xmm14,%xmm7 + .byte 102,15,213,250 // pmullw %xmm2,%xmm7 + .byte 102,68,15,213,232 // pmullw %xmm0,%xmm13 + .byte 102,15,249,194 // psubw %xmm2,%xmm0 + .byte 102,15,111,208 // movdqa %xmm0,%xmm2 + .byte 102,15,111,12,36 // movdqa (%rsp),%xmm1 + .byte 102,15,111,193 // movdqa %xmm1,%xmm0 + .byte 102,15,239,198 // pxor %xmm6,%xmm0 + .byte 102,15,111,156,36,144,0,0,0 // movdqa 0x90(%rsp),%xmm3 + .byte 102,15,213,195 // pmullw %xmm3,%xmm0 + .byte 102,15,127,68,36,144 // movdqa %xmm0,-0x70(%rsp) + .byte 
102,15,111,132,36,128,0,0,0 // movdqa 0x80(%rsp),%xmm0 + .byte 102,15,239,240 // pxor %xmm0,%xmm6 + .byte 102,15,111,100,36,16 // movdqa 0x10(%rsp),%xmm4 + .byte 102,15,213,244 // pmullw %xmm4,%xmm6 + .byte 102,68,15,111,241 // movdqa %xmm1,%xmm14 + .byte 102,15,249,204 // psubw %xmm4,%xmm1 + .byte 102,15,213,227 // pmullw %xmm3,%xmm4 + .byte 102,68,15,213,240 // pmullw %xmm0,%xmm14 + .byte 102,15,249,195 // psubw %xmm3,%xmm0 + .byte 102,15,213,193 // pmullw %xmm1,%xmm0 + .byte 102,15,213,84,36,160 // pmullw -0x60(%rsp),%xmm2 + .byte 102,68,15,213,76,36,192 // pmullw -0x40(%rsp),%xmm9 + .byte 102,15,111,76,36,176 // movdqa -0x50(%rsp),%xmm1 + .byte 102,15,213,76,36,208 // pmullw -0x30(%rsp),%xmm1 + .byte 102,15,253,192 // paddw %xmm0,%xmm0 + .byte 102,68,15,249,240 // psubw %xmm0,%xmm14 + .byte 102,15,253,210 // paddw %xmm2,%xmm2 + .byte 102,68,15,249,234 // psubw %xmm2,%xmm13 + .byte 102,69,15,253,201 // paddw %xmm9,%xmm9 + .byte 102,69,15,249,249 // psubw %xmm9,%xmm15 + .byte 102,15,111,193 // movdqa %xmm1,%xmm0 + .byte 102,15,253,192 // paddw %xmm0,%xmm0 + .byte 102,15,111,76,36,224 // movdqa -0x20(%rsp),%xmm1 + .byte 102,15,249,200 // psubw %xmm0,%xmm1 + .byte 102,69,15,253,210 // paddw %xmm10,%xmm10 + .byte 102,15,111,68,36,32 // movdqa 0x20(%rsp),%xmm0 + .byte 102,68,15,219,208 // pand %xmm0,%xmm10 + .byte 102,15,223,193 // pandn %xmm1,%xmm0 + .byte 102,65,15,235,194 // por %xmm10,%xmm0 + .byte 102,15,111,216 // movdqa %xmm0,%xmm3 + .byte 102,69,15,253,192 // paddw %xmm8,%xmm8 + .byte 102,15,111,68,36,48 // movdqa 0x30(%rsp),%xmm0 + .byte 102,68,15,219,192 // pand %xmm0,%xmm8 + .byte 102,65,15,223,199 // pandn %xmm15,%xmm0 + .byte 102,65,15,235,192 // por %xmm8,%xmm0 + .byte 102,68,15,111,192 // movdqa %xmm0,%xmm8 + .byte 102,15,253,255 // paddw %xmm7,%xmm7 + .byte 102,15,111,68,36,80 // movdqa 0x50(%rsp),%xmm0 + .byte 102,15,219,248 // pand %xmm0,%xmm7 + .byte 102,65,15,223,197 // pandn %xmm13,%xmm0 + .byte 102,15,235,199 // por %xmm7,%xmm0 + .byte 
102,15,111,208 // movdqa %xmm0,%xmm2 + .byte 102,15,253,228 // paddw %xmm4,%xmm4 + .byte 102,15,111,68,36,112 // movdqa 0x70(%rsp),%xmm0 + .byte 102,15,219,224 // pand %xmm0,%xmm4 + .byte 102,65,15,223,198 // pandn %xmm14,%xmm0 + .byte 102,15,235,196 // por %xmm4,%xmm0 + .byte 102,15,111,200 // movdqa %xmm0,%xmm1 + .byte 102,68,15,253,100,36,64 // paddw 0x40(%rsp),%xmm12 + .byte 102,68,15,253,92,36,240 // paddw -0x10(%rsp),%xmm11 + .byte 102,15,253,108,36,128 // paddw -0x80(%rsp),%xmm5 + .byte 102,15,253,116,36,144 // paddw -0x70(%rsp),%xmm6 + .byte 102,15,111,5,16,2,0,0 // movdqa 0x210(%rip),%xmm0 # 3320 <_sk_overlay_sse2_8bit+0x5cb> + .byte 102,68,15,253,224 // paddw %xmm0,%xmm12 + .byte 102,68,15,253,227 // paddw %xmm3,%xmm12 + .byte 102,68,15,253,216 // paddw %xmm0,%xmm11 + .byte 102,69,15,253,216 // paddw %xmm8,%xmm11 + .byte 102,15,253,232 // paddw %xmm0,%xmm5 + .byte 102,15,253,234 // paddw %xmm2,%xmm5 + .byte 102,15,253,240 // paddw %xmm0,%xmm6 + .byte 102,15,253,241 // paddw %xmm1,%xmm6 + .byte 102,15,111,5,244,1,0,0 // movdqa 0x1f4(%rip),%xmm0 # 3330 <_sk_overlay_sse2_8bit+0x5db> + .byte 102,15,228,240 // pmulhuw %xmm0,%xmm6 + .byte 102,15,228,232 // pmulhuw %xmm0,%xmm5 + .byte 102,68,15,228,216 // pmulhuw %xmm0,%xmm11 + .byte 102,68,15,228,224 // pmulhuw %xmm0,%xmm12 + .byte 102,65,15,113,211,7 // psrlw $0x7,%xmm11 + .byte 102,65,15,113,212,7 // psrlw $0x7,%xmm12 + .byte 102,69,15,103,227 // packuswb %xmm11,%xmm12 + .byte 102,15,113,214,7 // psrlw $0x7,%xmm6 + .byte 102,15,113,213,7 // psrlw $0x7,%xmm5 + .byte 102,15,103,238 // packuswb %xmm6,%xmm5 + .byte 72,173 // lods %ds:(%rsi),%rax + .byte 15,40,148,36,176,0,0,0 // movaps 0xb0(%rsp),%xmm2 + .byte 15,40,156,36,160,0,0,0 // movaps 0xa0(%rsp),%xmm3 + .byte 102,65,15,111,196 // movdqa %xmm12,%xmm0 + .byte 102,15,111,205 // movdqa %xmm5,%xmm1 + .byte 72,129,196,200,0,0,0 // add $0xc8,%rsp + .byte 255,224 // jmpq *%rax + BALIGN4 .byte 0,0 // add %al,(%rax) - .byte 127,67 // jg 2963 
<_sk_difference_sse2_8bit+0x20c> + .byte 127,67 // jg 31db <_sk_overlay_sse2_8bit+0x486> .byte 0,0 // add %al,(%rax) - .byte 127,67 // jg 2967 <_sk_difference_sse2_8bit+0x210> + .byte 127,67 // jg 31df <_sk_overlay_sse2_8bit+0x48a> .byte 0,0 // add %al,(%rax) - .byte 127,67 // jg 296b <_sk_difference_sse2_8bit+0x214> + .byte 127,67 // jg 31e3 <_sk_overlay_sse2_8bit+0x48e> BALIGN16 .byte 0,0 // add %al,(%rax) @@ -65752,6 +67066,45 @@ BALIGN16 .byte 255 // (bad) .byte 255 // (bad) .byte 255,0 // incl (%rax) + .byte 255,0 // incl (%rax) + .byte 255,0 // incl (%rax) + .byte 255,0 // incl (%rax) + .byte 255,0 // incl (%rax) + .byte 255,0 // incl (%rax) + .byte 255,0 // incl (%rax) + .byte 255,0 // incl (%rax) + .byte 255,0 // incl (%rax) + .byte 127,0 // jg 32f2 <.literal16+0x152> + .byte 127,0 // jg 32f4 <.literal16+0x154> + .byte 127,0 // jg 32f6 <.literal16+0x156> + .byte 127,0 // jg 32f8 <.literal16+0x158> + .byte 127,0 // jg 32fa <.literal16+0x15a> + .byte 127,0 // jg 32fc <.literal16+0x15c> + .byte 127,0 // jg 32fe <.literal16+0x15e> + .byte 127,0 // jg 3300 <.literal16+0x160> + .byte 129,128,129,128,129,128,129,128,129,128// addl $0x80818081,-0x7f7e7f7f(%rax) + .byte 129,128,129,128,129,128,255,0,255,0 // addl $0xff00ff,-0x7f7e7f7f(%rax) + .byte 255,0 // incl (%rax) + .byte 255,0 // incl (%rax) + .byte 255,0 // incl (%rax) + .byte 255,0 // incl (%rax) + .byte 255,0 // incl (%rax) + .byte 255,0 // incl (%rax) + .byte 127,0 // jg 3322 <.literal16+0x182> + .byte 127,0 // jg 3324 <.literal16+0x184> + .byte 127,0 // jg 3326 <.literal16+0x186> + .byte 127,0 // jg 3328 <.literal16+0x188> + .byte 127,0 // jg 332a <.literal16+0x18a> + .byte 127,0 // jg 332c <.literal16+0x18c> + .byte 127,0 // jg 332e <.literal16+0x18e> + .byte 127,0 // jg 3330 <.literal16+0x190> + .byte 129,128,129,128,129,128,129,128,129,128// addl $0x80818081,-0x7f7e7f7f(%rax) + .byte 129 // .byte 0x81 + .byte 128 // .byte 0x80 + .byte 129 // .byte 0x81 + .byte 128 // .byte 0x80 + .byte 129 // .byte 
0x81 + .byte 128 // .byte 0x80 #elif defined(__i386__) BALIGN32 diff --git a/src/jumper/SkJumper_generated_win.S b/src/jumper/SkJumper_generated_win.S index 02d1015c03..2222717b44 100644 --- a/src/jumper/SkJumper_generated_win.S +++ b/src/jumper/SkJumper_generated_win.S @@ -39059,7 +39059,7 @@ _sk_uniform_color_hsw_8bit LABEL PROC PUBLIC _sk_set_rgb_hsw_8bit _sk_set_rgb_hsw_8bit LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax - DB 197,250,16,37,149,44,0,0 ; vmovss 0x2c95(%rip),%xmm4 # 2e18 <_sk_difference_hsw_8bit+0x184> + DB 197,250,16,37,161,51,0,0 ; vmovss 0x33a1(%rip),%xmm4 # 3524 <_sk_overlay_hsw_8bit+0x37d> DB 197,218,89,40 ; vmulss (%rax),%xmm4,%xmm5 DB 196,225,250,44,205 ; vcvttss2si %xmm5,%rcx DB 197,218,89,104,4 ; vmulss 0x4(%rax),%xmm4,%xmm5 @@ -39072,7 +39072,7 @@ _sk_set_rgb_hsw_8bit LABEL PROC DB 9,208 ; or %edx,%eax DB 197,249,110,224 ; vmovd %eax,%xmm4 DB 196,226,125,88,228 ; vpbroadcastd %xmm4,%ymm4 - DB 197,253,111,45,133,44,0,0 ; vmovdqa 0x2c85(%rip),%ymm5 # 2e40 <_sk_difference_hsw_8bit+0x1ac> + DB 197,253,111,45,165,51,0,0 ; vmovdqa 0x33a5(%rip),%ymm5 # 3560 <_sk_overlay_hsw_8bit+0x3b9> DB 197,245,219,205 ; vpand %ymm5,%ymm1,%ymm1 DB 197,253,219,197 ; vpand %ymm5,%ymm0,%ymm0 DB 197,221,235,192 ; vpor %ymm0,%ymm4,%ymm0 @@ -39082,10 +39082,10 @@ _sk_set_rgb_hsw_8bit LABEL PROC PUBLIC _sk_premul_hsw_8bit _sk_premul_hsw_8bit LABEL PROC - DB 197,253,111,37,137,44,0,0 ; vmovdqa 0x2c89(%rip),%ymm4 # 2e60 <_sk_difference_hsw_8bit+0x1cc> + DB 197,253,111,37,169,51,0,0 ; vmovdqa 0x33a9(%rip),%ymm4 # 3580 <_sk_overlay_hsw_8bit+0x3d9> DB 196,226,125,0,236 ; vpshufb %ymm4,%ymm0,%ymm5 DB 196,226,117,0,228 ; vpshufb %ymm4,%ymm1,%ymm4 - DB 197,253,111,53,151,44,0,0 ; vmovdqa 0x2c97(%rip),%ymm6 # 2e80 <_sk_difference_hsw_8bit+0x1ec> + DB 197,253,111,53,183,51,0,0 ; vmovdqa 0x33b7(%rip),%ymm6 # 35a0 <_sk_overlay_hsw_8bit+0x3f9> DB 197,221,235,230 ; vpor %ymm6,%ymm4,%ymm4 DB 197,213,235,238 ; vpor %ymm6,%ymm5,%ymm5 DB 196,226,125,48,240 ; vpmovzxbw %xmm0,%ymm6 @@ 
-39123,7 +39123,7 @@ _sk_premul_hsw_8bit LABEL PROC PUBLIC _sk_swap_rb_hsw_8bit _sk_swap_rb_hsw_8bit LABEL PROC - DB 197,253,111,37,15,44,0,0 ; vmovdqa 0x2c0f(%rip),%ymm4 # 2ea0 <_sk_difference_hsw_8bit+0x20c> + DB 197,253,111,37,47,51,0,0 ; vmovdqa 0x332f(%rip),%ymm4 # 35c0 <_sk_overlay_hsw_8bit+0x419> DB 196,226,125,0,196 ; vpshufb %ymm4,%ymm0,%ymm0 DB 196,226,117,0,204 ; vpshufb %ymm4,%ymm1,%ymm1 DB 72,173 ; lods %ds:(%rsi),%rax @@ -39463,7 +39463,7 @@ _sk_load_bgra_hsw_8bit LABEL PROC DB 117,35 ; jne 774 <_sk_load_bgra_hsw_8bit+0x44> DB 196,161,126,111,76,130,32 ; vmovdqu 0x20(%rdx,%r8,4),%ymm1 DB 196,161,126,111,4,130 ; vmovdqu (%rdx,%r8,4),%ymm0 - DB 197,253,111,37,90,39,0,0 ; vmovdqa 0x275a(%rip),%ymm4 # 2ec0 <_sk_difference_hsw_8bit+0x22c> + DB 197,253,111,37,122,46,0,0 ; vmovdqa 0x2e7a(%rip),%ymm4 # 35e0 <_sk_overlay_hsw_8bit+0x439> DB 196,226,125,0,196 ; vpshufb %ymm4,%ymm0,%ymm0 DB 196,226,117,0,204 ; vpshufb %ymm4,%ymm1,%ymm1 DB 72,173 ; lods %ds:(%rsi),%rax @@ -39576,7 +39576,7 @@ _sk_load_bgra_dst_hsw_8bit LABEL PROC DB 117,35 ; jne 92c <_sk_load_bgra_dst_hsw_8bit+0x44> DB 196,161,126,111,92,130,32 ; vmovdqu 0x20(%rdx,%r8,4),%ymm3 DB 196,161,126,111,20,130 ; vmovdqu (%rdx,%r8,4),%ymm2 - DB 197,253,111,37,194,37,0,0 ; vmovdqa 0x25c2(%rip),%ymm4 # 2ee0 <_sk_difference_hsw_8bit+0x24c> + DB 197,253,111,37,226,44,0,0 ; vmovdqa 0x2ce2(%rip),%ymm4 # 3600 <_sk_overlay_hsw_8bit+0x459> DB 196,226,109,0,212 ; vpshufb %ymm4,%ymm2,%ymm2 DB 196,226,101,0,220 ; vpshufb %ymm4,%ymm3,%ymm3 DB 72,173 ; lods %ds:(%rsi),%rax @@ -39685,7 +39685,7 @@ _sk_store_bgra_hsw_8bit LABEL PROC DB 72,15,175,209 ; imul %rcx,%rdx DB 72,193,226,2 ; shl $0x2,%rdx DB 72,3,16 ; add (%rax),%rdx - DB 197,253,111,37,60,36,0,0 ; vmovdqa 0x243c(%rip),%ymm4 # 2f00 <_sk_difference_hsw_8bit+0x26c> + DB 197,253,111,37,92,43,0,0 ; vmovdqa 0x2b5c(%rip),%ymm4 # 3620 <_sk_overlay_hsw_8bit+0x479> DB 196,226,117,0,236 ; vpshufb %ymm4,%ymm1,%ymm5 DB 196,226,125,0,228 ; vpshufb %ymm4,%ymm0,%ymm4 DB 
77,133,201 ; test %r9,%r9 @@ -39965,10 +39965,10 @@ _sk_store_a8_hsw_8bit LABEL PROC DB 72,99,87,8 ; movslq 0x8(%rdi),%rdx DB 72,15,175,209 ; imul %rcx,%rdx DB 72,3,16 ; add (%rax),%rdx - DB 197,253,111,37,104,32,0,0 ; vmovdqa 0x2068(%rip),%ymm4 # 2f20 <_sk_difference_hsw_8bit+0x28c> + DB 197,253,111,37,136,39,0,0 ; vmovdqa 0x2788(%rip),%ymm4 # 3640 <_sk_overlay_hsw_8bit+0x499> DB 196,226,117,0,236 ; vpshufb %ymm4,%ymm1,%ymm5 DB 196,227,253,0,237,232 ; vpermq $0xe8,%ymm5,%ymm5 - DB 197,249,111,53,245,34,0,0 ; vmovdqa 0x22f5(%rip),%xmm6 # 31c0 <_sk_difference_hsw_8bit+0x52c> + DB 197,249,111,53,133,42,0,0 ; vmovdqa 0x2a85(%rip),%xmm6 # 3950 <_sk_overlay_hsw_8bit+0x7a9> DB 196,226,81,0,238 ; vpshufb %xmm6,%xmm5,%xmm5 DB 196,226,125,0,228 ; vpshufb %ymm4,%ymm0,%ymm4 DB 196,227,253,0,228,232 ; vpermq $0xe8,%ymm4,%ymm4 @@ -40058,10 +40058,10 @@ _sk_load_g8_hsw_8bit LABEL PROC DB 196,226,125,49,200 ; vpmovzxbd %xmm0,%ymm1 DB 197,249,112,192,78 ; vpshufd $0x4e,%xmm0,%xmm0 DB 196,226,125,49,192 ; vpmovzxbd %xmm0,%ymm0 - DB 196,226,125,88,37,17,30,0,0 ; vpbroadcastd 0x1e11(%rip),%ymm4 # 2e1c <_sk_difference_hsw_8bit+0x188> + DB 196,226,125,88,37,29,37,0,0 ; vpbroadcastd 0x251d(%rip),%ymm4 # 3528 <_sk_overlay_hsw_8bit+0x381> DB 196,226,125,64,236 ; vpmulld %ymm4,%ymm0,%ymm5 DB 196,226,117,64,196 ; vpmulld %ymm4,%ymm1,%ymm0 - DB 196,226,125,88,13,2,30,0,0 ; vpbroadcastd 0x1e02(%rip),%ymm1 # 2e20 <_sk_difference_hsw_8bit+0x18c> + DB 196,226,125,88,13,14,37,0,0 ; vpbroadcastd 0x250e(%rip),%ymm1 # 352c <_sk_overlay_hsw_8bit+0x385> DB 197,253,235,193 ; vpor %ymm1,%ymm0,%ymm0 DB 197,213,235,201 ; vpor %ymm1,%ymm5,%ymm1 DB 72,173 ; lods %ds:(%rsi),%rax @@ -40157,10 +40157,10 @@ _sk_load_g8_dst_hsw_8bit LABEL PROC DB 196,226,125,49,218 ; vpmovzxbd %xmm2,%ymm3 DB 197,249,112,210,78 ; vpshufd $0x4e,%xmm2,%xmm2 DB 196,226,125,49,210 ; vpmovzxbd %xmm2,%ymm2 - DB 196,226,125,88,37,161,28,0,0 ; vpbroadcastd 0x1ca1(%rip),%ymm4 # 2e24 <_sk_difference_hsw_8bit+0x190> + DB 
196,226,125,88,37,173,35,0,0 ; vpbroadcastd 0x23ad(%rip),%ymm4 # 3530 <_sk_overlay_hsw_8bit+0x389> DB 196,226,109,64,236 ; vpmulld %ymm4,%ymm2,%ymm5 DB 196,226,101,64,212 ; vpmulld %ymm4,%ymm3,%ymm2 - DB 196,226,125,88,29,146,28,0,0 ; vpbroadcastd 0x1c92(%rip),%ymm3 # 2e28 <_sk_difference_hsw_8bit+0x194> + DB 196,226,125,88,29,158,35,0,0 ; vpbroadcastd 0x239e(%rip),%ymm3 # 3534 <_sk_overlay_hsw_8bit+0x38d> DB 197,237,235,211 ; vpor %ymm3,%ymm2,%ymm2 DB 197,213,235,219 ; vpor %ymm3,%ymm5,%ymm3 DB 72,173 ; lods %ds:(%rsi),%rax @@ -40255,7 +40255,7 @@ _sk_srcover_rgba_8888_hsw_8bit LABEL PROC DB 15,133,222,0,0,0 ; jne 13c3 <_sk_srcover_rgba_8888_hsw_8bit+0x103> DB 196,33,126,111,76,138,32 ; vmovdqu 0x20(%rdx,%r9,4),%ymm9 DB 196,33,126,111,28,138 ; vmovdqu (%rdx,%r9,4),%ymm11 - DB 197,253,111,53,70,28,0,0 ; vmovdqa 0x1c46(%rip),%ymm6 # 2f40 <_sk_difference_hsw_8bit+0x2ac> + DB 197,253,111,53,102,35,0,0 ; vmovdqa 0x2366(%rip),%ymm6 # 3660 <_sk_overlay_hsw_8bit+0x4b9> DB 196,226,117,0,254 ; vpshufb %ymm6,%ymm1,%ymm7 DB 196,226,125,0,246 ; vpshufb %ymm6,%ymm0,%ymm6 DB 196,66,125,48,195 ; vpmovzxbw %xmm11,%ymm8 @@ -40461,7 +40461,7 @@ PUBLIC _sk_scale_1_float_hsw_8bit _sk_scale_1_float_hsw_8bit LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 197,250,16,32 ; vmovss (%rax),%xmm4 - DB 197,218,89,37,214,23,0,0 ; vmulss 0x17d6(%rip),%xmm4,%xmm4 # 2e2c <_sk_difference_hsw_8bit+0x198> + DB 197,218,89,37,226,30,0,0 ; vmulss 0x1ee2(%rip),%xmm4,%xmm4 # 3538 <_sk_overlay_hsw_8bit+0x391> DB 197,250,44,196 ; vcvttss2si %xmm4,%eax DB 197,249,110,224 ; vmovd %eax,%xmm4 DB 196,226,125,120,228 ; vpbroadcastb %xmm4,%ymm4 @@ -40471,7 +40471,7 @@ _sk_scale_1_float_hsw_8bit LABEL PROC DB 196,226,125,48,241 ; vpmovzxbw %xmm1,%ymm6 DB 196,227,125,57,201,1 ; vextracti128 $0x1,%ymm1,%xmm1 DB 196,226,125,48,201 ; vpmovzxbw %xmm1,%ymm1 - DB 197,221,219,37,213,24,0,0 ; vpand 0x18d5(%rip),%ymm4,%ymm4 # 2f60 <_sk_difference_hsw_8bit+0x2cc> + DB 197,221,219,37,245,31,0,0 ; vpand 0x1ff5(%rip),%ymm4,%ymm4 
# 3680 <_sk_overlay_hsw_8bit+0x4d9> DB 197,221,213,249 ; vpmullw %ymm1,%ymm4,%ymm7 DB 197,93,213,198 ; vpmullw %ymm6,%ymm4,%ymm8 DB 197,93,213,200 ; vpmullw %ymm0,%ymm4,%ymm9 @@ -40508,7 +40508,7 @@ _sk_scale_u8_hsw_8bit LABEL PROC DB 196,226,125,49,236 ; vpmovzxbd %xmm4,%ymm5 DB 197,249,112,228,78 ; vpshufd $0x4e,%xmm4,%xmm4 DB 196,226,125,49,228 ; vpmovzxbd %xmm4,%ymm4 - DB 197,253,111,53,95,24,0,0 ; vmovdqa 0x185f(%rip),%ymm6 # 2f80 <_sk_difference_hsw_8bit+0x2ec> + DB 197,253,111,53,127,31,0,0 ; vmovdqa 0x1f7f(%rip),%ymm6 # 36a0 <_sk_overlay_hsw_8bit+0x4f9> DB 196,226,93,0,230 ; vpshufb %ymm6,%ymm4,%ymm4 DB 196,226,85,0,238 ; vpshufb %ymm6,%ymm5,%ymm5 DB 196,226,125,48,240 ; vpmovzxbw %xmm0,%ymm6 @@ -40623,7 +40623,7 @@ PUBLIC _sk_lerp_1_float_hsw_8bit _sk_lerp_1_float_hsw_8bit LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 197,250,16,32 ; vmovss (%rax),%xmm4 - DB 197,218,89,37,58,21,0,0 ; vmulss 0x153a(%rip),%xmm4,%xmm4 # 2e30 <_sk_difference_hsw_8bit+0x19c> + DB 197,218,89,37,70,28,0,0 ; vmulss 0x1c46(%rip),%xmm4,%xmm4 # 353c <_sk_overlay_hsw_8bit+0x395> DB 197,250,44,196 ; vcvttss2si %xmm4,%eax DB 197,249,110,224 ; vmovd %eax,%xmm4 DB 196,226,125,120,228 ; vpbroadcastb %xmm4,%ymm4 @@ -40633,7 +40633,7 @@ _sk_lerp_1_float_hsw_8bit LABEL PROC DB 196,226,125,48,241 ; vpmovzxbw %xmm1,%ymm6 DB 196,227,125,57,201,1 ; vextracti128 $0x1,%ymm1,%xmm1 DB 196,226,125,48,201 ; vpmovzxbw %xmm1,%ymm1 - DB 197,221,219,61,117,22,0,0 ; vpand 0x1675(%rip),%ymm4,%ymm7 # 2fa0 <_sk_difference_hsw_8bit+0x30c> + DB 197,221,219,61,149,29,0,0 ; vpand 0x1d95(%rip),%ymm4,%ymm7 # 36c0 <_sk_overlay_hsw_8bit+0x519> DB 197,69,213,193 ; vpmullw %ymm1,%ymm7,%ymm8 DB 197,69,213,206 ; vpmullw %ymm6,%ymm7,%ymm9 DB 197,69,213,208 ; vpmullw %ymm0,%ymm7,%ymm10 @@ -40701,7 +40701,7 @@ _sk_lerp_u8_hsw_8bit LABEL PROC DB 196,226,125,49,236 ; vpmovzxbd %xmm4,%ymm5 DB 197,249,112,228,78 ; vpshufd $0x4e,%xmm4,%xmm4 DB 196,226,125,49,228 ; vpmovzxbd %xmm4,%ymm4 - DB 197,253,111,53,106,21,0,0 ; vmovdqa 
0x156a(%rip),%ymm6 # 2fc0 <_sk_difference_hsw_8bit+0x32c> + DB 197,253,111,53,138,28,0,0 ; vmovdqa 0x1c8a(%rip),%ymm6 # 36e0 <_sk_overlay_hsw_8bit+0x539> DB 196,98,93,0,206 ; vpshufb %ymm6,%ymm4,%ymm9 DB 196,98,85,0,222 ; vpshufb %ymm6,%ymm5,%ymm11 DB 196,226,125,48,240 ; vpmovzxbw %xmm0,%ymm6 @@ -40864,7 +40864,7 @@ _sk_move_dst_src_hsw_8bit LABEL PROC PUBLIC _sk_black_color_hsw_8bit _sk_black_color_hsw_8bit LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax - DB 196,226,125,24,5,73,17,0,0 ; vbroadcastss 0x1149(%rip),%ymm0 # 2e34 <_sk_difference_hsw_8bit+0x1a0> + DB 196,226,125,24,5,85,24,0,0 ; vbroadcastss 0x1855(%rip),%ymm0 # 3540 <_sk_overlay_hsw_8bit+0x399> DB 197,252,40,200 ; vmovaps %ymm0,%ymm1 DB 255,224 ; jmpq *%rax @@ -40884,7 +40884,7 @@ _sk_clear_hsw_8bit LABEL PROC PUBLIC _sk_srcatop_hsw_8bit _sk_srcatop_hsw_8bit LABEL PROC - DB 197,125,111,5,207,18,0,0 ; vmovdqa 0x12cf(%rip),%ymm8 # 2fe0 <_sk_difference_hsw_8bit+0x34c> + DB 197,125,111,5,239,25,0,0 ; vmovdqa 0x19ef(%rip),%ymm8 # 3700 <_sk_overlay_hsw_8bit+0x559> DB 196,194,101,0,224 ; vpshufb %ymm8,%ymm3,%ymm4 DB 196,194,109,0,232 ; vpshufb %ymm8,%ymm2,%ymm5 DB 196,98,125,48,208 ; vpmovzxbw %xmm0,%ymm10 @@ -40959,7 +40959,7 @@ _sk_srcatop_hsw_8bit LABEL PROC PUBLIC _sk_dstatop_hsw_8bit _sk_dstatop_hsw_8bit LABEL PROC - DB 197,125,111,5,138,17,0,0 ; vmovdqa 0x118a(%rip),%ymm8 # 3000 <_sk_difference_hsw_8bit+0x36c> + DB 197,125,111,5,170,24,0,0 ; vmovdqa 0x18aa(%rip),%ymm8 # 3720 <_sk_overlay_hsw_8bit+0x579> DB 196,194,117,0,224 ; vpshufb %ymm8,%ymm1,%ymm4 DB 196,194,125,0,232 ; vpshufb %ymm8,%ymm0,%ymm5 DB 196,98,125,48,210 ; vpmovzxbw %xmm2,%ymm10 @@ -41034,7 +41034,7 @@ _sk_dstatop_hsw_8bit LABEL PROC PUBLIC _sk_srcin_hsw_8bit _sk_srcin_hsw_8bit LABEL PROC - DB 197,253,111,37,65,16,0,0 ; vmovdqa 0x1041(%rip),%ymm4 # 3020 <_sk_difference_hsw_8bit+0x38c> + DB 197,253,111,37,97,23,0,0 ; vmovdqa 0x1761(%rip),%ymm4 # 3740 <_sk_overlay_hsw_8bit+0x599> DB 196,226,101,0,236 ; vpshufb %ymm4,%ymm3,%ymm5 DB 
196,226,109,0,228 ; vpshufb %ymm4,%ymm2,%ymm4 DB 196,226,125,48,240 ; vpmovzxbw %xmm0,%ymm6 @@ -41072,7 +41072,7 @@ _sk_srcin_hsw_8bit LABEL PROC PUBLIC _sk_dstin_hsw_8bit _sk_dstin_hsw_8bit LABEL PROC - DB 197,253,111,37,183,15,0,0 ; vmovdqa 0xfb7(%rip),%ymm4 # 3040 <_sk_difference_hsw_8bit+0x3ac> + DB 197,253,111,37,215,22,0,0 ; vmovdqa 0x16d7(%rip),%ymm4 # 3760 <_sk_overlay_hsw_8bit+0x5b9> DB 196,226,117,0,204 ; vpshufb %ymm4,%ymm1,%ymm1 DB 196,226,125,0,196 ; vpshufb %ymm4,%ymm0,%ymm0 DB 196,226,125,48,226 ; vpmovzxbw %xmm2,%ymm4 @@ -41110,7 +41110,7 @@ _sk_dstin_hsw_8bit LABEL PROC PUBLIC _sk_srcout_hsw_8bit _sk_srcout_hsw_8bit LABEL PROC - DB 197,253,111,37,43,15,0,0 ; vmovdqa 0xf2b(%rip),%ymm4 # 3060 <_sk_difference_hsw_8bit+0x3cc> + DB 197,253,111,37,75,22,0,0 ; vmovdqa 0x164b(%rip),%ymm4 # 3780 <_sk_overlay_hsw_8bit+0x5d9> DB 196,226,109,0,236 ; vpshufb %ymm4,%ymm2,%ymm5 DB 196,226,101,0,228 ; vpshufb %ymm4,%ymm3,%ymm4 DB 197,205,118,246 ; vpcmpeqd %ymm6,%ymm6,%ymm6 @@ -41151,7 +41151,7 @@ _sk_srcout_hsw_8bit LABEL PROC PUBLIC _sk_dstout_hsw_8bit _sk_dstout_hsw_8bit LABEL PROC - DB 197,253,111,37,149,14,0,0 ; vmovdqa 0xe95(%rip),%ymm4 # 3080 <_sk_difference_hsw_8bit+0x3ec> + DB 197,253,111,37,181,21,0,0 ; vmovdqa 0x15b5(%rip),%ymm4 # 37a0 <_sk_overlay_hsw_8bit+0x5f9> DB 196,226,125,0,196 ; vpshufb %ymm4,%ymm0,%ymm0 DB 196,226,117,0,204 ; vpshufb %ymm4,%ymm1,%ymm1 DB 197,221,118,228 ; vpcmpeqd %ymm4,%ymm4,%ymm4 @@ -41192,7 +41192,7 @@ _sk_dstout_hsw_8bit LABEL PROC PUBLIC _sk_srcover_hsw_8bit _sk_srcover_hsw_8bit LABEL PROC - DB 197,253,111,37,253,13,0,0 ; vmovdqa 0xdfd(%rip),%ymm4 # 30a0 <_sk_difference_hsw_8bit+0x40c> + DB 197,253,111,37,29,21,0,0 ; vmovdqa 0x151d(%rip),%ymm4 # 37c0 <_sk_overlay_hsw_8bit+0x619> DB 196,226,117,0,236 ; vpshufb %ymm4,%ymm1,%ymm5 DB 196,226,125,0,228 ; vpshufb %ymm4,%ymm0,%ymm4 DB 196,98,125,48,202 ; vpmovzxbw %xmm2,%ymm9 @@ -41234,7 +41234,7 @@ _sk_srcover_hsw_8bit LABEL PROC PUBLIC _sk_dstover_hsw_8bit _sk_dstover_hsw_8bit 
LABEL PROC - DB 197,253,111,37,94,13,0,0 ; vmovdqa 0xd5e(%rip),%ymm4 # 30c0 <_sk_difference_hsw_8bit+0x42c> + DB 197,253,111,37,126,20,0,0 ; vmovdqa 0x147e(%rip),%ymm4 # 37e0 <_sk_overlay_hsw_8bit+0x639> DB 196,226,101,0,236 ; vpshufb %ymm4,%ymm3,%ymm5 DB 196,226,109,0,228 ; vpshufb %ymm4,%ymm2,%ymm4 DB 196,98,125,48,200 ; vpmovzxbw %xmm0,%ymm9 @@ -41314,7 +41314,7 @@ _sk_multiply_hsw_8bit LABEL PROC DB 72,131,236,56 ; sub $0x38,%rsp DB 197,253,111,243 ; vmovdqa %ymm3,%ymm6 DB 197,253,111,218 ; vmovdqa %ymm2,%ymm3 - DB 197,125,111,13,27,12,0,0 ; vmovdqa 0xc1b(%rip),%ymm9 # 30e0 <_sk_difference_hsw_8bit+0x44c> + DB 197,125,111,13,59,19,0,0 ; vmovdqa 0x133b(%rip),%ymm9 # 3800 <_sk_overlay_hsw_8bit+0x659> DB 196,194,101,0,225 ; vpshufb %ymm9,%ymm3,%ymm4 DB 196,194,77,0,233 ; vpshufb %ymm9,%ymm6,%ymm5 DB 196,65,45,118,210 ; vpcmpeqd %ymm10,%ymm10,%ymm10 @@ -41455,7 +41455,7 @@ _sk_screen_hsw_8bit LABEL PROC PUBLIC _sk_xor__hsw_8bit _sk_xor__hsw_8bit LABEL PROC - DB 197,125,111,13,167,9,0,0 ; vmovdqa 0x9a7(%rip),%ymm9 # 3100 <_sk_difference_hsw_8bit+0x46c> + DB 197,125,111,13,199,16,0,0 ; vmovdqa 0x10c7(%rip),%ymm9 # 3820 <_sk_overlay_hsw_8bit+0x679> DB 196,194,109,0,225 ; vpshufb %ymm9,%ymm2,%ymm4 DB 196,194,101,0,249 ; vpshufb %ymm9,%ymm3,%ymm7 DB 196,65,37,118,219 ; vpcmpeqd %ymm11,%ymm11,%ymm11 @@ -41532,7 +41532,7 @@ _sk_xor__hsw_8bit LABEL PROC PUBLIC _sk_darken_hsw_8bit _sk_darken_hsw_8bit LABEL PROC - DB 197,125,111,5,85,8,0,0 ; vmovdqa 0x855(%rip),%ymm8 # 3120 <_sk_difference_hsw_8bit+0x48c> + DB 197,125,111,5,117,15,0,0 ; vmovdqa 0xf75(%rip),%ymm8 # 3840 <_sk_overlay_hsw_8bit+0x699> DB 196,194,101,0,224 ; vpshufb %ymm8,%ymm3,%ymm4 DB 196,194,109,0,240 ; vpshufb %ymm8,%ymm2,%ymm6 DB 196,98,125,48,208 ; vpmovzxbw %xmm0,%ymm10 @@ -41605,7 +41605,7 @@ _sk_darken_hsw_8bit LABEL PROC DB 197,253,248,246 ; vpsubb %ymm6,%ymm0,%ymm6 DB 197,245,248,205 ; vpsubb %ymm5,%ymm1,%ymm1 DB 197,253,248,196 ; vpsubb %ymm4,%ymm0,%ymm0 - DB 196,226,125,88,37,254,3,0,0 ; vpbroadcastd 
0x3fe(%rip),%ymm4 # 2e38 <_sk_difference_hsw_8bit+0x1a4> + DB 196,226,125,88,37,10,11,0,0 ; vpbroadcastd 0xb0a(%rip),%ymm4 # 3544 <_sk_overlay_hsw_8bit+0x39d> DB 196,227,125,76,198,64 ; vpblendvb %ymm4,%ymm6,%ymm0,%ymm0 DB 196,227,117,76,207,64 ; vpblendvb %ymm4,%ymm7,%ymm1,%ymm1 DB 72,173 ; lods %ds:(%rsi),%rax @@ -41613,7 +41613,7 @@ _sk_darken_hsw_8bit LABEL PROC PUBLIC _sk_lighten_hsw_8bit _sk_lighten_hsw_8bit LABEL PROC - DB 197,125,111,5,238,6,0,0 ; vmovdqa 0x6ee(%rip),%ymm8 # 3140 <_sk_difference_hsw_8bit+0x4ac> + DB 197,125,111,5,14,14,0,0 ; vmovdqa 0xe0e(%rip),%ymm8 # 3860 <_sk_overlay_hsw_8bit+0x6b9> DB 196,194,101,0,224 ; vpshufb %ymm8,%ymm3,%ymm4 DB 196,194,109,0,240 ; vpshufb %ymm8,%ymm2,%ymm6 DB 196,98,125,48,208 ; vpmovzxbw %xmm0,%ymm10 @@ -41686,7 +41686,7 @@ _sk_lighten_hsw_8bit LABEL PROC DB 197,253,248,246 ; vpsubb %ymm6,%ymm0,%ymm6 DB 197,245,248,205 ; vpsubb %ymm5,%ymm1,%ymm1 DB 197,253,248,196 ; vpsubb %ymm4,%ymm0,%ymm0 - DB 196,226,125,88,37,123,2,0,0 ; vpbroadcastd 0x27b(%rip),%ymm4 # 2e3c <_sk_difference_hsw_8bit+0x1a8> + DB 196,226,125,88,37,135,9,0,0 ; vpbroadcastd 0x987(%rip),%ymm4 # 3548 <_sk_overlay_hsw_8bit+0x3a1> DB 196,227,125,76,198,64 ; vpblendvb %ymm4,%ymm6,%ymm0,%ymm0 DB 196,227,117,76,207,64 ; vpblendvb %ymm4,%ymm7,%ymm1,%ymm1 DB 72,173 ; lods %ds:(%rsi),%rax @@ -41724,7 +41724,7 @@ _sk_exclusion_hsw_8bit LABEL PROC DB 196,227,77,56,252,1 ; vinserti128 $0x1,%xmm4,%ymm6,%ymm7 DB 196,227,77,70,228,49 ; vperm2i128 $0x31,%ymm4,%ymm6,%ymm4 DB 197,197,103,228 ; vpackuswb %ymm4,%ymm7,%ymm4 - DB 197,253,111,53,240,4,0,0 ; vmovdqa 0x4f0(%rip),%ymm6 # 3160 <_sk_difference_hsw_8bit+0x4cc> + DB 197,253,111,53,16,12,0,0 ; vmovdqa 0xc10(%rip),%ymm6 # 3880 <_sk_overlay_hsw_8bit+0x6d9> DB 197,221,219,254 ; vpand %ymm6,%ymm4,%ymm7 DB 197,213,219,246 ; vpand %ymm6,%ymm5,%ymm6 DB 197,237,252,192 ; vpaddb %ymm0,%ymm2,%ymm0 @@ -41738,7 +41738,7 @@ _sk_exclusion_hsw_8bit LABEL PROC PUBLIC _sk_difference_hsw_8bit _sk_difference_hsw_8bit LABEL PROC - 
DB 197,125,111,5,228,4,0,0 ; vmovdqa 0x4e4(%rip),%ymm8 # 3180 <_sk_difference_hsw_8bit+0x4ec> + DB 197,125,111,5,4,12,0,0 ; vmovdqa 0xc04(%rip),%ymm8 # 38a0 <_sk_overlay_hsw_8bit+0x6f9> DB 196,194,101,0,224 ; vpshufb %ymm8,%ymm3,%ymm4 DB 196,194,109,0,240 ; vpshufb %ymm8,%ymm2,%ymm6 DB 196,98,125,48,208 ; vpmovzxbw %xmm0,%ymm10 @@ -41805,7 +41805,7 @@ _sk_difference_hsw_8bit LABEL PROC DB 197,197,103,237 ; vpackuswb %ymm5,%ymm7,%ymm5 DB 197,181,218,228 ; vpminub %ymm4,%ymm9,%ymm4 DB 197,165,218,237 ; vpminub %ymm5,%ymm11,%ymm5 - DB 197,253,111,53,174,3,0,0 ; vmovdqa 0x3ae(%rip),%ymm6 # 31a0 <_sk_difference_hsw_8bit+0x50c> + DB 197,253,111,53,206,10,0,0 ; vmovdqa 0xace(%rip),%ymm6 # 38c0 <_sk_overlay_hsw_8bit+0x719> DB 197,213,219,254 ; vpand %ymm6,%ymm5,%ymm7 DB 197,221,219,246 ; vpand %ymm6,%ymm4,%ymm6 DB 197,237,252,192 ; vpaddb %ymm0,%ymm2,%ymm0 @@ -41817,9 +41817,350 @@ _sk_difference_hsw_8bit LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 255,224 ; jmpq *%rax +PUBLIC _sk_hardlight_hsw_8bit +_sk_hardlight_hsw_8bit LABEL PROC + DB 72,129,236,56,2,0,0 ; sub $0x238,%rsp + DB 197,252,17,28,36 ; vmovups %ymm3,(%rsp) + DB 196,226,125,48,248 ; vpmovzxbw %xmm0,%ymm7 + DB 196,227,125,57,195,1 ; vextracti128 $0x1,%ymm0,%xmm3 + DB 196,226,125,48,243 ; vpmovzxbw %xmm3,%ymm6 + DB 196,98,125,48,217 ; vpmovzxbw %xmm1,%ymm11 + DB 197,126,127,156,36,0,1,0,0 ; vmovdqu %ymm11,0x100(%rsp) + DB 196,227,125,57,203,1 ; vextracti128 $0x1,%ymm1,%xmm3 + DB 196,98,125,48,195 ; vpmovzxbw %xmm3,%ymm8 + DB 197,126,127,132,36,64,1,0,0 ; vmovdqu %ymm8,0x140(%rsp) + DB 197,253,111,29,132,10,0,0 ; vmovdqa 0xa84(%rip),%ymm3 # 38e0 <_sk_overlay_hsw_8bit+0x739> + DB 196,226,125,0,227 ; vpshufb %ymm3,%ymm0,%ymm4 + DB 196,226,117,0,203 ; vpshufb %ymm3,%ymm1,%ymm1 + DB 197,125,111,251 ; vmovdqa %ymm3,%ymm15 + DB 196,227,125,57,200,1 ; vextracti128 $0x1,%ymm1,%xmm0 + DB 196,98,125,48,232 ; vpmovzxbw %xmm0,%ymm13 + DB 196,226,125,48,233 ; vpmovzxbw %xmm1,%ymm5 + DB 196,227,125,57,227,1 ; vextracti128 
$0x1,%ymm4,%xmm3 + DB 196,98,125,48,203 ; vpmovzxbw %xmm3,%ymm9 + DB 196,98,125,48,212 ; vpmovzxbw %xmm4,%ymm10 + DB 197,197,253,199 ; vpaddw %ymm7,%ymm7,%ymm0 + DB 197,205,253,222 ; vpaddw %ymm6,%ymm6,%ymm3 + DB 196,65,37,253,227 ; vpaddw %ymm11,%ymm11,%ymm12 + DB 196,65,61,253,216 ; vpaddw %ymm8,%ymm8,%ymm11 + DB 196,66,37,58,245 ; vpminuw %ymm13,%ymm11,%ymm14 + DB 196,193,37,117,206 ; vpcmpeqw %ymm14,%ymm11,%ymm1 + DB 197,254,127,140,36,224,1,0,0 ; vmovdqu %ymm1,0x1e0(%rsp) + DB 196,98,29,58,245 ; vpminuw %ymm5,%ymm12,%ymm14 + DB 196,193,29,117,206 ; vpcmpeqw %ymm14,%ymm12,%ymm1 + DB 197,254,127,140,36,192,1,0,0 ; vmovdqu %ymm1,0x1c0(%rsp) + DB 196,66,101,58,241 ; vpminuw %ymm9,%ymm3,%ymm14 + DB 196,193,101,117,206 ; vpcmpeqw %ymm14,%ymm3,%ymm1 + DB 197,254,127,140,36,128,1,0,0 ; vmovdqu %ymm1,0x180(%rsp) + DB 196,194,125,58,218 ; vpminuw %ymm10,%ymm0,%ymm3 + DB 197,125,117,243 ; vpcmpeqw %ymm3,%ymm0,%ymm14 + DB 197,253,111,226 ; vmovdqa %ymm2,%ymm4 + DB 196,65,125,111,231 ; vmovdqa %ymm15,%ymm12 + DB 196,194,93,0,220 ; vpshufb %ymm12,%ymm4,%ymm3 + DB 196,226,125,48,211 ; vpmovzxbw %xmm3,%ymm2 + DB 196,226,125,121,5,6,10,0,0 ; vpbroadcastw 0xa06(%rip),%ymm0 # 3900 <_sk_overlay_hsw_8bit+0x759> + DB 197,109,239,248 ; vpxor %ymm0,%ymm2,%ymm15 + DB 197,133,213,207 ; vpmullw %ymm7,%ymm15,%ymm1 + DB 197,254,127,140,36,160,1,0,0 ; vmovdqu %ymm1,0x1a0(%rsp) + DB 196,226,125,48,204 ; vpmovzxbw %xmm4,%ymm1 + DB 197,254,127,164,36,0,2,0,0 ; vmovdqu %ymm4,0x200(%rsp) + DB 197,45,239,248 ; vpxor %ymm0,%ymm10,%ymm15 + DB 197,5,213,193 ; vpmullw %ymm1,%ymm15,%ymm8 + DB 197,126,127,132,36,96,1,0,0 ; vmovdqu %ymm8,0x160(%rsp) + DB 197,117,213,199 ; vpmullw %ymm7,%ymm1,%ymm8 + DB 197,126,127,132,36,32,1,0,0 ; vmovdqu %ymm8,0x120(%rsp) + DB 197,237,249,201 ; vpsubw %ymm1,%ymm2,%ymm1 + DB 197,254,127,76,36,64 ; vmovdqu %ymm1,0x40(%rsp) + DB 196,193,109,213,202 ; vpmullw %ymm10,%ymm2,%ymm1 + DB 197,254,127,140,36,160,0,0,0 ; vmovdqu %ymm1,0xa0(%rsp) + DB 197,45,249,223 ; vpsubw 
%ymm7,%ymm10,%ymm11 + DB 196,227,125,57,226,1 ; vextracti128 $0x1,%ymm4,%xmm2 + DB 196,226,125,48,210 ; vpmovzxbw %xmm2,%ymm2 + DB 196,227,125,57,219,1 ; vextracti128 $0x1,%ymm3,%xmm3 + DB 196,226,125,48,219 ; vpmovzxbw %xmm3,%ymm3 + DB 197,101,239,208 ; vpxor %ymm0,%ymm3,%ymm10 + DB 197,173,213,206 ; vpmullw %ymm6,%ymm10,%ymm1 + DB 197,254,127,140,36,224,0,0,0 ; vmovdqu %ymm1,0xe0(%rsp) + DB 197,53,239,208 ; vpxor %ymm0,%ymm9,%ymm10 + DB 197,173,213,202 ; vpmullw %ymm2,%ymm10,%ymm1 + DB 197,254,127,140,36,192,0,0,0 ; vmovdqu %ymm1,0xc0(%rsp) + DB 197,237,213,206 ; vpmullw %ymm6,%ymm2,%ymm1 + DB 197,254,127,76,36,32 ; vmovdqu %ymm1,0x20(%rsp) + DB 197,229,249,226 ; vpsubw %ymm2,%ymm3,%ymm4 + DB 196,65,101,213,193 ; vpmullw %ymm9,%ymm3,%ymm8 + DB 197,181,249,246 ; vpsubw %ymm6,%ymm9,%ymm6 + DB 197,254,111,60,36 ; vmovdqu (%rsp),%ymm7 + DB 196,66,69,0,204 ; vpshufb %ymm12,%ymm7,%ymm9 + DB 196,194,125,48,209 ; vpmovzxbw %xmm9,%ymm2 + DB 197,109,239,248 ; vpxor %ymm0,%ymm2,%ymm15 + DB 197,126,111,164,36,0,1,0,0 ; vmovdqu 0x100(%rsp),%ymm12 + DB 196,193,5,213,204 ; vpmullw %ymm12,%ymm15,%ymm1 + DB 197,254,127,140,36,128,0,0,0 ; vmovdqu %ymm1,0x80(%rsp) + DB 196,226,125,48,207 ; vpmovzxbw %xmm7,%ymm1 + DB 197,85,239,248 ; vpxor %ymm0,%ymm5,%ymm15 + DB 197,133,213,217 ; vpmullw %ymm1,%ymm15,%ymm3 + DB 197,254,127,92,36,96 ; vmovdqu %ymm3,0x60(%rsp) + DB 196,65,117,213,212 ; vpmullw %ymm12,%ymm1,%ymm10 + DB 197,237,249,217 ; vpsubw %ymm1,%ymm2,%ymm3 + DB 197,237,213,213 ; vpmullw %ymm5,%ymm2,%ymm2 + DB 196,193,85,249,236 ; vpsubw %ymm12,%ymm5,%ymm5 + DB 196,195,125,57,255,1 ; vextracti128 $0x1,%ymm7,%xmm15 + DB 196,66,125,48,255 ; vpmovzxbw %xmm15,%ymm15 + DB 196,99,125,57,207,1 ; vextracti128 $0x1,%ymm9,%xmm7 + DB 196,226,125,48,255 ; vpmovzxbw %xmm7,%ymm7 + DB 197,69,239,200 ; vpxor %ymm0,%ymm7,%ymm9 + DB 197,254,111,140,36,64,1,0,0 ; vmovdqu 0x140(%rsp),%ymm1 + DB 197,53,213,225 ; vpmullw %ymm1,%ymm9,%ymm12 + DB 197,149,239,192 ; vpxor %ymm0,%ymm13,%ymm0 + DB 
196,65,125,213,207 ; vpmullw %ymm15,%ymm0,%ymm9 + DB 197,133,213,193 ; vpmullw %ymm1,%ymm15,%ymm0 + DB 196,65,69,249,255 ; vpsubw %ymm15,%ymm7,%ymm15 + DB 196,193,69,213,253 ; vpmullw %ymm13,%ymm7,%ymm7 + DB 197,21,249,233 ; vpsubw %ymm1,%ymm13,%ymm13 + DB 196,65,5,213,237 ; vpmullw %ymm13,%ymm15,%ymm13 + DB 197,229,213,205 ; vpmullw %ymm5,%ymm3,%ymm1 + DB 197,221,213,230 ; vpmullw %ymm6,%ymm4,%ymm4 + DB 197,165,213,108,36,64 ; vpmullw 0x40(%rsp),%ymm11,%ymm5 + DB 196,193,21,253,245 ; vpaddw %ymm13,%ymm13,%ymm6 + DB 197,197,249,246 ; vpsubw %ymm6,%ymm7,%ymm6 + DB 197,245,253,201 ; vpaddw %ymm1,%ymm1,%ymm1 + DB 197,237,249,201 ; vpsubw %ymm1,%ymm2,%ymm1 + DB 197,221,253,212 ; vpaddw %ymm4,%ymm4,%ymm2 + DB 197,189,249,210 ; vpsubw %ymm2,%ymm8,%ymm2 + DB 197,213,253,221 ; vpaddw %ymm5,%ymm5,%ymm3 + DB 197,254,111,164,36,160,0,0,0 ; vmovdqu 0xa0(%rsp),%ymm4 + DB 197,221,249,219 ; vpsubw %ymm3,%ymm4,%ymm3 + DB 197,254,111,164,36,32,1,0,0 ; vmovdqu 0x120(%rsp),%ymm4 + DB 197,221,253,228 ; vpaddw %ymm4,%ymm4,%ymm4 + DB 197,141,223,219 ; vpandn %ymm3,%ymm14,%ymm3 + DB 196,193,93,219,230 ; vpand %ymm14,%ymm4,%ymm4 + DB 197,221,235,219 ; vpor %ymm3,%ymm4,%ymm3 + DB 197,254,111,100,36,32 ; vmovdqu 0x20(%rsp),%ymm4 + DB 197,221,253,228 ; vpaddw %ymm4,%ymm4,%ymm4 + DB 197,254,111,172,36,128,1,0,0 ; vmovdqu 0x180(%rsp),%ymm5 + DB 197,213,223,210 ; vpandn %ymm2,%ymm5,%ymm2 + DB 197,221,219,229 ; vpand %ymm5,%ymm4,%ymm4 + DB 197,221,235,210 ; vpor %ymm2,%ymm4,%ymm2 + DB 196,193,45,253,226 ; vpaddw %ymm10,%ymm10,%ymm4 + DB 197,254,111,172,36,192,1,0,0 ; vmovdqu 0x1c0(%rsp),%ymm5 + DB 197,213,223,201 ; vpandn %ymm1,%ymm5,%ymm1 + DB 197,221,219,229 ; vpand %ymm5,%ymm4,%ymm4 + DB 197,221,235,201 ; vpor %ymm1,%ymm4,%ymm1 + DB 197,253,253,192 ; vpaddw %ymm0,%ymm0,%ymm0 + DB 197,254,111,172,36,224,1,0,0 ; vmovdqu 0x1e0(%rsp),%ymm5 + DB 197,213,223,230 ; vpandn %ymm6,%ymm5,%ymm4 + DB 197,253,219,197 ; vpand %ymm5,%ymm0,%ymm0 + DB 197,253,235,196 ; vpor %ymm4,%ymm0,%ymm0 + DB 
197,254,111,164,36,96,1,0,0 ; vmovdqu 0x160(%rsp),%ymm4 + DB 197,221,253,164,36,160,1,0,0 ; vpaddw 0x1a0(%rsp),%ymm4,%ymm4 + DB 197,254,111,172,36,192,0,0,0 ; vmovdqu 0xc0(%rsp),%ymm5 + DB 197,213,253,172,36,224,0,0,0 ; vpaddw 0xe0(%rsp),%ymm5,%ymm5 + DB 197,254,111,116,36,96 ; vmovdqu 0x60(%rsp),%ymm6 + DB 197,205,253,180,36,128,0,0,0 ; vpaddw 0x80(%rsp),%ymm6,%ymm6 + DB 196,193,53,253,252 ; vpaddw %ymm12,%ymm9,%ymm7 + DB 196,98,125,121,5,229,7,0,0 ; vpbroadcastw 0x7e5(%rip),%ymm8 # 3902 <_sk_overlay_hsw_8bit+0x75b> + DB 196,193,93,253,224 ; vpaddw %ymm8,%ymm4,%ymm4 + DB 197,229,253,220 ; vpaddw %ymm4,%ymm3,%ymm3 + DB 196,193,85,253,224 ; vpaddw %ymm8,%ymm5,%ymm4 + DB 197,237,253,212 ; vpaddw %ymm4,%ymm2,%ymm2 + DB 196,193,77,253,224 ; vpaddw %ymm8,%ymm6,%ymm4 + DB 197,245,253,204 ; vpaddw %ymm4,%ymm1,%ymm1 + DB 196,193,69,253,224 ; vpaddw %ymm8,%ymm7,%ymm4 + DB 197,253,253,196 ; vpaddw %ymm4,%ymm0,%ymm0 + DB 196,226,125,121,37,186,7,0,0 ; vpbroadcastw 0x7ba(%rip),%ymm4 # 3904 <_sk_overlay_hsw_8bit+0x75d> + DB 197,253,228,196 ; vpmulhuw %ymm4,%ymm0,%ymm0 + DB 197,245,228,204 ; vpmulhuw %ymm4,%ymm1,%ymm1 + DB 197,237,228,212 ; vpmulhuw %ymm4,%ymm2,%ymm2 + DB 197,237,113,210,7 ; vpsrlw $0x7,%ymm2,%ymm2 + DB 197,229,228,220 ; vpmulhuw %ymm4,%ymm3,%ymm3 + DB 197,229,113,211,7 ; vpsrlw $0x7,%ymm3,%ymm3 + DB 196,227,101,56,226,1 ; vinserti128 $0x1,%xmm2,%ymm3,%ymm4 + DB 196,227,101,70,210,49 ; vperm2i128 $0x31,%ymm2,%ymm3,%ymm2 + DB 197,229,113,208,7 ; vpsrlw $0x7,%ymm0,%ymm3 + DB 197,245,113,209,7 ; vpsrlw $0x7,%ymm1,%ymm1 + DB 197,221,103,194 ; vpackuswb %ymm2,%ymm4,%ymm0 + DB 196,227,117,56,211,1 ; vinserti128 $0x1,%xmm3,%ymm1,%ymm2 + DB 196,227,117,70,203,49 ; vperm2i128 $0x31,%ymm3,%ymm1,%ymm1 + DB 197,237,103,201 ; vpackuswb %ymm1,%ymm2,%ymm1 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 197,252,16,148,36,0,2,0,0 ; vmovups 0x200(%rsp),%ymm2 + DB 197,252,16,28,36 ; vmovups (%rsp),%ymm3 + DB 72,129,196,56,2,0,0 ; add $0x238,%rsp + DB 255,224 ; jmpq *%rax + +PUBLIC 
_sk_overlay_hsw_8bit +_sk_overlay_hsw_8bit LABEL PROC + DB 72,129,236,24,2,0,0 ; sub $0x218,%rsp + DB 197,252,17,140,36,192,1,0,0 ; vmovups %ymm1,0x1c0(%rsp) + DB 197,253,111,45,97,7,0,0 ; vmovdqa 0x761(%rip),%ymm5 # 3920 <_sk_overlay_hsw_8bit+0x779> + DB 197,253,111,226 ; vmovdqa %ymm2,%ymm4 + DB 196,227,125,57,225,1 ; vextracti128 $0x1,%ymm4,%xmm1 + DB 196,98,125,48,193 ; vpmovzxbw %xmm1,%ymm8 + DB 197,254,127,156,36,224,1,0,0 ; vmovdqu %ymm3,0x1e0(%rsp) + DB 196,226,125,48,251 ; vpmovzxbw %xmm3,%ymm7 + DB 196,227,125,57,217,1 ; vextracti128 $0x1,%ymm3,%xmm1 + DB 196,98,125,48,241 ; vpmovzxbw %xmm1,%ymm14 + DB 196,226,93,0,205 ; vpshufb %ymm5,%ymm4,%ymm1 + DB 197,254,127,164,36,160,1,0,0 ; vmovdqu %ymm4,0x1a0(%rsp) + DB 196,226,101,0,221 ; vpshufb %ymm5,%ymm3,%ymm3 + DB 196,227,125,57,222,1 ; vextracti128 $0x1,%ymm3,%xmm6 + DB 196,226,125,48,246 ; vpmovzxbw %xmm6,%ymm6 + DB 196,98,125,48,203 ; vpmovzxbw %xmm3,%ymm9 + DB 196,227,125,57,203,1 ; vextracti128 $0x1,%ymm1,%xmm3 + DB 196,98,125,48,219 ; vpmovzxbw %xmm3,%ymm11 + DB 196,193,61,253,216 ; vpaddw %ymm8,%ymm8,%ymm3 + DB 197,69,253,231 ; vpaddw %ymm7,%ymm7,%ymm12 + DB 196,65,13,253,214 ; vpaddw %ymm14,%ymm14,%ymm10 + DB 196,98,45,58,238 ; vpminuw %ymm6,%ymm10,%ymm13 + DB 196,193,45,117,213 ; vpcmpeqw %ymm13,%ymm10,%ymm2 + DB 197,254,127,148,36,128,1,0,0 ; vmovdqu %ymm2,0x180(%rsp) + DB 196,66,29,58,233 ; vpminuw %ymm9,%ymm12,%ymm13 + DB 196,193,29,117,213 ; vpcmpeqw %ymm13,%ymm12,%ymm2 + DB 197,254,127,148,36,96,1,0,0 ; vmovdqu %ymm2,0x160(%rsp) + DB 196,66,101,58,235 ; vpminuw %ymm11,%ymm3,%ymm13 + DB 196,193,101,117,213 ; vpcmpeqw %ymm13,%ymm3,%ymm2 + DB 197,254,127,148,36,32,1,0,0 ; vmovdqu %ymm2,0x120(%rsp) + DB 196,226,125,48,212 ; vpmovzxbw %xmm4,%ymm2 + DB 196,226,125,48,201 ; vpmovzxbw %xmm1,%ymm1 + DB 197,237,253,226 ; vpaddw %ymm2,%ymm2,%ymm4 + DB 196,226,93,58,217 ; vpminuw %ymm1,%ymm4,%ymm3 + DB 197,93,117,251 ; vpcmpeqw %ymm3,%ymm4,%ymm15 + DB 197,253,111,216 ; vmovdqa %ymm0,%ymm3 + DB 
196,226,125,48,227 ; vpmovzxbw %xmm3,%ymm4 + DB 196,226,125,121,5,187,6,0,0 ; vpbroadcastw 0x6bb(%rip),%ymm0 # 3940 <_sk_overlay_hsw_8bit+0x799> + DB 197,117,239,232 ; vpxor %ymm0,%ymm1,%ymm13 + DB 197,21,213,212 ; vpmullw %ymm4,%ymm13,%ymm10 + DB 197,126,127,148,36,64,1,0,0 ; vmovdqu %ymm10,0x140(%rsp) + DB 197,125,111,229 ; vmovdqa %ymm5,%ymm12 + DB 196,194,101,0,236 ; vpshufb %ymm12,%ymm3,%ymm5 + DB 197,125,111,211 ; vmovdqa %ymm3,%ymm10 + DB 196,226,125,48,221 ; vpmovzxbw %xmm5,%ymm3 + DB 197,101,239,232 ; vpxor %ymm0,%ymm3,%ymm13 + DB 197,21,213,234 ; vpmullw %ymm2,%ymm13,%ymm13 + DB 197,126,127,172,36,0,1,0,0 ; vmovdqu %ymm13,0x100(%rsp) + DB 197,109,213,236 ; vpmullw %ymm4,%ymm2,%ymm13 + DB 197,126,127,172,36,224,0,0,0 ; vmovdqu %ymm13,0xe0(%rsp) + DB 197,245,249,210 ; vpsubw %ymm2,%ymm1,%ymm2 + DB 197,254,127,84,36,32 ; vmovdqu %ymm2,0x20(%rsp) + DB 197,245,213,203 ; vpmullw %ymm3,%ymm1,%ymm1 + DB 197,254,127,140,36,128,0,0,0 ; vmovdqu %ymm1,0x80(%rsp) + DB 197,229,249,220 ; vpsubw %ymm4,%ymm3,%ymm3 + DB 196,99,125,57,212,1 ; vextracti128 $0x1,%ymm10,%xmm4 + DB 196,226,125,48,228 ; vpmovzxbw %xmm4,%ymm4 + DB 196,227,125,57,237,1 ; vextracti128 $0x1,%ymm5,%xmm5 + DB 196,226,125,48,237 ; vpmovzxbw %xmm5,%ymm5 + DB 197,37,239,232 ; vpxor %ymm0,%ymm11,%ymm13 + DB 197,149,213,204 ; vpmullw %ymm4,%ymm13,%ymm1 + DB 197,254,127,140,36,192,0,0,0 ; vmovdqu %ymm1,0xc0(%rsp) + DB 197,85,239,232 ; vpxor %ymm0,%ymm5,%ymm13 + DB 196,193,21,213,200 ; vpmullw %ymm8,%ymm13,%ymm1 + DB 197,254,127,140,36,160,0,0,0 ; vmovdqu %ymm1,0xa0(%rsp) + DB 197,189,213,204 ; vpmullw %ymm4,%ymm8,%ymm1 + DB 197,254,127,12,36 ; vmovdqu %ymm1,(%rsp) + DB 196,65,37,249,192 ; vpsubw %ymm8,%ymm11,%ymm8 + DB 197,37,213,221 ; vpmullw %ymm5,%ymm11,%ymm11 + DB 197,213,249,228 ; vpsubw %ymm4,%ymm5,%ymm4 + DB 197,254,111,140,36,192,1,0,0 ; vmovdqu 0x1c0(%rsp),%ymm1 + DB 196,194,117,0,236 ; vpshufb %ymm12,%ymm1,%ymm5 + DB 196,226,125,48,209 ; vpmovzxbw %xmm1,%ymm2 + DB 197,125,111,225 ; vmovdqa 
%ymm1,%ymm12 + DB 197,53,239,232 ; vpxor %ymm0,%ymm9,%ymm13 + DB 197,149,213,202 ; vpmullw %ymm2,%ymm13,%ymm1 + DB 197,254,127,76,36,96 ; vmovdqu %ymm1,0x60(%rsp) + DB 196,226,125,48,205 ; vpmovzxbw %xmm5,%ymm1 + DB 197,117,239,232 ; vpxor %ymm0,%ymm1,%ymm13 + DB 197,21,213,215 ; vpmullw %ymm7,%ymm13,%ymm10 + DB 197,126,127,84,36,64 ; vmovdqu %ymm10,0x40(%rsp) + DB 197,69,213,210 ; vpmullw %ymm2,%ymm7,%ymm10 + DB 197,181,249,255 ; vpsubw %ymm7,%ymm9,%ymm7 + DB 197,53,213,201 ; vpmullw %ymm1,%ymm9,%ymm9 + DB 197,245,249,202 ; vpsubw %ymm2,%ymm1,%ymm1 + DB 196,99,125,57,226,1 ; vextracti128 $0x1,%ymm12,%xmm2 + DB 196,226,125,48,210 ; vpmovzxbw %xmm2,%ymm2 + DB 196,227,125,57,237,1 ; vextracti128 $0x1,%ymm5,%xmm5 + DB 196,226,125,48,237 ; vpmovzxbw %xmm5,%ymm5 + DB 197,77,239,232 ; vpxor %ymm0,%ymm6,%ymm13 + DB 197,21,213,234 ; vpmullw %ymm2,%ymm13,%ymm13 + DB 197,213,239,192 ; vpxor %ymm0,%ymm5,%ymm0 + DB 196,65,125,213,230 ; vpmullw %ymm14,%ymm0,%ymm12 + DB 197,141,213,194 ; vpmullw %ymm2,%ymm14,%ymm0 + DB 196,65,77,249,246 ; vpsubw %ymm14,%ymm6,%ymm14 + DB 197,205,213,245 ; vpmullw %ymm5,%ymm6,%ymm6 + DB 197,213,249,210 ; vpsubw %ymm2,%ymm5,%ymm2 + DB 197,141,213,210 ; vpmullw %ymm2,%ymm14,%ymm2 + DB 197,197,213,201 ; vpmullw %ymm1,%ymm7,%ymm1 + DB 197,189,213,228 ; vpmullw %ymm4,%ymm8,%ymm4 + DB 197,229,213,92,36,32 ; vpmullw 0x20(%rsp),%ymm3,%ymm3 + DB 197,237,253,210 ; vpaddw %ymm2,%ymm2,%ymm2 + DB 197,205,249,210 ; vpsubw %ymm2,%ymm6,%ymm2 + DB 197,245,253,201 ; vpaddw %ymm1,%ymm1,%ymm1 + DB 197,181,249,201 ; vpsubw %ymm1,%ymm9,%ymm1 + DB 197,221,253,228 ; vpaddw %ymm4,%ymm4,%ymm4 + DB 197,165,249,228 ; vpsubw %ymm4,%ymm11,%ymm4 + DB 197,229,253,219 ; vpaddw %ymm3,%ymm3,%ymm3 + DB 197,254,111,172,36,128,0,0,0 ; vmovdqu 0x80(%rsp),%ymm5 + DB 197,213,249,219 ; vpsubw %ymm3,%ymm5,%ymm3 + DB 197,254,111,172,36,224,0,0,0 ; vmovdqu 0xe0(%rsp),%ymm5 + DB 197,213,253,237 ; vpaddw %ymm5,%ymm5,%ymm5 + DB 197,133,223,219 ; vpandn %ymm3,%ymm15,%ymm3 + DB 196,193,85,219,239 
; vpand %ymm15,%ymm5,%ymm5 + DB 197,213,235,219 ; vpor %ymm3,%ymm5,%ymm3 + DB 197,254,111,44,36 ; vmovdqu (%rsp),%ymm5 + DB 197,213,253,237 ; vpaddw %ymm5,%ymm5,%ymm5 + DB 197,254,111,180,36,32,1,0,0 ; vmovdqu 0x120(%rsp),%ymm6 + DB 197,205,223,228 ; vpandn %ymm4,%ymm6,%ymm4 + DB 197,213,219,238 ; vpand %ymm6,%ymm5,%ymm5 + DB 197,213,235,228 ; vpor %ymm4,%ymm5,%ymm4 + DB 196,193,45,253,234 ; vpaddw %ymm10,%ymm10,%ymm5 + DB 197,254,111,180,36,96,1,0,0 ; vmovdqu 0x160(%rsp),%ymm6 + DB 197,205,223,201 ; vpandn %ymm1,%ymm6,%ymm1 + DB 197,213,219,238 ; vpand %ymm6,%ymm5,%ymm5 + DB 197,213,235,201 ; vpor %ymm1,%ymm5,%ymm1 + DB 197,253,253,192 ; vpaddw %ymm0,%ymm0,%ymm0 + DB 197,254,111,172,36,128,1,0,0 ; vmovdqu 0x180(%rsp),%ymm5 + DB 197,213,223,210 ; vpandn %ymm2,%ymm5,%ymm2 + DB 197,253,219,197 ; vpand %ymm5,%ymm0,%ymm0 + DB 197,253,235,194 ; vpor %ymm2,%ymm0,%ymm0 + DB 197,254,111,148,36,0,1,0,0 ; vmovdqu 0x100(%rsp),%ymm2 + DB 197,237,253,148,36,64,1,0,0 ; vpaddw 0x140(%rsp),%ymm2,%ymm2 + DB 197,254,111,172,36,160,0,0,0 ; vmovdqu 0xa0(%rsp),%ymm5 + DB 197,213,253,172,36,192,0,0,0 ; vpaddw 0xc0(%rsp),%ymm5,%ymm5 + DB 197,254,111,116,36,64 ; vmovdqu 0x40(%rsp),%ymm6 + DB 197,205,253,116,36,96 ; vpaddw 0x60(%rsp),%ymm6,%ymm6 + DB 196,193,29,253,253 ; vpaddw %ymm13,%ymm12,%ymm7 + DB 196,98,125,121,5,174,4,0,0 ; vpbroadcastw 0x4ae(%rip),%ymm8 # 3942 <_sk_overlay_hsw_8bit+0x79b> + DB 196,193,109,253,208 ; vpaddw %ymm8,%ymm2,%ymm2 + DB 197,229,253,210 ; vpaddw %ymm2,%ymm3,%ymm2 + DB 196,193,85,253,216 ; vpaddw %ymm8,%ymm5,%ymm3 + DB 197,221,253,219 ; vpaddw %ymm3,%ymm4,%ymm3 + DB 196,193,77,253,224 ; vpaddw %ymm8,%ymm6,%ymm4 + DB 197,245,253,204 ; vpaddw %ymm4,%ymm1,%ymm1 + DB 196,193,69,253,224 ; vpaddw %ymm8,%ymm7,%ymm4 + DB 197,253,253,196 ; vpaddw %ymm4,%ymm0,%ymm0 + DB 196,226,125,121,37,131,4,0,0 ; vpbroadcastw 0x483(%rip),%ymm4 # 3944 <_sk_overlay_hsw_8bit+0x79d> + DB 197,253,228,196 ; vpmulhuw %ymm4,%ymm0,%ymm0 + DB 197,245,228,204 ; vpmulhuw %ymm4,%ymm1,%ymm1 + DB 
197,229,228,220 ; vpmulhuw %ymm4,%ymm3,%ymm3 + DB 197,229,113,211,7 ; vpsrlw $0x7,%ymm3,%ymm3 + DB 197,237,228,212 ; vpmulhuw %ymm4,%ymm2,%ymm2 + DB 197,237,113,210,7 ; vpsrlw $0x7,%ymm2,%ymm2 + DB 196,227,109,56,227,1 ; vinserti128 $0x1,%xmm3,%ymm2,%ymm4 + DB 196,227,109,70,211,49 ; vperm2i128 $0x31,%ymm3,%ymm2,%ymm2 + DB 197,229,113,208,7 ; vpsrlw $0x7,%ymm0,%ymm3 + DB 197,245,113,209,7 ; vpsrlw $0x7,%ymm1,%ymm1 + DB 197,221,103,194 ; vpackuswb %ymm2,%ymm4,%ymm0 + DB 196,227,117,56,211,1 ; vinserti128 $0x1,%xmm3,%ymm1,%ymm2 + DB 196,227,117,70,203,49 ; vperm2i128 $0x31,%ymm3,%ymm1,%ymm1 + DB 197,237,103,201 ; vpackuswb %ymm1,%ymm2,%ymm1 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 197,252,16,148,36,160,1,0,0 ; vmovups 0x1a0(%rsp),%ymm2 + DB 197,252,16,156,36,224,1,0,0 ; vmovups 0x1e0(%rsp),%ymm3 + DB 72,129,196,24,2,0,0 ; add $0x218,%rsp + DB 255,224 ; jmpq *%rax + ALIGN 4 DB 0,0 ; add %al,(%rax) - DB 127,67 ; jg 2e5f <_sk_difference_hsw_8bit+0x1cb> + DB 127,67 ; jg 356b <_sk_overlay_hsw_8bit+0x3c4> DB 1,1 ; add %eax,(%rcx) DB 1,0 ; add %eax,(%rax) DB 0,0 ; add %al,(%rax) @@ -41829,9 +42170,9 @@ ALIGN 4 DB 0,0 ; add %al,(%rax) DB 0,255 ; add %bh,%bh DB 0,0 ; add %al,(%rax) - DB 127,67 ; jg 2e73 <_sk_difference_hsw_8bit+0x1df> + DB 127,67 ; jg 357f <_sk_overlay_hsw_8bit+0x3d8> DB 0,0 ; add %al,(%rax) - DB 127,67 ; jg 2e77 <_sk_difference_hsw_8bit+0x1e3> + DB 127,67 ; jg 3583 <_sk_overlay_hsw_8bit+0x3dc> DB 0,0 ; add %al,(%rax) DB 0,255 ; add %bh,%bh DB 255 ; (bad) @@ -42388,6 +42729,66 @@ ALIGN 32 DB 255 ; (bad) DB 255 ; (bad) DB 255,0 ; incl (%rax) + DB 3,3 ; add (%rbx),%eax + DB 3,3 ; add (%rbx),%eax + DB 7 ; (bad) + DB 7 ; (bad) + DB 7 ; (bad) + DB 7 ; (bad) + DB 11,11 ; or (%rbx),%ecx + DB 11,11 ; or (%rbx),%ecx + DB 15 ; (bad) + DB 15 ; (bad) + DB 15 ; (bad) + DB 15,3,3 ; lsl (%rbx),%eax + DB 3,3 ; add (%rbx),%eax + DB 7 ; (bad) + DB 7 ; (bad) + DB 7 ; (bad) + DB 7 ; (bad) + DB 11,11 ; or (%rbx),%ecx + DB 11,11 ; or (%rbx),%ecx + DB 15 ; (bad) + DB 15 ; (bad) + DB 
15 ; (bad) + DB 15,255 ; (bad) + DB 0,127,0 ; add %bh,0x0(%rdi) + DB 129,128,0,0,0,0,0,0,0,0 ; addl $0x0,0x0(%rax) + DB 0,0 ; add %al,(%rax) + DB 0,0 ; add %al,(%rax) + DB 0,0 ; add %al,(%rax) + DB 0,0 ; add %al,(%rax) + DB 0,0 ; add %al,(%rax) + DB 0,0 ; add %al,(%rax) + DB 0,0 ; add %al,(%rax) + DB 0,0 ; add %al,(%rax) + DB 0,0 ; add %al,(%rax) + DB 3,3 ; add (%rbx),%eax + DB 3,3 ; add (%rbx),%eax + DB 7 ; (bad) + DB 7 ; (bad) + DB 7 ; (bad) + DB 7 ; (bad) + DB 11,11 ; or (%rbx),%ecx + DB 11,11 ; or (%rbx),%ecx + DB 15 ; (bad) + DB 15 ; (bad) + DB 15 ; (bad) + DB 15,3,3 ; lsl (%rbx),%eax + DB 3,3 ; add (%rbx),%eax + DB 7 ; (bad) + DB 7 ; (bad) + DB 7 ; (bad) + DB 7 ; (bad) + DB 11,11 ; or (%rbx),%ecx + DB 11,11 ; or (%rbx),%ecx + DB 15 ; (bad) + DB 15 ; (bad) + DB 15 ; (bad) + DB 15,255 ; (bad) + DB 0,127,0 ; add %bh,0x0(%rdi) + DB 129 ; .byte 0x81 + DB 128 ; .byte 0x80 ALIGN 16 DB 0,2 ; add %al,(%rdx) @@ -42498,7 +42899,7 @@ _sk_uniform_color_sse41_8bit LABEL PROC PUBLIC _sk_set_rgb_sse41_8bit _sk_set_rgb_sse41_8bit LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax - DB 243,15,16,37,88,33,0,0 ; movss 0x2158(%rip),%xmm4 # 22d8 <_sk_difference_sse41_8bit+0x169> + DB 243,15,16,37,152,41,0,0 ; movss 0x2998(%rip),%xmm4 # 2b18 <_sk_overlay_sse41_8bit+0x42f> DB 243,15,16,40 ; movss (%rax),%xmm5 DB 243,15,89,236 ; mulss %xmm4,%xmm5 DB 243,72,15,44,205 ; cvttss2si %xmm5,%rcx @@ -42513,7 +42914,7 @@ _sk_set_rgb_sse41_8bit LABEL PROC DB 9,208 ; or %edx,%eax DB 102,15,110,224 ; movd %eax,%xmm4 DB 102,15,112,228,0 ; pshufd $0x0,%xmm4,%xmm4 - DB 102,15,111,45,48,33,0,0 ; movdqa 0x2130(%rip),%xmm5 # 22f0 <_sk_difference_sse41_8bit+0x181> + DB 102,15,111,45,112,41,0,0 ; movdqa 0x2970(%rip),%xmm5 # 2b30 <_sk_overlay_sse41_8bit+0x447> DB 102,15,219,205 ; pand %xmm5,%xmm1 DB 102,15,219,197 ; pand %xmm5,%xmm0 DB 102,15,235,196 ; por %xmm4,%xmm0 @@ -42525,12 +42926,12 @@ PUBLIC _sk_premul_sse41_8bit _sk_premul_sse41_8bit LABEL PROC DB 102,15,111,225 ; movdqa %xmm1,%xmm4 DB 102,15,111,232 
; movdqa %xmm0,%xmm5 - DB 102,15,111,5,28,33,0,0 ; movdqa 0x211c(%rip),%xmm0 # 2300 <_sk_difference_sse41_8bit+0x191> + DB 102,15,111,5,92,41,0,0 ; movdqa 0x295c(%rip),%xmm0 # 2b40 <_sk_overlay_sse41_8bit+0x457> DB 102,15,111,253 ; movdqa %xmm5,%xmm7 DB 102,15,56,0,248 ; pshufb %xmm0,%xmm7 DB 102,15,111,244 ; movdqa %xmm4,%xmm6 DB 102,15,56,0,240 ; pshufb %xmm0,%xmm6 - DB 102,15,111,5,18,33,0,0 ; movdqa 0x2112(%rip),%xmm0 # 2310 <_sk_difference_sse41_8bit+0x1a1> + DB 102,15,111,5,82,41,0,0 ; movdqa 0x2952(%rip),%xmm0 # 2b50 <_sk_overlay_sse41_8bit+0x467> DB 102,15,235,240 ; por %xmm0,%xmm6 DB 102,15,235,248 ; por %xmm0,%xmm7 DB 102,69,15,239,192 ; pxor %xmm8,%xmm8 @@ -42561,7 +42962,7 @@ _sk_premul_sse41_8bit LABEL PROC PUBLIC _sk_swap_rb_sse41_8bit _sk_swap_rb_sse41_8bit LABEL PROC - DB 102,15,111,37,159,32,0,0 ; movdqa 0x209f(%rip),%xmm4 # 2320 <_sk_difference_sse41_8bit+0x1b1> + DB 102,15,111,37,223,40,0,0 ; movdqa 0x28df(%rip),%xmm4 # 2b60 <_sk_overlay_sse41_8bit+0x477> DB 102,15,56,0,196 ; pshufb %xmm4,%xmm0 DB 102,15,56,0,204 ; pshufb %xmm4,%xmm1 DB 72,173 ; lods %ds:(%rsi),%rax @@ -42682,7 +43083,7 @@ _sk_load_8888_dst_sse41_8bit LABEL PROC DB 255 ; (bad) DB 255 ; (bad) DB 255 ; (bad) - DB 233,255,255,255,221 ; jmpq ffffffffde000410 <_sk_difference_sse41_8bit+0xffffffffddffe2a1> + DB 233,255,255,255,221 ; jmpq ffffffffde000410 <_sk_overlay_sse41_8bit+0xffffffffddffdd27> DB 255 ; (bad) DB 255 ; (bad) DB 255 ; .byte 0xff @@ -42761,7 +43162,7 @@ _sk_load_bgra_sse41_8bit LABEL PROC DB 117,35 ; jne 504 <_sk_load_bgra_sse41_8bit+0x44> DB 243,66,15,111,76,130,16 ; movdqu 0x10(%rdx,%r8,4),%xmm1 DB 243,66,15,111,4,130 ; movdqu (%rdx,%r8,4),%xmm0 - DB 102,15,111,37,58,30,0,0 ; movdqa 0x1e3a(%rip),%xmm4 # 2330 <_sk_difference_sse41_8bit+0x1c1> + DB 102,15,111,37,122,38,0,0 ; movdqa 0x267a(%rip),%xmm4 # 2b70 <_sk_overlay_sse41_8bit+0x487> DB 102,15,56,0,196 ; pshufb %xmm4,%xmm0 DB 102,15,56,0,204 ; pshufb %xmm4,%xmm1 DB 72,173 ; lods %ds:(%rsi),%rax @@ -42822,7 +43223,7 
@@ _sk_load_bgra_dst_sse41_8bit LABEL PROC DB 117,35 ; jne 5d4 <_sk_load_bgra_dst_sse41_8bit+0x44> DB 243,66,15,111,92,130,16 ; movdqu 0x10(%rdx,%r8,4),%xmm3 DB 243,66,15,111,20,130 ; movdqu (%rdx,%r8,4),%xmm2 - DB 102,15,111,37,122,29,0,0 ; movdqa 0x1d7a(%rip),%xmm4 # 2340 <_sk_difference_sse41_8bit+0x1d1> + DB 102,15,111,37,186,37,0,0 ; movdqa 0x25ba(%rip),%xmm4 # 2b80 <_sk_overlay_sse41_8bit+0x497> DB 102,15,56,0,212 ; pshufb %xmm4,%xmm2 DB 102,15,56,0,220 ; pshufb %xmm4,%xmm3 DB 72,173 ; lods %ds:(%rsi),%rax @@ -42879,7 +43280,7 @@ _sk_store_bgra_sse41_8bit LABEL PROC DB 72,15,175,209 ; imul %rcx,%rdx DB 72,193,226,2 ; shl $0x2,%rdx DB 72,3,16 ; add (%rax),%rdx - DB 102,15,111,53,204,28,0,0 ; movdqa 0x1ccc(%rip),%xmm6 # 2350 <_sk_difference_sse41_8bit+0x1e1> + DB 102,15,111,53,12,37,0,0 ; movdqa 0x250c(%rip),%xmm6 # 2b90 <_sk_overlay_sse41_8bit+0x4a7> DB 102,15,111,233 ; movdqa %xmm1,%xmm5 DB 102,15,56,0,238 ; pshufb %xmm6,%xmm5 DB 102,15,111,224 ; movdqa %xmm0,%xmm4 @@ -42943,7 +43344,7 @@ _sk_load_a8_sse41_8bit LABEL PROC DB 77,133,201 ; test %r9,%r9 DB 117,42 ; jne 76b <_sk_load_a8_sse41_8bit+0x47> DB 102,66,15,56,48,12,2 ; pmovzxbw (%rdx,%r8,1),%xmm1 - DB 102,15,219,13,16,28,0,0 ; pand 0x1c10(%rip),%xmm1 # 2360 <_sk_difference_sse41_8bit+0x1f1> + DB 102,15,219,13,80,36,0,0 ; pand 0x2450(%rip),%xmm1 # 2ba0 <_sk_overlay_sse41_8bit+0x4b7> DB 102,15,239,228 ; pxor %xmm4,%xmm4 DB 102,15,56,51,193 ; pmovzxwd %xmm1,%xmm0 DB 102,15,105,204 ; punpckhwd %xmm4,%xmm1 @@ -42991,7 +43392,7 @@ _sk_load_a8_sse41_8bit LABEL PROC DB 255 ; (bad) DB 255 ; (bad) DB 255 ; (bad) - DB 233,255,255,255,222 ; jmpq ffffffffdf00080c <_sk_difference_sse41_8bit+0xffffffffdeffe69d> + DB 233,255,255,255,222 ; jmpq ffffffffdf00080c <_sk_overlay_sse41_8bit+0xffffffffdeffe123> DB 255 ; (bad) DB 255 ; (bad) DB 255,211 ; callq *%rbx @@ -43014,7 +43415,7 @@ _sk_load_a8_dst_sse41_8bit LABEL PROC DB 77,133,201 ; test %r9,%r9 DB 117,42 ; jne 85f <_sk_load_a8_dst_sse41_8bit+0x47> DB 
102,66,15,56,48,28,2 ; pmovzxbw (%rdx,%r8,1),%xmm3 - DB 102,15,219,29,44,27,0,0 ; pand 0x1b2c(%rip),%xmm3 # 2370 <_sk_difference_sse41_8bit+0x201> + DB 102,15,219,29,108,35,0,0 ; pand 0x236c(%rip),%xmm3 # 2bb0 <_sk_overlay_sse41_8bit+0x4c7> DB 102,15,239,228 ; pxor %xmm4,%xmm4 DB 102,15,56,51,211 ; pmovzxwd %xmm3,%xmm2 DB 102,15,105,220 ; punpckhwd %xmm4,%xmm3 @@ -43062,7 +43463,7 @@ _sk_load_a8_dst_sse41_8bit LABEL PROC DB 255 ; (bad) DB 255 ; (bad) DB 255 ; (bad) - DB 233,255,255,255,222 ; jmpq ffffffffdf000900 <_sk_difference_sse41_8bit+0xffffffffdeffe791> + DB 233,255,255,255,222 ; jmpq ffffffffdf000900 <_sk_overlay_sse41_8bit+0xffffffffdeffe217> DB 255 ; (bad) DB 255 ; (bad) DB 255,211 ; callq *%rbx @@ -43082,7 +43483,7 @@ _sk_store_a8_sse41_8bit LABEL PROC DB 72,99,87,8 ; movslq 0x8(%rdi),%rdx DB 72,15,175,209 ; imul %rcx,%rdx DB 72,3,16 ; add (%rax),%rdx - DB 102,15,111,45,84,26,0,0 ; movdqa 0x1a54(%rip),%xmm5 # 2380 <_sk_difference_sse41_8bit+0x211> + DB 102,15,111,45,148,34,0,0 ; movdqa 0x2294(%rip),%xmm5 # 2bc0 <_sk_overlay_sse41_8bit+0x4d7> DB 102,15,111,241 ; movdqa %xmm1,%xmm6 DB 102,15,56,0,245 ; pshufb %xmm5,%xmm6 DB 102,15,111,224 ; movdqa %xmm0,%xmm4 @@ -43090,7 +43491,7 @@ _sk_store_a8_sse41_8bit LABEL PROC DB 102,15,108,230 ; punpcklqdq %xmm6,%xmm4 DB 77,133,201 ; test %r9,%r9 DB 117,19 ; jne 95a <_sk_store_a8_sse41_8bit+0x4e> - DB 102,15,56,0,37,96,26,0,0 ; pshufb 0x1a60(%rip),%xmm4 # 23b0 <_sk_difference_sse41_8bit+0x241> + DB 102,15,56,0,37,160,34,0,0 ; pshufb 0x22a0(%rip),%xmm4 # 2bf0 <_sk_overlay_sse41_8bit+0x507> DB 102,66,15,214,36,2 ; movq %xmm4,(%rdx,%r8,1) DB 72,173 ; lods %ds:(%rsi),%rax DB 255,224 ; jmpq *%rax @@ -43106,13 +43507,13 @@ _sk_store_a8_sse41_8bit LABEL PROC DB 102,66,15,58,20,36,2,0 ; pextrb $0x0,%xmm4,(%rdx,%r8,1) DB 235,209 ; jmp 956 <_sk_store_a8_sse41_8bit+0x4a> DB 102,66,15,58,20,100,2,2,4 ; pextrb $0x4,%xmm4,0x2(%rdx,%r8,1) - DB 102,15,56,0,37,9,26,0,0 ; pshufb 0x1a09(%rip),%xmm4 # 23a0 
<_sk_difference_sse41_8bit+0x231> + DB 102,15,56,0,37,73,34,0,0 ; pshufb 0x2249(%rip),%xmm4 # 2be0 <_sk_overlay_sse41_8bit+0x4f7> DB 102,66,15,58,21,36,2,0 ; pextrw $0x0,%xmm4,(%rdx,%r8,1) DB 235,181 ; jmp 956 <_sk_store_a8_sse41_8bit+0x4a> DB 102,66,15,58,20,100,2,6,12 ; pextrb $0xc,%xmm4,0x6(%rdx,%r8,1) DB 102,66,15,58,20,100,2,5,10 ; pextrb $0xa,%xmm4,0x5(%rdx,%r8,1) DB 102,66,15,58,20,100,2,4,8 ; pextrb $0x8,%xmm4,0x4(%rdx,%r8,1) - DB 102,15,56,0,37,203,25,0,0 ; pshufb 0x19cb(%rip),%xmm4 # 2390 <_sk_difference_sse41_8bit+0x221> + DB 102,15,56,0,37,11,34,0,0 ; pshufb 0x220b(%rip),%xmm4 # 2bd0 <_sk_overlay_sse41_8bit+0x4e7> DB 102,66,15,126,36,2 ; movd %xmm4,(%rdx,%r8,1) DB 235,137 ; jmp 956 <_sk_store_a8_sse41_8bit+0x4a> DB 15,31,0 ; nopl (%rax) @@ -43150,14 +43551,14 @@ _sk_load_g8_sse41_8bit LABEL PROC DB 77,133,201 ; test %r9,%r9 DB 117,66 ; jne a4b <_sk_load_g8_sse41_8bit+0x5f> DB 102,66,15,56,48,12,2 ; pmovzxbw (%rdx,%r8,1),%xmm1 - DB 102,15,219,13,168,25,0,0 ; pand 0x19a8(%rip),%xmm1 # 23c0 <_sk_difference_sse41_8bit+0x251> + DB 102,15,219,13,232,33,0,0 ; pand 0x21e8(%rip),%xmm1 # 2c00 <_sk_overlay_sse41_8bit+0x517> DB 102,15,239,228 ; pxor %xmm4,%xmm4 DB 102,15,56,51,193 ; pmovzxwd %xmm1,%xmm0 DB 102,15,105,204 ; punpckhwd %xmm4,%xmm1 - DB 102,15,111,37,163,25,0,0 ; movdqa 0x19a3(%rip),%xmm4 # 23d0 <_sk_difference_sse41_8bit+0x261> + DB 102,15,111,37,227,33,0,0 ; movdqa 0x21e3(%rip),%xmm4 # 2c10 <_sk_overlay_sse41_8bit+0x527> DB 102,15,56,64,204 ; pmulld %xmm4,%xmm1 DB 102,15,56,64,196 ; pmulld %xmm4,%xmm0 - DB 102,15,111,37,161,25,0,0 ; movdqa 0x19a1(%rip),%xmm4 # 23e0 <_sk_difference_sse41_8bit+0x271> + DB 102,15,111,37,225,33,0,0 ; movdqa 0x21e1(%rip),%xmm4 # 2c20 <_sk_overlay_sse41_8bit+0x537> DB 102,15,235,196 ; por %xmm4,%xmm0 DB 102,15,235,204 ; por %xmm4,%xmm1 DB 72,173 ; lods %ds:(%rsi),%rax @@ -43202,7 +43603,7 @@ _sk_load_g8_sse41_8bit LABEL PROC DB 255 ; (bad) DB 255 ; (bad) DB 255 ; (bad) - DB 232,255,255,255,221 ; callq ffffffffde000af0 
<_sk_difference_sse41_8bit+0xffffffffddffe981> + DB 232,255,255,255,221 ; callq ffffffffde000af0 <_sk_overlay_sse41_8bit+0xffffffffddffe407> DB 255 ; (bad) DB 255 ; (bad) DB 255,210 ; callq *%rdx @@ -43225,14 +43626,14 @@ _sk_load_g8_dst_sse41_8bit LABEL PROC DB 77,133,201 ; test %r9,%r9 DB 117,66 ; jne b5b <_sk_load_g8_dst_sse41_8bit+0x5f> DB 102,66,15,56,48,28,2 ; pmovzxbw (%rdx,%r8,1),%xmm3 - DB 102,15,219,29,200,24,0,0 ; pand 0x18c8(%rip),%xmm3 # 23f0 <_sk_difference_sse41_8bit+0x281> + DB 102,15,219,29,8,33,0,0 ; pand 0x2108(%rip),%xmm3 # 2c30 <_sk_overlay_sse41_8bit+0x547> DB 102,15,239,228 ; pxor %xmm4,%xmm4 DB 102,15,56,51,211 ; pmovzxwd %xmm3,%xmm2 DB 102,15,105,220 ; punpckhwd %xmm4,%xmm3 - DB 102,15,111,37,195,24,0,0 ; movdqa 0x18c3(%rip),%xmm4 # 2400 <_sk_difference_sse41_8bit+0x291> + DB 102,15,111,37,3,33,0,0 ; movdqa 0x2103(%rip),%xmm4 # 2c40 <_sk_overlay_sse41_8bit+0x557> DB 102,15,56,64,220 ; pmulld %xmm4,%xmm3 DB 102,15,56,64,212 ; pmulld %xmm4,%xmm2 - DB 102,15,111,37,193,24,0,0 ; movdqa 0x18c1(%rip),%xmm4 # 2410 <_sk_difference_sse41_8bit+0x2a1> + DB 102,15,111,37,1,33,0,0 ; movdqa 0x2101(%rip),%xmm4 # 2c50 <_sk_overlay_sse41_8bit+0x567> DB 102,15,235,212 ; por %xmm4,%xmm2 DB 102,15,235,220 ; por %xmm4,%xmm3 DB 72,173 ; lods %ds:(%rsi),%rax @@ -43277,7 +43678,7 @@ _sk_load_g8_dst_sse41_8bit LABEL PROC DB 255 ; (bad) DB 255 ; (bad) DB 255 ; (bad) - DB 232,255,255,255,221 ; callq ffffffffde000c00 <_sk_difference_sse41_8bit+0xffffffffddffea91> + DB 232,255,255,255,221 ; callq ffffffffde000c00 <_sk_overlay_sse41_8bit+0xffffffffddffe517> DB 255 ; (bad) DB 255 ; (bad) DB 255,210 ; callq *%rdx @@ -43303,7 +43704,7 @@ _sk_srcover_rgba_8888_sse41_8bit LABEL PROC DB 243,70,15,111,68,138,16 ; movdqu 0x10(%rdx,%r9,4),%xmm8 DB 243,70,15,111,12,138 ; movdqu (%rdx,%r9,4),%xmm9 DB 77,133,192 ; test %r8,%r8 - DB 102,15,111,37,215,23,0,0 ; movdqa 0x17d7(%rip),%xmm4 # 2420 <_sk_difference_sse41_8bit+0x2b1> + DB 102,15,111,37,23,32,0,0 ; movdqa 0x2017(%rip),%xmm4 # 
2c60 <_sk_overlay_sse41_8bit+0x577> DB 102,15,111,241 ; movdqa %xmm1,%xmm6 DB 102,15,56,0,244 ; pshufb %xmm4,%xmm6 DB 102,15,111,248 ; movdqa %xmm0,%xmm7 @@ -43423,7 +43824,7 @@ _sk_scale_1_float_sse41_8bit LABEL PROC DB 102,15,111,232 ; movdqa %xmm0,%xmm5 DB 72,173 ; lods %ds:(%rsi),%rax DB 243,15,16,0 ; movss (%rax),%xmm0 - DB 243,15,89,5,178,20,0,0 ; mulss 0x14b2(%rip),%xmm0 # 22dc <_sk_difference_sse41_8bit+0x16d> + DB 243,15,89,5,242,28,0,0 ; mulss 0x1cf2(%rip),%xmm0 # 2b1c <_sk_overlay_sse41_8bit+0x433> DB 243,15,44,192 ; cvttss2si %xmm0,%eax DB 15,87,192 ; xorps %xmm0,%xmm0 DB 102,68,15,56,48,197 ; pmovzxbw %xmm5,%xmm8 @@ -43431,7 +43832,7 @@ _sk_scale_1_float_sse41_8bit LABEL PROC DB 102,68,15,56,48,204 ; pmovzxbw %xmm4,%xmm9 DB 102,15,104,224 ; punpckhbw %xmm0,%xmm4 DB 102,15,110,240 ; movd %eax,%xmm6 - DB 102,15,56,0,53,222,21,0,0 ; pshufb 0x15de(%rip),%xmm6 # 2430 <_sk_difference_sse41_8bit+0x2c1> + DB 102,15,56,0,53,30,30,0,0 ; pshufb 0x1e1e(%rip),%xmm6 # 2c70 <_sk_overlay_sse41_8bit+0x587> DB 102,15,111,206 ; movdqa %xmm6,%xmm1 DB 102,65,15,213,201 ; pmullw %xmm9,%xmm1 DB 102,15,111,198 ; movdqa %xmm6,%xmm0 @@ -43464,11 +43865,11 @@ _sk_scale_u8_sse41_8bit LABEL PROC DB 77,133,201 ; test %r9,%r9 DB 15,133,160,0,0,0 ; jne f63 <_sk_scale_u8_sse41_8bit+0xc1> DB 102,66,15,56,48,52,2 ; pmovzxbw (%rdx,%r8,1),%xmm6 - DB 102,15,219,53,110,21,0,0 ; pand 0x156e(%rip),%xmm6 # 2440 <_sk_difference_sse41_8bit+0x2d1> + DB 102,15,219,53,174,29,0,0 ; pand 0x1dae(%rip),%xmm6 # 2c80 <_sk_overlay_sse41_8bit+0x597> DB 102,69,15,239,192 ; pxor %xmm8,%xmm8 DB 102,15,111,254 ; movdqa %xmm6,%xmm7 - DB 102,15,56,0,61,108,21,0,0 ; pshufb 0x156c(%rip),%xmm7 # 2450 <_sk_difference_sse41_8bit+0x2e1> - DB 102,15,56,0,53,115,21,0,0 ; pshufb 0x1573(%rip),%xmm6 # 2460 <_sk_difference_sse41_8bit+0x2f1> + DB 102,15,56,0,61,172,29,0,0 ; pshufb 0x1dac(%rip),%xmm7 # 2c90 <_sk_overlay_sse41_8bit+0x5a7> + DB 102,15,56,0,53,179,29,0,0 ; pshufb 0x1db3(%rip),%xmm6 # 2ca0 
<_sk_overlay_sse41_8bit+0x5b7> DB 102,68,15,56,48,200 ; pmovzxbw %xmm0,%xmm9 DB 102,65,15,104,192 ; punpckhbw %xmm8,%xmm0 DB 102,68,15,56,48,209 ; pmovzxbw %xmm1,%xmm10 @@ -43551,7 +43952,7 @@ PUBLIC _sk_lerp_1_float_sse41_8bit _sk_lerp_1_float_sse41_8bit LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 243,15,16,32 ; movss (%rax),%xmm4 - DB 243,15,89,37,182,18,0,0 ; mulss 0x12b6(%rip),%xmm4 # 22e0 <_sk_difference_sse41_8bit+0x171> + DB 243,15,89,37,246,26,0,0 ; mulss 0x1af6(%rip),%xmm4 # 2b20 <_sk_overlay_sse41_8bit+0x437> DB 243,15,44,196 ; cvttss2si %xmm4,%eax DB 102,15,110,224 ; movd %eax,%xmm4 DB 102,15,96,228 ; punpcklbw %xmm4,%xmm4 @@ -43562,7 +43963,7 @@ _sk_lerp_1_float_sse41_8bit LABEL PROC DB 102,65,15,104,193 ; punpckhbw %xmm9,%xmm0 DB 102,68,15,56,48,217 ; pmovzxbw %xmm1,%xmm11 DB 102,65,15,104,201 ; punpckhbw %xmm9,%xmm1 - DB 102,15,56,0,61,11,20,0,0 ; pshufb 0x140b(%rip),%xmm7 # 2470 <_sk_difference_sse41_8bit+0x301> + DB 102,15,56,0,61,75,28,0,0 ; pshufb 0x1c4b(%rip),%xmm7 # 2cb0 <_sk_overlay_sse41_8bit+0x5c7> DB 102,68,15,111,231 ; movdqa %xmm7,%xmm12 DB 102,69,15,213,227 ; pmullw %xmm11,%xmm12 DB 102,68,15,111,239 ; movdqa %xmm7,%xmm13 @@ -43623,11 +44024,11 @@ _sk_lerp_u8_sse41_8bit LABEL PROC DB 77,133,201 ; test %r9,%r9 DB 15,133,46,1,0,0 ; jne 128d <_sk_lerp_u8_sse41_8bit+0x14f> DB 102,66,15,56,48,60,2 ; pmovzxbw (%rdx,%r8,1),%xmm7 - DB 102,15,219,61,18,19,0,0 ; pand 0x1312(%rip),%xmm7 # 2480 <_sk_difference_sse41_8bit+0x311> + DB 102,15,219,61,82,27,0,0 ; pand 0x1b52(%rip),%xmm7 # 2cc0 <_sk_overlay_sse41_8bit+0x5d7> DB 102,69,15,239,192 ; pxor %xmm8,%xmm8 DB 102,15,111,247 ; movdqa %xmm7,%xmm6 - DB 102,15,56,0,53,16,19,0,0 ; pshufb 0x1310(%rip),%xmm6 # 2490 <_sk_difference_sse41_8bit+0x321> - DB 102,15,56,0,61,23,19,0,0 ; pshufb 0x1317(%rip),%xmm7 # 24a0 <_sk_difference_sse41_8bit+0x331> + DB 102,15,56,0,53,80,27,0,0 ; pshufb 0x1b50(%rip),%xmm6 # 2cd0 <_sk_overlay_sse41_8bit+0x5e7> + DB 102,15,56,0,61,87,27,0,0 ; pshufb 0x1b57(%rip),%xmm7 # 
2ce0 <_sk_overlay_sse41_8bit+0x5f7> DB 102,68,15,56,48,200 ; pmovzxbw %xmm0,%xmm9 DB 102,65,15,104,192 ; punpckhbw %xmm8,%xmm0 DB 102,68,15,56,48,209 ; pmovzxbw %xmm1,%xmm10 @@ -43722,7 +44123,7 @@ _sk_lerp_u8_sse41_8bit LABEL PROC DB 255 ; (bad) DB 255 ; (bad) DB 255 ; (bad) - DB 233,255,255,255,222 ; jmpq ffffffffdf001338 <_sk_difference_sse41_8bit+0xffffffffdefff1c9> + DB 233,255,255,255,222 ; jmpq ffffffffdf001338 <_sk_overlay_sse41_8bit+0xffffffffdeffec4f> DB 255 ; (bad) DB 255 ; (bad) DB 255,211 ; callq *%rbx @@ -43750,7 +44151,7 @@ _sk_move_dst_src_sse41_8bit LABEL PROC PUBLIC _sk_black_color_sse41_8bit _sk_black_color_sse41_8bit LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax - DB 15,40,5,79,17,0,0 ; movaps 0x114f(%rip),%xmm0 # 24b0 <_sk_difference_sse41_8bit+0x341> + DB 15,40,5,143,25,0,0 ; movaps 0x198f(%rip),%xmm0 # 2cf0 <_sk_overlay_sse41_8bit+0x607> DB 15,40,200 ; movaps %xmm0,%xmm1 DB 255,224 ; jmpq *%rax @@ -43770,7 +44171,7 @@ _sk_clear_sse41_8bit LABEL PROC PUBLIC _sk_srcatop_sse41_8bit _sk_srcatop_sse41_8bit LABEL PROC - DB 102,68,15,111,21,59,17,0,0 ; movdqa 0x113b(%rip),%xmm10 # 24c0 <_sk_difference_sse41_8bit+0x351> + DB 102,68,15,111,21,123,25,0,0 ; movdqa 0x197b(%rip),%xmm10 # 2d00 <_sk_overlay_sse41_8bit+0x617> DB 102,68,15,111,219 ; movdqa %xmm3,%xmm11 DB 102,68,15,56,48,195 ; pmovzxbw %xmm3,%xmm8 DB 102,15,111,235 ; movdqa %xmm3,%xmm5 @@ -43838,7 +44239,7 @@ _sk_srcatop_sse41_8bit LABEL PROC PUBLIC _sk_dstatop_sse41_8bit _sk_dstatop_sse41_8bit LABEL PROC - DB 102,68,15,111,29,16,16,0,0 ; movdqa 0x1010(%rip),%xmm11 # 24d0 <_sk_difference_sse41_8bit+0x361> + DB 102,68,15,111,29,80,24,0,0 ; movdqa 0x1850(%rip),%xmm11 # 2d10 <_sk_overlay_sse41_8bit+0x627> DB 102,68,15,111,233 ; movdqa %xmm1,%xmm13 DB 102,69,15,56,0,235 ; pshufb %xmm11,%xmm13 DB 102,68,15,111,248 ; movdqa %xmm0,%xmm15 @@ -43908,7 +44309,7 @@ PUBLIC _sk_srcin_sse41_8bit _sk_srcin_sse41_8bit LABEL PROC DB 102,15,111,225 ; movdqa %xmm1,%xmm4 DB 102,15,111,232 ; movdqa %xmm0,%xmm5 - DB 
102,15,111,5,215,14,0,0 ; movdqa 0xed7(%rip),%xmm0 # 24e0 <_sk_difference_sse41_8bit+0x371> + DB 102,15,111,5,23,23,0,0 ; movdqa 0x1717(%rip),%xmm0 # 2d20 <_sk_overlay_sse41_8bit+0x637> DB 102,15,111,243 ; movdqa %xmm3,%xmm6 DB 102,15,56,0,240 ; pshufb %xmm0,%xmm6 DB 102,15,111,250 ; movdqa %xmm2,%xmm7 @@ -43941,7 +44342,7 @@ _sk_srcin_sse41_8bit LABEL PROC PUBLIC _sk_dstin_sse41_8bit _sk_dstin_sse41_8bit LABEL PROC - DB 102,15,111,37,90,14,0,0 ; movdqa 0xe5a(%rip),%xmm4 # 24f0 <_sk_difference_sse41_8bit+0x381> + DB 102,15,111,37,154,22,0,0 ; movdqa 0x169a(%rip),%xmm4 # 2d30 <_sk_overlay_sse41_8bit+0x647> DB 102,15,56,0,204 ; pshufb %xmm4,%xmm1 DB 102,15,56,0,196 ; pshufb %xmm4,%xmm0 DB 102,69,15,239,210 ; pxor %xmm10,%xmm10 @@ -43978,7 +44379,7 @@ PUBLIC _sk_srcout_sse41_8bit _sk_srcout_sse41_8bit LABEL PROC DB 102,15,111,225 ; movdqa %xmm1,%xmm4 DB 102,15,111,232 ; movdqa %xmm0,%xmm5 - DB 102,15,111,5,205,13,0,0 ; movdqa 0xdcd(%rip),%xmm0 # 2500 <_sk_difference_sse41_8bit+0x391> + DB 102,15,111,5,13,22,0,0 ; movdqa 0x160d(%rip),%xmm0 # 2d40 <_sk_overlay_sse41_8bit+0x657> DB 102,15,111,250 ; movdqa %xmm2,%xmm7 DB 102,15,56,0,248 ; pshufb %xmm0,%xmm7 DB 102,15,111,243 ; movdqa %xmm3,%xmm6 @@ -44014,7 +44415,7 @@ _sk_srcout_sse41_8bit LABEL PROC PUBLIC _sk_dstout_sse41_8bit _sk_dstout_sse41_8bit LABEL PROC - DB 102,15,111,37,68,13,0,0 ; movdqa 0xd44(%rip),%xmm4 # 2510 <_sk_difference_sse41_8bit+0x3a1> + DB 102,15,111,37,132,21,0,0 ; movdqa 0x1584(%rip),%xmm4 # 2d50 <_sk_overlay_sse41_8bit+0x667> DB 102,15,56,0,196 ; pshufb %xmm4,%xmm0 DB 102,15,56,0,204 ; pshufb %xmm4,%xmm1 DB 102,15,118,228 ; pcmpeqd %xmm4,%xmm4 @@ -44052,7 +44453,7 @@ _sk_dstout_sse41_8bit LABEL PROC PUBLIC _sk_srcover_sse41_8bit _sk_srcover_sse41_8bit LABEL PROC - DB 102,15,111,53,179,12,0,0 ; movdqa 0xcb3(%rip),%xmm6 # 2520 <_sk_difference_sse41_8bit+0x3b1> + DB 102,15,111,53,243,20,0,0 ; movdqa 0x14f3(%rip),%xmm6 # 2d60 <_sk_overlay_sse41_8bit+0x677> DB 102,68,15,111,217 ; movdqa %xmm1,%xmm11 
DB 102,68,15,56,0,222 ; pshufb %xmm6,%xmm11 DB 102,15,111,232 ; movdqa %xmm0,%xmm5 @@ -44091,7 +44492,7 @@ _sk_srcover_sse41_8bit LABEL PROC PUBLIC _sk_dstover_sse41_8bit _sk_dstover_sse41_8bit LABEL PROC - DB 102,68,15,111,5,19,12,0,0 ; movdqa 0xc13(%rip),%xmm8 # 2530 <_sk_difference_sse41_8bit+0x3c1> + DB 102,68,15,111,5,83,20,0,0 ; movdqa 0x1453(%rip),%xmm8 # 2d70 <_sk_overlay_sse41_8bit+0x687> DB 102,68,15,111,209 ; movdqa %xmm1,%xmm10 DB 102,68,15,56,48,201 ; pmovzxbw %xmm1,%xmm9 DB 102,15,252,203 ; paddb %xmm3,%xmm1 @@ -44166,7 +44567,7 @@ _sk_multiply_sse41_8bit LABEL PROC DB 102,15,111,218 ; movdqa %xmm2,%xmm3 DB 102,15,111,209 ; movdqa %xmm1,%xmm2 DB 102,15,111,200 ; movdqa %xmm0,%xmm1 - DB 102,68,15,111,53,225,10,0,0 ; movdqa 0xae1(%rip),%xmm14 # 2540 <_sk_difference_sse41_8bit+0x3d1> + DB 102,68,15,111,53,33,19,0,0 ; movdqa 0x1321(%rip),%xmm14 # 2d80 <_sk_overlay_sse41_8bit+0x697> DB 102,68,15,111,195 ; movdqa %xmm3,%xmm8 DB 102,15,111,235 ; movdqa %xmm3,%xmm5 DB 102,65,15,56,0,238 ; pshufb %xmm14,%xmm5 @@ -44291,7 +44692,7 @@ _sk_screen_sse41_8bit LABEL PROC PUBLIC _sk_xor__sse41_8bit _sk_xor__sse41_8bit LABEL PROC - DB 102,68,15,111,21,175,8,0,0 ; movdqa 0x8af(%rip),%xmm10 # 2550 <_sk_difference_sse41_8bit+0x3e1> + DB 102,68,15,111,21,239,16,0,0 ; movdqa 0x10ef(%rip),%xmm10 # 2d90 <_sk_overlay_sse41_8bit+0x6a7> DB 102,68,15,111,226 ; movdqa %xmm2,%xmm12 DB 102,68,15,56,48,194 ; pmovzxbw %xmm2,%xmm8 DB 102,15,111,234 ; movdqa %xmm2,%xmm5 @@ -44362,7 +44763,7 @@ _sk_xor__sse41_8bit LABEL PROC PUBLIC _sk_darken_sse41_8bit _sk_darken_sse41_8bit LABEL PROC DB 102,68,15,111,240 ; movdqa %xmm0,%xmm14 - DB 102,68,15,111,5,114,7,0,0 ; movdqa 0x772(%rip),%xmm8 # 2560 <_sk_difference_sse41_8bit+0x3f1> + DB 102,68,15,111,5,178,15,0,0 ; movdqa 0xfb2(%rip),%xmm8 # 2da0 <_sk_overlay_sse41_8bit+0x6b7> DB 102,68,15,111,219 ; movdqa %xmm3,%xmm11 DB 102,68,15,56,48,203 ; pmovzxbw %xmm3,%xmm9 DB 102,15,111,243 ; movdqa %xmm3,%xmm6 @@ -44429,7 +44830,7 @@ 
_sk_darken_sse41_8bit LABEL PROC DB 102,15,248,231 ; psubb %xmm7,%xmm4 DB 102,15,248,206 ; psubb %xmm6,%xmm1 DB 102,68,15,248,245 ; psubb %xmm5,%xmm14 - DB 15,40,5,54,6,0,0 ; movaps 0x636(%rip),%xmm0 # 2570 <_sk_difference_sse41_8bit+0x401> + DB 15,40,5,118,14,0,0 ; movaps 0xe76(%rip),%xmm0 # 2db0 <_sk_overlay_sse41_8bit+0x6c7> DB 102,68,15,56,16,244 ; pblendvb %xmm0,%xmm4,%xmm14 DB 102,65,15,56,16,200 ; pblendvb %xmm0,%xmm8,%xmm1 DB 72,173 ; lods %ds:(%rsi),%rax @@ -44439,7 +44840,7 @@ _sk_darken_sse41_8bit LABEL PROC PUBLIC _sk_lighten_sse41_8bit _sk_lighten_sse41_8bit LABEL PROC DB 102,68,15,111,240 ; movdqa %xmm0,%xmm14 - DB 102,68,15,111,5,35,6,0,0 ; movdqa 0x623(%rip),%xmm8 # 2580 <_sk_difference_sse41_8bit+0x411> + DB 102,68,15,111,5,99,14,0,0 ; movdqa 0xe63(%rip),%xmm8 # 2dc0 <_sk_overlay_sse41_8bit+0x6d7> DB 102,68,15,111,219 ; movdqa %xmm3,%xmm11 DB 102,68,15,56,48,203 ; pmovzxbw %xmm3,%xmm9 DB 102,15,111,243 ; movdqa %xmm3,%xmm6 @@ -44506,7 +44907,7 @@ _sk_lighten_sse41_8bit LABEL PROC DB 102,15,248,231 ; psubb %xmm7,%xmm4 DB 102,15,248,206 ; psubb %xmm6,%xmm1 DB 102,68,15,248,245 ; psubb %xmm5,%xmm14 - DB 15,40,5,231,4,0,0 ; movaps 0x4e7(%rip),%xmm0 # 2590 <_sk_difference_sse41_8bit+0x421> + DB 15,40,5,39,13,0,0 ; movaps 0xd27(%rip),%xmm0 # 2dd0 <_sk_overlay_sse41_8bit+0x6e7> DB 102,68,15,56,16,244 ; pblendvb %xmm0,%xmm4,%xmm14 DB 102,65,15,56,16,200 ; pblendvb %xmm0,%xmm8,%xmm1 DB 72,173 ; lods %ds:(%rsi),%rax @@ -44544,7 +44945,7 @@ _sk_exclusion_sse41_8bit LABEL PROC DB 102,15,113,215,8 ; psrlw $0x8,%xmm7 DB 102,15,103,253 ; packuswb %xmm5,%xmm7 DB 102,15,103,230 ; packuswb %xmm6,%xmm4 - DB 102,15,111,45,77,4,0,0 ; movdqa 0x44d(%rip),%xmm5 # 25a0 <_sk_difference_sse41_8bit+0x431> + DB 102,15,111,45,141,12,0,0 ; movdqa 0xc8d(%rip),%xmm5 # 2de0 <_sk_overlay_sse41_8bit+0x6f7> DB 102,15,248,204 ; psubb %xmm4,%xmm1 DB 102,15,219,229 ; pand %xmm5,%xmm4 DB 102,15,219,239 ; pand %xmm7,%xmm5 @@ -44557,7 +44958,7 @@ _sk_exclusion_sse41_8bit LABEL PROC PUBLIC 
_sk_difference_sse41_8bit _sk_difference_sse41_8bit LABEL PROC DB 102,68,15,111,193 ; movdqa %xmm1,%xmm8 - DB 102,15,111,13,52,4,0,0 ; movdqa 0x434(%rip),%xmm1 # 25b0 <_sk_difference_sse41_8bit+0x441> + DB 102,15,111,13,116,12,0,0 ; movdqa 0xc74(%rip),%xmm1 # 2df0 <_sk_overlay_sse41_8bit+0x707> DB 102,68,15,111,219 ; movdqa %xmm3,%xmm11 DB 102,68,15,56,48,203 ; pmovzxbw %xmm3,%xmm9 DB 102,15,111,243 ; movdqa %xmm3,%xmm6 @@ -44595,7 +44996,7 @@ _sk_difference_sse41_8bit LABEL PROC DB 102,15,113,214,8 ; psrlw $0x8,%xmm6 DB 102,15,113,215,8 ; psrlw $0x8,%xmm7 DB 102,15,103,254 ; packuswb %xmm6,%xmm7 - DB 102,15,111,37,121,3,0,0 ; movdqa 0x379(%rip),%xmm4 # 25b0 <_sk_difference_sse41_8bit+0x441> + DB 102,15,111,37,185,11,0,0 ; movdqa 0xbb9(%rip),%xmm4 # 2df0 <_sk_overlay_sse41_8bit+0x707> DB 102,15,56,0,204 ; pshufb %xmm4,%xmm1 DB 102,15,56,0,236 ; pshufb %xmm4,%xmm5 DB 102,69,15,104,236 ; punpckhbw %xmm12,%xmm13 @@ -44620,7 +45021,7 @@ _sk_difference_sse41_8bit LABEL PROC DB 102,15,103,241 ; packuswb %xmm1,%xmm6 DB 102,65,15,218,230 ; pminub %xmm14,%xmm4 DB 102,15,218,247 ; pminub %xmm7,%xmm6 - DB 102,15,111,13,12,3,0,0 ; movdqa 0x30c(%rip),%xmm1 # 25c0 <_sk_difference_sse41_8bit+0x451> + DB 102,15,111,13,76,11,0,0 ; movdqa 0xb4c(%rip),%xmm1 # 2e00 <_sk_overlay_sse41_8bit+0x717> DB 102,68,15,248,198 ; psubb %xmm6,%xmm8 DB 102,15,219,241 ; pand %xmm1,%xmm6 DB 102,15,219,204 ; pand %xmm4,%xmm1 @@ -44631,13 +45032,416 @@ _sk_difference_sse41_8bit LABEL PROC DB 102,65,15,111,200 ; movdqa %xmm8,%xmm1 DB 255,224 ; jmpq *%rax +PUBLIC _sk_hardlight_sse41_8bit +_sk_hardlight_sse41_8bit LABEL PROC + DB 72,129,236,24,1,0,0 ; sub $0x118,%rsp + DB 102,68,15,111,251 ; movdqa %xmm3,%xmm15 + DB 102,68,15,111,233 ; movdqa %xmm1,%xmm13 + DB 102,68,15,111,240 ; movdqa %xmm0,%xmm14 + DB 102,15,239,192 ; pxor %xmm0,%xmm0 + DB 102,69,15,111,198 ; movdqa %xmm14,%xmm8 + DB 102,68,15,104,192 ; punpckhbw %xmm0,%xmm8 + DB 102,15,104,200 ; punpckhbw %xmm0,%xmm1 + DB 102,15,111,225 ; movdqa 
%xmm1,%xmm4 + DB 102,15,127,100,36,16 ; movdqa %xmm4,0x10(%rsp) + DB 102,65,15,56,48,206 ; pmovzxbw %xmm14,%xmm1 + DB 102,15,127,12,36 ; movdqa %xmm1,(%rsp) + DB 102,69,15,56,48,221 ; pmovzxbw %xmm13,%xmm11 + DB 102,68,15,127,156,36,176,0,0,0 ; movdqa %xmm11,0xb0(%rsp) + DB 102,15,111,13,228,10,0,0 ; movdqa 0xae4(%rip),%xmm1 # 2e10 <_sk_overlay_sse41_8bit+0x727> + DB 102,68,15,56,0,241 ; pshufb %xmm1,%xmm14 + DB 102,68,15,56,0,233 ; pshufb %xmm1,%xmm13 + DB 102,69,15,56,48,205 ; pmovzxbw %xmm13,%xmm9 + DB 102,68,15,104,232 ; punpckhbw %xmm0,%xmm13 + DB 102,69,15,56,48,214 ; pmovzxbw %xmm14,%xmm10 + DB 102,68,15,104,240 ; punpckhbw %xmm0,%xmm14 + DB 102,15,127,84,36,64 ; movdqa %xmm2,0x40(%rsp) + DB 102,15,111,242 ; movdqa %xmm2,%xmm6 + DB 102,15,111,234 ; movdqa %xmm2,%xmm5 + DB 102,15,104,232 ; punpckhbw %xmm0,%xmm5 + DB 102,68,15,127,124,36,80 ; movdqa %xmm15,0x50(%rsp) + DB 102,65,15,111,215 ; movdqa %xmm15,%xmm2 + DB 102,68,15,104,248 ; punpckhbw %xmm0,%xmm15 + DB 102,15,56,0,241 ; pshufb %xmm1,%xmm6 + DB 102,15,56,0,209 ; pshufb %xmm1,%xmm2 + DB 102,68,15,56,48,226 ; pmovzxbw %xmm2,%xmm12 + DB 102,68,15,127,100,36,32 ; movdqa %xmm12,0x20(%rsp) + DB 102,15,104,208 ; punpckhbw %xmm0,%xmm2 + DB 102,15,127,84,36,48 ; movdqa %xmm2,0x30(%rsp) + DB 102,15,56,48,222 ; pmovzxbw %xmm6,%xmm3 + DB 102,15,104,240 ; punpckhbw %xmm0,%xmm6 + DB 102,15,111,254 ; movdqa %xmm6,%xmm7 + DB 102,15,111,196 ; movdqa %xmm4,%xmm0 + DB 102,15,253,192 ; paddw %xmm0,%xmm0 + DB 102,15,111,200 ; movdqa %xmm0,%xmm1 + DB 102,65,15,56,58,205 ; pminuw %xmm13,%xmm1 + DB 102,15,117,200 ; pcmpeqw %xmm0,%xmm1 + DB 102,15,127,140,36,240,0,0,0 ; movdqa %xmm1,0xf0(%rsp) + DB 102,65,15,111,192 ; movdqa %xmm8,%xmm0 + DB 102,15,253,192 ; paddw %xmm0,%xmm0 + DB 102,15,111,200 ; movdqa %xmm0,%xmm1 + DB 102,65,15,56,58,206 ; pminuw %xmm14,%xmm1 + DB 102,15,117,200 ; pcmpeqw %xmm0,%xmm1 + DB 102,15,127,140,36,208,0,0,0 ; movdqa %xmm1,0xd0(%rsp) + DB 102,65,15,111,195 ; movdqa %xmm11,%xmm0 + DB 102,15,253,192 
; paddw %xmm0,%xmm0 + DB 102,15,111,208 ; movdqa %xmm0,%xmm2 + DB 102,65,15,111,225 ; movdqa %xmm9,%xmm4 + DB 102,15,127,100,36,96 ; movdqa %xmm4,0x60(%rsp) + DB 102,15,56,58,212 ; pminuw %xmm4,%xmm2 + DB 102,15,117,208 ; pcmpeqw %xmm0,%xmm2 + DB 102,15,127,148,36,0,1,0,0 ; movdqa %xmm2,0x100(%rsp) + DB 102,15,111,4,36 ; movdqa (%rsp),%xmm0 + DB 102,15,253,192 ; paddw %xmm0,%xmm0 + DB 102,15,111,240 ; movdqa %xmm0,%xmm6 + DB 102,65,15,56,58,242 ; pminuw %xmm10,%xmm6 + DB 102,15,117,240 ; pcmpeqw %xmm0,%xmm6 + DB 102,15,127,180,36,192,0,0,0 ; movdqa %xmm6,0xc0(%rsp) + DB 102,15,111,53,240,9,0,0 ; movdqa 0x9f0(%rip),%xmm6 # 2e20 <_sk_overlay_sse41_8bit+0x737> + DB 102,15,111,199 ; movdqa %xmm7,%xmm0 + DB 102,15,239,198 ; pxor %xmm6,%xmm0 + DB 102,65,15,213,192 ; pmullw %xmm8,%xmm0 + DB 102,15,127,132,36,224,0,0,0 ; movdqa %xmm0,0xe0(%rsp) + DB 102,69,15,111,222 ; movdqa %xmm14,%xmm11 + DB 102,68,15,239,222 ; pxor %xmm6,%xmm11 + DB 102,68,15,213,221 ; pmullw %xmm5,%xmm11 + DB 102,15,111,199 ; movdqa %xmm7,%xmm0 + DB 102,15,249,253 ; psubw %xmm5,%xmm7 + DB 102,15,127,188,36,128,0,0,0 ; movdqa %xmm7,0x80(%rsp) + DB 102,68,15,111,205 ; movdqa %xmm5,%xmm9 + DB 102,69,15,213,200 ; pmullw %xmm8,%xmm9 + DB 102,65,15,213,198 ; pmullw %xmm14,%xmm0 + DB 102,15,127,132,36,144,0,0,0 ; movdqa %xmm0,0x90(%rsp) + DB 102,69,15,249,240 ; psubw %xmm8,%xmm14 + DB 102,15,111,195 ; movdqa %xmm3,%xmm0 + DB 102,15,239,198 ; pxor %xmm6,%xmm0 + DB 102,68,15,111,192 ; movdqa %xmm0,%xmm8 + DB 102,65,15,111,212 ; movdqa %xmm12,%xmm2 + DB 102,15,239,214 ; pxor %xmm6,%xmm2 + DB 102,15,111,76,36,48 ; movdqa 0x30(%rsp),%xmm1 + DB 102,15,111,193 ; movdqa %xmm1,%xmm0 + DB 102,15,239,198 ; pxor %xmm6,%xmm0 + DB 102,68,15,111,100,36,16 ; movdqa 0x10(%rsp),%xmm12 + DB 102,65,15,213,196 ; pmullw %xmm12,%xmm0 + DB 102,15,127,132,36,160,0,0,0 ; movdqa %xmm0,0xa0(%rsp) + DB 102,65,15,111,194 ; movdqa %xmm10,%xmm0 + DB 102,15,111,248 ; movdqa %xmm0,%xmm7 + DB 102,15,239,254 ; pxor %xmm6,%xmm7 + DB 
102,15,111,236 ; movdqa %xmm4,%xmm5 + DB 102,15,239,238 ; pxor %xmm6,%xmm5 + DB 102,65,15,239,245 ; pxor %xmm13,%xmm6 + DB 102,65,15,111,231 ; movdqa %xmm15,%xmm4 + DB 102,15,213,244 ; pmullw %xmm4,%xmm6 + DB 102,68,15,111,249 ; movdqa %xmm1,%xmm15 + DB 102,15,249,204 ; psubw %xmm4,%xmm1 + DB 102,15,127,76,36,48 ; movdqa %xmm1,0x30(%rsp) + DB 102,65,15,213,228 ; pmullw %xmm12,%xmm4 + DB 102,69,15,213,253 ; pmullw %xmm13,%xmm15 + DB 102,69,15,249,236 ; psubw %xmm12,%xmm13 + DB 102,68,15,111,20,36 ; movdqa (%rsp),%xmm10 + DB 102,69,15,213,194 ; pmullw %xmm10,%xmm8 + DB 102,68,15,127,68,36,16 ; movdqa %xmm8,0x10(%rsp) + DB 102,68,15,56,48,68,36,64 ; pmovzxbw 0x40(%rsp),%xmm8 + DB 102,65,15,213,248 ; pmullw %xmm8,%xmm7 + DB 102,68,15,111,227 ; movdqa %xmm3,%xmm12 + DB 102,65,15,249,216 ; psubw %xmm8,%xmm3 + DB 102,15,127,92,36,112 ; movdqa %xmm3,0x70(%rsp) + DB 102,69,15,213,194 ; pmullw %xmm10,%xmm8 + DB 102,68,15,213,224 ; pmullw %xmm0,%xmm12 + DB 102,65,15,249,194 ; psubw %xmm10,%xmm0 + DB 102,15,111,200 ; movdqa %xmm0,%xmm1 + DB 102,15,111,156,36,176,0,0,0 ; movdqa 0xb0(%rsp),%xmm3 + DB 102,15,213,211 ; pmullw %xmm3,%xmm2 + DB 102,15,127,20,36 ; movdqa %xmm2,(%rsp) + DB 102,15,56,48,84,36,80 ; pmovzxbw 0x50(%rsp),%xmm2 + DB 102,15,213,234 ; pmullw %xmm2,%xmm5 + DB 102,15,111,68,36,32 ; movdqa 0x20(%rsp),%xmm0 + DB 102,68,15,111,208 ; movdqa %xmm0,%xmm10 + DB 102,15,249,194 ; psubw %xmm2,%xmm0 + DB 102,15,127,68,36,32 ; movdqa %xmm0,0x20(%rsp) + DB 102,15,213,211 ; pmullw %xmm3,%xmm2 + DB 102,15,111,68,36,96 ; movdqa 0x60(%rsp),%xmm0 + DB 102,68,15,213,208 ; pmullw %xmm0,%xmm10 + DB 102,15,249,195 ; psubw %xmm3,%xmm0 + DB 102,68,15,213,108,36,48 ; pmullw 0x30(%rsp),%xmm13 + DB 102,68,15,213,180,36,128,0,0,0 ; pmullw 0x80(%rsp),%xmm14 + DB 102,15,213,68,36,32 ; pmullw 0x20(%rsp),%xmm0 + DB 102,15,213,76,36,112 ; pmullw 0x70(%rsp),%xmm1 + DB 102,15,253,192 ; paddw %xmm0,%xmm0 + DB 102,68,15,249,208 ; psubw %xmm0,%xmm10 + DB 102,15,253,201 ; paddw %xmm1,%xmm1 + DB 
102,68,15,249,225 ; psubw %xmm1,%xmm12 + DB 102,69,15,253,237 ; paddw %xmm13,%xmm13 + DB 102,69,15,249,253 ; psubw %xmm13,%xmm15 + DB 102,69,15,253,246 ; paddw %xmm14,%xmm14 + DB 102,15,111,140,36,144,0,0,0 ; movdqa 0x90(%rsp),%xmm1 + DB 102,65,15,249,206 ; psubw %xmm14,%xmm1 + DB 102,69,15,253,201 ; paddw %xmm9,%xmm9 + DB 102,15,111,132,36,208,0,0,0 ; movdqa 0xd0(%rsp),%xmm0 + DB 102,68,15,219,200 ; pand %xmm0,%xmm9 + DB 102,15,223,193 ; pandn %xmm1,%xmm0 + DB 102,65,15,235,193 ; por %xmm9,%xmm0 + DB 102,15,111,216 ; movdqa %xmm0,%xmm3 + DB 102,15,253,228 ; paddw %xmm4,%xmm4 + DB 102,15,111,132,36,240,0,0,0 ; movdqa 0xf0(%rsp),%xmm0 + DB 102,15,219,224 ; pand %xmm0,%xmm4 + DB 102,65,15,223,199 ; pandn %xmm15,%xmm0 + DB 102,15,235,196 ; por %xmm4,%xmm0 + DB 102,15,111,200 ; movdqa %xmm0,%xmm1 + DB 102,69,15,253,192 ; paddw %xmm8,%xmm8 + DB 102,15,111,132,36,192,0,0,0 ; movdqa 0xc0(%rsp),%xmm0 + DB 102,68,15,219,192 ; pand %xmm0,%xmm8 + DB 102,65,15,223,196 ; pandn %xmm12,%xmm0 + DB 102,65,15,235,192 ; por %xmm8,%xmm0 + DB 102,15,111,224 ; movdqa %xmm0,%xmm4 + DB 102,15,253,210 ; paddw %xmm2,%xmm2 + DB 102,15,111,132,36,0,1,0,0 ; movdqa 0x100(%rsp),%xmm0 + DB 102,15,219,208 ; pand %xmm0,%xmm2 + DB 102,65,15,223,194 ; pandn %xmm10,%xmm0 + DB 102,15,235,194 ; por %xmm2,%xmm0 + DB 102,15,111,208 ; movdqa %xmm0,%xmm2 + DB 102,68,15,253,156,36,224,0,0,0 ; paddw 0xe0(%rsp),%xmm11 + DB 102,15,253,180,36,160,0,0,0 ; paddw 0xa0(%rsp),%xmm6 + DB 102,15,253,124,36,16 ; paddw 0x10(%rsp),%xmm7 + DB 102,15,253,44,36 ; paddw (%rsp),%xmm5 + DB 102,15,111,5,189,7,0,0 ; movdqa 0x7bd(%rip),%xmm0 # 2e30 <_sk_overlay_sse41_8bit+0x747> + DB 102,68,15,253,216 ; paddw %xmm0,%xmm11 + DB 102,68,15,253,219 ; paddw %xmm3,%xmm11 + DB 102,15,253,240 ; paddw %xmm0,%xmm6 + DB 102,15,253,241 ; paddw %xmm1,%xmm6 + DB 102,15,253,248 ; paddw %xmm0,%xmm7 + DB 102,15,253,252 ; paddw %xmm4,%xmm7 + DB 102,15,253,232 ; paddw %xmm0,%xmm5 + DB 102,15,253,234 ; paddw %xmm2,%xmm5 + DB 102,15,111,5,163,7,0,0 ; 
movdqa 0x7a3(%rip),%xmm0 # 2e40 <_sk_overlay_sse41_8bit+0x757> + DB 102,15,228,240 ; pmulhuw %xmm0,%xmm6 + DB 102,68,15,228,216 ; pmulhuw %xmm0,%xmm11 + DB 102,15,228,232 ; pmulhuw %xmm0,%xmm5 + DB 102,15,228,248 ; pmulhuw %xmm0,%xmm7 + DB 102,65,15,113,211,7 ; psrlw $0x7,%xmm11 + DB 102,15,113,215,7 ; psrlw $0x7,%xmm7 + DB 102,65,15,103,251 ; packuswb %xmm11,%xmm7 + DB 102,15,113,214,7 ; psrlw $0x7,%xmm6 + DB 102,15,113,213,7 ; psrlw $0x7,%xmm5 + DB 102,15,103,238 ; packuswb %xmm6,%xmm5 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 15,40,84,36,64 ; movaps 0x40(%rsp),%xmm2 + DB 15,40,92,36,80 ; movaps 0x50(%rsp),%xmm3 + DB 102,15,111,199 ; movdqa %xmm7,%xmm0 + DB 102,15,111,205 ; movdqa %xmm5,%xmm1 + DB 72,129,196,24,1,0,0 ; add $0x118,%rsp + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_overlay_sse41_8bit +_sk_overlay_sse41_8bit LABEL PROC + DB 72,129,236,72,1,0,0 ; sub $0x148,%rsp + DB 102,68,15,111,241 ; movdqa %xmm1,%xmm14 + DB 102,68,15,111,248 ; movdqa %xmm0,%xmm15 + DB 102,15,239,192 ; pxor %xmm0,%xmm0 + DB 102,65,15,111,231 ; movdqa %xmm15,%xmm4 + DB 102,15,104,224 ; punpckhbw %xmm0,%xmm4 + DB 102,69,15,111,230 ; movdqa %xmm14,%xmm12 + DB 102,68,15,104,224 ; punpckhbw %xmm0,%xmm12 + DB 102,65,15,56,48,207 ; pmovzxbw %xmm15,%xmm1 + DB 102,15,127,12,36 ; movdqa %xmm1,(%rsp) + DB 102,65,15,56,48,206 ; pmovzxbw %xmm14,%xmm1 + DB 102,15,127,140,36,16,1,0,0 ; movdqa %xmm1,0x110(%rsp) + DB 102,15,111,13,29,7,0,0 ; movdqa 0x71d(%rip),%xmm1 # 2e50 <_sk_overlay_sse41_8bit+0x767> + DB 102,68,15,56,0,249 ; pshufb %xmm1,%xmm15 + DB 102,68,15,56,0,241 ; pshufb %xmm1,%xmm14 + DB 102,65,15,56,48,238 ; pmovzxbw %xmm14,%xmm5 + DB 102,15,127,108,36,32 ; movdqa %xmm5,0x20(%rsp) + DB 102,68,15,104,240 ; punpckhbw %xmm0,%xmm14 + DB 102,65,15,56,48,239 ; pmovzxbw %xmm15,%xmm5 + DB 102,15,127,172,36,0,1,0,0 ; movdqa %xmm5,0x100(%rsp) + DB 102,68,15,104,248 ; punpckhbw %xmm0,%xmm15 + DB 102,15,111,242 ; movdqa %xmm2,%xmm6 + DB 102,15,111,234 ; movdqa %xmm2,%xmm5 + DB 102,68,15,111,218 ; movdqa 
%xmm2,%xmm11 + DB 102,68,15,127,156,36,32,1,0,0 ; movdqa %xmm11,0x120(%rsp) + DB 102,15,104,232 ; punpckhbw %xmm0,%xmm5 + DB 102,15,111,251 ; movdqa %xmm3,%xmm7 + DB 102,15,111,211 ; movdqa %xmm3,%xmm2 + DB 102,15,127,156,36,48,1,0,0 ; movdqa %xmm3,0x130(%rsp) + DB 102,15,104,208 ; punpckhbw %xmm0,%xmm2 + DB 102,15,56,0,241 ; pshufb %xmm1,%xmm6 + DB 102,15,56,0,249 ; pshufb %xmm1,%xmm7 + DB 102,15,56,48,207 ; pmovzxbw %xmm7,%xmm1 + DB 102,15,104,248 ; punpckhbw %xmm0,%xmm7 + DB 102,68,15,56,48,238 ; pmovzxbw %xmm6,%xmm13 + DB 102,15,104,240 ; punpckhbw %xmm0,%xmm6 + DB 102,68,15,111,206 ; movdqa %xmm6,%xmm9 + DB 102,15,111,194 ; movdqa %xmm2,%xmm0 + DB 102,15,253,192 ; paddw %xmm0,%xmm0 + DB 102,15,111,240 ; movdqa %xmm0,%xmm6 + DB 102,15,56,58,247 ; pminuw %xmm7,%xmm6 + DB 102,15,117,240 ; pcmpeqw %xmm0,%xmm6 + DB 102,15,127,180,36,208,0,0,0 ; movdqa %xmm6,0xd0(%rsp) + DB 102,15,111,197 ; movdqa %xmm5,%xmm0 + DB 102,15,253,192 ; paddw %xmm0,%xmm0 + DB 102,15,111,240 ; movdqa %xmm0,%xmm6 + DB 102,65,15,56,58,241 ; pminuw %xmm9,%xmm6 + DB 102,15,117,240 ; pcmpeqw %xmm0,%xmm6 + DB 102,15,127,180,36,192,0,0,0 ; movdqa %xmm6,0xc0(%rsp) + DB 102,15,56,48,195 ; pmovzxbw %xmm3,%xmm0 + DB 102,15,127,132,36,144,0,0,0 ; movdqa %xmm0,0x90(%rsp) + DB 102,15,253,192 ; paddw %xmm0,%xmm0 + DB 102,15,111,216 ; movdqa %xmm0,%xmm3 + DB 102,15,56,58,217 ; pminuw %xmm1,%xmm3 + DB 102,15,127,76,36,16 ; movdqa %xmm1,0x10(%rsp) + DB 102,15,117,216 ; pcmpeqw %xmm0,%xmm3 + DB 102,15,127,156,36,224,0,0,0 ; movdqa %xmm3,0xe0(%rsp) + DB 102,65,15,56,48,195 ; pmovzxbw %xmm11,%xmm0 + DB 102,15,127,68,36,96 ; movdqa %xmm0,0x60(%rsp) + DB 102,15,253,192 ; paddw %xmm0,%xmm0 + DB 102,15,111,216 ; movdqa %xmm0,%xmm3 + DB 102,65,15,56,58,221 ; pminuw %xmm13,%xmm3 + DB 102,15,117,216 ; pcmpeqw %xmm0,%xmm3 + DB 102,15,127,156,36,176,0,0,0 ; movdqa %xmm3,0xb0(%rsp) + DB 102,15,111,53,16,6,0,0 ; movdqa 0x610(%rip),%xmm6 # 2e60 <_sk_overlay_sse41_8bit+0x777> + DB 102,65,15,111,193 ; movdqa %xmm9,%xmm0 + 
DB 102,15,239,198 ; pxor %xmm6,%xmm0 + DB 102,15,213,196 ; pmullw %xmm4,%xmm0 + DB 102,15,127,132,36,240,0,0,0 ; movdqa %xmm0,0xf0(%rsp) + DB 102,69,15,111,223 ; movdqa %xmm15,%xmm11 + DB 102,68,15,239,222 ; pxor %xmm6,%xmm11 + DB 102,68,15,213,221 ; pmullw %xmm5,%xmm11 + DB 102,65,15,111,193 ; movdqa %xmm9,%xmm0 + DB 102,68,15,249,205 ; psubw %xmm5,%xmm9 + DB 102,68,15,127,76,36,80 ; movdqa %xmm9,0x50(%rsp) + DB 102,68,15,111,205 ; movdqa %xmm5,%xmm9 + DB 102,68,15,213,204 ; pmullw %xmm4,%xmm9 + DB 102,65,15,213,199 ; pmullw %xmm15,%xmm0 + DB 102,15,127,68,36,112 ; movdqa %xmm0,0x70(%rsp) + DB 102,68,15,249,252 ; psubw %xmm4,%xmm15 + DB 102,65,15,111,197 ; movdqa %xmm13,%xmm0 + DB 102,15,239,198 ; pxor %xmm6,%xmm0 + DB 102,15,111,216 ; movdqa %xmm0,%xmm3 + DB 102,15,239,206 ; pxor %xmm6,%xmm1 + DB 102,68,15,111,209 ; movdqa %xmm1,%xmm10 + DB 102,15,111,207 ; movdqa %xmm7,%xmm1 + DB 102,15,111,193 ; movdqa %xmm1,%xmm0 + DB 102,15,239,198 ; pxor %xmm6,%xmm0 + DB 102,65,15,111,252 ; movdqa %xmm12,%xmm7 + DB 102,15,213,199 ; pmullw %xmm7,%xmm0 + DB 102,15,127,132,36,160,0,0,0 ; movdqa %xmm0,0xa0(%rsp) + DB 102,15,111,132,36,0,1,0,0 ; movdqa 0x100(%rsp),%xmm0 + DB 102,68,15,111,192 ; movdqa %xmm0,%xmm8 + DB 102,68,15,239,198 ; pxor %xmm6,%xmm8 + DB 102,15,111,108,36,32 ; movdqa 0x20(%rsp),%xmm5 + DB 102,15,239,238 ; pxor %xmm6,%xmm5 + DB 102,65,15,239,246 ; pxor %xmm14,%xmm6 + DB 102,15,213,242 ; pmullw %xmm2,%xmm6 + DB 102,68,15,111,225 ; movdqa %xmm1,%xmm12 + DB 102,15,249,202 ; psubw %xmm2,%xmm1 + DB 102,15,127,76,36,64 ; movdqa %xmm1,0x40(%rsp) + DB 102,15,111,226 ; movdqa %xmm2,%xmm4 + DB 102,15,213,231 ; pmullw %xmm7,%xmm4 + DB 102,69,15,213,230 ; pmullw %xmm14,%xmm12 + DB 102,68,15,249,247 ; psubw %xmm7,%xmm14 + DB 102,15,111,12,36 ; movdqa (%rsp),%xmm1 + DB 102,15,213,217 ; pmullw %xmm1,%xmm3 + DB 102,15,127,156,36,128,0,0,0 ; movdqa %xmm3,0x80(%rsp) + DB 102,15,111,84,36,96 ; movdqa 0x60(%rsp),%xmm2 + DB 102,68,15,213,194 ; pmullw %xmm2,%xmm8 + DB 
102,65,15,111,221 ; movdqa %xmm13,%xmm3 + DB 102,15,249,218 ; psubw %xmm2,%xmm3 + DB 102,15,127,92,36,48 ; movdqa %xmm3,0x30(%rsp) + DB 102,15,111,218 ; movdqa %xmm2,%xmm3 + DB 102,15,213,217 ; pmullw %xmm1,%xmm3 + DB 102,68,15,213,232 ; pmullw %xmm0,%xmm13 + DB 102,15,249,193 ; psubw %xmm1,%xmm0 + DB 102,15,111,200 ; movdqa %xmm0,%xmm1 + DB 102,15,111,148,36,16,1,0,0 ; movdqa 0x110(%rsp),%xmm2 + DB 102,68,15,213,210 ; pmullw %xmm2,%xmm10 + DB 102,68,15,127,20,36 ; movdqa %xmm10,(%rsp) + DB 102,15,111,132,36,144,0,0,0 ; movdqa 0x90(%rsp),%xmm0 + DB 102,15,213,232 ; pmullw %xmm0,%xmm5 + DB 102,15,111,124,36,16 ; movdqa 0x10(%rsp),%xmm7 + DB 102,68,15,111,215 ; movdqa %xmm7,%xmm10 + DB 102,15,249,248 ; psubw %xmm0,%xmm7 + DB 102,15,127,124,36,16 ; movdqa %xmm7,0x10(%rsp) + DB 102,15,111,248 ; movdqa %xmm0,%xmm7 + DB 102,15,213,250 ; pmullw %xmm2,%xmm7 + DB 102,15,111,68,36,32 ; movdqa 0x20(%rsp),%xmm0 + DB 102,68,15,213,208 ; pmullw %xmm0,%xmm10 + DB 102,15,249,194 ; psubw %xmm2,%xmm0 + DB 102,68,15,213,116,36,64 ; pmullw 0x40(%rsp),%xmm14 + DB 102,68,15,213,124,36,80 ; pmullw 0x50(%rsp),%xmm15 + DB 102,15,213,68,36,16 ; pmullw 0x10(%rsp),%xmm0 + DB 102,15,213,76,36,48 ; pmullw 0x30(%rsp),%xmm1 + DB 102,15,253,192 ; paddw %xmm0,%xmm0 + DB 102,68,15,249,208 ; psubw %xmm0,%xmm10 + DB 102,15,253,201 ; paddw %xmm1,%xmm1 + DB 102,68,15,249,233 ; psubw %xmm1,%xmm13 + DB 102,69,15,253,246 ; paddw %xmm14,%xmm14 + DB 102,69,15,249,230 ; psubw %xmm14,%xmm12 + DB 102,69,15,253,255 ; paddw %xmm15,%xmm15 + DB 102,15,111,76,36,112 ; movdqa 0x70(%rsp),%xmm1 + DB 102,65,15,249,207 ; psubw %xmm15,%xmm1 + DB 102,69,15,253,201 ; paddw %xmm9,%xmm9 + DB 102,15,111,132,36,192,0,0,0 ; movdqa 0xc0(%rsp),%xmm0 + DB 102,68,15,219,200 ; pand %xmm0,%xmm9 + DB 102,15,223,193 ; pandn %xmm1,%xmm0 + DB 102,65,15,235,193 ; por %xmm9,%xmm0 + DB 102,68,15,111,200 ; movdqa %xmm0,%xmm9 + DB 102,15,253,228 ; paddw %xmm4,%xmm4 + DB 102,15,111,132,36,208,0,0,0 ; movdqa 0xd0(%rsp),%xmm0 + DB 102,15,219,224 
; pand %xmm0,%xmm4 + DB 102,65,15,223,196 ; pandn %xmm12,%xmm0 + DB 102,15,235,196 ; por %xmm4,%xmm0 + DB 102,15,111,200 ; movdqa %xmm0,%xmm1 + DB 102,15,253,219 ; paddw %xmm3,%xmm3 + DB 102,15,111,132,36,176,0,0,0 ; movdqa 0xb0(%rsp),%xmm0 + DB 102,15,219,216 ; pand %xmm0,%xmm3 + DB 102,65,15,223,197 ; pandn %xmm13,%xmm0 + DB 102,15,235,195 ; por %xmm3,%xmm0 + DB 102,15,111,216 ; movdqa %xmm0,%xmm3 + DB 102,15,253,255 ; paddw %xmm7,%xmm7 + DB 102,15,111,132,36,224,0,0,0 ; movdqa 0xe0(%rsp),%xmm0 + DB 102,15,219,248 ; pand %xmm0,%xmm7 + DB 102,65,15,223,194 ; pandn %xmm10,%xmm0 + DB 102,15,235,199 ; por %xmm7,%xmm0 + DB 102,15,111,208 ; movdqa %xmm0,%xmm2 + DB 102,68,15,253,156,36,240,0,0,0 ; paddw 0xf0(%rsp),%xmm11 + DB 102,15,253,180,36,160,0,0,0 ; paddw 0xa0(%rsp),%xmm6 + DB 102,68,15,253,132,36,128,0,0,0 ; paddw 0x80(%rsp),%xmm8 + DB 102,15,253,44,36 ; paddw (%rsp),%xmm5 + DB 102,15,111,5,220,3,0,0 ; movdqa 0x3dc(%rip),%xmm0 # 2e70 <_sk_overlay_sse41_8bit+0x787> + DB 102,68,15,253,216 ; paddw %xmm0,%xmm11 + DB 102,69,15,253,217 ; paddw %xmm9,%xmm11 + DB 102,15,253,240 ; paddw %xmm0,%xmm6 + DB 102,15,253,241 ; paddw %xmm1,%xmm6 + DB 102,68,15,253,192 ; paddw %xmm0,%xmm8 + DB 102,68,15,253,195 ; paddw %xmm3,%xmm8 + DB 102,15,253,232 ; paddw %xmm0,%xmm5 + DB 102,15,253,234 ; paddw %xmm2,%xmm5 + DB 102,15,111,5,192,3,0,0 ; movdqa 0x3c0(%rip),%xmm0 # 2e80 <_sk_overlay_sse41_8bit+0x797> + DB 102,15,228,240 ; pmulhuw %xmm0,%xmm6 + DB 102,68,15,228,216 ; pmulhuw %xmm0,%xmm11 + DB 102,15,228,232 ; pmulhuw %xmm0,%xmm5 + DB 102,68,15,228,192 ; pmulhuw %xmm0,%xmm8 + DB 102,65,15,113,211,7 ; psrlw $0x7,%xmm11 + DB 102,65,15,113,208,7 ; psrlw $0x7,%xmm8 + DB 102,69,15,103,195 ; packuswb %xmm11,%xmm8 + DB 102,15,113,214,7 ; psrlw $0x7,%xmm6 + DB 102,15,113,213,7 ; psrlw $0x7,%xmm5 + DB 102,15,103,238 ; packuswb %xmm6,%xmm5 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 15,40,148,36,32,1,0,0 ; movaps 0x120(%rsp),%xmm2 + DB 15,40,156,36,48,1,0,0 ; movaps 0x130(%rsp),%xmm3 + DB 
102,65,15,111,192 ; movdqa %xmm8,%xmm0 + DB 102,15,111,205 ; movdqa %xmm5,%xmm1 + DB 72,129,196,72,1,0,0 ; add $0x148,%rsp + DB 255,224 ; jmpq *%rax + ALIGN 4 DB 0,0 ; add %al,(%rax) - DB 127,67 ; jg 231f <_sk_difference_sse41_8bit+0x1b0> + DB 127,67 ; jg 2b5f <_sk_overlay_sse41_8bit+0x476> DB 0,0 ; add %al,(%rax) - DB 127,67 ; jg 2323 <_sk_difference_sse41_8bit+0x1b4> + DB 127,67 ; jg 2b63 <_sk_overlay_sse41_8bit+0x47a> DB 0,0 ; add %al,(%rax) - DB 127,67 ; jg 2327 <_sk_difference_sse41_8bit+0x1b8> + DB 127,67 ; jg 2b67 <_sk_overlay_sse41_8bit+0x47e> ALIGN 16 DB 0,0 ; add %al,(%rax) @@ -45068,6 +45872,67 @@ ALIGN 16 DB 255 ; (bad) DB 255 ; (bad) DB 255,0 ; incl (%rax) + DB 3,3 ; add (%rbx),%eax + DB 3,3 ; add (%rbx),%eax + DB 7 ; (bad) + DB 7 ; (bad) + DB 7 ; (bad) + DB 7 ; (bad) + DB 11,11 ; or (%rbx),%ecx + DB 11,11 ; or (%rbx),%ecx + DB 15 ; (bad) + DB 15 ; (bad) + DB 15 ; (bad) + DB 15,255 ; (bad) + DB 0,255 ; add %bh,%bh + DB 0,255 ; add %bh,%bh + DB 0,255 ; add %bh,%bh + DB 0,255 ; add %bh,%bh + DB 0,255 ; add %bh,%bh + DB 0,255 ; add %bh,%bh + DB 0,255 ; add %bh,%bh + DB 0,127,0 ; add %bh,0x0(%rdi) + DB 127,0 ; jg 2e34 <.literal16+0x304> + DB 127,0 ; jg 2e36 <.literal16+0x306> + DB 127,0 ; jg 2e38 <.literal16+0x308> + DB 127,0 ; jg 2e3a <.literal16+0x30a> + DB 127,0 ; jg 2e3c <.literal16+0x30c> + DB 127,0 ; jg 2e3e <.literal16+0x30e> + DB 127,0 ; jg 2e40 <.literal16+0x310> + DB 129,128,129,128,129,128,129,128,129,128; addl $0x80818081,-0x7f7e7f7f(%rax) + DB 129,128,129,128,129,128,3,3,3,3 ; addl $0x3030303,-0x7f7e7f7f(%rax) + DB 7 ; (bad) + DB 7 ; (bad) + DB 7 ; (bad) + DB 7 ; (bad) + DB 11,11 ; or (%rbx),%ecx + DB 11,11 ; or (%rbx),%ecx + DB 15 ; (bad) + DB 15 ; (bad) + DB 15 ; (bad) + DB 15,255 ; (bad) + DB 0,255 ; add %bh,%bh + DB 0,255 ; add %bh,%bh + DB 0,255 ; add %bh,%bh + DB 0,255 ; add %bh,%bh + DB 0,255 ; add %bh,%bh + DB 0,255 ; add %bh,%bh + DB 0,255 ; add %bh,%bh + DB 0,127,0 ; add %bh,0x0(%rdi) + DB 127,0 ; jg 2e74 <.literal16+0x344> + DB 
127,0 ; jg 2e76 <.literal16+0x346> + DB 127,0 ; jg 2e78 <.literal16+0x348> + DB 127,0 ; jg 2e7a <.literal16+0x34a> + DB 127,0 ; jg 2e7c <.literal16+0x34c> + DB 127,0 ; jg 2e7e <.literal16+0x34e> + DB 127,0 ; jg 2e80 <.literal16+0x350> + DB 129,128,129,128,129,128,129,128,129,128; addl $0x80818081,-0x7f7e7f7f(%rax) + DB 129 ; .byte 0x81 + DB 128 ; .byte 0x80 + DB 129 ; .byte 0x81 + DB 128 ; .byte 0x80 + DB 129 ; .byte 0x81 + DB 128 ; .byte 0x80 ALIGN 32 PUBLIC _sk_start_pipeline_sse2_8bit @@ -45168,7 +46033,7 @@ _sk_uniform_color_sse2_8bit LABEL PROC PUBLIC _sk_set_rgb_sse2_8bit _sk_set_rgb_sse2_8bit LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax - DB 243,15,16,37,100,40,0,0 ; movss 0x2864(%rip),%xmm4 # 29e4 <_sk_difference_sse2_8bit+0x1c3> + DB 243,15,16,37,56,49,0,0 ; movss 0x3138(%rip),%xmm4 # 32b8 <_sk_overlay_sse2_8bit+0x46b> DB 243,15,16,40 ; movss (%rax),%xmm5 DB 243,15,89,236 ; mulss %xmm4,%xmm5 DB 243,72,15,44,205 ; cvttss2si %xmm5,%rcx @@ -45183,7 +46048,7 @@ _sk_set_rgb_sse2_8bit LABEL PROC DB 9,208 ; or %edx,%eax DB 102,15,110,224 ; movd %eax,%xmm4 DB 102,15,112,228,0 ; pshufd $0x0,%xmm4,%xmm4 - DB 102,15,111,45,48,40,0,0 ; movdqa 0x2830(%rip),%xmm5 # 29f0 <_sk_difference_sse2_8bit+0x1cf> + DB 102,15,111,45,16,49,0,0 ; movdqa 0x3110(%rip),%xmm5 # 32d0 <_sk_overlay_sse2_8bit+0x483> DB 102,15,219,205 ; pand %xmm5,%xmm1 DB 102,15,219,197 ; pand %xmm5,%xmm0 DB 102,15,235,196 ; por %xmm4,%xmm0 @@ -45206,7 +46071,7 @@ _sk_premul_sse2_8bit LABEL PROC DB 102,15,96,192 ; punpcklbw %xmm0,%xmm0 DB 242,15,112,192,95 ; pshuflw $0x5f,%xmm0,%xmm0 DB 243,15,112,248,95 ; pshufhw $0x5f,%xmm0,%xmm7 - DB 102,15,111,5,228,39,0,0 ; movdqa 0x27e4(%rip),%xmm0 # 2a00 <_sk_difference_sse2_8bit+0x1df> + DB 102,15,111,5,196,48,0,0 ; movdqa 0x30c4(%rip),%xmm0 # 32e0 <_sk_overlay_sse2_8bit+0x493> DB 102,15,235,248 ; por %xmm0,%xmm7 DB 102,15,235,240 ; por %xmm0,%xmm6 DB 102,69,15,239,201 ; pxor %xmm9,%xmm9 @@ -45708,7 +46573,7 @@ _sk_load_a8_sse2_8bit LABEL PROC DB 117,48 ; jne 8b9 
<_sk_load_a8_sse2_8bit+0x4d> DB 243,66,15,126,4,2 ; movq (%rdx,%r8,1),%xmm0 DB 102,15,96,192 ; punpcklbw %xmm0,%xmm0 - DB 102,15,84,5,117,33,0,0 ; andpd 0x2175(%rip),%xmm0 # 2a10 <_sk_difference_sse2_8bit+0x1ef> + DB 102,15,84,5,85,42,0,0 ; andpd 0x2a55(%rip),%xmm0 # 32f0 <_sk_overlay_sse2_8bit+0x4a3> DB 102,15,239,228 ; pxor %xmm4,%xmm4 DB 102,15,40,200 ; movapd %xmm0,%xmm1 DB 102,15,105,204 ; punpckhwd %xmm4,%xmm1 @@ -45783,7 +46648,7 @@ _sk_load_a8_dst_sse2_8bit LABEL PROC DB 117,48 ; jne 9ad <_sk_load_a8_dst_sse2_8bit+0x4d> DB 243,66,15,126,20,2 ; movq (%rdx,%r8,1),%xmm2 DB 102,15,96,208 ; punpcklbw %xmm0,%xmm2 - DB 102,15,84,21,145,32,0,0 ; andpd 0x2091(%rip),%xmm2 # 2a20 <_sk_difference_sse2_8bit+0x1ff> + DB 102,15,84,21,113,41,0,0 ; andpd 0x2971(%rip),%xmm2 # 3300 <_sk_overlay_sse2_8bit+0x4b3> DB 102,15,239,228 ; pxor %xmm4,%xmm4 DB 102,15,40,218 ; movapd %xmm2,%xmm3 DB 102,15,105,220 ; punpckhwd %xmm4,%xmm3 @@ -45866,7 +46731,7 @@ _sk_store_a8_sse2_8bit LABEL PROC DB 102,15,107,229 ; packssdw %xmm5,%xmm4 DB 77,133,201 ; test %r9,%r9 DB 117,26 ; jne ab9 <_sk_store_a8_sse2_8bit+0x65> - DB 102,15,219,37,137,31,0,0 ; pand 0x1f89(%rip),%xmm4 # 2a30 <_sk_difference_sse2_8bit+0x20f> + DB 102,15,219,37,105,40,0,0 ; pand 0x2869(%rip),%xmm4 # 3310 <_sk_overlay_sse2_8bit+0x4c3> DB 102,15,103,228 ; packuswb %xmm4,%xmm4 DB 102,66,15,214,36,2 ; movq %xmm4,(%rdx,%r8,1) DB 72,173 ; lods %ds:(%rsi),%rax @@ -45888,7 +46753,7 @@ _sk_store_a8_sse2_8bit LABEL PROC DB 102,15,127,100,36,16 ; movdqa %xmm4,0x10(%rsp) DB 138,68,36,20 ; mov 0x14(%rsp),%al DB 66,136,68,2,2 ; mov %al,0x2(%rdx,%r8,1) - DB 102,15,219,37,49,31,0,0 ; pand 0x1f31(%rip),%xmm4 # 2a30 <_sk_difference_sse2_8bit+0x20f> + DB 102,15,219,37,17,40,0,0 ; pand 0x2811(%rip),%xmm4 # 3310 <_sk_overlay_sse2_8bit+0x4c3> DB 102,15,103,228 ; packuswb %xmm4,%xmm4 DB 102,15,126,224 ; movd %xmm4,%eax DB 102,66,137,4,2 ; mov %ax,(%rdx,%r8,1) @@ -45902,7 +46767,7 @@ _sk_store_a8_sse2_8bit LABEL PROC DB 102,15,127,100,36,32 ; 
movdqa %xmm4,0x20(%rsp) DB 138,68,36,40 ; mov 0x28(%rsp),%al DB 66,136,68,2,4 ; mov %al,0x4(%rdx,%r8,1) - DB 102,15,219,37,237,30,0,0 ; pand 0x1eed(%rip),%xmm4 # 2a30 <_sk_difference_sse2_8bit+0x20f> + DB 102,15,219,37,205,39,0,0 ; pand 0x27cd(%rip),%xmm4 # 3310 <_sk_overlay_sse2_8bit+0x4c3> DB 102,15,103,228 ; packuswb %xmm4,%xmm4 DB 102,66,15,126,36,2 ; movd %xmm4,(%rdx,%r8,1) DB 233,95,255,255,255 ; jmpq ab1 <_sk_store_a8_sse2_8bit+0x5d> @@ -45940,12 +46805,12 @@ _sk_load_g8_sse2_8bit LABEL PROC DB 117,116 ; jne c01 <_sk_load_g8_sse2_8bit+0x91> DB 243,66,15,126,4,2 ; movq (%rdx,%r8,1),%xmm0 DB 102,15,96,192 ; punpcklbw %xmm0,%xmm0 - DB 102,15,84,5,161,30,0,0 ; andpd 0x1ea1(%rip),%xmm0 # 2a40 <_sk_difference_sse2_8bit+0x21f> + DB 102,15,84,5,129,39,0,0 ; andpd 0x2781(%rip),%xmm0 # 3320 <_sk_overlay_sse2_8bit+0x4d3> DB 102,15,239,201 ; pxor %xmm1,%xmm1 DB 102,15,40,224 ; movapd %xmm0,%xmm4 DB 102,15,97,225 ; punpcklwd %xmm1,%xmm4 DB 102,15,105,193 ; punpckhwd %xmm1,%xmm0 - DB 102,15,111,45,153,30,0,0 ; movdqa 0x1e99(%rip),%xmm5 # 2a50 <_sk_difference_sse2_8bit+0x22f> + DB 102,15,111,45,121,39,0,0 ; movdqa 0x2779(%rip),%xmm5 # 3330 <_sk_overlay_sse2_8bit+0x4e3> DB 102,15,112,240,245 ; pshufd $0xf5,%xmm0,%xmm6 DB 102,15,244,197 ; pmuludq %xmm5,%xmm0 DB 102,15,112,200,232 ; pshufd $0xe8,%xmm0,%xmm1 @@ -45958,7 +46823,7 @@ _sk_load_g8_sse2_8bit LABEL PROC DB 102,15,244,245 ; pmuludq %xmm5,%xmm6 DB 102,15,112,230,232 ; pshufd $0xe8,%xmm6,%xmm4 DB 102,15,98,196 ; punpckldq %xmm4,%xmm0 - DB 102,15,111,37,107,30,0,0 ; movdqa 0x1e6b(%rip),%xmm4 # 2a60 <_sk_difference_sse2_8bit+0x23f> + DB 102,15,111,37,75,39,0,0 ; movdqa 0x274b(%rip),%xmm4 # 3340 <_sk_overlay_sse2_8bit+0x4f3> DB 102,15,235,196 ; por %xmm4,%xmm0 DB 102,15,235,204 ; por %xmm4,%xmm1 DB 72,173 ; lods %ds:(%rsi),%rax @@ -46030,12 +46895,12 @@ _sk_load_g8_dst_sse2_8bit LABEL PROC DB 117,116 ; jne d41 <_sk_load_g8_dst_sse2_8bit+0x91> DB 243,66,15,126,20,2 ; movq (%rdx,%r8,1),%xmm2 DB 102,15,96,208 ; punpcklbw 
%xmm0,%xmm2 - DB 102,15,84,21,145,29,0,0 ; andpd 0x1d91(%rip),%xmm2 # 2a70 <_sk_difference_sse2_8bit+0x24f> + DB 102,15,84,21,113,38,0,0 ; andpd 0x2671(%rip),%xmm2 # 3350 <_sk_overlay_sse2_8bit+0x503> DB 102,15,239,219 ; pxor %xmm3,%xmm3 DB 102,15,40,226 ; movapd %xmm2,%xmm4 DB 102,15,97,227 ; punpcklwd %xmm3,%xmm4 DB 102,15,105,211 ; punpckhwd %xmm3,%xmm2 - DB 102,15,111,45,137,29,0,0 ; movdqa 0x1d89(%rip),%xmm5 # 2a80 <_sk_difference_sse2_8bit+0x25f> + DB 102,15,111,45,105,38,0,0 ; movdqa 0x2669(%rip),%xmm5 # 3360 <_sk_overlay_sse2_8bit+0x513> DB 102,15,112,242,245 ; pshufd $0xf5,%xmm2,%xmm6 DB 102,15,244,213 ; pmuludq %xmm5,%xmm2 DB 102,15,112,218,232 ; pshufd $0xe8,%xmm2,%xmm3 @@ -46048,7 +46913,7 @@ _sk_load_g8_dst_sse2_8bit LABEL PROC DB 102,15,244,245 ; pmuludq %xmm5,%xmm6 DB 102,15,112,230,232 ; pshufd $0xe8,%xmm6,%xmm4 DB 102,15,98,212 ; punpckldq %xmm4,%xmm2 - DB 102,15,111,37,91,29,0,0 ; movdqa 0x1d5b(%rip),%xmm4 # 2a90 <_sk_difference_sse2_8bit+0x26f> + DB 102,15,111,37,59,38,0,0 ; movdqa 0x263b(%rip),%xmm4 # 3370 <_sk_overlay_sse2_8bit+0x523> DB 102,15,235,212 ; por %xmm4,%xmm2 DB 102,15,235,220 ; por %xmm4,%xmm3 DB 72,173 ; lods %ds:(%rsi),%rax @@ -46261,7 +47126,7 @@ _sk_scale_1_float_sse2_8bit LABEL PROC DB 102,68,15,111,200 ; movdqa %xmm0,%xmm9 DB 72,173 ; lods %ds:(%rsi),%rax DB 243,15,16,0 ; movss (%rax),%xmm0 - DB 243,15,89,5,148,25,0,0 ; mulss 0x1994(%rip),%xmm0 # 29e8 <_sk_difference_sse2_8bit+0x1c7> + DB 243,15,89,5,104,34,0,0 ; mulss 0x2268(%rip),%xmm0 # 32bc <_sk_overlay_sse2_8bit+0x46f> DB 243,15,44,192 ; cvttss2si %xmm0,%eax DB 102,15,239,246 ; pxor %xmm6,%xmm6 DB 102,65,15,111,193 ; movdqa %xmm9,%xmm0 @@ -46273,7 +47138,7 @@ _sk_scale_1_float_sse2_8bit LABEL PROC DB 102,15,96,246 ; punpcklbw %xmm6,%xmm6 DB 242,15,112,246,0 ; pshuflw $0x0,%xmm6,%xmm6 DB 102,15,112,246,80 ; pshufd $0x50,%xmm6,%xmm6 - DB 102,15,219,53,19,26,0,0 ; pand 0x1a13(%rip),%xmm6 # 2aa0 <_sk_difference_sse2_8bit+0x27f> + DB 102,15,219,53,243,34,0,0 ; pand 
0x22f3(%rip),%xmm6 # 3380 <_sk_overlay_sse2_8bit+0x533> DB 102,15,111,254 ; movdqa %xmm6,%xmm7 DB 102,65,15,213,248 ; pmullw %xmm8,%xmm7 DB 102,15,111,230 ; movdqa %xmm6,%xmm4 @@ -46307,7 +47172,7 @@ _sk_scale_u8_sse2_8bit LABEL PROC DB 15,133,239,0,0,0 ; jne 11ed <_sk_scale_u8_sse2_8bit+0x110> DB 243,66,15,126,36,2 ; movq (%rdx,%r8,1),%xmm4 DB 102,15,96,224 ; punpcklbw %xmm0,%xmm4 - DB 102,15,84,37,160,25,0,0 ; andpd 0x19a0(%rip),%xmm4 # 2ab0 <_sk_difference_sse2_8bit+0x28f> + DB 102,15,84,37,128,34,0,0 ; andpd 0x2280(%rip),%xmm4 # 3390 <_sk_overlay_sse2_8bit+0x543> DB 102,69,15,239,192 ; pxor %xmm8,%xmm8 DB 102,15,40,236 ; movapd %xmm4,%xmm5 DB 102,65,15,105,232 ; punpckhwd %xmm8,%xmm5 @@ -46414,7 +47279,7 @@ PUBLIC _sk_lerp_1_float_sse2_8bit _sk_lerp_1_float_sse2_8bit LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 243,15,16,32 ; movss (%rax),%xmm4 - DB 243,15,89,37,62,23,0,0 ; mulss 0x173e(%rip),%xmm4 # 29ec <_sk_difference_sse2_8bit+0x1cb> + DB 243,15,89,37,18,32,0,0 ; mulss 0x2012(%rip),%xmm4 # 32c0 <_sk_overlay_sse2_8bit+0x473> DB 243,15,44,196 ; cvttss2si %xmm4,%eax DB 102,15,110,224 ; movd %eax,%xmm4 DB 102,15,96,228 ; punpcklbw %xmm4,%xmm4 @@ -46427,7 +47292,7 @@ _sk_lerp_1_float_sse2_8bit LABEL PROC DB 102,68,15,111,217 ; movdqa %xmm1,%xmm11 DB 102,69,15,96,217 ; punpcklbw %xmm9,%xmm11 DB 102,65,15,104,201 ; punpckhbw %xmm9,%xmm1 - DB 102,15,111,53,209,23,0,0 ; movdqa 0x17d1(%rip),%xmm6 # 2ac0 <_sk_difference_sse2_8bit+0x29f> + DB 102,15,111,53,177,32,0,0 ; movdqa 0x20b1(%rip),%xmm6 # 33a0 <_sk_overlay_sse2_8bit+0x553> DB 102,65,15,219,240 ; pand %xmm8,%xmm6 DB 102,15,111,230 ; movdqa %xmm6,%xmm4 DB 102,15,213,225 ; pmullw %xmm1,%xmm4 @@ -46493,7 +47358,7 @@ _sk_lerp_u8_sse2_8bit LABEL PROC DB 15,133,141,1,0,0 ; jne 1584 <_sk_lerp_u8_sse2_8bit+0x1ae> DB 243,66,15,126,44,2 ; movq (%rdx,%r8,1),%xmm5 DB 102,15,96,232 ; punpcklbw %xmm0,%xmm5 - DB 102,15,84,45,199,22,0,0 ; andpd 0x16c7(%rip),%xmm5 # 2ad0 <_sk_difference_sse2_8bit+0x2af> + DB 
102,15,84,45,167,31,0,0 ; andpd 0x1fa7(%rip),%xmm5 # 33b0 <_sk_overlay_sse2_8bit+0x563> DB 102,69,15,239,192 ; pxor %xmm8,%xmm8 DB 102,15,40,229 ; movapd %xmm5,%xmm4 DB 102,65,15,105,224 ; punpckhwd %xmm8,%xmm4 @@ -46645,7 +47510,7 @@ _sk_move_dst_src_sse2_8bit LABEL PROC PUBLIC _sk_black_color_sse2_8bit _sk_black_color_sse2_8bit LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax - DB 15,40,5,143,20,0,0 ; movaps 0x148f(%rip),%xmm0 # 2ae0 <_sk_difference_sse2_8bit+0x2bf> + DB 15,40,5,111,29,0,0 ; movaps 0x1d6f(%rip),%xmm0 # 33c0 <_sk_overlay_sse2_8bit+0x573> DB 15,40,200 ; movaps %xmm0,%xmm1 DB 255,224 ; jmpq *%rax @@ -47509,7 +48374,7 @@ _sk_darken_sse2_8bit LABEL PROC DB 102,65,15,248,234 ; psubb %xmm10,%xmm5 DB 102,15,248,207 ; psubb %xmm7,%xmm1 DB 102,15,248,196 ; psubb %xmm4,%xmm0 - DB 102,15,111,37,164,5,0,0 ; movdqa 0x5a4(%rip),%xmm4 # 2af0 <_sk_difference_sse2_8bit+0x2cf> + DB 102,15,111,37,132,14,0,0 ; movdqa 0xe84(%rip),%xmm4 # 33d0 <_sk_overlay_sse2_8bit+0x583> DB 102,15,219,236 ; pand %xmm4,%xmm5 DB 102,15,111,252 ; movdqa %xmm4,%xmm7 DB 102,15,223,248 ; pandn %xmm0,%xmm7 @@ -47615,7 +48480,7 @@ _sk_lighten_sse2_8bit LABEL PROC DB 102,65,15,248,234 ; psubb %xmm10,%xmm5 DB 102,15,248,207 ; psubb %xmm7,%xmm1 DB 102,15,248,196 ; psubb %xmm4,%xmm0 - DB 102,15,111,37,200,3,0,0 ; movdqa 0x3c8(%rip),%xmm4 # 2b00 <_sk_difference_sse2_8bit+0x2df> + DB 102,15,111,37,168,12,0,0 ; movdqa 0xca8(%rip),%xmm4 # 33e0 <_sk_overlay_sse2_8bit+0x593> DB 102,15,219,236 ; pand %xmm4,%xmm5 DB 102,15,111,252 ; movdqa %xmm4,%xmm7 DB 102,15,223,248 ; pandn %xmm0,%xmm7 @@ -47663,7 +48528,7 @@ _sk_exclusion_sse2_8bit LABEL PROC DB 102,15,113,214,8 ; psrlw $0x8,%xmm6 DB 102,15,103,244 ; packuswb %xmm4,%xmm6 DB 102,15,103,239 ; packuswb %xmm7,%xmm5 - DB 102,15,111,37,11,3,0,0 ; movdqa 0x30b(%rip),%xmm4 # 2b10 <_sk_difference_sse2_8bit+0x2ef> + DB 102,15,111,37,235,11,0,0 ; movdqa 0xbeb(%rip),%xmm4 # 33f0 <_sk_overlay_sse2_8bit+0x5a3> DB 102,15,248,205 ; psubb %xmm5,%xmm1 DB 102,15,219,236 ; 
pand %xmm4,%xmm5 DB 102,15,219,230 ; pand %xmm6,%xmm4 @@ -47760,7 +48625,7 @@ _sk_difference_sse2_8bit LABEL PROC DB 102,65,15,103,244 ; packuswb %xmm12,%xmm6 DB 102,65,15,218,226 ; pminub %xmm10,%xmm4 DB 102,65,15,218,243 ; pminub %xmm11,%xmm6 - DB 102,15,111,45,88,1,0,0 ; movdqa 0x158(%rip),%xmm5 # 2b20 <_sk_difference_sse2_8bit+0x2ff> + DB 102,15,111,45,56,10,0,0 ; movdqa 0xa38(%rip),%xmm5 # 3400 <_sk_overlay_sse2_8bit+0x5b3> DB 102,15,248,206 ; psubb %xmm6,%xmm1 DB 102,15,219,245 ; pand %xmm5,%xmm6 DB 102,15,219,236 ; pand %xmm4,%xmm5 @@ -47770,13 +48635,450 @@ _sk_difference_sse2_8bit LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 255,224 ; jmpq *%rax +PUBLIC _sk_hardlight_sse2_8bit +_sk_hardlight_sse2_8bit LABEL PROC + DB 72,129,236,56,1,0,0 ; sub $0x138,%rsp + DB 102,68,15,111,250 ; movdqa %xmm2,%xmm15 + DB 102,68,15,111,200 ; movdqa %xmm0,%xmm9 + DB 102,15,239,237 ; pxor %xmm5,%xmm5 + DB 102,69,15,111,241 ; movdqa %xmm9,%xmm14 + DB 102,68,15,96,245 ; punpcklbw %xmm5,%xmm14 + DB 102,15,111,193 ; movdqa %xmm1,%xmm0 + DB 102,15,96,197 ; punpcklbw %xmm5,%xmm0 + DB 102,68,15,111,192 ; movdqa %xmm0,%xmm8 + DB 242,65,15,112,193,231 ; pshuflw $0xe7,%xmm9,%xmm0 + DB 102,68,15,104,205 ; punpckhbw %xmm5,%xmm9 + DB 243,15,112,192,231 ; pshufhw $0xe7,%xmm0,%xmm0 + DB 102,15,112,192,232 ; pshufd $0xe8,%xmm0,%xmm0 + DB 102,15,96,192 ; punpcklbw %xmm0,%xmm0 + DB 242,15,112,192,95 ; pshuflw $0x5f,%xmm0,%xmm0 + DB 243,15,112,224,95 ; pshufhw $0x5f,%xmm0,%xmm4 + DB 242,15,112,193,231 ; pshuflw $0xe7,%xmm1,%xmm0 + DB 102,15,104,205 ; punpckhbw %xmm5,%xmm1 + DB 102,15,127,76,36,64 ; movdqa %xmm1,0x40(%rsp) + DB 243,15,112,192,231 ; pshufhw $0xe7,%xmm0,%xmm0 + DB 102,15,112,192,232 ; pshufd $0xe8,%xmm0,%xmm0 + DB 102,15,96,192 ; punpcklbw %xmm0,%xmm0 + DB 242,15,112,192,95 ; pshuflw $0x5f,%xmm0,%xmm0 + DB 243,68,15,112,224,95 ; pshufhw $0x5f,%xmm0,%xmm12 + DB 102,65,15,111,252 ; movdqa %xmm12,%xmm7 + DB 102,15,104,253 ; punpckhbw %xmm5,%xmm7 + DB 102,15,127,188,36,176,0,0,0 ; 
movdqa %xmm7,0xb0(%rsp) + DB 102,68,15,96,229 ; punpcklbw %xmm5,%xmm12 + DB 102,15,111,196 ; movdqa %xmm4,%xmm0 + DB 102,15,104,197 ; punpckhbw %xmm5,%xmm0 + DB 102,15,127,68,36,32 ; movdqa %xmm0,0x20(%rsp) + DB 102,15,96,229 ; punpcklbw %xmm5,%xmm4 + DB 102,68,15,127,188,36,32,1,0,0 ; movdqa %xmm15,0x120(%rsp) + DB 102,65,15,111,207 ; movdqa %xmm15,%xmm1 + DB 242,65,15,112,199,231 ; pshuflw $0xe7,%xmm15,%xmm0 + DB 102,68,15,96,253 ; punpcklbw %xmm5,%xmm15 + DB 102,15,104,205 ; punpckhbw %xmm5,%xmm1 + DB 102,15,127,76,36,16 ; movdqa %xmm1,0x10(%rsp) + DB 102,15,127,156,36,16,1,0,0 ; movdqa %xmm3,0x110(%rsp) + DB 102,15,111,211 ; movdqa %xmm3,%xmm2 + DB 102,15,111,203 ; movdqa %xmm3,%xmm1 + DB 102,15,96,205 ; punpcklbw %xmm5,%xmm1 + DB 102,15,127,12,36 ; movdqa %xmm1,(%rsp) + DB 102,15,104,213 ; punpckhbw %xmm5,%xmm2 + DB 102,15,127,148,36,0,1,0,0 ; movdqa %xmm2,0x100(%rsp) + DB 243,15,112,192,231 ; pshufhw $0xe7,%xmm0,%xmm0 + DB 102,15,112,192,232 ; pshufd $0xe8,%xmm0,%xmm0 + DB 102,15,96,192 ; punpcklbw %xmm0,%xmm0 + DB 242,15,112,192,95 ; pshuflw $0x5f,%xmm0,%xmm0 + DB 243,15,112,240,95 ; pshufhw $0x5f,%xmm0,%xmm6 + DB 242,15,112,195,231 ; pshuflw $0xe7,%xmm3,%xmm0 + DB 243,15,112,192,231 ; pshufhw $0xe7,%xmm0,%xmm0 + DB 102,15,112,192,232 ; pshufd $0xe8,%xmm0,%xmm0 + DB 102,15,96,192 ; punpcklbw %xmm0,%xmm0 + DB 242,15,112,192,95 ; pshuflw $0x5f,%xmm0,%xmm0 + DB 243,68,15,112,232,95 ; pshufhw $0x5f,%xmm0,%xmm13 + DB 102,65,15,111,205 ; movdqa %xmm13,%xmm1 + DB 102,15,104,205 ; punpckhbw %xmm5,%xmm1 + DB 102,15,127,76,36,48 ; movdqa %xmm1,0x30(%rsp) + DB 102,68,15,96,237 ; punpcklbw %xmm5,%xmm13 + DB 102,15,111,206 ; movdqa %xmm6,%xmm1 + DB 102,15,104,205 ; punpckhbw %xmm5,%xmm1 + DB 102,15,96,245 ; punpcklbw %xmm5,%xmm6 + DB 102,69,15,111,214 ; movdqa %xmm14,%xmm10 + DB 102,69,15,253,210 ; paddw %xmm10,%xmm10 + DB 102,65,15,111,193 ; movdqa %xmm9,%xmm0 + DB 102,15,253,192 ; paddw %xmm0,%xmm0 + DB 102,65,15,111,216 ; movdqa %xmm8,%xmm3 + DB 102,69,15,253,192 ; 
paddw %xmm8,%xmm8 + DB 102,15,111,84,36,64 ; movdqa 0x40(%rsp),%xmm2 + DB 102,15,253,210 ; paddw %xmm2,%xmm2 + DB 102,15,217,215 ; psubusw %xmm7,%xmm2 + DB 102,15,117,213 ; pcmpeqw %xmm5,%xmm2 + DB 102,15,127,148,36,240,0,0,0 ; movdqa %xmm2,0xf0(%rsp) + DB 102,69,15,217,196 ; psubusw %xmm12,%xmm8 + DB 102,68,15,117,197 ; pcmpeqw %xmm5,%xmm8 + DB 102,15,111,84,36,32 ; movdqa 0x20(%rsp),%xmm2 + DB 102,15,217,194 ; psubusw %xmm2,%xmm0 + DB 102,15,117,197 ; pcmpeqw %xmm5,%xmm0 + DB 102,15,127,132,36,192,0,0,0 ; movdqa %xmm0,0xc0(%rsp) + DB 102,68,15,111,220 ; movdqa %xmm4,%xmm11 + DB 102,69,15,217,211 ; psubusw %xmm11,%xmm10 + DB 102,68,15,117,213 ; pcmpeqw %xmm5,%xmm10 + DB 102,15,111,45,123,8,0,0 ; movdqa 0x87b(%rip),%xmm5 # 3410 <_sk_overlay_sse2_8bit+0x5c3> + DB 102,15,111,198 ; movdqa %xmm6,%xmm0 + DB 102,15,239,197 ; pxor %xmm5,%xmm0 + DB 102,65,15,213,198 ; pmullw %xmm14,%xmm0 + DB 102,15,127,132,36,224,0,0,0 ; movdqa %xmm0,0xe0(%rsp) + DB 102,65,15,111,195 ; movdqa %xmm11,%xmm0 + DB 102,15,239,197 ; pxor %xmm5,%xmm0 + DB 102,65,15,213,199 ; pmullw %xmm15,%xmm0 + DB 102,15,127,132,36,208,0,0,0 ; movdqa %xmm0,0xd0(%rsp) + DB 102,15,111,198 ; movdqa %xmm6,%xmm0 + DB 102,65,15,249,247 ; psubw %xmm15,%xmm6 + DB 102,15,127,180,36,128,0,0,0 ; movdqa %xmm6,0x80(%rsp) + DB 102,65,15,111,255 ; movdqa %xmm15,%xmm7 + DB 102,65,15,213,254 ; pmullw %xmm14,%xmm7 + DB 102,65,15,213,195 ; pmullw %xmm11,%xmm0 + DB 102,15,127,132,36,144,0,0,0 ; movdqa %xmm0,0x90(%rsp) + DB 102,69,15,249,222 ; psubw %xmm14,%xmm11 + DB 102,15,111,193 ; movdqa %xmm1,%xmm0 + DB 102,15,239,197 ; pxor %xmm5,%xmm0 + DB 102,65,15,213,193 ; pmullw %xmm9,%xmm0 + DB 102,15,127,132,36,160,0,0,0 ; movdqa %xmm0,0xa0(%rsp) + DB 102,68,15,111,250 ; movdqa %xmm2,%xmm15 + DB 102,68,15,239,253 ; pxor %xmm5,%xmm15 + DB 102,15,111,116,36,16 ; movdqa 0x10(%rsp),%xmm6 + DB 102,68,15,213,254 ; pmullw %xmm6,%xmm15 + DB 102,15,111,193 ; movdqa %xmm1,%xmm0 + DB 102,15,249,206 ; psubw %xmm6,%xmm1 + DB 102,15,127,76,36,96 ; 
movdqa %xmm1,0x60(%rsp) + DB 102,65,15,213,241 ; pmullw %xmm9,%xmm6 + DB 102,15,213,194 ; pmullw %xmm2,%xmm0 + DB 102,15,127,68,36,112 ; movdqa %xmm0,0x70(%rsp) + DB 102,65,15,249,209 ; psubw %xmm9,%xmm2 + DB 102,15,127,84,36,32 ; movdqa %xmm2,0x20(%rsp) + DB 102,65,15,111,197 ; movdqa %xmm13,%xmm0 + DB 102,15,111,200 ; movdqa %xmm0,%xmm1 + DB 102,15,239,205 ; pxor %xmm5,%xmm1 + DB 102,15,213,203 ; pmullw %xmm3,%xmm1 + DB 102,15,127,76,36,16 ; movdqa %xmm1,0x10(%rsp) + DB 102,69,15,111,204 ; movdqa %xmm12,%xmm9 + DB 102,68,15,239,205 ; pxor %xmm5,%xmm9 + DB 102,15,111,36,36 ; movdqa (%rsp),%xmm4 + DB 102,68,15,213,204 ; pmullw %xmm4,%xmm9 + DB 102,68,15,111,232 ; movdqa %xmm0,%xmm13 + DB 102,15,249,196 ; psubw %xmm4,%xmm0 + DB 102,15,127,68,36,80 ; movdqa %xmm0,0x50(%rsp) + DB 102,15,213,227 ; pmullw %xmm3,%xmm4 + DB 102,69,15,213,236 ; pmullw %xmm12,%xmm13 + DB 102,68,15,249,227 ; psubw %xmm3,%xmm12 + DB 102,15,111,76,36,48 ; movdqa 0x30(%rsp),%xmm1 + DB 102,15,111,193 ; movdqa %xmm1,%xmm0 + DB 102,15,239,197 ; pxor %xmm5,%xmm0 + DB 102,15,111,92,36,64 ; movdqa 0x40(%rsp),%xmm3 + DB 102,15,213,195 ; pmullw %xmm3,%xmm0 + DB 102,15,127,4,36 ; movdqa %xmm0,(%rsp) + DB 102,15,111,132,36,176,0,0,0 ; movdqa 0xb0(%rsp),%xmm0 + DB 102,15,239,232 ; pxor %xmm0,%xmm5 + DB 102,15,111,148,36,0,1,0,0 ; movdqa 0x100(%rsp),%xmm2 + DB 102,15,213,234 ; pmullw %xmm2,%xmm5 + DB 102,68,15,111,241 ; movdqa %xmm1,%xmm14 + DB 102,15,249,202 ; psubw %xmm2,%xmm1 + DB 102,15,127,76,36,48 ; movdqa %xmm1,0x30(%rsp) + DB 102,15,213,211 ; pmullw %xmm3,%xmm2 + DB 102,68,15,213,240 ; pmullw %xmm0,%xmm14 + DB 102,15,249,195 ; psubw %xmm3,%xmm0 + DB 102,15,213,68,36,48 ; pmullw 0x30(%rsp),%xmm0 + DB 102,68,15,213,100,36,80 ; pmullw 0x50(%rsp),%xmm12 + DB 102,15,111,76,36,32 ; movdqa 0x20(%rsp),%xmm1 + DB 102,15,213,76,36,96 ; pmullw 0x60(%rsp),%xmm1 + DB 102,68,15,213,156,36,128,0,0,0 ; pmullw 0x80(%rsp),%xmm11 + DB 102,15,253,192 ; paddw %xmm0,%xmm0 + DB 102,68,15,249,240 ; psubw %xmm0,%xmm14 + DB 
102,69,15,253,228 ; paddw %xmm12,%xmm12 + DB 102,69,15,249,236 ; psubw %xmm12,%xmm13 + DB 102,15,253,201 ; paddw %xmm1,%xmm1 + DB 102,15,111,92,36,112 ; movdqa 0x70(%rsp),%xmm3 + DB 102,15,249,217 ; psubw %xmm1,%xmm3 + DB 102,69,15,253,219 ; paddw %xmm11,%xmm11 + DB 102,15,111,132,36,144,0,0,0 ; movdqa 0x90(%rsp),%xmm0 + DB 102,65,15,249,195 ; psubw %xmm11,%xmm0 + DB 102,15,253,255 ; paddw %xmm7,%xmm7 + DB 102,65,15,219,250 ; pand %xmm10,%xmm7 + DB 102,68,15,223,208 ; pandn %xmm0,%xmm10 + DB 102,68,15,235,215 ; por %xmm7,%xmm10 + DB 102,15,253,246 ; paddw %xmm6,%xmm6 + DB 102,15,111,132,36,192,0,0,0 ; movdqa 0xc0(%rsp),%xmm0 + DB 102,15,219,240 ; pand %xmm0,%xmm6 + DB 102,15,223,195 ; pandn %xmm3,%xmm0 + DB 102,15,235,198 ; por %xmm6,%xmm0 + DB 102,15,111,216 ; movdqa %xmm0,%xmm3 + DB 102,15,253,228 ; paddw %xmm4,%xmm4 + DB 102,65,15,219,224 ; pand %xmm8,%xmm4 + DB 102,69,15,223,197 ; pandn %xmm13,%xmm8 + DB 102,68,15,235,196 ; por %xmm4,%xmm8 + DB 102,15,253,210 ; paddw %xmm2,%xmm2 + DB 102,15,111,132,36,240,0,0,0 ; movdqa 0xf0(%rsp),%xmm0 + DB 102,15,219,208 ; pand %xmm0,%xmm2 + DB 102,65,15,223,198 ; pandn %xmm14,%xmm0 + DB 102,15,235,194 ; por %xmm2,%xmm0 + DB 102,15,111,200 ; movdqa %xmm0,%xmm1 + DB 102,15,111,148,36,208,0,0,0 ; movdqa 0xd0(%rsp),%xmm2 + DB 102,15,253,148,36,224,0,0,0 ; paddw 0xe0(%rsp),%xmm2 + DB 102,68,15,253,188,36,160,0,0,0 ; paddw 0xa0(%rsp),%xmm15 + DB 102,68,15,253,76,36,16 ; paddw 0x10(%rsp),%xmm9 + DB 102,15,253,44,36 ; paddw (%rsp),%xmm5 + DB 102,15,111,5,89,6,0,0 ; movdqa 0x659(%rip),%xmm0 # 3420 <_sk_overlay_sse2_8bit+0x5d3> + DB 102,15,253,208 ; paddw %xmm0,%xmm2 + DB 102,68,15,253,210 ; paddw %xmm2,%xmm10 + DB 102,68,15,253,248 ; paddw %xmm0,%xmm15 + DB 102,68,15,253,251 ; paddw %xmm3,%xmm15 + DB 102,68,15,253,200 ; paddw %xmm0,%xmm9 + DB 102,69,15,253,193 ; paddw %xmm9,%xmm8 + DB 102,15,253,232 ; paddw %xmm0,%xmm5 + DB 102,15,253,233 ; paddw %xmm1,%xmm5 + DB 102,15,111,5,60,6,0,0 ; movdqa 0x63c(%rip),%xmm0 # 3430 
<_sk_overlay_sse2_8bit+0x5e3> + DB 102,15,228,232 ; pmulhuw %xmm0,%xmm5 + DB 102,68,15,228,192 ; pmulhuw %xmm0,%xmm8 + DB 102,68,15,228,248 ; pmulhuw %xmm0,%xmm15 + DB 102,68,15,228,208 ; pmulhuw %xmm0,%xmm10 + DB 102,65,15,113,215,7 ; psrlw $0x7,%xmm15 + DB 102,65,15,113,210,7 ; psrlw $0x7,%xmm10 + DB 102,69,15,103,215 ; packuswb %xmm15,%xmm10 + DB 102,15,113,213,7 ; psrlw $0x7,%xmm5 + DB 102,65,15,113,208,7 ; psrlw $0x7,%xmm8 + DB 102,68,15,103,197 ; packuswb %xmm5,%xmm8 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 15,40,148,36,32,1,0,0 ; movaps 0x120(%rsp),%xmm2 + DB 15,40,156,36,16,1,0,0 ; movaps 0x110(%rsp),%xmm3 + DB 102,65,15,111,194 ; movdqa %xmm10,%xmm0 + DB 102,65,15,111,200 ; movdqa %xmm8,%xmm1 + DB 72,129,196,56,1,0,0 ; add $0x138,%rsp + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_overlay_sse2_8bit +_sk_overlay_sse2_8bit LABEL PROC + DB 72,129,236,72,1,0,0 ; sub $0x148,%rsp + DB 102,15,239,228 ; pxor %xmm4,%xmm4 + DB 102,68,15,111,248 ; movdqa %xmm0,%xmm15 + DB 102,68,15,96,252 ; punpcklbw %xmm4,%xmm15 + DB 102,15,111,233 ; movdqa %xmm1,%xmm5 + DB 102,15,96,236 ; punpcklbw %xmm4,%xmm5 + DB 102,15,127,108,36,16 ; movdqa %xmm5,0x10(%rsp) + DB 242,15,112,232,231 ; pshuflw $0xe7,%xmm0,%xmm5 + DB 102,15,104,196 ; punpckhbw %xmm4,%xmm0 + DB 102,15,127,4,36 ; movdqa %xmm0,(%rsp) + DB 243,15,112,197,231 ; pshufhw $0xe7,%xmm5,%xmm0 + DB 102,15,112,192,232 ; pshufd $0xe8,%xmm0,%xmm0 + DB 102,15,96,192 ; punpcklbw %xmm0,%xmm0 + DB 242,15,112,192,95 ; pshuflw $0x5f,%xmm0,%xmm0 + DB 243,15,112,232,95 ; pshufhw $0x5f,%xmm0,%xmm5 + DB 242,15,112,193,231 ; pshuflw $0xe7,%xmm1,%xmm0 + DB 102,15,104,204 ; punpckhbw %xmm4,%xmm1 + DB 102,15,127,140,36,16,1,0,0 ; movdqa %xmm1,0x110(%rsp) + DB 243,15,112,192,231 ; pshufhw $0xe7,%xmm0,%xmm0 + DB 102,15,112,192,232 ; pshufd $0xe8,%xmm0,%xmm0 + DB 102,15,96,192 ; punpcklbw %xmm0,%xmm0 + DB 242,15,112,192,95 ; pshuflw $0x5f,%xmm0,%xmm0 + DB 243,15,112,200,95 ; pshufhw $0x5f,%xmm0,%xmm1 + DB 102,15,111,193 ; movdqa %xmm1,%xmm0 + DB 
102,15,104,196 ; punpckhbw %xmm4,%xmm0 + DB 102,15,127,132,36,0,1,0,0 ; movdqa %xmm0,0x100(%rsp) + DB 102,15,96,204 ; punpcklbw %xmm4,%xmm1 + DB 102,15,127,140,36,224,0,0,0 ; movdqa %xmm1,0xe0(%rsp) + DB 102,68,15,111,221 ; movdqa %xmm5,%xmm11 + DB 102,68,15,104,220 ; punpckhbw %xmm4,%xmm11 + DB 102,15,96,236 ; punpcklbw %xmm4,%xmm5 + DB 102,68,15,111,213 ; movdqa %xmm5,%xmm10 + DB 102,15,111,202 ; movdqa %xmm2,%xmm1 + DB 102,15,127,140,36,48,1,0,0 ; movdqa %xmm1,0x130(%rsp) + DB 102,68,15,111,193 ; movdqa %xmm1,%xmm8 + DB 242,15,112,193,231 ; pshuflw $0xe7,%xmm1,%xmm0 + DB 102,15,96,204 ; punpcklbw %xmm4,%xmm1 + DB 102,68,15,104,196 ; punpckhbw %xmm4,%xmm8 + DB 102,15,127,156,36,32,1,0,0 ; movdqa %xmm3,0x120(%rsp) + DB 102,15,111,211 ; movdqa %xmm3,%xmm2 + DB 102,68,15,111,243 ; movdqa %xmm3,%xmm14 + DB 102,68,15,96,244 ; punpcklbw %xmm4,%xmm14 + DB 102,15,104,212 ; punpckhbw %xmm4,%xmm2 + DB 102,15,111,242 ; movdqa %xmm2,%xmm6 + DB 102,15,127,180,36,144,0,0,0 ; movdqa %xmm6,0x90(%rsp) + DB 243,15,112,192,231 ; pshufhw $0xe7,%xmm0,%xmm0 + DB 102,15,112,192,232 ; pshufd $0xe8,%xmm0,%xmm0 + DB 102,15,96,192 ; punpcklbw %xmm0,%xmm0 + DB 242,15,112,192,95 ; pshuflw $0x5f,%xmm0,%xmm0 + DB 243,15,112,208,95 ; pshufhw $0x5f,%xmm0,%xmm2 + DB 242,15,112,195,231 ; pshuflw $0xe7,%xmm3,%xmm0 + DB 243,15,112,192,231 ; pshufhw $0xe7,%xmm0,%xmm0 + DB 102,15,112,192,232 ; pshufd $0xe8,%xmm0,%xmm0 + DB 102,15,96,192 ; punpcklbw %xmm0,%xmm0 + DB 242,15,112,192,95 ; pshuflw $0x5f,%xmm0,%xmm0 + DB 243,15,112,216,95 ; pshufhw $0x5f,%xmm0,%xmm3 + DB 102,15,111,195 ; movdqa %xmm3,%xmm0 + DB 102,15,104,196 ; punpckhbw %xmm4,%xmm0 + DB 102,15,127,132,36,128,0,0,0 ; movdqa %xmm0,0x80(%rsp) + DB 102,15,96,220 ; punpcklbw %xmm4,%xmm3 + DB 102,68,15,111,202 ; movdqa %xmm2,%xmm9 + DB 102,68,15,104,204 ; punpckhbw %xmm4,%xmm9 + DB 102,15,96,212 ; punpcklbw %xmm4,%xmm2 + DB 102,15,111,233 ; movdqa %xmm1,%xmm5 + DB 102,15,253,237 ; paddw %xmm5,%xmm5 + DB 102,65,15,111,248 ; movdqa %xmm8,%xmm7 + 
DB 102,15,253,255 ; paddw %xmm7,%xmm7 + DB 102,69,15,111,238 ; movdqa %xmm14,%xmm13 + DB 102,69,15,253,237 ; paddw %xmm13,%xmm13 + DB 102,15,253,246 ; paddw %xmm6,%xmm6 + DB 102,15,217,240 ; psubusw %xmm0,%xmm6 + DB 102,15,117,244 ; pcmpeqw %xmm4,%xmm6 + DB 102,15,127,180,36,240,0,0,0 ; movdqa %xmm6,0xf0(%rsp) + DB 102,68,15,217,235 ; psubusw %xmm3,%xmm13 + DB 102,68,15,117,236 ; pcmpeqw %xmm4,%xmm13 + DB 102,68,15,127,172,36,208,0,0,0 ; movdqa %xmm13,0xd0(%rsp) + DB 102,65,15,217,249 ; psubusw %xmm9,%xmm7 + DB 102,15,117,252 ; pcmpeqw %xmm4,%xmm7 + DB 102,15,127,188,36,176,0,0,0 ; movdqa %xmm7,0xb0(%rsp) + DB 102,15,217,234 ; psubusw %xmm2,%xmm5 + DB 102,15,117,236 ; pcmpeqw %xmm4,%xmm5 + DB 102,15,127,172,36,160,0,0,0 ; movdqa %xmm5,0xa0(%rsp) + DB 102,15,111,53,64,4,0,0 ; movdqa 0x440(%rip),%xmm6 # 3440 <_sk_overlay_sse2_8bit+0x5f3> + DB 102,15,111,194 ; movdqa %xmm2,%xmm0 + DB 102,15,239,198 ; pxor %xmm6,%xmm0 + DB 102,65,15,213,199 ; pmullw %xmm15,%xmm0 + DB 102,15,127,132,36,192,0,0,0 ; movdqa %xmm0,0xc0(%rsp) + DB 102,65,15,111,194 ; movdqa %xmm10,%xmm0 + DB 102,68,15,111,224 ; movdqa %xmm0,%xmm12 + DB 102,68,15,239,230 ; pxor %xmm6,%xmm12 + DB 102,68,15,213,225 ; pmullw %xmm1,%xmm12 + DB 102,15,111,226 ; movdqa %xmm2,%xmm4 + DB 102,15,249,209 ; psubw %xmm1,%xmm2 + DB 102,15,127,84,36,80 ; movdqa %xmm2,0x50(%rsp) + DB 102,68,15,111,209 ; movdqa %xmm1,%xmm10 + DB 102,69,15,213,215 ; pmullw %xmm15,%xmm10 + DB 102,15,213,224 ; pmullw %xmm0,%xmm4 + DB 102,15,127,100,36,96 ; movdqa %xmm4,0x60(%rsp) + DB 102,65,15,249,199 ; psubw %xmm15,%xmm0 + DB 102,15,127,68,36,48 ; movdqa %xmm0,0x30(%rsp) + DB 102,65,15,111,193 ; movdqa %xmm9,%xmm0 + DB 102,15,239,198 ; pxor %xmm6,%xmm0 + DB 102,15,111,20,36 ; movdqa (%rsp),%xmm2 + DB 102,15,213,194 ; pmullw %xmm2,%xmm0 + DB 102,15,127,68,36,112 ; movdqa %xmm0,0x70(%rsp) + DB 102,65,15,111,195 ; movdqa %xmm11,%xmm0 + DB 102,68,15,239,222 ; pxor %xmm6,%xmm11 + DB 102,69,15,213,216 ; pmullw %xmm8,%xmm11 + DB 102,69,15,111,249 ; 
movdqa %xmm9,%xmm15 + DB 102,69,15,249,200 ; psubw %xmm8,%xmm9 + DB 102,68,15,127,76,36,64 ; movdqa %xmm9,0x40(%rsp) + DB 102,68,15,213,194 ; pmullw %xmm2,%xmm8 + DB 102,68,15,213,248 ; pmullw %xmm0,%xmm15 + DB 102,15,249,194 ; psubw %xmm2,%xmm0 + DB 102,68,15,111,200 ; movdqa %xmm0,%xmm9 + DB 102,15,111,195 ; movdqa %xmm3,%xmm0 + DB 102,15,239,198 ; pxor %xmm6,%xmm0 + DB 102,15,111,84,36,16 ; movdqa 0x10(%rsp),%xmm2 + DB 102,15,213,194 ; pmullw %xmm2,%xmm0 + DB 102,15,127,4,36 ; movdqa %xmm0,(%rsp) + DB 102,15,111,132,36,224,0,0,0 ; movdqa 0xe0(%rsp),%xmm0 + DB 102,15,111,232 ; movdqa %xmm0,%xmm5 + DB 102,15,239,238 ; pxor %xmm6,%xmm5 + DB 102,65,15,213,238 ; pmullw %xmm14,%xmm5 + DB 102,68,15,111,235 ; movdqa %xmm3,%xmm13 + DB 102,65,15,249,222 ; psubw %xmm14,%xmm3 + DB 102,15,127,92,36,32 ; movdqa %xmm3,0x20(%rsp) + DB 102,65,15,111,254 ; movdqa %xmm14,%xmm7 + DB 102,15,213,250 ; pmullw %xmm2,%xmm7 + DB 102,68,15,213,232 ; pmullw %xmm0,%xmm13 + DB 102,15,249,194 ; psubw %xmm2,%xmm0 + DB 102,15,111,208 ; movdqa %xmm0,%xmm2 + DB 102,15,111,140,36,128,0,0,0 ; movdqa 0x80(%rsp),%xmm1 + DB 102,15,111,193 ; movdqa %xmm1,%xmm0 + DB 102,15,239,198 ; pxor %xmm6,%xmm0 + DB 102,15,111,156,36,16,1,0,0 ; movdqa 0x110(%rsp),%xmm3 + DB 102,15,213,195 ; pmullw %xmm3,%xmm0 + DB 102,15,127,68,36,16 ; movdqa %xmm0,0x10(%rsp) + DB 102,15,111,132,36,0,1,0,0 ; movdqa 0x100(%rsp),%xmm0 + DB 102,15,239,240 ; pxor %xmm0,%xmm6 + DB 102,15,111,164,36,144,0,0,0 ; movdqa 0x90(%rsp),%xmm4 + DB 102,15,213,244 ; pmullw %xmm4,%xmm6 + DB 102,68,15,111,241 ; movdqa %xmm1,%xmm14 + DB 102,15,249,204 ; psubw %xmm4,%xmm1 + DB 102,15,213,227 ; pmullw %xmm3,%xmm4 + DB 102,68,15,213,240 ; pmullw %xmm0,%xmm14 + DB 102,15,249,195 ; psubw %xmm3,%xmm0 + DB 102,15,213,193 ; pmullw %xmm1,%xmm0 + DB 102,15,213,84,36,32 ; pmullw 0x20(%rsp),%xmm2 + DB 102,68,15,213,76,36,64 ; pmullw 0x40(%rsp),%xmm9 + DB 102,15,111,76,36,48 ; movdqa 0x30(%rsp),%xmm1 + DB 102,15,213,76,36,80 ; pmullw 0x50(%rsp),%xmm1 + DB 
102,15,253,192 ; paddw %xmm0,%xmm0 + DB 102,68,15,249,240 ; psubw %xmm0,%xmm14 + DB 102,15,253,210 ; paddw %xmm2,%xmm2 + DB 102,68,15,249,234 ; psubw %xmm2,%xmm13 + DB 102,69,15,253,201 ; paddw %xmm9,%xmm9 + DB 102,69,15,249,249 ; psubw %xmm9,%xmm15 + DB 102,15,111,193 ; movdqa %xmm1,%xmm0 + DB 102,15,253,192 ; paddw %xmm0,%xmm0 + DB 102,15,111,76,36,96 ; movdqa 0x60(%rsp),%xmm1 + DB 102,15,249,200 ; psubw %xmm0,%xmm1 + DB 102,69,15,253,210 ; paddw %xmm10,%xmm10 + DB 102,15,111,132,36,160,0,0,0 ; movdqa 0xa0(%rsp),%xmm0 + DB 102,68,15,219,208 ; pand %xmm0,%xmm10 + DB 102,15,223,193 ; pandn %xmm1,%xmm0 + DB 102,65,15,235,194 ; por %xmm10,%xmm0 + DB 102,15,111,216 ; movdqa %xmm0,%xmm3 + DB 102,69,15,253,192 ; paddw %xmm8,%xmm8 + DB 102,15,111,132,36,176,0,0,0 ; movdqa 0xb0(%rsp),%xmm0 + DB 102,68,15,219,192 ; pand %xmm0,%xmm8 + DB 102,65,15,223,199 ; pandn %xmm15,%xmm0 + DB 102,65,15,235,192 ; por %xmm8,%xmm0 + DB 102,68,15,111,192 ; movdqa %xmm0,%xmm8 + DB 102,15,253,255 ; paddw %xmm7,%xmm7 + DB 102,15,111,132,36,208,0,0,0 ; movdqa 0xd0(%rsp),%xmm0 + DB 102,15,219,248 ; pand %xmm0,%xmm7 + DB 102,65,15,223,197 ; pandn %xmm13,%xmm0 + DB 102,15,235,199 ; por %xmm7,%xmm0 + DB 102,15,111,208 ; movdqa %xmm0,%xmm2 + DB 102,15,253,228 ; paddw %xmm4,%xmm4 + DB 102,15,111,132,36,240,0,0,0 ; movdqa 0xf0(%rsp),%xmm0 + DB 102,15,219,224 ; pand %xmm0,%xmm4 + DB 102,65,15,223,198 ; pandn %xmm14,%xmm0 + DB 102,15,235,196 ; por %xmm4,%xmm0 + DB 102,15,111,200 ; movdqa %xmm0,%xmm1 + DB 102,68,15,253,164,36,192,0,0,0 ; paddw 0xc0(%rsp),%xmm12 + DB 102,68,15,253,92,36,112 ; paddw 0x70(%rsp),%xmm11 + DB 102,15,253,44,36 ; paddw (%rsp),%xmm5 + DB 102,15,253,116,36,16 ; paddw 0x10(%rsp),%xmm6 + DB 102,15,111,5,26,2,0,0 ; movdqa 0x21a(%rip),%xmm0 # 3450 <_sk_overlay_sse2_8bit+0x603> + DB 102,68,15,253,224 ; paddw %xmm0,%xmm12 + DB 102,68,15,253,227 ; paddw %xmm3,%xmm12 + DB 102,68,15,253,216 ; paddw %xmm0,%xmm11 + DB 102,69,15,253,216 ; paddw %xmm8,%xmm11 + DB 102,15,253,232 ; paddw 
%xmm0,%xmm5 + DB 102,15,253,234 ; paddw %xmm2,%xmm5 + DB 102,15,253,240 ; paddw %xmm0,%xmm6 + DB 102,15,253,241 ; paddw %xmm1,%xmm6 + DB 102,15,111,5,254,1,0,0 ; movdqa 0x1fe(%rip),%xmm0 # 3460 <_sk_overlay_sse2_8bit+0x613> + DB 102,15,228,240 ; pmulhuw %xmm0,%xmm6 + DB 102,15,228,232 ; pmulhuw %xmm0,%xmm5 + DB 102,68,15,228,216 ; pmulhuw %xmm0,%xmm11 + DB 102,68,15,228,224 ; pmulhuw %xmm0,%xmm12 + DB 102,65,15,113,211,7 ; psrlw $0x7,%xmm11 + DB 102,65,15,113,212,7 ; psrlw $0x7,%xmm12 + DB 102,69,15,103,227 ; packuswb %xmm11,%xmm12 + DB 102,15,113,214,7 ; psrlw $0x7,%xmm6 + DB 102,15,113,213,7 ; psrlw $0x7,%xmm5 + DB 102,15,103,238 ; packuswb %xmm6,%xmm5 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 15,40,148,36,48,1,0,0 ; movaps 0x130(%rsp),%xmm2 + DB 15,40,156,36,32,1,0,0 ; movaps 0x120(%rsp),%xmm3 + DB 102,65,15,111,196 ; movdqa %xmm12,%xmm0 + DB 102,15,111,205 ; movdqa %xmm5,%xmm1 + DB 72,129,196,72,1,0,0 ; add $0x148,%rsp + DB 255,224 ; jmpq *%rax + ALIGN 4 DB 0,0 ; add %al,(%rax) - DB 127,67 ; jg 2a2b <_sk_difference_sse2_8bit+0x20a> + DB 127,67 ; jg 32ff <_sk_overlay_sse2_8bit+0x4b2> DB 0,0 ; add %al,(%rax) - DB 127,67 ; jg 2a2f <_sk_difference_sse2_8bit+0x20e> + DB 127,67 ; jg 3303 <_sk_overlay_sse2_8bit+0x4b6> DB 0,0 ; add %al,(%rax) - DB 127,67 ; jg 2a33 <_sk_difference_sse2_8bit+0x212> + DB 127,67 ; jg 3307 <_sk_overlay_sse2_8bit+0x4ba> ALIGN 16 DB 0,0 ; add %al,(%rax) @@ -47955,6 +49257,45 @@ ALIGN 16 DB 255 ; (bad) DB 255 ; (bad) DB 255,0 ; incl (%rax) + DB 255,0 ; incl (%rax) + DB 255,0 ; incl (%rax) + DB 255,0 ; incl (%rax) + DB 255,0 ; incl (%rax) + DB 255,0 ; incl (%rax) + DB 255,0 ; incl (%rax) + DB 255,0 ; incl (%rax) + DB 255,0 ; incl (%rax) + DB 127,0 ; jg 3422 <.literal16+0x152> + DB 127,0 ; jg 3424 <.literal16+0x154> + DB 127,0 ; jg 3426 <.literal16+0x156> + DB 127,0 ; jg 3428 <.literal16+0x158> + DB 127,0 ; jg 342a <.literal16+0x15a> + DB 127,0 ; jg 342c <.literal16+0x15c> + DB 127,0 ; jg 342e <.literal16+0x15e> + DB 127,0 ; jg 3430 
<.literal16+0x160> + DB 129,128,129,128,129,128,129,128,129,128; addl $0x80818081,-0x7f7e7f7f(%rax) + DB 129,128,129,128,129,128,255,0,255,0 ; addl $0xff00ff,-0x7f7e7f7f(%rax) + DB 255,0 ; incl (%rax) + DB 255,0 ; incl (%rax) + DB 255,0 ; incl (%rax) + DB 255,0 ; incl (%rax) + DB 255,0 ; incl (%rax) + DB 255,0 ; incl (%rax) + DB 127,0 ; jg 3452 <.literal16+0x182> + DB 127,0 ; jg 3454 <.literal16+0x184> + DB 127,0 ; jg 3456 <.literal16+0x186> + DB 127,0 ; jg 3458 <.literal16+0x188> + DB 127,0 ; jg 345a <.literal16+0x18a> + DB 127,0 ; jg 345c <.literal16+0x18c> + DB 127,0 ; jg 345e <.literal16+0x18e> + DB 127,0 ; jg 3460 <.literal16+0x190> + DB 129,128,129,128,129,128,129,128,129,128; addl $0x80818081,-0x7f7e7f7f(%rax) + DB 129 ; .byte 0x81 + DB 128 ; .byte 0x80 + DB 129 ; .byte 0x81 + DB 128 ; .byte 0x80 + DB 129 ; .byte 0x81 + DB 128 ; .byte 0x80 ELSE .MODEL FLAT,C _text32 SEGMENT ALIGN(32) 'CODE' diff --git a/src/jumper/SkJumper_stages_8bit.cpp b/src/jumper/SkJumper_stages_8bit.cpp index d1b0f54a41..6ebba0768b 100644 --- a/src/jumper/SkJumper_stages_8bit.cpp +++ b/src/jumper/SkJumper_stages_8bit.cpp @@ -61,18 +61,6 @@ SI void split(U8x4 u8x4, R* lo, R* hi) { memcpy(hi, (char*)&u8x4 + sizeof(R), sizeof(R)); } -union V { - U32 u32; - U8x4 u8x4; - - V() = default; - V(U32 v) : u32 (v) {} - V(U8x4 v) : u8x4(v) {} - V(int v) : u8x4(v) {} - V(float v) : u8x4(v*255) {} -}; -static const size_t kStride = sizeof(V) / sizeof(uint32_t); - // Usually __builtin_convertvector() is pretty good, but sometimes we can do better. 
SI U8x4 pack(U16x4 v) { #if defined(__AVX2__) @@ -100,6 +88,19 @@ SI U8x4 pack(U16x4 v) { #endif } +union V { + U32 u32; + U8x4 u8x4; + + V() = default; + V(U32 v) : u32 (v) {} + V(U8x4 v) : u8x4(v) {} + V(U16x4 v) : u8x4(pack((v + 127)/255)) {} + V(int v) : u8x4(v) {} + V(float v) : u8x4(v*255) {} +}; +static const size_t kStride = sizeof(V) / sizeof(uint32_t); + SI V operator+(V x, V y) { return x.u8x4 + y.u8x4; } SI V operator-(V x, V y) { return x.u8x4 - y.u8x4; } SI V operator*(V x, V y) { @@ -109,7 +110,8 @@ SI V operator*(V x, V y) { return pack((X*Y + X)>>8); } -SI V inv(V v) { return 0xff - v; } +template <typename T> +SI T inv(T v) { return 0xff - v; } SI V two(V v) { return v + v; } SI V lerp(V from, V to, V t) { return to*t + from*inv(t); } @@ -141,14 +143,18 @@ SI V swap_rb(V v) { #endif } + +template <typename MaskT, typename ValT> +SI ValT if_then_else(MaskT m, ValT t, ValT e) { + return (t & m) | (e & ~m); +} + SI V max(V a, V b) { - auto gt = a.u8x4 > b.u8x4; - return (a.u8x4 & gt) | (b.u8x4 &~gt); + return if_then_else(a.u8x4 > b.u8x4, a.u8x4, b.u8x4); } SI V min(V a, V b) { - auto gt = a.u8x4 > b.u8x4; - return (a.u8x4 & ~gt) | (b.u8x4 & gt); + return if_then_else(a.u8x4 > b.u8x4, b.u8x4, a.u8x4); } struct Params { @@ -440,3 +446,30 @@ STAGE(difference) { V min_ = min(src*alpha(dst), dst*alpha(src)); src = (src - min_) + (dst - zero_alpha(min_)); } + +template <typename Func> +V blend_rgb16(V src, V dst, Func&& blend) { + U16x4 s = __builtin_convertvector( src.u8x4, U16x4), + sa = __builtin_convertvector(alpha(src).u8x4, U16x4), + d = __builtin_convertvector( dst.u8x4, U16x4), + da = __builtin_convertvector(alpha(dst).u8x4, U16x4), + + rgb = blend(s, d, sa, da), + a = s + (d - d*sa); + + return if_then_else(0x0000ffffffffffff, rgb, a); +} + +STAGE(hardlight) { + src = blend_rgb16(src, dst, [](U16x4 s, U16x4 d, U16x4 sa, U16x4 da) { + return s*inv(da) + d*inv(sa) + + if_then_else(s*2 <= sa, s*d*2, sa*da - (da - d)*(sa - s)*2); + }); +} + 
+STAGE(overlay) { + src = blend_rgb16(src, dst, [](U16x4 s, U16x4 d, U16x4 sa, U16x4 da) { + return s*inv(da) + d*inv(sa) + + if_then_else(d*2 <= da, s*d*2, sa*da - (da - d)*(sa - s)*2); + }); +} |