path: root/src/jumper
author     Florin Malita <fmalita@chromium.org>            2017-08-14 16:49:32 -0400
committer  Skia Commit-Bot <skia-commit-bot@chromium.org>  2017-08-14 21:31:25 +0000
commit     5bfc85a8b3cb7ec181a1d1a690ad3514ab1d3056 (patch)
tree       5aaf6906d1195090ea5d2e43137a3fe132acedd9 /src/jumper
parent     93ba0a4fc85f04bc1be9429983df1e57473b49a7 (diff)
Lowp overlay, hardlight stages
Before:
  micros    bench
  7669.09 ? blendmode_rect_HardLight  8888
  8707.13 ? blendmode_rect_Overlay    8888

After:
  micros    bench
  6679.60 ? blendmode_rect_HardLight  8888
  6789.57 ? blendmode_rect_Overlay    8888

Change-Id: I52f389253fa07dafe18e572af550af7387264a16
Reviewed-on: https://skia-review.googlesource.com/34280
Commit-Queue: Florin Malita <fmalita@chromium.org>
Reviewed-by: Mike Klein <mtklein@google.com>
Diffstat (limited to 'src/jumper')
-rw-r--r--  src/jumper/SkJumper.cpp                 4
-rw-r--r--  src/jumper/SkJumper_generated.S      1613
-rw-r--r--  src/jumper/SkJumper_generated_win.S  1599
-rw-r--r--  src/jumper/SkJumper_stages_8bit.cpp    67
4 files changed, 3006 insertions, 277 deletions
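
The new lowp stages evaluate the standard hard-light and overlay blend equations in 8-bit fixed point; the hand-written source lives in SkJumper_stages_8bit.cpp (listed in the diffstat but not shown here), and the assembly below is generated from it. The following is only a rough scalar sketch of the per-channel math on premultiplied 8-bit values, with div255, hardlight, and overlay as illustrative helper names rather than the actual stage code:

    // Scalar sketch of the blends (assumed helper names; not SkJumper's SIMD code).
    #include <cstdint>

    // Approximate x/255 with rounding, for x up to a few products of 8-bit values.
    static uint8_t div255(uint32_t x) { return (uint8_t)((x + 127) / 255); }

    // Hard light on premultiplied 8-bit channels: add the parts of src and dst that
    // the other does not cover, plus either a multiply term (when 2s <= sa) or a
    // screen-like term.  Inputs are assumed premultiplied (s <= sa, d <= da).
    static uint8_t hardlight(uint8_t s, uint8_t sa, uint8_t d, uint8_t da) {
        uint32_t blended = (2*s <= sa) ? 2u*s*d
                                       : (uint32_t)(sa*da - 2*(sa - s)*(da - d));
        return div255(s*(255u - da) + d*(255u - sa) + blended);
    }

    // Overlay is hard light with src and dst swapped in the branch condition only.
    static uint8_t overlay(uint8_t s, uint8_t sa, uint8_t d, uint8_t da) {
        uint32_t blended = (2*d <= da) ? 2u*s*d
                                       : (uint32_t)(sa*da - 2*(sa - s)*(da - d));
        return div255(s*(255u - da) + d*(255u - sa) + blended);
    }

In the generated HSW 8-bit code below, that per-channel branch appears as branch-free mask selection: vpminuw/vpcmpeqw build the "2s <= sa" (or "2d <= da") masks, and vpand/vpandn/vpor pick between the two candidate terms before the final div255-style vpmulhuw/vpsrlw rounding.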
diff --git a/src/jumper/SkJumper.cpp b/src/jumper/SkJumper.cpp
index 8baa1890b6..e1888a8647 100644
--- a/src/jumper/SkJumper.cpp
+++ b/src/jumper/SkJumper.cpp
@@ -134,7 +134,9 @@ using StartPipelineFn = void(size_t,size_t,size_t,size_t, void**,K*);
M(darken) \
M(lighten) \
M(difference) \
- M(exclusion)
+ M(exclusion) \
+ M(hardlight) \
+ M(overlay)
#endif
extern "C" {
diff --git a/src/jumper/SkJumper_generated.S b/src/jumper/SkJumper_generated.S
index a6b1cbdbfe..d616ed93e9 100644
--- a/src/jumper/SkJumper_generated.S
+++ b/src/jumper/SkJumper_generated.S
@@ -56656,7 +56656,7 @@ HIDDEN _sk_set_rgb_hsw_8bit
FUNCTION(_sk_set_rgb_hsw_8bit)
_sk_set_rgb_hsw_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 197,250,16,37,142,44,0,0 // vmovss 0x2c8e(%rip),%xmm4 # 2d50 <_sk_difference_hsw_8bit+0x182>
+ .byte 197,250,16,37,110,51,0,0 // vmovss 0x336e(%rip),%xmm4 # 3430 <_sk_overlay_hsw_8bit+0x366>
.byte 197,218,89,40 // vmulss (%rax),%xmm4,%xmm5
.byte 196,225,250,44,205 // vcvttss2si %xmm5,%rcx
.byte 197,218,89,104,4 // vmulss 0x4(%rax),%xmm4,%xmm5
@@ -56669,7 +56669,7 @@ _sk_set_rgb_hsw_8bit:
.byte 9,208 // or %edx,%eax
.byte 197,249,110,224 // vmovd %eax,%xmm4
.byte 196,226,125,88,228 // vpbroadcastd %xmm4,%ymm4
- .byte 197,253,111,45,134,44,0,0 // vmovdqa 0x2c86(%rip),%ymm5 # 2d80 <_sk_difference_hsw_8bit+0x1b2>
+ .byte 197,253,111,45,102,51,0,0 // vmovdqa 0x3366(%rip),%ymm5 # 3460 <_sk_overlay_hsw_8bit+0x396>
.byte 197,245,219,205 // vpand %ymm5,%ymm1,%ymm1
.byte 197,253,219,197 // vpand %ymm5,%ymm0,%ymm0
.byte 197,221,235,192 // vpor %ymm0,%ymm4,%ymm0
@@ -56681,10 +56681,10 @@ HIDDEN _sk_premul_hsw_8bit
.globl _sk_premul_hsw_8bit
FUNCTION(_sk_premul_hsw_8bit)
_sk_premul_hsw_8bit:
- .byte 197,253,111,37,138,44,0,0 // vmovdqa 0x2c8a(%rip),%ymm4 # 2da0 <_sk_difference_hsw_8bit+0x1d2>
+ .byte 197,253,111,37,106,51,0,0 // vmovdqa 0x336a(%rip),%ymm4 # 3480 <_sk_overlay_hsw_8bit+0x3b6>
.byte 196,226,125,0,236 // vpshufb %ymm4,%ymm0,%ymm5
.byte 196,226,117,0,228 // vpshufb %ymm4,%ymm1,%ymm4
- .byte 197,253,111,53,152,44,0,0 // vmovdqa 0x2c98(%rip),%ymm6 # 2dc0 <_sk_difference_hsw_8bit+0x1f2>
+ .byte 197,253,111,53,120,51,0,0 // vmovdqa 0x3378(%rip),%ymm6 # 34a0 <_sk_overlay_hsw_8bit+0x3d6>
.byte 197,221,235,230 // vpor %ymm6,%ymm4,%ymm4
.byte 197,213,235,238 // vpor %ymm6,%ymm5,%ymm5
.byte 196,226,125,48,240 // vpmovzxbw %xmm0,%ymm6
@@ -56724,7 +56724,7 @@ HIDDEN _sk_swap_rb_hsw_8bit
.globl _sk_swap_rb_hsw_8bit
FUNCTION(_sk_swap_rb_hsw_8bit)
_sk_swap_rb_hsw_8bit:
- .byte 197,253,111,37,16,44,0,0 // vmovdqa 0x2c10(%rip),%ymm4 # 2de0 <_sk_difference_hsw_8bit+0x212>
+ .byte 197,253,111,37,240,50,0,0 // vmovdqa 0x32f0(%rip),%ymm4 # 34c0 <_sk_overlay_hsw_8bit+0x3f6>
.byte 196,226,125,0,196 // vpshufb %ymm4,%ymm0,%ymm0
.byte 196,226,117,0,204 // vpshufb %ymm4,%ymm1,%ymm1
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -57074,7 +57074,7 @@ _sk_load_bgra_hsw_8bit:
.byte 117,35 // jne 6b4 <_sk_load_bgra_hsw_8bit+0x44>
.byte 196,161,126,111,76,130,32 // vmovdqu 0x20(%rdx,%r8,4),%ymm1
.byte 196,161,126,111,4,130 // vmovdqu (%rdx,%r8,4),%ymm0
- .byte 197,253,111,37,90,39,0,0 // vmovdqa 0x275a(%rip),%ymm4 # 2e00 <_sk_difference_hsw_8bit+0x232>
+ .byte 197,253,111,37,58,46,0,0 // vmovdqa 0x2e3a(%rip),%ymm4 # 34e0 <_sk_overlay_hsw_8bit+0x416>
.byte 196,226,125,0,196 // vpshufb %ymm4,%ymm0,%ymm0
.byte 196,226,117,0,204 // vpshufb %ymm4,%ymm1,%ymm1
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -57189,7 +57189,7 @@ _sk_load_bgra_dst_hsw_8bit:
.byte 117,35 // jne 86c <_sk_load_bgra_dst_hsw_8bit+0x44>
.byte 196,161,126,111,92,130,32 // vmovdqu 0x20(%rdx,%r8,4),%ymm3
.byte 196,161,126,111,20,130 // vmovdqu (%rdx,%r8,4),%ymm2
- .byte 197,253,111,37,194,37,0,0 // vmovdqa 0x25c2(%rip),%ymm4 # 2e20 <_sk_difference_hsw_8bit+0x252>
+ .byte 197,253,111,37,162,44,0,0 // vmovdqa 0x2ca2(%rip),%ymm4 # 3500 <_sk_overlay_hsw_8bit+0x436>
.byte 196,226,109,0,212 // vpshufb %ymm4,%ymm2,%ymm2
.byte 196,226,101,0,220 // vpshufb %ymm4,%ymm3,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -57300,7 +57300,7 @@ _sk_store_bgra_hsw_8bit:
.byte 72,15,175,209 // imul %rcx,%rdx
.byte 72,193,226,2 // shl $0x2,%rdx
.byte 72,3,16 // add (%rax),%rdx
- .byte 197,253,111,37,60,36,0,0 // vmovdqa 0x243c(%rip),%ymm4 # 2e40 <_sk_difference_hsw_8bit+0x272>
+ .byte 197,253,111,37,28,43,0,0 // vmovdqa 0x2b1c(%rip),%ymm4 # 3520 <_sk_overlay_hsw_8bit+0x456>
.byte 196,226,117,0,236 // vpshufb %ymm4,%ymm1,%ymm5
.byte 196,226,125,0,228 // vpshufb %ymm4,%ymm0,%ymm4
.byte 77,133,201 // test %r9,%r9
@@ -57586,10 +57586,10 @@ _sk_store_a8_hsw_8bit:
.byte 72,99,87,8 // movslq 0x8(%rdi),%rdx
.byte 72,15,175,209 // imul %rcx,%rdx
.byte 72,3,16 // add (%rax),%rdx
- .byte 197,253,111,37,104,32,0,0 // vmovdqa 0x2068(%rip),%ymm4 # 2e60 <_sk_difference_hsw_8bit+0x292>
+ .byte 197,253,111,37,72,39,0,0 // vmovdqa 0x2748(%rip),%ymm4 # 3540 <_sk_overlay_hsw_8bit+0x476>
.byte 196,226,117,0,236 // vpshufb %ymm4,%ymm1,%ymm5
.byte 196,227,253,0,237,232 // vpermq $0xe8,%ymm5,%ymm5
- .byte 197,249,111,53,245,34,0,0 // vmovdqa 0x22f5(%rip),%xmm6 # 3100 <_sk_difference_hsw_8bit+0x532>
+ .byte 197,249,111,53,69,42,0,0 // vmovdqa 0x2a45(%rip),%xmm6 # 3850 <_sk_overlay_hsw_8bit+0x786>
.byte 196,226,81,0,238 // vpshufb %xmm6,%xmm5,%xmm5
.byte 196,226,125,0,228 // vpshufb %ymm4,%ymm0,%ymm4
.byte 196,227,253,0,228,232 // vpermq $0xe8,%ymm4,%ymm4
@@ -57681,10 +57681,10 @@ _sk_load_g8_hsw_8bit:
.byte 196,226,125,49,200 // vpmovzxbd %xmm0,%ymm1
.byte 197,249,112,192,78 // vpshufd $0x4e,%xmm0,%xmm0
.byte 196,226,125,49,192 // vpmovzxbd %xmm0,%ymm0
- .byte 196,226,125,88,37,9,30,0,0 // vpbroadcastd 0x1e09(%rip),%ymm4 # 2d54 <_sk_difference_hsw_8bit+0x186>
+ .byte 196,226,125,88,37,233,36,0,0 // vpbroadcastd 0x24e9(%rip),%ymm4 # 3434 <_sk_overlay_hsw_8bit+0x36a>
.byte 196,226,125,64,236 // vpmulld %ymm4,%ymm0,%ymm5
.byte 196,226,117,64,196 // vpmulld %ymm4,%ymm1,%ymm0
- .byte 196,226,125,88,13,250,29,0,0 // vpbroadcastd 0x1dfa(%rip),%ymm1 # 2d58 <_sk_difference_hsw_8bit+0x18a>
+ .byte 196,226,125,88,13,218,36,0,0 // vpbroadcastd 0x24da(%rip),%ymm1 # 3438 <_sk_overlay_hsw_8bit+0x36e>
.byte 197,253,235,193 // vpor %ymm1,%ymm0,%ymm0
.byte 197,213,235,201 // vpor %ymm1,%ymm5,%ymm1
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -57782,10 +57782,10 @@ _sk_load_g8_dst_hsw_8bit:
.byte 196,226,125,49,218 // vpmovzxbd %xmm2,%ymm3
.byte 197,249,112,210,78 // vpshufd $0x4e,%xmm2,%xmm2
.byte 196,226,125,49,210 // vpmovzxbd %xmm2,%ymm2
- .byte 196,226,125,88,37,153,28,0,0 // vpbroadcastd 0x1c99(%rip),%ymm4 # 2d5c <_sk_difference_hsw_8bit+0x18e>
+ .byte 196,226,125,88,37,121,35,0,0 // vpbroadcastd 0x2379(%rip),%ymm4 # 343c <_sk_overlay_hsw_8bit+0x372>
.byte 196,226,109,64,236 // vpmulld %ymm4,%ymm2,%ymm5
.byte 196,226,101,64,212 // vpmulld %ymm4,%ymm3,%ymm2
- .byte 196,226,125,88,29,138,28,0,0 // vpbroadcastd 0x1c8a(%rip),%ymm3 # 2d60 <_sk_difference_hsw_8bit+0x192>
+ .byte 196,226,125,88,29,106,35,0,0 // vpbroadcastd 0x236a(%rip),%ymm3 # 3440 <_sk_overlay_hsw_8bit+0x376>
.byte 197,237,235,211 // vpor %ymm3,%ymm2,%ymm2
.byte 197,213,235,219 // vpor %ymm3,%ymm5,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -57882,7 +57882,7 @@ _sk_srcover_rgba_8888_hsw_8bit:
.byte 15,133,222,0,0,0 // jne 1303 <_sk_srcover_rgba_8888_hsw_8bit+0x103>
.byte 196,33,126,111,76,138,32 // vmovdqu 0x20(%rdx,%r9,4),%ymm9
.byte 196,33,126,111,28,138 // vmovdqu (%rdx,%r9,4),%ymm11
- .byte 197,253,111,53,70,28,0,0 // vmovdqa 0x1c46(%rip),%ymm6 # 2e80 <_sk_difference_hsw_8bit+0x2b2>
+ .byte 197,253,111,53,38,35,0,0 // vmovdqa 0x2326(%rip),%ymm6 # 3560 <_sk_overlay_hsw_8bit+0x496>
.byte 196,226,117,0,254 // vpshufb %ymm6,%ymm1,%ymm7
.byte 196,226,125,0,246 // vpshufb %ymm6,%ymm0,%ymm6
.byte 196,66,125,48,195 // vpmovzxbw %xmm11,%ymm8
@@ -58090,7 +58090,7 @@ FUNCTION(_sk_scale_1_float_hsw_8bit)
_sk_scale_1_float_hsw_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 197,250,16,32 // vmovss (%rax),%xmm4
- .byte 197,218,89,37,206,23,0,0 // vmulss 0x17ce(%rip),%xmm4,%xmm4 # 2d64 <_sk_difference_hsw_8bit+0x196>
+ .byte 197,218,89,37,174,30,0,0 // vmulss 0x1eae(%rip),%xmm4,%xmm4 # 3444 <_sk_overlay_hsw_8bit+0x37a>
.byte 197,250,44,196 // vcvttss2si %xmm4,%eax
.byte 197,249,110,224 // vmovd %eax,%xmm4
.byte 196,226,125,120,228 // vpbroadcastb %xmm4,%ymm4
@@ -58100,7 +58100,7 @@ _sk_scale_1_float_hsw_8bit:
.byte 196,226,125,48,241 // vpmovzxbw %xmm1,%ymm6
.byte 196,227,125,57,201,1 // vextracti128 $0x1,%ymm1,%xmm1
.byte 196,226,125,48,201 // vpmovzxbw %xmm1,%ymm1
- .byte 197,221,219,37,213,24,0,0 // vpand 0x18d5(%rip),%ymm4,%ymm4 # 2ea0 <_sk_difference_hsw_8bit+0x2d2>
+ .byte 197,221,219,37,181,31,0,0 // vpand 0x1fb5(%rip),%ymm4,%ymm4 # 3580 <_sk_overlay_hsw_8bit+0x4b6>
.byte 197,221,213,249 // vpmullw %ymm1,%ymm4,%ymm7
.byte 197,93,213,198 // vpmullw %ymm6,%ymm4,%ymm8
.byte 197,93,213,200 // vpmullw %ymm0,%ymm4,%ymm9
@@ -58139,7 +58139,7 @@ _sk_scale_u8_hsw_8bit:
.byte 196,226,125,49,236 // vpmovzxbd %xmm4,%ymm5
.byte 197,249,112,228,78 // vpshufd $0x4e,%xmm4,%xmm4
.byte 196,226,125,49,228 // vpmovzxbd %xmm4,%ymm4
- .byte 197,253,111,53,95,24,0,0 // vmovdqa 0x185f(%rip),%ymm6 # 2ec0 <_sk_difference_hsw_8bit+0x2f2>
+ .byte 197,253,111,53,63,31,0,0 // vmovdqa 0x1f3f(%rip),%ymm6 # 35a0 <_sk_overlay_hsw_8bit+0x4d6>
.byte 196,226,93,0,230 // vpshufb %ymm6,%ymm4,%ymm4
.byte 196,226,85,0,238 // vpshufb %ymm6,%ymm5,%ymm5
.byte 196,226,125,48,240 // vpmovzxbw %xmm0,%ymm6
@@ -58256,7 +58256,7 @@ FUNCTION(_sk_lerp_1_float_hsw_8bit)
_sk_lerp_1_float_hsw_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 197,250,16,32 // vmovss (%rax),%xmm4
- .byte 197,218,89,37,50,21,0,0 // vmulss 0x1532(%rip),%xmm4,%xmm4 # 2d68 <_sk_difference_hsw_8bit+0x19a>
+ .byte 197,218,89,37,18,28,0,0 // vmulss 0x1c12(%rip),%xmm4,%xmm4 # 3448 <_sk_overlay_hsw_8bit+0x37e>
.byte 197,250,44,196 // vcvttss2si %xmm4,%eax
.byte 197,249,110,224 // vmovd %eax,%xmm4
.byte 196,226,125,120,228 // vpbroadcastb %xmm4,%ymm4
@@ -58266,7 +58266,7 @@ _sk_lerp_1_float_hsw_8bit:
.byte 196,226,125,48,241 // vpmovzxbw %xmm1,%ymm6
.byte 196,227,125,57,201,1 // vextracti128 $0x1,%ymm1,%xmm1
.byte 196,226,125,48,201 // vpmovzxbw %xmm1,%ymm1
- .byte 197,221,219,61,117,22,0,0 // vpand 0x1675(%rip),%ymm4,%ymm7 # 2ee0 <_sk_difference_hsw_8bit+0x312>
+ .byte 197,221,219,61,85,29,0,0 // vpand 0x1d55(%rip),%ymm4,%ymm7 # 35c0 <_sk_overlay_hsw_8bit+0x4f6>
.byte 197,69,213,193 // vpmullw %ymm1,%ymm7,%ymm8
.byte 197,69,213,206 // vpmullw %ymm6,%ymm7,%ymm9
.byte 197,69,213,208 // vpmullw %ymm0,%ymm7,%ymm10
@@ -58336,7 +58336,7 @@ _sk_lerp_u8_hsw_8bit:
.byte 196,226,125,49,236 // vpmovzxbd %xmm4,%ymm5
.byte 197,249,112,228,78 // vpshufd $0x4e,%xmm4,%xmm4
.byte 196,226,125,49,228 // vpmovzxbd %xmm4,%ymm4
- .byte 197,253,111,53,106,21,0,0 // vmovdqa 0x156a(%rip),%ymm6 # 2f00 <_sk_difference_hsw_8bit+0x332>
+ .byte 197,253,111,53,74,28,0,0 // vmovdqa 0x1c4a(%rip),%ymm6 # 35e0 <_sk_overlay_hsw_8bit+0x516>
.byte 196,98,93,0,206 // vpshufb %ymm6,%ymm4,%ymm9
.byte 196,98,85,0,222 // vpshufb %ymm6,%ymm5,%ymm11
.byte 196,226,125,48,240 // vpmovzxbw %xmm0,%ymm6
@@ -58505,7 +58505,7 @@ HIDDEN _sk_black_color_hsw_8bit
FUNCTION(_sk_black_color_hsw_8bit)
_sk_black_color_hsw_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,5,65,17,0,0 // vbroadcastss 0x1141(%rip),%ymm0 # 2d6c <_sk_difference_hsw_8bit+0x19e>
+ .byte 196,226,125,24,5,33,24,0,0 // vbroadcastss 0x1821(%rip),%ymm0 # 344c <_sk_overlay_hsw_8bit+0x382>
.byte 197,252,40,200 // vmovaps %ymm0,%ymm1
.byte 255,224 // jmpq *%rax
@@ -58531,7 +58531,7 @@ HIDDEN _sk_srcatop_hsw_8bit
.globl _sk_srcatop_hsw_8bit
FUNCTION(_sk_srcatop_hsw_8bit)
_sk_srcatop_hsw_8bit:
- .byte 197,125,111,5,207,18,0,0 // vmovdqa 0x12cf(%rip),%ymm8 # 2f20 <_sk_difference_hsw_8bit+0x352>
+ .byte 197,125,111,5,175,25,0,0 // vmovdqa 0x19af(%rip),%ymm8 # 3600 <_sk_overlay_hsw_8bit+0x536>
.byte 196,194,101,0,224 // vpshufb %ymm8,%ymm3,%ymm4
.byte 196,194,109,0,232 // vpshufb %ymm8,%ymm2,%ymm5
.byte 196,98,125,48,208 // vpmovzxbw %xmm0,%ymm10
@@ -58608,7 +58608,7 @@ HIDDEN _sk_dstatop_hsw_8bit
.globl _sk_dstatop_hsw_8bit
FUNCTION(_sk_dstatop_hsw_8bit)
_sk_dstatop_hsw_8bit:
- .byte 197,125,111,5,138,17,0,0 // vmovdqa 0x118a(%rip),%ymm8 # 2f40 <_sk_difference_hsw_8bit+0x372>
+ .byte 197,125,111,5,106,24,0,0 // vmovdqa 0x186a(%rip),%ymm8 # 3620 <_sk_overlay_hsw_8bit+0x556>
.byte 196,194,117,0,224 // vpshufb %ymm8,%ymm1,%ymm4
.byte 196,194,125,0,232 // vpshufb %ymm8,%ymm0,%ymm5
.byte 196,98,125,48,210 // vpmovzxbw %xmm2,%ymm10
@@ -58685,7 +58685,7 @@ HIDDEN _sk_srcin_hsw_8bit
.globl _sk_srcin_hsw_8bit
FUNCTION(_sk_srcin_hsw_8bit)
_sk_srcin_hsw_8bit:
- .byte 197,253,111,37,65,16,0,0 // vmovdqa 0x1041(%rip),%ymm4 # 2f60 <_sk_difference_hsw_8bit+0x392>
+ .byte 197,253,111,37,33,23,0,0 // vmovdqa 0x1721(%rip),%ymm4 # 3640 <_sk_overlay_hsw_8bit+0x576>
.byte 196,226,101,0,236 // vpshufb %ymm4,%ymm3,%ymm5
.byte 196,226,109,0,228 // vpshufb %ymm4,%ymm2,%ymm4
.byte 196,226,125,48,240 // vpmovzxbw %xmm0,%ymm6
@@ -58725,7 +58725,7 @@ HIDDEN _sk_dstin_hsw_8bit
.globl _sk_dstin_hsw_8bit
FUNCTION(_sk_dstin_hsw_8bit)
_sk_dstin_hsw_8bit:
- .byte 197,253,111,37,183,15,0,0 // vmovdqa 0xfb7(%rip),%ymm4 # 2f80 <_sk_difference_hsw_8bit+0x3b2>
+ .byte 197,253,111,37,151,22,0,0 // vmovdqa 0x1697(%rip),%ymm4 # 3660 <_sk_overlay_hsw_8bit+0x596>
.byte 196,226,117,0,204 // vpshufb %ymm4,%ymm1,%ymm1
.byte 196,226,125,0,196 // vpshufb %ymm4,%ymm0,%ymm0
.byte 196,226,125,48,226 // vpmovzxbw %xmm2,%ymm4
@@ -58765,7 +58765,7 @@ HIDDEN _sk_srcout_hsw_8bit
.globl _sk_srcout_hsw_8bit
FUNCTION(_sk_srcout_hsw_8bit)
_sk_srcout_hsw_8bit:
- .byte 197,253,111,37,43,15,0,0 // vmovdqa 0xf2b(%rip),%ymm4 # 2fa0 <_sk_difference_hsw_8bit+0x3d2>
+ .byte 197,253,111,37,11,22,0,0 // vmovdqa 0x160b(%rip),%ymm4 # 3680 <_sk_overlay_hsw_8bit+0x5b6>
.byte 196,226,109,0,236 // vpshufb %ymm4,%ymm2,%ymm5
.byte 196,226,101,0,228 // vpshufb %ymm4,%ymm3,%ymm4
.byte 197,205,118,246 // vpcmpeqd %ymm6,%ymm6,%ymm6
@@ -58808,7 +58808,7 @@ HIDDEN _sk_dstout_hsw_8bit
.globl _sk_dstout_hsw_8bit
FUNCTION(_sk_dstout_hsw_8bit)
_sk_dstout_hsw_8bit:
- .byte 197,253,111,37,149,14,0,0 // vmovdqa 0xe95(%rip),%ymm4 # 2fc0 <_sk_difference_hsw_8bit+0x3f2>
+ .byte 197,253,111,37,117,21,0,0 // vmovdqa 0x1575(%rip),%ymm4 # 36a0 <_sk_overlay_hsw_8bit+0x5d6>
.byte 196,226,125,0,196 // vpshufb %ymm4,%ymm0,%ymm0
.byte 196,226,117,0,204 // vpshufb %ymm4,%ymm1,%ymm1
.byte 197,221,118,228 // vpcmpeqd %ymm4,%ymm4,%ymm4
@@ -58851,7 +58851,7 @@ HIDDEN _sk_srcover_hsw_8bit
.globl _sk_srcover_hsw_8bit
FUNCTION(_sk_srcover_hsw_8bit)
_sk_srcover_hsw_8bit:
- .byte 197,253,111,37,253,13,0,0 // vmovdqa 0xdfd(%rip),%ymm4 # 2fe0 <_sk_difference_hsw_8bit+0x412>
+ .byte 197,253,111,37,221,20,0,0 // vmovdqa 0x14dd(%rip),%ymm4 # 36c0 <_sk_overlay_hsw_8bit+0x5f6>
.byte 196,226,117,0,236 // vpshufb %ymm4,%ymm1,%ymm5
.byte 196,226,125,0,228 // vpshufb %ymm4,%ymm0,%ymm4
.byte 196,98,125,48,202 // vpmovzxbw %xmm2,%ymm9
@@ -58895,7 +58895,7 @@ HIDDEN _sk_dstover_hsw_8bit
.globl _sk_dstover_hsw_8bit
FUNCTION(_sk_dstover_hsw_8bit)
_sk_dstover_hsw_8bit:
- .byte 197,253,111,37,94,13,0,0 // vmovdqa 0xd5e(%rip),%ymm4 # 3000 <_sk_difference_hsw_8bit+0x432>
+ .byte 197,253,111,37,62,20,0,0 // vmovdqa 0x143e(%rip),%ymm4 # 36e0 <_sk_overlay_hsw_8bit+0x616>
.byte 196,226,101,0,236 // vpshufb %ymm4,%ymm3,%ymm5
.byte 196,226,109,0,228 // vpshufb %ymm4,%ymm2,%ymm4
.byte 196,98,125,48,200 // vpmovzxbw %xmm0,%ymm9
@@ -58978,7 +58978,7 @@ FUNCTION(_sk_multiply_hsw_8bit)
_sk_multiply_hsw_8bit:
.byte 197,253,111,243 // vmovdqa %ymm3,%ymm6
.byte 197,253,111,218 // vmovdqa %ymm2,%ymm3
- .byte 197,125,111,13,31,12,0,0 // vmovdqa 0xc1f(%rip),%ymm9 # 3020 <_sk_difference_hsw_8bit+0x452>
+ .byte 197,125,111,13,255,18,0,0 // vmovdqa 0x12ff(%rip),%ymm9 # 3700 <_sk_overlay_hsw_8bit+0x636>
.byte 196,194,101,0,225 // vpshufb %ymm9,%ymm3,%ymm4
.byte 196,194,77,0,233 // vpshufb %ymm9,%ymm6,%ymm5
.byte 196,65,45,118,210 // vpcmpeqd %ymm10,%ymm10,%ymm10
@@ -59122,7 +59122,7 @@ HIDDEN _sk_xor__hsw_8bit
.globl _sk_xor__hsw_8bit
FUNCTION(_sk_xor__hsw_8bit)
_sk_xor__hsw_8bit:
- .byte 197,125,111,13,173,9,0,0 // vmovdqa 0x9ad(%rip),%ymm9 # 3040 <_sk_difference_hsw_8bit+0x472>
+ .byte 197,125,111,13,141,16,0,0 // vmovdqa 0x108d(%rip),%ymm9 # 3720 <_sk_overlay_hsw_8bit+0x656>
.byte 196,194,109,0,225 // vpshufb %ymm9,%ymm2,%ymm4
.byte 196,194,101,0,249 // vpshufb %ymm9,%ymm3,%ymm7
.byte 196,65,37,118,219 // vpcmpeqd %ymm11,%ymm11,%ymm11
@@ -59201,7 +59201,7 @@ HIDDEN _sk_darken_hsw_8bit
.globl _sk_darken_hsw_8bit
FUNCTION(_sk_darken_hsw_8bit)
_sk_darken_hsw_8bit:
- .byte 197,125,111,5,91,8,0,0 // vmovdqa 0x85b(%rip),%ymm8 # 3060 <_sk_difference_hsw_8bit+0x492>
+ .byte 197,125,111,5,59,15,0,0 // vmovdqa 0xf3b(%rip),%ymm8 # 3740 <_sk_overlay_hsw_8bit+0x676>
.byte 196,194,101,0,224 // vpshufb %ymm8,%ymm3,%ymm4
.byte 196,194,109,0,240 // vpshufb %ymm8,%ymm2,%ymm6
.byte 196,98,125,48,208 // vpmovzxbw %xmm0,%ymm10
@@ -59274,7 +59274,7 @@ _sk_darken_hsw_8bit:
.byte 197,253,248,246 // vpsubb %ymm6,%ymm0,%ymm6
.byte 197,245,248,205 // vpsubb %ymm5,%ymm1,%ymm1
.byte 197,253,248,196 // vpsubb %ymm4,%ymm0,%ymm0
- .byte 196,226,125,88,37,252,3,0,0 // vpbroadcastd 0x3fc(%rip),%ymm4 # 2d70 <_sk_difference_hsw_8bit+0x1a2>
+ .byte 196,226,125,88,37,220,10,0,0 // vpbroadcastd 0xadc(%rip),%ymm4 # 3450 <_sk_overlay_hsw_8bit+0x386>
.byte 196,227,125,76,198,64 // vpblendvb %ymm4,%ymm6,%ymm0,%ymm0
.byte 196,227,117,76,207,64 // vpblendvb %ymm4,%ymm7,%ymm1,%ymm1
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -59284,7 +59284,7 @@ HIDDEN _sk_lighten_hsw_8bit
.globl _sk_lighten_hsw_8bit
FUNCTION(_sk_lighten_hsw_8bit)
_sk_lighten_hsw_8bit:
- .byte 197,125,111,5,244,6,0,0 // vmovdqa 0x6f4(%rip),%ymm8 # 3080 <_sk_difference_hsw_8bit+0x4b2>
+ .byte 197,125,111,5,212,13,0,0 // vmovdqa 0xdd4(%rip),%ymm8 # 3760 <_sk_overlay_hsw_8bit+0x696>
.byte 196,194,101,0,224 // vpshufb %ymm8,%ymm3,%ymm4
.byte 196,194,109,0,240 // vpshufb %ymm8,%ymm2,%ymm6
.byte 196,98,125,48,208 // vpmovzxbw %xmm0,%ymm10
@@ -59357,7 +59357,7 @@ _sk_lighten_hsw_8bit:
.byte 197,253,248,246 // vpsubb %ymm6,%ymm0,%ymm6
.byte 197,245,248,205 // vpsubb %ymm5,%ymm1,%ymm1
.byte 197,253,248,196 // vpsubb %ymm4,%ymm0,%ymm0
- .byte 196,226,125,88,37,121,2,0,0 // vpbroadcastd 0x279(%rip),%ymm4 # 2d74 <_sk_difference_hsw_8bit+0x1a6>
+ .byte 196,226,125,88,37,89,9,0,0 // vpbroadcastd 0x959(%rip),%ymm4 # 3454 <_sk_overlay_hsw_8bit+0x38a>
.byte 196,227,125,76,198,64 // vpblendvb %ymm4,%ymm6,%ymm0,%ymm0
.byte 196,227,117,76,207,64 // vpblendvb %ymm4,%ymm7,%ymm1,%ymm1
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -59397,7 +59397,7 @@ _sk_exclusion_hsw_8bit:
.byte 196,227,77,56,252,1 // vinserti128 $0x1,%xmm4,%ymm6,%ymm7
.byte 196,227,77,70,228,49 // vperm2i128 $0x31,%ymm4,%ymm6,%ymm4
.byte 197,197,103,228 // vpackuswb %ymm4,%ymm7,%ymm4
- .byte 197,253,111,53,246,4,0,0 // vmovdqa 0x4f6(%rip),%ymm6 # 30a0 <_sk_difference_hsw_8bit+0x4d2>
+ .byte 197,253,111,53,214,11,0,0 // vmovdqa 0xbd6(%rip),%ymm6 # 3780 <_sk_overlay_hsw_8bit+0x6b6>
.byte 197,221,219,254 // vpand %ymm6,%ymm4,%ymm7
.byte 197,213,219,246 // vpand %ymm6,%ymm5,%ymm6
.byte 197,237,252,192 // vpaddb %ymm0,%ymm2,%ymm0
@@ -59413,7 +59413,7 @@ HIDDEN _sk_difference_hsw_8bit
.globl _sk_difference_hsw_8bit
FUNCTION(_sk_difference_hsw_8bit)
_sk_difference_hsw_8bit:
- .byte 197,125,111,5,234,4,0,0 // vmovdqa 0x4ea(%rip),%ymm8 # 30c0 <_sk_difference_hsw_8bit+0x4f2>
+ .byte 197,125,111,5,202,11,0,0 // vmovdqa 0xbca(%rip),%ymm8 # 37a0 <_sk_overlay_hsw_8bit+0x6d6>
.byte 196,194,101,0,224 // vpshufb %ymm8,%ymm3,%ymm4
.byte 196,194,109,0,240 // vpshufb %ymm8,%ymm2,%ymm6
.byte 196,98,125,48,208 // vpmovzxbw %xmm0,%ymm10
@@ -59480,7 +59480,7 @@ _sk_difference_hsw_8bit:
.byte 197,197,103,237 // vpackuswb %ymm5,%ymm7,%ymm5
.byte 197,181,218,228 // vpminub %ymm4,%ymm9,%ymm4
.byte 197,165,218,237 // vpminub %ymm5,%ymm11,%ymm5
- .byte 197,253,111,53,180,3,0,0 // vmovdqa 0x3b4(%rip),%ymm6 # 30e0 <_sk_difference_hsw_8bit+0x512>
+ .byte 197,253,111,53,148,10,0,0 // vmovdqa 0xa94(%rip),%ymm6 # 37c0 <_sk_overlay_hsw_8bit+0x6f6>
.byte 197,213,219,254 // vpand %ymm6,%ymm5,%ymm7
.byte 197,221,219,246 // vpand %ymm6,%ymm4,%ymm6
.byte 197,237,252,192 // vpaddb %ymm0,%ymm2,%ymm0
@@ -59492,9 +59492,354 @@ _sk_difference_hsw_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
+HIDDEN _sk_hardlight_hsw_8bit
+.globl _sk_hardlight_hsw_8bit
+FUNCTION(_sk_hardlight_hsw_8bit)
+_sk_hardlight_hsw_8bit:
+ .byte 72,129,236,184,1,0,0 // sub $0x1b8,%rsp
+ .byte 197,252,17,92,36,128 // vmovups %ymm3,-0x80(%rsp)
+ .byte 196,226,125,48,248 // vpmovzxbw %xmm0,%ymm7
+ .byte 196,227,125,57,195,1 // vextracti128 $0x1,%ymm0,%xmm3
+ .byte 196,226,125,48,243 // vpmovzxbw %xmm3,%ymm6
+ .byte 196,98,125,48,217 // vpmovzxbw %xmm1,%ymm11
+ .byte 197,126,127,156,36,128,0,0,0 // vmovdqu %ymm11,0x80(%rsp)
+ .byte 196,227,125,57,203,1 // vextracti128 $0x1,%ymm1,%xmm3
+ .byte 196,98,125,48,195 // vpmovzxbw %xmm3,%ymm8
+ .byte 197,126,127,132,36,192,0,0,0 // vmovdqu %ymm8,0xc0(%rsp)
+ .byte 197,253,111,29,73,10,0,0 // vmovdqa 0xa49(%rip),%ymm3 # 37e0 <_sk_overlay_hsw_8bit+0x716>
+ .byte 196,226,125,0,227 // vpshufb %ymm3,%ymm0,%ymm4
+ .byte 196,226,117,0,203 // vpshufb %ymm3,%ymm1,%ymm1
+ .byte 197,125,111,251 // vmovdqa %ymm3,%ymm15
+ .byte 196,227,125,57,200,1 // vextracti128 $0x1,%ymm1,%xmm0
+ .byte 196,98,125,48,232 // vpmovzxbw %xmm0,%ymm13
+ .byte 196,226,125,48,233 // vpmovzxbw %xmm1,%ymm5
+ .byte 196,227,125,57,227,1 // vextracti128 $0x1,%ymm4,%xmm3
+ .byte 196,98,125,48,203 // vpmovzxbw %xmm3,%ymm9
+ .byte 196,98,125,48,212 // vpmovzxbw %xmm4,%ymm10
+ .byte 197,197,253,199 // vpaddw %ymm7,%ymm7,%ymm0
+ .byte 197,205,253,222 // vpaddw %ymm6,%ymm6,%ymm3
+ .byte 196,65,37,253,227 // vpaddw %ymm11,%ymm11,%ymm12
+ .byte 196,65,61,253,216 // vpaddw %ymm8,%ymm8,%ymm11
+ .byte 196,66,37,58,245 // vpminuw %ymm13,%ymm11,%ymm14
+ .byte 196,193,37,117,206 // vpcmpeqw %ymm14,%ymm11,%ymm1
+ .byte 197,254,127,140,36,96,1,0,0 // vmovdqu %ymm1,0x160(%rsp)
+ .byte 196,98,29,58,245 // vpminuw %ymm5,%ymm12,%ymm14
+ .byte 196,193,29,117,206 // vpcmpeqw %ymm14,%ymm12,%ymm1
+ .byte 197,254,127,140,36,64,1,0,0 // vmovdqu %ymm1,0x140(%rsp)
+ .byte 196,66,101,58,241 // vpminuw %ymm9,%ymm3,%ymm14
+ .byte 196,193,101,117,206 // vpcmpeqw %ymm14,%ymm3,%ymm1
+ .byte 197,254,127,140,36,0,1,0,0 // vmovdqu %ymm1,0x100(%rsp)
+ .byte 196,194,125,58,218 // vpminuw %ymm10,%ymm0,%ymm3
+ .byte 197,125,117,243 // vpcmpeqw %ymm3,%ymm0,%ymm14
+ .byte 197,253,111,226 // vmovdqa %ymm2,%ymm4
+ .byte 196,65,125,111,231 // vmovdqa %ymm15,%ymm12
+ .byte 196,194,93,0,220 // vpshufb %ymm12,%ymm4,%ymm3
+ .byte 196,226,125,48,211 // vpmovzxbw %xmm3,%ymm2
+ .byte 196,226,125,121,5,203,9,0,0 // vpbroadcastw 0x9cb(%rip),%ymm0 # 3800 <_sk_overlay_hsw_8bit+0x736>
+ .byte 197,109,239,248 // vpxor %ymm0,%ymm2,%ymm15
+ .byte 197,133,213,207 // vpmullw %ymm7,%ymm15,%ymm1
+ .byte 197,254,127,140,36,32,1,0,0 // vmovdqu %ymm1,0x120(%rsp)
+ .byte 196,226,125,48,204 // vpmovzxbw %xmm4,%ymm1
+ .byte 197,254,127,164,36,128,1,0,0 // vmovdqu %ymm4,0x180(%rsp)
+ .byte 197,45,239,248 // vpxor %ymm0,%ymm10,%ymm15
+ .byte 197,5,213,193 // vpmullw %ymm1,%ymm15,%ymm8
+ .byte 197,126,127,132,36,224,0,0,0 // vmovdqu %ymm8,0xe0(%rsp)
+ .byte 197,117,213,199 // vpmullw %ymm7,%ymm1,%ymm8
+ .byte 197,126,127,132,36,160,0,0,0 // vmovdqu %ymm8,0xa0(%rsp)
+ .byte 197,237,249,201 // vpsubw %ymm1,%ymm2,%ymm1
+ .byte 197,254,127,76,36,192 // vmovdqu %ymm1,-0x40(%rsp)
+ .byte 196,193,109,213,202 // vpmullw %ymm10,%ymm2,%ymm1
+ .byte 197,254,127,76,36,32 // vmovdqu %ymm1,0x20(%rsp)
+ .byte 197,45,249,223 // vpsubw %ymm7,%ymm10,%ymm11
+ .byte 196,227,125,57,226,1 // vextracti128 $0x1,%ymm4,%xmm2
+ .byte 196,226,125,48,210 // vpmovzxbw %xmm2,%ymm2
+ .byte 196,227,125,57,219,1 // vextracti128 $0x1,%ymm3,%xmm3
+ .byte 196,226,125,48,219 // vpmovzxbw %xmm3,%ymm3
+ .byte 197,101,239,208 // vpxor %ymm0,%ymm3,%ymm10
+ .byte 197,173,213,206 // vpmullw %ymm6,%ymm10,%ymm1
+ .byte 197,254,127,76,36,96 // vmovdqu %ymm1,0x60(%rsp)
+ .byte 197,53,239,208 // vpxor %ymm0,%ymm9,%ymm10
+ .byte 197,173,213,202 // vpmullw %ymm2,%ymm10,%ymm1
+ .byte 197,254,127,76,36,64 // vmovdqu %ymm1,0x40(%rsp)
+ .byte 197,237,213,206 // vpmullw %ymm6,%ymm2,%ymm1
+ .byte 197,254,127,76,36,160 // vmovdqu %ymm1,-0x60(%rsp)
+ .byte 197,229,249,226 // vpsubw %ymm2,%ymm3,%ymm4
+ .byte 196,65,101,213,193 // vpmullw %ymm9,%ymm3,%ymm8
+ .byte 197,181,249,246 // vpsubw %ymm6,%ymm9,%ymm6
+ .byte 197,254,111,124,36,128 // vmovdqu -0x80(%rsp),%ymm7
+ .byte 196,66,69,0,204 // vpshufb %ymm12,%ymm7,%ymm9
+ .byte 196,194,125,48,209 // vpmovzxbw %xmm9,%ymm2
+ .byte 197,109,239,248 // vpxor %ymm0,%ymm2,%ymm15
+ .byte 197,126,111,164,36,128,0,0,0 // vmovdqu 0x80(%rsp),%ymm12
+ .byte 196,193,5,213,204 // vpmullw %ymm12,%ymm15,%ymm1
+ .byte 197,254,127,12,36 // vmovdqu %ymm1,(%rsp)
+ .byte 196,226,125,48,207 // vpmovzxbw %xmm7,%ymm1
+ .byte 197,85,239,248 // vpxor %ymm0,%ymm5,%ymm15
+ .byte 197,133,213,217 // vpmullw %ymm1,%ymm15,%ymm3
+ .byte 197,254,127,92,36,224 // vmovdqu %ymm3,-0x20(%rsp)
+ .byte 196,65,117,213,212 // vpmullw %ymm12,%ymm1,%ymm10
+ .byte 197,237,249,217 // vpsubw %ymm1,%ymm2,%ymm3
+ .byte 197,237,213,213 // vpmullw %ymm5,%ymm2,%ymm2
+ .byte 196,193,85,249,236 // vpsubw %ymm12,%ymm5,%ymm5
+ .byte 196,195,125,57,255,1 // vextracti128 $0x1,%ymm7,%xmm15
+ .byte 196,66,125,48,255 // vpmovzxbw %xmm15,%ymm15
+ .byte 196,99,125,57,207,1 // vextracti128 $0x1,%ymm9,%xmm7
+ .byte 196,226,125,48,255 // vpmovzxbw %xmm7,%ymm7
+ .byte 197,69,239,200 // vpxor %ymm0,%ymm7,%ymm9
+ .byte 197,254,111,140,36,192,0,0,0 // vmovdqu 0xc0(%rsp),%ymm1
+ .byte 197,53,213,225 // vpmullw %ymm1,%ymm9,%ymm12
+ .byte 197,149,239,192 // vpxor %ymm0,%ymm13,%ymm0
+ .byte 196,65,125,213,207 // vpmullw %ymm15,%ymm0,%ymm9
+ .byte 197,133,213,193 // vpmullw %ymm1,%ymm15,%ymm0
+ .byte 196,65,69,249,255 // vpsubw %ymm15,%ymm7,%ymm15
+ .byte 196,193,69,213,253 // vpmullw %ymm13,%ymm7,%ymm7
+ .byte 197,21,249,233 // vpsubw %ymm1,%ymm13,%ymm13
+ .byte 196,65,5,213,237 // vpmullw %ymm13,%ymm15,%ymm13
+ .byte 197,229,213,205 // vpmullw %ymm5,%ymm3,%ymm1
+ .byte 197,221,213,230 // vpmullw %ymm6,%ymm4,%ymm4
+ .byte 197,165,213,108,36,192 // vpmullw -0x40(%rsp),%ymm11,%ymm5
+ .byte 196,193,21,253,245 // vpaddw %ymm13,%ymm13,%ymm6
+ .byte 197,197,249,246 // vpsubw %ymm6,%ymm7,%ymm6
+ .byte 197,245,253,201 // vpaddw %ymm1,%ymm1,%ymm1
+ .byte 197,237,249,201 // vpsubw %ymm1,%ymm2,%ymm1
+ .byte 197,221,253,212 // vpaddw %ymm4,%ymm4,%ymm2
+ .byte 197,189,249,210 // vpsubw %ymm2,%ymm8,%ymm2
+ .byte 197,213,253,221 // vpaddw %ymm5,%ymm5,%ymm3
+ .byte 197,254,111,100,36,32 // vmovdqu 0x20(%rsp),%ymm4
+ .byte 197,221,249,219 // vpsubw %ymm3,%ymm4,%ymm3
+ .byte 197,254,111,164,36,160,0,0,0 // vmovdqu 0xa0(%rsp),%ymm4
+ .byte 197,221,253,228 // vpaddw %ymm4,%ymm4,%ymm4
+ .byte 197,141,223,219 // vpandn %ymm3,%ymm14,%ymm3
+ .byte 196,193,93,219,230 // vpand %ymm14,%ymm4,%ymm4
+ .byte 197,221,235,219 // vpor %ymm3,%ymm4,%ymm3
+ .byte 197,254,111,100,36,160 // vmovdqu -0x60(%rsp),%ymm4
+ .byte 197,221,253,228 // vpaddw %ymm4,%ymm4,%ymm4
+ .byte 197,254,111,172,36,0,1,0,0 // vmovdqu 0x100(%rsp),%ymm5
+ .byte 197,213,223,210 // vpandn %ymm2,%ymm5,%ymm2
+ .byte 197,221,219,229 // vpand %ymm5,%ymm4,%ymm4
+ .byte 197,221,235,210 // vpor %ymm2,%ymm4,%ymm2
+ .byte 196,193,45,253,226 // vpaddw %ymm10,%ymm10,%ymm4
+ .byte 197,254,111,172,36,64,1,0,0 // vmovdqu 0x140(%rsp),%ymm5
+ .byte 197,213,223,201 // vpandn %ymm1,%ymm5,%ymm1
+ .byte 197,221,219,229 // vpand %ymm5,%ymm4,%ymm4
+ .byte 197,221,235,201 // vpor %ymm1,%ymm4,%ymm1
+ .byte 197,253,253,192 // vpaddw %ymm0,%ymm0,%ymm0
+ .byte 197,254,111,172,36,96,1,0,0 // vmovdqu 0x160(%rsp),%ymm5
+ .byte 197,213,223,230 // vpandn %ymm6,%ymm5,%ymm4
+ .byte 197,253,219,197 // vpand %ymm5,%ymm0,%ymm0
+ .byte 197,253,235,196 // vpor %ymm4,%ymm0,%ymm0
+ .byte 197,254,111,164,36,224,0,0,0 // vmovdqu 0xe0(%rsp),%ymm4
+ .byte 197,221,253,164,36,32,1,0,0 // vpaddw 0x120(%rsp),%ymm4,%ymm4
+ .byte 197,254,111,108,36,64 // vmovdqu 0x40(%rsp),%ymm5
+ .byte 197,213,253,108,36,96 // vpaddw 0x60(%rsp),%ymm5,%ymm5
+ .byte 197,254,111,116,36,224 // vmovdqu -0x20(%rsp),%ymm6
+ .byte 197,205,253,52,36 // vpaddw (%rsp),%ymm6,%ymm6
+ .byte 196,193,53,253,252 // vpaddw %ymm12,%ymm9,%ymm7
+ .byte 196,98,125,121,5,195,7,0,0 // vpbroadcastw 0x7c3(%rip),%ymm8 # 3802 <_sk_overlay_hsw_8bit+0x738>
+ .byte 196,193,93,253,224 // vpaddw %ymm8,%ymm4,%ymm4
+ .byte 197,229,253,220 // vpaddw %ymm4,%ymm3,%ymm3
+ .byte 196,193,85,253,224 // vpaddw %ymm8,%ymm5,%ymm4
+ .byte 197,237,253,212 // vpaddw %ymm4,%ymm2,%ymm2
+ .byte 196,193,77,253,224 // vpaddw %ymm8,%ymm6,%ymm4
+ .byte 197,245,253,204 // vpaddw %ymm4,%ymm1,%ymm1
+ .byte 196,193,69,253,224 // vpaddw %ymm8,%ymm7,%ymm4
+ .byte 197,253,253,196 // vpaddw %ymm4,%ymm0,%ymm0
+ .byte 196,226,125,121,37,152,7,0,0 // vpbroadcastw 0x798(%rip),%ymm4 # 3804 <_sk_overlay_hsw_8bit+0x73a>
+ .byte 197,253,228,196 // vpmulhuw %ymm4,%ymm0,%ymm0
+ .byte 197,245,228,204 // vpmulhuw %ymm4,%ymm1,%ymm1
+ .byte 197,237,228,212 // vpmulhuw %ymm4,%ymm2,%ymm2
+ .byte 197,237,113,210,7 // vpsrlw $0x7,%ymm2,%ymm2
+ .byte 197,229,228,220 // vpmulhuw %ymm4,%ymm3,%ymm3
+ .byte 197,229,113,211,7 // vpsrlw $0x7,%ymm3,%ymm3
+ .byte 196,227,101,56,226,1 // vinserti128 $0x1,%xmm2,%ymm3,%ymm4
+ .byte 196,227,101,70,210,49 // vperm2i128 $0x31,%ymm2,%ymm3,%ymm2
+ .byte 197,229,113,208,7 // vpsrlw $0x7,%ymm0,%ymm3
+ .byte 197,245,113,209,7 // vpsrlw $0x7,%ymm1,%ymm1
+ .byte 197,221,103,194 // vpackuswb %ymm2,%ymm4,%ymm0
+ .byte 196,227,117,56,211,1 // vinserti128 $0x1,%xmm3,%ymm1,%ymm2
+ .byte 196,227,117,70,203,49 // vperm2i128 $0x31,%ymm3,%ymm1,%ymm1
+ .byte 197,237,103,201 // vpackuswb %ymm1,%ymm2,%ymm1
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 197,252,16,148,36,128,1,0,0 // vmovups 0x180(%rsp),%ymm2
+ .byte 197,252,16,92,36,128 // vmovups -0x80(%rsp),%ymm3
+ .byte 72,129,196,184,1,0,0 // add $0x1b8,%rsp
+ .byte 255,224 // jmpq *%rax
+
+HIDDEN _sk_overlay_hsw_8bit
+.globl _sk_overlay_hsw_8bit
+FUNCTION(_sk_overlay_hsw_8bit)
+_sk_overlay_hsw_8bit:
+ .byte 72,129,236,152,1,0,0 // sub $0x198,%rsp
+ .byte 197,252,17,140,36,64,1,0,0 // vmovups %ymm1,0x140(%rsp)
+ .byte 197,253,111,45,62,7,0,0 // vmovdqa 0x73e(%rip),%ymm5 # 3820 <_sk_overlay_hsw_8bit+0x756>
+ .byte 197,253,111,226 // vmovdqa %ymm2,%ymm4
+ .byte 196,227,125,57,225,1 // vextracti128 $0x1,%ymm4,%xmm1
+ .byte 196,98,125,48,193 // vpmovzxbw %xmm1,%ymm8
+ .byte 197,254,127,156,36,96,1,0,0 // vmovdqu %ymm3,0x160(%rsp)
+ .byte 196,226,125,48,251 // vpmovzxbw %xmm3,%ymm7
+ .byte 196,227,125,57,217,1 // vextracti128 $0x1,%ymm3,%xmm1
+ .byte 196,98,125,48,241 // vpmovzxbw %xmm1,%ymm14
+ .byte 196,226,93,0,205 // vpshufb %ymm5,%ymm4,%ymm1
+ .byte 197,254,127,164,36,32,1,0,0 // vmovdqu %ymm4,0x120(%rsp)
+ .byte 196,226,101,0,221 // vpshufb %ymm5,%ymm3,%ymm3
+ .byte 196,227,125,57,222,1 // vextracti128 $0x1,%ymm3,%xmm6
+ .byte 196,226,125,48,246 // vpmovzxbw %xmm6,%ymm6
+ .byte 196,98,125,48,203 // vpmovzxbw %xmm3,%ymm9
+ .byte 196,227,125,57,203,1 // vextracti128 $0x1,%ymm1,%xmm3
+ .byte 196,98,125,48,219 // vpmovzxbw %xmm3,%ymm11
+ .byte 196,193,61,253,216 // vpaddw %ymm8,%ymm8,%ymm3
+ .byte 197,69,253,231 // vpaddw %ymm7,%ymm7,%ymm12
+ .byte 196,65,13,253,214 // vpaddw %ymm14,%ymm14,%ymm10
+ .byte 196,98,45,58,238 // vpminuw %ymm6,%ymm10,%ymm13
+ .byte 196,193,45,117,213 // vpcmpeqw %ymm13,%ymm10,%ymm2
+ .byte 197,254,127,148,36,0,1,0,0 // vmovdqu %ymm2,0x100(%rsp)
+ .byte 196,66,29,58,233 // vpminuw %ymm9,%ymm12,%ymm13
+ .byte 196,193,29,117,213 // vpcmpeqw %ymm13,%ymm12,%ymm2
+ .byte 197,254,127,148,36,224,0,0,0 // vmovdqu %ymm2,0xe0(%rsp)
+ .byte 196,66,101,58,235 // vpminuw %ymm11,%ymm3,%ymm13
+ .byte 196,193,101,117,213 // vpcmpeqw %ymm13,%ymm3,%ymm2
+ .byte 197,254,127,148,36,160,0,0,0 // vmovdqu %ymm2,0xa0(%rsp)
+ .byte 196,226,125,48,212 // vpmovzxbw %xmm4,%ymm2
+ .byte 196,226,125,48,201 // vpmovzxbw %xmm1,%ymm1
+ .byte 197,237,253,226 // vpaddw %ymm2,%ymm2,%ymm4
+ .byte 196,226,93,58,217 // vpminuw %ymm1,%ymm4,%ymm3
+ .byte 197,93,117,251 // vpcmpeqw %ymm3,%ymm4,%ymm15
+ .byte 197,253,111,216 // vmovdqa %ymm0,%ymm3
+ .byte 196,226,125,48,227 // vpmovzxbw %xmm3,%ymm4
+ .byte 196,226,125,121,5,152,6,0,0 // vpbroadcastw 0x698(%rip),%ymm0 # 3840 <_sk_overlay_hsw_8bit+0x776>
+ .byte 197,117,239,232 // vpxor %ymm0,%ymm1,%ymm13
+ .byte 197,21,213,212 // vpmullw %ymm4,%ymm13,%ymm10
+ .byte 197,126,127,148,36,192,0,0,0 // vmovdqu %ymm10,0xc0(%rsp)
+ .byte 197,125,111,229 // vmovdqa %ymm5,%ymm12
+ .byte 196,194,101,0,236 // vpshufb %ymm12,%ymm3,%ymm5
+ .byte 197,125,111,211 // vmovdqa %ymm3,%ymm10
+ .byte 196,226,125,48,221 // vpmovzxbw %xmm5,%ymm3
+ .byte 197,101,239,232 // vpxor %ymm0,%ymm3,%ymm13
+ .byte 197,21,213,234 // vpmullw %ymm2,%ymm13,%ymm13
+ .byte 197,126,127,172,36,128,0,0,0 // vmovdqu %ymm13,0x80(%rsp)
+ .byte 197,109,213,236 // vpmullw %ymm4,%ymm2,%ymm13
+ .byte 197,126,127,108,36,96 // vmovdqu %ymm13,0x60(%rsp)
+ .byte 197,245,249,210 // vpsubw %ymm2,%ymm1,%ymm2
+ .byte 197,254,127,84,36,160 // vmovdqu %ymm2,-0x60(%rsp)
+ .byte 197,245,213,203 // vpmullw %ymm3,%ymm1,%ymm1
+ .byte 197,254,127,12,36 // vmovdqu %ymm1,(%rsp)
+ .byte 197,229,249,220 // vpsubw %ymm4,%ymm3,%ymm3
+ .byte 196,99,125,57,212,1 // vextracti128 $0x1,%ymm10,%xmm4
+ .byte 196,226,125,48,228 // vpmovzxbw %xmm4,%ymm4
+ .byte 196,227,125,57,237,1 // vextracti128 $0x1,%ymm5,%xmm5
+ .byte 196,226,125,48,237 // vpmovzxbw %xmm5,%ymm5
+ .byte 197,37,239,232 // vpxor %ymm0,%ymm11,%ymm13
+ .byte 197,149,213,204 // vpmullw %ymm4,%ymm13,%ymm1
+ .byte 197,254,127,76,36,64 // vmovdqu %ymm1,0x40(%rsp)
+ .byte 197,85,239,232 // vpxor %ymm0,%ymm5,%ymm13
+ .byte 196,193,21,213,200 // vpmullw %ymm8,%ymm13,%ymm1
+ .byte 197,254,127,76,36,32 // vmovdqu %ymm1,0x20(%rsp)
+ .byte 197,189,213,204 // vpmullw %ymm4,%ymm8,%ymm1
+ .byte 197,254,127,76,36,128 // vmovdqu %ymm1,-0x80(%rsp)
+ .byte 196,65,37,249,192 // vpsubw %ymm8,%ymm11,%ymm8
+ .byte 197,37,213,221 // vpmullw %ymm5,%ymm11,%ymm11
+ .byte 197,213,249,228 // vpsubw %ymm4,%ymm5,%ymm4
+ .byte 197,254,111,140,36,64,1,0,0 // vmovdqu 0x140(%rsp),%ymm1
+ .byte 196,194,117,0,236 // vpshufb %ymm12,%ymm1,%ymm5
+ .byte 196,226,125,48,209 // vpmovzxbw %xmm1,%ymm2
+ .byte 197,125,111,225 // vmovdqa %ymm1,%ymm12
+ .byte 197,53,239,232 // vpxor %ymm0,%ymm9,%ymm13
+ .byte 197,149,213,202 // vpmullw %ymm2,%ymm13,%ymm1
+ .byte 197,254,127,76,36,224 // vmovdqu %ymm1,-0x20(%rsp)
+ .byte 196,226,125,48,205 // vpmovzxbw %xmm5,%ymm1
+ .byte 197,117,239,232 // vpxor %ymm0,%ymm1,%ymm13
+ .byte 197,21,213,215 // vpmullw %ymm7,%ymm13,%ymm10
+ .byte 197,126,127,84,36,192 // vmovdqu %ymm10,-0x40(%rsp)
+ .byte 197,69,213,210 // vpmullw %ymm2,%ymm7,%ymm10
+ .byte 197,181,249,255 // vpsubw %ymm7,%ymm9,%ymm7
+ .byte 197,53,213,201 // vpmullw %ymm1,%ymm9,%ymm9
+ .byte 197,245,249,202 // vpsubw %ymm2,%ymm1,%ymm1
+ .byte 196,99,125,57,226,1 // vextracti128 $0x1,%ymm12,%xmm2
+ .byte 196,226,125,48,210 // vpmovzxbw %xmm2,%ymm2
+ .byte 196,227,125,57,237,1 // vextracti128 $0x1,%ymm5,%xmm5
+ .byte 196,226,125,48,237 // vpmovzxbw %xmm5,%ymm5
+ .byte 197,77,239,232 // vpxor %ymm0,%ymm6,%ymm13
+ .byte 197,21,213,234 // vpmullw %ymm2,%ymm13,%ymm13
+ .byte 197,213,239,192 // vpxor %ymm0,%ymm5,%ymm0
+ .byte 196,65,125,213,230 // vpmullw %ymm14,%ymm0,%ymm12
+ .byte 197,141,213,194 // vpmullw %ymm2,%ymm14,%ymm0
+ .byte 196,65,77,249,246 // vpsubw %ymm14,%ymm6,%ymm14
+ .byte 197,205,213,245 // vpmullw %ymm5,%ymm6,%ymm6
+ .byte 197,213,249,210 // vpsubw %ymm2,%ymm5,%ymm2
+ .byte 197,141,213,210 // vpmullw %ymm2,%ymm14,%ymm2
+ .byte 197,197,213,201 // vpmullw %ymm1,%ymm7,%ymm1
+ .byte 197,189,213,228 // vpmullw %ymm4,%ymm8,%ymm4
+ .byte 197,229,213,92,36,160 // vpmullw -0x60(%rsp),%ymm3,%ymm3
+ .byte 197,237,253,210 // vpaddw %ymm2,%ymm2,%ymm2
+ .byte 197,205,249,210 // vpsubw %ymm2,%ymm6,%ymm2
+ .byte 197,245,253,201 // vpaddw %ymm1,%ymm1,%ymm1
+ .byte 197,181,249,201 // vpsubw %ymm1,%ymm9,%ymm1
+ .byte 197,221,253,228 // vpaddw %ymm4,%ymm4,%ymm4
+ .byte 197,165,249,228 // vpsubw %ymm4,%ymm11,%ymm4
+ .byte 197,229,253,219 // vpaddw %ymm3,%ymm3,%ymm3
+ .byte 197,254,111,44,36 // vmovdqu (%rsp),%ymm5
+ .byte 197,213,249,219 // vpsubw %ymm3,%ymm5,%ymm3
+ .byte 197,254,111,108,36,96 // vmovdqu 0x60(%rsp),%ymm5
+ .byte 197,213,253,237 // vpaddw %ymm5,%ymm5,%ymm5
+ .byte 197,133,223,219 // vpandn %ymm3,%ymm15,%ymm3
+ .byte 196,193,85,219,239 // vpand %ymm15,%ymm5,%ymm5
+ .byte 197,213,235,219 // vpor %ymm3,%ymm5,%ymm3
+ .byte 197,254,111,108,36,128 // vmovdqu -0x80(%rsp),%ymm5
+ .byte 197,213,253,237 // vpaddw %ymm5,%ymm5,%ymm5
+ .byte 197,254,111,180,36,160,0,0,0 // vmovdqu 0xa0(%rsp),%ymm6
+ .byte 197,205,223,228 // vpandn %ymm4,%ymm6,%ymm4
+ .byte 197,213,219,238 // vpand %ymm6,%ymm5,%ymm5
+ .byte 197,213,235,228 // vpor %ymm4,%ymm5,%ymm4
+ .byte 196,193,45,253,234 // vpaddw %ymm10,%ymm10,%ymm5
+ .byte 197,254,111,180,36,224,0,0,0 // vmovdqu 0xe0(%rsp),%ymm6
+ .byte 197,205,223,201 // vpandn %ymm1,%ymm6,%ymm1
+ .byte 197,213,219,238 // vpand %ymm6,%ymm5,%ymm5
+ .byte 197,213,235,201 // vpor %ymm1,%ymm5,%ymm1
+ .byte 197,253,253,192 // vpaddw %ymm0,%ymm0,%ymm0
+ .byte 197,254,111,172,36,0,1,0,0 // vmovdqu 0x100(%rsp),%ymm5
+ .byte 197,213,223,210 // vpandn %ymm2,%ymm5,%ymm2
+ .byte 197,253,219,197 // vpand %ymm5,%ymm0,%ymm0
+ .byte 197,253,235,194 // vpor %ymm2,%ymm0,%ymm0
+ .byte 197,254,111,148,36,128,0,0,0 // vmovdqu 0x80(%rsp),%ymm2
+ .byte 197,237,253,148,36,192,0,0,0 // vpaddw 0xc0(%rsp),%ymm2,%ymm2
+ .byte 197,254,111,108,36,32 // vmovdqu 0x20(%rsp),%ymm5
+ .byte 197,213,253,108,36,64 // vpaddw 0x40(%rsp),%ymm5,%ymm5
+ .byte 197,254,111,116,36,192 // vmovdqu -0x40(%rsp),%ymm6
+ .byte 197,205,253,116,36,224 // vpaddw -0x20(%rsp),%ymm6,%ymm6
+ .byte 196,193,29,253,253 // vpaddw %ymm13,%ymm12,%ymm7
+ .byte 196,98,125,121,5,163,4,0,0 // vpbroadcastw 0x4a3(%rip),%ymm8 # 3842 <_sk_overlay_hsw_8bit+0x778>
+ .byte 196,193,109,253,208 // vpaddw %ymm8,%ymm2,%ymm2
+ .byte 197,229,253,210 // vpaddw %ymm2,%ymm3,%ymm2
+ .byte 196,193,85,253,216 // vpaddw %ymm8,%ymm5,%ymm3
+ .byte 197,221,253,219 // vpaddw %ymm3,%ymm4,%ymm3
+ .byte 196,193,77,253,224 // vpaddw %ymm8,%ymm6,%ymm4
+ .byte 197,245,253,204 // vpaddw %ymm4,%ymm1,%ymm1
+ .byte 196,193,69,253,224 // vpaddw %ymm8,%ymm7,%ymm4
+ .byte 197,253,253,196 // vpaddw %ymm4,%ymm0,%ymm0
+ .byte 196,226,125,121,37,120,4,0,0 // vpbroadcastw 0x478(%rip),%ymm4 # 3844 <_sk_overlay_hsw_8bit+0x77a>
+ .byte 197,253,228,196 // vpmulhuw %ymm4,%ymm0,%ymm0
+ .byte 197,245,228,204 // vpmulhuw %ymm4,%ymm1,%ymm1
+ .byte 197,229,228,220 // vpmulhuw %ymm4,%ymm3,%ymm3
+ .byte 197,229,113,211,7 // vpsrlw $0x7,%ymm3,%ymm3
+ .byte 197,237,228,212 // vpmulhuw %ymm4,%ymm2,%ymm2
+ .byte 197,237,113,210,7 // vpsrlw $0x7,%ymm2,%ymm2
+ .byte 196,227,109,56,227,1 // vinserti128 $0x1,%xmm3,%ymm2,%ymm4
+ .byte 196,227,109,70,211,49 // vperm2i128 $0x31,%ymm3,%ymm2,%ymm2
+ .byte 197,229,113,208,7 // vpsrlw $0x7,%ymm0,%ymm3
+ .byte 197,245,113,209,7 // vpsrlw $0x7,%ymm1,%ymm1
+ .byte 197,221,103,194 // vpackuswb %ymm2,%ymm4,%ymm0
+ .byte 196,227,117,56,211,1 // vinserti128 $0x1,%xmm3,%ymm1,%ymm2
+ .byte 196,227,117,70,203,49 // vperm2i128 $0x31,%ymm3,%ymm1,%ymm1
+ .byte 197,237,103,201 // vpackuswb %ymm1,%ymm2,%ymm1
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 197,252,16,148,36,32,1,0,0 // vmovups 0x120(%rsp),%ymm2
+ .byte 197,252,16,156,36,96,1,0,0 // vmovups 0x160(%rsp),%ymm3
+ .byte 72,129,196,152,1,0,0 // add $0x198,%rsp
+ .byte 255,224 // jmpq *%rax
+
BALIGN4
.byte 0,0 // add %al,(%rax)
- .byte 127,67 // jg 2d97 <_sk_difference_hsw_8bit+0x1c9>
+ .byte 127,67 // jg 3477 <_sk_overlay_hsw_8bit+0x3ad>
.byte 1,1 // add %eax,(%rcx)
.byte 1,0 // add %eax,(%rax)
.byte 0,0 // add %al,(%rax)
@@ -59504,9 +59849,9 @@ BALIGN4
.byte 0,0 // add %al,(%rax)
.byte 0,255 // add %bh,%bh
.byte 0,0 // add %al,(%rax)
- .byte 127,67 // jg 2dab <_sk_difference_hsw_8bit+0x1dd>
+ .byte 127,67 // jg 348b <_sk_overlay_hsw_8bit+0x3c1>
.byte 0,0 // add %al,(%rax)
- .byte 127,67 // jg 2daf <_sk_difference_hsw_8bit+0x1e1>
+ .byte 127,67 // jg 348f <_sk_overlay_hsw_8bit+0x3c5>
.byte 0,0 // add %al,(%rax)
.byte 0,255 // add %bh,%bh
.byte 255 // (bad)
@@ -60063,6 +60408,66 @@ BALIGN32
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,0 // incl (%rax)
+ .byte 3,3 // add (%rbx),%eax
+ .byte 3,3 // add (%rbx),%eax
+ .byte 7 // (bad)
+ .byte 7 // (bad)
+ .byte 7 // (bad)
+ .byte 7 // (bad)
+ .byte 11,11 // or (%rbx),%ecx
+ .byte 11,11 // or (%rbx),%ecx
+ .byte 15 // (bad)
+ .byte 15 // (bad)
+ .byte 15 // (bad)
+ .byte 15,3,3 // lsl (%rbx),%eax
+ .byte 3,3 // add (%rbx),%eax
+ .byte 7 // (bad)
+ .byte 7 // (bad)
+ .byte 7 // (bad)
+ .byte 7 // (bad)
+ .byte 11,11 // or (%rbx),%ecx
+ .byte 11,11 // or (%rbx),%ecx
+ .byte 15 // (bad)
+ .byte 15 // (bad)
+ .byte 15 // (bad)
+ .byte 15,255 // (bad)
+ .byte 0,127,0 // add %bh,0x0(%rdi)
+ .byte 129,128,0,0,0,0,0,0,0,0 // addl $0x0,0x0(%rax)
+ .byte 0,0 // add %al,(%rax)
+ .byte 0,0 // add %al,(%rax)
+ .byte 0,0 // add %al,(%rax)
+ .byte 0,0 // add %al,(%rax)
+ .byte 0,0 // add %al,(%rax)
+ .byte 0,0 // add %al,(%rax)
+ .byte 0,0 // add %al,(%rax)
+ .byte 0,0 // add %al,(%rax)
+ .byte 0,0 // add %al,(%rax)
+ .byte 3,3 // add (%rbx),%eax
+ .byte 3,3 // add (%rbx),%eax
+ .byte 7 // (bad)
+ .byte 7 // (bad)
+ .byte 7 // (bad)
+ .byte 7 // (bad)
+ .byte 11,11 // or (%rbx),%ecx
+ .byte 11,11 // or (%rbx),%ecx
+ .byte 15 // (bad)
+ .byte 15 // (bad)
+ .byte 15 // (bad)
+ .byte 15,3,3 // lsl (%rbx),%eax
+ .byte 3,3 // add (%rbx),%eax
+ .byte 7 // (bad)
+ .byte 7 // (bad)
+ .byte 7 // (bad)
+ .byte 7 // (bad)
+ .byte 11,11 // or (%rbx),%ecx
+ .byte 11,11 // or (%rbx),%ecx
+ .byte 15 // (bad)
+ .byte 15 // (bad)
+ .byte 15 // (bad)
+ .byte 15,255 // (bad)
+ .byte 0,127,0 // add %bh,0x0(%rdi)
+ .byte 129 // .byte 0x81
+ .byte 128 // .byte 0x80
BALIGN16
.byte 0,2 // add %al,(%rdx)
@@ -60157,7 +60562,7 @@ HIDDEN _sk_set_rgb_sse41_8bit
FUNCTION(_sk_set_rgb_sse41_8bit)
_sk_set_rgb_sse41_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 243,15,16,37,89,33,0,0 // movss 0x2159(%rip),%xmm4 # 221c <_sk_difference_sse41_8bit+0x169>
+ .byte 243,15,16,37,61,41,0,0 // movss 0x293d(%rip),%xmm4 # 2a00 <_sk_overlay_sse41_8bit+0x400>
.byte 243,15,16,40 // movss (%rax),%xmm5
.byte 243,15,89,236 // mulss %xmm4,%xmm5
.byte 243,72,15,44,205 // cvttss2si %xmm5,%rcx
@@ -60172,7 +60577,7 @@ _sk_set_rgb_sse41_8bit:
.byte 9,208 // or %edx,%eax
.byte 102,15,110,224 // movd %eax,%xmm4
.byte 102,15,112,228,0 // pshufd $0x0,%xmm4,%xmm4
- .byte 102,15,111,45,45,33,0,0 // movdqa 0x212d(%rip),%xmm5 # 2230 <_sk_difference_sse41_8bit+0x17d>
+ .byte 102,15,111,45,13,41,0,0 // movdqa 0x290d(%rip),%xmm5 # 2a10 <_sk_overlay_sse41_8bit+0x410>
.byte 102,15,219,205 // pand %xmm5,%xmm1
.byte 102,15,219,197 // pand %xmm5,%xmm0
.byte 102,15,235,196 // por %xmm4,%xmm0
@@ -60186,12 +60591,12 @@ FUNCTION(_sk_premul_sse41_8bit)
_sk_premul_sse41_8bit:
.byte 102,15,111,225 // movdqa %xmm1,%xmm4
.byte 102,15,111,232 // movdqa %xmm0,%xmm5
- .byte 102,15,111,5,25,33,0,0 // movdqa 0x2119(%rip),%xmm0 # 2240 <_sk_difference_sse41_8bit+0x18d>
+ .byte 102,15,111,5,249,40,0,0 // movdqa 0x28f9(%rip),%xmm0 # 2a20 <_sk_overlay_sse41_8bit+0x420>
.byte 102,15,111,253 // movdqa %xmm5,%xmm7
.byte 102,15,56,0,248 // pshufb %xmm0,%xmm7
.byte 102,15,111,244 // movdqa %xmm4,%xmm6
.byte 102,15,56,0,240 // pshufb %xmm0,%xmm6
- .byte 102,15,111,5,15,33,0,0 // movdqa 0x210f(%rip),%xmm0 # 2250 <_sk_difference_sse41_8bit+0x19d>
+ .byte 102,15,111,5,239,40,0,0 // movdqa 0x28ef(%rip),%xmm0 # 2a30 <_sk_overlay_sse41_8bit+0x430>
.byte 102,15,235,240 // por %xmm0,%xmm6
.byte 102,15,235,248 // por %xmm0,%xmm7
.byte 102,69,15,239,192 // pxor %xmm8,%xmm8
@@ -60224,7 +60629,7 @@ HIDDEN _sk_swap_rb_sse41_8bit
.globl _sk_swap_rb_sse41_8bit
FUNCTION(_sk_swap_rb_sse41_8bit)
_sk_swap_rb_sse41_8bit:
- .byte 102,15,111,37,156,32,0,0 // movdqa 0x209c(%rip),%xmm4 # 2260 <_sk_difference_sse41_8bit+0x1ad>
+ .byte 102,15,111,37,124,40,0,0 // movdqa 0x287c(%rip),%xmm4 # 2a40 <_sk_overlay_sse41_8bit+0x440>
.byte 102,15,56,0,196 // pshufb %xmm4,%xmm0
.byte 102,15,56,0,204 // pshufb %xmm4,%xmm1
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -60351,7 +60756,7 @@ _sk_load_8888_dst_sse41_8bit:
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 233,255,255,255,221 // jmpq ffffffffde000354 <_sk_difference_sse41_8bit+0xffffffffddffe2a1>
+ .byte 233,255,255,255,221 // jmpq ffffffffde000354 <_sk_overlay_sse41_8bit+0xffffffffddffdd54>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // .byte 0xff
@@ -60434,7 +60839,7 @@ _sk_load_bgra_sse41_8bit:
.byte 117,35 // jne 448 <_sk_load_bgra_sse41_8bit+0x44>
.byte 243,66,15,111,76,130,16 // movdqu 0x10(%rdx,%r8,4),%xmm1
.byte 243,66,15,111,4,130 // movdqu (%rdx,%r8,4),%xmm0
- .byte 102,15,111,37,54,30,0,0 // movdqa 0x1e36(%rip),%xmm4 # 2270 <_sk_difference_sse41_8bit+0x1bd>
+ .byte 102,15,111,37,22,38,0,0 // movdqa 0x2616(%rip),%xmm4 # 2a50 <_sk_overlay_sse41_8bit+0x450>
.byte 102,15,56,0,196 // pshufb %xmm4,%xmm0
.byte 102,15,56,0,204 // pshufb %xmm4,%xmm1
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -60497,7 +60902,7 @@ _sk_load_bgra_dst_sse41_8bit:
.byte 117,35 // jne 518 <_sk_load_bgra_dst_sse41_8bit+0x44>
.byte 243,66,15,111,92,130,16 // movdqu 0x10(%rdx,%r8,4),%xmm3
.byte 243,66,15,111,20,130 // movdqu (%rdx,%r8,4),%xmm2
- .byte 102,15,111,37,118,29,0,0 // movdqa 0x1d76(%rip),%xmm4 # 2280 <_sk_difference_sse41_8bit+0x1cd>
+ .byte 102,15,111,37,86,37,0,0 // movdqa 0x2556(%rip),%xmm4 # 2a60 <_sk_overlay_sse41_8bit+0x460>
.byte 102,15,56,0,212 // pshufb %xmm4,%xmm2
.byte 102,15,56,0,220 // pshufb %xmm4,%xmm3
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -60556,7 +60961,7 @@ _sk_store_bgra_sse41_8bit:
.byte 72,15,175,209 // imul %rcx,%rdx
.byte 72,193,226,2 // shl $0x2,%rdx
.byte 72,3,16 // add (%rax),%rdx
- .byte 102,15,111,53,200,28,0,0 // movdqa 0x1cc8(%rip),%xmm6 # 2290 <_sk_difference_sse41_8bit+0x1dd>
+ .byte 102,15,111,53,168,36,0,0 // movdqa 0x24a8(%rip),%xmm6 # 2a70 <_sk_overlay_sse41_8bit+0x470>
.byte 102,15,111,233 // movdqa %xmm1,%xmm5
.byte 102,15,56,0,238 // pshufb %xmm6,%xmm5
.byte 102,15,111,224 // movdqa %xmm0,%xmm4
@@ -60622,7 +61027,7 @@ _sk_load_a8_sse41_8bit:
.byte 77,133,201 // test %r9,%r9
.byte 117,42 // jne 6af <_sk_load_a8_sse41_8bit+0x47>
.byte 102,66,15,56,48,12,2 // pmovzxbw (%rdx,%r8,1),%xmm1
- .byte 102,15,219,13,12,28,0,0 // pand 0x1c0c(%rip),%xmm1 # 22a0 <_sk_difference_sse41_8bit+0x1ed>
+ .byte 102,15,219,13,236,35,0,0 // pand 0x23ec(%rip),%xmm1 # 2a80 <_sk_overlay_sse41_8bit+0x480>
.byte 102,15,239,228 // pxor %xmm4,%xmm4
.byte 102,15,56,51,193 // pmovzxwd %xmm1,%xmm0
.byte 102,15,105,204 // punpckhwd %xmm4,%xmm1
@@ -60670,7 +61075,7 @@ _sk_load_a8_sse41_8bit:
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 233,255,255,255,222 // jmpq ffffffffdf000750 <_sk_difference_sse41_8bit+0xffffffffdeffe69d>
+ .byte 233,255,255,255,222 // jmpq ffffffffdf000750 <_sk_overlay_sse41_8bit+0xffffffffdeffe150>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,211 // callq *%rbx
@@ -60695,7 +61100,7 @@ _sk_load_a8_dst_sse41_8bit:
.byte 77,133,201 // test %r9,%r9
.byte 117,42 // jne 7a3 <_sk_load_a8_dst_sse41_8bit+0x47>
.byte 102,66,15,56,48,28,2 // pmovzxbw (%rdx,%r8,1),%xmm3
- .byte 102,15,219,29,40,27,0,0 // pand 0x1b28(%rip),%xmm3 # 22b0 <_sk_difference_sse41_8bit+0x1fd>
+ .byte 102,15,219,29,8,35,0,0 // pand 0x2308(%rip),%xmm3 # 2a90 <_sk_overlay_sse41_8bit+0x490>
.byte 102,15,239,228 // pxor %xmm4,%xmm4
.byte 102,15,56,51,211 // pmovzxwd %xmm3,%xmm2
.byte 102,15,105,220 // punpckhwd %xmm4,%xmm3
@@ -60743,7 +61148,7 @@ _sk_load_a8_dst_sse41_8bit:
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 233,255,255,255,222 // jmpq ffffffffdf000844 <_sk_difference_sse41_8bit+0xffffffffdeffe791>
+ .byte 233,255,255,255,222 // jmpq ffffffffdf000844 <_sk_overlay_sse41_8bit+0xffffffffdeffe244>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,211 // callq *%rbx
@@ -60765,7 +61170,7 @@ _sk_store_a8_sse41_8bit:
.byte 72,99,87,8 // movslq 0x8(%rdi),%rdx
.byte 72,15,175,209 // imul %rcx,%rdx
.byte 72,3,16 // add (%rax),%rdx
- .byte 102,15,111,45,80,26,0,0 // movdqa 0x1a50(%rip),%xmm5 # 22c0 <_sk_difference_sse41_8bit+0x20d>
+ .byte 102,15,111,45,48,34,0,0 // movdqa 0x2230(%rip),%xmm5 # 2aa0 <_sk_overlay_sse41_8bit+0x4a0>
.byte 102,15,111,241 // movdqa %xmm1,%xmm6
.byte 102,15,56,0,245 // pshufb %xmm5,%xmm6
.byte 102,15,111,224 // movdqa %xmm0,%xmm4
@@ -60773,7 +61178,7 @@ _sk_store_a8_sse41_8bit:
.byte 102,15,108,230 // punpcklqdq %xmm6,%xmm4
.byte 77,133,201 // test %r9,%r9
.byte 117,19 // jne 89e <_sk_store_a8_sse41_8bit+0x4e>
- .byte 102,15,56,0,37,92,26,0,0 // pshufb 0x1a5c(%rip),%xmm4 # 22f0 <_sk_difference_sse41_8bit+0x23d>
+ .byte 102,15,56,0,37,60,34,0,0 // pshufb 0x223c(%rip),%xmm4 # 2ad0 <_sk_overlay_sse41_8bit+0x4d0>
.byte 102,66,15,214,36,2 // movq %xmm4,(%rdx,%r8,1)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -60789,13 +61194,13 @@ _sk_store_a8_sse41_8bit:
.byte 102,66,15,58,20,36,2,0 // pextrb $0x0,%xmm4,(%rdx,%r8,1)
.byte 235,209 // jmp 89a <_sk_store_a8_sse41_8bit+0x4a>
.byte 102,66,15,58,20,100,2,2,4 // pextrb $0x4,%xmm4,0x2(%rdx,%r8,1)
- .byte 102,15,56,0,37,5,26,0,0 // pshufb 0x1a05(%rip),%xmm4 # 22e0 <_sk_difference_sse41_8bit+0x22d>
+ .byte 102,15,56,0,37,229,33,0,0 // pshufb 0x21e5(%rip),%xmm4 # 2ac0 <_sk_overlay_sse41_8bit+0x4c0>
.byte 102,66,15,58,21,36,2,0 // pextrw $0x0,%xmm4,(%rdx,%r8,1)
.byte 235,181 // jmp 89a <_sk_store_a8_sse41_8bit+0x4a>
.byte 102,66,15,58,20,100,2,6,12 // pextrb $0xc,%xmm4,0x6(%rdx,%r8,1)
.byte 102,66,15,58,20,100,2,5,10 // pextrb $0xa,%xmm4,0x5(%rdx,%r8,1)
.byte 102,66,15,58,20,100,2,4,8 // pextrb $0x8,%xmm4,0x4(%rdx,%r8,1)
- .byte 102,15,56,0,37,199,25,0,0 // pshufb 0x19c7(%rip),%xmm4 # 22d0 <_sk_difference_sse41_8bit+0x21d>
+ .byte 102,15,56,0,37,167,33,0,0 // pshufb 0x21a7(%rip),%xmm4 # 2ab0 <_sk_overlay_sse41_8bit+0x4b0>
.byte 102,66,15,126,36,2 // movd %xmm4,(%rdx,%r8,1)
.byte 235,137 // jmp 89a <_sk_store_a8_sse41_8bit+0x4a>
.byte 15,31,0 // nopl (%rax)
@@ -60835,14 +61240,14 @@ _sk_load_g8_sse41_8bit:
.byte 77,133,201 // test %r9,%r9
.byte 117,66 // jne 98f <_sk_load_g8_sse41_8bit+0x5f>
.byte 102,66,15,56,48,12,2 // pmovzxbw (%rdx,%r8,1),%xmm1
- .byte 102,15,219,13,164,25,0,0 // pand 0x19a4(%rip),%xmm1 # 2300 <_sk_difference_sse41_8bit+0x24d>
+ .byte 102,15,219,13,132,33,0,0 // pand 0x2184(%rip),%xmm1 # 2ae0 <_sk_overlay_sse41_8bit+0x4e0>
.byte 102,15,239,228 // pxor %xmm4,%xmm4
.byte 102,15,56,51,193 // pmovzxwd %xmm1,%xmm0
.byte 102,15,105,204 // punpckhwd %xmm4,%xmm1
- .byte 102,15,111,37,159,25,0,0 // movdqa 0x199f(%rip),%xmm4 # 2310 <_sk_difference_sse41_8bit+0x25d>
+ .byte 102,15,111,37,127,33,0,0 // movdqa 0x217f(%rip),%xmm4 # 2af0 <_sk_overlay_sse41_8bit+0x4f0>
.byte 102,15,56,64,204 // pmulld %xmm4,%xmm1
.byte 102,15,56,64,196 // pmulld %xmm4,%xmm0
- .byte 102,15,111,37,157,25,0,0 // movdqa 0x199d(%rip),%xmm4 # 2320 <_sk_difference_sse41_8bit+0x26d>
+ .byte 102,15,111,37,125,33,0,0 // movdqa 0x217d(%rip),%xmm4 # 2b00 <_sk_overlay_sse41_8bit+0x500>
.byte 102,15,235,196 // por %xmm4,%xmm0
.byte 102,15,235,204 // por %xmm4,%xmm1
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -60887,7 +61292,7 @@ _sk_load_g8_sse41_8bit:
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 232,255,255,255,221 // callq ffffffffde000a34 <_sk_difference_sse41_8bit+0xffffffffddffe981>
+ .byte 232,255,255,255,221 // callq ffffffffde000a34 <_sk_overlay_sse41_8bit+0xffffffffddffe434>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,210 // callq *%rdx
@@ -60912,14 +61317,14 @@ _sk_load_g8_dst_sse41_8bit:
.byte 77,133,201 // test %r9,%r9
.byte 117,66 // jne a9f <_sk_load_g8_dst_sse41_8bit+0x5f>
.byte 102,66,15,56,48,28,2 // pmovzxbw (%rdx,%r8,1),%xmm3
- .byte 102,15,219,29,196,24,0,0 // pand 0x18c4(%rip),%xmm3 # 2330 <_sk_difference_sse41_8bit+0x27d>
+ .byte 102,15,219,29,164,32,0,0 // pand 0x20a4(%rip),%xmm3 # 2b10 <_sk_overlay_sse41_8bit+0x510>
.byte 102,15,239,228 // pxor %xmm4,%xmm4
.byte 102,15,56,51,211 // pmovzxwd %xmm3,%xmm2
.byte 102,15,105,220 // punpckhwd %xmm4,%xmm3
- .byte 102,15,111,37,191,24,0,0 // movdqa 0x18bf(%rip),%xmm4 # 2340 <_sk_difference_sse41_8bit+0x28d>
+ .byte 102,15,111,37,159,32,0,0 // movdqa 0x209f(%rip),%xmm4 # 2b20 <_sk_overlay_sse41_8bit+0x520>
.byte 102,15,56,64,220 // pmulld %xmm4,%xmm3
.byte 102,15,56,64,212 // pmulld %xmm4,%xmm2
- .byte 102,15,111,37,189,24,0,0 // movdqa 0x18bd(%rip),%xmm4 # 2350 <_sk_difference_sse41_8bit+0x29d>
+ .byte 102,15,111,37,157,32,0,0 // movdqa 0x209d(%rip),%xmm4 # 2b30 <_sk_overlay_sse41_8bit+0x530>
.byte 102,15,235,212 // por %xmm4,%xmm2
.byte 102,15,235,220 // por %xmm4,%xmm3
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -60964,7 +61369,7 @@ _sk_load_g8_dst_sse41_8bit:
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 232,255,255,255,221 // callq ffffffffde000b44 <_sk_difference_sse41_8bit+0xffffffffddffea91>
+ .byte 232,255,255,255,221 // callq ffffffffde000b44 <_sk_overlay_sse41_8bit+0xffffffffddffe544>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,210 // callq *%rdx
@@ -60992,7 +61397,7 @@ _sk_srcover_rgba_8888_sse41_8bit:
.byte 243,70,15,111,68,138,16 // movdqu 0x10(%rdx,%r9,4),%xmm8
.byte 243,70,15,111,12,138 // movdqu (%rdx,%r9,4),%xmm9
.byte 77,133,192 // test %r8,%r8
- .byte 102,15,111,37,211,23,0,0 // movdqa 0x17d3(%rip),%xmm4 # 2360 <_sk_difference_sse41_8bit+0x2ad>
+ .byte 102,15,111,37,179,31,0,0 // movdqa 0x1fb3(%rip),%xmm4 # 2b40 <_sk_overlay_sse41_8bit+0x540>
.byte 102,15,111,241 // movdqa %xmm1,%xmm6
.byte 102,15,56,0,244 // pshufb %xmm4,%xmm6
.byte 102,15,111,248 // movdqa %xmm0,%xmm7
@@ -61114,7 +61519,7 @@ _sk_scale_1_float_sse41_8bit:
.byte 102,15,111,232 // movdqa %xmm0,%xmm5
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 243,15,16,0 // movss (%rax),%xmm0
- .byte 243,15,89,5,178,20,0,0 // mulss 0x14b2(%rip),%xmm0 # 2220 <_sk_difference_sse41_8bit+0x16d>
+ .byte 243,15,89,5,150,28,0,0 // mulss 0x1c96(%rip),%xmm0 # 2a04 <_sk_overlay_sse41_8bit+0x404>
.byte 243,15,44,192 // cvttss2si %xmm0,%eax
.byte 15,87,192 // xorps %xmm0,%xmm0
.byte 102,68,15,56,48,197 // pmovzxbw %xmm5,%xmm8
@@ -61122,7 +61527,7 @@ _sk_scale_1_float_sse41_8bit:
.byte 102,68,15,56,48,204 // pmovzxbw %xmm4,%xmm9
.byte 102,15,104,224 // punpckhbw %xmm0,%xmm4
.byte 102,15,110,240 // movd %eax,%xmm6
- .byte 102,15,56,0,53,218,21,0,0 // pshufb 0x15da(%rip),%xmm6 # 2370 <_sk_difference_sse41_8bit+0x2bd>
+ .byte 102,15,56,0,53,186,29,0,0 // pshufb 0x1dba(%rip),%xmm6 # 2b50 <_sk_overlay_sse41_8bit+0x550>
.byte 102,15,111,206 // movdqa %xmm6,%xmm1
.byte 102,65,15,213,201 // pmullw %xmm9,%xmm1
.byte 102,15,111,198 // movdqa %xmm6,%xmm0
@@ -61157,11 +61562,11 @@ _sk_scale_u8_sse41_8bit:
.byte 77,133,201 // test %r9,%r9
.byte 15,133,160,0,0,0 // jne ea7 <_sk_scale_u8_sse41_8bit+0xc1>
.byte 102,66,15,56,48,52,2 // pmovzxbw (%rdx,%r8,1),%xmm6
- .byte 102,15,219,53,106,21,0,0 // pand 0x156a(%rip),%xmm6 # 2380 <_sk_difference_sse41_8bit+0x2cd>
+ .byte 102,15,219,53,74,29,0,0 // pand 0x1d4a(%rip),%xmm6 # 2b60 <_sk_overlay_sse41_8bit+0x560>
.byte 102,69,15,239,192 // pxor %xmm8,%xmm8
.byte 102,15,111,254 // movdqa %xmm6,%xmm7
- .byte 102,15,56,0,61,104,21,0,0 // pshufb 0x1568(%rip),%xmm7 # 2390 <_sk_difference_sse41_8bit+0x2dd>
- .byte 102,15,56,0,53,111,21,0,0 // pshufb 0x156f(%rip),%xmm6 # 23a0 <_sk_difference_sse41_8bit+0x2ed>
+ .byte 102,15,56,0,61,72,29,0,0 // pshufb 0x1d48(%rip),%xmm7 # 2b70 <_sk_overlay_sse41_8bit+0x570>
+ .byte 102,15,56,0,53,79,29,0,0 // pshufb 0x1d4f(%rip),%xmm6 # 2b80 <_sk_overlay_sse41_8bit+0x580>
.byte 102,68,15,56,48,200 // pmovzxbw %xmm0,%xmm9
.byte 102,65,15,104,192 // punpckhbw %xmm8,%xmm0
.byte 102,68,15,56,48,209 // pmovzxbw %xmm1,%xmm10
@@ -61246,7 +61651,7 @@ FUNCTION(_sk_lerp_1_float_sse41_8bit)
_sk_lerp_1_float_sse41_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 243,15,16,32 // movss (%rax),%xmm4
- .byte 243,15,89,37,182,18,0,0 // mulss 0x12b6(%rip),%xmm4 # 2224 <_sk_difference_sse41_8bit+0x171>
+ .byte 243,15,89,37,154,26,0,0 // mulss 0x1a9a(%rip),%xmm4 # 2a08 <_sk_overlay_sse41_8bit+0x408>
.byte 243,15,44,196 // cvttss2si %xmm4,%eax
.byte 102,15,110,224 // movd %eax,%xmm4
.byte 102,15,96,228 // punpcklbw %xmm4,%xmm4
@@ -61257,7 +61662,7 @@ _sk_lerp_1_float_sse41_8bit:
.byte 102,65,15,104,193 // punpckhbw %xmm9,%xmm0
.byte 102,68,15,56,48,217 // pmovzxbw %xmm1,%xmm11
.byte 102,65,15,104,201 // punpckhbw %xmm9,%xmm1
- .byte 102,15,56,0,61,7,20,0,0 // pshufb 0x1407(%rip),%xmm7 # 23b0 <_sk_difference_sse41_8bit+0x2fd>
+ .byte 102,15,56,0,61,231,27,0,0 // pshufb 0x1be7(%rip),%xmm7 # 2b90 <_sk_overlay_sse41_8bit+0x590>
.byte 102,68,15,111,231 // movdqa %xmm7,%xmm12
.byte 102,69,15,213,227 // pmullw %xmm11,%xmm12
.byte 102,68,15,111,239 // movdqa %xmm7,%xmm13
@@ -61320,11 +61725,11 @@ _sk_lerp_u8_sse41_8bit:
.byte 77,133,201 // test %r9,%r9
.byte 15,133,46,1,0,0 // jne 11d1 <_sk_lerp_u8_sse41_8bit+0x14f>
.byte 102,66,15,56,48,60,2 // pmovzxbw (%rdx,%r8,1),%xmm7
- .byte 102,15,219,61,14,19,0,0 // pand 0x130e(%rip),%xmm7 # 23c0 <_sk_difference_sse41_8bit+0x30d>
+ .byte 102,15,219,61,238,26,0,0 // pand 0x1aee(%rip),%xmm7 # 2ba0 <_sk_overlay_sse41_8bit+0x5a0>
.byte 102,69,15,239,192 // pxor %xmm8,%xmm8
.byte 102,15,111,247 // movdqa %xmm7,%xmm6
- .byte 102,15,56,0,53,12,19,0,0 // pshufb 0x130c(%rip),%xmm6 # 23d0 <_sk_difference_sse41_8bit+0x31d>
- .byte 102,15,56,0,61,19,19,0,0 // pshufb 0x1313(%rip),%xmm7 # 23e0 <_sk_difference_sse41_8bit+0x32d>
+ .byte 102,15,56,0,53,236,26,0,0 // pshufb 0x1aec(%rip),%xmm6 # 2bb0 <_sk_overlay_sse41_8bit+0x5b0>
+ .byte 102,15,56,0,61,243,26,0,0 // pshufb 0x1af3(%rip),%xmm7 # 2bc0 <_sk_overlay_sse41_8bit+0x5c0>
.byte 102,68,15,56,48,200 // pmovzxbw %xmm0,%xmm9
.byte 102,65,15,104,192 // punpckhbw %xmm8,%xmm0
.byte 102,68,15,56,48,209 // pmovzxbw %xmm1,%xmm10
@@ -61419,7 +61824,7 @@ _sk_lerp_u8_sse41_8bit:
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 233,255,255,255,222 // jmpq ffffffffdf00127c <_sk_difference_sse41_8bit+0xffffffffdefff1c9>
+ .byte 233,255,255,255,222 // jmpq ffffffffdf00127c <_sk_overlay_sse41_8bit+0xffffffffdeffec7c>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,211 // callq *%rbx
@@ -61453,7 +61858,7 @@ HIDDEN _sk_black_color_sse41_8bit
FUNCTION(_sk_black_color_sse41_8bit)
_sk_black_color_sse41_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 15,40,5,75,17,0,0 // movaps 0x114b(%rip),%xmm0 # 23f0 <_sk_difference_sse41_8bit+0x33d>
+ .byte 15,40,5,43,25,0,0 // movaps 0x192b(%rip),%xmm0 # 2bd0 <_sk_overlay_sse41_8bit+0x5d0>
.byte 15,40,200 // movaps %xmm0,%xmm1
.byte 255,224 // jmpq *%rax
@@ -61479,7 +61884,7 @@ HIDDEN _sk_srcatop_sse41_8bit
.globl _sk_srcatop_sse41_8bit
FUNCTION(_sk_srcatop_sse41_8bit)
_sk_srcatop_sse41_8bit:
- .byte 102,68,15,111,21,55,17,0,0 // movdqa 0x1137(%rip),%xmm10 # 2400 <_sk_difference_sse41_8bit+0x34d>
+ .byte 102,68,15,111,21,23,25,0,0 // movdqa 0x1917(%rip),%xmm10 # 2be0 <_sk_overlay_sse41_8bit+0x5e0>
.byte 102,68,15,111,219 // movdqa %xmm3,%xmm11
.byte 102,68,15,56,48,195 // pmovzxbw %xmm3,%xmm8
.byte 102,15,111,235 // movdqa %xmm3,%xmm5
@@ -61549,7 +61954,7 @@ HIDDEN _sk_dstatop_sse41_8bit
.globl _sk_dstatop_sse41_8bit
FUNCTION(_sk_dstatop_sse41_8bit)
_sk_dstatop_sse41_8bit:
- .byte 102,68,15,111,29,12,16,0,0 // movdqa 0x100c(%rip),%xmm11 # 2410 <_sk_difference_sse41_8bit+0x35d>
+ .byte 102,68,15,111,29,236,23,0,0 // movdqa 0x17ec(%rip),%xmm11 # 2bf0 <_sk_overlay_sse41_8bit+0x5f0>
.byte 102,68,15,111,233 // movdqa %xmm1,%xmm13
.byte 102,69,15,56,0,235 // pshufb %xmm11,%xmm13
.byte 102,68,15,111,248 // movdqa %xmm0,%xmm15
@@ -61621,7 +62026,7 @@ FUNCTION(_sk_srcin_sse41_8bit)
_sk_srcin_sse41_8bit:
.byte 102,15,111,225 // movdqa %xmm1,%xmm4
.byte 102,15,111,232 // movdqa %xmm0,%xmm5
- .byte 102,15,111,5,211,14,0,0 // movdqa 0xed3(%rip),%xmm0 # 2420 <_sk_difference_sse41_8bit+0x36d>
+ .byte 102,15,111,5,179,22,0,0 // movdqa 0x16b3(%rip),%xmm0 # 2c00 <_sk_overlay_sse41_8bit+0x600>
.byte 102,15,111,243 // movdqa %xmm3,%xmm6
.byte 102,15,56,0,240 // pshufb %xmm0,%xmm6
.byte 102,15,111,250 // movdqa %xmm2,%xmm7
@@ -61656,7 +62061,7 @@ HIDDEN _sk_dstin_sse41_8bit
.globl _sk_dstin_sse41_8bit
FUNCTION(_sk_dstin_sse41_8bit)
_sk_dstin_sse41_8bit:
- .byte 102,15,111,37,86,14,0,0 // movdqa 0xe56(%rip),%xmm4 # 2430 <_sk_difference_sse41_8bit+0x37d>
+ .byte 102,15,111,37,54,22,0,0 // movdqa 0x1636(%rip),%xmm4 # 2c10 <_sk_overlay_sse41_8bit+0x610>
.byte 102,15,56,0,204 // pshufb %xmm4,%xmm1
.byte 102,15,56,0,196 // pshufb %xmm4,%xmm0
.byte 102,69,15,239,210 // pxor %xmm10,%xmm10
@@ -61695,7 +62100,7 @@ FUNCTION(_sk_srcout_sse41_8bit)
_sk_srcout_sse41_8bit:
.byte 102,15,111,225 // movdqa %xmm1,%xmm4
.byte 102,15,111,232 // movdqa %xmm0,%xmm5
- .byte 102,15,111,5,201,13,0,0 // movdqa 0xdc9(%rip),%xmm0 # 2440 <_sk_difference_sse41_8bit+0x38d>
+ .byte 102,15,111,5,169,21,0,0 // movdqa 0x15a9(%rip),%xmm0 # 2c20 <_sk_overlay_sse41_8bit+0x620>
.byte 102,15,111,250 // movdqa %xmm2,%xmm7
.byte 102,15,56,0,248 // pshufb %xmm0,%xmm7
.byte 102,15,111,243 // movdqa %xmm3,%xmm6
@@ -61733,7 +62138,7 @@ HIDDEN _sk_dstout_sse41_8bit
.globl _sk_dstout_sse41_8bit
FUNCTION(_sk_dstout_sse41_8bit)
_sk_dstout_sse41_8bit:
- .byte 102,15,111,37,64,13,0,0 // movdqa 0xd40(%rip),%xmm4 # 2450 <_sk_difference_sse41_8bit+0x39d>
+ .byte 102,15,111,37,32,21,0,0 // movdqa 0x1520(%rip),%xmm4 # 2c30 <_sk_overlay_sse41_8bit+0x630>
.byte 102,15,56,0,196 // pshufb %xmm4,%xmm0
.byte 102,15,56,0,204 // pshufb %xmm4,%xmm1
.byte 102,15,118,228 // pcmpeqd %xmm4,%xmm4
@@ -61773,7 +62178,7 @@ HIDDEN _sk_srcover_sse41_8bit
.globl _sk_srcover_sse41_8bit
FUNCTION(_sk_srcover_sse41_8bit)
_sk_srcover_sse41_8bit:
- .byte 102,15,111,53,175,12,0,0 // movdqa 0xcaf(%rip),%xmm6 # 2460 <_sk_difference_sse41_8bit+0x3ad>
+ .byte 102,15,111,53,143,20,0,0 // movdqa 0x148f(%rip),%xmm6 # 2c40 <_sk_overlay_sse41_8bit+0x640>
.byte 102,68,15,111,217 // movdqa %xmm1,%xmm11
.byte 102,68,15,56,0,222 // pshufb %xmm6,%xmm11
.byte 102,15,111,232 // movdqa %xmm0,%xmm5
@@ -61814,7 +62219,7 @@ HIDDEN _sk_dstover_sse41_8bit
.globl _sk_dstover_sse41_8bit
FUNCTION(_sk_dstover_sse41_8bit)
_sk_dstover_sse41_8bit:
- .byte 102,68,15,111,5,15,12,0,0 // movdqa 0xc0f(%rip),%xmm8 # 2470 <_sk_difference_sse41_8bit+0x3bd>
+ .byte 102,68,15,111,5,239,19,0,0 // movdqa 0x13ef(%rip),%xmm8 # 2c50 <_sk_overlay_sse41_8bit+0x650>
.byte 102,68,15,111,209 // movdqa %xmm1,%xmm10
.byte 102,68,15,56,48,201 // pmovzxbw %xmm1,%xmm9
.byte 102,15,252,203 // paddb %xmm3,%xmm1
@@ -61893,7 +62298,7 @@ _sk_multiply_sse41_8bit:
.byte 102,15,111,218 // movdqa %xmm2,%xmm3
.byte 102,15,111,209 // movdqa %xmm1,%xmm2
.byte 102,15,111,200 // movdqa %xmm0,%xmm1
- .byte 102,68,15,111,53,221,10,0,0 // movdqa 0xadd(%rip),%xmm14 # 2480 <_sk_difference_sse41_8bit+0x3cd>
+ .byte 102,68,15,111,53,189,18,0,0 // movdqa 0x12bd(%rip),%xmm14 # 2c60 <_sk_overlay_sse41_8bit+0x660>
.byte 102,68,15,111,195 // movdqa %xmm3,%xmm8
.byte 102,15,111,235 // movdqa %xmm3,%xmm5
.byte 102,65,15,56,0,238 // pshufb %xmm14,%xmm5
@@ -62022,7 +62427,7 @@ HIDDEN _sk_xor__sse41_8bit
.globl _sk_xor__sse41_8bit
FUNCTION(_sk_xor__sse41_8bit)
_sk_xor__sse41_8bit:
- .byte 102,68,15,111,21,171,8,0,0 // movdqa 0x8ab(%rip),%xmm10 # 2490 <_sk_difference_sse41_8bit+0x3dd>
+ .byte 102,68,15,111,21,139,16,0,0 // movdqa 0x108b(%rip),%xmm10 # 2c70 <_sk_overlay_sse41_8bit+0x670>
.byte 102,68,15,111,226 // movdqa %xmm2,%xmm12
.byte 102,68,15,56,48,194 // pmovzxbw %xmm2,%xmm8
.byte 102,15,111,234 // movdqa %xmm2,%xmm5
@@ -62095,7 +62500,7 @@ HIDDEN _sk_darken_sse41_8bit
FUNCTION(_sk_darken_sse41_8bit)
_sk_darken_sse41_8bit:
.byte 102,68,15,111,240 // movdqa %xmm0,%xmm14
- .byte 102,68,15,111,5,110,7,0,0 // movdqa 0x76e(%rip),%xmm8 # 24a0 <_sk_difference_sse41_8bit+0x3ed>
+ .byte 102,68,15,111,5,78,15,0,0 // movdqa 0xf4e(%rip),%xmm8 # 2c80 <_sk_overlay_sse41_8bit+0x680>
.byte 102,68,15,111,219 // movdqa %xmm3,%xmm11
.byte 102,68,15,56,48,203 // pmovzxbw %xmm3,%xmm9
.byte 102,15,111,243 // movdqa %xmm3,%xmm6
@@ -62162,7 +62567,7 @@ _sk_darken_sse41_8bit:
.byte 102,15,248,231 // psubb %xmm7,%xmm4
.byte 102,15,248,206 // psubb %xmm6,%xmm1
.byte 102,68,15,248,245 // psubb %xmm5,%xmm14
- .byte 15,40,5,50,6,0,0 // movaps 0x632(%rip),%xmm0 # 24b0 <_sk_difference_sse41_8bit+0x3fd>
+ .byte 15,40,5,18,14,0,0 // movaps 0xe12(%rip),%xmm0 # 2c90 <_sk_overlay_sse41_8bit+0x690>
.byte 102,68,15,56,16,244 // pblendvb %xmm0,%xmm4,%xmm14
.byte 102,65,15,56,16,200 // pblendvb %xmm0,%xmm8,%xmm1
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -62174,7 +62579,7 @@ HIDDEN _sk_lighten_sse41_8bit
FUNCTION(_sk_lighten_sse41_8bit)
_sk_lighten_sse41_8bit:
.byte 102,68,15,111,240 // movdqa %xmm0,%xmm14
- .byte 102,68,15,111,5,31,6,0,0 // movdqa 0x61f(%rip),%xmm8 # 24c0 <_sk_difference_sse41_8bit+0x40d>
+ .byte 102,68,15,111,5,255,13,0,0 // movdqa 0xdff(%rip),%xmm8 # 2ca0 <_sk_overlay_sse41_8bit+0x6a0>
.byte 102,68,15,111,219 // movdqa %xmm3,%xmm11
.byte 102,68,15,56,48,203 // pmovzxbw %xmm3,%xmm9
.byte 102,15,111,243 // movdqa %xmm3,%xmm6
@@ -62241,7 +62646,7 @@ _sk_lighten_sse41_8bit:
.byte 102,15,248,231 // psubb %xmm7,%xmm4
.byte 102,15,248,206 // psubb %xmm6,%xmm1
.byte 102,68,15,248,245 // psubb %xmm5,%xmm14
- .byte 15,40,5,227,4,0,0 // movaps 0x4e3(%rip),%xmm0 # 24d0 <_sk_difference_sse41_8bit+0x41d>
+ .byte 15,40,5,195,12,0,0 // movaps 0xcc3(%rip),%xmm0 # 2cb0 <_sk_overlay_sse41_8bit+0x6b0>
.byte 102,68,15,56,16,244 // pblendvb %xmm0,%xmm4,%xmm14
.byte 102,65,15,56,16,200 // pblendvb %xmm0,%xmm8,%xmm1
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -62281,7 +62686,7 @@ _sk_exclusion_sse41_8bit:
.byte 102,15,113,215,8 // psrlw $0x8,%xmm7
.byte 102,15,103,253 // packuswb %xmm5,%xmm7
.byte 102,15,103,230 // packuswb %xmm6,%xmm4
- .byte 102,15,111,45,73,4,0,0 // movdqa 0x449(%rip),%xmm5 # 24e0 <_sk_difference_sse41_8bit+0x42d>
+ .byte 102,15,111,45,41,12,0,0 // movdqa 0xc29(%rip),%xmm5 # 2cc0 <_sk_overlay_sse41_8bit+0x6c0>
.byte 102,15,248,204 // psubb %xmm4,%xmm1
.byte 102,15,219,229 // pand %xmm5,%xmm4
.byte 102,15,219,239 // pand %xmm7,%xmm5
@@ -62296,7 +62701,7 @@ HIDDEN _sk_difference_sse41_8bit
FUNCTION(_sk_difference_sse41_8bit)
_sk_difference_sse41_8bit:
.byte 102,68,15,111,193 // movdqa %xmm1,%xmm8
- .byte 102,15,111,13,48,4,0,0 // movdqa 0x430(%rip),%xmm1 # 24f0 <_sk_difference_sse41_8bit+0x43d>
+ .byte 102,15,111,13,16,12,0,0 // movdqa 0xc10(%rip),%xmm1 # 2cd0 <_sk_overlay_sse41_8bit+0x6d0>
.byte 102,68,15,111,219 // movdqa %xmm3,%xmm11
.byte 102,68,15,56,48,203 // pmovzxbw %xmm3,%xmm9
.byte 102,15,111,243 // movdqa %xmm3,%xmm6
@@ -62334,7 +62739,7 @@ _sk_difference_sse41_8bit:
.byte 102,15,113,214,8 // psrlw $0x8,%xmm6
.byte 102,15,113,215,8 // psrlw $0x8,%xmm7
.byte 102,15,103,254 // packuswb %xmm6,%xmm7
- .byte 102,15,111,37,117,3,0,0 // movdqa 0x375(%rip),%xmm4 # 24f0 <_sk_difference_sse41_8bit+0x43d>
+ .byte 102,15,111,37,85,11,0,0 // movdqa 0xb55(%rip),%xmm4 # 2cd0 <_sk_overlay_sse41_8bit+0x6d0>
.byte 102,15,56,0,204 // pshufb %xmm4,%xmm1
.byte 102,15,56,0,236 // pshufb %xmm4,%xmm5
.byte 102,69,15,104,236 // punpckhbw %xmm12,%xmm13
@@ -62359,7 +62764,7 @@ _sk_difference_sse41_8bit:
.byte 102,15,103,241 // packuswb %xmm1,%xmm6
.byte 102,65,15,218,230 // pminub %xmm14,%xmm4
.byte 102,15,218,247 // pminub %xmm7,%xmm6
- .byte 102,15,111,13,8,3,0,0 // movdqa 0x308(%rip),%xmm1 # 2500 <_sk_difference_sse41_8bit+0x44d>
+ .byte 102,15,111,13,232,10,0,0 // movdqa 0xae8(%rip),%xmm1 # 2ce0 <_sk_overlay_sse41_8bit+0x6e0>
.byte 102,68,15,248,198 // psubb %xmm6,%xmm8
.byte 102,15,219,241 // pand %xmm1,%xmm6
.byte 102,15,219,204 // pand %xmm4,%xmm1
@@ -62370,13 +62775,420 @@ _sk_difference_sse41_8bit:
.byte 102,65,15,111,200 // movdqa %xmm8,%xmm1
.byte 255,224 // jmpq *%rax
+HIDDEN _sk_hardlight_sse41_8bit
+.globl _sk_hardlight_sse41_8bit
+FUNCTION(_sk_hardlight_sse41_8bit)
+_sk_hardlight_sse41_8bit:
+ .byte 72,129,236,152,0,0,0 // sub $0x98,%rsp
+ .byte 102,68,15,111,251 // movdqa %xmm3,%xmm15
+ .byte 102,68,15,111,233 // movdqa %xmm1,%xmm13
+ .byte 102,68,15,111,240 // movdqa %xmm0,%xmm14
+ .byte 102,15,239,192 // pxor %xmm0,%xmm0
+ .byte 102,69,15,111,198 // movdqa %xmm14,%xmm8
+ .byte 102,68,15,104,192 // punpckhbw %xmm0,%xmm8
+ .byte 102,15,104,200 // punpckhbw %xmm0,%xmm1
+ .byte 102,15,111,225 // movdqa %xmm1,%xmm4
+ .byte 102,15,127,100,36,144 // movdqa %xmm4,-0x70(%rsp)
+ .byte 102,65,15,56,48,206 // pmovzxbw %xmm14,%xmm1
+ .byte 102,15,127,76,36,128 // movdqa %xmm1,-0x80(%rsp)
+ .byte 102,69,15,56,48,221 // pmovzxbw %xmm13,%xmm11
+ .byte 102,68,15,127,92,36,48 // movdqa %xmm11,0x30(%rsp)
+ .byte 102,15,111,13,130,10,0,0 // movdqa 0xa82(%rip),%xmm1 # 2cf0 <_sk_overlay_sse41_8bit+0x6f0>
+ .byte 102,68,15,56,0,241 // pshufb %xmm1,%xmm14
+ .byte 102,68,15,56,0,233 // pshufb %xmm1,%xmm13
+ .byte 102,69,15,56,48,205 // pmovzxbw %xmm13,%xmm9
+ .byte 102,68,15,104,232 // punpckhbw %xmm0,%xmm13
+ .byte 102,69,15,56,48,214 // pmovzxbw %xmm14,%xmm10
+ .byte 102,68,15,104,240 // punpckhbw %xmm0,%xmm14
+ .byte 102,15,127,84,36,192 // movdqa %xmm2,-0x40(%rsp)
+ .byte 102,15,111,242 // movdqa %xmm2,%xmm6
+ .byte 102,15,111,234 // movdqa %xmm2,%xmm5
+ .byte 102,15,104,232 // punpckhbw %xmm0,%xmm5
+ .byte 102,68,15,127,124,36,208 // movdqa %xmm15,-0x30(%rsp)
+ .byte 102,65,15,111,215 // movdqa %xmm15,%xmm2
+ .byte 102,68,15,104,248 // punpckhbw %xmm0,%xmm15
+ .byte 102,15,56,0,241 // pshufb %xmm1,%xmm6
+ .byte 102,15,56,0,209 // pshufb %xmm1,%xmm2
+ .byte 102,68,15,56,48,226 // pmovzxbw %xmm2,%xmm12
+ .byte 102,68,15,127,100,36,160 // movdqa %xmm12,-0x60(%rsp)
+ .byte 102,15,104,208 // punpckhbw %xmm0,%xmm2
+ .byte 102,15,127,84,36,176 // movdqa %xmm2,-0x50(%rsp)
+ .byte 102,15,56,48,222 // pmovzxbw %xmm6,%xmm3
+ .byte 102,15,104,240 // punpckhbw %xmm0,%xmm6
+ .byte 102,15,111,254 // movdqa %xmm6,%xmm7
+ .byte 102,15,111,196 // movdqa %xmm4,%xmm0
+ .byte 102,15,253,192 // paddw %xmm0,%xmm0
+ .byte 102,15,111,200 // movdqa %xmm0,%xmm1
+ .byte 102,65,15,56,58,205 // pminuw %xmm13,%xmm1
+ .byte 102,15,117,200 // pcmpeqw %xmm0,%xmm1
+ .byte 102,15,127,76,36,112 // movdqa %xmm1,0x70(%rsp)
+ .byte 102,65,15,111,192 // movdqa %xmm8,%xmm0
+ .byte 102,15,253,192 // paddw %xmm0,%xmm0
+ .byte 102,15,111,200 // movdqa %xmm0,%xmm1
+ .byte 102,65,15,56,58,206 // pminuw %xmm14,%xmm1
+ .byte 102,15,117,200 // pcmpeqw %xmm0,%xmm1
+ .byte 102,15,127,76,36,80 // movdqa %xmm1,0x50(%rsp)
+ .byte 102,65,15,111,195 // movdqa %xmm11,%xmm0
+ .byte 102,15,253,192 // paddw %xmm0,%xmm0
+ .byte 102,15,111,208 // movdqa %xmm0,%xmm2
+ .byte 102,65,15,111,225 // movdqa %xmm9,%xmm4
+ .byte 102,15,127,100,36,224 // movdqa %xmm4,-0x20(%rsp)
+ .byte 102,15,56,58,212 // pminuw %xmm4,%xmm2
+ .byte 102,15,117,208 // pcmpeqw %xmm0,%xmm2
+ .byte 102,15,127,148,36,128,0,0,0 // movdqa %xmm2,0x80(%rsp)
+ .byte 102,15,111,68,36,128 // movdqa -0x80(%rsp),%xmm0
+ .byte 102,15,253,192 // paddw %xmm0,%xmm0
+ .byte 102,15,111,240 // movdqa %xmm0,%xmm6
+ .byte 102,65,15,56,58,242 // pminuw %xmm10,%xmm6
+ .byte 102,15,117,240 // pcmpeqw %xmm0,%xmm6
+ .byte 102,15,127,116,36,64 // movdqa %xmm6,0x40(%rsp)
+ .byte 102,15,111,53,150,9,0,0 // movdqa 0x996(%rip),%xmm6 # 2d00 <_sk_overlay_sse41_8bit+0x700>
+ .byte 102,15,111,199 // movdqa %xmm7,%xmm0
+ .byte 102,15,239,198 // pxor %xmm6,%xmm0
+ .byte 102,65,15,213,192 // pmullw %xmm8,%xmm0
+ .byte 102,15,127,68,36,96 // movdqa %xmm0,0x60(%rsp)
+ .byte 102,69,15,111,222 // movdqa %xmm14,%xmm11
+ .byte 102,68,15,239,222 // pxor %xmm6,%xmm11
+ .byte 102,68,15,213,221 // pmullw %xmm5,%xmm11
+ .byte 102,15,111,199 // movdqa %xmm7,%xmm0
+ .byte 102,15,249,253 // psubw %xmm5,%xmm7
+ .byte 102,15,127,60,36 // movdqa %xmm7,(%rsp)
+ .byte 102,68,15,111,205 // movdqa %xmm5,%xmm9
+ .byte 102,69,15,213,200 // pmullw %xmm8,%xmm9
+ .byte 102,65,15,213,198 // pmullw %xmm14,%xmm0
+ .byte 102,15,127,68,36,16 // movdqa %xmm0,0x10(%rsp)
+ .byte 102,69,15,249,240 // psubw %xmm8,%xmm14
+ .byte 102,15,111,195 // movdqa %xmm3,%xmm0
+ .byte 102,15,239,198 // pxor %xmm6,%xmm0
+ .byte 102,68,15,111,192 // movdqa %xmm0,%xmm8
+ .byte 102,65,15,111,212 // movdqa %xmm12,%xmm2
+ .byte 102,15,239,214 // pxor %xmm6,%xmm2
+ .byte 102,15,111,76,36,176 // movdqa -0x50(%rsp),%xmm1
+ .byte 102,15,111,193 // movdqa %xmm1,%xmm0
+ .byte 102,15,239,198 // pxor %xmm6,%xmm0
+ .byte 102,68,15,111,100,36,144 // movdqa -0x70(%rsp),%xmm12
+ .byte 102,65,15,213,196 // pmullw %xmm12,%xmm0
+ .byte 102,15,127,68,36,32 // movdqa %xmm0,0x20(%rsp)
+ .byte 102,65,15,111,194 // movdqa %xmm10,%xmm0
+ .byte 102,15,111,248 // movdqa %xmm0,%xmm7
+ .byte 102,15,239,254 // pxor %xmm6,%xmm7
+ .byte 102,15,111,236 // movdqa %xmm4,%xmm5
+ .byte 102,15,239,238 // pxor %xmm6,%xmm5
+ .byte 102,65,15,239,245 // pxor %xmm13,%xmm6
+ .byte 102,65,15,111,231 // movdqa %xmm15,%xmm4
+ .byte 102,15,213,244 // pmullw %xmm4,%xmm6
+ .byte 102,68,15,111,249 // movdqa %xmm1,%xmm15
+ .byte 102,15,249,204 // psubw %xmm4,%xmm1
+ .byte 102,15,127,76,36,176 // movdqa %xmm1,-0x50(%rsp)
+ .byte 102,65,15,213,228 // pmullw %xmm12,%xmm4
+ .byte 102,69,15,213,253 // pmullw %xmm13,%xmm15
+ .byte 102,69,15,249,236 // psubw %xmm12,%xmm13
+ .byte 102,68,15,111,84,36,128 // movdqa -0x80(%rsp),%xmm10
+ .byte 102,69,15,213,194 // pmullw %xmm10,%xmm8
+ .byte 102,68,15,127,68,36,144 // movdqa %xmm8,-0x70(%rsp)
+ .byte 102,68,15,56,48,68,36,192 // pmovzxbw -0x40(%rsp),%xmm8
+ .byte 102,65,15,213,248 // pmullw %xmm8,%xmm7
+ .byte 102,68,15,111,227 // movdqa %xmm3,%xmm12
+ .byte 102,65,15,249,216 // psubw %xmm8,%xmm3
+ .byte 102,15,127,92,36,240 // movdqa %xmm3,-0x10(%rsp)
+ .byte 102,69,15,213,194 // pmullw %xmm10,%xmm8
+ .byte 102,68,15,213,224 // pmullw %xmm0,%xmm12
+ .byte 102,65,15,249,194 // psubw %xmm10,%xmm0
+ .byte 102,15,111,200 // movdqa %xmm0,%xmm1
+ .byte 102,15,111,92,36,48 // movdqa 0x30(%rsp),%xmm3
+ .byte 102,15,213,211 // pmullw %xmm3,%xmm2
+ .byte 102,15,127,84,36,128 // movdqa %xmm2,-0x80(%rsp)
+ .byte 102,15,56,48,84,36,208 // pmovzxbw -0x30(%rsp),%xmm2
+ .byte 102,15,213,234 // pmullw %xmm2,%xmm5
+ .byte 102,15,111,68,36,160 // movdqa -0x60(%rsp),%xmm0
+ .byte 102,68,15,111,208 // movdqa %xmm0,%xmm10
+ .byte 102,15,249,194 // psubw %xmm2,%xmm0
+ .byte 102,15,127,68,36,160 // movdqa %xmm0,-0x60(%rsp)
+ .byte 102,15,213,211 // pmullw %xmm3,%xmm2
+ .byte 102,15,111,68,36,224 // movdqa -0x20(%rsp),%xmm0
+ .byte 102,68,15,213,208 // pmullw %xmm0,%xmm10
+ .byte 102,15,249,195 // psubw %xmm3,%xmm0
+ .byte 102,68,15,213,108,36,176 // pmullw -0x50(%rsp),%xmm13
+ .byte 102,68,15,213,52,36 // pmullw (%rsp),%xmm14
+ .byte 102,15,213,68,36,160 // pmullw -0x60(%rsp),%xmm0
+ .byte 102,15,213,76,36,240 // pmullw -0x10(%rsp),%xmm1
+ .byte 102,15,253,192 // paddw %xmm0,%xmm0
+ .byte 102,68,15,249,208 // psubw %xmm0,%xmm10
+ .byte 102,15,253,201 // paddw %xmm1,%xmm1
+ .byte 102,68,15,249,225 // psubw %xmm1,%xmm12
+ .byte 102,69,15,253,237 // paddw %xmm13,%xmm13
+ .byte 102,69,15,249,253 // psubw %xmm13,%xmm15
+ .byte 102,69,15,253,246 // paddw %xmm14,%xmm14
+ .byte 102,15,111,76,36,16 // movdqa 0x10(%rsp),%xmm1
+ .byte 102,65,15,249,206 // psubw %xmm14,%xmm1
+ .byte 102,69,15,253,201 // paddw %xmm9,%xmm9
+ .byte 102,15,111,68,36,80 // movdqa 0x50(%rsp),%xmm0
+ .byte 102,68,15,219,200 // pand %xmm0,%xmm9
+ .byte 102,15,223,193 // pandn %xmm1,%xmm0
+ .byte 102,65,15,235,193 // por %xmm9,%xmm0
+ .byte 102,15,111,216 // movdqa %xmm0,%xmm3
+ .byte 102,15,253,228 // paddw %xmm4,%xmm4
+ .byte 102,15,111,68,36,112 // movdqa 0x70(%rsp),%xmm0
+ .byte 102,15,219,224 // pand %xmm0,%xmm4
+ .byte 102,65,15,223,199 // pandn %xmm15,%xmm0
+ .byte 102,15,235,196 // por %xmm4,%xmm0
+ .byte 102,15,111,200 // movdqa %xmm0,%xmm1
+ .byte 102,69,15,253,192 // paddw %xmm8,%xmm8
+ .byte 102,15,111,68,36,64 // movdqa 0x40(%rsp),%xmm0
+ .byte 102,68,15,219,192 // pand %xmm0,%xmm8
+ .byte 102,65,15,223,196 // pandn %xmm12,%xmm0
+ .byte 102,65,15,235,192 // por %xmm8,%xmm0
+ .byte 102,15,111,224 // movdqa %xmm0,%xmm4
+ .byte 102,15,253,210 // paddw %xmm2,%xmm2
+ .byte 102,15,111,132,36,128,0,0,0 // movdqa 0x80(%rsp),%xmm0
+ .byte 102,15,219,208 // pand %xmm0,%xmm2
+ .byte 102,65,15,223,194 // pandn %xmm10,%xmm0
+ .byte 102,15,235,194 // por %xmm2,%xmm0
+ .byte 102,15,111,208 // movdqa %xmm0,%xmm2
+ .byte 102,68,15,253,92,36,96 // paddw 0x60(%rsp),%xmm11
+ .byte 102,15,253,116,36,32 // paddw 0x20(%rsp),%xmm6
+ .byte 102,15,253,124,36,144 // paddw -0x70(%rsp),%xmm7
+ .byte 102,15,253,108,36,128 // paddw -0x80(%rsp),%xmm5
+ .byte 102,15,111,5,134,7,0,0 // movdqa 0x786(%rip),%xmm0 # 2d10 <_sk_overlay_sse41_8bit+0x710>
+ .byte 102,68,15,253,216 // paddw %xmm0,%xmm11
+ .byte 102,68,15,253,219 // paddw %xmm3,%xmm11
+ .byte 102,15,253,240 // paddw %xmm0,%xmm6
+ .byte 102,15,253,241 // paddw %xmm1,%xmm6
+ .byte 102,15,253,248 // paddw %xmm0,%xmm7
+ .byte 102,15,253,252 // paddw %xmm4,%xmm7
+ .byte 102,15,253,232 // paddw %xmm0,%xmm5
+ .byte 102,15,253,234 // paddw %xmm2,%xmm5
+ .byte 102,15,111,5,108,7,0,0 // movdqa 0x76c(%rip),%xmm0 # 2d20 <_sk_overlay_sse41_8bit+0x720>
+ .byte 102,15,228,240 // pmulhuw %xmm0,%xmm6
+ .byte 102,68,15,228,216 // pmulhuw %xmm0,%xmm11
+ .byte 102,15,228,232 // pmulhuw %xmm0,%xmm5
+ .byte 102,15,228,248 // pmulhuw %xmm0,%xmm7
+ .byte 102,65,15,113,211,7 // psrlw $0x7,%xmm11
+ .byte 102,15,113,215,7 // psrlw $0x7,%xmm7
+ .byte 102,65,15,103,251 // packuswb %xmm11,%xmm7
+ .byte 102,15,113,214,7 // psrlw $0x7,%xmm6
+ .byte 102,15,113,213,7 // psrlw $0x7,%xmm5
+ .byte 102,15,103,238 // packuswb %xmm6,%xmm5
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 15,40,84,36,192 // movaps -0x40(%rsp),%xmm2
+ .byte 15,40,92,36,208 // movaps -0x30(%rsp),%xmm3
+ .byte 102,15,111,199 // movdqa %xmm7,%xmm0
+ .byte 102,15,111,205 // movdqa %xmm5,%xmm1
+ .byte 72,129,196,152,0,0,0 // add $0x98,%rsp
+ .byte 255,224 // jmpq *%rax
+
+HIDDEN _sk_overlay_sse41_8bit
+.globl _sk_overlay_sse41_8bit
+FUNCTION(_sk_overlay_sse41_8bit)
+_sk_overlay_sse41_8bit:
+ .byte 72,129,236,200,0,0,0 // sub $0xc8,%rsp
+ .byte 102,68,15,111,241 // movdqa %xmm1,%xmm14
+ .byte 102,68,15,111,248 // movdqa %xmm0,%xmm15
+ .byte 102,15,239,192 // pxor %xmm0,%xmm0
+ .byte 102,65,15,111,231 // movdqa %xmm15,%xmm4
+ .byte 102,15,104,224 // punpckhbw %xmm0,%xmm4
+ .byte 102,69,15,111,230 // movdqa %xmm14,%xmm12
+ .byte 102,68,15,104,224 // punpckhbw %xmm0,%xmm12
+ .byte 102,65,15,56,48,207 // pmovzxbw %xmm15,%xmm1
+ .byte 102,15,127,76,36,128 // movdqa %xmm1,-0x80(%rsp)
+ .byte 102,65,15,56,48,206 // pmovzxbw %xmm14,%xmm1
+ .byte 102,15,127,140,36,144,0,0,0 // movdqa %xmm1,0x90(%rsp)
+ .byte 102,15,111,13,229,6,0,0 // movdqa 0x6e5(%rip),%xmm1 # 2d30 <_sk_overlay_sse41_8bit+0x730>
+ .byte 102,68,15,56,0,249 // pshufb %xmm1,%xmm15
+ .byte 102,68,15,56,0,241 // pshufb %xmm1,%xmm14
+ .byte 102,65,15,56,48,238 // pmovzxbw %xmm14,%xmm5
+ .byte 102,15,127,108,36,160 // movdqa %xmm5,-0x60(%rsp)
+ .byte 102,68,15,104,240 // punpckhbw %xmm0,%xmm14
+ .byte 102,65,15,56,48,239 // pmovzxbw %xmm15,%xmm5
+ .byte 102,15,127,172,36,128,0,0,0 // movdqa %xmm5,0x80(%rsp)
+ .byte 102,68,15,104,248 // punpckhbw %xmm0,%xmm15
+ .byte 102,15,111,242 // movdqa %xmm2,%xmm6
+ .byte 102,15,111,234 // movdqa %xmm2,%xmm5
+ .byte 102,68,15,111,218 // movdqa %xmm2,%xmm11
+ .byte 102,68,15,127,156,36,160,0,0,0 // movdqa %xmm11,0xa0(%rsp)
+ .byte 102,15,104,232 // punpckhbw %xmm0,%xmm5
+ .byte 102,15,111,251 // movdqa %xmm3,%xmm7
+ .byte 102,15,111,211 // movdqa %xmm3,%xmm2
+ .byte 102,15,127,156,36,176,0,0,0 // movdqa %xmm3,0xb0(%rsp)
+ .byte 102,15,104,208 // punpckhbw %xmm0,%xmm2
+ .byte 102,15,56,0,241 // pshufb %xmm1,%xmm6
+ .byte 102,15,56,0,249 // pshufb %xmm1,%xmm7
+ .byte 102,15,56,48,207 // pmovzxbw %xmm7,%xmm1
+ .byte 102,15,104,248 // punpckhbw %xmm0,%xmm7
+ .byte 102,68,15,56,48,238 // pmovzxbw %xmm6,%xmm13
+ .byte 102,15,104,240 // punpckhbw %xmm0,%xmm6
+ .byte 102,68,15,111,206 // movdqa %xmm6,%xmm9
+ .byte 102,15,111,194 // movdqa %xmm2,%xmm0
+ .byte 102,15,253,192 // paddw %xmm0,%xmm0
+ .byte 102,15,111,240 // movdqa %xmm0,%xmm6
+ .byte 102,15,56,58,247 // pminuw %xmm7,%xmm6
+ .byte 102,15,117,240 // pcmpeqw %xmm0,%xmm6
+ .byte 102,15,127,116,36,80 // movdqa %xmm6,0x50(%rsp)
+ .byte 102,15,111,197 // movdqa %xmm5,%xmm0
+ .byte 102,15,253,192 // paddw %xmm0,%xmm0
+ .byte 102,15,111,240 // movdqa %xmm0,%xmm6
+ .byte 102,65,15,56,58,241 // pminuw %xmm9,%xmm6
+ .byte 102,15,117,240 // pcmpeqw %xmm0,%xmm6
+ .byte 102,15,127,116,36,64 // movdqa %xmm6,0x40(%rsp)
+ .byte 102,15,56,48,195 // pmovzxbw %xmm3,%xmm0
+ .byte 102,15,127,68,36,16 // movdqa %xmm0,0x10(%rsp)
+ .byte 102,15,253,192 // paddw %xmm0,%xmm0
+ .byte 102,15,111,216 // movdqa %xmm0,%xmm3
+ .byte 102,15,56,58,217 // pminuw %xmm1,%xmm3
+ .byte 102,15,127,76,36,144 // movdqa %xmm1,-0x70(%rsp)
+ .byte 102,15,117,216 // pcmpeqw %xmm0,%xmm3
+ .byte 102,15,127,92,36,96 // movdqa %xmm3,0x60(%rsp)
+ .byte 102,65,15,56,48,195 // pmovzxbw %xmm11,%xmm0
+ .byte 102,15,127,68,36,224 // movdqa %xmm0,-0x20(%rsp)
+ .byte 102,15,253,192 // paddw %xmm0,%xmm0
+ .byte 102,15,111,216 // movdqa %xmm0,%xmm3
+ .byte 102,65,15,56,58,221 // pminuw %xmm13,%xmm3
+ .byte 102,15,117,216 // pcmpeqw %xmm0,%xmm3
+ .byte 102,15,127,92,36,48 // movdqa %xmm3,0x30(%rsp)
+ .byte 102,15,111,53,231,5,0,0 // movdqa 0x5e7(%rip),%xmm6 # 2d40 <_sk_overlay_sse41_8bit+0x740>
+ .byte 102,65,15,111,193 // movdqa %xmm9,%xmm0
+ .byte 102,15,239,198 // pxor %xmm6,%xmm0
+ .byte 102,15,213,196 // pmullw %xmm4,%xmm0
+ .byte 102,15,127,68,36,112 // movdqa %xmm0,0x70(%rsp)
+ .byte 102,69,15,111,223 // movdqa %xmm15,%xmm11
+ .byte 102,68,15,239,222 // pxor %xmm6,%xmm11
+ .byte 102,68,15,213,221 // pmullw %xmm5,%xmm11
+ .byte 102,65,15,111,193 // movdqa %xmm9,%xmm0
+ .byte 102,68,15,249,205 // psubw %xmm5,%xmm9
+ .byte 102,68,15,127,76,36,208 // movdqa %xmm9,-0x30(%rsp)
+ .byte 102,68,15,111,205 // movdqa %xmm5,%xmm9
+ .byte 102,68,15,213,204 // pmullw %xmm4,%xmm9
+ .byte 102,65,15,213,199 // pmullw %xmm15,%xmm0
+ .byte 102,15,127,68,36,240 // movdqa %xmm0,-0x10(%rsp)
+ .byte 102,68,15,249,252 // psubw %xmm4,%xmm15
+ .byte 102,65,15,111,197 // movdqa %xmm13,%xmm0
+ .byte 102,15,239,198 // pxor %xmm6,%xmm0
+ .byte 102,15,111,216 // movdqa %xmm0,%xmm3
+ .byte 102,15,239,206 // pxor %xmm6,%xmm1
+ .byte 102,68,15,111,209 // movdqa %xmm1,%xmm10
+ .byte 102,15,111,207 // movdqa %xmm7,%xmm1
+ .byte 102,15,111,193 // movdqa %xmm1,%xmm0
+ .byte 102,15,239,198 // pxor %xmm6,%xmm0
+ .byte 102,65,15,111,252 // movdqa %xmm12,%xmm7
+ .byte 102,15,213,199 // pmullw %xmm7,%xmm0
+ .byte 102,15,127,68,36,32 // movdqa %xmm0,0x20(%rsp)
+ .byte 102,15,111,132,36,128,0,0,0 // movdqa 0x80(%rsp),%xmm0
+ .byte 102,68,15,111,192 // movdqa %xmm0,%xmm8
+ .byte 102,68,15,239,198 // pxor %xmm6,%xmm8
+ .byte 102,15,111,108,36,160 // movdqa -0x60(%rsp),%xmm5
+ .byte 102,15,239,238 // pxor %xmm6,%xmm5
+ .byte 102,65,15,239,246 // pxor %xmm14,%xmm6
+ .byte 102,15,213,242 // pmullw %xmm2,%xmm6
+ .byte 102,68,15,111,225 // movdqa %xmm1,%xmm12
+ .byte 102,15,249,202 // psubw %xmm2,%xmm1
+ .byte 102,15,127,76,36,192 // movdqa %xmm1,-0x40(%rsp)
+ .byte 102,15,111,226 // movdqa %xmm2,%xmm4
+ .byte 102,15,213,231 // pmullw %xmm7,%xmm4
+ .byte 102,69,15,213,230 // pmullw %xmm14,%xmm12
+ .byte 102,68,15,249,247 // psubw %xmm7,%xmm14
+ .byte 102,15,111,76,36,128 // movdqa -0x80(%rsp),%xmm1
+ .byte 102,15,213,217 // pmullw %xmm1,%xmm3
+ .byte 102,15,127,28,36 // movdqa %xmm3,(%rsp)
+ .byte 102,15,111,84,36,224 // movdqa -0x20(%rsp),%xmm2
+ .byte 102,68,15,213,194 // pmullw %xmm2,%xmm8
+ .byte 102,65,15,111,221 // movdqa %xmm13,%xmm3
+ .byte 102,15,249,218 // psubw %xmm2,%xmm3
+ .byte 102,15,127,92,36,176 // movdqa %xmm3,-0x50(%rsp)
+ .byte 102,15,111,218 // movdqa %xmm2,%xmm3
+ .byte 102,15,213,217 // pmullw %xmm1,%xmm3
+ .byte 102,68,15,213,232 // pmullw %xmm0,%xmm13
+ .byte 102,15,249,193 // psubw %xmm1,%xmm0
+ .byte 102,15,111,200 // movdqa %xmm0,%xmm1
+ .byte 102,15,111,148,36,144,0,0,0 // movdqa 0x90(%rsp),%xmm2
+ .byte 102,68,15,213,210 // pmullw %xmm2,%xmm10
+ .byte 102,68,15,127,84,36,128 // movdqa %xmm10,-0x80(%rsp)
+ .byte 102,15,111,68,36,16 // movdqa 0x10(%rsp),%xmm0
+ .byte 102,15,213,232 // pmullw %xmm0,%xmm5
+ .byte 102,15,111,124,36,144 // movdqa -0x70(%rsp),%xmm7
+ .byte 102,68,15,111,215 // movdqa %xmm7,%xmm10
+ .byte 102,15,249,248 // psubw %xmm0,%xmm7
+ .byte 102,15,127,124,36,144 // movdqa %xmm7,-0x70(%rsp)
+ .byte 102,15,111,248 // movdqa %xmm0,%xmm7
+ .byte 102,15,213,250 // pmullw %xmm2,%xmm7
+ .byte 102,15,111,68,36,160 // movdqa -0x60(%rsp),%xmm0
+ .byte 102,68,15,213,208 // pmullw %xmm0,%xmm10
+ .byte 102,15,249,194 // psubw %xmm2,%xmm0
+ .byte 102,68,15,213,116,36,192 // pmullw -0x40(%rsp),%xmm14
+ .byte 102,68,15,213,124,36,208 // pmullw -0x30(%rsp),%xmm15
+ .byte 102,15,213,68,36,144 // pmullw -0x70(%rsp),%xmm0
+ .byte 102,15,213,76,36,176 // pmullw -0x50(%rsp),%xmm1
+ .byte 102,15,253,192 // paddw %xmm0,%xmm0
+ .byte 102,68,15,249,208 // psubw %xmm0,%xmm10
+ .byte 102,15,253,201 // paddw %xmm1,%xmm1
+ .byte 102,68,15,249,233 // psubw %xmm1,%xmm13
+ .byte 102,69,15,253,246 // paddw %xmm14,%xmm14
+ .byte 102,69,15,249,230 // psubw %xmm14,%xmm12
+ .byte 102,69,15,253,255 // paddw %xmm15,%xmm15
+ .byte 102,15,111,76,36,240 // movdqa -0x10(%rsp),%xmm1
+ .byte 102,65,15,249,207 // psubw %xmm15,%xmm1
+ .byte 102,69,15,253,201 // paddw %xmm9,%xmm9
+ .byte 102,15,111,68,36,64 // movdqa 0x40(%rsp),%xmm0
+ .byte 102,68,15,219,200 // pand %xmm0,%xmm9
+ .byte 102,15,223,193 // pandn %xmm1,%xmm0
+ .byte 102,65,15,235,193 // por %xmm9,%xmm0
+ .byte 102,68,15,111,200 // movdqa %xmm0,%xmm9
+ .byte 102,15,253,228 // paddw %xmm4,%xmm4
+ .byte 102,15,111,68,36,80 // movdqa 0x50(%rsp),%xmm0
+ .byte 102,15,219,224 // pand %xmm0,%xmm4
+ .byte 102,65,15,223,196 // pandn %xmm12,%xmm0
+ .byte 102,15,235,196 // por %xmm4,%xmm0
+ .byte 102,15,111,200 // movdqa %xmm0,%xmm1
+ .byte 102,15,253,219 // paddw %xmm3,%xmm3
+ .byte 102,15,111,68,36,48 // movdqa 0x30(%rsp),%xmm0
+ .byte 102,15,219,216 // pand %xmm0,%xmm3
+ .byte 102,65,15,223,197 // pandn %xmm13,%xmm0
+ .byte 102,15,235,195 // por %xmm3,%xmm0
+ .byte 102,15,111,216 // movdqa %xmm0,%xmm3
+ .byte 102,15,253,255 // paddw %xmm7,%xmm7
+ .byte 102,15,111,68,36,96 // movdqa 0x60(%rsp),%xmm0
+ .byte 102,15,219,248 // pand %xmm0,%xmm7
+ .byte 102,65,15,223,194 // pandn %xmm10,%xmm0
+ .byte 102,15,235,199 // por %xmm7,%xmm0
+ .byte 102,15,111,208 // movdqa %xmm0,%xmm2
+ .byte 102,68,15,253,92,36,112 // paddw 0x70(%rsp),%xmm11
+ .byte 102,15,253,116,36,32 // paddw 0x20(%rsp),%xmm6
+ .byte 102,68,15,253,4,36 // paddw (%rsp),%xmm8
+ .byte 102,15,253,108,36,128 // paddw -0x80(%rsp),%xmm5
+ .byte 102,15,111,5,211,3,0,0 // movdqa 0x3d3(%rip),%xmm0 # 2d50 <_sk_overlay_sse41_8bit+0x750>
+ .byte 102,68,15,253,216 // paddw %xmm0,%xmm11
+ .byte 102,69,15,253,217 // paddw %xmm9,%xmm11
+ .byte 102,15,253,240 // paddw %xmm0,%xmm6
+ .byte 102,15,253,241 // paddw %xmm1,%xmm6
+ .byte 102,68,15,253,192 // paddw %xmm0,%xmm8
+ .byte 102,68,15,253,195 // paddw %xmm3,%xmm8
+ .byte 102,15,253,232 // paddw %xmm0,%xmm5
+ .byte 102,15,253,234 // paddw %xmm2,%xmm5
+ .byte 102,15,111,5,183,3,0,0 // movdqa 0x3b7(%rip),%xmm0 # 2d60 <_sk_overlay_sse41_8bit+0x760>
+ .byte 102,15,228,240 // pmulhuw %xmm0,%xmm6
+ .byte 102,68,15,228,216 // pmulhuw %xmm0,%xmm11
+ .byte 102,15,228,232 // pmulhuw %xmm0,%xmm5
+ .byte 102,68,15,228,192 // pmulhuw %xmm0,%xmm8
+ .byte 102,65,15,113,211,7 // psrlw $0x7,%xmm11
+ .byte 102,65,15,113,208,7 // psrlw $0x7,%xmm8
+ .byte 102,69,15,103,195 // packuswb %xmm11,%xmm8
+ .byte 102,15,113,214,7 // psrlw $0x7,%xmm6
+ .byte 102,15,113,213,7 // psrlw $0x7,%xmm5
+ .byte 102,15,103,238 // packuswb %xmm6,%xmm5
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 15,40,148,36,160,0,0,0 // movaps 0xa0(%rsp),%xmm2
+ .byte 15,40,156,36,176,0,0,0 // movaps 0xb0(%rsp),%xmm3
+ .byte 102,65,15,111,192 // movdqa %xmm8,%xmm0
+ .byte 102,15,111,205 // movdqa %xmm5,%xmm1
+ .byte 72,129,196,200,0,0,0 // add $0xc8,%rsp
+ .byte 255,224 // jmpq *%rax
+
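
For orientation while reading the generated listing above: the new hardlight and overlay stages vectorize the standard separable blend math over premultiplied 8-bit channels, using the 0x007f rounding bias and 0x8081 multiplier visible in the literal pool that follows. A minimal scalar sketch of that math, with illustrative names only (this is not the shipped SkJumper_stages_8bit.cpp code):

    #include <cstdint>

    // The stages approximate x/255 by adding 0x7f, then pmulhuw by 0x8081 and
    // psrlw $7; plain integer division keeps this sketch obviously correct.
    static inline int div255(int v) { return (v + 127) / 255; }

    // Hardlight on premultiplied bytes: "multiply" where 2*s <= sa, "screen"
    // otherwise, plus the usual source/dest coverage terms.
    static uint8_t blend_hardlight(uint8_t s, uint8_t sa, uint8_t d, uint8_t da) {
        int coverage = s * (255 - da) + d * (255 - sa);
        int mix = (2 * s <= sa) ? 2 * s * d
                                : sa * da - 2 * (sa - s) * (da - d);
        return (uint8_t)div255(coverage + mix);   // one /255 over the summed products
    }

    // Overlay is hardlight with source and destination swapped.
    static uint8_t blend_overlay(uint8_t s, uint8_t sa, uint8_t d, uint8_t da) {
        return blend_hardlight(d, da, s, sa);
    }

The SIMD versions evaluate both halves of the branch in every lane, build the 2*x <= a masks with paddw plus pminuw/pcmpeqw (SSE4.1) or psubusw/pcmpeqw (SSE2), pick a side with pand/pandn/por, and finish with the pmulhuw/psrlw $7 division seen above.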
BALIGN4
.byte 0,0 // add %al,(%rax)
- .byte 127,67 // jg 2263 <_sk_difference_sse41_8bit+0x1b0>
+ .byte 127,67 // jg 2a47 <_sk_overlay_sse41_8bit+0x447>
.byte 0,0 // add %al,(%rax)
- .byte 127,67 // jg 2267 <_sk_difference_sse41_8bit+0x1b4>
+ .byte 127,67 // jg 2a4b <_sk_overlay_sse41_8bit+0x44b>
.byte 0,0 // add %al,(%rax)
- .byte 127,67 // jg 226b <_sk_difference_sse41_8bit+0x1b8>
+ .byte 127,67 // jg 2a4f <_sk_overlay_sse41_8bit+0x44f>
BALIGN16
.byte 0,0 // add %al,(%rax)
@@ -62807,6 +63619,67 @@ BALIGN16
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,0 // incl (%rax)
+ .byte 3,3 // add (%rbx),%eax
+ .byte 3,3 // add (%rbx),%eax
+ .byte 7 // (bad)
+ .byte 7 // (bad)
+ .byte 7 // (bad)
+ .byte 7 // (bad)
+ .byte 11,11 // or (%rbx),%ecx
+ .byte 11,11 // or (%rbx),%ecx
+ .byte 15 // (bad)
+ .byte 15 // (bad)
+ .byte 15 // (bad)
+ .byte 15,255 // (bad)
+ .byte 0,255 // add %bh,%bh
+ .byte 0,255 // add %bh,%bh
+ .byte 0,255 // add %bh,%bh
+ .byte 0,255 // add %bh,%bh
+ .byte 0,255 // add %bh,%bh
+ .byte 0,255 // add %bh,%bh
+ .byte 0,255 // add %bh,%bh
+ .byte 0,127,0 // add %bh,0x0(%rdi)
+ .byte 127,0 // jg 2d14 <.literal16+0x304>
+ .byte 127,0 // jg 2d16 <.literal16+0x306>
+ .byte 127,0 // jg 2d18 <.literal16+0x308>
+ .byte 127,0 // jg 2d1a <.literal16+0x30a>
+ .byte 127,0 // jg 2d1c <.literal16+0x30c>
+ .byte 127,0 // jg 2d1e <.literal16+0x30e>
+ .byte 127,0 // jg 2d20 <.literal16+0x310>
+ .byte 129,128,129,128,129,128,129,128,129,128// addl $0x80818081,-0x7f7e7f7f(%rax)
+ .byte 129,128,129,128,129,128,3,3,3,3 // addl $0x3030303,-0x7f7e7f7f(%rax)
+ .byte 7 // (bad)
+ .byte 7 // (bad)
+ .byte 7 // (bad)
+ .byte 7 // (bad)
+ .byte 11,11 // or (%rbx),%ecx
+ .byte 11,11 // or (%rbx),%ecx
+ .byte 15 // (bad)
+ .byte 15 // (bad)
+ .byte 15 // (bad)
+ .byte 15,255 // (bad)
+ .byte 0,255 // add %bh,%bh
+ .byte 0,255 // add %bh,%bh
+ .byte 0,255 // add %bh,%bh
+ .byte 0,255 // add %bh,%bh
+ .byte 0,255 // add %bh,%bh
+ .byte 0,255 // add %bh,%bh
+ .byte 0,255 // add %bh,%bh
+ .byte 0,127,0 // add %bh,0x0(%rdi)
+ .byte 127,0 // jg 2d54 <.literal16+0x344>
+ .byte 127,0 // jg 2d56 <.literal16+0x346>
+ .byte 127,0 // jg 2d58 <.literal16+0x348>
+ .byte 127,0 // jg 2d5a <.literal16+0x34a>
+ .byte 127,0 // jg 2d5c <.literal16+0x34c>
+ .byte 127,0 // jg 2d5e <.literal16+0x34e>
+ .byte 127,0 // jg 2d60 <.literal16+0x350>
+ .byte 129,128,129,128,129,128,129,128,129,128// addl $0x80818081,-0x7f7e7f7f(%rax)
+ .byte 129 // .byte 0x81
+ .byte 128 // .byte 0x80
+ .byte 129 // .byte 0x81
+ .byte 128 // .byte 0x80
+ .byte 129 // .byte 0x81
+ .byte 128 // .byte 0x80
BALIGN32
HIDDEN _sk_start_pipeline_sse2_8bit
@@ -62891,7 +63764,7 @@ HIDDEN _sk_set_rgb_sse2_8bit
FUNCTION(_sk_set_rgb_sse2_8bit)
_sk_set_rgb_sse2_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 243,15,16,37,89,40,0,0 // movss 0x2859(%rip),%xmm4 # 291c <_sk_difference_sse2_8bit+0x1c5>
+ .byte 243,15,16,37,209,48,0,0 // movss 0x30d1(%rip),%xmm4 # 3194 <_sk_overlay_sse2_8bit+0x43f>
.byte 243,15,16,40 // movss (%rax),%xmm5
.byte 243,15,89,236 // mulss %xmm4,%xmm5
.byte 243,72,15,44,205 // cvttss2si %xmm5,%rcx
@@ -62906,7 +63779,7 @@ _sk_set_rgb_sse2_8bit:
.byte 9,208 // or %edx,%eax
.byte 102,15,110,224 // movd %eax,%xmm4
.byte 102,15,112,228,0 // pshufd $0x0,%xmm4,%xmm4
- .byte 102,15,111,45,45,40,0,0 // movdqa 0x282d(%rip),%xmm5 # 2930 <_sk_difference_sse2_8bit+0x1d9>
+ .byte 102,15,111,45,157,48,0,0 // movdqa 0x309d(%rip),%xmm5 # 31a0 <_sk_overlay_sse2_8bit+0x44b>
.byte 102,15,219,205 // pand %xmm5,%xmm1
.byte 102,15,219,197 // pand %xmm5,%xmm0
.byte 102,15,235,196 // por %xmm4,%xmm0
@@ -62931,7 +63804,7 @@ _sk_premul_sse2_8bit:
.byte 102,15,96,192 // punpcklbw %xmm0,%xmm0
.byte 242,15,112,192,95 // pshuflw $0x5f,%xmm0,%xmm0
.byte 243,15,112,248,95 // pshufhw $0x5f,%xmm0,%xmm7
- .byte 102,15,111,5,225,39,0,0 // movdqa 0x27e1(%rip),%xmm0 # 2940 <_sk_difference_sse2_8bit+0x1e9>
+ .byte 102,15,111,5,81,48,0,0 // movdqa 0x3051(%rip),%xmm0 # 31b0 <_sk_overlay_sse2_8bit+0x45b>
.byte 102,15,235,248 // por %xmm0,%xmm7
.byte 102,15,235,240 // por %xmm0,%xmm6
.byte 102,69,15,239,201 // pxor %xmm9,%xmm9
@@ -63452,7 +64325,7 @@ _sk_load_a8_sse2_8bit:
.byte 117,48 // jne 7fd <_sk_load_a8_sse2_8bit+0x4d>
.byte 243,66,15,126,4,2 // movq (%rdx,%r8,1),%xmm0
.byte 102,15,96,192 // punpcklbw %xmm0,%xmm0
- .byte 102,15,84,5,113,33,0,0 // andpd 0x2171(%rip),%xmm0 # 2950 <_sk_difference_sse2_8bit+0x1f9>
+ .byte 102,15,84,5,225,41,0,0 // andpd 0x29e1(%rip),%xmm0 # 31c0 <_sk_overlay_sse2_8bit+0x46b>
.byte 102,15,239,228 // pxor %xmm4,%xmm4
.byte 102,15,40,200 // movapd %xmm0,%xmm1
.byte 102,15,105,204 // punpckhwd %xmm4,%xmm1
@@ -63529,7 +64402,7 @@ _sk_load_a8_dst_sse2_8bit:
.byte 117,48 // jne 8f1 <_sk_load_a8_dst_sse2_8bit+0x4d>
.byte 243,66,15,126,20,2 // movq (%rdx,%r8,1),%xmm2
.byte 102,15,96,208 // punpcklbw %xmm0,%xmm2
- .byte 102,15,84,21,141,32,0,0 // andpd 0x208d(%rip),%xmm2 # 2960 <_sk_difference_sse2_8bit+0x209>
+ .byte 102,15,84,21,253,40,0,0 // andpd 0x28fd(%rip),%xmm2 # 31d0 <_sk_overlay_sse2_8bit+0x47b>
.byte 102,15,239,228 // pxor %xmm4,%xmm4
.byte 102,15,40,218 // movapd %xmm2,%xmm3
.byte 102,15,105,220 // punpckhwd %xmm4,%xmm3
@@ -63613,7 +64486,7 @@ _sk_store_a8_sse2_8bit:
.byte 102,15,107,229 // packssdw %xmm5,%xmm4
.byte 77,133,201 // test %r9,%r9
.byte 117,22 // jne 9f5 <_sk_store_a8_sse2_8bit+0x5d>
- .byte 102,15,219,37,137,31,0,0 // pand 0x1f89(%rip),%xmm4 # 2970 <_sk_difference_sse2_8bit+0x219>
+ .byte 102,15,219,37,249,39,0,0 // pand 0x27f9(%rip),%xmm4 # 31e0 <_sk_overlay_sse2_8bit+0x48b>
.byte 102,15,103,228 // packuswb %xmm4,%xmm4
.byte 102,66,15,214,36,2 // movq %xmm4,(%rdx,%r8,1)
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -63634,7 +64507,7 @@ _sk_store_a8_sse2_8bit:
.byte 102,15,127,100,36,184 // movdqa %xmm4,-0x48(%rsp)
.byte 138,68,36,188 // mov -0x44(%rsp),%al
.byte 66,136,68,2,2 // mov %al,0x2(%rdx,%r8,1)
- .byte 102,15,219,37,51,31,0,0 // pand 0x1f33(%rip),%xmm4 # 2970 <_sk_difference_sse2_8bit+0x219>
+ .byte 102,15,219,37,163,39,0,0 // pand 0x27a3(%rip),%xmm4 # 31e0 <_sk_overlay_sse2_8bit+0x48b>
.byte 102,15,103,228 // packuswb %xmm4,%xmm4
.byte 102,15,126,224 // movd %xmm4,%eax
.byte 102,66,137,4,2 // mov %ax,(%rdx,%r8,1)
@@ -63648,7 +64521,7 @@ _sk_store_a8_sse2_8bit:
.byte 102,15,127,100,36,200 // movdqa %xmm4,-0x38(%rsp)
.byte 138,68,36,208 // mov -0x30(%rsp),%al
.byte 66,136,68,2,4 // mov %al,0x4(%rdx,%r8,1)
- .byte 102,15,219,37,239,30,0,0 // pand 0x1eef(%rip),%xmm4 # 2970 <_sk_difference_sse2_8bit+0x219>
+ .byte 102,15,219,37,95,39,0,0 // pand 0x275f(%rip),%xmm4 # 31e0 <_sk_overlay_sse2_8bit+0x48b>
.byte 102,15,103,228 // packuswb %xmm4,%xmm4
.byte 102,66,15,126,36,2 // movd %xmm4,(%rdx,%r8,1)
.byte 233,97,255,255,255 // jmpq 9f1 <_sk_store_a8_sse2_8bit+0x59>
@@ -63658,7 +64531,7 @@ _sk_store_a8_sse2_8bit:
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 233,255,255,255,218 // jmpq ffffffffdb000aa0 <_sk_difference_sse2_8bit+0xffffffffdaffe349>
+ .byte 233,255,255,255,218 // jmpq ffffffffdb000aa0 <_sk_overlay_sse2_8bit+0xffffffffdaffdd4b>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,203 // dec %ebx
@@ -63685,12 +64558,12 @@ _sk_load_g8_sse2_8bit:
.byte 117,116 // jne b3d <_sk_load_g8_sse2_8bit+0x91>
.byte 243,66,15,126,4,2 // movq (%rdx,%r8,1),%xmm0
.byte 102,15,96,192 // punpcklbw %xmm0,%xmm0
- .byte 102,15,84,5,165,30,0,0 // andpd 0x1ea5(%rip),%xmm0 # 2980 <_sk_difference_sse2_8bit+0x229>
+ .byte 102,15,84,5,21,39,0,0 // andpd 0x2715(%rip),%xmm0 # 31f0 <_sk_overlay_sse2_8bit+0x49b>
.byte 102,15,239,201 // pxor %xmm1,%xmm1
.byte 102,15,40,224 // movapd %xmm0,%xmm4
.byte 102,15,97,225 // punpcklwd %xmm1,%xmm4
.byte 102,15,105,193 // punpckhwd %xmm1,%xmm0
- .byte 102,15,111,45,157,30,0,0 // movdqa 0x1e9d(%rip),%xmm5 # 2990 <_sk_difference_sse2_8bit+0x239>
+ .byte 102,15,111,45,13,39,0,0 // movdqa 0x270d(%rip),%xmm5 # 3200 <_sk_overlay_sse2_8bit+0x4ab>
.byte 102,15,112,240,245 // pshufd $0xf5,%xmm0,%xmm6
.byte 102,15,244,197 // pmuludq %xmm5,%xmm0
.byte 102,15,112,200,232 // pshufd $0xe8,%xmm0,%xmm1
@@ -63703,7 +64576,7 @@ _sk_load_g8_sse2_8bit:
.byte 102,15,244,245 // pmuludq %xmm5,%xmm6
.byte 102,15,112,230,232 // pshufd $0xe8,%xmm6,%xmm4
.byte 102,15,98,196 // punpckldq %xmm4,%xmm0
- .byte 102,15,111,37,111,30,0,0 // movdqa 0x1e6f(%rip),%xmm4 # 29a0 <_sk_difference_sse2_8bit+0x249>
+ .byte 102,15,111,37,223,38,0,0 // movdqa 0x26df(%rip),%xmm4 # 3210 <_sk_overlay_sse2_8bit+0x4bb>
.byte 102,15,235,196 // por %xmm4,%xmm0
.byte 102,15,235,204 // por %xmm4,%xmm1
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -63777,12 +64650,12 @@ _sk_load_g8_dst_sse2_8bit:
.byte 117,116 // jne c7d <_sk_load_g8_dst_sse2_8bit+0x91>
.byte 243,66,15,126,20,2 // movq (%rdx,%r8,1),%xmm2
.byte 102,15,96,208 // punpcklbw %xmm0,%xmm2
- .byte 102,15,84,21,149,29,0,0 // andpd 0x1d95(%rip),%xmm2 # 29b0 <_sk_difference_sse2_8bit+0x259>
+ .byte 102,15,84,21,5,38,0,0 // andpd 0x2605(%rip),%xmm2 # 3220 <_sk_overlay_sse2_8bit+0x4cb>
.byte 102,15,239,219 // pxor %xmm3,%xmm3
.byte 102,15,40,226 // movapd %xmm2,%xmm4
.byte 102,15,97,227 // punpcklwd %xmm3,%xmm4
.byte 102,15,105,211 // punpckhwd %xmm3,%xmm2
- .byte 102,15,111,45,141,29,0,0 // movdqa 0x1d8d(%rip),%xmm5 # 29c0 <_sk_difference_sse2_8bit+0x269>
+ .byte 102,15,111,45,253,37,0,0 // movdqa 0x25fd(%rip),%xmm5 # 3230 <_sk_overlay_sse2_8bit+0x4db>
.byte 102,15,112,242,245 // pshufd $0xf5,%xmm2,%xmm6
.byte 102,15,244,213 // pmuludq %xmm5,%xmm2
.byte 102,15,112,218,232 // pshufd $0xe8,%xmm2,%xmm3
@@ -63795,7 +64668,7 @@ _sk_load_g8_dst_sse2_8bit:
.byte 102,15,244,245 // pmuludq %xmm5,%xmm6
.byte 102,15,112,230,232 // pshufd $0xe8,%xmm6,%xmm4
.byte 102,15,98,212 // punpckldq %xmm4,%xmm2
- .byte 102,15,111,37,95,29,0,0 // movdqa 0x1d5f(%rip),%xmm4 # 29d0 <_sk_difference_sse2_8bit+0x279>
+ .byte 102,15,111,37,207,37,0,0 // movdqa 0x25cf(%rip),%xmm4 # 3240 <_sk_overlay_sse2_8bit+0x4eb>
.byte 102,15,235,212 // por %xmm4,%xmm2
.byte 102,15,235,220 // por %xmm4,%xmm3
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -64012,7 +64885,7 @@ _sk_scale_1_float_sse2_8bit:
.byte 102,68,15,111,200 // movdqa %xmm0,%xmm9
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 243,15,16,0 // movss (%rax),%xmm0
- .byte 243,15,89,5,144,25,0,0 // mulss 0x1990(%rip),%xmm0 # 2920 <_sk_difference_sse2_8bit+0x1c9>
+ .byte 243,15,89,5,8,34,0,0 // mulss 0x2208(%rip),%xmm0 # 3198 <_sk_overlay_sse2_8bit+0x443>
.byte 243,15,44,192 // cvttss2si %xmm0,%eax
.byte 102,15,239,246 // pxor %xmm6,%xmm6
.byte 102,65,15,111,193 // movdqa %xmm9,%xmm0
@@ -64024,7 +64897,7 @@ _sk_scale_1_float_sse2_8bit:
.byte 102,15,96,246 // punpcklbw %xmm6,%xmm6
.byte 242,15,112,246,0 // pshuflw $0x0,%xmm6,%xmm6
.byte 102,15,112,246,80 // pshufd $0x50,%xmm6,%xmm6
- .byte 102,15,219,53,23,26,0,0 // pand 0x1a17(%rip),%xmm6 # 29e0 <_sk_difference_sse2_8bit+0x289>
+ .byte 102,15,219,53,135,34,0,0 // pand 0x2287(%rip),%xmm6 # 3250 <_sk_overlay_sse2_8bit+0x4fb>
.byte 102,15,111,254 // movdqa %xmm6,%xmm7
.byte 102,65,15,213,248 // pmullw %xmm8,%xmm7
.byte 102,15,111,230 // movdqa %xmm6,%xmm4
@@ -64060,7 +64933,7 @@ _sk_scale_u8_sse2_8bit:
.byte 15,133,239,0,0,0 // jne 1129 <_sk_scale_u8_sse2_8bit+0x110>
.byte 243,66,15,126,36,2 // movq (%rdx,%r8,1),%xmm4
.byte 102,15,96,224 // punpcklbw %xmm0,%xmm4
- .byte 102,15,84,37,164,25,0,0 // andpd 0x19a4(%rip),%xmm4 # 29f0 <_sk_difference_sse2_8bit+0x299>
+ .byte 102,15,84,37,20,34,0,0 // andpd 0x2214(%rip),%xmm4 # 3260 <_sk_overlay_sse2_8bit+0x50b>
.byte 102,69,15,239,192 // pxor %xmm8,%xmm8
.byte 102,15,40,236 // movapd %xmm4,%xmm5
.byte 102,65,15,105,232 // punpckhwd %xmm8,%xmm5
@@ -64169,7 +65042,7 @@ FUNCTION(_sk_lerp_1_float_sse2_8bit)
_sk_lerp_1_float_sse2_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 243,15,16,32 // movss (%rax),%xmm4
- .byte 243,15,89,37,58,23,0,0 // mulss 0x173a(%rip),%xmm4 # 2924 <_sk_difference_sse2_8bit+0x1cd>
+ .byte 243,15,89,37,178,31,0,0 // mulss 0x1fb2(%rip),%xmm4 # 319c <_sk_overlay_sse2_8bit+0x447>
.byte 243,15,44,196 // cvttss2si %xmm4,%eax
.byte 102,15,110,224 // movd %eax,%xmm4
.byte 102,15,96,228 // punpcklbw %xmm4,%xmm4
@@ -64182,7 +65055,7 @@ _sk_lerp_1_float_sse2_8bit:
.byte 102,68,15,111,217 // movdqa %xmm1,%xmm11
.byte 102,69,15,96,217 // punpcklbw %xmm9,%xmm11
.byte 102,65,15,104,201 // punpckhbw %xmm9,%xmm1
- .byte 102,15,111,53,213,23,0,0 // movdqa 0x17d5(%rip),%xmm6 # 2a00 <_sk_difference_sse2_8bit+0x2a9>
+ .byte 102,15,111,53,69,32,0,0 // movdqa 0x2045(%rip),%xmm6 # 3270 <_sk_overlay_sse2_8bit+0x51b>
.byte 102,65,15,219,240 // pand %xmm8,%xmm6
.byte 102,15,111,230 // movdqa %xmm6,%xmm4
.byte 102,15,213,225 // pmullw %xmm1,%xmm4
@@ -64250,7 +65123,7 @@ _sk_lerp_u8_sse2_8bit:
.byte 15,133,141,1,0,0 // jne 14c0 <_sk_lerp_u8_sse2_8bit+0x1ae>
.byte 243,66,15,126,44,2 // movq (%rdx,%r8,1),%xmm5
.byte 102,15,96,232 // punpcklbw %xmm0,%xmm5
- .byte 102,15,84,45,203,22,0,0 // andpd 0x16cb(%rip),%xmm5 # 2a10 <_sk_difference_sse2_8bit+0x2b9>
+ .byte 102,15,84,45,59,31,0,0 // andpd 0x1f3b(%rip),%xmm5 # 3280 <_sk_overlay_sse2_8bit+0x52b>
.byte 102,69,15,239,192 // pxor %xmm8,%xmm8
.byte 102,15,40,229 // movapd %xmm5,%xmm4
.byte 102,65,15,105,224 // punpckhwd %xmm8,%xmm4
@@ -64408,7 +65281,7 @@ HIDDEN _sk_black_color_sse2_8bit
FUNCTION(_sk_black_color_sse2_8bit)
_sk_black_color_sse2_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 15,40,5,147,20,0,0 // movaps 0x1493(%rip),%xmm0 # 2a20 <_sk_difference_sse2_8bit+0x2c9>
+ .byte 15,40,5,3,29,0,0 // movaps 0x1d03(%rip),%xmm0 # 3290 <_sk_overlay_sse2_8bit+0x53b>
.byte 15,40,200 // movaps %xmm0,%xmm1
.byte 255,224 // jmpq *%rax
@@ -65300,7 +66173,7 @@ _sk_darken_sse2_8bit:
.byte 102,65,15,248,234 // psubb %xmm10,%xmm5
.byte 102,15,248,207 // psubb %xmm7,%xmm1
.byte 102,15,248,196 // psubb %xmm4,%xmm0
- .byte 102,15,111,37,174,5,0,0 // movdqa 0x5ae(%rip),%xmm4 # 2a30 <_sk_difference_sse2_8bit+0x2d9>
+ .byte 102,15,111,37,30,14,0,0 // movdqa 0xe1e(%rip),%xmm4 # 32a0 <_sk_overlay_sse2_8bit+0x54b>
.byte 102,15,219,236 // pand %xmm4,%xmm5
.byte 102,15,111,252 // movdqa %xmm4,%xmm7
.byte 102,15,223,248 // pandn %xmm0,%xmm7
@@ -65408,7 +66281,7 @@ _sk_lighten_sse2_8bit:
.byte 102,65,15,248,234 // psubb %xmm10,%xmm5
.byte 102,15,248,207 // psubb %xmm7,%xmm1
.byte 102,15,248,196 // psubb %xmm4,%xmm0
- .byte 102,15,111,37,210,3,0,0 // movdqa 0x3d2(%rip),%xmm4 # 2a40 <_sk_difference_sse2_8bit+0x2e9>
+ .byte 102,15,111,37,66,12,0,0 // movdqa 0xc42(%rip),%xmm4 # 32b0 <_sk_overlay_sse2_8bit+0x55b>
.byte 102,15,219,236 // pand %xmm4,%xmm5
.byte 102,15,111,252 // movdqa %xmm4,%xmm7
.byte 102,15,223,248 // pandn %xmm0,%xmm7
@@ -65458,7 +66331,7 @@ _sk_exclusion_sse2_8bit:
.byte 102,15,113,214,8 // psrlw $0x8,%xmm6
.byte 102,15,103,244 // packuswb %xmm4,%xmm6
.byte 102,15,103,239 // packuswb %xmm7,%xmm5
- .byte 102,15,111,37,21,3,0,0 // movdqa 0x315(%rip),%xmm4 # 2a50 <_sk_difference_sse2_8bit+0x2f9>
+ .byte 102,15,111,37,133,11,0,0 // movdqa 0xb85(%rip),%xmm4 # 32c0 <_sk_overlay_sse2_8bit+0x56b>
.byte 102,15,248,205 // psubb %xmm5,%xmm1
.byte 102,15,219,236 // pand %xmm4,%xmm5
.byte 102,15,219,230 // pand %xmm6,%xmm4
@@ -65557,7 +66430,7 @@ _sk_difference_sse2_8bit:
.byte 102,65,15,103,244 // packuswb %xmm12,%xmm6
.byte 102,65,15,218,226 // pminub %xmm10,%xmm4
.byte 102,65,15,218,243 // pminub %xmm11,%xmm6
- .byte 102,15,111,45,98,1,0,0 // movdqa 0x162(%rip),%xmm5 # 2a60 <_sk_difference_sse2_8bit+0x309>
+ .byte 102,15,111,45,210,9,0,0 // movdqa 0x9d2(%rip),%xmm5 # 32d0 <_sk_overlay_sse2_8bit+0x57b>
.byte 102,15,248,206 // psubb %xmm6,%xmm1
.byte 102,15,219,245 // pand %xmm5,%xmm6
.byte 102,15,219,236 // pand %xmm4,%xmm5
@@ -65567,13 +66440,454 @@ _sk_difference_sse2_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
+HIDDEN _sk_hardlight_sse2_8bit
+.globl _sk_hardlight_sse2_8bit
+FUNCTION(_sk_hardlight_sse2_8bit)
+_sk_hardlight_sse2_8bit:
+ .byte 72,129,236,184,0,0,0 // sub $0xb8,%rsp
+ .byte 102,68,15,111,250 // movdqa %xmm2,%xmm15
+ .byte 102,68,15,111,200 // movdqa %xmm0,%xmm9
+ .byte 102,15,239,237 // pxor %xmm5,%xmm5
+ .byte 102,69,15,111,241 // movdqa %xmm9,%xmm14
+ .byte 102,68,15,96,245 // punpcklbw %xmm5,%xmm14
+ .byte 102,15,111,193 // movdqa %xmm1,%xmm0
+ .byte 102,15,96,197 // punpcklbw %xmm5,%xmm0
+ .byte 102,68,15,111,192 // movdqa %xmm0,%xmm8
+ .byte 242,65,15,112,193,231 // pshuflw $0xe7,%xmm9,%xmm0
+ .byte 102,68,15,104,205 // punpckhbw %xmm5,%xmm9
+ .byte 243,15,112,192,231 // pshufhw $0xe7,%xmm0,%xmm0
+ .byte 102,15,112,192,232 // pshufd $0xe8,%xmm0,%xmm0
+ .byte 102,15,96,192 // punpcklbw %xmm0,%xmm0
+ .byte 242,15,112,192,95 // pshuflw $0x5f,%xmm0,%xmm0
+ .byte 243,15,112,224,95 // pshufhw $0x5f,%xmm0,%xmm4
+ .byte 242,15,112,193,231 // pshuflw $0xe7,%xmm1,%xmm0
+ .byte 102,15,104,205 // punpckhbw %xmm5,%xmm1
+ .byte 102,15,127,76,36,192 // movdqa %xmm1,-0x40(%rsp)
+ .byte 243,15,112,192,231 // pshufhw $0xe7,%xmm0,%xmm0
+ .byte 102,15,112,192,232 // pshufd $0xe8,%xmm0,%xmm0
+ .byte 102,15,96,192 // punpcklbw %xmm0,%xmm0
+ .byte 242,15,112,192,95 // pshuflw $0x5f,%xmm0,%xmm0
+ .byte 243,68,15,112,224,95 // pshufhw $0x5f,%xmm0,%xmm12
+ .byte 102,65,15,111,252 // movdqa %xmm12,%xmm7
+ .byte 102,15,104,253 // punpckhbw %xmm5,%xmm7
+ .byte 102,15,127,124,36,48 // movdqa %xmm7,0x30(%rsp)
+ .byte 102,68,15,96,229 // punpcklbw %xmm5,%xmm12
+ .byte 102,15,111,196 // movdqa %xmm4,%xmm0
+ .byte 102,15,104,197 // punpckhbw %xmm5,%xmm0
+ .byte 102,15,127,68,36,160 // movdqa %xmm0,-0x60(%rsp)
+ .byte 102,15,96,229 // punpcklbw %xmm5,%xmm4
+ .byte 102,68,15,127,188,36,160,0,0,0 // movdqa %xmm15,0xa0(%rsp)
+ .byte 102,65,15,111,207 // movdqa %xmm15,%xmm1
+ .byte 242,65,15,112,199,231 // pshuflw $0xe7,%xmm15,%xmm0
+ .byte 102,68,15,96,253 // punpcklbw %xmm5,%xmm15
+ .byte 102,15,104,205 // punpckhbw %xmm5,%xmm1
+ .byte 102,15,127,76,36,144 // movdqa %xmm1,-0x70(%rsp)
+ .byte 102,15,127,156,36,144,0,0,0 // movdqa %xmm3,0x90(%rsp)
+ .byte 102,15,111,211 // movdqa %xmm3,%xmm2
+ .byte 102,15,111,203 // movdqa %xmm3,%xmm1
+ .byte 102,15,96,205 // punpcklbw %xmm5,%xmm1
+ .byte 102,15,127,76,36,128 // movdqa %xmm1,-0x80(%rsp)
+ .byte 102,15,104,213 // punpckhbw %xmm5,%xmm2
+ .byte 102,15,127,148,36,128,0,0,0 // movdqa %xmm2,0x80(%rsp)
+ .byte 243,15,112,192,231 // pshufhw $0xe7,%xmm0,%xmm0
+ .byte 102,15,112,192,232 // pshufd $0xe8,%xmm0,%xmm0
+ .byte 102,15,96,192 // punpcklbw %xmm0,%xmm0
+ .byte 242,15,112,192,95 // pshuflw $0x5f,%xmm0,%xmm0
+ .byte 243,15,112,240,95 // pshufhw $0x5f,%xmm0,%xmm6
+ .byte 242,15,112,195,231 // pshuflw $0xe7,%xmm3,%xmm0
+ .byte 243,15,112,192,231 // pshufhw $0xe7,%xmm0,%xmm0
+ .byte 102,15,112,192,232 // pshufd $0xe8,%xmm0,%xmm0
+ .byte 102,15,96,192 // punpcklbw %xmm0,%xmm0
+ .byte 242,15,112,192,95 // pshuflw $0x5f,%xmm0,%xmm0
+ .byte 243,68,15,112,232,95 // pshufhw $0x5f,%xmm0,%xmm13
+ .byte 102,65,15,111,205 // movdqa %xmm13,%xmm1
+ .byte 102,15,104,205 // punpckhbw %xmm5,%xmm1
+ .byte 102,15,127,76,36,176 // movdqa %xmm1,-0x50(%rsp)
+ .byte 102,68,15,96,237 // punpcklbw %xmm5,%xmm13
+ .byte 102,15,111,206 // movdqa %xmm6,%xmm1
+ .byte 102,15,104,205 // punpckhbw %xmm5,%xmm1
+ .byte 102,15,96,245 // punpcklbw %xmm5,%xmm6
+ .byte 102,69,15,111,214 // movdqa %xmm14,%xmm10
+ .byte 102,69,15,253,210 // paddw %xmm10,%xmm10
+ .byte 102,65,15,111,193 // movdqa %xmm9,%xmm0
+ .byte 102,15,253,192 // paddw %xmm0,%xmm0
+ .byte 102,65,15,111,216 // movdqa %xmm8,%xmm3
+ .byte 102,69,15,253,192 // paddw %xmm8,%xmm8
+ .byte 102,15,111,84,36,192 // movdqa -0x40(%rsp),%xmm2
+ .byte 102,15,253,210 // paddw %xmm2,%xmm2
+ .byte 102,15,217,215 // psubusw %xmm7,%xmm2
+ .byte 102,15,117,213 // pcmpeqw %xmm5,%xmm2
+ .byte 102,15,127,84,36,112 // movdqa %xmm2,0x70(%rsp)
+ .byte 102,69,15,217,196 // psubusw %xmm12,%xmm8
+ .byte 102,68,15,117,197 // pcmpeqw %xmm5,%xmm8
+ .byte 102,15,111,84,36,160 // movdqa -0x60(%rsp),%xmm2
+ .byte 102,15,217,194 // psubusw %xmm2,%xmm0
+ .byte 102,15,117,197 // pcmpeqw %xmm5,%xmm0
+ .byte 102,15,127,68,36,64 // movdqa %xmm0,0x40(%rsp)
+ .byte 102,68,15,111,220 // movdqa %xmm4,%xmm11
+ .byte 102,69,15,217,211 // psubusw %xmm11,%xmm10
+ .byte 102,68,15,117,213 // pcmpeqw %xmm5,%xmm10
+ .byte 102,15,111,45,29,8,0,0 // movdqa 0x81d(%rip),%xmm5 # 32e0 <_sk_overlay_sse2_8bit+0x58b>
+ .byte 102,15,111,198 // movdqa %xmm6,%xmm0
+ .byte 102,15,239,197 // pxor %xmm5,%xmm0
+ .byte 102,65,15,213,198 // pmullw %xmm14,%xmm0
+ .byte 102,15,127,68,36,96 // movdqa %xmm0,0x60(%rsp)
+ .byte 102,65,15,111,195 // movdqa %xmm11,%xmm0
+ .byte 102,15,239,197 // pxor %xmm5,%xmm0
+ .byte 102,65,15,213,199 // pmullw %xmm15,%xmm0
+ .byte 102,15,127,68,36,80 // movdqa %xmm0,0x50(%rsp)
+ .byte 102,15,111,198 // movdqa %xmm6,%xmm0
+ .byte 102,65,15,249,247 // psubw %xmm15,%xmm6
+ .byte 102,15,127,52,36 // movdqa %xmm6,(%rsp)
+ .byte 102,65,15,111,255 // movdqa %xmm15,%xmm7
+ .byte 102,65,15,213,254 // pmullw %xmm14,%xmm7
+ .byte 102,65,15,213,195 // pmullw %xmm11,%xmm0
+ .byte 102,15,127,68,36,16 // movdqa %xmm0,0x10(%rsp)
+ .byte 102,69,15,249,222 // psubw %xmm14,%xmm11
+ .byte 102,15,111,193 // movdqa %xmm1,%xmm0
+ .byte 102,15,239,197 // pxor %xmm5,%xmm0
+ .byte 102,65,15,213,193 // pmullw %xmm9,%xmm0
+ .byte 102,15,127,68,36,32 // movdqa %xmm0,0x20(%rsp)
+ .byte 102,68,15,111,250 // movdqa %xmm2,%xmm15
+ .byte 102,68,15,239,253 // pxor %xmm5,%xmm15
+ .byte 102,15,111,116,36,144 // movdqa -0x70(%rsp),%xmm6
+ .byte 102,68,15,213,254 // pmullw %xmm6,%xmm15
+ .byte 102,15,111,193 // movdqa %xmm1,%xmm0
+ .byte 102,15,249,206 // psubw %xmm6,%xmm1
+ .byte 102,15,127,76,36,224 // movdqa %xmm1,-0x20(%rsp)
+ .byte 102,65,15,213,241 // pmullw %xmm9,%xmm6
+ .byte 102,15,213,194 // pmullw %xmm2,%xmm0
+ .byte 102,15,127,68,36,240 // movdqa %xmm0,-0x10(%rsp)
+ .byte 102,65,15,249,209 // psubw %xmm9,%xmm2
+ .byte 102,15,127,84,36,160 // movdqa %xmm2,-0x60(%rsp)
+ .byte 102,65,15,111,197 // movdqa %xmm13,%xmm0
+ .byte 102,15,111,200 // movdqa %xmm0,%xmm1
+ .byte 102,15,239,205 // pxor %xmm5,%xmm1
+ .byte 102,15,213,203 // pmullw %xmm3,%xmm1
+ .byte 102,15,127,76,36,144 // movdqa %xmm1,-0x70(%rsp)
+ .byte 102,69,15,111,204 // movdqa %xmm12,%xmm9
+ .byte 102,68,15,239,205 // pxor %xmm5,%xmm9
+ .byte 102,15,111,100,36,128 // movdqa -0x80(%rsp),%xmm4
+ .byte 102,68,15,213,204 // pmullw %xmm4,%xmm9
+ .byte 102,68,15,111,232 // movdqa %xmm0,%xmm13
+ .byte 102,15,249,196 // psubw %xmm4,%xmm0
+ .byte 102,15,127,68,36,208 // movdqa %xmm0,-0x30(%rsp)
+ .byte 102,15,213,227 // pmullw %xmm3,%xmm4
+ .byte 102,69,15,213,236 // pmullw %xmm12,%xmm13
+ .byte 102,68,15,249,227 // psubw %xmm3,%xmm12
+ .byte 102,15,111,76,36,176 // movdqa -0x50(%rsp),%xmm1
+ .byte 102,15,111,193 // movdqa %xmm1,%xmm0
+ .byte 102,15,239,197 // pxor %xmm5,%xmm0
+ .byte 102,15,111,92,36,192 // movdqa -0x40(%rsp),%xmm3
+ .byte 102,15,213,195 // pmullw %xmm3,%xmm0
+ .byte 102,15,127,68,36,128 // movdqa %xmm0,-0x80(%rsp)
+ .byte 102,15,111,68,36,48 // movdqa 0x30(%rsp),%xmm0
+ .byte 102,15,239,232 // pxor %xmm0,%xmm5
+ .byte 102,15,111,148,36,128,0,0,0 // movdqa 0x80(%rsp),%xmm2
+ .byte 102,15,213,234 // pmullw %xmm2,%xmm5
+ .byte 102,68,15,111,241 // movdqa %xmm1,%xmm14
+ .byte 102,15,249,202 // psubw %xmm2,%xmm1
+ .byte 102,15,127,76,36,176 // movdqa %xmm1,-0x50(%rsp)
+ .byte 102,15,213,211 // pmullw %xmm3,%xmm2
+ .byte 102,68,15,213,240 // pmullw %xmm0,%xmm14
+ .byte 102,15,249,195 // psubw %xmm3,%xmm0
+ .byte 102,15,213,68,36,176 // pmullw -0x50(%rsp),%xmm0
+ .byte 102,68,15,213,100,36,208 // pmullw -0x30(%rsp),%xmm12
+ .byte 102,15,111,76,36,160 // movdqa -0x60(%rsp),%xmm1
+ .byte 102,15,213,76,36,224 // pmullw -0x20(%rsp),%xmm1
+ .byte 102,68,15,213,28,36 // pmullw (%rsp),%xmm11
+ .byte 102,15,253,192 // paddw %xmm0,%xmm0
+ .byte 102,68,15,249,240 // psubw %xmm0,%xmm14
+ .byte 102,69,15,253,228 // paddw %xmm12,%xmm12
+ .byte 102,69,15,249,236 // psubw %xmm12,%xmm13
+ .byte 102,15,253,201 // paddw %xmm1,%xmm1
+ .byte 102,15,111,92,36,240 // movdqa -0x10(%rsp),%xmm3
+ .byte 102,15,249,217 // psubw %xmm1,%xmm3
+ .byte 102,69,15,253,219 // paddw %xmm11,%xmm11
+ .byte 102,15,111,68,36,16 // movdqa 0x10(%rsp),%xmm0
+ .byte 102,65,15,249,195 // psubw %xmm11,%xmm0
+ .byte 102,15,253,255 // paddw %xmm7,%xmm7
+ .byte 102,65,15,219,250 // pand %xmm10,%xmm7
+ .byte 102,68,15,223,208 // pandn %xmm0,%xmm10
+ .byte 102,68,15,235,215 // por %xmm7,%xmm10
+ .byte 102,15,253,246 // paddw %xmm6,%xmm6
+ .byte 102,15,111,68,36,64 // movdqa 0x40(%rsp),%xmm0
+ .byte 102,15,219,240 // pand %xmm0,%xmm6
+ .byte 102,15,223,195 // pandn %xmm3,%xmm0
+ .byte 102,15,235,198 // por %xmm6,%xmm0
+ .byte 102,15,111,216 // movdqa %xmm0,%xmm3
+ .byte 102,15,253,228 // paddw %xmm4,%xmm4
+ .byte 102,65,15,219,224 // pand %xmm8,%xmm4
+ .byte 102,69,15,223,197 // pandn %xmm13,%xmm8
+ .byte 102,68,15,235,196 // por %xmm4,%xmm8
+ .byte 102,15,253,210 // paddw %xmm2,%xmm2
+ .byte 102,15,111,68,36,112 // movdqa 0x70(%rsp),%xmm0
+ .byte 102,15,219,208 // pand %xmm0,%xmm2
+ .byte 102,65,15,223,198 // pandn %xmm14,%xmm0
+ .byte 102,15,235,194 // por %xmm2,%xmm0
+ .byte 102,15,111,200 // movdqa %xmm0,%xmm1
+ .byte 102,15,111,84,36,80 // movdqa 0x50(%rsp),%xmm2
+ .byte 102,15,253,84,36,96 // paddw 0x60(%rsp),%xmm2
+ .byte 102,68,15,253,124,36,32 // paddw 0x20(%rsp),%xmm15
+ .byte 102,68,15,253,76,36,144 // paddw -0x70(%rsp),%xmm9
+ .byte 102,15,253,108,36,128 // paddw -0x80(%rsp),%xmm5
+ .byte 102,15,111,5,33,6,0,0 // movdqa 0x621(%rip),%xmm0 # 32f0 <_sk_overlay_sse2_8bit+0x59b>
+ .byte 102,15,253,208 // paddw %xmm0,%xmm2
+ .byte 102,68,15,253,210 // paddw %xmm2,%xmm10
+ .byte 102,68,15,253,248 // paddw %xmm0,%xmm15
+ .byte 102,68,15,253,251 // paddw %xmm3,%xmm15
+ .byte 102,68,15,253,200 // paddw %xmm0,%xmm9
+ .byte 102,69,15,253,193 // paddw %xmm9,%xmm8
+ .byte 102,15,253,232 // paddw %xmm0,%xmm5
+ .byte 102,15,253,233 // paddw %xmm1,%xmm5
+ .byte 102,15,111,5,4,6,0,0 // movdqa 0x604(%rip),%xmm0 # 3300 <_sk_overlay_sse2_8bit+0x5ab>
+ .byte 102,15,228,232 // pmulhuw %xmm0,%xmm5
+ .byte 102,68,15,228,192 // pmulhuw %xmm0,%xmm8
+ .byte 102,68,15,228,248 // pmulhuw %xmm0,%xmm15
+ .byte 102,68,15,228,208 // pmulhuw %xmm0,%xmm10
+ .byte 102,65,15,113,215,7 // psrlw $0x7,%xmm15
+ .byte 102,65,15,113,210,7 // psrlw $0x7,%xmm10
+ .byte 102,69,15,103,215 // packuswb %xmm15,%xmm10
+ .byte 102,15,113,213,7 // psrlw $0x7,%xmm5
+ .byte 102,65,15,113,208,7 // psrlw $0x7,%xmm8
+ .byte 102,68,15,103,197 // packuswb %xmm5,%xmm8
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 15,40,148,36,160,0,0,0 // movaps 0xa0(%rsp),%xmm2
+ .byte 15,40,156,36,144,0,0,0 // movaps 0x90(%rsp),%xmm3
+ .byte 102,65,15,111,194 // movdqa %xmm10,%xmm0
+ .byte 102,65,15,111,200 // movdqa %xmm8,%xmm1
+ .byte 72,129,196,184,0,0,0 // add $0xb8,%rsp
+ .byte 255,224 // jmpq *%rax
+
+HIDDEN _sk_overlay_sse2_8bit
+.globl _sk_overlay_sse2_8bit
+FUNCTION(_sk_overlay_sse2_8bit)
+_sk_overlay_sse2_8bit:
+ .byte 72,129,236,200,0,0,0 // sub $0xc8,%rsp
+ .byte 102,15,239,228 // pxor %xmm4,%xmm4
+ .byte 102,68,15,111,248 // movdqa %xmm0,%xmm15
+ .byte 102,68,15,96,252 // punpcklbw %xmm4,%xmm15
+ .byte 102,15,111,233 // movdqa %xmm1,%xmm5
+ .byte 102,15,96,236 // punpcklbw %xmm4,%xmm5
+ .byte 102,15,127,108,36,144 // movdqa %xmm5,-0x70(%rsp)
+ .byte 242,15,112,232,231 // pshuflw $0xe7,%xmm0,%xmm5
+ .byte 102,15,104,196 // punpckhbw %xmm4,%xmm0
+ .byte 102,15,127,68,36,128 // movdqa %xmm0,-0x80(%rsp)
+ .byte 243,15,112,197,231 // pshufhw $0xe7,%xmm5,%xmm0
+ .byte 102,15,112,192,232 // pshufd $0xe8,%xmm0,%xmm0
+ .byte 102,15,96,192 // punpcklbw %xmm0,%xmm0
+ .byte 242,15,112,192,95 // pshuflw $0x5f,%xmm0,%xmm0
+ .byte 243,15,112,232,95 // pshufhw $0x5f,%xmm0,%xmm5
+ .byte 242,15,112,193,231 // pshuflw $0xe7,%xmm1,%xmm0
+ .byte 102,15,104,204 // punpckhbw %xmm4,%xmm1
+ .byte 102,15,127,140,36,144,0,0,0 // movdqa %xmm1,0x90(%rsp)
+ .byte 243,15,112,192,231 // pshufhw $0xe7,%xmm0,%xmm0
+ .byte 102,15,112,192,232 // pshufd $0xe8,%xmm0,%xmm0
+ .byte 102,15,96,192 // punpcklbw %xmm0,%xmm0
+ .byte 242,15,112,192,95 // pshuflw $0x5f,%xmm0,%xmm0
+ .byte 243,15,112,200,95 // pshufhw $0x5f,%xmm0,%xmm1
+ .byte 102,15,111,193 // movdqa %xmm1,%xmm0
+ .byte 102,15,104,196 // punpckhbw %xmm4,%xmm0
+ .byte 102,15,127,132,36,128,0,0,0 // movdqa %xmm0,0x80(%rsp)
+ .byte 102,15,96,204 // punpcklbw %xmm4,%xmm1
+ .byte 102,15,127,76,36,96 // movdqa %xmm1,0x60(%rsp)
+ .byte 102,68,15,111,221 // movdqa %xmm5,%xmm11
+ .byte 102,68,15,104,220 // punpckhbw %xmm4,%xmm11
+ .byte 102,15,96,236 // punpcklbw %xmm4,%xmm5
+ .byte 102,68,15,111,213 // movdqa %xmm5,%xmm10
+ .byte 102,15,111,202 // movdqa %xmm2,%xmm1
+ .byte 102,15,127,140,36,176,0,0,0 // movdqa %xmm1,0xb0(%rsp)
+ .byte 102,68,15,111,193 // movdqa %xmm1,%xmm8
+ .byte 242,15,112,193,231 // pshuflw $0xe7,%xmm1,%xmm0
+ .byte 102,15,96,204 // punpcklbw %xmm4,%xmm1
+ .byte 102,68,15,104,196 // punpckhbw %xmm4,%xmm8
+ .byte 102,15,127,156,36,160,0,0,0 // movdqa %xmm3,0xa0(%rsp)
+ .byte 102,15,111,211 // movdqa %xmm3,%xmm2
+ .byte 102,68,15,111,243 // movdqa %xmm3,%xmm14
+ .byte 102,68,15,96,244 // punpcklbw %xmm4,%xmm14
+ .byte 102,15,104,212 // punpckhbw %xmm4,%xmm2
+ .byte 102,15,111,242 // movdqa %xmm2,%xmm6
+ .byte 102,15,127,116,36,16 // movdqa %xmm6,0x10(%rsp)
+ .byte 243,15,112,192,231 // pshufhw $0xe7,%xmm0,%xmm0
+ .byte 102,15,112,192,232 // pshufd $0xe8,%xmm0,%xmm0
+ .byte 102,15,96,192 // punpcklbw %xmm0,%xmm0
+ .byte 242,15,112,192,95 // pshuflw $0x5f,%xmm0,%xmm0
+ .byte 243,15,112,208,95 // pshufhw $0x5f,%xmm0,%xmm2
+ .byte 242,15,112,195,231 // pshuflw $0xe7,%xmm3,%xmm0
+ .byte 243,15,112,192,231 // pshufhw $0xe7,%xmm0,%xmm0
+ .byte 102,15,112,192,232 // pshufd $0xe8,%xmm0,%xmm0
+ .byte 102,15,96,192 // punpcklbw %xmm0,%xmm0
+ .byte 242,15,112,192,95 // pshuflw $0x5f,%xmm0,%xmm0
+ .byte 243,15,112,216,95 // pshufhw $0x5f,%xmm0,%xmm3
+ .byte 102,15,111,195 // movdqa %xmm3,%xmm0
+ .byte 102,15,104,196 // punpckhbw %xmm4,%xmm0
+ .byte 102,15,127,4,36 // movdqa %xmm0,(%rsp)
+ .byte 102,15,96,220 // punpcklbw %xmm4,%xmm3
+ .byte 102,68,15,111,202 // movdqa %xmm2,%xmm9
+ .byte 102,68,15,104,204 // punpckhbw %xmm4,%xmm9
+ .byte 102,15,96,212 // punpcklbw %xmm4,%xmm2
+ .byte 102,15,111,233 // movdqa %xmm1,%xmm5
+ .byte 102,15,253,237 // paddw %xmm5,%xmm5
+ .byte 102,65,15,111,248 // movdqa %xmm8,%xmm7
+ .byte 102,15,253,255 // paddw %xmm7,%xmm7
+ .byte 102,69,15,111,238 // movdqa %xmm14,%xmm13
+ .byte 102,69,15,253,237 // paddw %xmm13,%xmm13
+ .byte 102,15,253,246 // paddw %xmm6,%xmm6
+ .byte 102,15,217,240 // psubusw %xmm0,%xmm6
+ .byte 102,15,117,244 // pcmpeqw %xmm4,%xmm6
+ .byte 102,15,127,116,36,112 // movdqa %xmm6,0x70(%rsp)
+ .byte 102,68,15,217,235 // psubusw %xmm3,%xmm13
+ .byte 102,68,15,117,236 // pcmpeqw %xmm4,%xmm13
+ .byte 102,68,15,127,108,36,80 // movdqa %xmm13,0x50(%rsp)
+ .byte 102,65,15,217,249 // psubusw %xmm9,%xmm7
+ .byte 102,15,117,252 // pcmpeqw %xmm4,%xmm7
+ .byte 102,15,127,124,36,48 // movdqa %xmm7,0x30(%rsp)
+ .byte 102,15,217,234 // psubusw %xmm2,%xmm5
+ .byte 102,15,117,236 // pcmpeqw %xmm4,%xmm5
+ .byte 102,15,127,108,36,32 // movdqa %xmm5,0x20(%rsp)
+ .byte 102,15,111,53,29,4,0,0 // movdqa 0x41d(%rip),%xmm6 # 3310 <_sk_overlay_sse2_8bit+0x5bb>
+ .byte 102,15,111,194 // movdqa %xmm2,%xmm0
+ .byte 102,15,239,198 // pxor %xmm6,%xmm0
+ .byte 102,65,15,213,199 // pmullw %xmm15,%xmm0
+ .byte 102,15,127,68,36,64 // movdqa %xmm0,0x40(%rsp)
+ .byte 102,65,15,111,194 // movdqa %xmm10,%xmm0
+ .byte 102,68,15,111,224 // movdqa %xmm0,%xmm12
+ .byte 102,68,15,239,230 // pxor %xmm6,%xmm12
+ .byte 102,68,15,213,225 // pmullw %xmm1,%xmm12
+ .byte 102,15,111,226 // movdqa %xmm2,%xmm4
+ .byte 102,15,249,209 // psubw %xmm1,%xmm2
+ .byte 102,15,127,84,36,208 // movdqa %xmm2,-0x30(%rsp)
+ .byte 102,68,15,111,209 // movdqa %xmm1,%xmm10
+ .byte 102,69,15,213,215 // pmullw %xmm15,%xmm10
+ .byte 102,15,213,224 // pmullw %xmm0,%xmm4
+ .byte 102,15,127,100,36,224 // movdqa %xmm4,-0x20(%rsp)
+ .byte 102,65,15,249,199 // psubw %xmm15,%xmm0
+ .byte 102,15,127,68,36,176 // movdqa %xmm0,-0x50(%rsp)
+ .byte 102,65,15,111,193 // movdqa %xmm9,%xmm0
+ .byte 102,15,239,198 // pxor %xmm6,%xmm0
+ .byte 102,15,111,84,36,128 // movdqa -0x80(%rsp),%xmm2
+ .byte 102,15,213,194 // pmullw %xmm2,%xmm0
+ .byte 102,15,127,68,36,240 // movdqa %xmm0,-0x10(%rsp)
+ .byte 102,65,15,111,195 // movdqa %xmm11,%xmm0
+ .byte 102,68,15,239,222 // pxor %xmm6,%xmm11
+ .byte 102,69,15,213,216 // pmullw %xmm8,%xmm11
+ .byte 102,69,15,111,249 // movdqa %xmm9,%xmm15
+ .byte 102,69,15,249,200 // psubw %xmm8,%xmm9
+ .byte 102,68,15,127,76,36,192 // movdqa %xmm9,-0x40(%rsp)
+ .byte 102,68,15,213,194 // pmullw %xmm2,%xmm8
+ .byte 102,68,15,213,248 // pmullw %xmm0,%xmm15
+ .byte 102,15,249,194 // psubw %xmm2,%xmm0
+ .byte 102,68,15,111,200 // movdqa %xmm0,%xmm9
+ .byte 102,15,111,195 // movdqa %xmm3,%xmm0
+ .byte 102,15,239,198 // pxor %xmm6,%xmm0
+ .byte 102,15,111,84,36,144 // movdqa -0x70(%rsp),%xmm2
+ .byte 102,15,213,194 // pmullw %xmm2,%xmm0
+ .byte 102,15,127,68,36,128 // movdqa %xmm0,-0x80(%rsp)
+ .byte 102,15,111,68,36,96 // movdqa 0x60(%rsp),%xmm0
+ .byte 102,15,111,232 // movdqa %xmm0,%xmm5
+ .byte 102,15,239,238 // pxor %xmm6,%xmm5
+ .byte 102,65,15,213,238 // pmullw %xmm14,%xmm5
+ .byte 102,68,15,111,235 // movdqa %xmm3,%xmm13
+ .byte 102,65,15,249,222 // psubw %xmm14,%xmm3
+ .byte 102,15,127,92,36,160 // movdqa %xmm3,-0x60(%rsp)
+ .byte 102,65,15,111,254 // movdqa %xmm14,%xmm7
+ .byte 102,15,213,250 // pmullw %xmm2,%xmm7
+ .byte 102,68,15,213,232 // pmullw %xmm0,%xmm13
+ .byte 102,15,249,194 // psubw %xmm2,%xmm0
+ .byte 102,15,111,208 // movdqa %xmm0,%xmm2
+ .byte 102,15,111,12,36 // movdqa (%rsp),%xmm1
+ .byte 102,15,111,193 // movdqa %xmm1,%xmm0
+ .byte 102,15,239,198 // pxor %xmm6,%xmm0
+ .byte 102,15,111,156,36,144,0,0,0 // movdqa 0x90(%rsp),%xmm3
+ .byte 102,15,213,195 // pmullw %xmm3,%xmm0
+ .byte 102,15,127,68,36,144 // movdqa %xmm0,-0x70(%rsp)
+ .byte 102,15,111,132,36,128,0,0,0 // movdqa 0x80(%rsp),%xmm0
+ .byte 102,15,239,240 // pxor %xmm0,%xmm6
+ .byte 102,15,111,100,36,16 // movdqa 0x10(%rsp),%xmm4
+ .byte 102,15,213,244 // pmullw %xmm4,%xmm6
+ .byte 102,68,15,111,241 // movdqa %xmm1,%xmm14
+ .byte 102,15,249,204 // psubw %xmm4,%xmm1
+ .byte 102,15,213,227 // pmullw %xmm3,%xmm4
+ .byte 102,68,15,213,240 // pmullw %xmm0,%xmm14
+ .byte 102,15,249,195 // psubw %xmm3,%xmm0
+ .byte 102,15,213,193 // pmullw %xmm1,%xmm0
+ .byte 102,15,213,84,36,160 // pmullw -0x60(%rsp),%xmm2
+ .byte 102,68,15,213,76,36,192 // pmullw -0x40(%rsp),%xmm9
+ .byte 102,15,111,76,36,176 // movdqa -0x50(%rsp),%xmm1
+ .byte 102,15,213,76,36,208 // pmullw -0x30(%rsp),%xmm1
+ .byte 102,15,253,192 // paddw %xmm0,%xmm0
+ .byte 102,68,15,249,240 // psubw %xmm0,%xmm14
+ .byte 102,15,253,210 // paddw %xmm2,%xmm2
+ .byte 102,68,15,249,234 // psubw %xmm2,%xmm13
+ .byte 102,69,15,253,201 // paddw %xmm9,%xmm9
+ .byte 102,69,15,249,249 // psubw %xmm9,%xmm15
+ .byte 102,15,111,193 // movdqa %xmm1,%xmm0
+ .byte 102,15,253,192 // paddw %xmm0,%xmm0
+ .byte 102,15,111,76,36,224 // movdqa -0x20(%rsp),%xmm1
+ .byte 102,15,249,200 // psubw %xmm0,%xmm1
+ .byte 102,69,15,253,210 // paddw %xmm10,%xmm10
+ .byte 102,15,111,68,36,32 // movdqa 0x20(%rsp),%xmm0
+ .byte 102,68,15,219,208 // pand %xmm0,%xmm10
+ .byte 102,15,223,193 // pandn %xmm1,%xmm0
+ .byte 102,65,15,235,194 // por %xmm10,%xmm0
+ .byte 102,15,111,216 // movdqa %xmm0,%xmm3
+ .byte 102,69,15,253,192 // paddw %xmm8,%xmm8
+ .byte 102,15,111,68,36,48 // movdqa 0x30(%rsp),%xmm0
+ .byte 102,68,15,219,192 // pand %xmm0,%xmm8
+ .byte 102,65,15,223,199 // pandn %xmm15,%xmm0
+ .byte 102,65,15,235,192 // por %xmm8,%xmm0
+ .byte 102,68,15,111,192 // movdqa %xmm0,%xmm8
+ .byte 102,15,253,255 // paddw %xmm7,%xmm7
+ .byte 102,15,111,68,36,80 // movdqa 0x50(%rsp),%xmm0
+ .byte 102,15,219,248 // pand %xmm0,%xmm7
+ .byte 102,65,15,223,197 // pandn %xmm13,%xmm0
+ .byte 102,15,235,199 // por %xmm7,%xmm0
+ .byte 102,15,111,208 // movdqa %xmm0,%xmm2
+ .byte 102,15,253,228 // paddw %xmm4,%xmm4
+ .byte 102,15,111,68,36,112 // movdqa 0x70(%rsp),%xmm0
+ .byte 102,15,219,224 // pand %xmm0,%xmm4
+ .byte 102,65,15,223,198 // pandn %xmm14,%xmm0
+ .byte 102,15,235,196 // por %xmm4,%xmm0
+ .byte 102,15,111,200 // movdqa %xmm0,%xmm1
+ .byte 102,68,15,253,100,36,64 // paddw 0x40(%rsp),%xmm12
+ .byte 102,68,15,253,92,36,240 // paddw -0x10(%rsp),%xmm11
+ .byte 102,15,253,108,36,128 // paddw -0x80(%rsp),%xmm5
+ .byte 102,15,253,116,36,144 // paddw -0x70(%rsp),%xmm6
+ .byte 102,15,111,5,16,2,0,0 // movdqa 0x210(%rip),%xmm0 # 3320 <_sk_overlay_sse2_8bit+0x5cb>
+ .byte 102,68,15,253,224 // paddw %xmm0,%xmm12
+ .byte 102,68,15,253,227 // paddw %xmm3,%xmm12
+ .byte 102,68,15,253,216 // paddw %xmm0,%xmm11
+ .byte 102,69,15,253,216 // paddw %xmm8,%xmm11
+ .byte 102,15,253,232 // paddw %xmm0,%xmm5
+ .byte 102,15,253,234 // paddw %xmm2,%xmm5
+ .byte 102,15,253,240 // paddw %xmm0,%xmm6
+ .byte 102,15,253,241 // paddw %xmm1,%xmm6
+ .byte 102,15,111,5,244,1,0,0 // movdqa 0x1f4(%rip),%xmm0 # 3330 <_sk_overlay_sse2_8bit+0x5db>
+ .byte 102,15,228,240 // pmulhuw %xmm0,%xmm6
+ .byte 102,15,228,232 // pmulhuw %xmm0,%xmm5
+ .byte 102,68,15,228,216 // pmulhuw %xmm0,%xmm11
+ .byte 102,68,15,228,224 // pmulhuw %xmm0,%xmm12
+ .byte 102,65,15,113,211,7 // psrlw $0x7,%xmm11
+ .byte 102,65,15,113,212,7 // psrlw $0x7,%xmm12
+ .byte 102,69,15,103,227 // packuswb %xmm11,%xmm12
+ .byte 102,15,113,214,7 // psrlw $0x7,%xmm6
+ .byte 102,15,113,213,7 // psrlw $0x7,%xmm5
+ .byte 102,15,103,238 // packuswb %xmm6,%xmm5
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 15,40,148,36,176,0,0,0 // movaps 0xb0(%rsp),%xmm2
+ .byte 15,40,156,36,160,0,0,0 // movaps 0xa0(%rsp),%xmm3
+ .byte 102,65,15,111,196 // movdqa %xmm12,%xmm0
+ .byte 102,15,111,205 // movdqa %xmm5,%xmm1
+ .byte 72,129,196,200,0,0,0 // add $0xc8,%rsp
+ .byte 255,224 // jmpq *%rax
+
BALIGN4
.byte 0,0 // add %al,(%rax)
- .byte 127,67 // jg 2963 <_sk_difference_sse2_8bit+0x20c>
+ .byte 127,67 // jg 31db <_sk_overlay_sse2_8bit+0x486>
.byte 0,0 // add %al,(%rax)
- .byte 127,67 // jg 2967 <_sk_difference_sse2_8bit+0x210>
+ .byte 127,67 // jg 31df <_sk_overlay_sse2_8bit+0x48a>
.byte 0,0 // add %al,(%rax)
- .byte 127,67 // jg 296b <_sk_difference_sse2_8bit+0x214>
+ .byte 127,67 // jg 31e3 <_sk_overlay_sse2_8bit+0x48e>
BALIGN16
.byte 0,0 // add %al,(%rax)
@@ -65752,6 +67066,45 @@ BALIGN16
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,0 // incl (%rax)
+ .byte 255,0 // incl (%rax)
+ .byte 255,0 // incl (%rax)
+ .byte 255,0 // incl (%rax)
+ .byte 255,0 // incl (%rax)
+ .byte 255,0 // incl (%rax)
+ .byte 255,0 // incl (%rax)
+ .byte 255,0 // incl (%rax)
+ .byte 255,0 // incl (%rax)
+ .byte 127,0 // jg 32f2 <.literal16+0x152>
+ .byte 127,0 // jg 32f4 <.literal16+0x154>
+ .byte 127,0 // jg 32f6 <.literal16+0x156>
+ .byte 127,0 // jg 32f8 <.literal16+0x158>
+ .byte 127,0 // jg 32fa <.literal16+0x15a>
+ .byte 127,0 // jg 32fc <.literal16+0x15c>
+ .byte 127,0 // jg 32fe <.literal16+0x15e>
+ .byte 127,0 // jg 3300 <.literal16+0x160>
+ .byte 129,128,129,128,129,128,129,128,129,128// addl $0x80818081,-0x7f7e7f7f(%rax)
+ .byte 129,128,129,128,129,128,255,0,255,0 // addl $0xff00ff,-0x7f7e7f7f(%rax)
+ .byte 255,0 // incl (%rax)
+ .byte 255,0 // incl (%rax)
+ .byte 255,0 // incl (%rax)
+ .byte 255,0 // incl (%rax)
+ .byte 255,0 // incl (%rax)
+ .byte 255,0 // incl (%rax)
+ .byte 127,0 // jg 3322 <.literal16+0x182>
+ .byte 127,0 // jg 3324 <.literal16+0x184>
+ .byte 127,0 // jg 3326 <.literal16+0x186>
+ .byte 127,0 // jg 3328 <.literal16+0x188>
+ .byte 127,0 // jg 332a <.literal16+0x18a>
+ .byte 127,0 // jg 332c <.literal16+0x18c>
+ .byte 127,0 // jg 332e <.literal16+0x18e>
+ .byte 127,0 // jg 3330 <.literal16+0x190>
+ .byte 129,128,129,128,129,128,129,128,129,128// addl $0x80818081,-0x7f7e7f7f(%rax)
+ .byte 129 // .byte 0x81
+ .byte 128 // .byte 0x80
+ .byte 129 // .byte 0x81
+ .byte 128 // .byte 0x80
+ .byte 129 // .byte 0x81
+ .byte 128 // .byte 0x80
#elif defined(__i386__)
BALIGN32
diff --git a/src/jumper/SkJumper_generated_win.S b/src/jumper/SkJumper_generated_win.S
index 02d1015c03..2222717b44 100644
--- a/src/jumper/SkJumper_generated_win.S
+++ b/src/jumper/SkJumper_generated_win.S
@@ -39059,7 +39059,7 @@ _sk_uniform_color_hsw_8bit LABEL PROC
PUBLIC _sk_set_rgb_hsw_8bit
_sk_set_rgb_hsw_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 197,250,16,37,149,44,0,0 ; vmovss 0x2c95(%rip),%xmm4 # 2e18 <_sk_difference_hsw_8bit+0x184>
+ DB 197,250,16,37,161,51,0,0 ; vmovss 0x33a1(%rip),%xmm4 # 3524 <_sk_overlay_hsw_8bit+0x37d>
DB 197,218,89,40 ; vmulss (%rax),%xmm4,%xmm5
DB 196,225,250,44,205 ; vcvttss2si %xmm5,%rcx
DB 197,218,89,104,4 ; vmulss 0x4(%rax),%xmm4,%xmm5
@@ -39072,7 +39072,7 @@ _sk_set_rgb_hsw_8bit LABEL PROC
DB 9,208 ; or %edx,%eax
DB 197,249,110,224 ; vmovd %eax,%xmm4
DB 196,226,125,88,228 ; vpbroadcastd %xmm4,%ymm4
- DB 197,253,111,45,133,44,0,0 ; vmovdqa 0x2c85(%rip),%ymm5 # 2e40 <_sk_difference_hsw_8bit+0x1ac>
+ DB 197,253,111,45,165,51,0,0 ; vmovdqa 0x33a5(%rip),%ymm5 # 3560 <_sk_overlay_hsw_8bit+0x3b9>
DB 197,245,219,205 ; vpand %ymm5,%ymm1,%ymm1
DB 197,253,219,197 ; vpand %ymm5,%ymm0,%ymm0
DB 197,221,235,192 ; vpor %ymm0,%ymm4,%ymm0
@@ -39082,10 +39082,10 @@ _sk_set_rgb_hsw_8bit LABEL PROC
PUBLIC _sk_premul_hsw_8bit
_sk_premul_hsw_8bit LABEL PROC
- DB 197,253,111,37,137,44,0,0 ; vmovdqa 0x2c89(%rip),%ymm4 # 2e60 <_sk_difference_hsw_8bit+0x1cc>
+ DB 197,253,111,37,169,51,0,0 ; vmovdqa 0x33a9(%rip),%ymm4 # 3580 <_sk_overlay_hsw_8bit+0x3d9>
DB 196,226,125,0,236 ; vpshufb %ymm4,%ymm0,%ymm5
DB 196,226,117,0,228 ; vpshufb %ymm4,%ymm1,%ymm4
- DB 197,253,111,53,151,44,0,0 ; vmovdqa 0x2c97(%rip),%ymm6 # 2e80 <_sk_difference_hsw_8bit+0x1ec>
+ DB 197,253,111,53,183,51,0,0 ; vmovdqa 0x33b7(%rip),%ymm6 # 35a0 <_sk_overlay_hsw_8bit+0x3f9>
DB 197,221,235,230 ; vpor %ymm6,%ymm4,%ymm4
DB 197,213,235,238 ; vpor %ymm6,%ymm5,%ymm5
DB 196,226,125,48,240 ; vpmovzxbw %xmm0,%ymm6
@@ -39123,7 +39123,7 @@ _sk_premul_hsw_8bit LABEL PROC
PUBLIC _sk_swap_rb_hsw_8bit
_sk_swap_rb_hsw_8bit LABEL PROC
- DB 197,253,111,37,15,44,0,0 ; vmovdqa 0x2c0f(%rip),%ymm4 # 2ea0 <_sk_difference_hsw_8bit+0x20c>
+ DB 197,253,111,37,47,51,0,0 ; vmovdqa 0x332f(%rip),%ymm4 # 35c0 <_sk_overlay_hsw_8bit+0x419>
DB 196,226,125,0,196 ; vpshufb %ymm4,%ymm0,%ymm0
DB 196,226,117,0,204 ; vpshufb %ymm4,%ymm1,%ymm1
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -39463,7 +39463,7 @@ _sk_load_bgra_hsw_8bit LABEL PROC
DB 117,35 ; jne 774 <_sk_load_bgra_hsw_8bit+0x44>
DB 196,161,126,111,76,130,32 ; vmovdqu 0x20(%rdx,%r8,4),%ymm1
DB 196,161,126,111,4,130 ; vmovdqu (%rdx,%r8,4),%ymm0
- DB 197,253,111,37,90,39,0,0 ; vmovdqa 0x275a(%rip),%ymm4 # 2ec0 <_sk_difference_hsw_8bit+0x22c>
+ DB 197,253,111,37,122,46,0,0 ; vmovdqa 0x2e7a(%rip),%ymm4 # 35e0 <_sk_overlay_hsw_8bit+0x439>
DB 196,226,125,0,196 ; vpshufb %ymm4,%ymm0,%ymm0
DB 196,226,117,0,204 ; vpshufb %ymm4,%ymm1,%ymm1
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -39576,7 +39576,7 @@ _sk_load_bgra_dst_hsw_8bit LABEL PROC
DB 117,35 ; jne 92c <_sk_load_bgra_dst_hsw_8bit+0x44>
DB 196,161,126,111,92,130,32 ; vmovdqu 0x20(%rdx,%r8,4),%ymm3
DB 196,161,126,111,20,130 ; vmovdqu (%rdx,%r8,4),%ymm2
- DB 197,253,111,37,194,37,0,0 ; vmovdqa 0x25c2(%rip),%ymm4 # 2ee0 <_sk_difference_hsw_8bit+0x24c>
+ DB 197,253,111,37,226,44,0,0 ; vmovdqa 0x2ce2(%rip),%ymm4 # 3600 <_sk_overlay_hsw_8bit+0x459>
DB 196,226,109,0,212 ; vpshufb %ymm4,%ymm2,%ymm2
DB 196,226,101,0,220 ; vpshufb %ymm4,%ymm3,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -39685,7 +39685,7 @@ _sk_store_bgra_hsw_8bit LABEL PROC
DB 72,15,175,209 ; imul %rcx,%rdx
DB 72,193,226,2 ; shl $0x2,%rdx
DB 72,3,16 ; add (%rax),%rdx
- DB 197,253,111,37,60,36,0,0 ; vmovdqa 0x243c(%rip),%ymm4 # 2f00 <_sk_difference_hsw_8bit+0x26c>
+ DB 197,253,111,37,92,43,0,0 ; vmovdqa 0x2b5c(%rip),%ymm4 # 3620 <_sk_overlay_hsw_8bit+0x479>
DB 196,226,117,0,236 ; vpshufb %ymm4,%ymm1,%ymm5
DB 196,226,125,0,228 ; vpshufb %ymm4,%ymm0,%ymm4
DB 77,133,201 ; test %r9,%r9
@@ -39965,10 +39965,10 @@ _sk_store_a8_hsw_8bit LABEL PROC
DB 72,99,87,8 ; movslq 0x8(%rdi),%rdx
DB 72,15,175,209 ; imul %rcx,%rdx
DB 72,3,16 ; add (%rax),%rdx
- DB 197,253,111,37,104,32,0,0 ; vmovdqa 0x2068(%rip),%ymm4 # 2f20 <_sk_difference_hsw_8bit+0x28c>
+ DB 197,253,111,37,136,39,0,0 ; vmovdqa 0x2788(%rip),%ymm4 # 3640 <_sk_overlay_hsw_8bit+0x499>
DB 196,226,117,0,236 ; vpshufb %ymm4,%ymm1,%ymm5
DB 196,227,253,0,237,232 ; vpermq $0xe8,%ymm5,%ymm5
- DB 197,249,111,53,245,34,0,0 ; vmovdqa 0x22f5(%rip),%xmm6 # 31c0 <_sk_difference_hsw_8bit+0x52c>
+ DB 197,249,111,53,133,42,0,0 ; vmovdqa 0x2a85(%rip),%xmm6 # 3950 <_sk_overlay_hsw_8bit+0x7a9>
DB 196,226,81,0,238 ; vpshufb %xmm6,%xmm5,%xmm5
DB 196,226,125,0,228 ; vpshufb %ymm4,%ymm0,%ymm4
DB 196,227,253,0,228,232 ; vpermq $0xe8,%ymm4,%ymm4
@@ -40058,10 +40058,10 @@ _sk_load_g8_hsw_8bit LABEL PROC
DB 196,226,125,49,200 ; vpmovzxbd %xmm0,%ymm1
DB 197,249,112,192,78 ; vpshufd $0x4e,%xmm0,%xmm0
DB 196,226,125,49,192 ; vpmovzxbd %xmm0,%ymm0
- DB 196,226,125,88,37,17,30,0,0 ; vpbroadcastd 0x1e11(%rip),%ymm4 # 2e1c <_sk_difference_hsw_8bit+0x188>
+ DB 196,226,125,88,37,29,37,0,0 ; vpbroadcastd 0x251d(%rip),%ymm4 # 3528 <_sk_overlay_hsw_8bit+0x381>
DB 196,226,125,64,236 ; vpmulld %ymm4,%ymm0,%ymm5
DB 196,226,117,64,196 ; vpmulld %ymm4,%ymm1,%ymm0
- DB 196,226,125,88,13,2,30,0,0 ; vpbroadcastd 0x1e02(%rip),%ymm1 # 2e20 <_sk_difference_hsw_8bit+0x18c>
+ DB 196,226,125,88,13,14,37,0,0 ; vpbroadcastd 0x250e(%rip),%ymm1 # 352c <_sk_overlay_hsw_8bit+0x385>
DB 197,253,235,193 ; vpor %ymm1,%ymm0,%ymm0
DB 197,213,235,201 ; vpor %ymm1,%ymm5,%ymm1
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -40157,10 +40157,10 @@ _sk_load_g8_dst_hsw_8bit LABEL PROC
DB 196,226,125,49,218 ; vpmovzxbd %xmm2,%ymm3
DB 197,249,112,210,78 ; vpshufd $0x4e,%xmm2,%xmm2
DB 196,226,125,49,210 ; vpmovzxbd %xmm2,%ymm2
- DB 196,226,125,88,37,161,28,0,0 ; vpbroadcastd 0x1ca1(%rip),%ymm4 # 2e24 <_sk_difference_hsw_8bit+0x190>
+ DB 196,226,125,88,37,173,35,0,0 ; vpbroadcastd 0x23ad(%rip),%ymm4 # 3530 <_sk_overlay_hsw_8bit+0x389>
DB 196,226,109,64,236 ; vpmulld %ymm4,%ymm2,%ymm5
DB 196,226,101,64,212 ; vpmulld %ymm4,%ymm3,%ymm2
- DB 196,226,125,88,29,146,28,0,0 ; vpbroadcastd 0x1c92(%rip),%ymm3 # 2e28 <_sk_difference_hsw_8bit+0x194>
+ DB 196,226,125,88,29,158,35,0,0 ; vpbroadcastd 0x239e(%rip),%ymm3 # 3534 <_sk_overlay_hsw_8bit+0x38d>
DB 197,237,235,211 ; vpor %ymm3,%ymm2,%ymm2
DB 197,213,235,219 ; vpor %ymm3,%ymm5,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -40255,7 +40255,7 @@ _sk_srcover_rgba_8888_hsw_8bit LABEL PROC
DB 15,133,222,0,0,0 ; jne 13c3 <_sk_srcover_rgba_8888_hsw_8bit+0x103>
DB 196,33,126,111,76,138,32 ; vmovdqu 0x20(%rdx,%r9,4),%ymm9
DB 196,33,126,111,28,138 ; vmovdqu (%rdx,%r9,4),%ymm11
- DB 197,253,111,53,70,28,0,0 ; vmovdqa 0x1c46(%rip),%ymm6 # 2f40 <_sk_difference_hsw_8bit+0x2ac>
+ DB 197,253,111,53,102,35,0,0 ; vmovdqa 0x2366(%rip),%ymm6 # 3660 <_sk_overlay_hsw_8bit+0x4b9>
DB 196,226,117,0,254 ; vpshufb %ymm6,%ymm1,%ymm7
DB 196,226,125,0,246 ; vpshufb %ymm6,%ymm0,%ymm6
DB 196,66,125,48,195 ; vpmovzxbw %xmm11,%ymm8
@@ -40461,7 +40461,7 @@ PUBLIC _sk_scale_1_float_hsw_8bit
_sk_scale_1_float_hsw_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 197,250,16,32 ; vmovss (%rax),%xmm4
- DB 197,218,89,37,214,23,0,0 ; vmulss 0x17d6(%rip),%xmm4,%xmm4 # 2e2c <_sk_difference_hsw_8bit+0x198>
+ DB 197,218,89,37,226,30,0,0 ; vmulss 0x1ee2(%rip),%xmm4,%xmm4 # 3538 <_sk_overlay_hsw_8bit+0x391>
DB 197,250,44,196 ; vcvttss2si %xmm4,%eax
DB 197,249,110,224 ; vmovd %eax,%xmm4
DB 196,226,125,120,228 ; vpbroadcastb %xmm4,%ymm4
@@ -40471,7 +40471,7 @@ _sk_scale_1_float_hsw_8bit LABEL PROC
DB 196,226,125,48,241 ; vpmovzxbw %xmm1,%ymm6
DB 196,227,125,57,201,1 ; vextracti128 $0x1,%ymm1,%xmm1
DB 196,226,125,48,201 ; vpmovzxbw %xmm1,%ymm1
- DB 197,221,219,37,213,24,0,0 ; vpand 0x18d5(%rip),%ymm4,%ymm4 # 2f60 <_sk_difference_hsw_8bit+0x2cc>
+ DB 197,221,219,37,245,31,0,0 ; vpand 0x1ff5(%rip),%ymm4,%ymm4 # 3680 <_sk_overlay_hsw_8bit+0x4d9>
DB 197,221,213,249 ; vpmullw %ymm1,%ymm4,%ymm7
DB 197,93,213,198 ; vpmullw %ymm6,%ymm4,%ymm8
DB 197,93,213,200 ; vpmullw %ymm0,%ymm4,%ymm9
@@ -40508,7 +40508,7 @@ _sk_scale_u8_hsw_8bit LABEL PROC
DB 196,226,125,49,236 ; vpmovzxbd %xmm4,%ymm5
DB 197,249,112,228,78 ; vpshufd $0x4e,%xmm4,%xmm4
DB 196,226,125,49,228 ; vpmovzxbd %xmm4,%ymm4
- DB 197,253,111,53,95,24,0,0 ; vmovdqa 0x185f(%rip),%ymm6 # 2f80 <_sk_difference_hsw_8bit+0x2ec>
+ DB 197,253,111,53,127,31,0,0 ; vmovdqa 0x1f7f(%rip),%ymm6 # 36a0 <_sk_overlay_hsw_8bit+0x4f9>
DB 196,226,93,0,230 ; vpshufb %ymm6,%ymm4,%ymm4
DB 196,226,85,0,238 ; vpshufb %ymm6,%ymm5,%ymm5
DB 196,226,125,48,240 ; vpmovzxbw %xmm0,%ymm6
@@ -40623,7 +40623,7 @@ PUBLIC _sk_lerp_1_float_hsw_8bit
_sk_lerp_1_float_hsw_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 197,250,16,32 ; vmovss (%rax),%xmm4
- DB 197,218,89,37,58,21,0,0 ; vmulss 0x153a(%rip),%xmm4,%xmm4 # 2e30 <_sk_difference_hsw_8bit+0x19c>
+ DB 197,218,89,37,70,28,0,0 ; vmulss 0x1c46(%rip),%xmm4,%xmm4 # 353c <_sk_overlay_hsw_8bit+0x395>
DB 197,250,44,196 ; vcvttss2si %xmm4,%eax
DB 197,249,110,224 ; vmovd %eax,%xmm4
DB 196,226,125,120,228 ; vpbroadcastb %xmm4,%ymm4
@@ -40633,7 +40633,7 @@ _sk_lerp_1_float_hsw_8bit LABEL PROC
DB 196,226,125,48,241 ; vpmovzxbw %xmm1,%ymm6
DB 196,227,125,57,201,1 ; vextracti128 $0x1,%ymm1,%xmm1
DB 196,226,125,48,201 ; vpmovzxbw %xmm1,%ymm1
- DB 197,221,219,61,117,22,0,0 ; vpand 0x1675(%rip),%ymm4,%ymm7 # 2fa0 <_sk_difference_hsw_8bit+0x30c>
+ DB 197,221,219,61,149,29,0,0 ; vpand 0x1d95(%rip),%ymm4,%ymm7 # 36c0 <_sk_overlay_hsw_8bit+0x519>
DB 197,69,213,193 ; vpmullw %ymm1,%ymm7,%ymm8
DB 197,69,213,206 ; vpmullw %ymm6,%ymm7,%ymm9
DB 197,69,213,208 ; vpmullw %ymm0,%ymm7,%ymm10
@@ -40701,7 +40701,7 @@ _sk_lerp_u8_hsw_8bit LABEL PROC
DB 196,226,125,49,236 ; vpmovzxbd %xmm4,%ymm5
DB 197,249,112,228,78 ; vpshufd $0x4e,%xmm4,%xmm4
DB 196,226,125,49,228 ; vpmovzxbd %xmm4,%ymm4
- DB 197,253,111,53,106,21,0,0 ; vmovdqa 0x156a(%rip),%ymm6 # 2fc0 <_sk_difference_hsw_8bit+0x32c>
+ DB 197,253,111,53,138,28,0,0 ; vmovdqa 0x1c8a(%rip),%ymm6 # 36e0 <_sk_overlay_hsw_8bit+0x539>
DB 196,98,93,0,206 ; vpshufb %ymm6,%ymm4,%ymm9
DB 196,98,85,0,222 ; vpshufb %ymm6,%ymm5,%ymm11
DB 196,226,125,48,240 ; vpmovzxbw %xmm0,%ymm6
@@ -40864,7 +40864,7 @@ _sk_move_dst_src_hsw_8bit LABEL PROC
PUBLIC _sk_black_color_hsw_8bit
_sk_black_color_hsw_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,5,73,17,0,0 ; vbroadcastss 0x1149(%rip),%ymm0 # 2e34 <_sk_difference_hsw_8bit+0x1a0>
+ DB 196,226,125,24,5,85,24,0,0 ; vbroadcastss 0x1855(%rip),%ymm0 # 3540 <_sk_overlay_hsw_8bit+0x399>
DB 197,252,40,200 ; vmovaps %ymm0,%ymm1
DB 255,224 ; jmpq *%rax
@@ -40884,7 +40884,7 @@ _sk_clear_hsw_8bit LABEL PROC
PUBLIC _sk_srcatop_hsw_8bit
_sk_srcatop_hsw_8bit LABEL PROC
- DB 197,125,111,5,207,18,0,0 ; vmovdqa 0x12cf(%rip),%ymm8 # 2fe0 <_sk_difference_hsw_8bit+0x34c>
+ DB 197,125,111,5,239,25,0,0 ; vmovdqa 0x19ef(%rip),%ymm8 # 3700 <_sk_overlay_hsw_8bit+0x559>
DB 196,194,101,0,224 ; vpshufb %ymm8,%ymm3,%ymm4
DB 196,194,109,0,232 ; vpshufb %ymm8,%ymm2,%ymm5
DB 196,98,125,48,208 ; vpmovzxbw %xmm0,%ymm10
@@ -40959,7 +40959,7 @@ _sk_srcatop_hsw_8bit LABEL PROC
PUBLIC _sk_dstatop_hsw_8bit
_sk_dstatop_hsw_8bit LABEL PROC
- DB 197,125,111,5,138,17,0,0 ; vmovdqa 0x118a(%rip),%ymm8 # 3000 <_sk_difference_hsw_8bit+0x36c>
+ DB 197,125,111,5,170,24,0,0 ; vmovdqa 0x18aa(%rip),%ymm8 # 3720 <_sk_overlay_hsw_8bit+0x579>
DB 196,194,117,0,224 ; vpshufb %ymm8,%ymm1,%ymm4
DB 196,194,125,0,232 ; vpshufb %ymm8,%ymm0,%ymm5
DB 196,98,125,48,210 ; vpmovzxbw %xmm2,%ymm10
@@ -41034,7 +41034,7 @@ _sk_dstatop_hsw_8bit LABEL PROC
PUBLIC _sk_srcin_hsw_8bit
_sk_srcin_hsw_8bit LABEL PROC
- DB 197,253,111,37,65,16,0,0 ; vmovdqa 0x1041(%rip),%ymm4 # 3020 <_sk_difference_hsw_8bit+0x38c>
+ DB 197,253,111,37,97,23,0,0 ; vmovdqa 0x1761(%rip),%ymm4 # 3740 <_sk_overlay_hsw_8bit+0x599>
DB 196,226,101,0,236 ; vpshufb %ymm4,%ymm3,%ymm5
DB 196,226,109,0,228 ; vpshufb %ymm4,%ymm2,%ymm4
DB 196,226,125,48,240 ; vpmovzxbw %xmm0,%ymm6
@@ -41072,7 +41072,7 @@ _sk_srcin_hsw_8bit LABEL PROC
PUBLIC _sk_dstin_hsw_8bit
_sk_dstin_hsw_8bit LABEL PROC
- DB 197,253,111,37,183,15,0,0 ; vmovdqa 0xfb7(%rip),%ymm4 # 3040 <_sk_difference_hsw_8bit+0x3ac>
+ DB 197,253,111,37,215,22,0,0 ; vmovdqa 0x16d7(%rip),%ymm4 # 3760 <_sk_overlay_hsw_8bit+0x5b9>
DB 196,226,117,0,204 ; vpshufb %ymm4,%ymm1,%ymm1
DB 196,226,125,0,196 ; vpshufb %ymm4,%ymm0,%ymm0
DB 196,226,125,48,226 ; vpmovzxbw %xmm2,%ymm4
@@ -41110,7 +41110,7 @@ _sk_dstin_hsw_8bit LABEL PROC
PUBLIC _sk_srcout_hsw_8bit
_sk_srcout_hsw_8bit LABEL PROC
- DB 197,253,111,37,43,15,0,0 ; vmovdqa 0xf2b(%rip),%ymm4 # 3060 <_sk_difference_hsw_8bit+0x3cc>
+ DB 197,253,111,37,75,22,0,0 ; vmovdqa 0x164b(%rip),%ymm4 # 3780 <_sk_overlay_hsw_8bit+0x5d9>
DB 196,226,109,0,236 ; vpshufb %ymm4,%ymm2,%ymm5
DB 196,226,101,0,228 ; vpshufb %ymm4,%ymm3,%ymm4
DB 197,205,118,246 ; vpcmpeqd %ymm6,%ymm6,%ymm6
@@ -41151,7 +41151,7 @@ _sk_srcout_hsw_8bit LABEL PROC
PUBLIC _sk_dstout_hsw_8bit
_sk_dstout_hsw_8bit LABEL PROC
- DB 197,253,111,37,149,14,0,0 ; vmovdqa 0xe95(%rip),%ymm4 # 3080 <_sk_difference_hsw_8bit+0x3ec>
+ DB 197,253,111,37,181,21,0,0 ; vmovdqa 0x15b5(%rip),%ymm4 # 37a0 <_sk_overlay_hsw_8bit+0x5f9>
DB 196,226,125,0,196 ; vpshufb %ymm4,%ymm0,%ymm0
DB 196,226,117,0,204 ; vpshufb %ymm4,%ymm1,%ymm1
DB 197,221,118,228 ; vpcmpeqd %ymm4,%ymm4,%ymm4
@@ -41192,7 +41192,7 @@ _sk_dstout_hsw_8bit LABEL PROC
PUBLIC _sk_srcover_hsw_8bit
_sk_srcover_hsw_8bit LABEL PROC
- DB 197,253,111,37,253,13,0,0 ; vmovdqa 0xdfd(%rip),%ymm4 # 30a0 <_sk_difference_hsw_8bit+0x40c>
+ DB 197,253,111,37,29,21,0,0 ; vmovdqa 0x151d(%rip),%ymm4 # 37c0 <_sk_overlay_hsw_8bit+0x619>
DB 196,226,117,0,236 ; vpshufb %ymm4,%ymm1,%ymm5
DB 196,226,125,0,228 ; vpshufb %ymm4,%ymm0,%ymm4
DB 196,98,125,48,202 ; vpmovzxbw %xmm2,%ymm9
@@ -41234,7 +41234,7 @@ _sk_srcover_hsw_8bit LABEL PROC
PUBLIC _sk_dstover_hsw_8bit
_sk_dstover_hsw_8bit LABEL PROC
- DB 197,253,111,37,94,13,0,0 ; vmovdqa 0xd5e(%rip),%ymm4 # 30c0 <_sk_difference_hsw_8bit+0x42c>
+ DB 197,253,111,37,126,20,0,0 ; vmovdqa 0x147e(%rip),%ymm4 # 37e0 <_sk_overlay_hsw_8bit+0x639>
DB 196,226,101,0,236 ; vpshufb %ymm4,%ymm3,%ymm5
DB 196,226,109,0,228 ; vpshufb %ymm4,%ymm2,%ymm4
DB 196,98,125,48,200 ; vpmovzxbw %xmm0,%ymm9
@@ -41314,7 +41314,7 @@ _sk_multiply_hsw_8bit LABEL PROC
DB 72,131,236,56 ; sub $0x38,%rsp
DB 197,253,111,243 ; vmovdqa %ymm3,%ymm6
DB 197,253,111,218 ; vmovdqa %ymm2,%ymm3
- DB 197,125,111,13,27,12,0,0 ; vmovdqa 0xc1b(%rip),%ymm9 # 30e0 <_sk_difference_hsw_8bit+0x44c>
+ DB 197,125,111,13,59,19,0,0 ; vmovdqa 0x133b(%rip),%ymm9 # 3800 <_sk_overlay_hsw_8bit+0x659>
DB 196,194,101,0,225 ; vpshufb %ymm9,%ymm3,%ymm4
DB 196,194,77,0,233 ; vpshufb %ymm9,%ymm6,%ymm5
DB 196,65,45,118,210 ; vpcmpeqd %ymm10,%ymm10,%ymm10
@@ -41455,7 +41455,7 @@ _sk_screen_hsw_8bit LABEL PROC
PUBLIC _sk_xor__hsw_8bit
_sk_xor__hsw_8bit LABEL PROC
- DB 197,125,111,13,167,9,0,0 ; vmovdqa 0x9a7(%rip),%ymm9 # 3100 <_sk_difference_hsw_8bit+0x46c>
+ DB 197,125,111,13,199,16,0,0 ; vmovdqa 0x10c7(%rip),%ymm9 # 3820 <_sk_overlay_hsw_8bit+0x679>
DB 196,194,109,0,225 ; vpshufb %ymm9,%ymm2,%ymm4
DB 196,194,101,0,249 ; vpshufb %ymm9,%ymm3,%ymm7
DB 196,65,37,118,219 ; vpcmpeqd %ymm11,%ymm11,%ymm11
@@ -41532,7 +41532,7 @@ _sk_xor__hsw_8bit LABEL PROC
PUBLIC _sk_darken_hsw_8bit
_sk_darken_hsw_8bit LABEL PROC
- DB 197,125,111,5,85,8,0,0 ; vmovdqa 0x855(%rip),%ymm8 # 3120 <_sk_difference_hsw_8bit+0x48c>
+ DB 197,125,111,5,117,15,0,0 ; vmovdqa 0xf75(%rip),%ymm8 # 3840 <_sk_overlay_hsw_8bit+0x699>
DB 196,194,101,0,224 ; vpshufb %ymm8,%ymm3,%ymm4
DB 196,194,109,0,240 ; vpshufb %ymm8,%ymm2,%ymm6
DB 196,98,125,48,208 ; vpmovzxbw %xmm0,%ymm10
@@ -41605,7 +41605,7 @@ _sk_darken_hsw_8bit LABEL PROC
DB 197,253,248,246 ; vpsubb %ymm6,%ymm0,%ymm6
DB 197,245,248,205 ; vpsubb %ymm5,%ymm1,%ymm1
DB 197,253,248,196 ; vpsubb %ymm4,%ymm0,%ymm0
- DB 196,226,125,88,37,254,3,0,0 ; vpbroadcastd 0x3fe(%rip),%ymm4 # 2e38 <_sk_difference_hsw_8bit+0x1a4>
+ DB 196,226,125,88,37,10,11,0,0 ; vpbroadcastd 0xb0a(%rip),%ymm4 # 3544 <_sk_overlay_hsw_8bit+0x39d>
DB 196,227,125,76,198,64 ; vpblendvb %ymm4,%ymm6,%ymm0,%ymm0
DB 196,227,117,76,207,64 ; vpblendvb %ymm4,%ymm7,%ymm1,%ymm1
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -41613,7 +41613,7 @@ _sk_darken_hsw_8bit LABEL PROC
PUBLIC _sk_lighten_hsw_8bit
_sk_lighten_hsw_8bit LABEL PROC
- DB 197,125,111,5,238,6,0,0 ; vmovdqa 0x6ee(%rip),%ymm8 # 3140 <_sk_difference_hsw_8bit+0x4ac>
+ DB 197,125,111,5,14,14,0,0 ; vmovdqa 0xe0e(%rip),%ymm8 # 3860 <_sk_overlay_hsw_8bit+0x6b9>
DB 196,194,101,0,224 ; vpshufb %ymm8,%ymm3,%ymm4
DB 196,194,109,0,240 ; vpshufb %ymm8,%ymm2,%ymm6
DB 196,98,125,48,208 ; vpmovzxbw %xmm0,%ymm10
@@ -41686,7 +41686,7 @@ _sk_lighten_hsw_8bit LABEL PROC
DB 197,253,248,246 ; vpsubb %ymm6,%ymm0,%ymm6
DB 197,245,248,205 ; vpsubb %ymm5,%ymm1,%ymm1
DB 197,253,248,196 ; vpsubb %ymm4,%ymm0,%ymm0
- DB 196,226,125,88,37,123,2,0,0 ; vpbroadcastd 0x27b(%rip),%ymm4 # 2e3c <_sk_difference_hsw_8bit+0x1a8>
+ DB 196,226,125,88,37,135,9,0,0 ; vpbroadcastd 0x987(%rip),%ymm4 # 3548 <_sk_overlay_hsw_8bit+0x3a1>
DB 196,227,125,76,198,64 ; vpblendvb %ymm4,%ymm6,%ymm0,%ymm0
DB 196,227,117,76,207,64 ; vpblendvb %ymm4,%ymm7,%ymm1,%ymm1
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -41724,7 +41724,7 @@ _sk_exclusion_hsw_8bit LABEL PROC
DB 196,227,77,56,252,1 ; vinserti128 $0x1,%xmm4,%ymm6,%ymm7
DB 196,227,77,70,228,49 ; vperm2i128 $0x31,%ymm4,%ymm6,%ymm4
DB 197,197,103,228 ; vpackuswb %ymm4,%ymm7,%ymm4
- DB 197,253,111,53,240,4,0,0 ; vmovdqa 0x4f0(%rip),%ymm6 # 3160 <_sk_difference_hsw_8bit+0x4cc>
+ DB 197,253,111,53,16,12,0,0 ; vmovdqa 0xc10(%rip),%ymm6 # 3880 <_sk_overlay_hsw_8bit+0x6d9>
DB 197,221,219,254 ; vpand %ymm6,%ymm4,%ymm7
DB 197,213,219,246 ; vpand %ymm6,%ymm5,%ymm6
DB 197,237,252,192 ; vpaddb %ymm0,%ymm2,%ymm0
@@ -41738,7 +41738,7 @@ _sk_exclusion_hsw_8bit LABEL PROC
PUBLIC _sk_difference_hsw_8bit
_sk_difference_hsw_8bit LABEL PROC
- DB 197,125,111,5,228,4,0,0 ; vmovdqa 0x4e4(%rip),%ymm8 # 3180 <_sk_difference_hsw_8bit+0x4ec>
+ DB 197,125,111,5,4,12,0,0 ; vmovdqa 0xc04(%rip),%ymm8 # 38a0 <_sk_overlay_hsw_8bit+0x6f9>
DB 196,194,101,0,224 ; vpshufb %ymm8,%ymm3,%ymm4
DB 196,194,109,0,240 ; vpshufb %ymm8,%ymm2,%ymm6
DB 196,98,125,48,208 ; vpmovzxbw %xmm0,%ymm10
@@ -41805,7 +41805,7 @@ _sk_difference_hsw_8bit LABEL PROC
DB 197,197,103,237 ; vpackuswb %ymm5,%ymm7,%ymm5
DB 197,181,218,228 ; vpminub %ymm4,%ymm9,%ymm4
DB 197,165,218,237 ; vpminub %ymm5,%ymm11,%ymm5
- DB 197,253,111,53,174,3,0,0 ; vmovdqa 0x3ae(%rip),%ymm6 # 31a0 <_sk_difference_hsw_8bit+0x50c>
+ DB 197,253,111,53,206,10,0,0 ; vmovdqa 0xace(%rip),%ymm6 # 38c0 <_sk_overlay_hsw_8bit+0x719>
DB 197,213,219,254 ; vpand %ymm6,%ymm5,%ymm7
DB 197,221,219,246 ; vpand %ymm6,%ymm4,%ymm6
DB 197,237,252,192 ; vpaddb %ymm0,%ymm2,%ymm0
@@ -41817,9 +41817,350 @@ _sk_difference_hsw_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
+PUBLIC _sk_hardlight_hsw_8bit
+_sk_hardlight_hsw_8bit LABEL PROC
+ DB 72,129,236,56,2,0,0 ; sub $0x238,%rsp
+ DB 197,252,17,28,36 ; vmovups %ymm3,(%rsp)
+ DB 196,226,125,48,248 ; vpmovzxbw %xmm0,%ymm7
+ DB 196,227,125,57,195,1 ; vextracti128 $0x1,%ymm0,%xmm3
+ DB 196,226,125,48,243 ; vpmovzxbw %xmm3,%ymm6
+ DB 196,98,125,48,217 ; vpmovzxbw %xmm1,%ymm11
+ DB 197,126,127,156,36,0,1,0,0 ; vmovdqu %ymm11,0x100(%rsp)
+ DB 196,227,125,57,203,1 ; vextracti128 $0x1,%ymm1,%xmm3
+ DB 196,98,125,48,195 ; vpmovzxbw %xmm3,%ymm8
+ DB 197,126,127,132,36,64,1,0,0 ; vmovdqu %ymm8,0x140(%rsp)
+ DB 197,253,111,29,132,10,0,0 ; vmovdqa 0xa84(%rip),%ymm3 # 38e0 <_sk_overlay_hsw_8bit+0x739>
+ DB 196,226,125,0,227 ; vpshufb %ymm3,%ymm0,%ymm4
+ DB 196,226,117,0,203 ; vpshufb %ymm3,%ymm1,%ymm1
+ DB 197,125,111,251 ; vmovdqa %ymm3,%ymm15
+ DB 196,227,125,57,200,1 ; vextracti128 $0x1,%ymm1,%xmm0
+ DB 196,98,125,48,232 ; vpmovzxbw %xmm0,%ymm13
+ DB 196,226,125,48,233 ; vpmovzxbw %xmm1,%ymm5
+ DB 196,227,125,57,227,1 ; vextracti128 $0x1,%ymm4,%xmm3
+ DB 196,98,125,48,203 ; vpmovzxbw %xmm3,%ymm9
+ DB 196,98,125,48,212 ; vpmovzxbw %xmm4,%ymm10
+ DB 197,197,253,199 ; vpaddw %ymm7,%ymm7,%ymm0
+ DB 197,205,253,222 ; vpaddw %ymm6,%ymm6,%ymm3
+ DB 196,65,37,253,227 ; vpaddw %ymm11,%ymm11,%ymm12
+ DB 196,65,61,253,216 ; vpaddw %ymm8,%ymm8,%ymm11
+ DB 196,66,37,58,245 ; vpminuw %ymm13,%ymm11,%ymm14
+ DB 196,193,37,117,206 ; vpcmpeqw %ymm14,%ymm11,%ymm1
+ DB 197,254,127,140,36,224,1,0,0 ; vmovdqu %ymm1,0x1e0(%rsp)
+ DB 196,98,29,58,245 ; vpminuw %ymm5,%ymm12,%ymm14
+ DB 196,193,29,117,206 ; vpcmpeqw %ymm14,%ymm12,%ymm1
+ DB 197,254,127,140,36,192,1,0,0 ; vmovdqu %ymm1,0x1c0(%rsp)
+ DB 196,66,101,58,241 ; vpminuw %ymm9,%ymm3,%ymm14
+ DB 196,193,101,117,206 ; vpcmpeqw %ymm14,%ymm3,%ymm1
+ DB 197,254,127,140,36,128,1,0,0 ; vmovdqu %ymm1,0x180(%rsp)
+ DB 196,194,125,58,218 ; vpminuw %ymm10,%ymm0,%ymm3
+ DB 197,125,117,243 ; vpcmpeqw %ymm3,%ymm0,%ymm14
+ DB 197,253,111,226 ; vmovdqa %ymm2,%ymm4
+ DB 196,65,125,111,231 ; vmovdqa %ymm15,%ymm12
+ DB 196,194,93,0,220 ; vpshufb %ymm12,%ymm4,%ymm3
+ DB 196,226,125,48,211 ; vpmovzxbw %xmm3,%ymm2
+ DB 196,226,125,121,5,6,10,0,0 ; vpbroadcastw 0xa06(%rip),%ymm0 # 3900 <_sk_overlay_hsw_8bit+0x759>
+ DB 197,109,239,248 ; vpxor %ymm0,%ymm2,%ymm15
+ DB 197,133,213,207 ; vpmullw %ymm7,%ymm15,%ymm1
+ DB 197,254,127,140,36,160,1,0,0 ; vmovdqu %ymm1,0x1a0(%rsp)
+ DB 196,226,125,48,204 ; vpmovzxbw %xmm4,%ymm1
+ DB 197,254,127,164,36,0,2,0,0 ; vmovdqu %ymm4,0x200(%rsp)
+ DB 197,45,239,248 ; vpxor %ymm0,%ymm10,%ymm15
+ DB 197,5,213,193 ; vpmullw %ymm1,%ymm15,%ymm8
+ DB 197,126,127,132,36,96,1,0,0 ; vmovdqu %ymm8,0x160(%rsp)
+ DB 197,117,213,199 ; vpmullw %ymm7,%ymm1,%ymm8
+ DB 197,126,127,132,36,32,1,0,0 ; vmovdqu %ymm8,0x120(%rsp)
+ DB 197,237,249,201 ; vpsubw %ymm1,%ymm2,%ymm1
+ DB 197,254,127,76,36,64 ; vmovdqu %ymm1,0x40(%rsp)
+ DB 196,193,109,213,202 ; vpmullw %ymm10,%ymm2,%ymm1
+ DB 197,254,127,140,36,160,0,0,0 ; vmovdqu %ymm1,0xa0(%rsp)
+ DB 197,45,249,223 ; vpsubw %ymm7,%ymm10,%ymm11
+ DB 196,227,125,57,226,1 ; vextracti128 $0x1,%ymm4,%xmm2
+ DB 196,226,125,48,210 ; vpmovzxbw %xmm2,%ymm2
+ DB 196,227,125,57,219,1 ; vextracti128 $0x1,%ymm3,%xmm3
+ DB 196,226,125,48,219 ; vpmovzxbw %xmm3,%ymm3
+ DB 197,101,239,208 ; vpxor %ymm0,%ymm3,%ymm10
+ DB 197,173,213,206 ; vpmullw %ymm6,%ymm10,%ymm1
+ DB 197,254,127,140,36,224,0,0,0 ; vmovdqu %ymm1,0xe0(%rsp)
+ DB 197,53,239,208 ; vpxor %ymm0,%ymm9,%ymm10
+ DB 197,173,213,202 ; vpmullw %ymm2,%ymm10,%ymm1
+ DB 197,254,127,140,36,192,0,0,0 ; vmovdqu %ymm1,0xc0(%rsp)
+ DB 197,237,213,206 ; vpmullw %ymm6,%ymm2,%ymm1
+ DB 197,254,127,76,36,32 ; vmovdqu %ymm1,0x20(%rsp)
+ DB 197,229,249,226 ; vpsubw %ymm2,%ymm3,%ymm4
+ DB 196,65,101,213,193 ; vpmullw %ymm9,%ymm3,%ymm8
+ DB 197,181,249,246 ; vpsubw %ymm6,%ymm9,%ymm6
+ DB 197,254,111,60,36 ; vmovdqu (%rsp),%ymm7
+ DB 196,66,69,0,204 ; vpshufb %ymm12,%ymm7,%ymm9
+ DB 196,194,125,48,209 ; vpmovzxbw %xmm9,%ymm2
+ DB 197,109,239,248 ; vpxor %ymm0,%ymm2,%ymm15
+ DB 197,126,111,164,36,0,1,0,0 ; vmovdqu 0x100(%rsp),%ymm12
+ DB 196,193,5,213,204 ; vpmullw %ymm12,%ymm15,%ymm1
+ DB 197,254,127,140,36,128,0,0,0 ; vmovdqu %ymm1,0x80(%rsp)
+ DB 196,226,125,48,207 ; vpmovzxbw %xmm7,%ymm1
+ DB 197,85,239,248 ; vpxor %ymm0,%ymm5,%ymm15
+ DB 197,133,213,217 ; vpmullw %ymm1,%ymm15,%ymm3
+ DB 197,254,127,92,36,96 ; vmovdqu %ymm3,0x60(%rsp)
+ DB 196,65,117,213,212 ; vpmullw %ymm12,%ymm1,%ymm10
+ DB 197,237,249,217 ; vpsubw %ymm1,%ymm2,%ymm3
+ DB 197,237,213,213 ; vpmullw %ymm5,%ymm2,%ymm2
+ DB 196,193,85,249,236 ; vpsubw %ymm12,%ymm5,%ymm5
+ DB 196,195,125,57,255,1 ; vextracti128 $0x1,%ymm7,%xmm15
+ DB 196,66,125,48,255 ; vpmovzxbw %xmm15,%ymm15
+ DB 196,99,125,57,207,1 ; vextracti128 $0x1,%ymm9,%xmm7
+ DB 196,226,125,48,255 ; vpmovzxbw %xmm7,%ymm7
+ DB 197,69,239,200 ; vpxor %ymm0,%ymm7,%ymm9
+ DB 197,254,111,140,36,64,1,0,0 ; vmovdqu 0x140(%rsp),%ymm1
+ DB 197,53,213,225 ; vpmullw %ymm1,%ymm9,%ymm12
+ DB 197,149,239,192 ; vpxor %ymm0,%ymm13,%ymm0
+ DB 196,65,125,213,207 ; vpmullw %ymm15,%ymm0,%ymm9
+ DB 197,133,213,193 ; vpmullw %ymm1,%ymm15,%ymm0
+ DB 196,65,69,249,255 ; vpsubw %ymm15,%ymm7,%ymm15
+ DB 196,193,69,213,253 ; vpmullw %ymm13,%ymm7,%ymm7
+ DB 197,21,249,233 ; vpsubw %ymm1,%ymm13,%ymm13
+ DB 196,65,5,213,237 ; vpmullw %ymm13,%ymm15,%ymm13
+ DB 197,229,213,205 ; vpmullw %ymm5,%ymm3,%ymm1
+ DB 197,221,213,230 ; vpmullw %ymm6,%ymm4,%ymm4
+ DB 197,165,213,108,36,64 ; vpmullw 0x40(%rsp),%ymm11,%ymm5
+ DB 196,193,21,253,245 ; vpaddw %ymm13,%ymm13,%ymm6
+ DB 197,197,249,246 ; vpsubw %ymm6,%ymm7,%ymm6
+ DB 197,245,253,201 ; vpaddw %ymm1,%ymm1,%ymm1
+ DB 197,237,249,201 ; vpsubw %ymm1,%ymm2,%ymm1
+ DB 197,221,253,212 ; vpaddw %ymm4,%ymm4,%ymm2
+ DB 197,189,249,210 ; vpsubw %ymm2,%ymm8,%ymm2
+ DB 197,213,253,221 ; vpaddw %ymm5,%ymm5,%ymm3
+ DB 197,254,111,164,36,160,0,0,0 ; vmovdqu 0xa0(%rsp),%ymm4
+ DB 197,221,249,219 ; vpsubw %ymm3,%ymm4,%ymm3
+ DB 197,254,111,164,36,32,1,0,0 ; vmovdqu 0x120(%rsp),%ymm4
+ DB 197,221,253,228 ; vpaddw %ymm4,%ymm4,%ymm4
+ DB 197,141,223,219 ; vpandn %ymm3,%ymm14,%ymm3
+ DB 196,193,93,219,230 ; vpand %ymm14,%ymm4,%ymm4
+ DB 197,221,235,219 ; vpor %ymm3,%ymm4,%ymm3
+ DB 197,254,111,100,36,32 ; vmovdqu 0x20(%rsp),%ymm4
+ DB 197,221,253,228 ; vpaddw %ymm4,%ymm4,%ymm4
+ DB 197,254,111,172,36,128,1,0,0 ; vmovdqu 0x180(%rsp),%ymm5
+ DB 197,213,223,210 ; vpandn %ymm2,%ymm5,%ymm2
+ DB 197,221,219,229 ; vpand %ymm5,%ymm4,%ymm4
+ DB 197,221,235,210 ; vpor %ymm2,%ymm4,%ymm2
+ DB 196,193,45,253,226 ; vpaddw %ymm10,%ymm10,%ymm4
+ DB 197,254,111,172,36,192,1,0,0 ; vmovdqu 0x1c0(%rsp),%ymm5
+ DB 197,213,223,201 ; vpandn %ymm1,%ymm5,%ymm1
+ DB 197,221,219,229 ; vpand %ymm5,%ymm4,%ymm4
+ DB 197,221,235,201 ; vpor %ymm1,%ymm4,%ymm1
+ DB 197,253,253,192 ; vpaddw %ymm0,%ymm0,%ymm0
+ DB 197,254,111,172,36,224,1,0,0 ; vmovdqu 0x1e0(%rsp),%ymm5
+ DB 197,213,223,230 ; vpandn %ymm6,%ymm5,%ymm4
+ DB 197,253,219,197 ; vpand %ymm5,%ymm0,%ymm0
+ DB 197,253,235,196 ; vpor %ymm4,%ymm0,%ymm0
+ DB 197,254,111,164,36,96,1,0,0 ; vmovdqu 0x160(%rsp),%ymm4
+ DB 197,221,253,164,36,160,1,0,0 ; vpaddw 0x1a0(%rsp),%ymm4,%ymm4
+ DB 197,254,111,172,36,192,0,0,0 ; vmovdqu 0xc0(%rsp),%ymm5
+ DB 197,213,253,172,36,224,0,0,0 ; vpaddw 0xe0(%rsp),%ymm5,%ymm5
+ DB 197,254,111,116,36,96 ; vmovdqu 0x60(%rsp),%ymm6
+ DB 197,205,253,180,36,128,0,0,0 ; vpaddw 0x80(%rsp),%ymm6,%ymm6
+ DB 196,193,53,253,252 ; vpaddw %ymm12,%ymm9,%ymm7
+ DB 196,98,125,121,5,229,7,0,0 ; vpbroadcastw 0x7e5(%rip),%ymm8 # 3902 <_sk_overlay_hsw_8bit+0x75b>
+ DB 196,193,93,253,224 ; vpaddw %ymm8,%ymm4,%ymm4
+ DB 197,229,253,220 ; vpaddw %ymm4,%ymm3,%ymm3
+ DB 196,193,85,253,224 ; vpaddw %ymm8,%ymm5,%ymm4
+ DB 197,237,253,212 ; vpaddw %ymm4,%ymm2,%ymm2
+ DB 196,193,77,253,224 ; vpaddw %ymm8,%ymm6,%ymm4
+ DB 197,245,253,204 ; vpaddw %ymm4,%ymm1,%ymm1
+ DB 196,193,69,253,224 ; vpaddw %ymm8,%ymm7,%ymm4
+ DB 197,253,253,196 ; vpaddw %ymm4,%ymm0,%ymm0
+ DB 196,226,125,121,37,186,7,0,0 ; vpbroadcastw 0x7ba(%rip),%ymm4 # 3904 <_sk_overlay_hsw_8bit+0x75d>
+ DB 197,253,228,196 ; vpmulhuw %ymm4,%ymm0,%ymm0
+ DB 197,245,228,204 ; vpmulhuw %ymm4,%ymm1,%ymm1
+ DB 197,237,228,212 ; vpmulhuw %ymm4,%ymm2,%ymm2
+ DB 197,237,113,210,7 ; vpsrlw $0x7,%ymm2,%ymm2
+ DB 197,229,228,220 ; vpmulhuw %ymm4,%ymm3,%ymm3
+ DB 197,229,113,211,7 ; vpsrlw $0x7,%ymm3,%ymm3
+ DB 196,227,101,56,226,1 ; vinserti128 $0x1,%xmm2,%ymm3,%ymm4
+ DB 196,227,101,70,210,49 ; vperm2i128 $0x31,%ymm2,%ymm3,%ymm2
+ DB 197,229,113,208,7 ; vpsrlw $0x7,%ymm0,%ymm3
+ DB 197,245,113,209,7 ; vpsrlw $0x7,%ymm1,%ymm1
+ DB 197,221,103,194 ; vpackuswb %ymm2,%ymm4,%ymm0
+ DB 196,227,117,56,211,1 ; vinserti128 $0x1,%xmm3,%ymm1,%ymm2
+ DB 196,227,117,70,203,49 ; vperm2i128 $0x31,%ymm3,%ymm1,%ymm1
+ DB 197,237,103,201 ; vpackuswb %ymm1,%ymm2,%ymm1
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 197,252,16,148,36,0,2,0,0 ; vmovups 0x200(%rsp),%ymm2
+ DB 197,252,16,28,36 ; vmovups (%rsp),%ymm3
+ DB 72,129,196,56,2,0,0 ; add $0x238,%rsp
+ DB 255,224 ; jmpq *%rax
+
+PUBLIC _sk_overlay_hsw_8bit
+_sk_overlay_hsw_8bit LABEL PROC
+ DB 72,129,236,24,2,0,0 ; sub $0x218,%rsp
+ DB 197,252,17,140,36,192,1,0,0 ; vmovups %ymm1,0x1c0(%rsp)
+ DB 197,253,111,45,97,7,0,0 ; vmovdqa 0x761(%rip),%ymm5 # 3920 <_sk_overlay_hsw_8bit+0x779>
+ DB 197,253,111,226 ; vmovdqa %ymm2,%ymm4
+ DB 196,227,125,57,225,1 ; vextracti128 $0x1,%ymm4,%xmm1
+ DB 196,98,125,48,193 ; vpmovzxbw %xmm1,%ymm8
+ DB 197,254,127,156,36,224,1,0,0 ; vmovdqu %ymm3,0x1e0(%rsp)
+ DB 196,226,125,48,251 ; vpmovzxbw %xmm3,%ymm7
+ DB 196,227,125,57,217,1 ; vextracti128 $0x1,%ymm3,%xmm1
+ DB 196,98,125,48,241 ; vpmovzxbw %xmm1,%ymm14
+ DB 196,226,93,0,205 ; vpshufb %ymm5,%ymm4,%ymm1
+ DB 197,254,127,164,36,160,1,0,0 ; vmovdqu %ymm4,0x1a0(%rsp)
+ DB 196,226,101,0,221 ; vpshufb %ymm5,%ymm3,%ymm3
+ DB 196,227,125,57,222,1 ; vextracti128 $0x1,%ymm3,%xmm6
+ DB 196,226,125,48,246 ; vpmovzxbw %xmm6,%ymm6
+ DB 196,98,125,48,203 ; vpmovzxbw %xmm3,%ymm9
+ DB 196,227,125,57,203,1 ; vextracti128 $0x1,%ymm1,%xmm3
+ DB 196,98,125,48,219 ; vpmovzxbw %xmm3,%ymm11
+ DB 196,193,61,253,216 ; vpaddw %ymm8,%ymm8,%ymm3
+ DB 197,69,253,231 ; vpaddw %ymm7,%ymm7,%ymm12
+ DB 196,65,13,253,214 ; vpaddw %ymm14,%ymm14,%ymm10
+ DB 196,98,45,58,238 ; vpminuw %ymm6,%ymm10,%ymm13
+ DB 196,193,45,117,213 ; vpcmpeqw %ymm13,%ymm10,%ymm2
+ DB 197,254,127,148,36,128,1,0,0 ; vmovdqu %ymm2,0x180(%rsp)
+ DB 196,66,29,58,233 ; vpminuw %ymm9,%ymm12,%ymm13
+ DB 196,193,29,117,213 ; vpcmpeqw %ymm13,%ymm12,%ymm2
+ DB 197,254,127,148,36,96,1,0,0 ; vmovdqu %ymm2,0x160(%rsp)
+ DB 196,66,101,58,235 ; vpminuw %ymm11,%ymm3,%ymm13
+ DB 196,193,101,117,213 ; vpcmpeqw %ymm13,%ymm3,%ymm2
+ DB 197,254,127,148,36,32,1,0,0 ; vmovdqu %ymm2,0x120(%rsp)
+ DB 196,226,125,48,212 ; vpmovzxbw %xmm4,%ymm2
+ DB 196,226,125,48,201 ; vpmovzxbw %xmm1,%ymm1
+ DB 197,237,253,226 ; vpaddw %ymm2,%ymm2,%ymm4
+ DB 196,226,93,58,217 ; vpminuw %ymm1,%ymm4,%ymm3
+ DB 197,93,117,251 ; vpcmpeqw %ymm3,%ymm4,%ymm15
+ DB 197,253,111,216 ; vmovdqa %ymm0,%ymm3
+ DB 196,226,125,48,227 ; vpmovzxbw %xmm3,%ymm4
+ DB 196,226,125,121,5,187,6,0,0 ; vpbroadcastw 0x6bb(%rip),%ymm0 # 3940 <_sk_overlay_hsw_8bit+0x799>
+ DB 197,117,239,232 ; vpxor %ymm0,%ymm1,%ymm13
+ DB 197,21,213,212 ; vpmullw %ymm4,%ymm13,%ymm10
+ DB 197,126,127,148,36,64,1,0,0 ; vmovdqu %ymm10,0x140(%rsp)
+ DB 197,125,111,229 ; vmovdqa %ymm5,%ymm12
+ DB 196,194,101,0,236 ; vpshufb %ymm12,%ymm3,%ymm5
+ DB 197,125,111,211 ; vmovdqa %ymm3,%ymm10
+ DB 196,226,125,48,221 ; vpmovzxbw %xmm5,%ymm3
+ DB 197,101,239,232 ; vpxor %ymm0,%ymm3,%ymm13
+ DB 197,21,213,234 ; vpmullw %ymm2,%ymm13,%ymm13
+ DB 197,126,127,172,36,0,1,0,0 ; vmovdqu %ymm13,0x100(%rsp)
+ DB 197,109,213,236 ; vpmullw %ymm4,%ymm2,%ymm13
+ DB 197,126,127,172,36,224,0,0,0 ; vmovdqu %ymm13,0xe0(%rsp)
+ DB 197,245,249,210 ; vpsubw %ymm2,%ymm1,%ymm2
+ DB 197,254,127,84,36,32 ; vmovdqu %ymm2,0x20(%rsp)
+ DB 197,245,213,203 ; vpmullw %ymm3,%ymm1,%ymm1
+ DB 197,254,127,140,36,128,0,0,0 ; vmovdqu %ymm1,0x80(%rsp)
+ DB 197,229,249,220 ; vpsubw %ymm4,%ymm3,%ymm3
+ DB 196,99,125,57,212,1 ; vextracti128 $0x1,%ymm10,%xmm4
+ DB 196,226,125,48,228 ; vpmovzxbw %xmm4,%ymm4
+ DB 196,227,125,57,237,1 ; vextracti128 $0x1,%ymm5,%xmm5
+ DB 196,226,125,48,237 ; vpmovzxbw %xmm5,%ymm5
+ DB 197,37,239,232 ; vpxor %ymm0,%ymm11,%ymm13
+ DB 197,149,213,204 ; vpmullw %ymm4,%ymm13,%ymm1
+ DB 197,254,127,140,36,192,0,0,0 ; vmovdqu %ymm1,0xc0(%rsp)
+ DB 197,85,239,232 ; vpxor %ymm0,%ymm5,%ymm13
+ DB 196,193,21,213,200 ; vpmullw %ymm8,%ymm13,%ymm1
+ DB 197,254,127,140,36,160,0,0,0 ; vmovdqu %ymm1,0xa0(%rsp)
+ DB 197,189,213,204 ; vpmullw %ymm4,%ymm8,%ymm1
+ DB 197,254,127,12,36 ; vmovdqu %ymm1,(%rsp)
+ DB 196,65,37,249,192 ; vpsubw %ymm8,%ymm11,%ymm8
+ DB 197,37,213,221 ; vpmullw %ymm5,%ymm11,%ymm11
+ DB 197,213,249,228 ; vpsubw %ymm4,%ymm5,%ymm4
+ DB 197,254,111,140,36,192,1,0,0 ; vmovdqu 0x1c0(%rsp),%ymm1
+ DB 196,194,117,0,236 ; vpshufb %ymm12,%ymm1,%ymm5
+ DB 196,226,125,48,209 ; vpmovzxbw %xmm1,%ymm2
+ DB 197,125,111,225 ; vmovdqa %ymm1,%ymm12
+ DB 197,53,239,232 ; vpxor %ymm0,%ymm9,%ymm13
+ DB 197,149,213,202 ; vpmullw %ymm2,%ymm13,%ymm1
+ DB 197,254,127,76,36,96 ; vmovdqu %ymm1,0x60(%rsp)
+ DB 196,226,125,48,205 ; vpmovzxbw %xmm5,%ymm1
+ DB 197,117,239,232 ; vpxor %ymm0,%ymm1,%ymm13
+ DB 197,21,213,215 ; vpmullw %ymm7,%ymm13,%ymm10
+ DB 197,126,127,84,36,64 ; vmovdqu %ymm10,0x40(%rsp)
+ DB 197,69,213,210 ; vpmullw %ymm2,%ymm7,%ymm10
+ DB 197,181,249,255 ; vpsubw %ymm7,%ymm9,%ymm7
+ DB 197,53,213,201 ; vpmullw %ymm1,%ymm9,%ymm9
+ DB 197,245,249,202 ; vpsubw %ymm2,%ymm1,%ymm1
+ DB 196,99,125,57,226,1 ; vextracti128 $0x1,%ymm12,%xmm2
+ DB 196,226,125,48,210 ; vpmovzxbw %xmm2,%ymm2
+ DB 196,227,125,57,237,1 ; vextracti128 $0x1,%ymm5,%xmm5
+ DB 196,226,125,48,237 ; vpmovzxbw %xmm5,%ymm5
+ DB 197,77,239,232 ; vpxor %ymm0,%ymm6,%ymm13
+ DB 197,21,213,234 ; vpmullw %ymm2,%ymm13,%ymm13
+ DB 197,213,239,192 ; vpxor %ymm0,%ymm5,%ymm0
+ DB 196,65,125,213,230 ; vpmullw %ymm14,%ymm0,%ymm12
+ DB 197,141,213,194 ; vpmullw %ymm2,%ymm14,%ymm0
+ DB 196,65,77,249,246 ; vpsubw %ymm14,%ymm6,%ymm14
+ DB 197,205,213,245 ; vpmullw %ymm5,%ymm6,%ymm6
+ DB 197,213,249,210 ; vpsubw %ymm2,%ymm5,%ymm2
+ DB 197,141,213,210 ; vpmullw %ymm2,%ymm14,%ymm2
+ DB 197,197,213,201 ; vpmullw %ymm1,%ymm7,%ymm1
+ DB 197,189,213,228 ; vpmullw %ymm4,%ymm8,%ymm4
+ DB 197,229,213,92,36,32 ; vpmullw 0x20(%rsp),%ymm3,%ymm3
+ DB 197,237,253,210 ; vpaddw %ymm2,%ymm2,%ymm2
+ DB 197,205,249,210 ; vpsubw %ymm2,%ymm6,%ymm2
+ DB 197,245,253,201 ; vpaddw %ymm1,%ymm1,%ymm1
+ DB 197,181,249,201 ; vpsubw %ymm1,%ymm9,%ymm1
+ DB 197,221,253,228 ; vpaddw %ymm4,%ymm4,%ymm4
+ DB 197,165,249,228 ; vpsubw %ymm4,%ymm11,%ymm4
+ DB 197,229,253,219 ; vpaddw %ymm3,%ymm3,%ymm3
+ DB 197,254,111,172,36,128,0,0,0 ; vmovdqu 0x80(%rsp),%ymm5
+ DB 197,213,249,219 ; vpsubw %ymm3,%ymm5,%ymm3
+ DB 197,254,111,172,36,224,0,0,0 ; vmovdqu 0xe0(%rsp),%ymm5
+ DB 197,213,253,237 ; vpaddw %ymm5,%ymm5,%ymm5
+ DB 197,133,223,219 ; vpandn %ymm3,%ymm15,%ymm3
+ DB 196,193,85,219,239 ; vpand %ymm15,%ymm5,%ymm5
+ DB 197,213,235,219 ; vpor %ymm3,%ymm5,%ymm3
+ DB 197,254,111,44,36 ; vmovdqu (%rsp),%ymm5
+ DB 197,213,253,237 ; vpaddw %ymm5,%ymm5,%ymm5
+ DB 197,254,111,180,36,32,1,0,0 ; vmovdqu 0x120(%rsp),%ymm6
+ DB 197,205,223,228 ; vpandn %ymm4,%ymm6,%ymm4
+ DB 197,213,219,238 ; vpand %ymm6,%ymm5,%ymm5
+ DB 197,213,235,228 ; vpor %ymm4,%ymm5,%ymm4
+ DB 196,193,45,253,234 ; vpaddw %ymm10,%ymm10,%ymm5
+ DB 197,254,111,180,36,96,1,0,0 ; vmovdqu 0x160(%rsp),%ymm6
+ DB 197,205,223,201 ; vpandn %ymm1,%ymm6,%ymm1
+ DB 197,213,219,238 ; vpand %ymm6,%ymm5,%ymm5
+ DB 197,213,235,201 ; vpor %ymm1,%ymm5,%ymm1
+ DB 197,253,253,192 ; vpaddw %ymm0,%ymm0,%ymm0
+ DB 197,254,111,172,36,128,1,0,0 ; vmovdqu 0x180(%rsp),%ymm5
+ DB 197,213,223,210 ; vpandn %ymm2,%ymm5,%ymm2
+ DB 197,253,219,197 ; vpand %ymm5,%ymm0,%ymm0
+ DB 197,253,235,194 ; vpor %ymm2,%ymm0,%ymm0
+ DB 197,254,111,148,36,0,1,0,0 ; vmovdqu 0x100(%rsp),%ymm2
+ DB 197,237,253,148,36,64,1,0,0 ; vpaddw 0x140(%rsp),%ymm2,%ymm2
+ DB 197,254,111,172,36,160,0,0,0 ; vmovdqu 0xa0(%rsp),%ymm5
+ DB 197,213,253,172,36,192,0,0,0 ; vpaddw 0xc0(%rsp),%ymm5,%ymm5
+ DB 197,254,111,116,36,64 ; vmovdqu 0x40(%rsp),%ymm6
+ DB 197,205,253,116,36,96 ; vpaddw 0x60(%rsp),%ymm6,%ymm6
+ DB 196,193,29,253,253 ; vpaddw %ymm13,%ymm12,%ymm7
+ DB 196,98,125,121,5,174,4,0,0 ; vpbroadcastw 0x4ae(%rip),%ymm8 # 3942 <_sk_overlay_hsw_8bit+0x79b>
+ DB 196,193,109,253,208 ; vpaddw %ymm8,%ymm2,%ymm2
+ DB 197,229,253,210 ; vpaddw %ymm2,%ymm3,%ymm2
+ DB 196,193,85,253,216 ; vpaddw %ymm8,%ymm5,%ymm3
+ DB 197,221,253,219 ; vpaddw %ymm3,%ymm4,%ymm3
+ DB 196,193,77,253,224 ; vpaddw %ymm8,%ymm6,%ymm4
+ DB 197,245,253,204 ; vpaddw %ymm4,%ymm1,%ymm1
+ DB 196,193,69,253,224 ; vpaddw %ymm8,%ymm7,%ymm4
+ DB 197,253,253,196 ; vpaddw %ymm4,%ymm0,%ymm0
+ DB 196,226,125,121,37,131,4,0,0 ; vpbroadcastw 0x483(%rip),%ymm4 # 3944 <_sk_overlay_hsw_8bit+0x79d>
+ DB 197,253,228,196 ; vpmulhuw %ymm4,%ymm0,%ymm0
+ DB 197,245,228,204 ; vpmulhuw %ymm4,%ymm1,%ymm1
+ DB 197,229,228,220 ; vpmulhuw %ymm4,%ymm3,%ymm3
+ DB 197,229,113,211,7 ; vpsrlw $0x7,%ymm3,%ymm3
+ DB 197,237,228,212 ; vpmulhuw %ymm4,%ymm2,%ymm2
+ DB 197,237,113,210,7 ; vpsrlw $0x7,%ymm2,%ymm2
+ DB 196,227,109,56,227,1 ; vinserti128 $0x1,%xmm3,%ymm2,%ymm4
+ DB 196,227,109,70,211,49 ; vperm2i128 $0x31,%ymm3,%ymm2,%ymm2
+ DB 197,229,113,208,7 ; vpsrlw $0x7,%ymm0,%ymm3
+ DB 197,245,113,209,7 ; vpsrlw $0x7,%ymm1,%ymm1
+ DB 197,221,103,194 ; vpackuswb %ymm2,%ymm4,%ymm0
+ DB 196,227,117,56,211,1 ; vinserti128 $0x1,%xmm3,%ymm1,%ymm2
+ DB 196,227,117,70,203,49 ; vperm2i128 $0x31,%ymm3,%ymm1,%ymm1
+ DB 197,237,103,201 ; vpackuswb %ymm1,%ymm2,%ymm1
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 197,252,16,148,36,160,1,0,0 ; vmovups 0x1a0(%rsp),%ymm2
+ DB 197,252,16,156,36,224,1,0,0 ; vmovups 0x1e0(%rsp),%ymm3
+ DB 72,129,196,24,2,0,0 ; add $0x218,%rsp
+ DB 255,224 ; jmpq *%rax
+
ALIGN 4
DB 0,0 ; add %al,(%rax)
- DB 127,67 ; jg 2e5f <_sk_difference_hsw_8bit+0x1cb>
+ DB 127,67 ; jg 356b <_sk_overlay_hsw_8bit+0x3c4>
DB 1,1 ; add %eax,(%rcx)
DB 1,0 ; add %eax,(%rax)
DB 0,0 ; add %al,(%rax)
@@ -41829,9 +42170,9 @@ ALIGN 4
DB 0,0 ; add %al,(%rax)
DB 0,255 ; add %bh,%bh
DB 0,0 ; add %al,(%rax)
- DB 127,67 ; jg 2e73 <_sk_difference_hsw_8bit+0x1df>
+ DB 127,67 ; jg 357f <_sk_overlay_hsw_8bit+0x3d8>
DB 0,0 ; add %al,(%rax)
- DB 127,67 ; jg 2e77 <_sk_difference_hsw_8bit+0x1e3>
+ DB 127,67 ; jg 3583 <_sk_overlay_hsw_8bit+0x3dc>
DB 0,0 ; add %al,(%rax)
DB 0,255 ; add %bh,%bh
DB 255 ; (bad)
@@ -42388,6 +42729,66 @@ ALIGN 32
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,0 ; incl (%rax)
+ DB 3,3 ; add (%rbx),%eax
+ DB 3,3 ; add (%rbx),%eax
+ DB 7 ; (bad)
+ DB 7 ; (bad)
+ DB 7 ; (bad)
+ DB 7 ; (bad)
+ DB 11,11 ; or (%rbx),%ecx
+ DB 11,11 ; or (%rbx),%ecx
+ DB 15 ; (bad)
+ DB 15 ; (bad)
+ DB 15 ; (bad)
+ DB 15,3,3 ; lsl (%rbx),%eax
+ DB 3,3 ; add (%rbx),%eax
+ DB 7 ; (bad)
+ DB 7 ; (bad)
+ DB 7 ; (bad)
+ DB 7 ; (bad)
+ DB 11,11 ; or (%rbx),%ecx
+ DB 11,11 ; or (%rbx),%ecx
+ DB 15 ; (bad)
+ DB 15 ; (bad)
+ DB 15 ; (bad)
+ DB 15,255 ; (bad)
+ DB 0,127,0 ; add %bh,0x0(%rdi)
+ DB 129,128,0,0,0,0,0,0,0,0 ; addl $0x0,0x0(%rax)
+ DB 0,0 ; add %al,(%rax)
+ DB 0,0 ; add %al,(%rax)
+ DB 0,0 ; add %al,(%rax)
+ DB 0,0 ; add %al,(%rax)
+ DB 0,0 ; add %al,(%rax)
+ DB 0,0 ; add %al,(%rax)
+ DB 0,0 ; add %al,(%rax)
+ DB 0,0 ; add %al,(%rax)
+ DB 0,0 ; add %al,(%rax)
+ DB 3,3 ; add (%rbx),%eax
+ DB 3,3 ; add (%rbx),%eax
+ DB 7 ; (bad)
+ DB 7 ; (bad)
+ DB 7 ; (bad)
+ DB 7 ; (bad)
+ DB 11,11 ; or (%rbx),%ecx
+ DB 11,11 ; or (%rbx),%ecx
+ DB 15 ; (bad)
+ DB 15 ; (bad)
+ DB 15 ; (bad)
+ DB 15,3,3 ; lsl (%rbx),%eax
+ DB 3,3 ; add (%rbx),%eax
+ DB 7 ; (bad)
+ DB 7 ; (bad)
+ DB 7 ; (bad)
+ DB 7 ; (bad)
+ DB 11,11 ; or (%rbx),%ecx
+ DB 11,11 ; or (%rbx),%ecx
+ DB 15 ; (bad)
+ DB 15 ; (bad)
+ DB 15 ; (bad)
+ DB 15,255 ; (bad)
+ DB 0,127,0 ; add %bh,0x0(%rdi)
+ DB 129 ; .byte 0x81
+ DB 128 ; .byte 0x80
ALIGN 16
DB 0,2 ; add %al,(%rdx)
@@ -42498,7 +42899,7 @@ _sk_uniform_color_sse41_8bit LABEL PROC
PUBLIC _sk_set_rgb_sse41_8bit
_sk_set_rgb_sse41_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 243,15,16,37,88,33,0,0 ; movss 0x2158(%rip),%xmm4 # 22d8 <_sk_difference_sse41_8bit+0x169>
+ DB 243,15,16,37,152,41,0,0 ; movss 0x2998(%rip),%xmm4 # 2b18 <_sk_overlay_sse41_8bit+0x42f>
DB 243,15,16,40 ; movss (%rax),%xmm5
DB 243,15,89,236 ; mulss %xmm4,%xmm5
DB 243,72,15,44,205 ; cvttss2si %xmm5,%rcx
@@ -42513,7 +42914,7 @@ _sk_set_rgb_sse41_8bit LABEL PROC
DB 9,208 ; or %edx,%eax
DB 102,15,110,224 ; movd %eax,%xmm4
DB 102,15,112,228,0 ; pshufd $0x0,%xmm4,%xmm4
- DB 102,15,111,45,48,33,0,0 ; movdqa 0x2130(%rip),%xmm5 # 22f0 <_sk_difference_sse41_8bit+0x181>
+ DB 102,15,111,45,112,41,0,0 ; movdqa 0x2970(%rip),%xmm5 # 2b30 <_sk_overlay_sse41_8bit+0x447>
DB 102,15,219,205 ; pand %xmm5,%xmm1
DB 102,15,219,197 ; pand %xmm5,%xmm0
DB 102,15,235,196 ; por %xmm4,%xmm0
@@ -42525,12 +42926,12 @@ PUBLIC _sk_premul_sse41_8bit
_sk_premul_sse41_8bit LABEL PROC
DB 102,15,111,225 ; movdqa %xmm1,%xmm4
DB 102,15,111,232 ; movdqa %xmm0,%xmm5
- DB 102,15,111,5,28,33,0,0 ; movdqa 0x211c(%rip),%xmm0 # 2300 <_sk_difference_sse41_8bit+0x191>
+ DB 102,15,111,5,92,41,0,0 ; movdqa 0x295c(%rip),%xmm0 # 2b40 <_sk_overlay_sse41_8bit+0x457>
DB 102,15,111,253 ; movdqa %xmm5,%xmm7
DB 102,15,56,0,248 ; pshufb %xmm0,%xmm7
DB 102,15,111,244 ; movdqa %xmm4,%xmm6
DB 102,15,56,0,240 ; pshufb %xmm0,%xmm6
- DB 102,15,111,5,18,33,0,0 ; movdqa 0x2112(%rip),%xmm0 # 2310 <_sk_difference_sse41_8bit+0x1a1>
+ DB 102,15,111,5,82,41,0,0 ; movdqa 0x2952(%rip),%xmm0 # 2b50 <_sk_overlay_sse41_8bit+0x467>
DB 102,15,235,240 ; por %xmm0,%xmm6
DB 102,15,235,248 ; por %xmm0,%xmm7
DB 102,69,15,239,192 ; pxor %xmm8,%xmm8
@@ -42561,7 +42962,7 @@ _sk_premul_sse41_8bit LABEL PROC
PUBLIC _sk_swap_rb_sse41_8bit
_sk_swap_rb_sse41_8bit LABEL PROC
- DB 102,15,111,37,159,32,0,0 ; movdqa 0x209f(%rip),%xmm4 # 2320 <_sk_difference_sse41_8bit+0x1b1>
+ DB 102,15,111,37,223,40,0,0 ; movdqa 0x28df(%rip),%xmm4 # 2b60 <_sk_overlay_sse41_8bit+0x477>
DB 102,15,56,0,196 ; pshufb %xmm4,%xmm0
DB 102,15,56,0,204 ; pshufb %xmm4,%xmm1
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -42682,7 +43083,7 @@ _sk_load_8888_dst_sse41_8bit LABEL PROC
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 233,255,255,255,221 ; jmpq ffffffffde000410 <_sk_difference_sse41_8bit+0xffffffffddffe2a1>
+ DB 233,255,255,255,221 ; jmpq ffffffffde000410 <_sk_overlay_sse41_8bit+0xffffffffddffdd27>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; .byte 0xff
@@ -42761,7 +43162,7 @@ _sk_load_bgra_sse41_8bit LABEL PROC
DB 117,35 ; jne 504 <_sk_load_bgra_sse41_8bit+0x44>
DB 243,66,15,111,76,130,16 ; movdqu 0x10(%rdx,%r8,4),%xmm1
DB 243,66,15,111,4,130 ; movdqu (%rdx,%r8,4),%xmm0
- DB 102,15,111,37,58,30,0,0 ; movdqa 0x1e3a(%rip),%xmm4 # 2330 <_sk_difference_sse41_8bit+0x1c1>
+ DB 102,15,111,37,122,38,0,0 ; movdqa 0x267a(%rip),%xmm4 # 2b70 <_sk_overlay_sse41_8bit+0x487>
DB 102,15,56,0,196 ; pshufb %xmm4,%xmm0
DB 102,15,56,0,204 ; pshufb %xmm4,%xmm1
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -42822,7 +43223,7 @@ _sk_load_bgra_dst_sse41_8bit LABEL PROC
DB 117,35 ; jne 5d4 <_sk_load_bgra_dst_sse41_8bit+0x44>
DB 243,66,15,111,92,130,16 ; movdqu 0x10(%rdx,%r8,4),%xmm3
DB 243,66,15,111,20,130 ; movdqu (%rdx,%r8,4),%xmm2
- DB 102,15,111,37,122,29,0,0 ; movdqa 0x1d7a(%rip),%xmm4 # 2340 <_sk_difference_sse41_8bit+0x1d1>
+ DB 102,15,111,37,186,37,0,0 ; movdqa 0x25ba(%rip),%xmm4 # 2b80 <_sk_overlay_sse41_8bit+0x497>
DB 102,15,56,0,212 ; pshufb %xmm4,%xmm2
DB 102,15,56,0,220 ; pshufb %xmm4,%xmm3
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -42879,7 +43280,7 @@ _sk_store_bgra_sse41_8bit LABEL PROC
DB 72,15,175,209 ; imul %rcx,%rdx
DB 72,193,226,2 ; shl $0x2,%rdx
DB 72,3,16 ; add (%rax),%rdx
- DB 102,15,111,53,204,28,0,0 ; movdqa 0x1ccc(%rip),%xmm6 # 2350 <_sk_difference_sse41_8bit+0x1e1>
+ DB 102,15,111,53,12,37,0,0 ; movdqa 0x250c(%rip),%xmm6 # 2b90 <_sk_overlay_sse41_8bit+0x4a7>
DB 102,15,111,233 ; movdqa %xmm1,%xmm5
DB 102,15,56,0,238 ; pshufb %xmm6,%xmm5
DB 102,15,111,224 ; movdqa %xmm0,%xmm4
@@ -42943,7 +43344,7 @@ _sk_load_a8_sse41_8bit LABEL PROC
DB 77,133,201 ; test %r9,%r9
DB 117,42 ; jne 76b <_sk_load_a8_sse41_8bit+0x47>
DB 102,66,15,56,48,12,2 ; pmovzxbw (%rdx,%r8,1),%xmm1
- DB 102,15,219,13,16,28,0,0 ; pand 0x1c10(%rip),%xmm1 # 2360 <_sk_difference_sse41_8bit+0x1f1>
+ DB 102,15,219,13,80,36,0,0 ; pand 0x2450(%rip),%xmm1 # 2ba0 <_sk_overlay_sse41_8bit+0x4b7>
DB 102,15,239,228 ; pxor %xmm4,%xmm4
DB 102,15,56,51,193 ; pmovzxwd %xmm1,%xmm0
DB 102,15,105,204 ; punpckhwd %xmm4,%xmm1
@@ -42991,7 +43392,7 @@ _sk_load_a8_sse41_8bit LABEL PROC
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 233,255,255,255,222 ; jmpq ffffffffdf00080c <_sk_difference_sse41_8bit+0xffffffffdeffe69d>
+ DB 233,255,255,255,222 ; jmpq ffffffffdf00080c <_sk_overlay_sse41_8bit+0xffffffffdeffe123>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,211 ; callq *%rbx
@@ -43014,7 +43415,7 @@ _sk_load_a8_dst_sse41_8bit LABEL PROC
DB 77,133,201 ; test %r9,%r9
DB 117,42 ; jne 85f <_sk_load_a8_dst_sse41_8bit+0x47>
DB 102,66,15,56,48,28,2 ; pmovzxbw (%rdx,%r8,1),%xmm3
- DB 102,15,219,29,44,27,0,0 ; pand 0x1b2c(%rip),%xmm3 # 2370 <_sk_difference_sse41_8bit+0x201>
+ DB 102,15,219,29,108,35,0,0 ; pand 0x236c(%rip),%xmm3 # 2bb0 <_sk_overlay_sse41_8bit+0x4c7>
DB 102,15,239,228 ; pxor %xmm4,%xmm4
DB 102,15,56,51,211 ; pmovzxwd %xmm3,%xmm2
DB 102,15,105,220 ; punpckhwd %xmm4,%xmm3
@@ -43062,7 +43463,7 @@ _sk_load_a8_dst_sse41_8bit LABEL PROC
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 233,255,255,255,222 ; jmpq ffffffffdf000900 <_sk_difference_sse41_8bit+0xffffffffdeffe791>
+ DB 233,255,255,255,222 ; jmpq ffffffffdf000900 <_sk_overlay_sse41_8bit+0xffffffffdeffe217>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,211 ; callq *%rbx
@@ -43082,7 +43483,7 @@ _sk_store_a8_sse41_8bit LABEL PROC
DB 72,99,87,8 ; movslq 0x8(%rdi),%rdx
DB 72,15,175,209 ; imul %rcx,%rdx
DB 72,3,16 ; add (%rax),%rdx
- DB 102,15,111,45,84,26,0,0 ; movdqa 0x1a54(%rip),%xmm5 # 2380 <_sk_difference_sse41_8bit+0x211>
+ DB 102,15,111,45,148,34,0,0 ; movdqa 0x2294(%rip),%xmm5 # 2bc0 <_sk_overlay_sse41_8bit+0x4d7>
DB 102,15,111,241 ; movdqa %xmm1,%xmm6
DB 102,15,56,0,245 ; pshufb %xmm5,%xmm6
DB 102,15,111,224 ; movdqa %xmm0,%xmm4
@@ -43090,7 +43491,7 @@ _sk_store_a8_sse41_8bit LABEL PROC
DB 102,15,108,230 ; punpcklqdq %xmm6,%xmm4
DB 77,133,201 ; test %r9,%r9
DB 117,19 ; jne 95a <_sk_store_a8_sse41_8bit+0x4e>
- DB 102,15,56,0,37,96,26,0,0 ; pshufb 0x1a60(%rip),%xmm4 # 23b0 <_sk_difference_sse41_8bit+0x241>
+ DB 102,15,56,0,37,160,34,0,0 ; pshufb 0x22a0(%rip),%xmm4 # 2bf0 <_sk_overlay_sse41_8bit+0x507>
DB 102,66,15,214,36,2 ; movq %xmm4,(%rdx,%r8,1)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -43106,13 +43507,13 @@ _sk_store_a8_sse41_8bit LABEL PROC
DB 102,66,15,58,20,36,2,0 ; pextrb $0x0,%xmm4,(%rdx,%r8,1)
DB 235,209 ; jmp 956 <_sk_store_a8_sse41_8bit+0x4a>
DB 102,66,15,58,20,100,2,2,4 ; pextrb $0x4,%xmm4,0x2(%rdx,%r8,1)
- DB 102,15,56,0,37,9,26,0,0 ; pshufb 0x1a09(%rip),%xmm4 # 23a0 <_sk_difference_sse41_8bit+0x231>
+ DB 102,15,56,0,37,73,34,0,0 ; pshufb 0x2249(%rip),%xmm4 # 2be0 <_sk_overlay_sse41_8bit+0x4f7>
DB 102,66,15,58,21,36,2,0 ; pextrw $0x0,%xmm4,(%rdx,%r8,1)
DB 235,181 ; jmp 956 <_sk_store_a8_sse41_8bit+0x4a>
DB 102,66,15,58,20,100,2,6,12 ; pextrb $0xc,%xmm4,0x6(%rdx,%r8,1)
DB 102,66,15,58,20,100,2,5,10 ; pextrb $0xa,%xmm4,0x5(%rdx,%r8,1)
DB 102,66,15,58,20,100,2,4,8 ; pextrb $0x8,%xmm4,0x4(%rdx,%r8,1)
- DB 102,15,56,0,37,203,25,0,0 ; pshufb 0x19cb(%rip),%xmm4 # 2390 <_sk_difference_sse41_8bit+0x221>
+ DB 102,15,56,0,37,11,34,0,0 ; pshufb 0x220b(%rip),%xmm4 # 2bd0 <_sk_overlay_sse41_8bit+0x4e7>
DB 102,66,15,126,36,2 ; movd %xmm4,(%rdx,%r8,1)
DB 235,137 ; jmp 956 <_sk_store_a8_sse41_8bit+0x4a>
DB 15,31,0 ; nopl (%rax)
@@ -43150,14 +43551,14 @@ _sk_load_g8_sse41_8bit LABEL PROC
DB 77,133,201 ; test %r9,%r9
DB 117,66 ; jne a4b <_sk_load_g8_sse41_8bit+0x5f>
DB 102,66,15,56,48,12,2 ; pmovzxbw (%rdx,%r8,1),%xmm1
- DB 102,15,219,13,168,25,0,0 ; pand 0x19a8(%rip),%xmm1 # 23c0 <_sk_difference_sse41_8bit+0x251>
+ DB 102,15,219,13,232,33,0,0 ; pand 0x21e8(%rip),%xmm1 # 2c00 <_sk_overlay_sse41_8bit+0x517>
DB 102,15,239,228 ; pxor %xmm4,%xmm4
DB 102,15,56,51,193 ; pmovzxwd %xmm1,%xmm0
DB 102,15,105,204 ; punpckhwd %xmm4,%xmm1
- DB 102,15,111,37,163,25,0,0 ; movdqa 0x19a3(%rip),%xmm4 # 23d0 <_sk_difference_sse41_8bit+0x261>
+ DB 102,15,111,37,227,33,0,0 ; movdqa 0x21e3(%rip),%xmm4 # 2c10 <_sk_overlay_sse41_8bit+0x527>
DB 102,15,56,64,204 ; pmulld %xmm4,%xmm1
DB 102,15,56,64,196 ; pmulld %xmm4,%xmm0
- DB 102,15,111,37,161,25,0,0 ; movdqa 0x19a1(%rip),%xmm4 # 23e0 <_sk_difference_sse41_8bit+0x271>
+ DB 102,15,111,37,225,33,0,0 ; movdqa 0x21e1(%rip),%xmm4 # 2c20 <_sk_overlay_sse41_8bit+0x537>
DB 102,15,235,196 ; por %xmm4,%xmm0
DB 102,15,235,204 ; por %xmm4,%xmm1
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -43202,7 +43603,7 @@ _sk_load_g8_sse41_8bit LABEL PROC
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 232,255,255,255,221 ; callq ffffffffde000af0 <_sk_difference_sse41_8bit+0xffffffffddffe981>
+ DB 232,255,255,255,221 ; callq ffffffffde000af0 <_sk_overlay_sse41_8bit+0xffffffffddffe407>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,210 ; callq *%rdx
@@ -43225,14 +43626,14 @@ _sk_load_g8_dst_sse41_8bit LABEL PROC
DB 77,133,201 ; test %r9,%r9
DB 117,66 ; jne b5b <_sk_load_g8_dst_sse41_8bit+0x5f>
DB 102,66,15,56,48,28,2 ; pmovzxbw (%rdx,%r8,1),%xmm3
- DB 102,15,219,29,200,24,0,0 ; pand 0x18c8(%rip),%xmm3 # 23f0 <_sk_difference_sse41_8bit+0x281>
+ DB 102,15,219,29,8,33,0,0 ; pand 0x2108(%rip),%xmm3 # 2c30 <_sk_overlay_sse41_8bit+0x547>
DB 102,15,239,228 ; pxor %xmm4,%xmm4
DB 102,15,56,51,211 ; pmovzxwd %xmm3,%xmm2
DB 102,15,105,220 ; punpckhwd %xmm4,%xmm3
- DB 102,15,111,37,195,24,0,0 ; movdqa 0x18c3(%rip),%xmm4 # 2400 <_sk_difference_sse41_8bit+0x291>
+ DB 102,15,111,37,3,33,0,0 ; movdqa 0x2103(%rip),%xmm4 # 2c40 <_sk_overlay_sse41_8bit+0x557>
DB 102,15,56,64,220 ; pmulld %xmm4,%xmm3
DB 102,15,56,64,212 ; pmulld %xmm4,%xmm2
- DB 102,15,111,37,193,24,0,0 ; movdqa 0x18c1(%rip),%xmm4 # 2410 <_sk_difference_sse41_8bit+0x2a1>
+ DB 102,15,111,37,1,33,0,0 ; movdqa 0x2101(%rip),%xmm4 # 2c50 <_sk_overlay_sse41_8bit+0x567>
DB 102,15,235,212 ; por %xmm4,%xmm2
DB 102,15,235,220 ; por %xmm4,%xmm3
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -43277,7 +43678,7 @@ _sk_load_g8_dst_sse41_8bit LABEL PROC
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 232,255,255,255,221 ; callq ffffffffde000c00 <_sk_difference_sse41_8bit+0xffffffffddffea91>
+ DB 232,255,255,255,221 ; callq ffffffffde000c00 <_sk_overlay_sse41_8bit+0xffffffffddffe517>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,210 ; callq *%rdx
@@ -43303,7 +43704,7 @@ _sk_srcover_rgba_8888_sse41_8bit LABEL PROC
DB 243,70,15,111,68,138,16 ; movdqu 0x10(%rdx,%r9,4),%xmm8
DB 243,70,15,111,12,138 ; movdqu (%rdx,%r9,4),%xmm9
DB 77,133,192 ; test %r8,%r8
- DB 102,15,111,37,215,23,0,0 ; movdqa 0x17d7(%rip),%xmm4 # 2420 <_sk_difference_sse41_8bit+0x2b1>
+ DB 102,15,111,37,23,32,0,0 ; movdqa 0x2017(%rip),%xmm4 # 2c60 <_sk_overlay_sse41_8bit+0x577>
DB 102,15,111,241 ; movdqa %xmm1,%xmm6
DB 102,15,56,0,244 ; pshufb %xmm4,%xmm6
DB 102,15,111,248 ; movdqa %xmm0,%xmm7
@@ -43423,7 +43824,7 @@ _sk_scale_1_float_sse41_8bit LABEL PROC
DB 102,15,111,232 ; movdqa %xmm0,%xmm5
DB 72,173 ; lods %ds:(%rsi),%rax
DB 243,15,16,0 ; movss (%rax),%xmm0
- DB 243,15,89,5,178,20,0,0 ; mulss 0x14b2(%rip),%xmm0 # 22dc <_sk_difference_sse41_8bit+0x16d>
+ DB 243,15,89,5,242,28,0,0 ; mulss 0x1cf2(%rip),%xmm0 # 2b1c <_sk_overlay_sse41_8bit+0x433>
DB 243,15,44,192 ; cvttss2si %xmm0,%eax
DB 15,87,192 ; xorps %xmm0,%xmm0
DB 102,68,15,56,48,197 ; pmovzxbw %xmm5,%xmm8
@@ -43431,7 +43832,7 @@ _sk_scale_1_float_sse41_8bit LABEL PROC
DB 102,68,15,56,48,204 ; pmovzxbw %xmm4,%xmm9
DB 102,15,104,224 ; punpckhbw %xmm0,%xmm4
DB 102,15,110,240 ; movd %eax,%xmm6
- DB 102,15,56,0,53,222,21,0,0 ; pshufb 0x15de(%rip),%xmm6 # 2430 <_sk_difference_sse41_8bit+0x2c1>
+ DB 102,15,56,0,53,30,30,0,0 ; pshufb 0x1e1e(%rip),%xmm6 # 2c70 <_sk_overlay_sse41_8bit+0x587>
DB 102,15,111,206 ; movdqa %xmm6,%xmm1
DB 102,65,15,213,201 ; pmullw %xmm9,%xmm1
DB 102,15,111,198 ; movdqa %xmm6,%xmm0
@@ -43464,11 +43865,11 @@ _sk_scale_u8_sse41_8bit LABEL PROC
DB 77,133,201 ; test %r9,%r9
DB 15,133,160,0,0,0 ; jne f63 <_sk_scale_u8_sse41_8bit+0xc1>
DB 102,66,15,56,48,52,2 ; pmovzxbw (%rdx,%r8,1),%xmm6
- DB 102,15,219,53,110,21,0,0 ; pand 0x156e(%rip),%xmm6 # 2440 <_sk_difference_sse41_8bit+0x2d1>
+ DB 102,15,219,53,174,29,0,0 ; pand 0x1dae(%rip),%xmm6 # 2c80 <_sk_overlay_sse41_8bit+0x597>
DB 102,69,15,239,192 ; pxor %xmm8,%xmm8
DB 102,15,111,254 ; movdqa %xmm6,%xmm7
- DB 102,15,56,0,61,108,21,0,0 ; pshufb 0x156c(%rip),%xmm7 # 2450 <_sk_difference_sse41_8bit+0x2e1>
- DB 102,15,56,0,53,115,21,0,0 ; pshufb 0x1573(%rip),%xmm6 # 2460 <_sk_difference_sse41_8bit+0x2f1>
+ DB 102,15,56,0,61,172,29,0,0 ; pshufb 0x1dac(%rip),%xmm7 # 2c90 <_sk_overlay_sse41_8bit+0x5a7>
+ DB 102,15,56,0,53,179,29,0,0 ; pshufb 0x1db3(%rip),%xmm6 # 2ca0 <_sk_overlay_sse41_8bit+0x5b7>
DB 102,68,15,56,48,200 ; pmovzxbw %xmm0,%xmm9
DB 102,65,15,104,192 ; punpckhbw %xmm8,%xmm0
DB 102,68,15,56,48,209 ; pmovzxbw %xmm1,%xmm10
@@ -43551,7 +43952,7 @@ PUBLIC _sk_lerp_1_float_sse41_8bit
_sk_lerp_1_float_sse41_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 243,15,16,32 ; movss (%rax),%xmm4
- DB 243,15,89,37,182,18,0,0 ; mulss 0x12b6(%rip),%xmm4 # 22e0 <_sk_difference_sse41_8bit+0x171>
+ DB 243,15,89,37,246,26,0,0 ; mulss 0x1af6(%rip),%xmm4 # 2b20 <_sk_overlay_sse41_8bit+0x437>
DB 243,15,44,196 ; cvttss2si %xmm4,%eax
DB 102,15,110,224 ; movd %eax,%xmm4
DB 102,15,96,228 ; punpcklbw %xmm4,%xmm4
@@ -43562,7 +43963,7 @@ _sk_lerp_1_float_sse41_8bit LABEL PROC
DB 102,65,15,104,193 ; punpckhbw %xmm9,%xmm0
DB 102,68,15,56,48,217 ; pmovzxbw %xmm1,%xmm11
DB 102,65,15,104,201 ; punpckhbw %xmm9,%xmm1
- DB 102,15,56,0,61,11,20,0,0 ; pshufb 0x140b(%rip),%xmm7 # 2470 <_sk_difference_sse41_8bit+0x301>
+ DB 102,15,56,0,61,75,28,0,0 ; pshufb 0x1c4b(%rip),%xmm7 # 2cb0 <_sk_overlay_sse41_8bit+0x5c7>
DB 102,68,15,111,231 ; movdqa %xmm7,%xmm12
DB 102,69,15,213,227 ; pmullw %xmm11,%xmm12
DB 102,68,15,111,239 ; movdqa %xmm7,%xmm13
@@ -43623,11 +44024,11 @@ _sk_lerp_u8_sse41_8bit LABEL PROC
DB 77,133,201 ; test %r9,%r9
DB 15,133,46,1,0,0 ; jne 128d <_sk_lerp_u8_sse41_8bit+0x14f>
DB 102,66,15,56,48,60,2 ; pmovzxbw (%rdx,%r8,1),%xmm7
- DB 102,15,219,61,18,19,0,0 ; pand 0x1312(%rip),%xmm7 # 2480 <_sk_difference_sse41_8bit+0x311>
+ DB 102,15,219,61,82,27,0,0 ; pand 0x1b52(%rip),%xmm7 # 2cc0 <_sk_overlay_sse41_8bit+0x5d7>
DB 102,69,15,239,192 ; pxor %xmm8,%xmm8
DB 102,15,111,247 ; movdqa %xmm7,%xmm6
- DB 102,15,56,0,53,16,19,0,0 ; pshufb 0x1310(%rip),%xmm6 # 2490 <_sk_difference_sse41_8bit+0x321>
- DB 102,15,56,0,61,23,19,0,0 ; pshufb 0x1317(%rip),%xmm7 # 24a0 <_sk_difference_sse41_8bit+0x331>
+ DB 102,15,56,0,53,80,27,0,0 ; pshufb 0x1b50(%rip),%xmm6 # 2cd0 <_sk_overlay_sse41_8bit+0x5e7>
+ DB 102,15,56,0,61,87,27,0,0 ; pshufb 0x1b57(%rip),%xmm7 # 2ce0 <_sk_overlay_sse41_8bit+0x5f7>
DB 102,68,15,56,48,200 ; pmovzxbw %xmm0,%xmm9
DB 102,65,15,104,192 ; punpckhbw %xmm8,%xmm0
DB 102,68,15,56,48,209 ; pmovzxbw %xmm1,%xmm10
@@ -43722,7 +44123,7 @@ _sk_lerp_u8_sse41_8bit LABEL PROC
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 233,255,255,255,222 ; jmpq ffffffffdf001338 <_sk_difference_sse41_8bit+0xffffffffdefff1c9>
+ DB 233,255,255,255,222 ; jmpq ffffffffdf001338 <_sk_overlay_sse41_8bit+0xffffffffdeffec4f>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,211 ; callq *%rbx
@@ -43750,7 +44151,7 @@ _sk_move_dst_src_sse41_8bit LABEL PROC
PUBLIC _sk_black_color_sse41_8bit
_sk_black_color_sse41_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 15,40,5,79,17,0,0 ; movaps 0x114f(%rip),%xmm0 # 24b0 <_sk_difference_sse41_8bit+0x341>
+ DB 15,40,5,143,25,0,0 ; movaps 0x198f(%rip),%xmm0 # 2cf0 <_sk_overlay_sse41_8bit+0x607>
DB 15,40,200 ; movaps %xmm0,%xmm1
DB 255,224 ; jmpq *%rax
@@ -43770,7 +44171,7 @@ _sk_clear_sse41_8bit LABEL PROC
PUBLIC _sk_srcatop_sse41_8bit
_sk_srcatop_sse41_8bit LABEL PROC
- DB 102,68,15,111,21,59,17,0,0 ; movdqa 0x113b(%rip),%xmm10 # 24c0 <_sk_difference_sse41_8bit+0x351>
+ DB 102,68,15,111,21,123,25,0,0 ; movdqa 0x197b(%rip),%xmm10 # 2d00 <_sk_overlay_sse41_8bit+0x617>
DB 102,68,15,111,219 ; movdqa %xmm3,%xmm11
DB 102,68,15,56,48,195 ; pmovzxbw %xmm3,%xmm8
DB 102,15,111,235 ; movdqa %xmm3,%xmm5
@@ -43838,7 +44239,7 @@ _sk_srcatop_sse41_8bit LABEL PROC
PUBLIC _sk_dstatop_sse41_8bit
_sk_dstatop_sse41_8bit LABEL PROC
- DB 102,68,15,111,29,16,16,0,0 ; movdqa 0x1010(%rip),%xmm11 # 24d0 <_sk_difference_sse41_8bit+0x361>
+ DB 102,68,15,111,29,80,24,0,0 ; movdqa 0x1850(%rip),%xmm11 # 2d10 <_sk_overlay_sse41_8bit+0x627>
DB 102,68,15,111,233 ; movdqa %xmm1,%xmm13
DB 102,69,15,56,0,235 ; pshufb %xmm11,%xmm13
DB 102,68,15,111,248 ; movdqa %xmm0,%xmm15
@@ -43908,7 +44309,7 @@ PUBLIC _sk_srcin_sse41_8bit
_sk_srcin_sse41_8bit LABEL PROC
DB 102,15,111,225 ; movdqa %xmm1,%xmm4
DB 102,15,111,232 ; movdqa %xmm0,%xmm5
- DB 102,15,111,5,215,14,0,0 ; movdqa 0xed7(%rip),%xmm0 # 24e0 <_sk_difference_sse41_8bit+0x371>
+ DB 102,15,111,5,23,23,0,0 ; movdqa 0x1717(%rip),%xmm0 # 2d20 <_sk_overlay_sse41_8bit+0x637>
DB 102,15,111,243 ; movdqa %xmm3,%xmm6
DB 102,15,56,0,240 ; pshufb %xmm0,%xmm6
DB 102,15,111,250 ; movdqa %xmm2,%xmm7
@@ -43941,7 +44342,7 @@ _sk_srcin_sse41_8bit LABEL PROC
PUBLIC _sk_dstin_sse41_8bit
_sk_dstin_sse41_8bit LABEL PROC
- DB 102,15,111,37,90,14,0,0 ; movdqa 0xe5a(%rip),%xmm4 # 24f0 <_sk_difference_sse41_8bit+0x381>
+ DB 102,15,111,37,154,22,0,0 ; movdqa 0x169a(%rip),%xmm4 # 2d30 <_sk_overlay_sse41_8bit+0x647>
DB 102,15,56,0,204 ; pshufb %xmm4,%xmm1
DB 102,15,56,0,196 ; pshufb %xmm4,%xmm0
DB 102,69,15,239,210 ; pxor %xmm10,%xmm10
@@ -43978,7 +44379,7 @@ PUBLIC _sk_srcout_sse41_8bit
_sk_srcout_sse41_8bit LABEL PROC
DB 102,15,111,225 ; movdqa %xmm1,%xmm4
DB 102,15,111,232 ; movdqa %xmm0,%xmm5
- DB 102,15,111,5,205,13,0,0 ; movdqa 0xdcd(%rip),%xmm0 # 2500 <_sk_difference_sse41_8bit+0x391>
+ DB 102,15,111,5,13,22,0,0 ; movdqa 0x160d(%rip),%xmm0 # 2d40 <_sk_overlay_sse41_8bit+0x657>
DB 102,15,111,250 ; movdqa %xmm2,%xmm7
DB 102,15,56,0,248 ; pshufb %xmm0,%xmm7
DB 102,15,111,243 ; movdqa %xmm3,%xmm6
@@ -44014,7 +44415,7 @@ _sk_srcout_sse41_8bit LABEL PROC
PUBLIC _sk_dstout_sse41_8bit
_sk_dstout_sse41_8bit LABEL PROC
- DB 102,15,111,37,68,13,0,0 ; movdqa 0xd44(%rip),%xmm4 # 2510 <_sk_difference_sse41_8bit+0x3a1>
+ DB 102,15,111,37,132,21,0,0 ; movdqa 0x1584(%rip),%xmm4 # 2d50 <_sk_overlay_sse41_8bit+0x667>
DB 102,15,56,0,196 ; pshufb %xmm4,%xmm0
DB 102,15,56,0,204 ; pshufb %xmm4,%xmm1
DB 102,15,118,228 ; pcmpeqd %xmm4,%xmm4
@@ -44052,7 +44453,7 @@ _sk_dstout_sse41_8bit LABEL PROC
PUBLIC _sk_srcover_sse41_8bit
_sk_srcover_sse41_8bit LABEL PROC
- DB 102,15,111,53,179,12,0,0 ; movdqa 0xcb3(%rip),%xmm6 # 2520 <_sk_difference_sse41_8bit+0x3b1>
+ DB 102,15,111,53,243,20,0,0 ; movdqa 0x14f3(%rip),%xmm6 # 2d60 <_sk_overlay_sse41_8bit+0x677>
DB 102,68,15,111,217 ; movdqa %xmm1,%xmm11
DB 102,68,15,56,0,222 ; pshufb %xmm6,%xmm11
DB 102,15,111,232 ; movdqa %xmm0,%xmm5
@@ -44091,7 +44492,7 @@ _sk_srcover_sse41_8bit LABEL PROC
PUBLIC _sk_dstover_sse41_8bit
_sk_dstover_sse41_8bit LABEL PROC
- DB 102,68,15,111,5,19,12,0,0 ; movdqa 0xc13(%rip),%xmm8 # 2530 <_sk_difference_sse41_8bit+0x3c1>
+ DB 102,68,15,111,5,83,20,0,0 ; movdqa 0x1453(%rip),%xmm8 # 2d70 <_sk_overlay_sse41_8bit+0x687>
DB 102,68,15,111,209 ; movdqa %xmm1,%xmm10
DB 102,68,15,56,48,201 ; pmovzxbw %xmm1,%xmm9
DB 102,15,252,203 ; paddb %xmm3,%xmm1
@@ -44166,7 +44567,7 @@ _sk_multiply_sse41_8bit LABEL PROC
DB 102,15,111,218 ; movdqa %xmm2,%xmm3
DB 102,15,111,209 ; movdqa %xmm1,%xmm2
DB 102,15,111,200 ; movdqa %xmm0,%xmm1
- DB 102,68,15,111,53,225,10,0,0 ; movdqa 0xae1(%rip),%xmm14 # 2540 <_sk_difference_sse41_8bit+0x3d1>
+ DB 102,68,15,111,53,33,19,0,0 ; movdqa 0x1321(%rip),%xmm14 # 2d80 <_sk_overlay_sse41_8bit+0x697>
DB 102,68,15,111,195 ; movdqa %xmm3,%xmm8
DB 102,15,111,235 ; movdqa %xmm3,%xmm5
DB 102,65,15,56,0,238 ; pshufb %xmm14,%xmm5
@@ -44291,7 +44692,7 @@ _sk_screen_sse41_8bit LABEL PROC
PUBLIC _sk_xor__sse41_8bit
_sk_xor__sse41_8bit LABEL PROC
- DB 102,68,15,111,21,175,8,0,0 ; movdqa 0x8af(%rip),%xmm10 # 2550 <_sk_difference_sse41_8bit+0x3e1>
+ DB 102,68,15,111,21,239,16,0,0 ; movdqa 0x10ef(%rip),%xmm10 # 2d90 <_sk_overlay_sse41_8bit+0x6a7>
DB 102,68,15,111,226 ; movdqa %xmm2,%xmm12
DB 102,68,15,56,48,194 ; pmovzxbw %xmm2,%xmm8
DB 102,15,111,234 ; movdqa %xmm2,%xmm5
@@ -44362,7 +44763,7 @@ _sk_xor__sse41_8bit LABEL PROC
PUBLIC _sk_darken_sse41_8bit
_sk_darken_sse41_8bit LABEL PROC
DB 102,68,15,111,240 ; movdqa %xmm0,%xmm14
- DB 102,68,15,111,5,114,7,0,0 ; movdqa 0x772(%rip),%xmm8 # 2560 <_sk_difference_sse41_8bit+0x3f1>
+ DB 102,68,15,111,5,178,15,0,0 ; movdqa 0xfb2(%rip),%xmm8 # 2da0 <_sk_overlay_sse41_8bit+0x6b7>
DB 102,68,15,111,219 ; movdqa %xmm3,%xmm11
DB 102,68,15,56,48,203 ; pmovzxbw %xmm3,%xmm9
DB 102,15,111,243 ; movdqa %xmm3,%xmm6
@@ -44429,7 +44830,7 @@ _sk_darken_sse41_8bit LABEL PROC
DB 102,15,248,231 ; psubb %xmm7,%xmm4
DB 102,15,248,206 ; psubb %xmm6,%xmm1
DB 102,68,15,248,245 ; psubb %xmm5,%xmm14
- DB 15,40,5,54,6,0,0 ; movaps 0x636(%rip),%xmm0 # 2570 <_sk_difference_sse41_8bit+0x401>
+ DB 15,40,5,118,14,0,0 ; movaps 0xe76(%rip),%xmm0 # 2db0 <_sk_overlay_sse41_8bit+0x6c7>
DB 102,68,15,56,16,244 ; pblendvb %xmm0,%xmm4,%xmm14
DB 102,65,15,56,16,200 ; pblendvb %xmm0,%xmm8,%xmm1
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -44439,7 +44840,7 @@ _sk_darken_sse41_8bit LABEL PROC
PUBLIC _sk_lighten_sse41_8bit
_sk_lighten_sse41_8bit LABEL PROC
DB 102,68,15,111,240 ; movdqa %xmm0,%xmm14
- DB 102,68,15,111,5,35,6,0,0 ; movdqa 0x623(%rip),%xmm8 # 2580 <_sk_difference_sse41_8bit+0x411>
+ DB 102,68,15,111,5,99,14,0,0 ; movdqa 0xe63(%rip),%xmm8 # 2dc0 <_sk_overlay_sse41_8bit+0x6d7>
DB 102,68,15,111,219 ; movdqa %xmm3,%xmm11
DB 102,68,15,56,48,203 ; pmovzxbw %xmm3,%xmm9
DB 102,15,111,243 ; movdqa %xmm3,%xmm6
@@ -44506,7 +44907,7 @@ _sk_lighten_sse41_8bit LABEL PROC
DB 102,15,248,231 ; psubb %xmm7,%xmm4
DB 102,15,248,206 ; psubb %xmm6,%xmm1
DB 102,68,15,248,245 ; psubb %xmm5,%xmm14
- DB 15,40,5,231,4,0,0 ; movaps 0x4e7(%rip),%xmm0 # 2590 <_sk_difference_sse41_8bit+0x421>
+ DB 15,40,5,39,13,0,0 ; movaps 0xd27(%rip),%xmm0 # 2dd0 <_sk_overlay_sse41_8bit+0x6e7>
DB 102,68,15,56,16,244 ; pblendvb %xmm0,%xmm4,%xmm14
DB 102,65,15,56,16,200 ; pblendvb %xmm0,%xmm8,%xmm1
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -44544,7 +44945,7 @@ _sk_exclusion_sse41_8bit LABEL PROC
DB 102,15,113,215,8 ; psrlw $0x8,%xmm7
DB 102,15,103,253 ; packuswb %xmm5,%xmm7
DB 102,15,103,230 ; packuswb %xmm6,%xmm4
- DB 102,15,111,45,77,4,0,0 ; movdqa 0x44d(%rip),%xmm5 # 25a0 <_sk_difference_sse41_8bit+0x431>
+ DB 102,15,111,45,141,12,0,0 ; movdqa 0xc8d(%rip),%xmm5 # 2de0 <_sk_overlay_sse41_8bit+0x6f7>
DB 102,15,248,204 ; psubb %xmm4,%xmm1
DB 102,15,219,229 ; pand %xmm5,%xmm4
DB 102,15,219,239 ; pand %xmm7,%xmm5
@@ -44557,7 +44958,7 @@ _sk_exclusion_sse41_8bit LABEL PROC
PUBLIC _sk_difference_sse41_8bit
_sk_difference_sse41_8bit LABEL PROC
DB 102,68,15,111,193 ; movdqa %xmm1,%xmm8
- DB 102,15,111,13,52,4,0,0 ; movdqa 0x434(%rip),%xmm1 # 25b0 <_sk_difference_sse41_8bit+0x441>
+ DB 102,15,111,13,116,12,0,0 ; movdqa 0xc74(%rip),%xmm1 # 2df0 <_sk_overlay_sse41_8bit+0x707>
DB 102,68,15,111,219 ; movdqa %xmm3,%xmm11
DB 102,68,15,56,48,203 ; pmovzxbw %xmm3,%xmm9
DB 102,15,111,243 ; movdqa %xmm3,%xmm6
@@ -44595,7 +44996,7 @@ _sk_difference_sse41_8bit LABEL PROC
DB 102,15,113,214,8 ; psrlw $0x8,%xmm6
DB 102,15,113,215,8 ; psrlw $0x8,%xmm7
DB 102,15,103,254 ; packuswb %xmm6,%xmm7
- DB 102,15,111,37,121,3,0,0 ; movdqa 0x379(%rip),%xmm4 # 25b0 <_sk_difference_sse41_8bit+0x441>
+ DB 102,15,111,37,185,11,0,0 ; movdqa 0xbb9(%rip),%xmm4 # 2df0 <_sk_overlay_sse41_8bit+0x707>
DB 102,15,56,0,204 ; pshufb %xmm4,%xmm1
DB 102,15,56,0,236 ; pshufb %xmm4,%xmm5
DB 102,69,15,104,236 ; punpckhbw %xmm12,%xmm13
@@ -44620,7 +45021,7 @@ _sk_difference_sse41_8bit LABEL PROC
DB 102,15,103,241 ; packuswb %xmm1,%xmm6
DB 102,65,15,218,230 ; pminub %xmm14,%xmm4
DB 102,15,218,247 ; pminub %xmm7,%xmm6
- DB 102,15,111,13,12,3,0,0 ; movdqa 0x30c(%rip),%xmm1 # 25c0 <_sk_difference_sse41_8bit+0x451>
+ DB 102,15,111,13,76,11,0,0 ; movdqa 0xb4c(%rip),%xmm1 # 2e00 <_sk_overlay_sse41_8bit+0x717>
DB 102,68,15,248,198 ; psubb %xmm6,%xmm8
DB 102,15,219,241 ; pand %xmm1,%xmm6
DB 102,15,219,204 ; pand %xmm4,%xmm1
@@ -44631,13 +45032,416 @@ _sk_difference_sse41_8bit LABEL PROC
DB 102,65,15,111,200 ; movdqa %xmm8,%xmm1
DB 255,224 ; jmpq *%rax
+PUBLIC _sk_hardlight_sse41_8bit
+_sk_hardlight_sse41_8bit LABEL PROC
+ DB 72,129,236,24,1,0,0 ; sub $0x118,%rsp
+ DB 102,68,15,111,251 ; movdqa %xmm3,%xmm15
+ DB 102,68,15,111,233 ; movdqa %xmm1,%xmm13
+ DB 102,68,15,111,240 ; movdqa %xmm0,%xmm14
+ DB 102,15,239,192 ; pxor %xmm0,%xmm0
+ DB 102,69,15,111,198 ; movdqa %xmm14,%xmm8
+ DB 102,68,15,104,192 ; punpckhbw %xmm0,%xmm8
+ DB 102,15,104,200 ; punpckhbw %xmm0,%xmm1
+ DB 102,15,111,225 ; movdqa %xmm1,%xmm4
+ DB 102,15,127,100,36,16 ; movdqa %xmm4,0x10(%rsp)
+ DB 102,65,15,56,48,206 ; pmovzxbw %xmm14,%xmm1
+ DB 102,15,127,12,36 ; movdqa %xmm1,(%rsp)
+ DB 102,69,15,56,48,221 ; pmovzxbw %xmm13,%xmm11
+ DB 102,68,15,127,156,36,176,0,0,0 ; movdqa %xmm11,0xb0(%rsp)
+ DB 102,15,111,13,228,10,0,0 ; movdqa 0xae4(%rip),%xmm1 # 2e10 <_sk_overlay_sse41_8bit+0x727>
+ DB 102,68,15,56,0,241 ; pshufb %xmm1,%xmm14
+ DB 102,68,15,56,0,233 ; pshufb %xmm1,%xmm13
+ DB 102,69,15,56,48,205 ; pmovzxbw %xmm13,%xmm9
+ DB 102,68,15,104,232 ; punpckhbw %xmm0,%xmm13
+ DB 102,69,15,56,48,214 ; pmovzxbw %xmm14,%xmm10
+ DB 102,68,15,104,240 ; punpckhbw %xmm0,%xmm14
+ DB 102,15,127,84,36,64 ; movdqa %xmm2,0x40(%rsp)
+ DB 102,15,111,242 ; movdqa %xmm2,%xmm6
+ DB 102,15,111,234 ; movdqa %xmm2,%xmm5
+ DB 102,15,104,232 ; punpckhbw %xmm0,%xmm5
+ DB 102,68,15,127,124,36,80 ; movdqa %xmm15,0x50(%rsp)
+ DB 102,65,15,111,215 ; movdqa %xmm15,%xmm2
+ DB 102,68,15,104,248 ; punpckhbw %xmm0,%xmm15
+ DB 102,15,56,0,241 ; pshufb %xmm1,%xmm6
+ DB 102,15,56,0,209 ; pshufb %xmm1,%xmm2
+ DB 102,68,15,56,48,226 ; pmovzxbw %xmm2,%xmm12
+ DB 102,68,15,127,100,36,32 ; movdqa %xmm12,0x20(%rsp)
+ DB 102,15,104,208 ; punpckhbw %xmm0,%xmm2
+ DB 102,15,127,84,36,48 ; movdqa %xmm2,0x30(%rsp)
+ DB 102,15,56,48,222 ; pmovzxbw %xmm6,%xmm3
+ DB 102,15,104,240 ; punpckhbw %xmm0,%xmm6
+ DB 102,15,111,254 ; movdqa %xmm6,%xmm7
+ DB 102,15,111,196 ; movdqa %xmm4,%xmm0
+ DB 102,15,253,192 ; paddw %xmm0,%xmm0
+ DB 102,15,111,200 ; movdqa %xmm0,%xmm1
+ DB 102,65,15,56,58,205 ; pminuw %xmm13,%xmm1
+ DB 102,15,117,200 ; pcmpeqw %xmm0,%xmm1
+ DB 102,15,127,140,36,240,0,0,0 ; movdqa %xmm1,0xf0(%rsp)
+ DB 102,65,15,111,192 ; movdqa %xmm8,%xmm0
+ DB 102,15,253,192 ; paddw %xmm0,%xmm0
+ DB 102,15,111,200 ; movdqa %xmm0,%xmm1
+ DB 102,65,15,56,58,206 ; pminuw %xmm14,%xmm1
+ DB 102,15,117,200 ; pcmpeqw %xmm0,%xmm1
+ DB 102,15,127,140,36,208,0,0,0 ; movdqa %xmm1,0xd0(%rsp)
+ DB 102,65,15,111,195 ; movdqa %xmm11,%xmm0
+ DB 102,15,253,192 ; paddw %xmm0,%xmm0
+ DB 102,15,111,208 ; movdqa %xmm0,%xmm2
+ DB 102,65,15,111,225 ; movdqa %xmm9,%xmm4
+ DB 102,15,127,100,36,96 ; movdqa %xmm4,0x60(%rsp)
+ DB 102,15,56,58,212 ; pminuw %xmm4,%xmm2
+ DB 102,15,117,208 ; pcmpeqw %xmm0,%xmm2
+ DB 102,15,127,148,36,0,1,0,0 ; movdqa %xmm2,0x100(%rsp)
+ DB 102,15,111,4,36 ; movdqa (%rsp),%xmm0
+ DB 102,15,253,192 ; paddw %xmm0,%xmm0
+ DB 102,15,111,240 ; movdqa %xmm0,%xmm6
+ DB 102,65,15,56,58,242 ; pminuw %xmm10,%xmm6
+ DB 102,15,117,240 ; pcmpeqw %xmm0,%xmm6
+ DB 102,15,127,180,36,192,0,0,0 ; movdqa %xmm6,0xc0(%rsp)
+ DB 102,15,111,53,240,9,0,0 ; movdqa 0x9f0(%rip),%xmm6 # 2e20 <_sk_overlay_sse41_8bit+0x737>
+ DB 102,15,111,199 ; movdqa %xmm7,%xmm0
+ DB 102,15,239,198 ; pxor %xmm6,%xmm0
+ DB 102,65,15,213,192 ; pmullw %xmm8,%xmm0
+ DB 102,15,127,132,36,224,0,0,0 ; movdqa %xmm0,0xe0(%rsp)
+ DB 102,69,15,111,222 ; movdqa %xmm14,%xmm11
+ DB 102,68,15,239,222 ; pxor %xmm6,%xmm11
+ DB 102,68,15,213,221 ; pmullw %xmm5,%xmm11
+ DB 102,15,111,199 ; movdqa %xmm7,%xmm0
+ DB 102,15,249,253 ; psubw %xmm5,%xmm7
+ DB 102,15,127,188,36,128,0,0,0 ; movdqa %xmm7,0x80(%rsp)
+ DB 102,68,15,111,205 ; movdqa %xmm5,%xmm9
+ DB 102,69,15,213,200 ; pmullw %xmm8,%xmm9
+ DB 102,65,15,213,198 ; pmullw %xmm14,%xmm0
+ DB 102,15,127,132,36,144,0,0,0 ; movdqa %xmm0,0x90(%rsp)
+ DB 102,69,15,249,240 ; psubw %xmm8,%xmm14
+ DB 102,15,111,195 ; movdqa %xmm3,%xmm0
+ DB 102,15,239,198 ; pxor %xmm6,%xmm0
+ DB 102,68,15,111,192 ; movdqa %xmm0,%xmm8
+ DB 102,65,15,111,212 ; movdqa %xmm12,%xmm2
+ DB 102,15,239,214 ; pxor %xmm6,%xmm2
+ DB 102,15,111,76,36,48 ; movdqa 0x30(%rsp),%xmm1
+ DB 102,15,111,193 ; movdqa %xmm1,%xmm0
+ DB 102,15,239,198 ; pxor %xmm6,%xmm0
+ DB 102,68,15,111,100,36,16 ; movdqa 0x10(%rsp),%xmm12
+ DB 102,65,15,213,196 ; pmullw %xmm12,%xmm0
+ DB 102,15,127,132,36,160,0,0,0 ; movdqa %xmm0,0xa0(%rsp)
+ DB 102,65,15,111,194 ; movdqa %xmm10,%xmm0
+ DB 102,15,111,248 ; movdqa %xmm0,%xmm7
+ DB 102,15,239,254 ; pxor %xmm6,%xmm7
+ DB 102,15,111,236 ; movdqa %xmm4,%xmm5
+ DB 102,15,239,238 ; pxor %xmm6,%xmm5
+ DB 102,65,15,239,245 ; pxor %xmm13,%xmm6
+ DB 102,65,15,111,231 ; movdqa %xmm15,%xmm4
+ DB 102,15,213,244 ; pmullw %xmm4,%xmm6
+ DB 102,68,15,111,249 ; movdqa %xmm1,%xmm15
+ DB 102,15,249,204 ; psubw %xmm4,%xmm1
+ DB 102,15,127,76,36,48 ; movdqa %xmm1,0x30(%rsp)
+ DB 102,65,15,213,228 ; pmullw %xmm12,%xmm4
+ DB 102,69,15,213,253 ; pmullw %xmm13,%xmm15
+ DB 102,69,15,249,236 ; psubw %xmm12,%xmm13
+ DB 102,68,15,111,20,36 ; movdqa (%rsp),%xmm10
+ DB 102,69,15,213,194 ; pmullw %xmm10,%xmm8
+ DB 102,68,15,127,68,36,16 ; movdqa %xmm8,0x10(%rsp)
+ DB 102,68,15,56,48,68,36,64 ; pmovzxbw 0x40(%rsp),%xmm8
+ DB 102,65,15,213,248 ; pmullw %xmm8,%xmm7
+ DB 102,68,15,111,227 ; movdqa %xmm3,%xmm12
+ DB 102,65,15,249,216 ; psubw %xmm8,%xmm3
+ DB 102,15,127,92,36,112 ; movdqa %xmm3,0x70(%rsp)
+ DB 102,69,15,213,194 ; pmullw %xmm10,%xmm8
+ DB 102,68,15,213,224 ; pmullw %xmm0,%xmm12
+ DB 102,65,15,249,194 ; psubw %xmm10,%xmm0
+ DB 102,15,111,200 ; movdqa %xmm0,%xmm1
+ DB 102,15,111,156,36,176,0,0,0 ; movdqa 0xb0(%rsp),%xmm3
+ DB 102,15,213,211 ; pmullw %xmm3,%xmm2
+ DB 102,15,127,20,36 ; movdqa %xmm2,(%rsp)
+ DB 102,15,56,48,84,36,80 ; pmovzxbw 0x50(%rsp),%xmm2
+ DB 102,15,213,234 ; pmullw %xmm2,%xmm5
+ DB 102,15,111,68,36,32 ; movdqa 0x20(%rsp),%xmm0
+ DB 102,68,15,111,208 ; movdqa %xmm0,%xmm10
+ DB 102,15,249,194 ; psubw %xmm2,%xmm0
+ DB 102,15,127,68,36,32 ; movdqa %xmm0,0x20(%rsp)
+ DB 102,15,213,211 ; pmullw %xmm3,%xmm2
+ DB 102,15,111,68,36,96 ; movdqa 0x60(%rsp),%xmm0
+ DB 102,68,15,213,208 ; pmullw %xmm0,%xmm10
+ DB 102,15,249,195 ; psubw %xmm3,%xmm0
+ DB 102,68,15,213,108,36,48 ; pmullw 0x30(%rsp),%xmm13
+ DB 102,68,15,213,180,36,128,0,0,0 ; pmullw 0x80(%rsp),%xmm14
+ DB 102,15,213,68,36,32 ; pmullw 0x20(%rsp),%xmm0
+ DB 102,15,213,76,36,112 ; pmullw 0x70(%rsp),%xmm1
+ DB 102,15,253,192 ; paddw %xmm0,%xmm0
+ DB 102,68,15,249,208 ; psubw %xmm0,%xmm10
+ DB 102,15,253,201 ; paddw %xmm1,%xmm1
+ DB 102,68,15,249,225 ; psubw %xmm1,%xmm12
+ DB 102,69,15,253,237 ; paddw %xmm13,%xmm13
+ DB 102,69,15,249,253 ; psubw %xmm13,%xmm15
+ DB 102,69,15,253,246 ; paddw %xmm14,%xmm14
+ DB 102,15,111,140,36,144,0,0,0 ; movdqa 0x90(%rsp),%xmm1
+ DB 102,65,15,249,206 ; psubw %xmm14,%xmm1
+ DB 102,69,15,253,201 ; paddw %xmm9,%xmm9
+ DB 102,15,111,132,36,208,0,0,0 ; movdqa 0xd0(%rsp),%xmm0
+ DB 102,68,15,219,200 ; pand %xmm0,%xmm9
+ DB 102,15,223,193 ; pandn %xmm1,%xmm0
+ DB 102,65,15,235,193 ; por %xmm9,%xmm0
+ DB 102,15,111,216 ; movdqa %xmm0,%xmm3
+ DB 102,15,253,228 ; paddw %xmm4,%xmm4
+ DB 102,15,111,132,36,240,0,0,0 ; movdqa 0xf0(%rsp),%xmm0
+ DB 102,15,219,224 ; pand %xmm0,%xmm4
+ DB 102,65,15,223,199 ; pandn %xmm15,%xmm0
+ DB 102,15,235,196 ; por %xmm4,%xmm0
+ DB 102,15,111,200 ; movdqa %xmm0,%xmm1
+ DB 102,69,15,253,192 ; paddw %xmm8,%xmm8
+ DB 102,15,111,132,36,192,0,0,0 ; movdqa 0xc0(%rsp),%xmm0
+ DB 102,68,15,219,192 ; pand %xmm0,%xmm8
+ DB 102,65,15,223,196 ; pandn %xmm12,%xmm0
+ DB 102,65,15,235,192 ; por %xmm8,%xmm0
+ DB 102,15,111,224 ; movdqa %xmm0,%xmm4
+ DB 102,15,253,210 ; paddw %xmm2,%xmm2
+ DB 102,15,111,132,36,0,1,0,0 ; movdqa 0x100(%rsp),%xmm0
+ DB 102,15,219,208 ; pand %xmm0,%xmm2
+ DB 102,65,15,223,194 ; pandn %xmm10,%xmm0
+ DB 102,15,235,194 ; por %xmm2,%xmm0
+ DB 102,15,111,208 ; movdqa %xmm0,%xmm2
+ DB 102,68,15,253,156,36,224,0,0,0 ; paddw 0xe0(%rsp),%xmm11
+ DB 102,15,253,180,36,160,0,0,0 ; paddw 0xa0(%rsp),%xmm6
+ DB 102,15,253,124,36,16 ; paddw 0x10(%rsp),%xmm7
+ DB 102,15,253,44,36 ; paddw (%rsp),%xmm5
+ DB 102,15,111,5,189,7,0,0 ; movdqa 0x7bd(%rip),%xmm0 # 2e30 <_sk_overlay_sse41_8bit+0x747>
+ DB 102,68,15,253,216 ; paddw %xmm0,%xmm11
+ DB 102,68,15,253,219 ; paddw %xmm3,%xmm11
+ DB 102,15,253,240 ; paddw %xmm0,%xmm6
+ DB 102,15,253,241 ; paddw %xmm1,%xmm6
+ DB 102,15,253,248 ; paddw %xmm0,%xmm7
+ DB 102,15,253,252 ; paddw %xmm4,%xmm7
+ DB 102,15,253,232 ; paddw %xmm0,%xmm5
+ DB 102,15,253,234 ; paddw %xmm2,%xmm5
+ DB 102,15,111,5,163,7,0,0 ; movdqa 0x7a3(%rip),%xmm0 # 2e40 <_sk_overlay_sse41_8bit+0x757>
+ DB 102,15,228,240 ; pmulhuw %xmm0,%xmm6
+ DB 102,68,15,228,216 ; pmulhuw %xmm0,%xmm11
+ DB 102,15,228,232 ; pmulhuw %xmm0,%xmm5
+ DB 102,15,228,248 ; pmulhuw %xmm0,%xmm7
+ DB 102,65,15,113,211,7 ; psrlw $0x7,%xmm11
+ DB 102,15,113,215,7 ; psrlw $0x7,%xmm7
+ DB 102,65,15,103,251 ; packuswb %xmm11,%xmm7
+ DB 102,15,113,214,7 ; psrlw $0x7,%xmm6
+ DB 102,15,113,213,7 ; psrlw $0x7,%xmm5
+ DB 102,15,103,238 ; packuswb %xmm6,%xmm5
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 15,40,84,36,64 ; movaps 0x40(%rsp),%xmm2
+ DB 15,40,92,36,80 ; movaps 0x50(%rsp),%xmm3
+ DB 102,15,111,199 ; movdqa %xmm7,%xmm0
+ DB 102,15,111,205 ; movdqa %xmm5,%xmm1
+ DB 72,129,196,24,1,0,0 ; add $0x118,%rsp
+ DB 255,224 ; jmpq *%rax
+
+PUBLIC _sk_overlay_sse41_8bit
+_sk_overlay_sse41_8bit LABEL PROC
+ DB 72,129,236,72,1,0,0 ; sub $0x148,%rsp
+ DB 102,68,15,111,241 ; movdqa %xmm1,%xmm14
+ DB 102,68,15,111,248 ; movdqa %xmm0,%xmm15
+ DB 102,15,239,192 ; pxor %xmm0,%xmm0
+ DB 102,65,15,111,231 ; movdqa %xmm15,%xmm4
+ DB 102,15,104,224 ; punpckhbw %xmm0,%xmm4
+ DB 102,69,15,111,230 ; movdqa %xmm14,%xmm12
+ DB 102,68,15,104,224 ; punpckhbw %xmm0,%xmm12
+ DB 102,65,15,56,48,207 ; pmovzxbw %xmm15,%xmm1
+ DB 102,15,127,12,36 ; movdqa %xmm1,(%rsp)
+ DB 102,65,15,56,48,206 ; pmovzxbw %xmm14,%xmm1
+ DB 102,15,127,140,36,16,1,0,0 ; movdqa %xmm1,0x110(%rsp)
+ DB 102,15,111,13,29,7,0,0 ; movdqa 0x71d(%rip),%xmm1 # 2e50 <_sk_overlay_sse41_8bit+0x767>
+ DB 102,68,15,56,0,249 ; pshufb %xmm1,%xmm15
+ DB 102,68,15,56,0,241 ; pshufb %xmm1,%xmm14
+ DB 102,65,15,56,48,238 ; pmovzxbw %xmm14,%xmm5
+ DB 102,15,127,108,36,32 ; movdqa %xmm5,0x20(%rsp)
+ DB 102,68,15,104,240 ; punpckhbw %xmm0,%xmm14
+ DB 102,65,15,56,48,239 ; pmovzxbw %xmm15,%xmm5
+ DB 102,15,127,172,36,0,1,0,0 ; movdqa %xmm5,0x100(%rsp)
+ DB 102,68,15,104,248 ; punpckhbw %xmm0,%xmm15
+ DB 102,15,111,242 ; movdqa %xmm2,%xmm6
+ DB 102,15,111,234 ; movdqa %xmm2,%xmm5
+ DB 102,68,15,111,218 ; movdqa %xmm2,%xmm11
+ DB 102,68,15,127,156,36,32,1,0,0 ; movdqa %xmm11,0x120(%rsp)
+ DB 102,15,104,232 ; punpckhbw %xmm0,%xmm5
+ DB 102,15,111,251 ; movdqa %xmm3,%xmm7
+ DB 102,15,111,211 ; movdqa %xmm3,%xmm2
+ DB 102,15,127,156,36,48,1,0,0 ; movdqa %xmm3,0x130(%rsp)
+ DB 102,15,104,208 ; punpckhbw %xmm0,%xmm2
+ DB 102,15,56,0,241 ; pshufb %xmm1,%xmm6
+ DB 102,15,56,0,249 ; pshufb %xmm1,%xmm7
+ DB 102,15,56,48,207 ; pmovzxbw %xmm7,%xmm1
+ DB 102,15,104,248 ; punpckhbw %xmm0,%xmm7
+ DB 102,68,15,56,48,238 ; pmovzxbw %xmm6,%xmm13
+ DB 102,15,104,240 ; punpckhbw %xmm0,%xmm6
+ DB 102,68,15,111,206 ; movdqa %xmm6,%xmm9
+ DB 102,15,111,194 ; movdqa %xmm2,%xmm0
+ DB 102,15,253,192 ; paddw %xmm0,%xmm0
+ DB 102,15,111,240 ; movdqa %xmm0,%xmm6
+ DB 102,15,56,58,247 ; pminuw %xmm7,%xmm6
+ DB 102,15,117,240 ; pcmpeqw %xmm0,%xmm6
+ DB 102,15,127,180,36,208,0,0,0 ; movdqa %xmm6,0xd0(%rsp)
+ DB 102,15,111,197 ; movdqa %xmm5,%xmm0
+ DB 102,15,253,192 ; paddw %xmm0,%xmm0
+ DB 102,15,111,240 ; movdqa %xmm0,%xmm6
+ DB 102,65,15,56,58,241 ; pminuw %xmm9,%xmm6
+ DB 102,15,117,240 ; pcmpeqw %xmm0,%xmm6
+ DB 102,15,127,180,36,192,0,0,0 ; movdqa %xmm6,0xc0(%rsp)
+ DB 102,15,56,48,195 ; pmovzxbw %xmm3,%xmm0
+ DB 102,15,127,132,36,144,0,0,0 ; movdqa %xmm0,0x90(%rsp)
+ DB 102,15,253,192 ; paddw %xmm0,%xmm0
+ DB 102,15,111,216 ; movdqa %xmm0,%xmm3
+ DB 102,15,56,58,217 ; pminuw %xmm1,%xmm3
+ DB 102,15,127,76,36,16 ; movdqa %xmm1,0x10(%rsp)
+ DB 102,15,117,216 ; pcmpeqw %xmm0,%xmm3
+ DB 102,15,127,156,36,224,0,0,0 ; movdqa %xmm3,0xe0(%rsp)
+ DB 102,65,15,56,48,195 ; pmovzxbw %xmm11,%xmm0
+ DB 102,15,127,68,36,96 ; movdqa %xmm0,0x60(%rsp)
+ DB 102,15,253,192 ; paddw %xmm0,%xmm0
+ DB 102,15,111,216 ; movdqa %xmm0,%xmm3
+ DB 102,65,15,56,58,221 ; pminuw %xmm13,%xmm3
+ DB 102,15,117,216 ; pcmpeqw %xmm0,%xmm3
+ DB 102,15,127,156,36,176,0,0,0 ; movdqa %xmm3,0xb0(%rsp)
+ DB 102,15,111,53,16,6,0,0 ; movdqa 0x610(%rip),%xmm6 # 2e60 <_sk_overlay_sse41_8bit+0x777>
+ DB 102,65,15,111,193 ; movdqa %xmm9,%xmm0
+ DB 102,15,239,198 ; pxor %xmm6,%xmm0
+ DB 102,15,213,196 ; pmullw %xmm4,%xmm0
+ DB 102,15,127,132,36,240,0,0,0 ; movdqa %xmm0,0xf0(%rsp)
+ DB 102,69,15,111,223 ; movdqa %xmm15,%xmm11
+ DB 102,68,15,239,222 ; pxor %xmm6,%xmm11
+ DB 102,68,15,213,221 ; pmullw %xmm5,%xmm11
+ DB 102,65,15,111,193 ; movdqa %xmm9,%xmm0
+ DB 102,68,15,249,205 ; psubw %xmm5,%xmm9
+ DB 102,68,15,127,76,36,80 ; movdqa %xmm9,0x50(%rsp)
+ DB 102,68,15,111,205 ; movdqa %xmm5,%xmm9
+ DB 102,68,15,213,204 ; pmullw %xmm4,%xmm9
+ DB 102,65,15,213,199 ; pmullw %xmm15,%xmm0
+ DB 102,15,127,68,36,112 ; movdqa %xmm0,0x70(%rsp)
+ DB 102,68,15,249,252 ; psubw %xmm4,%xmm15
+ DB 102,65,15,111,197 ; movdqa %xmm13,%xmm0
+ DB 102,15,239,198 ; pxor %xmm6,%xmm0
+ DB 102,15,111,216 ; movdqa %xmm0,%xmm3
+ DB 102,15,239,206 ; pxor %xmm6,%xmm1
+ DB 102,68,15,111,209 ; movdqa %xmm1,%xmm10
+ DB 102,15,111,207 ; movdqa %xmm7,%xmm1
+ DB 102,15,111,193 ; movdqa %xmm1,%xmm0
+ DB 102,15,239,198 ; pxor %xmm6,%xmm0
+ DB 102,65,15,111,252 ; movdqa %xmm12,%xmm7
+ DB 102,15,213,199 ; pmullw %xmm7,%xmm0
+ DB 102,15,127,132,36,160,0,0,0 ; movdqa %xmm0,0xa0(%rsp)
+ DB 102,15,111,132,36,0,1,0,0 ; movdqa 0x100(%rsp),%xmm0
+ DB 102,68,15,111,192 ; movdqa %xmm0,%xmm8
+ DB 102,68,15,239,198 ; pxor %xmm6,%xmm8
+ DB 102,15,111,108,36,32 ; movdqa 0x20(%rsp),%xmm5
+ DB 102,15,239,238 ; pxor %xmm6,%xmm5
+ DB 102,65,15,239,246 ; pxor %xmm14,%xmm6
+ DB 102,15,213,242 ; pmullw %xmm2,%xmm6
+ DB 102,68,15,111,225 ; movdqa %xmm1,%xmm12
+ DB 102,15,249,202 ; psubw %xmm2,%xmm1
+ DB 102,15,127,76,36,64 ; movdqa %xmm1,0x40(%rsp)
+ DB 102,15,111,226 ; movdqa %xmm2,%xmm4
+ DB 102,15,213,231 ; pmullw %xmm7,%xmm4
+ DB 102,69,15,213,230 ; pmullw %xmm14,%xmm12
+ DB 102,68,15,249,247 ; psubw %xmm7,%xmm14
+ DB 102,15,111,12,36 ; movdqa (%rsp),%xmm1
+ DB 102,15,213,217 ; pmullw %xmm1,%xmm3
+ DB 102,15,127,156,36,128,0,0,0 ; movdqa %xmm3,0x80(%rsp)
+ DB 102,15,111,84,36,96 ; movdqa 0x60(%rsp),%xmm2
+ DB 102,68,15,213,194 ; pmullw %xmm2,%xmm8
+ DB 102,65,15,111,221 ; movdqa %xmm13,%xmm3
+ DB 102,15,249,218 ; psubw %xmm2,%xmm3
+ DB 102,15,127,92,36,48 ; movdqa %xmm3,0x30(%rsp)
+ DB 102,15,111,218 ; movdqa %xmm2,%xmm3
+ DB 102,15,213,217 ; pmullw %xmm1,%xmm3
+ DB 102,68,15,213,232 ; pmullw %xmm0,%xmm13
+ DB 102,15,249,193 ; psubw %xmm1,%xmm0
+ DB 102,15,111,200 ; movdqa %xmm0,%xmm1
+ DB 102,15,111,148,36,16,1,0,0 ; movdqa 0x110(%rsp),%xmm2
+ DB 102,68,15,213,210 ; pmullw %xmm2,%xmm10
+ DB 102,68,15,127,20,36 ; movdqa %xmm10,(%rsp)
+ DB 102,15,111,132,36,144,0,0,0 ; movdqa 0x90(%rsp),%xmm0
+ DB 102,15,213,232 ; pmullw %xmm0,%xmm5
+ DB 102,15,111,124,36,16 ; movdqa 0x10(%rsp),%xmm7
+ DB 102,68,15,111,215 ; movdqa %xmm7,%xmm10
+ DB 102,15,249,248 ; psubw %xmm0,%xmm7
+ DB 102,15,127,124,36,16 ; movdqa %xmm7,0x10(%rsp)
+ DB 102,15,111,248 ; movdqa %xmm0,%xmm7
+ DB 102,15,213,250 ; pmullw %xmm2,%xmm7
+ DB 102,15,111,68,36,32 ; movdqa 0x20(%rsp),%xmm0
+ DB 102,68,15,213,208 ; pmullw %xmm0,%xmm10
+ DB 102,15,249,194 ; psubw %xmm2,%xmm0
+ DB 102,68,15,213,116,36,64 ; pmullw 0x40(%rsp),%xmm14
+ DB 102,68,15,213,124,36,80 ; pmullw 0x50(%rsp),%xmm15
+ DB 102,15,213,68,36,16 ; pmullw 0x10(%rsp),%xmm0
+ DB 102,15,213,76,36,48 ; pmullw 0x30(%rsp),%xmm1
+ DB 102,15,253,192 ; paddw %xmm0,%xmm0
+ DB 102,68,15,249,208 ; psubw %xmm0,%xmm10
+ DB 102,15,253,201 ; paddw %xmm1,%xmm1
+ DB 102,68,15,249,233 ; psubw %xmm1,%xmm13
+ DB 102,69,15,253,246 ; paddw %xmm14,%xmm14
+ DB 102,69,15,249,230 ; psubw %xmm14,%xmm12
+ DB 102,69,15,253,255 ; paddw %xmm15,%xmm15
+ DB 102,15,111,76,36,112 ; movdqa 0x70(%rsp),%xmm1
+ DB 102,65,15,249,207 ; psubw %xmm15,%xmm1
+ DB 102,69,15,253,201 ; paddw %xmm9,%xmm9
+ DB 102,15,111,132,36,192,0,0,0 ; movdqa 0xc0(%rsp),%xmm0
+ DB 102,68,15,219,200 ; pand %xmm0,%xmm9
+ DB 102,15,223,193 ; pandn %xmm1,%xmm0
+ DB 102,65,15,235,193 ; por %xmm9,%xmm0
+ DB 102,68,15,111,200 ; movdqa %xmm0,%xmm9
+ DB 102,15,253,228 ; paddw %xmm4,%xmm4
+ DB 102,15,111,132,36,208,0,0,0 ; movdqa 0xd0(%rsp),%xmm0
+ DB 102,15,219,224 ; pand %xmm0,%xmm4
+ DB 102,65,15,223,196 ; pandn %xmm12,%xmm0
+ DB 102,15,235,196 ; por %xmm4,%xmm0
+ DB 102,15,111,200 ; movdqa %xmm0,%xmm1
+ DB 102,15,253,219 ; paddw %xmm3,%xmm3
+ DB 102,15,111,132,36,176,0,0,0 ; movdqa 0xb0(%rsp),%xmm0
+ DB 102,15,219,216 ; pand %xmm0,%xmm3
+ DB 102,65,15,223,197 ; pandn %xmm13,%xmm0
+ DB 102,15,235,195 ; por %xmm3,%xmm0
+ DB 102,15,111,216 ; movdqa %xmm0,%xmm3
+ DB 102,15,253,255 ; paddw %xmm7,%xmm7
+ DB 102,15,111,132,36,224,0,0,0 ; movdqa 0xe0(%rsp),%xmm0
+ DB 102,15,219,248 ; pand %xmm0,%xmm7
+ DB 102,65,15,223,194 ; pandn %xmm10,%xmm0
+ DB 102,15,235,199 ; por %xmm7,%xmm0
+ DB 102,15,111,208 ; movdqa %xmm0,%xmm2
+ DB 102,68,15,253,156,36,240,0,0,0 ; paddw 0xf0(%rsp),%xmm11
+ DB 102,15,253,180,36,160,0,0,0 ; paddw 0xa0(%rsp),%xmm6
+ DB 102,68,15,253,132,36,128,0,0,0 ; paddw 0x80(%rsp),%xmm8
+ DB 102,15,253,44,36 ; paddw (%rsp),%xmm5
+ DB 102,15,111,5,220,3,0,0 ; movdqa 0x3dc(%rip),%xmm0 # 2e70 <_sk_overlay_sse41_8bit+0x787>
+ DB 102,68,15,253,216 ; paddw %xmm0,%xmm11
+ DB 102,69,15,253,217 ; paddw %xmm9,%xmm11
+ DB 102,15,253,240 ; paddw %xmm0,%xmm6
+ DB 102,15,253,241 ; paddw %xmm1,%xmm6
+ DB 102,68,15,253,192 ; paddw %xmm0,%xmm8
+ DB 102,68,15,253,195 ; paddw %xmm3,%xmm8
+ DB 102,15,253,232 ; paddw %xmm0,%xmm5
+ DB 102,15,253,234 ; paddw %xmm2,%xmm5
+ DB 102,15,111,5,192,3,0,0 ; movdqa 0x3c0(%rip),%xmm0 # 2e80 <_sk_overlay_sse41_8bit+0x797>
+ DB 102,15,228,240 ; pmulhuw %xmm0,%xmm6
+ DB 102,68,15,228,216 ; pmulhuw %xmm0,%xmm11
+ DB 102,15,228,232 ; pmulhuw %xmm0,%xmm5
+ DB 102,68,15,228,192 ; pmulhuw %xmm0,%xmm8
+ DB 102,65,15,113,211,7 ; psrlw $0x7,%xmm11
+ DB 102,65,15,113,208,7 ; psrlw $0x7,%xmm8
+ DB 102,69,15,103,195 ; packuswb %xmm11,%xmm8
+ DB 102,15,113,214,7 ; psrlw $0x7,%xmm6
+ DB 102,15,113,213,7 ; psrlw $0x7,%xmm5
+ DB 102,15,103,238 ; packuswb %xmm6,%xmm5
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 15,40,148,36,32,1,0,0 ; movaps 0x120(%rsp),%xmm2
+ DB 15,40,156,36,48,1,0,0 ; movaps 0x130(%rsp),%xmm3
+ DB 102,65,15,111,192 ; movdqa %xmm8,%xmm0
+ DB 102,15,111,205 ; movdqa %xmm5,%xmm1
+ DB 72,129,196,72,1,0,0 ; add $0x148,%rsp
+ DB 255,224 ; jmpq *%rax
+
ALIGN 4
DB 0,0 ; add %al,(%rax)
- DB 127,67 ; jg 231f <_sk_difference_sse41_8bit+0x1b0>
+ DB 127,67 ; jg 2b5f <_sk_overlay_sse41_8bit+0x476>
DB 0,0 ; add %al,(%rax)
- DB 127,67 ; jg 2323 <_sk_difference_sse41_8bit+0x1b4>
+ DB 127,67 ; jg 2b63 <_sk_overlay_sse41_8bit+0x47a>
DB 0,0 ; add %al,(%rax)
- DB 127,67 ; jg 2327 <_sk_difference_sse41_8bit+0x1b8>
+ DB 127,67 ; jg 2b67 <_sk_overlay_sse41_8bit+0x47e>
ALIGN 16
DB 0,0 ; add %al,(%rax)
@@ -45068,6 +45872,67 @@ ALIGN 16
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,0 ; incl (%rax)
+ DB 3,3 ; add (%rbx),%eax
+ DB 3,3 ; add (%rbx),%eax
+ DB 7 ; (bad)
+ DB 7 ; (bad)
+ DB 7 ; (bad)
+ DB 7 ; (bad)
+ DB 11,11 ; or (%rbx),%ecx
+ DB 11,11 ; or (%rbx),%ecx
+ DB 15 ; (bad)
+ DB 15 ; (bad)
+ DB 15 ; (bad)
+ DB 15,255 ; (bad)
+ DB 0,255 ; add %bh,%bh
+ DB 0,255 ; add %bh,%bh
+ DB 0,255 ; add %bh,%bh
+ DB 0,255 ; add %bh,%bh
+ DB 0,255 ; add %bh,%bh
+ DB 0,255 ; add %bh,%bh
+ DB 0,255 ; add %bh,%bh
+ DB 0,127,0 ; add %bh,0x0(%rdi)
+ DB 127,0 ; jg 2e34 <.literal16+0x304>
+ DB 127,0 ; jg 2e36 <.literal16+0x306>
+ DB 127,0 ; jg 2e38 <.literal16+0x308>
+ DB 127,0 ; jg 2e3a <.literal16+0x30a>
+ DB 127,0 ; jg 2e3c <.literal16+0x30c>
+ DB 127,0 ; jg 2e3e <.literal16+0x30e>
+ DB 127,0 ; jg 2e40 <.literal16+0x310>
+ DB 129,128,129,128,129,128,129,128,129,128; addl $0x80818081,-0x7f7e7f7f(%rax)
+ DB 129,128,129,128,129,128,3,3,3,3 ; addl $0x3030303,-0x7f7e7f7f(%rax)
+ DB 7 ; (bad)
+ DB 7 ; (bad)
+ DB 7 ; (bad)
+ DB 7 ; (bad)
+ DB 11,11 ; or (%rbx),%ecx
+ DB 11,11 ; or (%rbx),%ecx
+ DB 15 ; (bad)
+ DB 15 ; (bad)
+ DB 15 ; (bad)
+ DB 15,255 ; (bad)
+ DB 0,255 ; add %bh,%bh
+ DB 0,255 ; add %bh,%bh
+ DB 0,255 ; add %bh,%bh
+ DB 0,255 ; add %bh,%bh
+ DB 0,255 ; add %bh,%bh
+ DB 0,255 ; add %bh,%bh
+ DB 0,255 ; add %bh,%bh
+ DB 0,127,0 ; add %bh,0x0(%rdi)
+ DB 127,0 ; jg 2e74 <.literal16+0x344>
+ DB 127,0 ; jg 2e76 <.literal16+0x346>
+ DB 127,0 ; jg 2e78 <.literal16+0x348>
+ DB 127,0 ; jg 2e7a <.literal16+0x34a>
+ DB 127,0 ; jg 2e7c <.literal16+0x34c>
+ DB 127,0 ; jg 2e7e <.literal16+0x34e>
+ DB 127,0 ; jg 2e80 <.literal16+0x350>
+ DB 129,128,129,128,129,128,129,128,129,128; addl $0x80818081,-0x7f7e7f7f(%rax)
+ DB 129 ; .byte 0x81
+ DB 128 ; .byte 0x80
+ DB 129 ; .byte 0x81
+ DB 128 ; .byte 0x80
+ DB 129 ; .byte 0x81
+ DB 128 ; .byte 0x80
ALIGN 32
PUBLIC _sk_start_pipeline_sse2_8bit
@@ -45168,7 +46033,7 @@ _sk_uniform_color_sse2_8bit LABEL PROC
PUBLIC _sk_set_rgb_sse2_8bit
_sk_set_rgb_sse2_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 243,15,16,37,100,40,0,0 ; movss 0x2864(%rip),%xmm4 # 29e4 <_sk_difference_sse2_8bit+0x1c3>
+ DB 243,15,16,37,56,49,0,0 ; movss 0x3138(%rip),%xmm4 # 32b8 <_sk_overlay_sse2_8bit+0x46b>
DB 243,15,16,40 ; movss (%rax),%xmm5
DB 243,15,89,236 ; mulss %xmm4,%xmm5
DB 243,72,15,44,205 ; cvttss2si %xmm5,%rcx
@@ -45183,7 +46048,7 @@ _sk_set_rgb_sse2_8bit LABEL PROC
DB 9,208 ; or %edx,%eax
DB 102,15,110,224 ; movd %eax,%xmm4
DB 102,15,112,228,0 ; pshufd $0x0,%xmm4,%xmm4
- DB 102,15,111,45,48,40,0,0 ; movdqa 0x2830(%rip),%xmm5 # 29f0 <_sk_difference_sse2_8bit+0x1cf>
+ DB 102,15,111,45,16,49,0,0 ; movdqa 0x3110(%rip),%xmm5 # 32d0 <_sk_overlay_sse2_8bit+0x483>
DB 102,15,219,205 ; pand %xmm5,%xmm1
DB 102,15,219,197 ; pand %xmm5,%xmm0
DB 102,15,235,196 ; por %xmm4,%xmm0
@@ -45206,7 +46071,7 @@ _sk_premul_sse2_8bit LABEL PROC
DB 102,15,96,192 ; punpcklbw %xmm0,%xmm0
DB 242,15,112,192,95 ; pshuflw $0x5f,%xmm0,%xmm0
DB 243,15,112,248,95 ; pshufhw $0x5f,%xmm0,%xmm7
- DB 102,15,111,5,228,39,0,0 ; movdqa 0x27e4(%rip),%xmm0 # 2a00 <_sk_difference_sse2_8bit+0x1df>
+ DB 102,15,111,5,196,48,0,0 ; movdqa 0x30c4(%rip),%xmm0 # 32e0 <_sk_overlay_sse2_8bit+0x493>
DB 102,15,235,248 ; por %xmm0,%xmm7
DB 102,15,235,240 ; por %xmm0,%xmm6
DB 102,69,15,239,201 ; pxor %xmm9,%xmm9
@@ -45708,7 +46573,7 @@ _sk_load_a8_sse2_8bit LABEL PROC
DB 117,48 ; jne 8b9 <_sk_load_a8_sse2_8bit+0x4d>
DB 243,66,15,126,4,2 ; movq (%rdx,%r8,1),%xmm0
DB 102,15,96,192 ; punpcklbw %xmm0,%xmm0
- DB 102,15,84,5,117,33,0,0 ; andpd 0x2175(%rip),%xmm0 # 2a10 <_sk_difference_sse2_8bit+0x1ef>
+ DB 102,15,84,5,85,42,0,0 ; andpd 0x2a55(%rip),%xmm0 # 32f0 <_sk_overlay_sse2_8bit+0x4a3>
DB 102,15,239,228 ; pxor %xmm4,%xmm4
DB 102,15,40,200 ; movapd %xmm0,%xmm1
DB 102,15,105,204 ; punpckhwd %xmm4,%xmm1
@@ -45783,7 +46648,7 @@ _sk_load_a8_dst_sse2_8bit LABEL PROC
DB 117,48 ; jne 9ad <_sk_load_a8_dst_sse2_8bit+0x4d>
DB 243,66,15,126,20,2 ; movq (%rdx,%r8,1),%xmm2
DB 102,15,96,208 ; punpcklbw %xmm0,%xmm2
- DB 102,15,84,21,145,32,0,0 ; andpd 0x2091(%rip),%xmm2 # 2a20 <_sk_difference_sse2_8bit+0x1ff>
+ DB 102,15,84,21,113,41,0,0 ; andpd 0x2971(%rip),%xmm2 # 3300 <_sk_overlay_sse2_8bit+0x4b3>
DB 102,15,239,228 ; pxor %xmm4,%xmm4
DB 102,15,40,218 ; movapd %xmm2,%xmm3
DB 102,15,105,220 ; punpckhwd %xmm4,%xmm3
@@ -45866,7 +46731,7 @@ _sk_store_a8_sse2_8bit LABEL PROC
DB 102,15,107,229 ; packssdw %xmm5,%xmm4
DB 77,133,201 ; test %r9,%r9
DB 117,26 ; jne ab9 <_sk_store_a8_sse2_8bit+0x65>
- DB 102,15,219,37,137,31,0,0 ; pand 0x1f89(%rip),%xmm4 # 2a30 <_sk_difference_sse2_8bit+0x20f>
+ DB 102,15,219,37,105,40,0,0 ; pand 0x2869(%rip),%xmm4 # 3310 <_sk_overlay_sse2_8bit+0x4c3>
DB 102,15,103,228 ; packuswb %xmm4,%xmm4
DB 102,66,15,214,36,2 ; movq %xmm4,(%rdx,%r8,1)
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -45888,7 +46753,7 @@ _sk_store_a8_sse2_8bit LABEL PROC
DB 102,15,127,100,36,16 ; movdqa %xmm4,0x10(%rsp)
DB 138,68,36,20 ; mov 0x14(%rsp),%al
DB 66,136,68,2,2 ; mov %al,0x2(%rdx,%r8,1)
- DB 102,15,219,37,49,31,0,0 ; pand 0x1f31(%rip),%xmm4 # 2a30 <_sk_difference_sse2_8bit+0x20f>
+ DB 102,15,219,37,17,40,0,0 ; pand 0x2811(%rip),%xmm4 # 3310 <_sk_overlay_sse2_8bit+0x4c3>
DB 102,15,103,228 ; packuswb %xmm4,%xmm4
DB 102,15,126,224 ; movd %xmm4,%eax
DB 102,66,137,4,2 ; mov %ax,(%rdx,%r8,1)
@@ -45902,7 +46767,7 @@ _sk_store_a8_sse2_8bit LABEL PROC
DB 102,15,127,100,36,32 ; movdqa %xmm4,0x20(%rsp)
DB 138,68,36,40 ; mov 0x28(%rsp),%al
DB 66,136,68,2,4 ; mov %al,0x4(%rdx,%r8,1)
- DB 102,15,219,37,237,30,0,0 ; pand 0x1eed(%rip),%xmm4 # 2a30 <_sk_difference_sse2_8bit+0x20f>
+ DB 102,15,219,37,205,39,0,0 ; pand 0x27cd(%rip),%xmm4 # 3310 <_sk_overlay_sse2_8bit+0x4c3>
DB 102,15,103,228 ; packuswb %xmm4,%xmm4
DB 102,66,15,126,36,2 ; movd %xmm4,(%rdx,%r8,1)
DB 233,95,255,255,255 ; jmpq ab1 <_sk_store_a8_sse2_8bit+0x5d>
@@ -45940,12 +46805,12 @@ _sk_load_g8_sse2_8bit LABEL PROC
DB 117,116 ; jne c01 <_sk_load_g8_sse2_8bit+0x91>
DB 243,66,15,126,4,2 ; movq (%rdx,%r8,1),%xmm0
DB 102,15,96,192 ; punpcklbw %xmm0,%xmm0
- DB 102,15,84,5,161,30,0,0 ; andpd 0x1ea1(%rip),%xmm0 # 2a40 <_sk_difference_sse2_8bit+0x21f>
+ DB 102,15,84,5,129,39,0,0 ; andpd 0x2781(%rip),%xmm0 # 3320 <_sk_overlay_sse2_8bit+0x4d3>
DB 102,15,239,201 ; pxor %xmm1,%xmm1
DB 102,15,40,224 ; movapd %xmm0,%xmm4
DB 102,15,97,225 ; punpcklwd %xmm1,%xmm4
DB 102,15,105,193 ; punpckhwd %xmm1,%xmm0
- DB 102,15,111,45,153,30,0,0 ; movdqa 0x1e99(%rip),%xmm5 # 2a50 <_sk_difference_sse2_8bit+0x22f>
+ DB 102,15,111,45,121,39,0,0 ; movdqa 0x2779(%rip),%xmm5 # 3330 <_sk_overlay_sse2_8bit+0x4e3>
DB 102,15,112,240,245 ; pshufd $0xf5,%xmm0,%xmm6
DB 102,15,244,197 ; pmuludq %xmm5,%xmm0
DB 102,15,112,200,232 ; pshufd $0xe8,%xmm0,%xmm1
@@ -45958,7 +46823,7 @@ _sk_load_g8_sse2_8bit LABEL PROC
DB 102,15,244,245 ; pmuludq %xmm5,%xmm6
DB 102,15,112,230,232 ; pshufd $0xe8,%xmm6,%xmm4
DB 102,15,98,196 ; punpckldq %xmm4,%xmm0
- DB 102,15,111,37,107,30,0,0 ; movdqa 0x1e6b(%rip),%xmm4 # 2a60 <_sk_difference_sse2_8bit+0x23f>
+ DB 102,15,111,37,75,39,0,0 ; movdqa 0x274b(%rip),%xmm4 # 3340 <_sk_overlay_sse2_8bit+0x4f3>
DB 102,15,235,196 ; por %xmm4,%xmm0
DB 102,15,235,204 ; por %xmm4,%xmm1
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -46030,12 +46895,12 @@ _sk_load_g8_dst_sse2_8bit LABEL PROC
DB 117,116 ; jne d41 <_sk_load_g8_dst_sse2_8bit+0x91>
DB 243,66,15,126,20,2 ; movq (%rdx,%r8,1),%xmm2
DB 102,15,96,208 ; punpcklbw %xmm0,%xmm2
- DB 102,15,84,21,145,29,0,0 ; andpd 0x1d91(%rip),%xmm2 # 2a70 <_sk_difference_sse2_8bit+0x24f>
+ DB 102,15,84,21,113,38,0,0 ; andpd 0x2671(%rip),%xmm2 # 3350 <_sk_overlay_sse2_8bit+0x503>
DB 102,15,239,219 ; pxor %xmm3,%xmm3
DB 102,15,40,226 ; movapd %xmm2,%xmm4
DB 102,15,97,227 ; punpcklwd %xmm3,%xmm4
DB 102,15,105,211 ; punpckhwd %xmm3,%xmm2
- DB 102,15,111,45,137,29,0,0 ; movdqa 0x1d89(%rip),%xmm5 # 2a80 <_sk_difference_sse2_8bit+0x25f>
+ DB 102,15,111,45,105,38,0,0 ; movdqa 0x2669(%rip),%xmm5 # 3360 <_sk_overlay_sse2_8bit+0x513>
DB 102,15,112,242,245 ; pshufd $0xf5,%xmm2,%xmm6
DB 102,15,244,213 ; pmuludq %xmm5,%xmm2
DB 102,15,112,218,232 ; pshufd $0xe8,%xmm2,%xmm3
@@ -46048,7 +46913,7 @@ _sk_load_g8_dst_sse2_8bit LABEL PROC
DB 102,15,244,245 ; pmuludq %xmm5,%xmm6
DB 102,15,112,230,232 ; pshufd $0xe8,%xmm6,%xmm4
DB 102,15,98,212 ; punpckldq %xmm4,%xmm2
- DB 102,15,111,37,91,29,0,0 ; movdqa 0x1d5b(%rip),%xmm4 # 2a90 <_sk_difference_sse2_8bit+0x26f>
+ DB 102,15,111,37,59,38,0,0 ; movdqa 0x263b(%rip),%xmm4 # 3370 <_sk_overlay_sse2_8bit+0x523>
DB 102,15,235,212 ; por %xmm4,%xmm2
DB 102,15,235,220 ; por %xmm4,%xmm3
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -46261,7 +47126,7 @@ _sk_scale_1_float_sse2_8bit LABEL PROC
DB 102,68,15,111,200 ; movdqa %xmm0,%xmm9
DB 72,173 ; lods %ds:(%rsi),%rax
DB 243,15,16,0 ; movss (%rax),%xmm0
- DB 243,15,89,5,148,25,0,0 ; mulss 0x1994(%rip),%xmm0 # 29e8 <_sk_difference_sse2_8bit+0x1c7>
+ DB 243,15,89,5,104,34,0,0 ; mulss 0x2268(%rip),%xmm0 # 32bc <_sk_overlay_sse2_8bit+0x46f>
DB 243,15,44,192 ; cvttss2si %xmm0,%eax
DB 102,15,239,246 ; pxor %xmm6,%xmm6
DB 102,65,15,111,193 ; movdqa %xmm9,%xmm0
@@ -46273,7 +47138,7 @@ _sk_scale_1_float_sse2_8bit LABEL PROC
DB 102,15,96,246 ; punpcklbw %xmm6,%xmm6
DB 242,15,112,246,0 ; pshuflw $0x0,%xmm6,%xmm6
DB 102,15,112,246,80 ; pshufd $0x50,%xmm6,%xmm6
- DB 102,15,219,53,19,26,0,0 ; pand 0x1a13(%rip),%xmm6 # 2aa0 <_sk_difference_sse2_8bit+0x27f>
+ DB 102,15,219,53,243,34,0,0 ; pand 0x22f3(%rip),%xmm6 # 3380 <_sk_overlay_sse2_8bit+0x533>
DB 102,15,111,254 ; movdqa %xmm6,%xmm7
DB 102,65,15,213,248 ; pmullw %xmm8,%xmm7
DB 102,15,111,230 ; movdqa %xmm6,%xmm4
@@ -46307,7 +47172,7 @@ _sk_scale_u8_sse2_8bit LABEL PROC
DB 15,133,239,0,0,0 ; jne 11ed <_sk_scale_u8_sse2_8bit+0x110>
DB 243,66,15,126,36,2 ; movq (%rdx,%r8,1),%xmm4
DB 102,15,96,224 ; punpcklbw %xmm0,%xmm4
- DB 102,15,84,37,160,25,0,0 ; andpd 0x19a0(%rip),%xmm4 # 2ab0 <_sk_difference_sse2_8bit+0x28f>
+ DB 102,15,84,37,128,34,0,0 ; andpd 0x2280(%rip),%xmm4 # 3390 <_sk_overlay_sse2_8bit+0x543>
DB 102,69,15,239,192 ; pxor %xmm8,%xmm8
DB 102,15,40,236 ; movapd %xmm4,%xmm5
DB 102,65,15,105,232 ; punpckhwd %xmm8,%xmm5
@@ -46414,7 +47279,7 @@ PUBLIC _sk_lerp_1_float_sse2_8bit
_sk_lerp_1_float_sse2_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 243,15,16,32 ; movss (%rax),%xmm4
- DB 243,15,89,37,62,23,0,0 ; mulss 0x173e(%rip),%xmm4 # 29ec <_sk_difference_sse2_8bit+0x1cb>
+ DB 243,15,89,37,18,32,0,0 ; mulss 0x2012(%rip),%xmm4 # 32c0 <_sk_overlay_sse2_8bit+0x473>
DB 243,15,44,196 ; cvttss2si %xmm4,%eax
DB 102,15,110,224 ; movd %eax,%xmm4
DB 102,15,96,228 ; punpcklbw %xmm4,%xmm4
@@ -46427,7 +47292,7 @@ _sk_lerp_1_float_sse2_8bit LABEL PROC
DB 102,68,15,111,217 ; movdqa %xmm1,%xmm11
DB 102,69,15,96,217 ; punpcklbw %xmm9,%xmm11
DB 102,65,15,104,201 ; punpckhbw %xmm9,%xmm1
- DB 102,15,111,53,209,23,0,0 ; movdqa 0x17d1(%rip),%xmm6 # 2ac0 <_sk_difference_sse2_8bit+0x29f>
+ DB 102,15,111,53,177,32,0,0 ; movdqa 0x20b1(%rip),%xmm6 # 33a0 <_sk_overlay_sse2_8bit+0x553>
DB 102,65,15,219,240 ; pand %xmm8,%xmm6
DB 102,15,111,230 ; movdqa %xmm6,%xmm4
DB 102,15,213,225 ; pmullw %xmm1,%xmm4
@@ -46493,7 +47358,7 @@ _sk_lerp_u8_sse2_8bit LABEL PROC
DB 15,133,141,1,0,0 ; jne 1584 <_sk_lerp_u8_sse2_8bit+0x1ae>
DB 243,66,15,126,44,2 ; movq (%rdx,%r8,1),%xmm5
DB 102,15,96,232 ; punpcklbw %xmm0,%xmm5
- DB 102,15,84,45,199,22,0,0 ; andpd 0x16c7(%rip),%xmm5 # 2ad0 <_sk_difference_sse2_8bit+0x2af>
+ DB 102,15,84,45,167,31,0,0 ; andpd 0x1fa7(%rip),%xmm5 # 33b0 <_sk_overlay_sse2_8bit+0x563>
DB 102,69,15,239,192 ; pxor %xmm8,%xmm8
DB 102,15,40,229 ; movapd %xmm5,%xmm4
DB 102,65,15,105,224 ; punpckhwd %xmm8,%xmm4
@@ -46645,7 +47510,7 @@ _sk_move_dst_src_sse2_8bit LABEL PROC
PUBLIC _sk_black_color_sse2_8bit
_sk_black_color_sse2_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 15,40,5,143,20,0,0 ; movaps 0x148f(%rip),%xmm0 # 2ae0 <_sk_difference_sse2_8bit+0x2bf>
+ DB 15,40,5,111,29,0,0 ; movaps 0x1d6f(%rip),%xmm0 # 33c0 <_sk_overlay_sse2_8bit+0x573>
DB 15,40,200 ; movaps %xmm0,%xmm1
DB 255,224 ; jmpq *%rax
@@ -47509,7 +48374,7 @@ _sk_darken_sse2_8bit LABEL PROC
DB 102,65,15,248,234 ; psubb %xmm10,%xmm5
DB 102,15,248,207 ; psubb %xmm7,%xmm1
DB 102,15,248,196 ; psubb %xmm4,%xmm0
- DB 102,15,111,37,164,5,0,0 ; movdqa 0x5a4(%rip),%xmm4 # 2af0 <_sk_difference_sse2_8bit+0x2cf>
+ DB 102,15,111,37,132,14,0,0 ; movdqa 0xe84(%rip),%xmm4 # 33d0 <_sk_overlay_sse2_8bit+0x583>
DB 102,15,219,236 ; pand %xmm4,%xmm5
DB 102,15,111,252 ; movdqa %xmm4,%xmm7
DB 102,15,223,248 ; pandn %xmm0,%xmm7
@@ -47615,7 +48480,7 @@ _sk_lighten_sse2_8bit LABEL PROC
DB 102,65,15,248,234 ; psubb %xmm10,%xmm5
DB 102,15,248,207 ; psubb %xmm7,%xmm1
DB 102,15,248,196 ; psubb %xmm4,%xmm0
- DB 102,15,111,37,200,3,0,0 ; movdqa 0x3c8(%rip),%xmm4 # 2b00 <_sk_difference_sse2_8bit+0x2df>
+ DB 102,15,111,37,168,12,0,0 ; movdqa 0xca8(%rip),%xmm4 # 33e0 <_sk_overlay_sse2_8bit+0x593>
DB 102,15,219,236 ; pand %xmm4,%xmm5
DB 102,15,111,252 ; movdqa %xmm4,%xmm7
DB 102,15,223,248 ; pandn %xmm0,%xmm7
@@ -47663,7 +48528,7 @@ _sk_exclusion_sse2_8bit LABEL PROC
DB 102,15,113,214,8 ; psrlw $0x8,%xmm6
DB 102,15,103,244 ; packuswb %xmm4,%xmm6
DB 102,15,103,239 ; packuswb %xmm7,%xmm5
- DB 102,15,111,37,11,3,0,0 ; movdqa 0x30b(%rip),%xmm4 # 2b10 <_sk_difference_sse2_8bit+0x2ef>
+ DB 102,15,111,37,235,11,0,0 ; movdqa 0xbeb(%rip),%xmm4 # 33f0 <_sk_overlay_sse2_8bit+0x5a3>
DB 102,15,248,205 ; psubb %xmm5,%xmm1
DB 102,15,219,236 ; pand %xmm4,%xmm5
DB 102,15,219,230 ; pand %xmm6,%xmm4
@@ -47760,7 +48625,7 @@ _sk_difference_sse2_8bit LABEL PROC
DB 102,65,15,103,244 ; packuswb %xmm12,%xmm6
DB 102,65,15,218,226 ; pminub %xmm10,%xmm4
DB 102,65,15,218,243 ; pminub %xmm11,%xmm6
- DB 102,15,111,45,88,1,0,0 ; movdqa 0x158(%rip),%xmm5 # 2b20 <_sk_difference_sse2_8bit+0x2ff>
+ DB 102,15,111,45,56,10,0,0 ; movdqa 0xa38(%rip),%xmm5 # 3400 <_sk_overlay_sse2_8bit+0x5b3>
DB 102,15,248,206 ; psubb %xmm6,%xmm1
DB 102,15,219,245 ; pand %xmm5,%xmm6
DB 102,15,219,236 ; pand %xmm4,%xmm5
@@ -47770,13 +48635,450 @@ _sk_difference_sse2_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
+PUBLIC _sk_hardlight_sse2_8bit
+_sk_hardlight_sse2_8bit LABEL PROC
+ DB 72,129,236,56,1,0,0 ; sub $0x138,%rsp
+ DB 102,68,15,111,250 ; movdqa %xmm2,%xmm15
+ DB 102,68,15,111,200 ; movdqa %xmm0,%xmm9
+ DB 102,15,239,237 ; pxor %xmm5,%xmm5
+ DB 102,69,15,111,241 ; movdqa %xmm9,%xmm14
+ DB 102,68,15,96,245 ; punpcklbw %xmm5,%xmm14
+ DB 102,15,111,193 ; movdqa %xmm1,%xmm0
+ DB 102,15,96,197 ; punpcklbw %xmm5,%xmm0
+ DB 102,68,15,111,192 ; movdqa %xmm0,%xmm8
+ DB 242,65,15,112,193,231 ; pshuflw $0xe7,%xmm9,%xmm0
+ DB 102,68,15,104,205 ; punpckhbw %xmm5,%xmm9
+ DB 243,15,112,192,231 ; pshufhw $0xe7,%xmm0,%xmm0
+ DB 102,15,112,192,232 ; pshufd $0xe8,%xmm0,%xmm0
+ DB 102,15,96,192 ; punpcklbw %xmm0,%xmm0
+ DB 242,15,112,192,95 ; pshuflw $0x5f,%xmm0,%xmm0
+ DB 243,15,112,224,95 ; pshufhw $0x5f,%xmm0,%xmm4
+ DB 242,15,112,193,231 ; pshuflw $0xe7,%xmm1,%xmm0
+ DB 102,15,104,205 ; punpckhbw %xmm5,%xmm1
+ DB 102,15,127,76,36,64 ; movdqa %xmm1,0x40(%rsp)
+ DB 243,15,112,192,231 ; pshufhw $0xe7,%xmm0,%xmm0
+ DB 102,15,112,192,232 ; pshufd $0xe8,%xmm0,%xmm0
+ DB 102,15,96,192 ; punpcklbw %xmm0,%xmm0
+ DB 242,15,112,192,95 ; pshuflw $0x5f,%xmm0,%xmm0
+ DB 243,68,15,112,224,95 ; pshufhw $0x5f,%xmm0,%xmm12
+ DB 102,65,15,111,252 ; movdqa %xmm12,%xmm7
+ DB 102,15,104,253 ; punpckhbw %xmm5,%xmm7
+ DB 102,15,127,188,36,176,0,0,0 ; movdqa %xmm7,0xb0(%rsp)
+ DB 102,68,15,96,229 ; punpcklbw %xmm5,%xmm12
+ DB 102,15,111,196 ; movdqa %xmm4,%xmm0
+ DB 102,15,104,197 ; punpckhbw %xmm5,%xmm0
+ DB 102,15,127,68,36,32 ; movdqa %xmm0,0x20(%rsp)
+ DB 102,15,96,229 ; punpcklbw %xmm5,%xmm4
+ DB 102,68,15,127,188,36,32,1,0,0 ; movdqa %xmm15,0x120(%rsp)
+ DB 102,65,15,111,207 ; movdqa %xmm15,%xmm1
+ DB 242,65,15,112,199,231 ; pshuflw $0xe7,%xmm15,%xmm0
+ DB 102,68,15,96,253 ; punpcklbw %xmm5,%xmm15
+ DB 102,15,104,205 ; punpckhbw %xmm5,%xmm1
+ DB 102,15,127,76,36,16 ; movdqa %xmm1,0x10(%rsp)
+ DB 102,15,127,156,36,16,1,0,0 ; movdqa %xmm3,0x110(%rsp)
+ DB 102,15,111,211 ; movdqa %xmm3,%xmm2
+ DB 102,15,111,203 ; movdqa %xmm3,%xmm1
+ DB 102,15,96,205 ; punpcklbw %xmm5,%xmm1
+ DB 102,15,127,12,36 ; movdqa %xmm1,(%rsp)
+ DB 102,15,104,213 ; punpckhbw %xmm5,%xmm2
+ DB 102,15,127,148,36,0,1,0,0 ; movdqa %xmm2,0x100(%rsp)
+ DB 243,15,112,192,231 ; pshufhw $0xe7,%xmm0,%xmm0
+ DB 102,15,112,192,232 ; pshufd $0xe8,%xmm0,%xmm0
+ DB 102,15,96,192 ; punpcklbw %xmm0,%xmm0
+ DB 242,15,112,192,95 ; pshuflw $0x5f,%xmm0,%xmm0
+ DB 243,15,112,240,95 ; pshufhw $0x5f,%xmm0,%xmm6
+ DB 242,15,112,195,231 ; pshuflw $0xe7,%xmm3,%xmm0
+ DB 243,15,112,192,231 ; pshufhw $0xe7,%xmm0,%xmm0
+ DB 102,15,112,192,232 ; pshufd $0xe8,%xmm0,%xmm0
+ DB 102,15,96,192 ; punpcklbw %xmm0,%xmm0
+ DB 242,15,112,192,95 ; pshuflw $0x5f,%xmm0,%xmm0
+ DB 243,68,15,112,232,95 ; pshufhw $0x5f,%xmm0,%xmm13
+ DB 102,65,15,111,205 ; movdqa %xmm13,%xmm1
+ DB 102,15,104,205 ; punpckhbw %xmm5,%xmm1
+ DB 102,15,127,76,36,48 ; movdqa %xmm1,0x30(%rsp)
+ DB 102,68,15,96,237 ; punpcklbw %xmm5,%xmm13
+ DB 102,15,111,206 ; movdqa %xmm6,%xmm1
+ DB 102,15,104,205 ; punpckhbw %xmm5,%xmm1
+ DB 102,15,96,245 ; punpcklbw %xmm5,%xmm6
+ DB 102,69,15,111,214 ; movdqa %xmm14,%xmm10
+ DB 102,69,15,253,210 ; paddw %xmm10,%xmm10
+ DB 102,65,15,111,193 ; movdqa %xmm9,%xmm0
+ DB 102,15,253,192 ; paddw %xmm0,%xmm0
+ DB 102,65,15,111,216 ; movdqa %xmm8,%xmm3
+ DB 102,69,15,253,192 ; paddw %xmm8,%xmm8
+ DB 102,15,111,84,36,64 ; movdqa 0x40(%rsp),%xmm2
+ DB 102,15,253,210 ; paddw %xmm2,%xmm2
+ DB 102,15,217,215 ; psubusw %xmm7,%xmm2
+ DB 102,15,117,213 ; pcmpeqw %xmm5,%xmm2
+ DB 102,15,127,148,36,240,0,0,0 ; movdqa %xmm2,0xf0(%rsp)
+ DB 102,69,15,217,196 ; psubusw %xmm12,%xmm8
+ DB 102,68,15,117,197 ; pcmpeqw %xmm5,%xmm8
+ DB 102,15,111,84,36,32 ; movdqa 0x20(%rsp),%xmm2
+ DB 102,15,217,194 ; psubusw %xmm2,%xmm0
+ DB 102,15,117,197 ; pcmpeqw %xmm5,%xmm0
+ DB 102,15,127,132,36,192,0,0,0 ; movdqa %xmm0,0xc0(%rsp)
+ DB 102,68,15,111,220 ; movdqa %xmm4,%xmm11
+ DB 102,69,15,217,211 ; psubusw %xmm11,%xmm10
+ DB 102,68,15,117,213 ; pcmpeqw %xmm5,%xmm10
+ DB 102,15,111,45,123,8,0,0 ; movdqa 0x87b(%rip),%xmm5 # 3410 <_sk_overlay_sse2_8bit+0x5c3>
+ DB 102,15,111,198 ; movdqa %xmm6,%xmm0
+ DB 102,15,239,197 ; pxor %xmm5,%xmm0
+ DB 102,65,15,213,198 ; pmullw %xmm14,%xmm0
+ DB 102,15,127,132,36,224,0,0,0 ; movdqa %xmm0,0xe0(%rsp)
+ DB 102,65,15,111,195 ; movdqa %xmm11,%xmm0
+ DB 102,15,239,197 ; pxor %xmm5,%xmm0
+ DB 102,65,15,213,199 ; pmullw %xmm15,%xmm0
+ DB 102,15,127,132,36,208,0,0,0 ; movdqa %xmm0,0xd0(%rsp)
+ DB 102,15,111,198 ; movdqa %xmm6,%xmm0
+ DB 102,65,15,249,247 ; psubw %xmm15,%xmm6
+ DB 102,15,127,180,36,128,0,0,0 ; movdqa %xmm6,0x80(%rsp)
+ DB 102,65,15,111,255 ; movdqa %xmm15,%xmm7
+ DB 102,65,15,213,254 ; pmullw %xmm14,%xmm7
+ DB 102,65,15,213,195 ; pmullw %xmm11,%xmm0
+ DB 102,15,127,132,36,144,0,0,0 ; movdqa %xmm0,0x90(%rsp)
+ DB 102,69,15,249,222 ; psubw %xmm14,%xmm11
+ DB 102,15,111,193 ; movdqa %xmm1,%xmm0
+ DB 102,15,239,197 ; pxor %xmm5,%xmm0
+ DB 102,65,15,213,193 ; pmullw %xmm9,%xmm0
+ DB 102,15,127,132,36,160,0,0,0 ; movdqa %xmm0,0xa0(%rsp)
+ DB 102,68,15,111,250 ; movdqa %xmm2,%xmm15
+ DB 102,68,15,239,253 ; pxor %xmm5,%xmm15
+ DB 102,15,111,116,36,16 ; movdqa 0x10(%rsp),%xmm6
+ DB 102,68,15,213,254 ; pmullw %xmm6,%xmm15
+ DB 102,15,111,193 ; movdqa %xmm1,%xmm0
+ DB 102,15,249,206 ; psubw %xmm6,%xmm1
+ DB 102,15,127,76,36,96 ; movdqa %xmm1,0x60(%rsp)
+ DB 102,65,15,213,241 ; pmullw %xmm9,%xmm6
+ DB 102,15,213,194 ; pmullw %xmm2,%xmm0
+ DB 102,15,127,68,36,112 ; movdqa %xmm0,0x70(%rsp)
+ DB 102,65,15,249,209 ; psubw %xmm9,%xmm2
+ DB 102,15,127,84,36,32 ; movdqa %xmm2,0x20(%rsp)
+ DB 102,65,15,111,197 ; movdqa %xmm13,%xmm0
+ DB 102,15,111,200 ; movdqa %xmm0,%xmm1
+ DB 102,15,239,205 ; pxor %xmm5,%xmm1
+ DB 102,15,213,203 ; pmullw %xmm3,%xmm1
+ DB 102,15,127,76,36,16 ; movdqa %xmm1,0x10(%rsp)
+ DB 102,69,15,111,204 ; movdqa %xmm12,%xmm9
+ DB 102,68,15,239,205 ; pxor %xmm5,%xmm9
+ DB 102,15,111,36,36 ; movdqa (%rsp),%xmm4
+ DB 102,68,15,213,204 ; pmullw %xmm4,%xmm9
+ DB 102,68,15,111,232 ; movdqa %xmm0,%xmm13
+ DB 102,15,249,196 ; psubw %xmm4,%xmm0
+ DB 102,15,127,68,36,80 ; movdqa %xmm0,0x50(%rsp)
+ DB 102,15,213,227 ; pmullw %xmm3,%xmm4
+ DB 102,69,15,213,236 ; pmullw %xmm12,%xmm13
+ DB 102,68,15,249,227 ; psubw %xmm3,%xmm12
+ DB 102,15,111,76,36,48 ; movdqa 0x30(%rsp),%xmm1
+ DB 102,15,111,193 ; movdqa %xmm1,%xmm0
+ DB 102,15,239,197 ; pxor %xmm5,%xmm0
+ DB 102,15,111,92,36,64 ; movdqa 0x40(%rsp),%xmm3
+ DB 102,15,213,195 ; pmullw %xmm3,%xmm0
+ DB 102,15,127,4,36 ; movdqa %xmm0,(%rsp)
+ DB 102,15,111,132,36,176,0,0,0 ; movdqa 0xb0(%rsp),%xmm0
+ DB 102,15,239,232 ; pxor %xmm0,%xmm5
+ DB 102,15,111,148,36,0,1,0,0 ; movdqa 0x100(%rsp),%xmm2
+ DB 102,15,213,234 ; pmullw %xmm2,%xmm5
+ DB 102,68,15,111,241 ; movdqa %xmm1,%xmm14
+ DB 102,15,249,202 ; psubw %xmm2,%xmm1
+ DB 102,15,127,76,36,48 ; movdqa %xmm1,0x30(%rsp)
+ DB 102,15,213,211 ; pmullw %xmm3,%xmm2
+ DB 102,68,15,213,240 ; pmullw %xmm0,%xmm14
+ DB 102,15,249,195 ; psubw %xmm3,%xmm0
+ DB 102,15,213,68,36,48 ; pmullw 0x30(%rsp),%xmm0
+ DB 102,68,15,213,100,36,80 ; pmullw 0x50(%rsp),%xmm12
+ DB 102,15,111,76,36,32 ; movdqa 0x20(%rsp),%xmm1
+ DB 102,15,213,76,36,96 ; pmullw 0x60(%rsp),%xmm1
+ DB 102,68,15,213,156,36,128,0,0,0 ; pmullw 0x80(%rsp),%xmm11
+ DB 102,15,253,192 ; paddw %xmm0,%xmm0
+ DB 102,68,15,249,240 ; psubw %xmm0,%xmm14
+ DB 102,69,15,253,228 ; paddw %xmm12,%xmm12
+ DB 102,69,15,249,236 ; psubw %xmm12,%xmm13
+ DB 102,15,253,201 ; paddw %xmm1,%xmm1
+ DB 102,15,111,92,36,112 ; movdqa 0x70(%rsp),%xmm3
+ DB 102,15,249,217 ; psubw %xmm1,%xmm3
+ DB 102,69,15,253,219 ; paddw %xmm11,%xmm11
+ DB 102,15,111,132,36,144,0,0,0 ; movdqa 0x90(%rsp),%xmm0
+ DB 102,65,15,249,195 ; psubw %xmm11,%xmm0
+ DB 102,15,253,255 ; paddw %xmm7,%xmm7
+ DB 102,65,15,219,250 ; pand %xmm10,%xmm7
+ DB 102,68,15,223,208 ; pandn %xmm0,%xmm10
+ DB 102,68,15,235,215 ; por %xmm7,%xmm10
+ DB 102,15,253,246 ; paddw %xmm6,%xmm6
+ DB 102,15,111,132,36,192,0,0,0 ; movdqa 0xc0(%rsp),%xmm0
+ DB 102,15,219,240 ; pand %xmm0,%xmm6
+ DB 102,15,223,195 ; pandn %xmm3,%xmm0
+ DB 102,15,235,198 ; por %xmm6,%xmm0
+ DB 102,15,111,216 ; movdqa %xmm0,%xmm3
+ DB 102,15,253,228 ; paddw %xmm4,%xmm4
+ DB 102,65,15,219,224 ; pand %xmm8,%xmm4
+ DB 102,69,15,223,197 ; pandn %xmm13,%xmm8
+ DB 102,68,15,235,196 ; por %xmm4,%xmm8
+ DB 102,15,253,210 ; paddw %xmm2,%xmm2
+ DB 102,15,111,132,36,240,0,0,0 ; movdqa 0xf0(%rsp),%xmm0
+ DB 102,15,219,208 ; pand %xmm0,%xmm2
+ DB 102,65,15,223,198 ; pandn %xmm14,%xmm0
+ DB 102,15,235,194 ; por %xmm2,%xmm0
+ DB 102,15,111,200 ; movdqa %xmm0,%xmm1
+ DB 102,15,111,148,36,208,0,0,0 ; movdqa 0xd0(%rsp),%xmm2
+ DB 102,15,253,148,36,224,0,0,0 ; paddw 0xe0(%rsp),%xmm2
+ DB 102,68,15,253,188,36,160,0,0,0 ; paddw 0xa0(%rsp),%xmm15
+ DB 102,68,15,253,76,36,16 ; paddw 0x10(%rsp),%xmm9
+ DB 102,15,253,44,36 ; paddw (%rsp),%xmm5
+ DB 102,15,111,5,89,6,0,0 ; movdqa 0x659(%rip),%xmm0 # 3420 <_sk_overlay_sse2_8bit+0x5d3>
+ DB 102,15,253,208 ; paddw %xmm0,%xmm2
+ DB 102,68,15,253,210 ; paddw %xmm2,%xmm10
+ DB 102,68,15,253,248 ; paddw %xmm0,%xmm15
+ DB 102,68,15,253,251 ; paddw %xmm3,%xmm15
+ DB 102,68,15,253,200 ; paddw %xmm0,%xmm9
+ DB 102,69,15,253,193 ; paddw %xmm9,%xmm8
+ DB 102,15,253,232 ; paddw %xmm0,%xmm5
+ DB 102,15,253,233 ; paddw %xmm1,%xmm5
+ DB 102,15,111,5,60,6,0,0 ; movdqa 0x63c(%rip),%xmm0 # 3430 <_sk_overlay_sse2_8bit+0x5e3>
+ DB 102,15,228,232 ; pmulhuw %xmm0,%xmm5
+ DB 102,68,15,228,192 ; pmulhuw %xmm0,%xmm8
+ DB 102,68,15,228,248 ; pmulhuw %xmm0,%xmm15
+ DB 102,68,15,228,208 ; pmulhuw %xmm0,%xmm10
+ DB 102,65,15,113,215,7 ; psrlw $0x7,%xmm15
+ DB 102,65,15,113,210,7 ; psrlw $0x7,%xmm10
+ DB 102,69,15,103,215 ; packuswb %xmm15,%xmm10
+ DB 102,15,113,213,7 ; psrlw $0x7,%xmm5
+ DB 102,65,15,113,208,7 ; psrlw $0x7,%xmm8
+ DB 102,68,15,103,197 ; packuswb %xmm5,%xmm8
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 15,40,148,36,32,1,0,0 ; movaps 0x120(%rsp),%xmm2
+ DB 15,40,156,36,16,1,0,0 ; movaps 0x110(%rsp),%xmm3
+ DB 102,65,15,111,194 ; movdqa %xmm10,%xmm0
+ DB 102,65,15,111,200 ; movdqa %xmm8,%xmm1
+ DB 72,129,196,56,1,0,0 ; add $0x138,%rsp
+ DB 255,224 ; jmpq *%rax
+
+PUBLIC _sk_overlay_sse2_8bit
+_sk_overlay_sse2_8bit LABEL PROC
+ DB 72,129,236,72,1,0,0 ; sub $0x148,%rsp
+ DB 102,15,239,228 ; pxor %xmm4,%xmm4
+ DB 102,68,15,111,248 ; movdqa %xmm0,%xmm15
+ DB 102,68,15,96,252 ; punpcklbw %xmm4,%xmm15
+ DB 102,15,111,233 ; movdqa %xmm1,%xmm5
+ DB 102,15,96,236 ; punpcklbw %xmm4,%xmm5
+ DB 102,15,127,108,36,16 ; movdqa %xmm5,0x10(%rsp)
+ DB 242,15,112,232,231 ; pshuflw $0xe7,%xmm0,%xmm5
+ DB 102,15,104,196 ; punpckhbw %xmm4,%xmm0
+ DB 102,15,127,4,36 ; movdqa %xmm0,(%rsp)
+ DB 243,15,112,197,231 ; pshufhw $0xe7,%xmm5,%xmm0
+ DB 102,15,112,192,232 ; pshufd $0xe8,%xmm0,%xmm0
+ DB 102,15,96,192 ; punpcklbw %xmm0,%xmm0
+ DB 242,15,112,192,95 ; pshuflw $0x5f,%xmm0,%xmm0
+ DB 243,15,112,232,95 ; pshufhw $0x5f,%xmm0,%xmm5
+ DB 242,15,112,193,231 ; pshuflw $0xe7,%xmm1,%xmm0
+ DB 102,15,104,204 ; punpckhbw %xmm4,%xmm1
+ DB 102,15,127,140,36,16,1,0,0 ; movdqa %xmm1,0x110(%rsp)
+ DB 243,15,112,192,231 ; pshufhw $0xe7,%xmm0,%xmm0
+ DB 102,15,112,192,232 ; pshufd $0xe8,%xmm0,%xmm0
+ DB 102,15,96,192 ; punpcklbw %xmm0,%xmm0
+ DB 242,15,112,192,95 ; pshuflw $0x5f,%xmm0,%xmm0
+ DB 243,15,112,200,95 ; pshufhw $0x5f,%xmm0,%xmm1
+ DB 102,15,111,193 ; movdqa %xmm1,%xmm0
+ DB 102,15,104,196 ; punpckhbw %xmm4,%xmm0
+ DB 102,15,127,132,36,0,1,0,0 ; movdqa %xmm0,0x100(%rsp)
+ DB 102,15,96,204 ; punpcklbw %xmm4,%xmm1
+ DB 102,15,127,140,36,224,0,0,0 ; movdqa %xmm1,0xe0(%rsp)
+ DB 102,68,15,111,221 ; movdqa %xmm5,%xmm11
+ DB 102,68,15,104,220 ; punpckhbw %xmm4,%xmm11
+ DB 102,15,96,236 ; punpcklbw %xmm4,%xmm5
+ DB 102,68,15,111,213 ; movdqa %xmm5,%xmm10
+ DB 102,15,111,202 ; movdqa %xmm2,%xmm1
+ DB 102,15,127,140,36,48,1,0,0 ; movdqa %xmm1,0x130(%rsp)
+ DB 102,68,15,111,193 ; movdqa %xmm1,%xmm8
+ DB 242,15,112,193,231 ; pshuflw $0xe7,%xmm1,%xmm0
+ DB 102,15,96,204 ; punpcklbw %xmm4,%xmm1
+ DB 102,68,15,104,196 ; punpckhbw %xmm4,%xmm8
+ DB 102,15,127,156,36,32,1,0,0 ; movdqa %xmm3,0x120(%rsp)
+ DB 102,15,111,211 ; movdqa %xmm3,%xmm2
+ DB 102,68,15,111,243 ; movdqa %xmm3,%xmm14
+ DB 102,68,15,96,244 ; punpcklbw %xmm4,%xmm14
+ DB 102,15,104,212 ; punpckhbw %xmm4,%xmm2
+ DB 102,15,111,242 ; movdqa %xmm2,%xmm6
+ DB 102,15,127,180,36,144,0,0,0 ; movdqa %xmm6,0x90(%rsp)
+ DB 243,15,112,192,231 ; pshufhw $0xe7,%xmm0,%xmm0
+ DB 102,15,112,192,232 ; pshufd $0xe8,%xmm0,%xmm0
+ DB 102,15,96,192 ; punpcklbw %xmm0,%xmm0
+ DB 242,15,112,192,95 ; pshuflw $0x5f,%xmm0,%xmm0
+ DB 243,15,112,208,95 ; pshufhw $0x5f,%xmm0,%xmm2
+ DB 242,15,112,195,231 ; pshuflw $0xe7,%xmm3,%xmm0
+ DB 243,15,112,192,231 ; pshufhw $0xe7,%xmm0,%xmm0
+ DB 102,15,112,192,232 ; pshufd $0xe8,%xmm0,%xmm0
+ DB 102,15,96,192 ; punpcklbw %xmm0,%xmm0
+ DB 242,15,112,192,95 ; pshuflw $0x5f,%xmm0,%xmm0
+ DB 243,15,112,216,95 ; pshufhw $0x5f,%xmm0,%xmm3
+ DB 102,15,111,195 ; movdqa %xmm3,%xmm0
+ DB 102,15,104,196 ; punpckhbw %xmm4,%xmm0
+ DB 102,15,127,132,36,128,0,0,0 ; movdqa %xmm0,0x80(%rsp)
+ DB 102,15,96,220 ; punpcklbw %xmm4,%xmm3
+ DB 102,68,15,111,202 ; movdqa %xmm2,%xmm9
+ DB 102,68,15,104,204 ; punpckhbw %xmm4,%xmm9
+ DB 102,15,96,212 ; punpcklbw %xmm4,%xmm2
+ DB 102,15,111,233 ; movdqa %xmm1,%xmm5
+ DB 102,15,253,237 ; paddw %xmm5,%xmm5
+ DB 102,65,15,111,248 ; movdqa %xmm8,%xmm7
+ DB 102,15,253,255 ; paddw %xmm7,%xmm7
+ DB 102,69,15,111,238 ; movdqa %xmm14,%xmm13
+ DB 102,69,15,253,237 ; paddw %xmm13,%xmm13
+ DB 102,15,253,246 ; paddw %xmm6,%xmm6
+ DB 102,15,217,240 ; psubusw %xmm0,%xmm6
+ DB 102,15,117,244 ; pcmpeqw %xmm4,%xmm6
+ DB 102,15,127,180,36,240,0,0,0 ; movdqa %xmm6,0xf0(%rsp)
+ DB 102,68,15,217,235 ; psubusw %xmm3,%xmm13
+ DB 102,68,15,117,236 ; pcmpeqw %xmm4,%xmm13
+ DB 102,68,15,127,172,36,208,0,0,0 ; movdqa %xmm13,0xd0(%rsp)
+ DB 102,65,15,217,249 ; psubusw %xmm9,%xmm7
+ DB 102,15,117,252 ; pcmpeqw %xmm4,%xmm7
+ DB 102,15,127,188,36,176,0,0,0 ; movdqa %xmm7,0xb0(%rsp)
+ DB 102,15,217,234 ; psubusw %xmm2,%xmm5
+ DB 102,15,117,236 ; pcmpeqw %xmm4,%xmm5
+ DB 102,15,127,172,36,160,0,0,0 ; movdqa %xmm5,0xa0(%rsp)
+ DB 102,15,111,53,64,4,0,0 ; movdqa 0x440(%rip),%xmm6 # 3440 <_sk_overlay_sse2_8bit+0x5f3>
+ DB 102,15,111,194 ; movdqa %xmm2,%xmm0
+ DB 102,15,239,198 ; pxor %xmm6,%xmm0
+ DB 102,65,15,213,199 ; pmullw %xmm15,%xmm0
+ DB 102,15,127,132,36,192,0,0,0 ; movdqa %xmm0,0xc0(%rsp)
+ DB 102,65,15,111,194 ; movdqa %xmm10,%xmm0
+ DB 102,68,15,111,224 ; movdqa %xmm0,%xmm12
+ DB 102,68,15,239,230 ; pxor %xmm6,%xmm12
+ DB 102,68,15,213,225 ; pmullw %xmm1,%xmm12
+ DB 102,15,111,226 ; movdqa %xmm2,%xmm4
+ DB 102,15,249,209 ; psubw %xmm1,%xmm2
+ DB 102,15,127,84,36,80 ; movdqa %xmm2,0x50(%rsp)
+ DB 102,68,15,111,209 ; movdqa %xmm1,%xmm10
+ DB 102,69,15,213,215 ; pmullw %xmm15,%xmm10
+ DB 102,15,213,224 ; pmullw %xmm0,%xmm4
+ DB 102,15,127,100,36,96 ; movdqa %xmm4,0x60(%rsp)
+ DB 102,65,15,249,199 ; psubw %xmm15,%xmm0
+ DB 102,15,127,68,36,48 ; movdqa %xmm0,0x30(%rsp)
+ DB 102,65,15,111,193 ; movdqa %xmm9,%xmm0
+ DB 102,15,239,198 ; pxor %xmm6,%xmm0
+ DB 102,15,111,20,36 ; movdqa (%rsp),%xmm2
+ DB 102,15,213,194 ; pmullw %xmm2,%xmm0
+ DB 102,15,127,68,36,112 ; movdqa %xmm0,0x70(%rsp)
+ DB 102,65,15,111,195 ; movdqa %xmm11,%xmm0
+ DB 102,68,15,239,222 ; pxor %xmm6,%xmm11
+ DB 102,69,15,213,216 ; pmullw %xmm8,%xmm11
+ DB 102,69,15,111,249 ; movdqa %xmm9,%xmm15
+ DB 102,69,15,249,200 ; psubw %xmm8,%xmm9
+ DB 102,68,15,127,76,36,64 ; movdqa %xmm9,0x40(%rsp)
+ DB 102,68,15,213,194 ; pmullw %xmm2,%xmm8
+ DB 102,68,15,213,248 ; pmullw %xmm0,%xmm15
+ DB 102,15,249,194 ; psubw %xmm2,%xmm0
+ DB 102,68,15,111,200 ; movdqa %xmm0,%xmm9
+ DB 102,15,111,195 ; movdqa %xmm3,%xmm0
+ DB 102,15,239,198 ; pxor %xmm6,%xmm0
+ DB 102,15,111,84,36,16 ; movdqa 0x10(%rsp),%xmm2
+ DB 102,15,213,194 ; pmullw %xmm2,%xmm0
+ DB 102,15,127,4,36 ; movdqa %xmm0,(%rsp)
+ DB 102,15,111,132,36,224,0,0,0 ; movdqa 0xe0(%rsp),%xmm0
+ DB 102,15,111,232 ; movdqa %xmm0,%xmm5
+ DB 102,15,239,238 ; pxor %xmm6,%xmm5
+ DB 102,65,15,213,238 ; pmullw %xmm14,%xmm5
+ DB 102,68,15,111,235 ; movdqa %xmm3,%xmm13
+ DB 102,65,15,249,222 ; psubw %xmm14,%xmm3
+ DB 102,15,127,92,36,32 ; movdqa %xmm3,0x20(%rsp)
+ DB 102,65,15,111,254 ; movdqa %xmm14,%xmm7
+ DB 102,15,213,250 ; pmullw %xmm2,%xmm7
+ DB 102,68,15,213,232 ; pmullw %xmm0,%xmm13
+ DB 102,15,249,194 ; psubw %xmm2,%xmm0
+ DB 102,15,111,208 ; movdqa %xmm0,%xmm2
+ DB 102,15,111,140,36,128,0,0,0 ; movdqa 0x80(%rsp),%xmm1
+ DB 102,15,111,193 ; movdqa %xmm1,%xmm0
+ DB 102,15,239,198 ; pxor %xmm6,%xmm0
+ DB 102,15,111,156,36,16,1,0,0 ; movdqa 0x110(%rsp),%xmm3
+ DB 102,15,213,195 ; pmullw %xmm3,%xmm0
+ DB 102,15,127,68,36,16 ; movdqa %xmm0,0x10(%rsp)
+ DB 102,15,111,132,36,0,1,0,0 ; movdqa 0x100(%rsp),%xmm0
+ DB 102,15,239,240 ; pxor %xmm0,%xmm6
+ DB 102,15,111,164,36,144,0,0,0 ; movdqa 0x90(%rsp),%xmm4
+ DB 102,15,213,244 ; pmullw %xmm4,%xmm6
+ DB 102,68,15,111,241 ; movdqa %xmm1,%xmm14
+ DB 102,15,249,204 ; psubw %xmm4,%xmm1
+ DB 102,15,213,227 ; pmullw %xmm3,%xmm4
+ DB 102,68,15,213,240 ; pmullw %xmm0,%xmm14
+ DB 102,15,249,195 ; psubw %xmm3,%xmm0
+ DB 102,15,213,193 ; pmullw %xmm1,%xmm0
+ DB 102,15,213,84,36,32 ; pmullw 0x20(%rsp),%xmm2
+ DB 102,68,15,213,76,36,64 ; pmullw 0x40(%rsp),%xmm9
+ DB 102,15,111,76,36,48 ; movdqa 0x30(%rsp),%xmm1
+ DB 102,15,213,76,36,80 ; pmullw 0x50(%rsp),%xmm1
+ DB 102,15,253,192 ; paddw %xmm0,%xmm0
+ DB 102,68,15,249,240 ; psubw %xmm0,%xmm14
+ DB 102,15,253,210 ; paddw %xmm2,%xmm2
+ DB 102,68,15,249,234 ; psubw %xmm2,%xmm13
+ DB 102,69,15,253,201 ; paddw %xmm9,%xmm9
+ DB 102,69,15,249,249 ; psubw %xmm9,%xmm15
+ DB 102,15,111,193 ; movdqa %xmm1,%xmm0
+ DB 102,15,253,192 ; paddw %xmm0,%xmm0
+ DB 102,15,111,76,36,96 ; movdqa 0x60(%rsp),%xmm1
+ DB 102,15,249,200 ; psubw %xmm0,%xmm1
+ DB 102,69,15,253,210 ; paddw %xmm10,%xmm10
+ DB 102,15,111,132,36,160,0,0,0 ; movdqa 0xa0(%rsp),%xmm0
+ DB 102,68,15,219,208 ; pand %xmm0,%xmm10
+ DB 102,15,223,193 ; pandn %xmm1,%xmm0
+ DB 102,65,15,235,194 ; por %xmm10,%xmm0
+ DB 102,15,111,216 ; movdqa %xmm0,%xmm3
+ DB 102,69,15,253,192 ; paddw %xmm8,%xmm8
+ DB 102,15,111,132,36,176,0,0,0 ; movdqa 0xb0(%rsp),%xmm0
+ DB 102,68,15,219,192 ; pand %xmm0,%xmm8
+ DB 102,65,15,223,199 ; pandn %xmm15,%xmm0
+ DB 102,65,15,235,192 ; por %xmm8,%xmm0
+ DB 102,68,15,111,192 ; movdqa %xmm0,%xmm8
+ DB 102,15,253,255 ; paddw %xmm7,%xmm7
+ DB 102,15,111,132,36,208,0,0,0 ; movdqa 0xd0(%rsp),%xmm0
+ DB 102,15,219,248 ; pand %xmm0,%xmm7
+ DB 102,65,15,223,197 ; pandn %xmm13,%xmm0
+ DB 102,15,235,199 ; por %xmm7,%xmm0
+ DB 102,15,111,208 ; movdqa %xmm0,%xmm2
+ DB 102,15,253,228 ; paddw %xmm4,%xmm4
+ DB 102,15,111,132,36,240,0,0,0 ; movdqa 0xf0(%rsp),%xmm0
+ DB 102,15,219,224 ; pand %xmm0,%xmm4
+ DB 102,65,15,223,198 ; pandn %xmm14,%xmm0
+ DB 102,15,235,196 ; por %xmm4,%xmm0
+ DB 102,15,111,200 ; movdqa %xmm0,%xmm1
+ DB 102,68,15,253,164,36,192,0,0,0 ; paddw 0xc0(%rsp),%xmm12
+ DB 102,68,15,253,92,36,112 ; paddw 0x70(%rsp),%xmm11
+ DB 102,15,253,44,36 ; paddw (%rsp),%xmm5
+ DB 102,15,253,116,36,16 ; paddw 0x10(%rsp),%xmm6
+ DB 102,15,111,5,26,2,0,0 ; movdqa 0x21a(%rip),%xmm0 # 3450 <_sk_overlay_sse2_8bit+0x603>
+ DB 102,68,15,253,224 ; paddw %xmm0,%xmm12
+ DB 102,68,15,253,227 ; paddw %xmm3,%xmm12
+ DB 102,68,15,253,216 ; paddw %xmm0,%xmm11
+ DB 102,69,15,253,216 ; paddw %xmm8,%xmm11
+ DB 102,15,253,232 ; paddw %xmm0,%xmm5
+ DB 102,15,253,234 ; paddw %xmm2,%xmm5
+ DB 102,15,253,240 ; paddw %xmm0,%xmm6
+ DB 102,15,253,241 ; paddw %xmm1,%xmm6
+ DB 102,15,111,5,254,1,0,0 ; movdqa 0x1fe(%rip),%xmm0 # 3460 <_sk_overlay_sse2_8bit+0x613>
+ DB 102,15,228,240 ; pmulhuw %xmm0,%xmm6
+ DB 102,15,228,232 ; pmulhuw %xmm0,%xmm5
+ DB 102,68,15,228,216 ; pmulhuw %xmm0,%xmm11
+ DB 102,68,15,228,224 ; pmulhuw %xmm0,%xmm12
+ DB 102,65,15,113,211,7 ; psrlw $0x7,%xmm11
+ DB 102,65,15,113,212,7 ; psrlw $0x7,%xmm12
+ DB 102,69,15,103,227 ; packuswb %xmm11,%xmm12
+ DB 102,15,113,214,7 ; psrlw $0x7,%xmm6
+ DB 102,15,113,213,7 ; psrlw $0x7,%xmm5
+ DB 102,15,103,238 ; packuswb %xmm6,%xmm5
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 15,40,148,36,48,1,0,0 ; movaps 0x130(%rsp),%xmm2
+ DB 15,40,156,36,32,1,0,0 ; movaps 0x120(%rsp),%xmm3
+ DB 102,65,15,111,196 ; movdqa %xmm12,%xmm0
+ DB 102,15,111,205 ; movdqa %xmm5,%xmm1
+ DB 72,129,196,72,1,0,0 ; add $0x148,%rsp
+ DB 255,224 ; jmpq *%rax
+
ALIGN 4
DB 0,0 ; add %al,(%rax)
- DB 127,67 ; jg 2a2b <_sk_difference_sse2_8bit+0x20a>
+ DB 127,67 ; jg 32ff <_sk_overlay_sse2_8bit+0x4b2>
DB 0,0 ; add %al,(%rax)
- DB 127,67 ; jg 2a2f <_sk_difference_sse2_8bit+0x20e>
+ DB 127,67 ; jg 3303 <_sk_overlay_sse2_8bit+0x4b6>
DB 0,0 ; add %al,(%rax)
- DB 127,67 ; jg 2a33 <_sk_difference_sse2_8bit+0x212>
+ DB 127,67 ; jg 3307 <_sk_overlay_sse2_8bit+0x4ba>
ALIGN 16
DB 0,0 ; add %al,(%rax)
@@ -47955,6 +49257,45 @@ ALIGN 16
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,0 ; incl (%rax)
+ DB 255,0 ; incl (%rax)
+ DB 255,0 ; incl (%rax)
+ DB 255,0 ; incl (%rax)
+ DB 255,0 ; incl (%rax)
+ DB 255,0 ; incl (%rax)
+ DB 255,0 ; incl (%rax)
+ DB 255,0 ; incl (%rax)
+ DB 255,0 ; incl (%rax)
+ DB 127,0 ; jg 3422 <.literal16+0x152>
+ DB 127,0 ; jg 3424 <.literal16+0x154>
+ DB 127,0 ; jg 3426 <.literal16+0x156>
+ DB 127,0 ; jg 3428 <.literal16+0x158>
+ DB 127,0 ; jg 342a <.literal16+0x15a>
+ DB 127,0 ; jg 342c <.literal16+0x15c>
+ DB 127,0 ; jg 342e <.literal16+0x15e>
+ DB 127,0 ; jg 3430 <.literal16+0x160>
+ DB 129,128,129,128,129,128,129,128,129,128; addl $0x80818081,-0x7f7e7f7f(%rax)
+ DB 129,128,129,128,129,128,255,0,255,0 ; addl $0xff00ff,-0x7f7e7f7f(%rax)
+ DB 255,0 ; incl (%rax)
+ DB 255,0 ; incl (%rax)
+ DB 255,0 ; incl (%rax)
+ DB 255,0 ; incl (%rax)
+ DB 255,0 ; incl (%rax)
+ DB 255,0 ; incl (%rax)
+ DB 127,0 ; jg 3452 <.literal16+0x182>
+ DB 127,0 ; jg 3454 <.literal16+0x184>
+ DB 127,0 ; jg 3456 <.literal16+0x186>
+ DB 127,0 ; jg 3458 <.literal16+0x188>
+ DB 127,0 ; jg 345a <.literal16+0x18a>
+ DB 127,0 ; jg 345c <.literal16+0x18c>
+ DB 127,0 ; jg 345e <.literal16+0x18e>
+ DB 127,0 ; jg 3460 <.literal16+0x190>
+ DB 129,128,129,128,129,128,129,128,129,128; addl $0x80818081,-0x7f7e7f7f(%rax)
+ DB 129 ; .byte 0x81
+ DB 128 ; .byte 0x80
+ DB 129 ; .byte 0x81
+ DB 128 ; .byte 0x80
+ DB 129 ; .byte 0x81
+ DB 128 ; .byte 0x80
ELSE
.MODEL FLAT,C
_text32 SEGMENT ALIGN(32) 'CODE'
diff --git a/src/jumper/SkJumper_stages_8bit.cpp b/src/jumper/SkJumper_stages_8bit.cpp
index d1b0f54a41..6ebba0768b 100644
--- a/src/jumper/SkJumper_stages_8bit.cpp
+++ b/src/jumper/SkJumper_stages_8bit.cpp
@@ -61,18 +61,6 @@ SI void split(U8x4 u8x4, R* lo, R* hi) {
memcpy(hi, (char*)&u8x4 + sizeof(R), sizeof(R));
}
-union V {
- U32 u32;
- U8x4 u8x4;
-
- V() = default;
- V(U32 v) : u32 (v) {}
- V(U8x4 v) : u8x4(v) {}
- V(int v) : u8x4(v) {}
- V(float v) : u8x4(v*255) {}
-};
-static const size_t kStride = sizeof(V) / sizeof(uint32_t);
-
// Usually __builtin_convertvector() is pretty good, but sometimes we can do better.
SI U8x4 pack(U16x4 v) {
#if defined(__AVX2__)
@@ -100,6 +88,19 @@ SI U8x4 pack(U16x4 v) {
#endif
}
+union V {
+ U32 u32;
+ U8x4 u8x4;
+
+ V() = default;
+ V(U32 v) : u32 (v) {}
+ V(U8x4 v) : u8x4(v) {}
+ V(U16x4 v) : u8x4(pack((v + 127)/255)) {}
+ V(int v) : u8x4(v) {}
+ V(float v) : u8x4(v*255) {}
+};
+static const size_t kStride = sizeof(V) / sizeof(uint32_t);
+
SI V operator+(V x, V y) { return x.u8x4 + y.u8x4; }
SI V operator-(V x, V y) { return x.u8x4 - y.u8x4; }
SI V operator*(V x, V y) {
@@ -109,7 +110,8 @@ SI V operator*(V x, V y) {
return pack((X*Y + X)>>8);
}
-SI V inv(V v) { return 0xff - v; }
+template <typename T>
+SI T inv(T v) { return 0xff - v; }
SI V two(V v) { return v + v; }
SI V lerp(V from, V to, V t) { return to*t + from*inv(t); }
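
Both paths above avoid a real divide by 255: the existing operator* uses (X*Y + X)>>8, i.e. x*(y+1)/256, while the new V(U16x4) constructor rounds with (v + 127)/255. The shortcut stays within one count of exact rounding; the following standalone scalar check confirms that by brute force (plain C++, illustrative only, not part of this patch):

```cpp
#include <cstdio>

int main() {
    int worst = 0;
    for (int x = 0; x < 256; x++)
    for (int y = 0; y < 256; y++) {
        int approx = (x*y + x) >> 8;     // what pack((X*Y + X)>>8) computes per 16-bit lane
        int exact  = (x*y + 127) / 255;  // round-to-nearest, as in the new V(U16x4) ctor
        int err    = approx > exact ? approx - exact : exact - approx;
        if (err > worst) worst = err;
    }
    printf("max error vs. rounded x*y/255: %d\n", worst);  // prints 1
}
```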
@@ -141,14 +143,18 @@ SI V swap_rb(V v) {
#endif
}
+
+template <typename MaskT, typename ValT>
+SI ValT if_then_else(MaskT m, ValT t, ValT e) {
+ return (t & m) | (e & ~m);
+}
+
SI V max(V a, V b) {
- auto gt = a.u8x4 > b.u8x4;
- return (a.u8x4 & gt) | (b.u8x4 &~gt);
+ return if_then_else(a.u8x4 > b.u8x4, a.u8x4, b.u8x4);
}
SI V min(V a, V b) {
- auto gt = a.u8x4 > b.u8x4;
- return (a.u8x4 & ~gt) | (b.u8x4 &gt);
+ return if_then_else(a.u8x4 > b.u8x4, b.u8x4, a.u8x4);
}
struct Params {
@@ -440,3 +446,30 @@ STAGE(difference) {
V min_ = min(src*alpha(dst), dst*alpha(src));
src = (src - min_) + (dst - zero_alpha(min_));
}
+
+template <typename Func>
+V blend_rgb16(V src, V dst, Func&& blend) {
+ U16x4 s = __builtin_convertvector( src.u8x4, U16x4),
+ sa = __builtin_convertvector(alpha(src).u8x4, U16x4),
+ d = __builtin_convertvector( dst.u8x4, U16x4),
+ da = __builtin_convertvector(alpha(dst).u8x4, U16x4),
+
+ rgb = blend(s, d, sa, da),
+ a = s + (d - d*sa);
+
+ return if_then_else(0x0000ffffffffffff, rgb, a);
+}
+
+STAGE(hardlight) {
+ src = blend_rgb16(src, dst, [](U16x4 s, U16x4 d, U16x4 sa, U16x4 da) {
+ return s*inv(da) + d*inv(sa)
+ + if_then_else(s*2 <= sa, s*d*2, sa*da - (da - d)*(sa - s)*2);
+ });
+}
+
+STAGE(overlay) {
+ src = blend_rgb16(src, dst, [](U16x4 s, U16x4 d, U16x4 sa, U16x4 da) {
+ return s*inv(da) + d*inv(sa)
+ + if_then_else(d*2 <= da, s*d*2, sa*da - (da - d)*(sa - s)*2);
+ });
+}
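
The two lambdas above are the standard premultiplied hard-light and overlay equations kept in x255 fixed point; overlay differs only in which side the `2x <= alpha` test inspects. A scalar per-channel sketch of the same arithmetic (illustrative only, not taken from the patch; the alpha lane is composited separately inside blend_rgb16):

```cpp
#include <cstdint>

// Rounding divide by 255, matching the (v + 127)/255 used when packing back to 8 bits.
static uint8_t div255(int x) { return uint8_t((x + 127) / 255); }

// One premultiplied color channel: s,d are src/dst channels, sa,da their alphas.
// Every product below is 255x too large, so the sum is divided back down once.
static uint8_t hardlight_channel(int s, int sa, int d, int da) {
    int both = s*(255 - da) + d*(255 - sa);               // s*inv(da) + d*inv(sa)
    int mix  = (2*s <= sa) ? 2*s*d                        // dark source: multiply
                           : sa*da - 2*(da - d)*(sa - s); // light source: screen
    return div255(both + mix);
}

// Overlay is hardlight with source and destination swapped in the branch condition:
// it tests the destination channel against the destination alpha instead.
static uint8_t overlay_channel(int s, int sa, int d, int da) {
    int both = s*(255 - da) + d*(255 - sa);
    int mix  = (2*d <= da) ? 2*s*d
                           : sa*da - 2*(da - d)*(sa - s);
    return div255(both + mix);
}
```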