author     Florin Malita <fmalita@chromium.org>  2017-08-11 15:44:53 -0400
committer  Skia Commit-Bot <skia-commit-bot@chromium.org>  2017-08-11 21:13:37 +0000
commit     0bf0bc65e2dcd25cf1aeb873903ecbd7df414858 (patch)
tree       c6634267693d9f6d81e9f2ee956bc30597476989 /src/jumper
parent     6ad3ffeed16ce0c6891d858b01453be97b5fb234 (diff)
Lowp darken stage
Change-Id: I4bf618ad8728541fcef3fc1c6aa5b3ca106d50dc
Reviewed-on: https://skia-review.googlesource.com/33583
Commit-Queue: Florin Malita <fmalita@chromium.org>
Reviewed-by: Mike Klein <mtklein@chromium.org>
Diffstat (limited to 'src/jumper')
-rw-r--r--  src/jumper/SkJumper.cpp              |   3
-rw-r--r--  src/jumper/SkJumper_generated.S      | 557
-rw-r--r--  src/jumper/SkJumper_generated_win.S  | 549
-rw-r--r--  src/jumper/SkJumper_stages_8bit.cpp  |  11
4 files changed, 892 insertions, 228 deletions
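
The generated _sk_darken_hsw_8bit / _sk_darken_sse41_8bit bodies below come from the small change to SkJumper_stages_8bit.cpp. As a rough guide to what those stages compute, here is a minimal standalone sketch of the darken blend on premultiplied 8-bit color; it is not the SkJumper source itself, and Px, div255, and darken_channel are illustrative names. Each color channel gets s + d - max(s*da, d*sa)/255, while the alpha channel keeps the ordinary srcover result; the SIMD below does the same with vpmullw/vpmaxub and a final vpblendvb whose 0x00ffffff mask leaves the alpha byte alone, approximating x*y/255 with the usual (x*y + y) >> 8 trick.

    #include <algorithm>
    #include <cstdint>

    // Rounding division by 255; the generated code uses the cheaper (x*y + y) >> 8.
    static uint8_t div255(unsigned x) { return static_cast<uint8_t>((x + 127) / 255); }

    struct Px { uint8_t r, g, b, a; };  // premultiplied RGBA, 8 bits per channel

    static uint8_t darken_channel(unsigned s, unsigned d, unsigned sa, unsigned da) {
        unsigned m = std::max<unsigned>(div255(s * da), div255(d * sa));
        return static_cast<uint8_t>(s + d - m);  // stays <= 255 for premultiplied input
    }

    Px darken(Px src, Px dst) {
        return Px{
            darken_channel(src.r, dst.r, src.a, dst.a),
            darken_channel(src.g, dst.g, src.a, dst.a),
            darken_channel(src.b, dst.b, src.a, dst.a),
            static_cast<uint8_t>(src.a + dst.a - div255(src.a * dst.a)),  // srcover alpha
        };
    }
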
diff --git a/src/jumper/SkJumper.cpp b/src/jumper/SkJumper.cpp
index 0d44e8facd..0b705eb400 100644
--- a/src/jumper/SkJumper.cpp
+++ b/src/jumper/SkJumper.cpp
@@ -98,7 +98,8 @@ using StartPipelineFn = void(size_t,size_t,size_t,size_t, void**,K*);
M(modulate) \
M(multiply) \
M(screen) \
- M(xor_)
+ M(xor_) \
+ M(darken)
extern "C" {
diff --git a/src/jumper/SkJumper_generated.S b/src/jumper/SkJumper_generated.S
index bb2e8da1de..7fd41f9280 100644
--- a/src/jumper/SkJumper_generated.S
+++ b/src/jumper/SkJumper_generated.S
@@ -56656,7 +56656,7 @@ HIDDEN _sk_set_rgb_hsw_8bit
FUNCTION(_sk_set_rgb_hsw_8bit)
_sk_set_rgb_hsw_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 197,250,16,37,62,39,0,0 // vmovss 0x273e(%rip),%xmm4 # 2800 <_sk_xor__hsw_8bit+0x175>
+ .byte 197,250,16,37,194,40,0,0 // vmovss 0x28c2(%rip),%xmm4 # 2984 <_sk_darken_hsw_8bit+0x187>
.byte 197,218,89,40 // vmulss (%rax),%xmm4,%xmm5
.byte 196,225,250,44,205 // vcvttss2si %xmm5,%rcx
.byte 197,218,89,104,4 // vmulss 0x4(%rax),%xmm4,%xmm5
@@ -56669,7 +56669,7 @@ _sk_set_rgb_hsw_8bit:
.byte 9,208 // or %edx,%eax
.byte 197,249,110,224 // vmovd %eax,%xmm4
.byte 196,226,125,88,228 // vpbroadcastd %xmm4,%ymm4
- .byte 197,253,111,45,38,39,0,0 // vmovdqa 0x2726(%rip),%ymm5 # 2820 <_sk_xor__hsw_8bit+0x195>
+ .byte 197,253,111,45,198,40,0,0 // vmovdqa 0x28c6(%rip),%ymm5 # 29c0 <_sk_darken_hsw_8bit+0x1c3>
.byte 197,245,219,205 // vpand %ymm5,%ymm1,%ymm1
.byte 197,253,219,197 // vpand %ymm5,%ymm0,%ymm0
.byte 197,221,235,192 // vpor %ymm0,%ymm4,%ymm0
@@ -56681,10 +56681,10 @@ HIDDEN _sk_premul_hsw_8bit
.globl _sk_premul_hsw_8bit
FUNCTION(_sk_premul_hsw_8bit)
_sk_premul_hsw_8bit:
- .byte 197,253,111,37,42,39,0,0 // vmovdqa 0x272a(%rip),%ymm4 # 2840 <_sk_xor__hsw_8bit+0x1b5>
+ .byte 197,253,111,37,202,40,0,0 // vmovdqa 0x28ca(%rip),%ymm4 # 29e0 <_sk_darken_hsw_8bit+0x1e3>
.byte 196,226,125,0,236 // vpshufb %ymm4,%ymm0,%ymm5
.byte 196,226,117,0,228 // vpshufb %ymm4,%ymm1,%ymm4
- .byte 197,253,111,53,56,39,0,0 // vmovdqa 0x2738(%rip),%ymm6 # 2860 <_sk_xor__hsw_8bit+0x1d5>
+ .byte 197,253,111,53,216,40,0,0 // vmovdqa 0x28d8(%rip),%ymm6 # 2a00 <_sk_darken_hsw_8bit+0x203>
.byte 197,221,235,230 // vpor %ymm6,%ymm4,%ymm4
.byte 197,213,235,238 // vpor %ymm6,%ymm5,%ymm5
.byte 196,226,125,48,240 // vpmovzxbw %xmm0,%ymm6
@@ -56724,7 +56724,7 @@ HIDDEN _sk_swap_rb_hsw_8bit
.globl _sk_swap_rb_hsw_8bit
FUNCTION(_sk_swap_rb_hsw_8bit)
_sk_swap_rb_hsw_8bit:
- .byte 197,253,111,37,176,38,0,0 // vmovdqa 0x26b0(%rip),%ymm4 # 2880 <_sk_xor__hsw_8bit+0x1f5>
+ .byte 197,253,111,37,80,40,0,0 // vmovdqa 0x2850(%rip),%ymm4 # 2a20 <_sk_darken_hsw_8bit+0x223>
.byte 196,226,125,0,196 // vpshufb %ymm4,%ymm0,%ymm0
.byte 196,226,117,0,204 // vpshufb %ymm4,%ymm1,%ymm1
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -57074,7 +57074,7 @@ _sk_load_bgra_hsw_8bit:
.byte 117,35 // jne 6b4 <_sk_load_bgra_hsw_8bit+0x44>
.byte 196,161,126,111,76,130,32 // vmovdqu 0x20(%rdx,%r8,4),%ymm1
.byte 196,161,126,111,4,130 // vmovdqu (%rdx,%r8,4),%ymm0
- .byte 197,253,111,37,250,33,0,0 // vmovdqa 0x21fa(%rip),%ymm4 # 28a0 <_sk_xor__hsw_8bit+0x215>
+ .byte 197,253,111,37,154,35,0,0 // vmovdqa 0x239a(%rip),%ymm4 # 2a40 <_sk_darken_hsw_8bit+0x243>
.byte 196,226,125,0,196 // vpshufb %ymm4,%ymm0,%ymm0
.byte 196,226,117,0,204 // vpshufb %ymm4,%ymm1,%ymm1
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -57189,7 +57189,7 @@ _sk_load_bgra_dst_hsw_8bit:
.byte 117,35 // jne 86c <_sk_load_bgra_dst_hsw_8bit+0x44>
.byte 196,161,126,111,92,130,32 // vmovdqu 0x20(%rdx,%r8,4),%ymm3
.byte 196,161,126,111,20,130 // vmovdqu (%rdx,%r8,4),%ymm2
- .byte 197,253,111,37,98,32,0,0 // vmovdqa 0x2062(%rip),%ymm4 # 28c0 <_sk_xor__hsw_8bit+0x235>
+ .byte 197,253,111,37,2,34,0,0 // vmovdqa 0x2202(%rip),%ymm4 # 2a60 <_sk_darken_hsw_8bit+0x263>
.byte 196,226,109,0,212 // vpshufb %ymm4,%ymm2,%ymm2
.byte 196,226,101,0,220 // vpshufb %ymm4,%ymm3,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -57300,7 +57300,7 @@ _sk_store_bgra_hsw_8bit:
.byte 72,15,175,209 // imul %rcx,%rdx
.byte 72,193,226,2 // shl $0x2,%rdx
.byte 72,3,16 // add (%rax),%rdx
- .byte 197,253,111,37,220,30,0,0 // vmovdqa 0x1edc(%rip),%ymm4 # 28e0 <_sk_xor__hsw_8bit+0x255>
+ .byte 197,253,111,37,124,32,0,0 // vmovdqa 0x207c(%rip),%ymm4 # 2a80 <_sk_darken_hsw_8bit+0x283>
.byte 196,226,117,0,236 // vpshufb %ymm4,%ymm1,%ymm5
.byte 196,226,125,0,228 // vpshufb %ymm4,%ymm0,%ymm4
.byte 77,133,201 // test %r9,%r9
@@ -57586,10 +57586,10 @@ _sk_store_a8_hsw_8bit:
.byte 72,99,87,8 // movslq 0x8(%rdi),%rdx
.byte 72,15,175,209 // imul %rcx,%rdx
.byte 72,3,16 // add (%rax),%rdx
- .byte 197,253,111,37,8,27,0,0 // vmovdqa 0x1b08(%rip),%ymm4 # 2900 <_sk_xor__hsw_8bit+0x275>
+ .byte 197,253,111,37,168,28,0,0 // vmovdqa 0x1ca8(%rip),%ymm4 # 2aa0 <_sk_darken_hsw_8bit+0x2a3>
.byte 196,226,117,0,236 // vpshufb %ymm4,%ymm1,%ymm5
.byte 196,227,253,0,237,232 // vpermq $0xe8,%ymm5,%ymm5
- .byte 197,249,111,53,245,28,0,0 // vmovdqa 0x1cf5(%rip),%xmm6 # 2b00 <_sk_xor__hsw_8bit+0x475>
+ .byte 197,249,111,53,181,30,0,0 // vmovdqa 0x1eb5(%rip),%xmm6 # 2cc0 <_sk_darken_hsw_8bit+0x4c3>
.byte 196,226,81,0,238 // vpshufb %xmm6,%xmm5,%xmm5
.byte 196,226,125,0,228 // vpshufb %ymm4,%ymm0,%ymm4
.byte 196,227,253,0,228,232 // vpermq $0xe8,%ymm4,%ymm4
@@ -57681,10 +57681,10 @@ _sk_load_g8_hsw_8bit:
.byte 196,226,125,49,200 // vpmovzxbd %xmm0,%ymm1
.byte 197,249,112,192,78 // vpshufd $0x4e,%xmm0,%xmm0
.byte 196,226,125,49,192 // vpmovzxbd %xmm0,%ymm0
- .byte 196,226,125,88,37,185,24,0,0 // vpbroadcastd 0x18b9(%rip),%ymm4 # 2804 <_sk_xor__hsw_8bit+0x179>
+ .byte 196,226,125,88,37,61,26,0,0 // vpbroadcastd 0x1a3d(%rip),%ymm4 # 2988 <_sk_darken_hsw_8bit+0x18b>
.byte 196,226,125,64,236 // vpmulld %ymm4,%ymm0,%ymm5
.byte 196,226,117,64,196 // vpmulld %ymm4,%ymm1,%ymm0
- .byte 196,226,125,88,13,170,24,0,0 // vpbroadcastd 0x18aa(%rip),%ymm1 # 2808 <_sk_xor__hsw_8bit+0x17d>
+ .byte 196,226,125,88,13,46,26,0,0 // vpbroadcastd 0x1a2e(%rip),%ymm1 # 298c <_sk_darken_hsw_8bit+0x18f>
.byte 197,253,235,193 // vpor %ymm1,%ymm0,%ymm0
.byte 197,213,235,201 // vpor %ymm1,%ymm5,%ymm1
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -57782,10 +57782,10 @@ _sk_load_g8_dst_hsw_8bit:
.byte 196,226,125,49,218 // vpmovzxbd %xmm2,%ymm3
.byte 197,249,112,210,78 // vpshufd $0x4e,%xmm2,%xmm2
.byte 196,226,125,49,210 // vpmovzxbd %xmm2,%ymm2
- .byte 196,226,125,88,37,73,23,0,0 // vpbroadcastd 0x1749(%rip),%ymm4 # 280c <_sk_xor__hsw_8bit+0x181>
+ .byte 196,226,125,88,37,205,24,0,0 // vpbroadcastd 0x18cd(%rip),%ymm4 # 2990 <_sk_darken_hsw_8bit+0x193>
.byte 196,226,109,64,236 // vpmulld %ymm4,%ymm2,%ymm5
.byte 196,226,101,64,212 // vpmulld %ymm4,%ymm3,%ymm2
- .byte 196,226,125,88,29,58,23,0,0 // vpbroadcastd 0x173a(%rip),%ymm3 # 2810 <_sk_xor__hsw_8bit+0x185>
+ .byte 196,226,125,88,29,190,24,0,0 // vpbroadcastd 0x18be(%rip),%ymm3 # 2994 <_sk_darken_hsw_8bit+0x197>
.byte 197,237,235,211 // vpor %ymm3,%ymm2,%ymm2
.byte 197,213,235,219 // vpor %ymm3,%ymm5,%ymm3
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -57882,7 +57882,7 @@ _sk_srcover_rgba_8888_hsw_8bit:
.byte 15,133,222,0,0,0 // jne 1303 <_sk_srcover_rgba_8888_hsw_8bit+0x103>
.byte 196,33,126,111,76,138,32 // vmovdqu 0x20(%rdx,%r9,4),%ymm9
.byte 196,33,126,111,28,138 // vmovdqu (%rdx,%r9,4),%ymm11
- .byte 197,253,111,53,230,22,0,0 // vmovdqa 0x16e6(%rip),%ymm6 # 2920 <_sk_xor__hsw_8bit+0x295>
+ .byte 197,253,111,53,134,24,0,0 // vmovdqa 0x1886(%rip),%ymm6 # 2ac0 <_sk_darken_hsw_8bit+0x2c3>
.byte 196,226,117,0,254 // vpshufb %ymm6,%ymm1,%ymm7
.byte 196,226,125,0,246 // vpshufb %ymm6,%ymm0,%ymm6
.byte 196,66,125,48,195 // vpmovzxbw %xmm11,%ymm8
@@ -58090,7 +58090,7 @@ FUNCTION(_sk_scale_1_float_hsw_8bit)
_sk_scale_1_float_hsw_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 197,250,16,32 // vmovss (%rax),%xmm4
- .byte 197,218,89,37,126,18,0,0 // vmulss 0x127e(%rip),%xmm4,%xmm4 # 2814 <_sk_xor__hsw_8bit+0x189>
+ .byte 197,218,89,37,2,20,0,0 // vmulss 0x1402(%rip),%xmm4,%xmm4 # 2998 <_sk_darken_hsw_8bit+0x19b>
.byte 197,250,44,196 // vcvttss2si %xmm4,%eax
.byte 197,249,110,224 // vmovd %eax,%xmm4
.byte 196,226,125,120,228 // vpbroadcastb %xmm4,%ymm4
@@ -58100,7 +58100,7 @@ _sk_scale_1_float_hsw_8bit:
.byte 196,226,125,48,241 // vpmovzxbw %xmm1,%ymm6
.byte 196,227,125,57,201,1 // vextracti128 $0x1,%ymm1,%xmm1
.byte 196,226,125,48,201 // vpmovzxbw %xmm1,%ymm1
- .byte 197,221,219,37,117,19,0,0 // vpand 0x1375(%rip),%ymm4,%ymm4 # 2940 <_sk_xor__hsw_8bit+0x2b5>
+ .byte 197,221,219,37,21,21,0,0 // vpand 0x1515(%rip),%ymm4,%ymm4 # 2ae0 <_sk_darken_hsw_8bit+0x2e3>
.byte 197,221,213,249 // vpmullw %ymm1,%ymm4,%ymm7
.byte 197,93,213,198 // vpmullw %ymm6,%ymm4,%ymm8
.byte 197,93,213,200 // vpmullw %ymm0,%ymm4,%ymm9
@@ -58139,7 +58139,7 @@ _sk_scale_u8_hsw_8bit:
.byte 196,226,125,49,236 // vpmovzxbd %xmm4,%ymm5
.byte 197,249,112,228,78 // vpshufd $0x4e,%xmm4,%xmm4
.byte 196,226,125,49,228 // vpmovzxbd %xmm4,%ymm4
- .byte 197,253,111,53,255,18,0,0 // vmovdqa 0x12ff(%rip),%ymm6 # 2960 <_sk_xor__hsw_8bit+0x2d5>
+ .byte 197,253,111,53,159,20,0,0 // vmovdqa 0x149f(%rip),%ymm6 # 2b00 <_sk_darken_hsw_8bit+0x303>
.byte 196,226,93,0,230 // vpshufb %ymm6,%ymm4,%ymm4
.byte 196,226,85,0,238 // vpshufb %ymm6,%ymm5,%ymm5
.byte 196,226,125,48,240 // vpmovzxbw %xmm0,%ymm6
@@ -58256,7 +58256,7 @@ FUNCTION(_sk_lerp_1_float_hsw_8bit)
_sk_lerp_1_float_hsw_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 197,250,16,32 // vmovss (%rax),%xmm4
- .byte 197,218,89,37,226,15,0,0 // vmulss 0xfe2(%rip),%xmm4,%xmm4 # 2818 <_sk_xor__hsw_8bit+0x18d>
+ .byte 197,218,89,37,102,17,0,0 // vmulss 0x1166(%rip),%xmm4,%xmm4 # 299c <_sk_darken_hsw_8bit+0x19f>
.byte 197,250,44,196 // vcvttss2si %xmm4,%eax
.byte 197,249,110,224 // vmovd %eax,%xmm4
.byte 196,226,125,120,228 // vpbroadcastb %xmm4,%ymm4
@@ -58266,7 +58266,7 @@ _sk_lerp_1_float_hsw_8bit:
.byte 196,226,125,48,241 // vpmovzxbw %xmm1,%ymm6
.byte 196,227,125,57,201,1 // vextracti128 $0x1,%ymm1,%xmm1
.byte 196,226,125,48,201 // vpmovzxbw %xmm1,%ymm1
- .byte 197,221,219,61,21,17,0,0 // vpand 0x1115(%rip),%ymm4,%ymm7 # 2980 <_sk_xor__hsw_8bit+0x2f5>
+ .byte 197,221,219,61,181,18,0,0 // vpand 0x12b5(%rip),%ymm4,%ymm7 # 2b20 <_sk_darken_hsw_8bit+0x323>
.byte 197,69,213,193 // vpmullw %ymm1,%ymm7,%ymm8
.byte 197,69,213,206 // vpmullw %ymm6,%ymm7,%ymm9
.byte 197,69,213,208 // vpmullw %ymm0,%ymm7,%ymm10
@@ -58336,7 +58336,7 @@ _sk_lerp_u8_hsw_8bit:
.byte 196,226,125,49,236 // vpmovzxbd %xmm4,%ymm5
.byte 197,249,112,228,78 // vpshufd $0x4e,%xmm4,%xmm4
.byte 196,226,125,49,228 // vpmovzxbd %xmm4,%ymm4
- .byte 197,253,111,53,10,16,0,0 // vmovdqa 0x100a(%rip),%ymm6 # 29a0 <_sk_xor__hsw_8bit+0x315>
+ .byte 197,253,111,53,170,17,0,0 // vmovdqa 0x11aa(%rip),%ymm6 # 2b40 <_sk_darken_hsw_8bit+0x343>
.byte 196,98,93,0,206 // vpshufb %ymm6,%ymm4,%ymm9
.byte 196,98,85,0,222 // vpshufb %ymm6,%ymm5,%ymm11
.byte 196,226,125,48,240 // vpmovzxbw %xmm0,%ymm6
@@ -58505,7 +58505,7 @@ HIDDEN _sk_black_color_hsw_8bit
FUNCTION(_sk_black_color_hsw_8bit)
_sk_black_color_hsw_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 196,226,125,24,5,241,11,0,0 // vbroadcastss 0xbf1(%rip),%ymm0 # 281c <_sk_xor__hsw_8bit+0x191>
+ .byte 196,226,125,24,5,117,13,0,0 // vbroadcastss 0xd75(%rip),%ymm0 # 29a0 <_sk_darken_hsw_8bit+0x1a3>
.byte 197,252,40,200 // vmovaps %ymm0,%ymm1
.byte 255,224 // jmpq *%rax
@@ -58531,7 +58531,7 @@ HIDDEN _sk_srcatop_hsw_8bit
.globl _sk_srcatop_hsw_8bit
FUNCTION(_sk_srcatop_hsw_8bit)
_sk_srcatop_hsw_8bit:
- .byte 197,125,111,5,111,13,0,0 // vmovdqa 0xd6f(%rip),%ymm8 # 29c0 <_sk_xor__hsw_8bit+0x335>
+ .byte 197,125,111,5,15,15,0,0 // vmovdqa 0xf0f(%rip),%ymm8 # 2b60 <_sk_darken_hsw_8bit+0x363>
.byte 196,194,101,0,224 // vpshufb %ymm8,%ymm3,%ymm4
.byte 196,194,109,0,232 // vpshufb %ymm8,%ymm2,%ymm5
.byte 196,98,125,48,208 // vpmovzxbw %xmm0,%ymm10
@@ -58608,7 +58608,7 @@ HIDDEN _sk_dstatop_hsw_8bit
.globl _sk_dstatop_hsw_8bit
FUNCTION(_sk_dstatop_hsw_8bit)
_sk_dstatop_hsw_8bit:
- .byte 197,125,111,5,42,12,0,0 // vmovdqa 0xc2a(%rip),%ymm8 # 29e0 <_sk_xor__hsw_8bit+0x355>
+ .byte 197,125,111,5,202,13,0,0 // vmovdqa 0xdca(%rip),%ymm8 # 2b80 <_sk_darken_hsw_8bit+0x383>
.byte 196,194,117,0,224 // vpshufb %ymm8,%ymm1,%ymm4
.byte 196,194,125,0,232 // vpshufb %ymm8,%ymm0,%ymm5
.byte 196,98,125,48,210 // vpmovzxbw %xmm2,%ymm10
@@ -58685,7 +58685,7 @@ HIDDEN _sk_srcin_hsw_8bit
.globl _sk_srcin_hsw_8bit
FUNCTION(_sk_srcin_hsw_8bit)
_sk_srcin_hsw_8bit:
- .byte 197,253,111,37,225,10,0,0 // vmovdqa 0xae1(%rip),%ymm4 # 2a00 <_sk_xor__hsw_8bit+0x375>
+ .byte 197,253,111,37,129,12,0,0 // vmovdqa 0xc81(%rip),%ymm4 # 2ba0 <_sk_darken_hsw_8bit+0x3a3>
.byte 196,226,101,0,236 // vpshufb %ymm4,%ymm3,%ymm5
.byte 196,226,109,0,228 // vpshufb %ymm4,%ymm2,%ymm4
.byte 196,226,125,48,240 // vpmovzxbw %xmm0,%ymm6
@@ -58725,7 +58725,7 @@ HIDDEN _sk_dstin_hsw_8bit
.globl _sk_dstin_hsw_8bit
FUNCTION(_sk_dstin_hsw_8bit)
_sk_dstin_hsw_8bit:
- .byte 197,253,111,37,87,10,0,0 // vmovdqa 0xa57(%rip),%ymm4 # 2a20 <_sk_xor__hsw_8bit+0x395>
+ .byte 197,253,111,37,247,11,0,0 // vmovdqa 0xbf7(%rip),%ymm4 # 2bc0 <_sk_darken_hsw_8bit+0x3c3>
.byte 196,226,117,0,204 // vpshufb %ymm4,%ymm1,%ymm1
.byte 196,226,125,0,196 // vpshufb %ymm4,%ymm0,%ymm0
.byte 196,226,125,48,226 // vpmovzxbw %xmm2,%ymm4
@@ -58765,7 +58765,7 @@ HIDDEN _sk_srcout_hsw_8bit
.globl _sk_srcout_hsw_8bit
FUNCTION(_sk_srcout_hsw_8bit)
_sk_srcout_hsw_8bit:
- .byte 197,253,111,37,203,9,0,0 // vmovdqa 0x9cb(%rip),%ymm4 # 2a40 <_sk_xor__hsw_8bit+0x3b5>
+ .byte 197,253,111,37,107,11,0,0 // vmovdqa 0xb6b(%rip),%ymm4 # 2be0 <_sk_darken_hsw_8bit+0x3e3>
.byte 196,226,109,0,236 // vpshufb %ymm4,%ymm2,%ymm5
.byte 196,226,101,0,228 // vpshufb %ymm4,%ymm3,%ymm4
.byte 197,205,118,246 // vpcmpeqd %ymm6,%ymm6,%ymm6
@@ -58808,7 +58808,7 @@ HIDDEN _sk_dstout_hsw_8bit
.globl _sk_dstout_hsw_8bit
FUNCTION(_sk_dstout_hsw_8bit)
_sk_dstout_hsw_8bit:
- .byte 197,253,111,37,53,9,0,0 // vmovdqa 0x935(%rip),%ymm4 # 2a60 <_sk_xor__hsw_8bit+0x3d5>
+ .byte 197,253,111,37,213,10,0,0 // vmovdqa 0xad5(%rip),%ymm4 # 2c00 <_sk_darken_hsw_8bit+0x403>
.byte 196,226,125,0,196 // vpshufb %ymm4,%ymm0,%ymm0
.byte 196,226,117,0,204 // vpshufb %ymm4,%ymm1,%ymm1
.byte 197,221,118,228 // vpcmpeqd %ymm4,%ymm4,%ymm4
@@ -58851,7 +58851,7 @@ HIDDEN _sk_srcover_hsw_8bit
.globl _sk_srcover_hsw_8bit
FUNCTION(_sk_srcover_hsw_8bit)
_sk_srcover_hsw_8bit:
- .byte 197,253,111,37,157,8,0,0 // vmovdqa 0x89d(%rip),%ymm4 # 2a80 <_sk_xor__hsw_8bit+0x3f5>
+ .byte 197,253,111,37,61,10,0,0 // vmovdqa 0xa3d(%rip),%ymm4 # 2c20 <_sk_darken_hsw_8bit+0x423>
.byte 196,226,117,0,236 // vpshufb %ymm4,%ymm1,%ymm5
.byte 196,226,125,0,228 // vpshufb %ymm4,%ymm0,%ymm4
.byte 196,98,125,48,202 // vpmovzxbw %xmm2,%ymm9
@@ -58895,7 +58895,7 @@ HIDDEN _sk_dstover_hsw_8bit
.globl _sk_dstover_hsw_8bit
FUNCTION(_sk_dstover_hsw_8bit)
_sk_dstover_hsw_8bit:
- .byte 197,253,111,37,254,7,0,0 // vmovdqa 0x7fe(%rip),%ymm4 # 2aa0 <_sk_xor__hsw_8bit+0x415>
+ .byte 197,253,111,37,158,9,0,0 // vmovdqa 0x99e(%rip),%ymm4 # 2c40 <_sk_darken_hsw_8bit+0x443>
.byte 196,226,101,0,236 // vpshufb %ymm4,%ymm3,%ymm5
.byte 196,226,109,0,228 // vpshufb %ymm4,%ymm2,%ymm4
.byte 196,98,125,48,200 // vpmovzxbw %xmm0,%ymm9
@@ -58978,7 +58978,7 @@ FUNCTION(_sk_multiply_hsw_8bit)
_sk_multiply_hsw_8bit:
.byte 197,253,111,243 // vmovdqa %ymm3,%ymm6
.byte 197,253,111,218 // vmovdqa %ymm2,%ymm3
- .byte 197,125,111,13,191,6,0,0 // vmovdqa 0x6bf(%rip),%ymm9 # 2ac0 <_sk_xor__hsw_8bit+0x435>
+ .byte 197,125,111,13,95,8,0,0 // vmovdqa 0x85f(%rip),%ymm9 # 2c60 <_sk_darken_hsw_8bit+0x463>
.byte 196,194,101,0,225 // vpshufb %ymm9,%ymm3,%ymm4
.byte 196,194,77,0,233 // vpshufb %ymm9,%ymm6,%ymm5
.byte 196,65,45,118,210 // vpcmpeqd %ymm10,%ymm10,%ymm10
@@ -59122,7 +59122,7 @@ HIDDEN _sk_xor__hsw_8bit
.globl _sk_xor__hsw_8bit
FUNCTION(_sk_xor__hsw_8bit)
_sk_xor__hsw_8bit:
- .byte 197,125,111,13,77,4,0,0 // vmovdqa 0x44d(%rip),%ymm9 # 2ae0 <_sk_xor__hsw_8bit+0x455>
+ .byte 197,125,111,13,237,5,0,0 // vmovdqa 0x5ed(%rip),%ymm9 # 2c80 <_sk_darken_hsw_8bit+0x483>
.byte 196,194,109,0,225 // vpshufb %ymm9,%ymm2,%ymm4
.byte 196,194,101,0,249 // vpshufb %ymm9,%ymm3,%ymm7
.byte 196,65,37,118,219 // vpcmpeqd %ymm11,%ymm11,%ymm11
@@ -59197,9 +59197,92 @@ _sk_xor__hsw_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
+HIDDEN _sk_darken_hsw_8bit
+.globl _sk_darken_hsw_8bit
+FUNCTION(_sk_darken_hsw_8bit)
+_sk_darken_hsw_8bit:
+ .byte 197,125,111,5,155,4,0,0 // vmovdqa 0x49b(%rip),%ymm8 # 2ca0 <_sk_darken_hsw_8bit+0x4a3>
+ .byte 196,194,101,0,224 // vpshufb %ymm8,%ymm3,%ymm4
+ .byte 196,194,109,0,240 // vpshufb %ymm8,%ymm2,%ymm6
+ .byte 196,98,125,48,208 // vpmovzxbw %xmm0,%ymm10
+ .byte 196,227,125,57,197,1 // vextracti128 $0x1,%ymm0,%xmm5
+ .byte 196,226,125,48,237 // vpmovzxbw %xmm5,%ymm5
+ .byte 196,98,125,48,201 // vpmovzxbw %xmm1,%ymm9
+ .byte 196,227,125,57,207,1 // vextracti128 $0x1,%ymm1,%xmm7
+ .byte 196,226,125,48,255 // vpmovzxbw %xmm7,%ymm7
+ .byte 196,98,125,48,222 // vpmovzxbw %xmm6,%ymm11
+ .byte 196,227,125,57,246,1 // vextracti128 $0x1,%ymm6,%xmm6
+ .byte 196,226,125,48,246 // vpmovzxbw %xmm6,%ymm6
+ .byte 196,98,125,48,228 // vpmovzxbw %xmm4,%ymm12
+ .byte 196,227,125,57,228,1 // vextracti128 $0x1,%ymm4,%xmm4
+ .byte 196,226,125,48,228 // vpmovzxbw %xmm4,%ymm4
+ .byte 197,221,213,231 // vpmullw %ymm7,%ymm4,%ymm4
+ .byte 196,65,29,213,225 // vpmullw %ymm9,%ymm12,%ymm12
+ .byte 197,205,213,245 // vpmullw %ymm5,%ymm6,%ymm6
+ .byte 196,65,37,213,218 // vpmullw %ymm10,%ymm11,%ymm11
+ .byte 196,65,37,253,210 // vpaddw %ymm10,%ymm11,%ymm10
+ .byte 197,205,253,237 // vpaddw %ymm5,%ymm6,%ymm5
+ .byte 196,193,29,253,241 // vpaddw %ymm9,%ymm12,%ymm6
+ .byte 197,221,253,231 // vpaddw %ymm7,%ymm4,%ymm4
+ .byte 197,197,113,212,8 // vpsrlw $0x8,%ymm4,%ymm7
+ .byte 197,205,113,214,8 // vpsrlw $0x8,%ymm6,%ymm6
+ .byte 197,221,113,213,8 // vpsrlw $0x8,%ymm5,%ymm4
+ .byte 196,193,85,113,210,8 // vpsrlw $0x8,%ymm10,%ymm5
+ .byte 196,99,85,56,204,1 // vinserti128 $0x1,%xmm4,%ymm5,%ymm9
+ .byte 196,227,85,70,228,49 // vperm2i128 $0x31,%ymm4,%ymm5,%ymm4
+ .byte 197,53,103,204 // vpackuswb %ymm4,%ymm9,%ymm9
+ .byte 196,227,77,56,239,1 // vinserti128 $0x1,%xmm7,%ymm6,%ymm5
+ .byte 196,227,77,70,247,49 // vperm2i128 $0x31,%ymm7,%ymm6,%ymm6
+ .byte 197,85,103,222 // vpackuswb %ymm6,%ymm5,%ymm11
+ .byte 196,194,117,0,232 // vpshufb %ymm8,%ymm1,%ymm5
+ .byte 196,194,125,0,248 // vpshufb %ymm8,%ymm0,%ymm7
+ .byte 196,98,125,48,194 // vpmovzxbw %xmm2,%ymm8
+ .byte 196,227,125,57,212,1 // vextracti128 $0x1,%ymm2,%xmm4
+ .byte 196,226,125,48,228 // vpmovzxbw %xmm4,%ymm4
+ .byte 196,98,125,48,211 // vpmovzxbw %xmm3,%ymm10
+ .byte 196,227,125,57,222,1 // vextracti128 $0x1,%ymm3,%xmm6
+ .byte 196,226,125,48,246 // vpmovzxbw %xmm6,%ymm6
+ .byte 196,98,125,48,231 // vpmovzxbw %xmm7,%ymm12
+ .byte 196,227,125,57,255,1 // vextracti128 $0x1,%ymm7,%xmm7
+ .byte 196,226,125,48,255 // vpmovzxbw %xmm7,%ymm7
+ .byte 196,98,125,48,237 // vpmovzxbw %xmm5,%ymm13
+ .byte 196,227,125,57,237,1 // vextracti128 $0x1,%ymm5,%xmm5
+ .byte 196,226,125,48,237 // vpmovzxbw %xmm5,%ymm5
+ .byte 197,205,213,237 // vpmullw %ymm5,%ymm6,%ymm5
+ .byte 196,65,45,213,237 // vpmullw %ymm13,%ymm10,%ymm13
+ .byte 197,221,213,255 // vpmullw %ymm7,%ymm4,%ymm7
+ .byte 196,65,61,213,228 // vpmullw %ymm12,%ymm8,%ymm12
+ .byte 196,65,29,253,192 // vpaddw %ymm8,%ymm12,%ymm8
+ .byte 197,197,253,228 // vpaddw %ymm4,%ymm7,%ymm4
+ .byte 196,193,21,253,250 // vpaddw %ymm10,%ymm13,%ymm7
+ .byte 197,213,253,238 // vpaddw %ymm6,%ymm5,%ymm5
+ .byte 197,213,113,213,8 // vpsrlw $0x8,%ymm5,%ymm5
+ .byte 197,205,113,215,8 // vpsrlw $0x8,%ymm7,%ymm6
+ .byte 197,221,113,212,8 // vpsrlw $0x8,%ymm4,%ymm4
+ .byte 196,193,69,113,208,8 // vpsrlw $0x8,%ymm8,%ymm7
+ .byte 196,99,69,56,196,1 // vinserti128 $0x1,%xmm4,%ymm7,%ymm8
+ .byte 196,227,69,70,228,49 // vperm2i128 $0x31,%ymm4,%ymm7,%ymm4
+ .byte 197,189,103,228 // vpackuswb %ymm4,%ymm8,%ymm4
+ .byte 196,227,77,56,253,1 // vinserti128 $0x1,%xmm5,%ymm6,%ymm7
+ .byte 196,227,77,70,237,49 // vperm2i128 $0x31,%ymm5,%ymm6,%ymm5
+ .byte 197,197,103,237 // vpackuswb %ymm5,%ymm7,%ymm5
+ .byte 197,181,222,244 // vpmaxub %ymm4,%ymm9,%ymm6
+ .byte 197,165,222,253 // vpmaxub %ymm5,%ymm11,%ymm7
+ .byte 197,237,252,192 // vpaddb %ymm0,%ymm2,%ymm0
+ .byte 197,229,252,201 // vpaddb %ymm1,%ymm3,%ymm1
+ .byte 197,245,248,255 // vpsubb %ymm7,%ymm1,%ymm7
+ .byte 197,253,248,246 // vpsubb %ymm6,%ymm0,%ymm6
+ .byte 197,245,248,205 // vpsubb %ymm5,%ymm1,%ymm1
+ .byte 197,253,248,196 // vpsubb %ymm4,%ymm0,%ymm0
+ .byte 196,226,125,88,37,48,0,0,0 // vpbroadcastd 0x30(%rip),%ymm4 # 29a4 <_sk_darken_hsw_8bit+0x1a7>
+ .byte 196,227,125,76,198,64 // vpblendvb %ymm4,%ymm6,%ymm0,%ymm0
+ .byte 196,227,117,76,207,64 // vpblendvb %ymm4,%ymm7,%ymm1,%ymm1
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 255,224 // jmpq *%rax
+
BALIGN4
.byte 0,0 // add %al,(%rax)
- .byte 127,67 // jg 2847 <_sk_xor__hsw_8bit+0x1bc>
+ .byte 127,67 // jg 29cb <_sk_darken_hsw_8bit+0x1ce>
.byte 1,1 // add %eax,(%rcx)
.byte 1,0 // add %eax,(%rax)
.byte 0,0 // add %al,(%rax)
@@ -59209,11 +59292,14 @@ BALIGN4
.byte 0,0 // add %al,(%rax)
.byte 0,255 // add %bh,%bh
.byte 0,0 // add %al,(%rax)
- .byte 127,67 // jg 285b <_sk_xor__hsw_8bit+0x1d0>
+ .byte 127,67 // jg 29df <_sk_darken_hsw_8bit+0x1e2>
.byte 0,0 // add %al,(%rax)
- .byte 127,67 // jg 285f <_sk_xor__hsw_8bit+0x1d4>
+ .byte 127,67 // jg 29e3 <_sk_darken_hsw_8bit+0x1e6>
.byte 0,0 // add %al,(%rax)
.byte 0,255 // add %bh,%bh
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,0 // incl (%rax)
BALIGN32
.byte 0,0 // add %al,(%rax)
@@ -59646,6 +59732,28 @@ BALIGN32
.byte 11,11 // or (%rbx),%ecx
.byte 11,11 // or (%rbx),%ecx
.byte 15 // (bad)
+ .byte 15 // (bad)
+ .byte 15 // (bad)
+ .byte 15,3,3 // lsl (%rbx),%eax
+ .byte 3,3 // add (%rbx),%eax
+ .byte 7 // (bad)
+ .byte 7 // (bad)
+ .byte 7 // (bad)
+ .byte 7 // (bad)
+ .byte 11,11 // or (%rbx),%ecx
+ .byte 11,11 // or (%rbx),%ecx
+ .byte 15 // (bad)
+ .byte 15 // (bad)
+ .byte 15 // (bad)
+ .byte 15,3,3 // lsl (%rbx),%eax
+ .byte 3,3 // add (%rbx),%eax
+ .byte 7 // (bad)
+ .byte 7 // (bad)
+ .byte 7 // (bad)
+ .byte 7 // (bad)
+ .byte 11,11 // or (%rbx),%ecx
+ .byte 11,11 // or (%rbx),%ecx
+ .byte 15 // (bad)
.byte 15 // .byte 0xf
.byte 15 // .byte 0xf
.byte 15 // .byte 0xf
@@ -59743,7 +59851,7 @@ HIDDEN _sk_set_rgb_sse41_8bit
FUNCTION(_sk_set_rgb_sse41_8bit)
_sk_set_rgb_sse41_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 243,15,16,37,97,28,0,0 // movss 0x1c61(%rip),%xmm4 # 1d24 <_sk_xor__sse41_8bit+0x148>
+ .byte 243,15,16,37,209,29,0,0 // movss 0x1dd1(%rip),%xmm4 # 1e94 <_sk_darken_sse41_8bit+0x170>
.byte 243,15,16,40 // movss (%rax),%xmm5
.byte 243,15,89,236 // mulss %xmm4,%xmm5
.byte 243,72,15,44,205 // cvttss2si %xmm5,%rcx
@@ -59758,7 +59866,7 @@ _sk_set_rgb_sse41_8bit:
.byte 9,208 // or %edx,%eax
.byte 102,15,110,224 // movd %eax,%xmm4
.byte 102,15,112,228,0 // pshufd $0x0,%xmm4,%xmm4
- .byte 102,15,111,45,45,28,0,0 // movdqa 0x1c2d(%rip),%xmm5 # 1d30 <_sk_xor__sse41_8bit+0x154>
+ .byte 102,15,111,45,157,29,0,0 // movdqa 0x1d9d(%rip),%xmm5 # 1ea0 <_sk_darken_sse41_8bit+0x17c>
.byte 102,15,219,205 // pand %xmm5,%xmm1
.byte 102,15,219,197 // pand %xmm5,%xmm0
.byte 102,15,235,196 // por %xmm4,%xmm0
@@ -59772,12 +59880,12 @@ FUNCTION(_sk_premul_sse41_8bit)
_sk_premul_sse41_8bit:
.byte 102,15,111,225 // movdqa %xmm1,%xmm4
.byte 102,15,111,232 // movdqa %xmm0,%xmm5
- .byte 102,15,111,5,25,28,0,0 // movdqa 0x1c19(%rip),%xmm0 # 1d40 <_sk_xor__sse41_8bit+0x164>
+ .byte 102,15,111,5,137,29,0,0 // movdqa 0x1d89(%rip),%xmm0 # 1eb0 <_sk_darken_sse41_8bit+0x18c>
.byte 102,15,111,253 // movdqa %xmm5,%xmm7
.byte 102,15,56,0,248 // pshufb %xmm0,%xmm7
.byte 102,15,111,244 // movdqa %xmm4,%xmm6
.byte 102,15,56,0,240 // pshufb %xmm0,%xmm6
- .byte 102,15,111,5,15,28,0,0 // movdqa 0x1c0f(%rip),%xmm0 # 1d50 <_sk_xor__sse41_8bit+0x174>
+ .byte 102,15,111,5,127,29,0,0 // movdqa 0x1d7f(%rip),%xmm0 # 1ec0 <_sk_darken_sse41_8bit+0x19c>
.byte 102,15,235,240 // por %xmm0,%xmm6
.byte 102,15,235,248 // por %xmm0,%xmm7
.byte 102,69,15,239,192 // pxor %xmm8,%xmm8
@@ -59810,7 +59918,7 @@ HIDDEN _sk_swap_rb_sse41_8bit
.globl _sk_swap_rb_sse41_8bit
FUNCTION(_sk_swap_rb_sse41_8bit)
_sk_swap_rb_sse41_8bit:
- .byte 102,15,111,37,156,27,0,0 // movdqa 0x1b9c(%rip),%xmm4 # 1d60 <_sk_xor__sse41_8bit+0x184>
+ .byte 102,15,111,37,12,29,0,0 // movdqa 0x1d0c(%rip),%xmm4 # 1ed0 <_sk_darken_sse41_8bit+0x1ac>
.byte 102,15,56,0,196 // pshufb %xmm4,%xmm0
.byte 102,15,56,0,204 // pshufb %xmm4,%xmm1
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -59937,7 +60045,7 @@ _sk_load_8888_dst_sse41_8bit:
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 233,255,255,255,221 // jmpq ffffffffde000354 <_sk_xor__sse41_8bit+0xffffffffddffe778>
+ .byte 233,255,255,255,221 // jmpq ffffffffde000354 <_sk_darken_sse41_8bit+0xffffffffddffe630>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // .byte 0xff
@@ -60020,7 +60128,7 @@ _sk_load_bgra_sse41_8bit:
.byte 117,35 // jne 448 <_sk_load_bgra_sse41_8bit+0x44>
.byte 243,66,15,111,76,130,16 // movdqu 0x10(%rdx,%r8,4),%xmm1
.byte 243,66,15,111,4,130 // movdqu (%rdx,%r8,4),%xmm0
- .byte 102,15,111,37,54,25,0,0 // movdqa 0x1936(%rip),%xmm4 # 1d70 <_sk_xor__sse41_8bit+0x194>
+ .byte 102,15,111,37,166,26,0,0 // movdqa 0x1aa6(%rip),%xmm4 # 1ee0 <_sk_darken_sse41_8bit+0x1bc>
.byte 102,15,56,0,196 // pshufb %xmm4,%xmm0
.byte 102,15,56,0,204 // pshufb %xmm4,%xmm1
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -60083,7 +60191,7 @@ _sk_load_bgra_dst_sse41_8bit:
.byte 117,35 // jne 518 <_sk_load_bgra_dst_sse41_8bit+0x44>
.byte 243,66,15,111,92,130,16 // movdqu 0x10(%rdx,%r8,4),%xmm3
.byte 243,66,15,111,20,130 // movdqu (%rdx,%r8,4),%xmm2
- .byte 102,15,111,37,118,24,0,0 // movdqa 0x1876(%rip),%xmm4 # 1d80 <_sk_xor__sse41_8bit+0x1a4>
+ .byte 102,15,111,37,230,25,0,0 // movdqa 0x19e6(%rip),%xmm4 # 1ef0 <_sk_darken_sse41_8bit+0x1cc>
.byte 102,15,56,0,212 // pshufb %xmm4,%xmm2
.byte 102,15,56,0,220 // pshufb %xmm4,%xmm3
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -60142,7 +60250,7 @@ _sk_store_bgra_sse41_8bit:
.byte 72,15,175,209 // imul %rcx,%rdx
.byte 72,193,226,2 // shl $0x2,%rdx
.byte 72,3,16 // add (%rax),%rdx
- .byte 102,15,111,53,200,23,0,0 // movdqa 0x17c8(%rip),%xmm6 # 1d90 <_sk_xor__sse41_8bit+0x1b4>
+ .byte 102,15,111,53,56,25,0,0 // movdqa 0x1938(%rip),%xmm6 # 1f00 <_sk_darken_sse41_8bit+0x1dc>
.byte 102,15,111,233 // movdqa %xmm1,%xmm5
.byte 102,15,56,0,238 // pshufb %xmm6,%xmm5
.byte 102,15,111,224 // movdqa %xmm0,%xmm4
@@ -60208,7 +60316,7 @@ _sk_load_a8_sse41_8bit:
.byte 77,133,201 // test %r9,%r9
.byte 117,42 // jne 6af <_sk_load_a8_sse41_8bit+0x47>
.byte 102,66,15,56,48,12,2 // pmovzxbw (%rdx,%r8,1),%xmm1
- .byte 102,15,219,13,12,23,0,0 // pand 0x170c(%rip),%xmm1 # 1da0 <_sk_xor__sse41_8bit+0x1c4>
+ .byte 102,15,219,13,124,24,0,0 // pand 0x187c(%rip),%xmm1 # 1f10 <_sk_darken_sse41_8bit+0x1ec>
.byte 102,15,239,228 // pxor %xmm4,%xmm4
.byte 102,15,56,51,193 // pmovzxwd %xmm1,%xmm0
.byte 102,15,105,204 // punpckhwd %xmm4,%xmm1
@@ -60256,7 +60364,7 @@ _sk_load_a8_sse41_8bit:
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 233,255,255,255,222 // jmpq ffffffffdf000750 <_sk_xor__sse41_8bit+0xffffffffdeffeb74>
+ .byte 233,255,255,255,222 // jmpq ffffffffdf000750 <_sk_darken_sse41_8bit+0xffffffffdeffea2c>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,211 // callq *%rbx
@@ -60281,7 +60389,7 @@ _sk_load_a8_dst_sse41_8bit:
.byte 77,133,201 // test %r9,%r9
.byte 117,42 // jne 7a3 <_sk_load_a8_dst_sse41_8bit+0x47>
.byte 102,66,15,56,48,28,2 // pmovzxbw (%rdx,%r8,1),%xmm3
- .byte 102,15,219,29,40,22,0,0 // pand 0x1628(%rip),%xmm3 # 1db0 <_sk_xor__sse41_8bit+0x1d4>
+ .byte 102,15,219,29,152,23,0,0 // pand 0x1798(%rip),%xmm3 # 1f20 <_sk_darken_sse41_8bit+0x1fc>
.byte 102,15,239,228 // pxor %xmm4,%xmm4
.byte 102,15,56,51,211 // pmovzxwd %xmm3,%xmm2
.byte 102,15,105,220 // punpckhwd %xmm4,%xmm3
@@ -60329,7 +60437,7 @@ _sk_load_a8_dst_sse41_8bit:
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 233,255,255,255,222 // jmpq ffffffffdf000844 <_sk_xor__sse41_8bit+0xffffffffdeffec68>
+ .byte 233,255,255,255,222 // jmpq ffffffffdf000844 <_sk_darken_sse41_8bit+0xffffffffdeffeb20>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,211 // callq *%rbx
@@ -60351,7 +60459,7 @@ _sk_store_a8_sse41_8bit:
.byte 72,99,87,8 // movslq 0x8(%rdi),%rdx
.byte 72,15,175,209 // imul %rcx,%rdx
.byte 72,3,16 // add (%rax),%rdx
- .byte 102,15,111,45,80,21,0,0 // movdqa 0x1550(%rip),%xmm5 # 1dc0 <_sk_xor__sse41_8bit+0x1e4>
+ .byte 102,15,111,45,192,22,0,0 // movdqa 0x16c0(%rip),%xmm5 # 1f30 <_sk_darken_sse41_8bit+0x20c>
.byte 102,15,111,241 // movdqa %xmm1,%xmm6
.byte 102,15,56,0,245 // pshufb %xmm5,%xmm6
.byte 102,15,111,224 // movdqa %xmm0,%xmm4
@@ -60359,7 +60467,7 @@ _sk_store_a8_sse41_8bit:
.byte 102,15,108,230 // punpcklqdq %xmm6,%xmm4
.byte 77,133,201 // test %r9,%r9
.byte 117,19 // jne 89e <_sk_store_a8_sse41_8bit+0x4e>
- .byte 102,15,56,0,37,92,21,0,0 // pshufb 0x155c(%rip),%xmm4 # 1df0 <_sk_xor__sse41_8bit+0x214>
+ .byte 102,15,56,0,37,204,22,0,0 // pshufb 0x16cc(%rip),%xmm4 # 1f60 <_sk_darken_sse41_8bit+0x23c>
.byte 102,66,15,214,36,2 // movq %xmm4,(%rdx,%r8,1)
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
@@ -60375,13 +60483,13 @@ _sk_store_a8_sse41_8bit:
.byte 102,66,15,58,20,36,2,0 // pextrb $0x0,%xmm4,(%rdx,%r8,1)
.byte 235,209 // jmp 89a <_sk_store_a8_sse41_8bit+0x4a>
.byte 102,66,15,58,20,100,2,2,4 // pextrb $0x4,%xmm4,0x2(%rdx,%r8,1)
- .byte 102,15,56,0,37,5,21,0,0 // pshufb 0x1505(%rip),%xmm4 # 1de0 <_sk_xor__sse41_8bit+0x204>
+ .byte 102,15,56,0,37,117,22,0,0 // pshufb 0x1675(%rip),%xmm4 # 1f50 <_sk_darken_sse41_8bit+0x22c>
.byte 102,66,15,58,21,36,2,0 // pextrw $0x0,%xmm4,(%rdx,%r8,1)
.byte 235,181 // jmp 89a <_sk_store_a8_sse41_8bit+0x4a>
.byte 102,66,15,58,20,100,2,6,12 // pextrb $0xc,%xmm4,0x6(%rdx,%r8,1)
.byte 102,66,15,58,20,100,2,5,10 // pextrb $0xa,%xmm4,0x5(%rdx,%r8,1)
.byte 102,66,15,58,20,100,2,4,8 // pextrb $0x8,%xmm4,0x4(%rdx,%r8,1)
- .byte 102,15,56,0,37,199,20,0,0 // pshufb 0x14c7(%rip),%xmm4 # 1dd0 <_sk_xor__sse41_8bit+0x1f4>
+ .byte 102,15,56,0,37,55,22,0,0 // pshufb 0x1637(%rip),%xmm4 # 1f40 <_sk_darken_sse41_8bit+0x21c>
.byte 102,66,15,126,36,2 // movd %xmm4,(%rdx,%r8,1)
.byte 235,137 // jmp 89a <_sk_store_a8_sse41_8bit+0x4a>
.byte 15,31,0 // nopl (%rax)
@@ -60421,14 +60529,14 @@ _sk_load_g8_sse41_8bit:
.byte 77,133,201 // test %r9,%r9
.byte 117,66 // jne 98f <_sk_load_g8_sse41_8bit+0x5f>
.byte 102,66,15,56,48,12,2 // pmovzxbw (%rdx,%r8,1),%xmm1
- .byte 102,15,219,13,164,20,0,0 // pand 0x14a4(%rip),%xmm1 # 1e00 <_sk_xor__sse41_8bit+0x224>
+ .byte 102,15,219,13,20,22,0,0 // pand 0x1614(%rip),%xmm1 # 1f70 <_sk_darken_sse41_8bit+0x24c>
.byte 102,15,239,228 // pxor %xmm4,%xmm4
.byte 102,15,56,51,193 // pmovzxwd %xmm1,%xmm0
.byte 102,15,105,204 // punpckhwd %xmm4,%xmm1
- .byte 102,15,111,37,159,20,0,0 // movdqa 0x149f(%rip),%xmm4 # 1e10 <_sk_xor__sse41_8bit+0x234>
+ .byte 102,15,111,37,15,22,0,0 // movdqa 0x160f(%rip),%xmm4 # 1f80 <_sk_darken_sse41_8bit+0x25c>
.byte 102,15,56,64,204 // pmulld %xmm4,%xmm1
.byte 102,15,56,64,196 // pmulld %xmm4,%xmm0
- .byte 102,15,111,37,157,20,0,0 // movdqa 0x149d(%rip),%xmm4 # 1e20 <_sk_xor__sse41_8bit+0x244>
+ .byte 102,15,111,37,13,22,0,0 // movdqa 0x160d(%rip),%xmm4 # 1f90 <_sk_darken_sse41_8bit+0x26c>
.byte 102,15,235,196 // por %xmm4,%xmm0
.byte 102,15,235,204 // por %xmm4,%xmm1
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -60473,7 +60581,7 @@ _sk_load_g8_sse41_8bit:
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 232,255,255,255,221 // callq ffffffffde000a34 <_sk_xor__sse41_8bit+0xffffffffddffee58>
+ .byte 232,255,255,255,221 // callq ffffffffde000a34 <_sk_darken_sse41_8bit+0xffffffffddffed10>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,210 // callq *%rdx
@@ -60498,14 +60606,14 @@ _sk_load_g8_dst_sse41_8bit:
.byte 77,133,201 // test %r9,%r9
.byte 117,66 // jne a9f <_sk_load_g8_dst_sse41_8bit+0x5f>
.byte 102,66,15,56,48,28,2 // pmovzxbw (%rdx,%r8,1),%xmm3
- .byte 102,15,219,29,196,19,0,0 // pand 0x13c4(%rip),%xmm3 # 1e30 <_sk_xor__sse41_8bit+0x254>
+ .byte 102,15,219,29,52,21,0,0 // pand 0x1534(%rip),%xmm3 # 1fa0 <_sk_darken_sse41_8bit+0x27c>
.byte 102,15,239,228 // pxor %xmm4,%xmm4
.byte 102,15,56,51,211 // pmovzxwd %xmm3,%xmm2
.byte 102,15,105,220 // punpckhwd %xmm4,%xmm3
- .byte 102,15,111,37,191,19,0,0 // movdqa 0x13bf(%rip),%xmm4 # 1e40 <_sk_xor__sse41_8bit+0x264>
+ .byte 102,15,111,37,47,21,0,0 // movdqa 0x152f(%rip),%xmm4 # 1fb0 <_sk_darken_sse41_8bit+0x28c>
.byte 102,15,56,64,220 // pmulld %xmm4,%xmm3
.byte 102,15,56,64,212 // pmulld %xmm4,%xmm2
- .byte 102,15,111,37,189,19,0,0 // movdqa 0x13bd(%rip),%xmm4 # 1e50 <_sk_xor__sse41_8bit+0x274>
+ .byte 102,15,111,37,45,21,0,0 // movdqa 0x152d(%rip),%xmm4 # 1fc0 <_sk_darken_sse41_8bit+0x29c>
.byte 102,15,235,212 // por %xmm4,%xmm2
.byte 102,15,235,220 // por %xmm4,%xmm3
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -60550,7 +60658,7 @@ _sk_load_g8_dst_sse41_8bit:
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 232,255,255,255,221 // callq ffffffffde000b44 <_sk_xor__sse41_8bit+0xffffffffddffef68>
+ .byte 232,255,255,255,221 // callq ffffffffde000b44 <_sk_darken_sse41_8bit+0xffffffffddffee20>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,210 // callq *%rdx
@@ -60578,7 +60686,7 @@ _sk_srcover_rgba_8888_sse41_8bit:
.byte 243,70,15,111,68,138,16 // movdqu 0x10(%rdx,%r9,4),%xmm8
.byte 243,70,15,111,12,138 // movdqu (%rdx,%r9,4),%xmm9
.byte 77,133,192 // test %r8,%r8
- .byte 102,15,111,37,211,18,0,0 // movdqa 0x12d3(%rip),%xmm4 # 1e60 <_sk_xor__sse41_8bit+0x284>
+ .byte 102,15,111,37,67,20,0,0 // movdqa 0x1443(%rip),%xmm4 # 1fd0 <_sk_darken_sse41_8bit+0x2ac>
.byte 102,15,111,241 // movdqa %xmm1,%xmm6
.byte 102,15,56,0,244 // pshufb %xmm4,%xmm6
.byte 102,15,111,248 // movdqa %xmm0,%xmm7
@@ -60700,7 +60808,7 @@ _sk_scale_1_float_sse41_8bit:
.byte 102,15,111,232 // movdqa %xmm0,%xmm5
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 243,15,16,0 // movss (%rax),%xmm0
- .byte 243,15,89,5,186,15,0,0 // mulss 0xfba(%rip),%xmm0 # 1d28 <_sk_xor__sse41_8bit+0x14c>
+ .byte 243,15,89,5,42,17,0,0 // mulss 0x112a(%rip),%xmm0 # 1e98 <_sk_darken_sse41_8bit+0x174>
.byte 243,15,44,192 // cvttss2si %xmm0,%eax
.byte 15,87,192 // xorps %xmm0,%xmm0
.byte 102,68,15,56,48,197 // pmovzxbw %xmm5,%xmm8
@@ -60708,7 +60816,7 @@ _sk_scale_1_float_sse41_8bit:
.byte 102,68,15,56,48,204 // pmovzxbw %xmm4,%xmm9
.byte 102,15,104,224 // punpckhbw %xmm0,%xmm4
.byte 102,15,110,240 // movd %eax,%xmm6
- .byte 102,15,56,0,53,218,16,0,0 // pshufb 0x10da(%rip),%xmm6 # 1e70 <_sk_xor__sse41_8bit+0x294>
+ .byte 102,15,56,0,53,74,18,0,0 // pshufb 0x124a(%rip),%xmm6 # 1fe0 <_sk_darken_sse41_8bit+0x2bc>
.byte 102,15,111,206 // movdqa %xmm6,%xmm1
.byte 102,65,15,213,201 // pmullw %xmm9,%xmm1
.byte 102,15,111,198 // movdqa %xmm6,%xmm0
@@ -60743,11 +60851,11 @@ _sk_scale_u8_sse41_8bit:
.byte 77,133,201 // test %r9,%r9
.byte 15,133,160,0,0,0 // jne ea7 <_sk_scale_u8_sse41_8bit+0xc1>
.byte 102,66,15,56,48,52,2 // pmovzxbw (%rdx,%r8,1),%xmm6
- .byte 102,15,219,53,106,16,0,0 // pand 0x106a(%rip),%xmm6 # 1e80 <_sk_xor__sse41_8bit+0x2a4>
+ .byte 102,15,219,53,218,17,0,0 // pand 0x11da(%rip),%xmm6 # 1ff0 <_sk_darken_sse41_8bit+0x2cc>
.byte 102,69,15,239,192 // pxor %xmm8,%xmm8
.byte 102,15,111,254 // movdqa %xmm6,%xmm7
- .byte 102,15,56,0,61,104,16,0,0 // pshufb 0x1068(%rip),%xmm7 # 1e90 <_sk_xor__sse41_8bit+0x2b4>
- .byte 102,15,56,0,53,111,16,0,0 // pshufb 0x106f(%rip),%xmm6 # 1ea0 <_sk_xor__sse41_8bit+0x2c4>
+ .byte 102,15,56,0,61,216,17,0,0 // pshufb 0x11d8(%rip),%xmm7 # 2000 <_sk_darken_sse41_8bit+0x2dc>
+ .byte 102,15,56,0,53,223,17,0,0 // pshufb 0x11df(%rip),%xmm6 # 2010 <_sk_darken_sse41_8bit+0x2ec>
.byte 102,68,15,56,48,200 // pmovzxbw %xmm0,%xmm9
.byte 102,65,15,104,192 // punpckhbw %xmm8,%xmm0
.byte 102,68,15,56,48,209 // pmovzxbw %xmm1,%xmm10
@@ -60832,7 +60940,7 @@ FUNCTION(_sk_lerp_1_float_sse41_8bit)
_sk_lerp_1_float_sse41_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 243,15,16,32 // movss (%rax),%xmm4
- .byte 243,15,89,37,190,13,0,0 // mulss 0xdbe(%rip),%xmm4 # 1d2c <_sk_xor__sse41_8bit+0x150>
+ .byte 243,15,89,37,46,15,0,0 // mulss 0xf2e(%rip),%xmm4 # 1e9c <_sk_darken_sse41_8bit+0x178>
.byte 243,15,44,196 // cvttss2si %xmm4,%eax
.byte 102,15,110,224 // movd %eax,%xmm4
.byte 102,15,96,228 // punpcklbw %xmm4,%xmm4
@@ -60843,7 +60951,7 @@ _sk_lerp_1_float_sse41_8bit:
.byte 102,65,15,104,193 // punpckhbw %xmm9,%xmm0
.byte 102,68,15,56,48,217 // pmovzxbw %xmm1,%xmm11
.byte 102,65,15,104,201 // punpckhbw %xmm9,%xmm1
- .byte 102,15,56,0,61,7,15,0,0 // pshufb 0xf07(%rip),%xmm7 # 1eb0 <_sk_xor__sse41_8bit+0x2d4>
+ .byte 102,15,56,0,61,119,16,0,0 // pshufb 0x1077(%rip),%xmm7 # 2020 <_sk_darken_sse41_8bit+0x2fc>
.byte 102,68,15,111,231 // movdqa %xmm7,%xmm12
.byte 102,69,15,213,227 // pmullw %xmm11,%xmm12
.byte 102,68,15,111,239 // movdqa %xmm7,%xmm13
@@ -60906,11 +61014,11 @@ _sk_lerp_u8_sse41_8bit:
.byte 77,133,201 // test %r9,%r9
.byte 15,133,46,1,0,0 // jne 11d1 <_sk_lerp_u8_sse41_8bit+0x14f>
.byte 102,66,15,56,48,60,2 // pmovzxbw (%rdx,%r8,1),%xmm7
- .byte 102,15,219,61,14,14,0,0 // pand 0xe0e(%rip),%xmm7 # 1ec0 <_sk_xor__sse41_8bit+0x2e4>
+ .byte 102,15,219,61,126,15,0,0 // pand 0xf7e(%rip),%xmm7 # 2030 <_sk_darken_sse41_8bit+0x30c>
.byte 102,69,15,239,192 // pxor %xmm8,%xmm8
.byte 102,15,111,247 // movdqa %xmm7,%xmm6
- .byte 102,15,56,0,53,12,14,0,0 // pshufb 0xe0c(%rip),%xmm6 # 1ed0 <_sk_xor__sse41_8bit+0x2f4>
- .byte 102,15,56,0,61,19,14,0,0 // pshufb 0xe13(%rip),%xmm7 # 1ee0 <_sk_xor__sse41_8bit+0x304>
+ .byte 102,15,56,0,53,124,15,0,0 // pshufb 0xf7c(%rip),%xmm6 # 2040 <_sk_darken_sse41_8bit+0x31c>
+ .byte 102,15,56,0,61,131,15,0,0 // pshufb 0xf83(%rip),%xmm7 # 2050 <_sk_darken_sse41_8bit+0x32c>
.byte 102,68,15,56,48,200 // pmovzxbw %xmm0,%xmm9
.byte 102,65,15,104,192 // punpckhbw %xmm8,%xmm0
.byte 102,68,15,56,48,209 // pmovzxbw %xmm1,%xmm10
@@ -61005,7 +61113,7 @@ _sk_lerp_u8_sse41_8bit:
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 233,255,255,255,222 // jmpq ffffffffdf00127c <_sk_xor__sse41_8bit+0xffffffffdefff6a0>
+ .byte 233,255,255,255,222 // jmpq ffffffffdf00127c <_sk_darken_sse41_8bit+0xffffffffdefff558>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,211 // callq *%rbx
@@ -61039,7 +61147,7 @@ HIDDEN _sk_black_color_sse41_8bit
FUNCTION(_sk_black_color_sse41_8bit)
_sk_black_color_sse41_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 15,40,5,75,12,0,0 // movaps 0xc4b(%rip),%xmm0 # 1ef0 <_sk_xor__sse41_8bit+0x314>
+ .byte 15,40,5,187,13,0,0 // movaps 0xdbb(%rip),%xmm0 # 2060 <_sk_darken_sse41_8bit+0x33c>
.byte 15,40,200 // movaps %xmm0,%xmm1
.byte 255,224 // jmpq *%rax
@@ -61065,7 +61173,7 @@ HIDDEN _sk_srcatop_sse41_8bit
.globl _sk_srcatop_sse41_8bit
FUNCTION(_sk_srcatop_sse41_8bit)
_sk_srcatop_sse41_8bit:
- .byte 102,68,15,111,21,55,12,0,0 // movdqa 0xc37(%rip),%xmm10 # 1f00 <_sk_xor__sse41_8bit+0x324>
+ .byte 102,68,15,111,21,167,13,0,0 // movdqa 0xda7(%rip),%xmm10 # 2070 <_sk_darken_sse41_8bit+0x34c>
.byte 102,68,15,111,219 // movdqa %xmm3,%xmm11
.byte 102,68,15,56,48,195 // pmovzxbw %xmm3,%xmm8
.byte 102,15,111,235 // movdqa %xmm3,%xmm5
@@ -61135,7 +61243,7 @@ HIDDEN _sk_dstatop_sse41_8bit
.globl _sk_dstatop_sse41_8bit
FUNCTION(_sk_dstatop_sse41_8bit)
_sk_dstatop_sse41_8bit:
- .byte 102,68,15,111,29,12,11,0,0 // movdqa 0xb0c(%rip),%xmm11 # 1f10 <_sk_xor__sse41_8bit+0x334>
+ .byte 102,68,15,111,29,124,12,0,0 // movdqa 0xc7c(%rip),%xmm11 # 2080 <_sk_darken_sse41_8bit+0x35c>
.byte 102,68,15,111,233 // movdqa %xmm1,%xmm13
.byte 102,69,15,56,0,235 // pshufb %xmm11,%xmm13
.byte 102,68,15,111,248 // movdqa %xmm0,%xmm15
@@ -61207,7 +61315,7 @@ FUNCTION(_sk_srcin_sse41_8bit)
_sk_srcin_sse41_8bit:
.byte 102,15,111,225 // movdqa %xmm1,%xmm4
.byte 102,15,111,232 // movdqa %xmm0,%xmm5
- .byte 102,15,111,5,211,9,0,0 // movdqa 0x9d3(%rip),%xmm0 # 1f20 <_sk_xor__sse41_8bit+0x344>
+ .byte 102,15,111,5,67,11,0,0 // movdqa 0xb43(%rip),%xmm0 # 2090 <_sk_darken_sse41_8bit+0x36c>
.byte 102,15,111,243 // movdqa %xmm3,%xmm6
.byte 102,15,56,0,240 // pshufb %xmm0,%xmm6
.byte 102,15,111,250 // movdqa %xmm2,%xmm7
@@ -61242,7 +61350,7 @@ HIDDEN _sk_dstin_sse41_8bit
.globl _sk_dstin_sse41_8bit
FUNCTION(_sk_dstin_sse41_8bit)
_sk_dstin_sse41_8bit:
- .byte 102,15,111,37,86,9,0,0 // movdqa 0x956(%rip),%xmm4 # 1f30 <_sk_xor__sse41_8bit+0x354>
+ .byte 102,15,111,37,198,10,0,0 // movdqa 0xac6(%rip),%xmm4 # 20a0 <_sk_darken_sse41_8bit+0x37c>
.byte 102,15,56,0,204 // pshufb %xmm4,%xmm1
.byte 102,15,56,0,196 // pshufb %xmm4,%xmm0
.byte 102,69,15,239,210 // pxor %xmm10,%xmm10
@@ -61281,7 +61389,7 @@ FUNCTION(_sk_srcout_sse41_8bit)
_sk_srcout_sse41_8bit:
.byte 102,15,111,225 // movdqa %xmm1,%xmm4
.byte 102,15,111,232 // movdqa %xmm0,%xmm5
- .byte 102,15,111,5,201,8,0,0 // movdqa 0x8c9(%rip),%xmm0 # 1f40 <_sk_xor__sse41_8bit+0x364>
+ .byte 102,15,111,5,57,10,0,0 // movdqa 0xa39(%rip),%xmm0 # 20b0 <_sk_darken_sse41_8bit+0x38c>
.byte 102,15,111,250 // movdqa %xmm2,%xmm7
.byte 102,15,56,0,248 // pshufb %xmm0,%xmm7
.byte 102,15,111,243 // movdqa %xmm3,%xmm6
@@ -61319,7 +61427,7 @@ HIDDEN _sk_dstout_sse41_8bit
.globl _sk_dstout_sse41_8bit
FUNCTION(_sk_dstout_sse41_8bit)
_sk_dstout_sse41_8bit:
- .byte 102,15,111,37,64,8,0,0 // movdqa 0x840(%rip),%xmm4 # 1f50 <_sk_xor__sse41_8bit+0x374>
+ .byte 102,15,111,37,176,9,0,0 // movdqa 0x9b0(%rip),%xmm4 # 20c0 <_sk_darken_sse41_8bit+0x39c>
.byte 102,15,56,0,196 // pshufb %xmm4,%xmm0
.byte 102,15,56,0,204 // pshufb %xmm4,%xmm1
.byte 102,15,118,228 // pcmpeqd %xmm4,%xmm4
@@ -61359,7 +61467,7 @@ HIDDEN _sk_srcover_sse41_8bit
.globl _sk_srcover_sse41_8bit
FUNCTION(_sk_srcover_sse41_8bit)
_sk_srcover_sse41_8bit:
- .byte 102,15,111,53,175,7,0,0 // movdqa 0x7af(%rip),%xmm6 # 1f60 <_sk_xor__sse41_8bit+0x384>
+ .byte 102,15,111,53,31,9,0,0 // movdqa 0x91f(%rip),%xmm6 # 20d0 <_sk_darken_sse41_8bit+0x3ac>
.byte 102,68,15,111,217 // movdqa %xmm1,%xmm11
.byte 102,68,15,56,0,222 // pshufb %xmm6,%xmm11
.byte 102,15,111,232 // movdqa %xmm0,%xmm5
@@ -61400,7 +61508,7 @@ HIDDEN _sk_dstover_sse41_8bit
.globl _sk_dstover_sse41_8bit
FUNCTION(_sk_dstover_sse41_8bit)
_sk_dstover_sse41_8bit:
- .byte 102,68,15,111,5,15,7,0,0 // movdqa 0x70f(%rip),%xmm8 # 1f70 <_sk_xor__sse41_8bit+0x394>
+ .byte 102,68,15,111,5,127,8,0,0 // movdqa 0x87f(%rip),%xmm8 # 20e0 <_sk_darken_sse41_8bit+0x3bc>
.byte 102,68,15,111,209 // movdqa %xmm1,%xmm10
.byte 102,68,15,56,48,201 // pmovzxbw %xmm1,%xmm9
.byte 102,15,252,203 // paddb %xmm3,%xmm1
@@ -61479,7 +61587,7 @@ _sk_multiply_sse41_8bit:
.byte 102,15,111,218 // movdqa %xmm2,%xmm3
.byte 102,15,111,209 // movdqa %xmm1,%xmm2
.byte 102,15,111,200 // movdqa %xmm0,%xmm1
- .byte 102,68,15,111,53,221,5,0,0 // movdqa 0x5dd(%rip),%xmm14 # 1f80 <_sk_xor__sse41_8bit+0x3a4>
+ .byte 102,68,15,111,53,77,7,0,0 // movdqa 0x74d(%rip),%xmm14 # 20f0 <_sk_darken_sse41_8bit+0x3cc>
.byte 102,68,15,111,195 // movdqa %xmm3,%xmm8
.byte 102,15,111,235 // movdqa %xmm3,%xmm5
.byte 102,65,15,56,0,238 // pshufb %xmm14,%xmm5
@@ -61608,7 +61716,7 @@ HIDDEN _sk_xor__sse41_8bit
.globl _sk_xor__sse41_8bit
FUNCTION(_sk_xor__sse41_8bit)
_sk_xor__sse41_8bit:
- .byte 102,68,15,111,21,171,3,0,0 // movdqa 0x3ab(%rip),%xmm10 # 1f90 <_sk_xor__sse41_8bit+0x3b4>
+ .byte 102,68,15,111,21,27,5,0,0 // movdqa 0x51b(%rip),%xmm10 # 2100 <_sk_darken_sse41_8bit+0x3dc>
.byte 102,68,15,111,226 // movdqa %xmm2,%xmm12
.byte 102,68,15,56,48,194 // pmovzxbw %xmm2,%xmm8
.byte 102,15,111,234 // movdqa %xmm2,%xmm5
@@ -61676,13 +61784,92 @@ _sk_xor__sse41_8bit:
.byte 102,15,111,205 // movdqa %xmm5,%xmm1
.byte 255,224 // jmpq *%rax
+HIDDEN _sk_darken_sse41_8bit
+.globl _sk_darken_sse41_8bit
+FUNCTION(_sk_darken_sse41_8bit)
+_sk_darken_sse41_8bit:
+ .byte 102,68,15,111,240 // movdqa %xmm0,%xmm14
+ .byte 102,68,15,111,5,222,3,0,0 // movdqa 0x3de(%rip),%xmm8 # 2110 <_sk_darken_sse41_8bit+0x3ec>
+ .byte 102,68,15,111,219 // movdqa %xmm3,%xmm11
+ .byte 102,68,15,56,48,203 // pmovzxbw %xmm3,%xmm9
+ .byte 102,15,111,243 // movdqa %xmm3,%xmm6
+ .byte 102,65,15,56,0,240 // pshufb %xmm8,%xmm6
+ .byte 102,68,15,111,234 // movdqa %xmm2,%xmm13
+ .byte 102,68,15,56,48,210 // pmovzxbw %xmm2,%xmm10
+ .byte 102,15,111,234 // movdqa %xmm2,%xmm5
+ .byte 102,65,15,56,0,232 // pshufb %xmm8,%xmm5
+ .byte 102,69,15,239,228 // pxor %xmm12,%xmm12
+ .byte 102,65,15,104,196 // punpckhbw %xmm12,%xmm0
+ .byte 102,15,111,225 // movdqa %xmm1,%xmm4
+ .byte 102,65,15,104,228 // punpckhbw %xmm12,%xmm4
+ .byte 102,69,15,56,48,254 // pmovzxbw %xmm14,%xmm15
+ .byte 102,15,56,48,253 // pmovzxbw %xmm5,%xmm7
+ .byte 102,65,15,104,236 // punpckhbw %xmm12,%xmm5
+ .byte 102,15,213,232 // pmullw %xmm0,%xmm5
+ .byte 102,15,253,232 // paddw %xmm0,%xmm5
+ .byte 102,15,56,48,198 // pmovzxbw %xmm6,%xmm0
+ .byte 102,65,15,104,244 // punpckhbw %xmm12,%xmm6
+ .byte 102,15,213,244 // pmullw %xmm4,%xmm6
+ .byte 102,15,253,244 // paddw %xmm4,%xmm6
+ .byte 102,65,15,111,230 // movdqa %xmm14,%xmm4
+ .byte 102,68,15,252,242 // paddb %xmm2,%xmm14
+ .byte 102,65,15,213,255 // pmullw %xmm15,%xmm7
+ .byte 102,65,15,253,255 // paddw %xmm15,%xmm7
+ .byte 102,68,15,56,48,249 // pmovzxbw %xmm1,%xmm15
+ .byte 102,65,15,213,199 // pmullw %xmm15,%xmm0
+ .byte 102,65,15,253,199 // paddw %xmm15,%xmm0
+ .byte 102,68,15,111,249 // movdqa %xmm1,%xmm15
+ .byte 102,15,252,203 // paddb %xmm3,%xmm1
+ .byte 102,15,113,213,8 // psrlw $0x8,%xmm5
+ .byte 102,15,113,215,8 // psrlw $0x8,%xmm7
+ .byte 102,15,103,253 // packuswb %xmm5,%xmm7
+ .byte 102,15,113,214,8 // psrlw $0x8,%xmm6
+ .byte 102,15,113,208,8 // psrlw $0x8,%xmm0
+ .byte 102,15,103,198 // packuswb %xmm6,%xmm0
+ .byte 102,69,15,56,0,248 // pshufb %xmm8,%xmm15
+ .byte 102,65,15,56,0,224 // pshufb %xmm8,%xmm4
+ .byte 102,69,15,104,236 // punpckhbw %xmm12,%xmm13
+ .byte 102,69,15,104,220 // punpckhbw %xmm12,%xmm11
+ .byte 102,15,56,48,236 // pmovzxbw %xmm4,%xmm5
+ .byte 102,65,15,56,48,247 // pmovzxbw %xmm15,%xmm6
+ .byte 102,65,15,104,228 // punpckhbw %xmm12,%xmm4
+ .byte 102,69,15,104,252 // punpckhbw %xmm12,%xmm15
+ .byte 102,69,15,213,251 // pmullw %xmm11,%xmm15
+ .byte 102,65,15,213,229 // pmullw %xmm13,%xmm4
+ .byte 102,65,15,213,241 // pmullw %xmm9,%xmm6
+ .byte 102,65,15,213,234 // pmullw %xmm10,%xmm5
+ .byte 102,65,15,253,229 // paddw %xmm13,%xmm4
+ .byte 102,69,15,253,251 // paddw %xmm11,%xmm15
+ .byte 102,65,15,253,234 // paddw %xmm10,%xmm5
+ .byte 102,65,15,253,241 // paddw %xmm9,%xmm6
+ .byte 102,65,15,113,215,8 // psrlw $0x8,%xmm15
+ .byte 102,15,113,212,8 // psrlw $0x8,%xmm4
+ .byte 102,15,113,214,8 // psrlw $0x8,%xmm6
+ .byte 102,15,113,213,8 // psrlw $0x8,%xmm5
+ .byte 102,15,103,236 // packuswb %xmm4,%xmm5
+ .byte 102,65,15,103,247 // packuswb %xmm15,%xmm6
+ .byte 102,15,222,253 // pmaxub %xmm5,%xmm7
+ .byte 102,15,222,198 // pmaxub %xmm6,%xmm0
+ .byte 102,68,15,111,193 // movdqa %xmm1,%xmm8
+ .byte 102,68,15,248,192 // psubb %xmm0,%xmm8
+ .byte 102,65,15,111,230 // movdqa %xmm14,%xmm4
+ .byte 102,15,248,231 // psubb %xmm7,%xmm4
+ .byte 102,15,248,206 // psubb %xmm6,%xmm1
+ .byte 102,68,15,248,245 // psubb %xmm5,%xmm14
+ .byte 15,40,5,162,2,0,0 // movaps 0x2a2(%rip),%xmm0 # 2120 <_sk_darken_sse41_8bit+0x3fc>
+ .byte 102,68,15,56,16,244 // pblendvb %xmm0,%xmm4,%xmm14
+ .byte 102,65,15,56,16,200 // pblendvb %xmm0,%xmm8,%xmm1
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 102,65,15,111,198 // movdqa %xmm14,%xmm0
+ .byte 255,224 // jmpq *%rax
+
BALIGN4
.byte 0,0 // add %al,(%rax)
- .byte 127,67 // jg 1d6b <_sk_xor__sse41_8bit+0x18f>
+ .byte 127,67 // jg 1edb <_sk_darken_sse41_8bit+0x1b7>
.byte 0,0 // add %al,(%rax)
- .byte 127,67 // jg 1d6f <_sk_xor__sse41_8bit+0x193>
+ .byte 127,67 // jg 1edf <_sk_darken_sse41_8bit+0x1bb>
.byte 0,0 // add %al,(%rax)
- .byte 127,67 // jg 1d73 <_sk_xor__sse41_8bit+0x197>
+ .byte 127,67 // jg 1ee3 <_sk_darken_sse41_8bit+0x1bf>
BALIGN16
.byte 0,0 // add %al,(%rax)
@@ -62030,9 +62217,31 @@ BALIGN16
.byte 11,11 // or (%rbx),%ecx
.byte 11,11 // or (%rbx),%ecx
.byte 15 // (bad)
- .byte 15 // .byte 0xf
- .byte 15 // .byte 0xf
- .byte 15 // .byte 0xf
+ .byte 15 // (bad)
+ .byte 15 // (bad)
+ .byte 15,3,3 // lsl (%rbx),%eax
+ .byte 3,3 // add (%rbx),%eax
+ .byte 7 // (bad)
+ .byte 7 // (bad)
+ .byte 7 // (bad)
+ .byte 7 // (bad)
+ .byte 11,11 // or (%rbx),%ecx
+ .byte 11,11 // or (%rbx),%ecx
+ .byte 15 // (bad)
+ .byte 15 // (bad)
+ .byte 15 // (bad)
+ .byte 15,255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,0 // incl (%rax)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,0 // incl (%rax)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,0 // incl (%rax)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,0 // incl (%rax)
BALIGN32
HIDDEN _sk_start_pipeline_sse2_8bit
@@ -62117,7 +62326,7 @@ HIDDEN _sk_set_rgb_sse2_8bit
FUNCTION(_sk_set_rgb_sse2_8bit)
_sk_set_rgb_sse2_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 243,15,16,37,253,33,0,0 // movss 0x21fd(%rip),%xmm4 # 22c0 <_sk_xor__sse2_8bit+0x1a7>
+ .byte 243,15,16,37,233,35,0,0 // movss 0x23e9(%rip),%xmm4 # 24ac <_sk_darken_sse2_8bit+0x1ee>
.byte 243,15,16,40 // movss (%rax),%xmm5
.byte 243,15,89,236 // mulss %xmm4,%xmm5
.byte 243,72,15,44,205 // cvttss2si %xmm5,%rcx
@@ -62132,7 +62341,7 @@ _sk_set_rgb_sse2_8bit:
.byte 9,208 // or %edx,%eax
.byte 102,15,110,224 // movd %eax,%xmm4
.byte 102,15,112,228,0 // pshufd $0x0,%xmm4,%xmm4
- .byte 102,15,111,45,205,33,0,0 // movdqa 0x21cd(%rip),%xmm5 # 22d0 <_sk_xor__sse2_8bit+0x1b7>
+ .byte 102,15,111,45,189,35,0,0 // movdqa 0x23bd(%rip),%xmm5 # 24c0 <_sk_darken_sse2_8bit+0x202>
.byte 102,15,219,205 // pand %xmm5,%xmm1
.byte 102,15,219,197 // pand %xmm5,%xmm0
.byte 102,15,235,196 // por %xmm4,%xmm0
@@ -62157,7 +62366,7 @@ _sk_premul_sse2_8bit:
.byte 102,15,96,192 // punpcklbw %xmm0,%xmm0
.byte 242,15,112,192,95 // pshuflw $0x5f,%xmm0,%xmm0
.byte 243,15,112,248,95 // pshufhw $0x5f,%xmm0,%xmm7
- .byte 102,15,111,5,129,33,0,0 // movdqa 0x2181(%rip),%xmm0 # 22e0 <_sk_xor__sse2_8bit+0x1c7>
+ .byte 102,15,111,5,113,35,0,0 // movdqa 0x2371(%rip),%xmm0 # 24d0 <_sk_darken_sse2_8bit+0x212>
.byte 102,15,235,248 // por %xmm0,%xmm7
.byte 102,15,235,240 // por %xmm0,%xmm6
.byte 102,69,15,239,201 // pxor %xmm9,%xmm9
@@ -62678,7 +62887,7 @@ _sk_load_a8_sse2_8bit:
.byte 117,48 // jne 7fd <_sk_load_a8_sse2_8bit+0x4d>
.byte 243,66,15,126,4,2 // movq (%rdx,%r8,1),%xmm0
.byte 102,15,96,192 // punpcklbw %xmm0,%xmm0
- .byte 102,15,84,5,17,27,0,0 // andpd 0x1b11(%rip),%xmm0 # 22f0 <_sk_xor__sse2_8bit+0x1d7>
+ .byte 102,15,84,5,1,29,0,0 // andpd 0x1d01(%rip),%xmm0 # 24e0 <_sk_darken_sse2_8bit+0x222>
.byte 102,15,239,228 // pxor %xmm4,%xmm4
.byte 102,15,40,200 // movapd %xmm0,%xmm1
.byte 102,15,105,204 // punpckhwd %xmm4,%xmm1
@@ -62755,7 +62964,7 @@ _sk_load_a8_dst_sse2_8bit:
.byte 117,48 // jne 8f1 <_sk_load_a8_dst_sse2_8bit+0x4d>
.byte 243,66,15,126,20,2 // movq (%rdx,%r8,1),%xmm2
.byte 102,15,96,208 // punpcklbw %xmm0,%xmm2
- .byte 102,15,84,21,45,26,0,0 // andpd 0x1a2d(%rip),%xmm2 # 2300 <_sk_xor__sse2_8bit+0x1e7>
+ .byte 102,15,84,21,29,28,0,0 // andpd 0x1c1d(%rip),%xmm2 # 24f0 <_sk_darken_sse2_8bit+0x232>
.byte 102,15,239,228 // pxor %xmm4,%xmm4
.byte 102,15,40,218 // movapd %xmm2,%xmm3
.byte 102,15,105,220 // punpckhwd %xmm4,%xmm3
@@ -62839,7 +63048,7 @@ _sk_store_a8_sse2_8bit:
.byte 102,15,107,229 // packssdw %xmm5,%xmm4
.byte 77,133,201 // test %r9,%r9
.byte 117,22 // jne 9f5 <_sk_store_a8_sse2_8bit+0x5d>
- .byte 102,15,219,37,41,25,0,0 // pand 0x1929(%rip),%xmm4 # 2310 <_sk_xor__sse2_8bit+0x1f7>
+ .byte 102,15,219,37,25,27,0,0 // pand 0x1b19(%rip),%xmm4 # 2500 <_sk_darken_sse2_8bit+0x242>
.byte 102,15,103,228 // packuswb %xmm4,%xmm4
.byte 102,66,15,214,36,2 // movq %xmm4,(%rdx,%r8,1)
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -62860,7 +63069,7 @@ _sk_store_a8_sse2_8bit:
.byte 102,15,127,100,36,184 // movdqa %xmm4,-0x48(%rsp)
.byte 138,68,36,188 // mov -0x44(%rsp),%al
.byte 66,136,68,2,2 // mov %al,0x2(%rdx,%r8,1)
- .byte 102,15,219,37,211,24,0,0 // pand 0x18d3(%rip),%xmm4 # 2310 <_sk_xor__sse2_8bit+0x1f7>
+ .byte 102,15,219,37,195,26,0,0 // pand 0x1ac3(%rip),%xmm4 # 2500 <_sk_darken_sse2_8bit+0x242>
.byte 102,15,103,228 // packuswb %xmm4,%xmm4
.byte 102,15,126,224 // movd %xmm4,%eax
.byte 102,66,137,4,2 // mov %ax,(%rdx,%r8,1)
@@ -62874,7 +63083,7 @@ _sk_store_a8_sse2_8bit:
.byte 102,15,127,100,36,200 // movdqa %xmm4,-0x38(%rsp)
.byte 138,68,36,208 // mov -0x30(%rsp),%al
.byte 66,136,68,2,4 // mov %al,0x4(%rdx,%r8,1)
- .byte 102,15,219,37,143,24,0,0 // pand 0x188f(%rip),%xmm4 # 2310 <_sk_xor__sse2_8bit+0x1f7>
+ .byte 102,15,219,37,127,26,0,0 // pand 0x1a7f(%rip),%xmm4 # 2500 <_sk_darken_sse2_8bit+0x242>
.byte 102,15,103,228 // packuswb %xmm4,%xmm4
.byte 102,66,15,126,36,2 // movd %xmm4,(%rdx,%r8,1)
.byte 233,97,255,255,255 // jmpq 9f1 <_sk_store_a8_sse2_8bit+0x59>
@@ -62884,7 +63093,7 @@ _sk_store_a8_sse2_8bit:
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 233,255,255,255,218 // jmpq ffffffffdb000aa0 <_sk_xor__sse2_8bit+0xffffffffdaffe987>
+ .byte 233,255,255,255,218 // jmpq ffffffffdb000aa0 <_sk_darken_sse2_8bit+0xffffffffdaffe7e2>
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,203 // dec %ebx
@@ -62911,12 +63120,12 @@ _sk_load_g8_sse2_8bit:
.byte 117,116 // jne b3d <_sk_load_g8_sse2_8bit+0x91>
.byte 243,66,15,126,4,2 // movq (%rdx,%r8,1),%xmm0
.byte 102,15,96,192 // punpcklbw %xmm0,%xmm0
- .byte 102,15,84,5,69,24,0,0 // andpd 0x1845(%rip),%xmm0 # 2320 <_sk_xor__sse2_8bit+0x207>
+ .byte 102,15,84,5,53,26,0,0 // andpd 0x1a35(%rip),%xmm0 # 2510 <_sk_darken_sse2_8bit+0x252>
.byte 102,15,239,201 // pxor %xmm1,%xmm1
.byte 102,15,40,224 // movapd %xmm0,%xmm4
.byte 102,15,97,225 // punpcklwd %xmm1,%xmm4
.byte 102,15,105,193 // punpckhwd %xmm1,%xmm0
- .byte 102,15,111,45,61,24,0,0 // movdqa 0x183d(%rip),%xmm5 # 2330 <_sk_xor__sse2_8bit+0x217>
+ .byte 102,15,111,45,45,26,0,0 // movdqa 0x1a2d(%rip),%xmm5 # 2520 <_sk_darken_sse2_8bit+0x262>
.byte 102,15,112,240,245 // pshufd $0xf5,%xmm0,%xmm6
.byte 102,15,244,197 // pmuludq %xmm5,%xmm0
.byte 102,15,112,200,232 // pshufd $0xe8,%xmm0,%xmm1
@@ -62929,7 +63138,7 @@ _sk_load_g8_sse2_8bit:
.byte 102,15,244,245 // pmuludq %xmm5,%xmm6
.byte 102,15,112,230,232 // pshufd $0xe8,%xmm6,%xmm4
.byte 102,15,98,196 // punpckldq %xmm4,%xmm0
- .byte 102,15,111,37,15,24,0,0 // movdqa 0x180f(%rip),%xmm4 # 2340 <_sk_xor__sse2_8bit+0x227>
+ .byte 102,15,111,37,255,25,0,0 // movdqa 0x19ff(%rip),%xmm4 # 2530 <_sk_darken_sse2_8bit+0x272>
.byte 102,15,235,196 // por %xmm4,%xmm0
.byte 102,15,235,204 // por %xmm4,%xmm1
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -63003,12 +63212,12 @@ _sk_load_g8_dst_sse2_8bit:
.byte 117,116 // jne c7d <_sk_load_g8_dst_sse2_8bit+0x91>
.byte 243,66,15,126,20,2 // movq (%rdx,%r8,1),%xmm2
.byte 102,15,96,208 // punpcklbw %xmm0,%xmm2
- .byte 102,15,84,21,53,23,0,0 // andpd 0x1735(%rip),%xmm2 # 2350 <_sk_xor__sse2_8bit+0x237>
+ .byte 102,15,84,21,37,25,0,0 // andpd 0x1925(%rip),%xmm2 # 2540 <_sk_darken_sse2_8bit+0x282>
.byte 102,15,239,219 // pxor %xmm3,%xmm3
.byte 102,15,40,226 // movapd %xmm2,%xmm4
.byte 102,15,97,227 // punpcklwd %xmm3,%xmm4
.byte 102,15,105,211 // punpckhwd %xmm3,%xmm2
- .byte 102,15,111,45,45,23,0,0 // movdqa 0x172d(%rip),%xmm5 # 2360 <_sk_xor__sse2_8bit+0x247>
+ .byte 102,15,111,45,29,25,0,0 // movdqa 0x191d(%rip),%xmm5 # 2550 <_sk_darken_sse2_8bit+0x292>
.byte 102,15,112,242,245 // pshufd $0xf5,%xmm2,%xmm6
.byte 102,15,244,213 // pmuludq %xmm5,%xmm2
.byte 102,15,112,218,232 // pshufd $0xe8,%xmm2,%xmm3
@@ -63021,7 +63230,7 @@ _sk_load_g8_dst_sse2_8bit:
.byte 102,15,244,245 // pmuludq %xmm5,%xmm6
.byte 102,15,112,230,232 // pshufd $0xe8,%xmm6,%xmm4
.byte 102,15,98,212 // punpckldq %xmm4,%xmm2
- .byte 102,15,111,37,255,22,0,0 // movdqa 0x16ff(%rip),%xmm4 # 2370 <_sk_xor__sse2_8bit+0x257>
+ .byte 102,15,111,37,239,24,0,0 // movdqa 0x18ef(%rip),%xmm4 # 2560 <_sk_darken_sse2_8bit+0x2a2>
.byte 102,15,235,212 // por %xmm4,%xmm2
.byte 102,15,235,220 // por %xmm4,%xmm3
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -63238,7 +63447,7 @@ _sk_scale_1_float_sse2_8bit:
.byte 102,68,15,111,200 // movdqa %xmm0,%xmm9
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 243,15,16,0 // movss (%rax),%xmm0
- .byte 243,15,89,5,52,19,0,0 // mulss 0x1334(%rip),%xmm0 # 22c4 <_sk_xor__sse2_8bit+0x1ab>
+ .byte 243,15,89,5,32,21,0,0 // mulss 0x1520(%rip),%xmm0 # 24b0 <_sk_darken_sse2_8bit+0x1f2>
.byte 243,15,44,192 // cvttss2si %xmm0,%eax
.byte 102,15,239,246 // pxor %xmm6,%xmm6
.byte 102,65,15,111,193 // movdqa %xmm9,%xmm0
@@ -63250,7 +63459,7 @@ _sk_scale_1_float_sse2_8bit:
.byte 102,15,96,246 // punpcklbw %xmm6,%xmm6
.byte 242,15,112,246,0 // pshuflw $0x0,%xmm6,%xmm6
.byte 102,15,112,246,80 // pshufd $0x50,%xmm6,%xmm6
- .byte 102,15,219,53,183,19,0,0 // pand 0x13b7(%rip),%xmm6 # 2380 <_sk_xor__sse2_8bit+0x267>
+ .byte 102,15,219,53,167,21,0,0 // pand 0x15a7(%rip),%xmm6 # 2570 <_sk_darken_sse2_8bit+0x2b2>
.byte 102,15,111,254 // movdqa %xmm6,%xmm7
.byte 102,65,15,213,248 // pmullw %xmm8,%xmm7
.byte 102,15,111,230 // movdqa %xmm6,%xmm4
@@ -63286,7 +63495,7 @@ _sk_scale_u8_sse2_8bit:
.byte 15,133,239,0,0,0 // jne 1129 <_sk_scale_u8_sse2_8bit+0x110>
.byte 243,66,15,126,36,2 // movq (%rdx,%r8,1),%xmm4
.byte 102,15,96,224 // punpcklbw %xmm0,%xmm4
- .byte 102,15,84,37,68,19,0,0 // andpd 0x1344(%rip),%xmm4 # 2390 <_sk_xor__sse2_8bit+0x277>
+ .byte 102,15,84,37,52,21,0,0 // andpd 0x1534(%rip),%xmm4 # 2580 <_sk_darken_sse2_8bit+0x2c2>
.byte 102,69,15,239,192 // pxor %xmm8,%xmm8
.byte 102,15,40,236 // movapd %xmm4,%xmm5
.byte 102,65,15,105,232 // punpckhwd %xmm8,%xmm5
@@ -63395,7 +63604,7 @@ FUNCTION(_sk_lerp_1_float_sse2_8bit)
_sk_lerp_1_float_sse2_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 243,15,16,32 // movss (%rax),%xmm4
- .byte 243,15,89,37,222,16,0,0 // mulss 0x10de(%rip),%xmm4 # 22c8 <_sk_xor__sse2_8bit+0x1af>
+ .byte 243,15,89,37,202,18,0,0 // mulss 0x12ca(%rip),%xmm4 # 24b4 <_sk_darken_sse2_8bit+0x1f6>
.byte 243,15,44,196 // cvttss2si %xmm4,%eax
.byte 102,15,110,224 // movd %eax,%xmm4
.byte 102,15,96,228 // punpcklbw %xmm4,%xmm4
@@ -63408,7 +63617,7 @@ _sk_lerp_1_float_sse2_8bit:
.byte 102,68,15,111,217 // movdqa %xmm1,%xmm11
.byte 102,69,15,96,217 // punpcklbw %xmm9,%xmm11
.byte 102,65,15,104,201 // punpckhbw %xmm9,%xmm1
- .byte 102,15,111,53,117,17,0,0 // movdqa 0x1175(%rip),%xmm6 # 23a0 <_sk_xor__sse2_8bit+0x287>
+ .byte 102,15,111,53,101,19,0,0 // movdqa 0x1365(%rip),%xmm6 # 2590 <_sk_darken_sse2_8bit+0x2d2>
.byte 102,65,15,219,240 // pand %xmm8,%xmm6
.byte 102,15,111,230 // movdqa %xmm6,%xmm4
.byte 102,15,213,225 // pmullw %xmm1,%xmm4
@@ -63476,7 +63685,7 @@ _sk_lerp_u8_sse2_8bit:
.byte 15,133,141,1,0,0 // jne 14c0 <_sk_lerp_u8_sse2_8bit+0x1ae>
.byte 243,66,15,126,44,2 // movq (%rdx,%r8,1),%xmm5
.byte 102,15,96,232 // punpcklbw %xmm0,%xmm5
- .byte 102,15,84,45,107,16,0,0 // andpd 0x106b(%rip),%xmm5 # 23b0 <_sk_xor__sse2_8bit+0x297>
+ .byte 102,15,84,45,91,18,0,0 // andpd 0x125b(%rip),%xmm5 # 25a0 <_sk_darken_sse2_8bit+0x2e2>
.byte 102,69,15,239,192 // pxor %xmm8,%xmm8
.byte 102,15,40,229 // movapd %xmm5,%xmm4
.byte 102,65,15,105,224 // punpckhwd %xmm8,%xmm4
@@ -63634,7 +63843,7 @@ HIDDEN _sk_black_color_sse2_8bit
FUNCTION(_sk_black_color_sse2_8bit)
_sk_black_color_sse2_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
- .byte 15,40,5,51,14,0,0 // movaps 0xe33(%rip),%xmm0 # 23c0 <_sk_xor__sse2_8bit+0x2a7>
+ .byte 15,40,5,35,16,0,0 // movaps 0x1023(%rip),%xmm0 # 25b0 <_sk_darken_sse2_8bit+0x2f2>
.byte 15,40,200 // movaps %xmm0,%xmm1
.byte 255,224 // jmpq *%rax
@@ -64431,13 +64640,121 @@ _sk_xor__sse2_8bit:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
+HIDDEN _sk_darken_sse2_8bit
+.globl _sk_darken_sse2_8bit
+FUNCTION(_sk_darken_sse2_8bit)
+_sk_darken_sse2_8bit:
+ .byte 242,15,112,227,231 // pshuflw $0xe7,%xmm3,%xmm4
+ .byte 243,15,112,228,231 // pshufhw $0xe7,%xmm4,%xmm4
+ .byte 102,15,112,228,232 // pshufd $0xe8,%xmm4,%xmm4
+ .byte 102,15,96,228 // punpcklbw %xmm4,%xmm4
+ .byte 242,15,112,228,95 // pshuflw $0x5f,%xmm4,%xmm4
+ .byte 243,15,112,228,95 // pshufhw $0x5f,%xmm4,%xmm4
+ .byte 242,15,112,234,231 // pshuflw $0xe7,%xmm2,%xmm5
+ .byte 243,15,112,237,231 // pshufhw $0xe7,%xmm5,%xmm5
+ .byte 102,15,112,237,232 // pshufd $0xe8,%xmm5,%xmm5
+ .byte 102,15,96,237 // punpcklbw %xmm5,%xmm5
+ .byte 242,15,112,237,95 // pshuflw $0x5f,%xmm5,%xmm5
+ .byte 243,15,112,253,95 // pshufhw $0x5f,%xmm5,%xmm7
+ .byte 102,69,15,239,192 // pxor %xmm8,%xmm8
+ .byte 102,15,111,240 // movdqa %xmm0,%xmm6
+ .byte 102,65,15,96,240 // punpcklbw %xmm8,%xmm6
+ .byte 102,68,15,111,200 // movdqa %xmm0,%xmm9
+ .byte 102,69,15,104,200 // punpckhbw %xmm8,%xmm9
+ .byte 102,68,15,111,217 // movdqa %xmm1,%xmm11
+ .byte 102,69,15,96,216 // punpcklbw %xmm8,%xmm11
+ .byte 102,68,15,111,225 // movdqa %xmm1,%xmm12
+ .byte 102,69,15,104,224 // punpckhbw %xmm8,%xmm12
+ .byte 102,68,15,111,215 // movdqa %xmm7,%xmm10
+ .byte 102,69,15,96,208 // punpcklbw %xmm8,%xmm10
+ .byte 102,65,15,104,248 // punpckhbw %xmm8,%xmm7
+ .byte 102,68,15,111,236 // movdqa %xmm4,%xmm13
+ .byte 102,69,15,96,232 // punpcklbw %xmm8,%xmm13
+ .byte 102,65,15,104,224 // punpckhbw %xmm8,%xmm4
+ .byte 102,65,15,213,228 // pmullw %xmm12,%xmm4
+ .byte 102,69,15,213,235 // pmullw %xmm11,%xmm13
+ .byte 102,65,15,213,249 // pmullw %xmm9,%xmm7
+ .byte 102,68,15,213,214 // pmullw %xmm6,%xmm10
+ .byte 102,68,15,253,214 // paddw %xmm6,%xmm10
+ .byte 102,65,15,253,249 // paddw %xmm9,%xmm7
+ .byte 102,69,15,253,235 // paddw %xmm11,%xmm13
+ .byte 102,65,15,253,228 // paddw %xmm12,%xmm4
+ .byte 102,15,113,212,8 // psrlw $0x8,%xmm4
+ .byte 102,65,15,113,213,8 // psrlw $0x8,%xmm13
+ .byte 102,15,113,215,8 // psrlw $0x8,%xmm7
+ .byte 102,65,15,113,210,8 // psrlw $0x8,%xmm10
+ .byte 102,68,15,103,215 // packuswb %xmm7,%xmm10
+ .byte 102,68,15,103,236 // packuswb %xmm4,%xmm13
+ .byte 242,15,112,225,231 // pshuflw $0xe7,%xmm1,%xmm4
+ .byte 243,15,112,228,231 // pshufhw $0xe7,%xmm4,%xmm4
+ .byte 102,15,112,228,232 // pshufd $0xe8,%xmm4,%xmm4
+ .byte 102,15,96,228 // punpcklbw %xmm4,%xmm4
+ .byte 242,15,112,228,95 // pshuflw $0x5f,%xmm4,%xmm4
+ .byte 243,68,15,112,220,95 // pshufhw $0x5f,%xmm4,%xmm11
+ .byte 242,15,112,224,231 // pshuflw $0xe7,%xmm0,%xmm4
+ .byte 243,15,112,228,231 // pshufhw $0xe7,%xmm4,%xmm4
+ .byte 102,15,112,228,232 // pshufd $0xe8,%xmm4,%xmm4
+ .byte 102,15,96,228 // punpcklbw %xmm4,%xmm4
+ .byte 242,15,112,228,95 // pshuflw $0x5f,%xmm4,%xmm4
+ .byte 243,68,15,112,228,95 // pshufhw $0x5f,%xmm4,%xmm12
+ .byte 102,68,15,111,202 // movdqa %xmm2,%xmm9
+ .byte 102,15,252,194 // paddb %xmm2,%xmm0
+ .byte 102,15,111,242 // movdqa %xmm2,%xmm6
+ .byte 102,65,15,96,240 // punpcklbw %xmm8,%xmm6
+ .byte 102,69,15,104,200 // punpckhbw %xmm8,%xmm9
+ .byte 102,68,15,111,243 // movdqa %xmm3,%xmm14
+ .byte 102,15,252,203 // paddb %xmm3,%xmm1
+ .byte 102,15,111,235 // movdqa %xmm3,%xmm5
+ .byte 102,65,15,96,232 // punpcklbw %xmm8,%xmm5
+ .byte 102,69,15,104,240 // punpckhbw %xmm8,%xmm14
+ .byte 102,65,15,111,228 // movdqa %xmm12,%xmm4
+ .byte 102,65,15,96,224 // punpcklbw %xmm8,%xmm4
+ .byte 102,69,15,104,224 // punpckhbw %xmm8,%xmm12
+ .byte 102,65,15,111,251 // movdqa %xmm11,%xmm7
+ .byte 102,65,15,96,248 // punpcklbw %xmm8,%xmm7
+ .byte 102,69,15,104,216 // punpckhbw %xmm8,%xmm11
+ .byte 102,69,15,213,222 // pmullw %xmm14,%xmm11
+ .byte 102,15,213,253 // pmullw %xmm5,%xmm7
+ .byte 102,69,15,213,225 // pmullw %xmm9,%xmm12
+ .byte 102,15,213,230 // pmullw %xmm6,%xmm4
+ .byte 102,15,253,230 // paddw %xmm6,%xmm4
+ .byte 102,69,15,253,225 // paddw %xmm9,%xmm12
+ .byte 102,15,253,253 // paddw %xmm5,%xmm7
+ .byte 102,69,15,253,222 // paddw %xmm14,%xmm11
+ .byte 102,65,15,113,211,8 // psrlw $0x8,%xmm11
+ .byte 102,15,113,215,8 // psrlw $0x8,%xmm7
+ .byte 102,65,15,113,212,8 // psrlw $0x8,%xmm12
+ .byte 102,15,113,212,8 // psrlw $0x8,%xmm4
+ .byte 102,65,15,103,228 // packuswb %xmm12,%xmm4
+ .byte 102,65,15,103,251 // packuswb %xmm11,%xmm7
+ .byte 102,68,15,222,212 // pmaxub %xmm4,%xmm10
+ .byte 102,68,15,222,239 // pmaxub %xmm7,%xmm13
+ .byte 102,15,111,241 // movdqa %xmm1,%xmm6
+ .byte 102,65,15,248,245 // psubb %xmm13,%xmm6
+ .byte 102,15,111,232 // movdqa %xmm0,%xmm5
+ .byte 102,65,15,248,234 // psubb %xmm10,%xmm5
+ .byte 102,15,248,207 // psubb %xmm7,%xmm1
+ .byte 102,15,248,196 // psubb %xmm4,%xmm0
+ .byte 102,15,111,37,62,1,0,0 // movdqa 0x13e(%rip),%xmm4 # 25c0 <_sk_darken_sse2_8bit+0x302>
+ .byte 102,15,219,236 // pand %xmm4,%xmm5
+ .byte 102,15,111,252 // movdqa %xmm4,%xmm7
+ .byte 102,15,223,248 // pandn %xmm0,%xmm7
+ .byte 102,15,235,239 // por %xmm7,%xmm5
+ .byte 102,15,219,244 // pand %xmm4,%xmm6
+ .byte 102,15,223,225 // pandn %xmm1,%xmm4
+ .byte 102,15,235,244 // por %xmm4,%xmm6
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 102,15,111,197 // movdqa %xmm5,%xmm0
+ .byte 102,15,111,206 // movdqa %xmm6,%xmm1
+ .byte 255,224 // jmpq *%rax
+
BALIGN4
.byte 0,0 // add %al,(%rax)
- .byte 127,67 // jg 2307 <_sk_xor__sse2_8bit+0x1ee>
+ .byte 127,67 // jg 24f3 <_sk_darken_sse2_8bit+0x235>
.byte 0,0 // add %al,(%rax)
- .byte 127,67 // jg 230b <_sk_xor__sse2_8bit+0x1f2>
+ .byte 127,67 // jg 24f7 <_sk_darken_sse2_8bit+0x239>
.byte 0,0 // add %al,(%rax)
- .byte 127,67 // jg 230f <_sk_xor__sse2_8bit+0x1f6>
+ .byte 127,67 // jg 24fb <_sk_darken_sse2_8bit+0x23d>
BALIGN16
.byte 0,0 // add %al,(%rax)
@@ -64568,6 +64885,18 @@ BALIGN16
.byte 0,255 // add %bh,%bh
.byte 0,0 // add %al,(%rax)
.byte 0,255 // add %bh,%bh
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,0 // incl (%rax)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,0 // incl (%rax)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,0 // incl (%rax)
+ .byte 255 // (bad)
+ .byte 255 // (bad)
+ .byte 255,0 // incl (%rax)
#elif defined(__i386__)
BALIGN32
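
All of the new _sk_darken_*_8bit stages in this patch encode the same blend. Darken keeps the darker of the two cross-faded results per color channel, r = s + d - max(s*da, d*sa), while the byte blend at the end of each stage (pand/pandn/por in SSE2, [v]pblendvb in SSE4.1 and HSW) selects the plain src-over result for the alpha lane. The alpha products are kept in 8-bit fixed point: the recurring pmullw + paddw + psrlw $0x8 sequences approximate c*a/255 as (c*a + c) >> 8. Below is a minimal C++ sketch of the per-channel math under those assumptions; the helper names are illustrative only, not the ones used by the real 8-bit stages.

    #include <algorithm>
    #include <cstdint>

    // Approximate c*a/255 as (c*a + c) >> 8, mirroring the
    // pmullw + paddw + psrlw $0x8 idiom in the generated code above.
    static inline int scale255(int c, int a) {
        return (c * a + c) >> 8;
    }

    // One color channel of darken: r = s + d - max(s*da, d*sa).
    // For premultiplied inputs the result fits back in a byte, which is
    // why the stages can use plain wrapping byte adds/subtracts
    // (paddb/psubb) rather than saturating ones.
    static inline uint8_t darken_channel(int s, int sa, int d, int da) {
        return (uint8_t)(s + d - std::max(scale255(s, da), scale255(d, sa)));
    }

The vectorized stages follow the same shape sixteen (SSE) or thirty-two (HSW) bytes at a time: broadcast each pixel's alpha across its four bytes, form both scaled products, take the byte-wise max (pmaxub/vpmaxub), subtract it from s + d, and then mask the src-over alpha back into the alpha byte.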
diff --git a/src/jumper/SkJumper_generated_win.S b/src/jumper/SkJumper_generated_win.S
index afa37bc78f..0faff48dcb 100644
--- a/src/jumper/SkJumper_generated_win.S
+++ b/src/jumper/SkJumper_generated_win.S
@@ -39059,7 +39059,7 @@ _sk_uniform_color_hsw_8bit LABEL PROC
PUBLIC _sk_set_rgb_hsw_8bit
_sk_set_rgb_hsw_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 197,250,16,37,65,39,0,0 ; vmovss 0x2741(%rip),%xmm4 # 28c4 <_sk_xor__hsw_8bit+0x173>
+ DB 197,250,16,37,201,40,0,0 ; vmovss 0x28c9(%rip),%xmm4 # 2a4c <_sk_darken_hsw_8bit+0x189>
DB 197,218,89,40 ; vmulss (%rax),%xmm4,%xmm5
DB 196,225,250,44,205 ; vcvttss2si %xmm5,%rcx
DB 197,218,89,104,4 ; vmulss 0x4(%rax),%xmm4,%xmm5
@@ -39072,7 +39072,7 @@ _sk_set_rgb_hsw_8bit LABEL PROC
DB 9,208 ; or %edx,%eax
DB 197,249,110,224 ; vmovd %eax,%xmm4
DB 196,226,125,88,228 ; vpbroadcastd %xmm4,%ymm4
- DB 197,253,111,45,69,39,0,0 ; vmovdqa 0x2745(%rip),%ymm5 # 2900 <_sk_xor__hsw_8bit+0x1af>
+ DB 197,253,111,45,197,40,0,0 ; vmovdqa 0x28c5(%rip),%ymm5 # 2a80 <_sk_darken_hsw_8bit+0x1bd>
DB 197,245,219,205 ; vpand %ymm5,%ymm1,%ymm1
DB 197,253,219,197 ; vpand %ymm5,%ymm0,%ymm0
DB 197,221,235,192 ; vpor %ymm0,%ymm4,%ymm0
@@ -39082,10 +39082,10 @@ _sk_set_rgb_hsw_8bit LABEL PROC
PUBLIC _sk_premul_hsw_8bit
_sk_premul_hsw_8bit LABEL PROC
- DB 197,253,111,37,73,39,0,0 ; vmovdqa 0x2749(%rip),%ymm4 # 2920 <_sk_xor__hsw_8bit+0x1cf>
+ DB 197,253,111,37,201,40,0,0 ; vmovdqa 0x28c9(%rip),%ymm4 # 2aa0 <_sk_darken_hsw_8bit+0x1dd>
DB 196,226,125,0,236 ; vpshufb %ymm4,%ymm0,%ymm5
DB 196,226,117,0,228 ; vpshufb %ymm4,%ymm1,%ymm4
- DB 197,253,111,53,87,39,0,0 ; vmovdqa 0x2757(%rip),%ymm6 # 2940 <_sk_xor__hsw_8bit+0x1ef>
+ DB 197,253,111,53,215,40,0,0 ; vmovdqa 0x28d7(%rip),%ymm6 # 2ac0 <_sk_darken_hsw_8bit+0x1fd>
DB 197,221,235,230 ; vpor %ymm6,%ymm4,%ymm4
DB 197,213,235,238 ; vpor %ymm6,%ymm5,%ymm5
DB 196,226,125,48,240 ; vpmovzxbw %xmm0,%ymm6
@@ -39123,7 +39123,7 @@ _sk_premul_hsw_8bit LABEL PROC
PUBLIC _sk_swap_rb_hsw_8bit
_sk_swap_rb_hsw_8bit LABEL PROC
- DB 197,253,111,37,207,38,0,0 ; vmovdqa 0x26cf(%rip),%ymm4 # 2960 <_sk_xor__hsw_8bit+0x20f>
+ DB 197,253,111,37,79,40,0,0 ; vmovdqa 0x284f(%rip),%ymm4 # 2ae0 <_sk_darken_hsw_8bit+0x21d>
DB 196,226,125,0,196 ; vpshufb %ymm4,%ymm0,%ymm0
DB 196,226,117,0,204 ; vpshufb %ymm4,%ymm1,%ymm1
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -39463,7 +39463,7 @@ _sk_load_bgra_hsw_8bit LABEL PROC
DB 117,35 ; jne 774 <_sk_load_bgra_hsw_8bit+0x44>
DB 196,161,126,111,76,130,32 ; vmovdqu 0x20(%rdx,%r8,4),%ymm1
DB 196,161,126,111,4,130 ; vmovdqu (%rdx,%r8,4),%ymm0
- DB 197,253,111,37,26,34,0,0 ; vmovdqa 0x221a(%rip),%ymm4 # 2980 <_sk_xor__hsw_8bit+0x22f>
+ DB 197,253,111,37,154,35,0,0 ; vmovdqa 0x239a(%rip),%ymm4 # 2b00 <_sk_darken_hsw_8bit+0x23d>
DB 196,226,125,0,196 ; vpshufb %ymm4,%ymm0,%ymm0
DB 196,226,117,0,204 ; vpshufb %ymm4,%ymm1,%ymm1
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -39576,7 +39576,7 @@ _sk_load_bgra_dst_hsw_8bit LABEL PROC
DB 117,35 ; jne 92c <_sk_load_bgra_dst_hsw_8bit+0x44>
DB 196,161,126,111,92,130,32 ; vmovdqu 0x20(%rdx,%r8,4),%ymm3
DB 196,161,126,111,20,130 ; vmovdqu (%rdx,%r8,4),%ymm2
- DB 197,253,111,37,130,32,0,0 ; vmovdqa 0x2082(%rip),%ymm4 # 29a0 <_sk_xor__hsw_8bit+0x24f>
+ DB 197,253,111,37,2,34,0,0 ; vmovdqa 0x2202(%rip),%ymm4 # 2b20 <_sk_darken_hsw_8bit+0x25d>
DB 196,226,109,0,212 ; vpshufb %ymm4,%ymm2,%ymm2
DB 196,226,101,0,220 ; vpshufb %ymm4,%ymm3,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -39685,7 +39685,7 @@ _sk_store_bgra_hsw_8bit LABEL PROC
DB 72,15,175,209 ; imul %rcx,%rdx
DB 72,193,226,2 ; shl $0x2,%rdx
DB 72,3,16 ; add (%rax),%rdx
- DB 197,253,111,37,252,30,0,0 ; vmovdqa 0x1efc(%rip),%ymm4 # 29c0 <_sk_xor__hsw_8bit+0x26f>
+ DB 197,253,111,37,124,32,0,0 ; vmovdqa 0x207c(%rip),%ymm4 # 2b40 <_sk_darken_hsw_8bit+0x27d>
DB 196,226,117,0,236 ; vpshufb %ymm4,%ymm1,%ymm5
DB 196,226,125,0,228 ; vpshufb %ymm4,%ymm0,%ymm4
DB 77,133,201 ; test %r9,%r9
@@ -39965,10 +39965,10 @@ _sk_store_a8_hsw_8bit LABEL PROC
DB 72,99,87,8 ; movslq 0x8(%rdi),%rdx
DB 72,15,175,209 ; imul %rcx,%rdx
DB 72,3,16 ; add (%rax),%rdx
- DB 197,253,111,37,40,27,0,0 ; vmovdqa 0x1b28(%rip),%ymm4 # 29e0 <_sk_xor__hsw_8bit+0x28f>
+ DB 197,253,111,37,168,28,0,0 ; vmovdqa 0x1ca8(%rip),%ymm4 # 2b60 <_sk_darken_hsw_8bit+0x29d>
DB 196,226,117,0,236 ; vpshufb %ymm4,%ymm1,%ymm5
DB 196,227,253,0,237,232 ; vpermq $0xe8,%ymm5,%ymm5
- DB 197,249,111,53,21,29,0,0 ; vmovdqa 0x1d15(%rip),%xmm6 # 2be0 <_sk_xor__hsw_8bit+0x48f>
+ DB 197,249,111,53,181,30,0,0 ; vmovdqa 0x1eb5(%rip),%xmm6 # 2d80 <_sk_darken_hsw_8bit+0x4bd>
DB 196,226,81,0,238 ; vpshufb %xmm6,%xmm5,%xmm5
DB 196,226,125,0,228 ; vpshufb %ymm4,%ymm0,%ymm4
DB 196,227,253,0,228,232 ; vpermq $0xe8,%ymm4,%ymm4
@@ -40058,10 +40058,10 @@ _sk_load_g8_hsw_8bit LABEL PROC
DB 196,226,125,49,200 ; vpmovzxbd %xmm0,%ymm1
DB 197,249,112,192,78 ; vpshufd $0x4e,%xmm0,%xmm0
DB 196,226,125,49,192 ; vpmovzxbd %xmm0,%ymm0
- DB 196,226,125,88,37,189,24,0,0 ; vpbroadcastd 0x18bd(%rip),%ymm4 # 28c8 <_sk_xor__hsw_8bit+0x177>
+ DB 196,226,125,88,37,69,26,0,0 ; vpbroadcastd 0x1a45(%rip),%ymm4 # 2a50 <_sk_darken_hsw_8bit+0x18d>
DB 196,226,125,64,236 ; vpmulld %ymm4,%ymm0,%ymm5
DB 196,226,117,64,196 ; vpmulld %ymm4,%ymm1,%ymm0
- DB 196,226,125,88,13,174,24,0,0 ; vpbroadcastd 0x18ae(%rip),%ymm1 # 28cc <_sk_xor__hsw_8bit+0x17b>
+ DB 196,226,125,88,13,54,26,0,0 ; vpbroadcastd 0x1a36(%rip),%ymm1 # 2a54 <_sk_darken_hsw_8bit+0x191>
DB 197,253,235,193 ; vpor %ymm1,%ymm0,%ymm0
DB 197,213,235,201 ; vpor %ymm1,%ymm5,%ymm1
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -40157,10 +40157,10 @@ _sk_load_g8_dst_hsw_8bit LABEL PROC
DB 196,226,125,49,218 ; vpmovzxbd %xmm2,%ymm3
DB 197,249,112,210,78 ; vpshufd $0x4e,%xmm2,%xmm2
DB 196,226,125,49,210 ; vpmovzxbd %xmm2,%ymm2
- DB 196,226,125,88,37,77,23,0,0 ; vpbroadcastd 0x174d(%rip),%ymm4 # 28d0 <_sk_xor__hsw_8bit+0x17f>
+ DB 196,226,125,88,37,213,24,0,0 ; vpbroadcastd 0x18d5(%rip),%ymm4 # 2a58 <_sk_darken_hsw_8bit+0x195>
DB 196,226,109,64,236 ; vpmulld %ymm4,%ymm2,%ymm5
DB 196,226,101,64,212 ; vpmulld %ymm4,%ymm3,%ymm2
- DB 196,226,125,88,29,62,23,0,0 ; vpbroadcastd 0x173e(%rip),%ymm3 # 28d4 <_sk_xor__hsw_8bit+0x183>
+ DB 196,226,125,88,29,198,24,0,0 ; vpbroadcastd 0x18c6(%rip),%ymm3 # 2a5c <_sk_darken_hsw_8bit+0x199>
DB 197,237,235,211 ; vpor %ymm3,%ymm2,%ymm2
DB 197,213,235,219 ; vpor %ymm3,%ymm5,%ymm3
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -40255,7 +40255,7 @@ _sk_srcover_rgba_8888_hsw_8bit LABEL PROC
DB 15,133,222,0,0,0 ; jne 13c3 <_sk_srcover_rgba_8888_hsw_8bit+0x103>
DB 196,33,126,111,76,138,32 ; vmovdqu 0x20(%rdx,%r9,4),%ymm9
DB 196,33,126,111,28,138 ; vmovdqu (%rdx,%r9,4),%ymm11
- DB 197,253,111,53,6,23,0,0 ; vmovdqa 0x1706(%rip),%ymm6 # 2a00 <_sk_xor__hsw_8bit+0x2af>
+ DB 197,253,111,53,134,24,0,0 ; vmovdqa 0x1886(%rip),%ymm6 # 2b80 <_sk_darken_hsw_8bit+0x2bd>
DB 196,226,117,0,254 ; vpshufb %ymm6,%ymm1,%ymm7
DB 196,226,125,0,246 ; vpshufb %ymm6,%ymm0,%ymm6
DB 196,66,125,48,195 ; vpmovzxbw %xmm11,%ymm8
@@ -40461,7 +40461,7 @@ PUBLIC _sk_scale_1_float_hsw_8bit
_sk_scale_1_float_hsw_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 197,250,16,32 ; vmovss (%rax),%xmm4
- DB 197,218,89,37,130,18,0,0 ; vmulss 0x1282(%rip),%xmm4,%xmm4 # 28d8 <_sk_xor__hsw_8bit+0x187>
+ DB 197,218,89,37,10,20,0,0 ; vmulss 0x140a(%rip),%xmm4,%xmm4 # 2a60 <_sk_darken_hsw_8bit+0x19d>
DB 197,250,44,196 ; vcvttss2si %xmm4,%eax
DB 197,249,110,224 ; vmovd %eax,%xmm4
DB 196,226,125,120,228 ; vpbroadcastb %xmm4,%ymm4
@@ -40471,7 +40471,7 @@ _sk_scale_1_float_hsw_8bit LABEL PROC
DB 196,226,125,48,241 ; vpmovzxbw %xmm1,%ymm6
DB 196,227,125,57,201,1 ; vextracti128 $0x1,%ymm1,%xmm1
DB 196,226,125,48,201 ; vpmovzxbw %xmm1,%ymm1
- DB 197,221,219,37,149,19,0,0 ; vpand 0x1395(%rip),%ymm4,%ymm4 # 2a20 <_sk_xor__hsw_8bit+0x2cf>
+ DB 197,221,219,37,21,21,0,0 ; vpand 0x1515(%rip),%ymm4,%ymm4 # 2ba0 <_sk_darken_hsw_8bit+0x2dd>
DB 197,221,213,249 ; vpmullw %ymm1,%ymm4,%ymm7
DB 197,93,213,198 ; vpmullw %ymm6,%ymm4,%ymm8
DB 197,93,213,200 ; vpmullw %ymm0,%ymm4,%ymm9
@@ -40508,7 +40508,7 @@ _sk_scale_u8_hsw_8bit LABEL PROC
DB 196,226,125,49,236 ; vpmovzxbd %xmm4,%ymm5
DB 197,249,112,228,78 ; vpshufd $0x4e,%xmm4,%xmm4
DB 196,226,125,49,228 ; vpmovzxbd %xmm4,%ymm4
- DB 197,253,111,53,31,19,0,0 ; vmovdqa 0x131f(%rip),%ymm6 # 2a40 <_sk_xor__hsw_8bit+0x2ef>
+ DB 197,253,111,53,159,20,0,0 ; vmovdqa 0x149f(%rip),%ymm6 # 2bc0 <_sk_darken_hsw_8bit+0x2fd>
DB 196,226,93,0,230 ; vpshufb %ymm6,%ymm4,%ymm4
DB 196,226,85,0,238 ; vpshufb %ymm6,%ymm5,%ymm5
DB 196,226,125,48,240 ; vpmovzxbw %xmm0,%ymm6
@@ -40623,7 +40623,7 @@ PUBLIC _sk_lerp_1_float_hsw_8bit
_sk_lerp_1_float_hsw_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 197,250,16,32 ; vmovss (%rax),%xmm4
- DB 197,218,89,37,230,15,0,0 ; vmulss 0xfe6(%rip),%xmm4,%xmm4 # 28dc <_sk_xor__hsw_8bit+0x18b>
+ DB 197,218,89,37,110,17,0,0 ; vmulss 0x116e(%rip),%xmm4,%xmm4 # 2a64 <_sk_darken_hsw_8bit+0x1a1>
DB 197,250,44,196 ; vcvttss2si %xmm4,%eax
DB 197,249,110,224 ; vmovd %eax,%xmm4
DB 196,226,125,120,228 ; vpbroadcastb %xmm4,%ymm4
@@ -40633,7 +40633,7 @@ _sk_lerp_1_float_hsw_8bit LABEL PROC
DB 196,226,125,48,241 ; vpmovzxbw %xmm1,%ymm6
DB 196,227,125,57,201,1 ; vextracti128 $0x1,%ymm1,%xmm1
DB 196,226,125,48,201 ; vpmovzxbw %xmm1,%ymm1
- DB 197,221,219,61,53,17,0,0 ; vpand 0x1135(%rip),%ymm4,%ymm7 # 2a60 <_sk_xor__hsw_8bit+0x30f>
+ DB 197,221,219,61,181,18,0,0 ; vpand 0x12b5(%rip),%ymm4,%ymm7 # 2be0 <_sk_darken_hsw_8bit+0x31d>
DB 197,69,213,193 ; vpmullw %ymm1,%ymm7,%ymm8
DB 197,69,213,206 ; vpmullw %ymm6,%ymm7,%ymm9
DB 197,69,213,208 ; vpmullw %ymm0,%ymm7,%ymm10
@@ -40701,7 +40701,7 @@ _sk_lerp_u8_hsw_8bit LABEL PROC
DB 196,226,125,49,236 ; vpmovzxbd %xmm4,%ymm5
DB 197,249,112,228,78 ; vpshufd $0x4e,%xmm4,%xmm4
DB 196,226,125,49,228 ; vpmovzxbd %xmm4,%ymm4
- DB 197,253,111,53,42,16,0,0 ; vmovdqa 0x102a(%rip),%ymm6 # 2a80 <_sk_xor__hsw_8bit+0x32f>
+ DB 197,253,111,53,170,17,0,0 ; vmovdqa 0x11aa(%rip),%ymm6 # 2c00 <_sk_darken_hsw_8bit+0x33d>
DB 196,98,93,0,206 ; vpshufb %ymm6,%ymm4,%ymm9
DB 196,98,85,0,222 ; vpshufb %ymm6,%ymm5,%ymm11
DB 196,226,125,48,240 ; vpmovzxbw %xmm0,%ymm6
@@ -40864,7 +40864,7 @@ _sk_move_dst_src_hsw_8bit LABEL PROC
PUBLIC _sk_black_color_hsw_8bit
_sk_black_color_hsw_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 196,226,125,24,5,245,11,0,0 ; vbroadcastss 0xbf5(%rip),%ymm0 # 28e0 <_sk_xor__hsw_8bit+0x18f>
+ DB 196,226,125,24,5,125,13,0,0 ; vbroadcastss 0xd7d(%rip),%ymm0 # 2a68 <_sk_darken_hsw_8bit+0x1a5>
DB 197,252,40,200 ; vmovaps %ymm0,%ymm1
DB 255,224 ; jmpq *%rax
@@ -40884,7 +40884,7 @@ _sk_clear_hsw_8bit LABEL PROC
PUBLIC _sk_srcatop_hsw_8bit
_sk_srcatop_hsw_8bit LABEL PROC
- DB 197,125,111,5,143,13,0,0 ; vmovdqa 0xd8f(%rip),%ymm8 # 2aa0 <_sk_xor__hsw_8bit+0x34f>
+ DB 197,125,111,5,15,15,0,0 ; vmovdqa 0xf0f(%rip),%ymm8 # 2c20 <_sk_darken_hsw_8bit+0x35d>
DB 196,194,101,0,224 ; vpshufb %ymm8,%ymm3,%ymm4
DB 196,194,109,0,232 ; vpshufb %ymm8,%ymm2,%ymm5
DB 196,98,125,48,208 ; vpmovzxbw %xmm0,%ymm10
@@ -40959,7 +40959,7 @@ _sk_srcatop_hsw_8bit LABEL PROC
PUBLIC _sk_dstatop_hsw_8bit
_sk_dstatop_hsw_8bit LABEL PROC
- DB 197,125,111,5,74,12,0,0 ; vmovdqa 0xc4a(%rip),%ymm8 # 2ac0 <_sk_xor__hsw_8bit+0x36f>
+ DB 197,125,111,5,202,13,0,0 ; vmovdqa 0xdca(%rip),%ymm8 # 2c40 <_sk_darken_hsw_8bit+0x37d>
DB 196,194,117,0,224 ; vpshufb %ymm8,%ymm1,%ymm4
DB 196,194,125,0,232 ; vpshufb %ymm8,%ymm0,%ymm5
DB 196,98,125,48,210 ; vpmovzxbw %xmm2,%ymm10
@@ -41034,7 +41034,7 @@ _sk_dstatop_hsw_8bit LABEL PROC
PUBLIC _sk_srcin_hsw_8bit
_sk_srcin_hsw_8bit LABEL PROC
- DB 197,253,111,37,1,11,0,0 ; vmovdqa 0xb01(%rip),%ymm4 # 2ae0 <_sk_xor__hsw_8bit+0x38f>
+ DB 197,253,111,37,129,12,0,0 ; vmovdqa 0xc81(%rip),%ymm4 # 2c60 <_sk_darken_hsw_8bit+0x39d>
DB 196,226,101,0,236 ; vpshufb %ymm4,%ymm3,%ymm5
DB 196,226,109,0,228 ; vpshufb %ymm4,%ymm2,%ymm4
DB 196,226,125,48,240 ; vpmovzxbw %xmm0,%ymm6
@@ -41072,7 +41072,7 @@ _sk_srcin_hsw_8bit LABEL PROC
PUBLIC _sk_dstin_hsw_8bit
_sk_dstin_hsw_8bit LABEL PROC
- DB 197,253,111,37,119,10,0,0 ; vmovdqa 0xa77(%rip),%ymm4 # 2b00 <_sk_xor__hsw_8bit+0x3af>
+ DB 197,253,111,37,247,11,0,0 ; vmovdqa 0xbf7(%rip),%ymm4 # 2c80 <_sk_darken_hsw_8bit+0x3bd>
DB 196,226,117,0,204 ; vpshufb %ymm4,%ymm1,%ymm1
DB 196,226,125,0,196 ; vpshufb %ymm4,%ymm0,%ymm0
DB 196,226,125,48,226 ; vpmovzxbw %xmm2,%ymm4
@@ -41110,7 +41110,7 @@ _sk_dstin_hsw_8bit LABEL PROC
PUBLIC _sk_srcout_hsw_8bit
_sk_srcout_hsw_8bit LABEL PROC
- DB 197,253,111,37,235,9,0,0 ; vmovdqa 0x9eb(%rip),%ymm4 # 2b20 <_sk_xor__hsw_8bit+0x3cf>
+ DB 197,253,111,37,107,11,0,0 ; vmovdqa 0xb6b(%rip),%ymm4 # 2ca0 <_sk_darken_hsw_8bit+0x3dd>
DB 196,226,109,0,236 ; vpshufb %ymm4,%ymm2,%ymm5
DB 196,226,101,0,228 ; vpshufb %ymm4,%ymm3,%ymm4
DB 197,205,118,246 ; vpcmpeqd %ymm6,%ymm6,%ymm6
@@ -41151,7 +41151,7 @@ _sk_srcout_hsw_8bit LABEL PROC
PUBLIC _sk_dstout_hsw_8bit
_sk_dstout_hsw_8bit LABEL PROC
- DB 197,253,111,37,85,9,0,0 ; vmovdqa 0x955(%rip),%ymm4 # 2b40 <_sk_xor__hsw_8bit+0x3ef>
+ DB 197,253,111,37,213,10,0,0 ; vmovdqa 0xad5(%rip),%ymm4 # 2cc0 <_sk_darken_hsw_8bit+0x3fd>
DB 196,226,125,0,196 ; vpshufb %ymm4,%ymm0,%ymm0
DB 196,226,117,0,204 ; vpshufb %ymm4,%ymm1,%ymm1
DB 197,221,118,228 ; vpcmpeqd %ymm4,%ymm4,%ymm4
@@ -41192,7 +41192,7 @@ _sk_dstout_hsw_8bit LABEL PROC
PUBLIC _sk_srcover_hsw_8bit
_sk_srcover_hsw_8bit LABEL PROC
- DB 197,253,111,37,189,8,0,0 ; vmovdqa 0x8bd(%rip),%ymm4 # 2b60 <_sk_xor__hsw_8bit+0x40f>
+ DB 197,253,111,37,61,10,0,0 ; vmovdqa 0xa3d(%rip),%ymm4 # 2ce0 <_sk_darken_hsw_8bit+0x41d>
DB 196,226,117,0,236 ; vpshufb %ymm4,%ymm1,%ymm5
DB 196,226,125,0,228 ; vpshufb %ymm4,%ymm0,%ymm4
DB 196,98,125,48,202 ; vpmovzxbw %xmm2,%ymm9
@@ -41234,7 +41234,7 @@ _sk_srcover_hsw_8bit LABEL PROC
PUBLIC _sk_dstover_hsw_8bit
_sk_dstover_hsw_8bit LABEL PROC
- DB 197,253,111,37,30,8,0,0 ; vmovdqa 0x81e(%rip),%ymm4 # 2b80 <_sk_xor__hsw_8bit+0x42f>
+ DB 197,253,111,37,158,9,0,0 ; vmovdqa 0x99e(%rip),%ymm4 # 2d00 <_sk_darken_hsw_8bit+0x43d>
DB 196,226,101,0,236 ; vpshufb %ymm4,%ymm3,%ymm5
DB 196,226,109,0,228 ; vpshufb %ymm4,%ymm2,%ymm4
DB 196,98,125,48,200 ; vpmovzxbw %xmm0,%ymm9
@@ -41314,7 +41314,7 @@ _sk_multiply_hsw_8bit LABEL PROC
DB 72,131,236,56 ; sub $0x38,%rsp
DB 197,253,111,243 ; vmovdqa %ymm3,%ymm6
DB 197,253,111,218 ; vmovdqa %ymm2,%ymm3
- DB 197,125,111,13,219,6,0,0 ; vmovdqa 0x6db(%rip),%ymm9 # 2ba0 <_sk_xor__hsw_8bit+0x44f>
+ DB 197,125,111,13,91,8,0,0 ; vmovdqa 0x85b(%rip),%ymm9 # 2d20 <_sk_darken_hsw_8bit+0x45d>
DB 196,194,101,0,225 ; vpshufb %ymm9,%ymm3,%ymm4
DB 196,194,77,0,233 ; vpshufb %ymm9,%ymm6,%ymm5
DB 196,65,45,118,210 ; vpcmpeqd %ymm10,%ymm10,%ymm10
@@ -41455,7 +41455,7 @@ _sk_screen_hsw_8bit LABEL PROC
PUBLIC _sk_xor__hsw_8bit
_sk_xor__hsw_8bit LABEL PROC
- DB 197,125,111,13,103,4,0,0 ; vmovdqa 0x467(%rip),%ymm9 # 2bc0 <_sk_xor__hsw_8bit+0x46f>
+ DB 197,125,111,13,231,5,0,0 ; vmovdqa 0x5e7(%rip),%ymm9 # 2d40 <_sk_darken_hsw_8bit+0x47d>
DB 196,194,109,0,225 ; vpshufb %ymm9,%ymm2,%ymm4
DB 196,194,101,0,249 ; vpshufb %ymm9,%ymm3,%ymm7
DB 196,65,37,118,219 ; vpcmpeqd %ymm11,%ymm11,%ymm11
@@ -41530,9 +41530,90 @@ _sk_xor__hsw_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
+PUBLIC _sk_darken_hsw_8bit
+_sk_darken_hsw_8bit LABEL PROC
+ DB 197,125,111,5,149,4,0,0 ; vmovdqa 0x495(%rip),%ymm8 # 2d60 <_sk_darken_hsw_8bit+0x49d>
+ DB 196,194,101,0,224 ; vpshufb %ymm8,%ymm3,%ymm4
+ DB 196,194,109,0,240 ; vpshufb %ymm8,%ymm2,%ymm6
+ DB 196,98,125,48,208 ; vpmovzxbw %xmm0,%ymm10
+ DB 196,227,125,57,197,1 ; vextracti128 $0x1,%ymm0,%xmm5
+ DB 196,226,125,48,237 ; vpmovzxbw %xmm5,%ymm5
+ DB 196,98,125,48,201 ; vpmovzxbw %xmm1,%ymm9
+ DB 196,227,125,57,207,1 ; vextracti128 $0x1,%ymm1,%xmm7
+ DB 196,226,125,48,255 ; vpmovzxbw %xmm7,%ymm7
+ DB 196,98,125,48,222 ; vpmovzxbw %xmm6,%ymm11
+ DB 196,227,125,57,246,1 ; vextracti128 $0x1,%ymm6,%xmm6
+ DB 196,226,125,48,246 ; vpmovzxbw %xmm6,%ymm6
+ DB 196,98,125,48,228 ; vpmovzxbw %xmm4,%ymm12
+ DB 196,227,125,57,228,1 ; vextracti128 $0x1,%ymm4,%xmm4
+ DB 196,226,125,48,228 ; vpmovzxbw %xmm4,%ymm4
+ DB 197,221,213,231 ; vpmullw %ymm7,%ymm4,%ymm4
+ DB 196,65,29,213,225 ; vpmullw %ymm9,%ymm12,%ymm12
+ DB 197,205,213,245 ; vpmullw %ymm5,%ymm6,%ymm6
+ DB 196,65,37,213,218 ; vpmullw %ymm10,%ymm11,%ymm11
+ DB 196,65,37,253,210 ; vpaddw %ymm10,%ymm11,%ymm10
+ DB 197,205,253,237 ; vpaddw %ymm5,%ymm6,%ymm5
+ DB 196,193,29,253,241 ; vpaddw %ymm9,%ymm12,%ymm6
+ DB 197,221,253,231 ; vpaddw %ymm7,%ymm4,%ymm4
+ DB 197,197,113,212,8 ; vpsrlw $0x8,%ymm4,%ymm7
+ DB 197,205,113,214,8 ; vpsrlw $0x8,%ymm6,%ymm6
+ DB 197,221,113,213,8 ; vpsrlw $0x8,%ymm5,%ymm4
+ DB 196,193,85,113,210,8 ; vpsrlw $0x8,%ymm10,%ymm5
+ DB 196,99,85,56,204,1 ; vinserti128 $0x1,%xmm4,%ymm5,%ymm9
+ DB 196,227,85,70,228,49 ; vperm2i128 $0x31,%ymm4,%ymm5,%ymm4
+ DB 197,53,103,204 ; vpackuswb %ymm4,%ymm9,%ymm9
+ DB 196,227,77,56,239,1 ; vinserti128 $0x1,%xmm7,%ymm6,%ymm5
+ DB 196,227,77,70,247,49 ; vperm2i128 $0x31,%ymm7,%ymm6,%ymm6
+ DB 197,85,103,222 ; vpackuswb %ymm6,%ymm5,%ymm11
+ DB 196,194,117,0,232 ; vpshufb %ymm8,%ymm1,%ymm5
+ DB 196,194,125,0,248 ; vpshufb %ymm8,%ymm0,%ymm7
+ DB 196,98,125,48,194 ; vpmovzxbw %xmm2,%ymm8
+ DB 196,227,125,57,212,1 ; vextracti128 $0x1,%ymm2,%xmm4
+ DB 196,226,125,48,228 ; vpmovzxbw %xmm4,%ymm4
+ DB 196,98,125,48,211 ; vpmovzxbw %xmm3,%ymm10
+ DB 196,227,125,57,222,1 ; vextracti128 $0x1,%ymm3,%xmm6
+ DB 196,226,125,48,246 ; vpmovzxbw %xmm6,%ymm6
+ DB 196,98,125,48,231 ; vpmovzxbw %xmm7,%ymm12
+ DB 196,227,125,57,255,1 ; vextracti128 $0x1,%ymm7,%xmm7
+ DB 196,226,125,48,255 ; vpmovzxbw %xmm7,%ymm7
+ DB 196,98,125,48,237 ; vpmovzxbw %xmm5,%ymm13
+ DB 196,227,125,57,237,1 ; vextracti128 $0x1,%ymm5,%xmm5
+ DB 196,226,125,48,237 ; vpmovzxbw %xmm5,%ymm5
+ DB 197,205,213,237 ; vpmullw %ymm5,%ymm6,%ymm5
+ DB 196,65,45,213,237 ; vpmullw %ymm13,%ymm10,%ymm13
+ DB 197,221,213,255 ; vpmullw %ymm7,%ymm4,%ymm7
+ DB 196,65,61,213,228 ; vpmullw %ymm12,%ymm8,%ymm12
+ DB 196,65,29,253,192 ; vpaddw %ymm8,%ymm12,%ymm8
+ DB 197,197,253,228 ; vpaddw %ymm4,%ymm7,%ymm4
+ DB 196,193,21,253,250 ; vpaddw %ymm10,%ymm13,%ymm7
+ DB 197,213,253,238 ; vpaddw %ymm6,%ymm5,%ymm5
+ DB 197,213,113,213,8 ; vpsrlw $0x8,%ymm5,%ymm5
+ DB 197,205,113,215,8 ; vpsrlw $0x8,%ymm7,%ymm6
+ DB 197,221,113,212,8 ; vpsrlw $0x8,%ymm4,%ymm4
+ DB 196,193,69,113,208,8 ; vpsrlw $0x8,%ymm8,%ymm7
+ DB 196,99,69,56,196,1 ; vinserti128 $0x1,%xmm4,%ymm7,%ymm8
+ DB 196,227,69,70,228,49 ; vperm2i128 $0x31,%ymm4,%ymm7,%ymm4
+ DB 197,189,103,228 ; vpackuswb %ymm4,%ymm8,%ymm4
+ DB 196,227,77,56,253,1 ; vinserti128 $0x1,%xmm5,%ymm6,%ymm7
+ DB 196,227,77,70,237,49 ; vperm2i128 $0x31,%ymm5,%ymm6,%ymm5
+ DB 197,197,103,237 ; vpackuswb %ymm5,%ymm7,%ymm5
+ DB 197,181,222,244 ; vpmaxub %ymm4,%ymm9,%ymm6
+ DB 197,165,222,253 ; vpmaxub %ymm5,%ymm11,%ymm7
+ DB 197,237,252,192 ; vpaddb %ymm0,%ymm2,%ymm0
+ DB 197,229,252,201 ; vpaddb %ymm1,%ymm3,%ymm1
+ DB 197,245,248,255 ; vpsubb %ymm7,%ymm1,%ymm7
+ DB 197,253,248,246 ; vpsubb %ymm6,%ymm0,%ymm6
+ DB 197,245,248,205 ; vpsubb %ymm5,%ymm1,%ymm1
+ DB 197,253,248,196 ; vpsubb %ymm4,%ymm0,%ymm0
+ DB 196,226,125,88,37,50,0,0,0 ; vpbroadcastd 0x32(%rip),%ymm4 # 2a6c <_sk_darken_hsw_8bit+0x1a9>
+ DB 196,227,125,76,198,64 ; vpblendvb %ymm4,%ymm6,%ymm0,%ymm0
+ DB 196,227,117,76,207,64 ; vpblendvb %ymm4,%ymm7,%ymm1,%ymm1
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 255,224 ; jmpq *%rax
+
ALIGN 4
DB 0,0 ; add %al,(%rax)
- DB 127,67 ; jg 290b <_sk_xor__hsw_8bit+0x1ba>
+ DB 127,67 ; jg 2a93 <_sk_darken_hsw_8bit+0x1d0>
DB 1,1 ; add %eax,(%rcx)
DB 1,0 ; add %eax,(%rax)
DB 0,0 ; add %al,(%rax)
@@ -41542,11 +41623,14 @@ ALIGN 4
DB 0,0 ; add %al,(%rax)
DB 0,255 ; add %bh,%bh
DB 0,0 ; add %al,(%rax)
- DB 127,67 ; jg 291f <_sk_xor__hsw_8bit+0x1ce>
+ DB 127,67 ; jg 2aa7 <_sk_darken_hsw_8bit+0x1e4>
DB 0,0 ; add %al,(%rax)
- DB 127,67 ; jg 2923 <_sk_xor__hsw_8bit+0x1d2>
+ DB 127,67 ; jg 2aab <_sk_darken_hsw_8bit+0x1e8>
DB 0,0 ; add %al,(%rax)
DB 0,255 ; add %bh,%bh
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,0 ; incl (%rax)
ALIGN 32
DB 0,0 ; add %al,(%rax)
@@ -41979,6 +42063,28 @@ ALIGN 32
DB 11,11 ; or (%rbx),%ecx
DB 11,11 ; or (%rbx),%ecx
DB 15 ; (bad)
+ DB 15 ; (bad)
+ DB 15 ; (bad)
+ DB 15,3,3 ; lsl (%rbx),%eax
+ DB 3,3 ; add (%rbx),%eax
+ DB 7 ; (bad)
+ DB 7 ; (bad)
+ DB 7 ; (bad)
+ DB 7 ; (bad)
+ DB 11,11 ; or (%rbx),%ecx
+ DB 11,11 ; or (%rbx),%ecx
+ DB 15 ; (bad)
+ DB 15 ; (bad)
+ DB 15 ; (bad)
+ DB 15,3,3 ; lsl (%rbx),%eax
+ DB 3,3 ; add (%rbx),%eax
+ DB 7 ; (bad)
+ DB 7 ; (bad)
+ DB 7 ; (bad)
+ DB 7 ; (bad)
+ DB 11,11 ; or (%rbx),%ecx
+ DB 11,11 ; or (%rbx),%ecx
+ DB 15 ; (bad)
DB 15 ; .byte 0xf
DB 15 ; .byte 0xf
DB 15 ; .byte 0xf
@@ -42092,7 +42198,7 @@ _sk_uniform_color_sse41_8bit LABEL PROC
PUBLIC _sk_set_rgb_sse41_8bit
_sk_set_rgb_sse41_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 243,15,16,37,96,28,0,0 ; movss 0x1c60(%rip),%xmm4 # 1de0 <_sk_xor__sse41_8bit+0x148>
+ DB 243,15,16,37,208,29,0,0 ; movss 0x1dd0(%rip),%xmm4 # 1f50 <_sk_darken_sse41_8bit+0x170>
DB 243,15,16,40 ; movss (%rax),%xmm5
DB 243,15,89,236 ; mulss %xmm4,%xmm5
DB 243,72,15,44,205 ; cvttss2si %xmm5,%rcx
@@ -42107,7 +42213,7 @@ _sk_set_rgb_sse41_8bit LABEL PROC
DB 9,208 ; or %edx,%eax
DB 102,15,110,224 ; movd %eax,%xmm4
DB 102,15,112,228,0 ; pshufd $0x0,%xmm4,%xmm4
- DB 102,15,111,45,48,28,0,0 ; movdqa 0x1c30(%rip),%xmm5 # 1df0 <_sk_xor__sse41_8bit+0x158>
+ DB 102,15,111,45,160,29,0,0 ; movdqa 0x1da0(%rip),%xmm5 # 1f60 <_sk_darken_sse41_8bit+0x180>
DB 102,15,219,205 ; pand %xmm5,%xmm1
DB 102,15,219,197 ; pand %xmm5,%xmm0
DB 102,15,235,196 ; por %xmm4,%xmm0
@@ -42119,12 +42225,12 @@ PUBLIC _sk_premul_sse41_8bit
_sk_premul_sse41_8bit LABEL PROC
DB 102,15,111,225 ; movdqa %xmm1,%xmm4
DB 102,15,111,232 ; movdqa %xmm0,%xmm5
- DB 102,15,111,5,28,28,0,0 ; movdqa 0x1c1c(%rip),%xmm0 # 1e00 <_sk_xor__sse41_8bit+0x168>
+ DB 102,15,111,5,140,29,0,0 ; movdqa 0x1d8c(%rip),%xmm0 # 1f70 <_sk_darken_sse41_8bit+0x190>
DB 102,15,111,253 ; movdqa %xmm5,%xmm7
DB 102,15,56,0,248 ; pshufb %xmm0,%xmm7
DB 102,15,111,244 ; movdqa %xmm4,%xmm6
DB 102,15,56,0,240 ; pshufb %xmm0,%xmm6
- DB 102,15,111,5,18,28,0,0 ; movdqa 0x1c12(%rip),%xmm0 # 1e10 <_sk_xor__sse41_8bit+0x178>
+ DB 102,15,111,5,130,29,0,0 ; movdqa 0x1d82(%rip),%xmm0 # 1f80 <_sk_darken_sse41_8bit+0x1a0>
DB 102,15,235,240 ; por %xmm0,%xmm6
DB 102,15,235,248 ; por %xmm0,%xmm7
DB 102,69,15,239,192 ; pxor %xmm8,%xmm8
@@ -42155,7 +42261,7 @@ _sk_premul_sse41_8bit LABEL PROC
PUBLIC _sk_swap_rb_sse41_8bit
_sk_swap_rb_sse41_8bit LABEL PROC
- DB 102,15,111,37,159,27,0,0 ; movdqa 0x1b9f(%rip),%xmm4 # 1e20 <_sk_xor__sse41_8bit+0x188>
+ DB 102,15,111,37,15,29,0,0 ; movdqa 0x1d0f(%rip),%xmm4 # 1f90 <_sk_darken_sse41_8bit+0x1b0>
DB 102,15,56,0,196 ; pshufb %xmm4,%xmm0
DB 102,15,56,0,204 ; pshufb %xmm4,%xmm1
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -42276,7 +42382,7 @@ _sk_load_8888_dst_sse41_8bit LABEL PROC
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 233,255,255,255,221 ; jmpq ffffffffde000410 <_sk_xor__sse41_8bit+0xffffffffddffe778>
+ DB 233,255,255,255,221 ; jmpq ffffffffde000410 <_sk_darken_sse41_8bit+0xffffffffddffe630>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; .byte 0xff
@@ -42355,7 +42461,7 @@ _sk_load_bgra_sse41_8bit LABEL PROC
DB 117,35 ; jne 504 <_sk_load_bgra_sse41_8bit+0x44>
DB 243,66,15,111,76,130,16 ; movdqu 0x10(%rdx,%r8,4),%xmm1
DB 243,66,15,111,4,130 ; movdqu (%rdx,%r8,4),%xmm0
- DB 102,15,111,37,58,25,0,0 ; movdqa 0x193a(%rip),%xmm4 # 1e30 <_sk_xor__sse41_8bit+0x198>
+ DB 102,15,111,37,170,26,0,0 ; movdqa 0x1aaa(%rip),%xmm4 # 1fa0 <_sk_darken_sse41_8bit+0x1c0>
DB 102,15,56,0,196 ; pshufb %xmm4,%xmm0
DB 102,15,56,0,204 ; pshufb %xmm4,%xmm1
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -42416,7 +42522,7 @@ _sk_load_bgra_dst_sse41_8bit LABEL PROC
DB 117,35 ; jne 5d4 <_sk_load_bgra_dst_sse41_8bit+0x44>
DB 243,66,15,111,92,130,16 ; movdqu 0x10(%rdx,%r8,4),%xmm3
DB 243,66,15,111,20,130 ; movdqu (%rdx,%r8,4),%xmm2
- DB 102,15,111,37,122,24,0,0 ; movdqa 0x187a(%rip),%xmm4 # 1e40 <_sk_xor__sse41_8bit+0x1a8>
+ DB 102,15,111,37,234,25,0,0 ; movdqa 0x19ea(%rip),%xmm4 # 1fb0 <_sk_darken_sse41_8bit+0x1d0>
DB 102,15,56,0,212 ; pshufb %xmm4,%xmm2
DB 102,15,56,0,220 ; pshufb %xmm4,%xmm3
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -42473,7 +42579,7 @@ _sk_store_bgra_sse41_8bit LABEL PROC
DB 72,15,175,209 ; imul %rcx,%rdx
DB 72,193,226,2 ; shl $0x2,%rdx
DB 72,3,16 ; add (%rax),%rdx
- DB 102,15,111,53,204,23,0,0 ; movdqa 0x17cc(%rip),%xmm6 # 1e50 <_sk_xor__sse41_8bit+0x1b8>
+ DB 102,15,111,53,60,25,0,0 ; movdqa 0x193c(%rip),%xmm6 # 1fc0 <_sk_darken_sse41_8bit+0x1e0>
DB 102,15,111,233 ; movdqa %xmm1,%xmm5
DB 102,15,56,0,238 ; pshufb %xmm6,%xmm5
DB 102,15,111,224 ; movdqa %xmm0,%xmm4
@@ -42537,7 +42643,7 @@ _sk_load_a8_sse41_8bit LABEL PROC
DB 77,133,201 ; test %r9,%r9
DB 117,42 ; jne 76b <_sk_load_a8_sse41_8bit+0x47>
DB 102,66,15,56,48,12,2 ; pmovzxbw (%rdx,%r8,1),%xmm1
- DB 102,15,219,13,16,23,0,0 ; pand 0x1710(%rip),%xmm1 # 1e60 <_sk_xor__sse41_8bit+0x1c8>
+ DB 102,15,219,13,128,24,0,0 ; pand 0x1880(%rip),%xmm1 # 1fd0 <_sk_darken_sse41_8bit+0x1f0>
DB 102,15,239,228 ; pxor %xmm4,%xmm4
DB 102,15,56,51,193 ; pmovzxwd %xmm1,%xmm0
DB 102,15,105,204 ; punpckhwd %xmm4,%xmm1
@@ -42585,7 +42691,7 @@ _sk_load_a8_sse41_8bit LABEL PROC
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 233,255,255,255,222 ; jmpq ffffffffdf00080c <_sk_xor__sse41_8bit+0xffffffffdeffeb74>
+ DB 233,255,255,255,222 ; jmpq ffffffffdf00080c <_sk_darken_sse41_8bit+0xffffffffdeffea2c>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,211 ; callq *%rbx
@@ -42608,7 +42714,7 @@ _sk_load_a8_dst_sse41_8bit LABEL PROC
DB 77,133,201 ; test %r9,%r9
DB 117,42 ; jne 85f <_sk_load_a8_dst_sse41_8bit+0x47>
DB 102,66,15,56,48,28,2 ; pmovzxbw (%rdx,%r8,1),%xmm3
- DB 102,15,219,29,44,22,0,0 ; pand 0x162c(%rip),%xmm3 # 1e70 <_sk_xor__sse41_8bit+0x1d8>
+ DB 102,15,219,29,156,23,0,0 ; pand 0x179c(%rip),%xmm3 # 1fe0 <_sk_darken_sse41_8bit+0x200>
DB 102,15,239,228 ; pxor %xmm4,%xmm4
DB 102,15,56,51,211 ; pmovzxwd %xmm3,%xmm2
DB 102,15,105,220 ; punpckhwd %xmm4,%xmm3
@@ -42656,7 +42762,7 @@ _sk_load_a8_dst_sse41_8bit LABEL PROC
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 233,255,255,255,222 ; jmpq ffffffffdf000900 <_sk_xor__sse41_8bit+0xffffffffdeffec68>
+ DB 233,255,255,255,222 ; jmpq ffffffffdf000900 <_sk_darken_sse41_8bit+0xffffffffdeffeb20>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,211 ; callq *%rbx
@@ -42676,7 +42782,7 @@ _sk_store_a8_sse41_8bit LABEL PROC
DB 72,99,87,8 ; movslq 0x8(%rdi),%rdx
DB 72,15,175,209 ; imul %rcx,%rdx
DB 72,3,16 ; add (%rax),%rdx
- DB 102,15,111,45,84,21,0,0 ; movdqa 0x1554(%rip),%xmm5 # 1e80 <_sk_xor__sse41_8bit+0x1e8>
+ DB 102,15,111,45,196,22,0,0 ; movdqa 0x16c4(%rip),%xmm5 # 1ff0 <_sk_darken_sse41_8bit+0x210>
DB 102,15,111,241 ; movdqa %xmm1,%xmm6
DB 102,15,56,0,245 ; pshufb %xmm5,%xmm6
DB 102,15,111,224 ; movdqa %xmm0,%xmm4
@@ -42684,7 +42790,7 @@ _sk_store_a8_sse41_8bit LABEL PROC
DB 102,15,108,230 ; punpcklqdq %xmm6,%xmm4
DB 77,133,201 ; test %r9,%r9
DB 117,19 ; jne 95a <_sk_store_a8_sse41_8bit+0x4e>
- DB 102,15,56,0,37,96,21,0,0 ; pshufb 0x1560(%rip),%xmm4 # 1eb0 <_sk_xor__sse41_8bit+0x218>
+ DB 102,15,56,0,37,208,22,0,0 ; pshufb 0x16d0(%rip),%xmm4 # 2020 <_sk_darken_sse41_8bit+0x240>
DB 102,66,15,214,36,2 ; movq %xmm4,(%rdx,%r8,1)
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
@@ -42700,13 +42806,13 @@ _sk_store_a8_sse41_8bit LABEL PROC
DB 102,66,15,58,20,36,2,0 ; pextrb $0x0,%xmm4,(%rdx,%r8,1)
DB 235,209 ; jmp 956 <_sk_store_a8_sse41_8bit+0x4a>
DB 102,66,15,58,20,100,2,2,4 ; pextrb $0x4,%xmm4,0x2(%rdx,%r8,1)
- DB 102,15,56,0,37,9,21,0,0 ; pshufb 0x1509(%rip),%xmm4 # 1ea0 <_sk_xor__sse41_8bit+0x208>
+ DB 102,15,56,0,37,121,22,0,0 ; pshufb 0x1679(%rip),%xmm4 # 2010 <_sk_darken_sse41_8bit+0x230>
DB 102,66,15,58,21,36,2,0 ; pextrw $0x0,%xmm4,(%rdx,%r8,1)
DB 235,181 ; jmp 956 <_sk_store_a8_sse41_8bit+0x4a>
DB 102,66,15,58,20,100,2,6,12 ; pextrb $0xc,%xmm4,0x6(%rdx,%r8,1)
DB 102,66,15,58,20,100,2,5,10 ; pextrb $0xa,%xmm4,0x5(%rdx,%r8,1)
DB 102,66,15,58,20,100,2,4,8 ; pextrb $0x8,%xmm4,0x4(%rdx,%r8,1)
- DB 102,15,56,0,37,203,20,0,0 ; pshufb 0x14cb(%rip),%xmm4 # 1e90 <_sk_xor__sse41_8bit+0x1f8>
+ DB 102,15,56,0,37,59,22,0,0 ; pshufb 0x163b(%rip),%xmm4 # 2000 <_sk_darken_sse41_8bit+0x220>
DB 102,66,15,126,36,2 ; movd %xmm4,(%rdx,%r8,1)
DB 235,137 ; jmp 956 <_sk_store_a8_sse41_8bit+0x4a>
DB 15,31,0 ; nopl (%rax)
@@ -42744,14 +42850,14 @@ _sk_load_g8_sse41_8bit LABEL PROC
DB 77,133,201 ; test %r9,%r9
DB 117,66 ; jne a4b <_sk_load_g8_sse41_8bit+0x5f>
DB 102,66,15,56,48,12,2 ; pmovzxbw (%rdx,%r8,1),%xmm1
- DB 102,15,219,13,168,20,0,0 ; pand 0x14a8(%rip),%xmm1 # 1ec0 <_sk_xor__sse41_8bit+0x228>
+ DB 102,15,219,13,24,22,0,0 ; pand 0x1618(%rip),%xmm1 # 2030 <_sk_darken_sse41_8bit+0x250>
DB 102,15,239,228 ; pxor %xmm4,%xmm4
DB 102,15,56,51,193 ; pmovzxwd %xmm1,%xmm0
DB 102,15,105,204 ; punpckhwd %xmm4,%xmm1
- DB 102,15,111,37,163,20,0,0 ; movdqa 0x14a3(%rip),%xmm4 # 1ed0 <_sk_xor__sse41_8bit+0x238>
+ DB 102,15,111,37,19,22,0,0 ; movdqa 0x1613(%rip),%xmm4 # 2040 <_sk_darken_sse41_8bit+0x260>
DB 102,15,56,64,204 ; pmulld %xmm4,%xmm1
DB 102,15,56,64,196 ; pmulld %xmm4,%xmm0
- DB 102,15,111,37,161,20,0,0 ; movdqa 0x14a1(%rip),%xmm4 # 1ee0 <_sk_xor__sse41_8bit+0x248>
+ DB 102,15,111,37,17,22,0,0 ; movdqa 0x1611(%rip),%xmm4 # 2050 <_sk_darken_sse41_8bit+0x270>
DB 102,15,235,196 ; por %xmm4,%xmm0
DB 102,15,235,204 ; por %xmm4,%xmm1
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -42796,7 +42902,7 @@ _sk_load_g8_sse41_8bit LABEL PROC
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 232,255,255,255,221 ; callq ffffffffde000af0 <_sk_xor__sse41_8bit+0xffffffffddffee58>
+ DB 232,255,255,255,221 ; callq ffffffffde000af0 <_sk_darken_sse41_8bit+0xffffffffddffed10>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,210 ; callq *%rdx
@@ -42819,14 +42925,14 @@ _sk_load_g8_dst_sse41_8bit LABEL PROC
DB 77,133,201 ; test %r9,%r9
DB 117,66 ; jne b5b <_sk_load_g8_dst_sse41_8bit+0x5f>
DB 102,66,15,56,48,28,2 ; pmovzxbw (%rdx,%r8,1),%xmm3
- DB 102,15,219,29,200,19,0,0 ; pand 0x13c8(%rip),%xmm3 # 1ef0 <_sk_xor__sse41_8bit+0x258>
+ DB 102,15,219,29,56,21,0,0 ; pand 0x1538(%rip),%xmm3 # 2060 <_sk_darken_sse41_8bit+0x280>
DB 102,15,239,228 ; pxor %xmm4,%xmm4
DB 102,15,56,51,211 ; pmovzxwd %xmm3,%xmm2
DB 102,15,105,220 ; punpckhwd %xmm4,%xmm3
- DB 102,15,111,37,195,19,0,0 ; movdqa 0x13c3(%rip),%xmm4 # 1f00 <_sk_xor__sse41_8bit+0x268>
+ DB 102,15,111,37,51,21,0,0 ; movdqa 0x1533(%rip),%xmm4 # 2070 <_sk_darken_sse41_8bit+0x290>
DB 102,15,56,64,220 ; pmulld %xmm4,%xmm3
DB 102,15,56,64,212 ; pmulld %xmm4,%xmm2
- DB 102,15,111,37,193,19,0,0 ; movdqa 0x13c1(%rip),%xmm4 # 1f10 <_sk_xor__sse41_8bit+0x278>
+ DB 102,15,111,37,49,21,0,0 ; movdqa 0x1531(%rip),%xmm4 # 2080 <_sk_darken_sse41_8bit+0x2a0>
DB 102,15,235,212 ; por %xmm4,%xmm2
DB 102,15,235,220 ; por %xmm4,%xmm3
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -42871,7 +42977,7 @@ _sk_load_g8_dst_sse41_8bit LABEL PROC
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 232,255,255,255,221 ; callq ffffffffde000c00 <_sk_xor__sse41_8bit+0xffffffffddffef68>
+ DB 232,255,255,255,221 ; callq ffffffffde000c00 <_sk_darken_sse41_8bit+0xffffffffddffee20>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,210 ; callq *%rdx
@@ -42897,7 +43003,7 @@ _sk_srcover_rgba_8888_sse41_8bit LABEL PROC
DB 243,70,15,111,68,138,16 ; movdqu 0x10(%rdx,%r9,4),%xmm8
DB 243,70,15,111,12,138 ; movdqu (%rdx,%r9,4),%xmm9
DB 77,133,192 ; test %r8,%r8
- DB 102,15,111,37,215,18,0,0 ; movdqa 0x12d7(%rip),%xmm4 # 1f20 <_sk_xor__sse41_8bit+0x288>
+ DB 102,15,111,37,71,20,0,0 ; movdqa 0x1447(%rip),%xmm4 # 2090 <_sk_darken_sse41_8bit+0x2b0>
DB 102,15,111,241 ; movdqa %xmm1,%xmm6
DB 102,15,56,0,244 ; pshufb %xmm4,%xmm6
DB 102,15,111,248 ; movdqa %xmm0,%xmm7
@@ -43017,7 +43123,7 @@ _sk_scale_1_float_sse41_8bit LABEL PROC
DB 102,15,111,232 ; movdqa %xmm0,%xmm5
DB 72,173 ; lods %ds:(%rsi),%rax
DB 243,15,16,0 ; movss (%rax),%xmm0
- DB 243,15,89,5,186,15,0,0 ; mulss 0xfba(%rip),%xmm0 # 1de4 <_sk_xor__sse41_8bit+0x14c>
+ DB 243,15,89,5,42,17,0,0 ; mulss 0x112a(%rip),%xmm0 # 1f54 <_sk_darken_sse41_8bit+0x174>
DB 243,15,44,192 ; cvttss2si %xmm0,%eax
DB 15,87,192 ; xorps %xmm0,%xmm0
DB 102,68,15,56,48,197 ; pmovzxbw %xmm5,%xmm8
@@ -43025,7 +43131,7 @@ _sk_scale_1_float_sse41_8bit LABEL PROC
DB 102,68,15,56,48,204 ; pmovzxbw %xmm4,%xmm9
DB 102,15,104,224 ; punpckhbw %xmm0,%xmm4
DB 102,15,110,240 ; movd %eax,%xmm6
- DB 102,15,56,0,53,222,16,0,0 ; pshufb 0x10de(%rip),%xmm6 # 1f30 <_sk_xor__sse41_8bit+0x298>
+ DB 102,15,56,0,53,78,18,0,0 ; pshufb 0x124e(%rip),%xmm6 # 20a0 <_sk_darken_sse41_8bit+0x2c0>
DB 102,15,111,206 ; movdqa %xmm6,%xmm1
DB 102,65,15,213,201 ; pmullw %xmm9,%xmm1
DB 102,15,111,198 ; movdqa %xmm6,%xmm0
@@ -43058,11 +43164,11 @@ _sk_scale_u8_sse41_8bit LABEL PROC
DB 77,133,201 ; test %r9,%r9
DB 15,133,160,0,0,0 ; jne f63 <_sk_scale_u8_sse41_8bit+0xc1>
DB 102,66,15,56,48,52,2 ; pmovzxbw (%rdx,%r8,1),%xmm6
- DB 102,15,219,53,110,16,0,0 ; pand 0x106e(%rip),%xmm6 # 1f40 <_sk_xor__sse41_8bit+0x2a8>
+ DB 102,15,219,53,222,17,0,0 ; pand 0x11de(%rip),%xmm6 # 20b0 <_sk_darken_sse41_8bit+0x2d0>
DB 102,69,15,239,192 ; pxor %xmm8,%xmm8
DB 102,15,111,254 ; movdqa %xmm6,%xmm7
- DB 102,15,56,0,61,108,16,0,0 ; pshufb 0x106c(%rip),%xmm7 # 1f50 <_sk_xor__sse41_8bit+0x2b8>
- DB 102,15,56,0,53,115,16,0,0 ; pshufb 0x1073(%rip),%xmm6 # 1f60 <_sk_xor__sse41_8bit+0x2c8>
+ DB 102,15,56,0,61,220,17,0,0 ; pshufb 0x11dc(%rip),%xmm7 # 20c0 <_sk_darken_sse41_8bit+0x2e0>
+ DB 102,15,56,0,53,227,17,0,0 ; pshufb 0x11e3(%rip),%xmm6 # 20d0 <_sk_darken_sse41_8bit+0x2f0>
DB 102,68,15,56,48,200 ; pmovzxbw %xmm0,%xmm9
DB 102,65,15,104,192 ; punpckhbw %xmm8,%xmm0
DB 102,68,15,56,48,209 ; pmovzxbw %xmm1,%xmm10
@@ -43145,7 +43251,7 @@ PUBLIC _sk_lerp_1_float_sse41_8bit
_sk_lerp_1_float_sse41_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 243,15,16,32 ; movss (%rax),%xmm4
- DB 243,15,89,37,190,13,0,0 ; mulss 0xdbe(%rip),%xmm4 # 1de8 <_sk_xor__sse41_8bit+0x150>
+ DB 243,15,89,37,46,15,0,0 ; mulss 0xf2e(%rip),%xmm4 # 1f58 <_sk_darken_sse41_8bit+0x178>
DB 243,15,44,196 ; cvttss2si %xmm4,%eax
DB 102,15,110,224 ; movd %eax,%xmm4
DB 102,15,96,228 ; punpcklbw %xmm4,%xmm4
@@ -43156,7 +43262,7 @@ _sk_lerp_1_float_sse41_8bit LABEL PROC
DB 102,65,15,104,193 ; punpckhbw %xmm9,%xmm0
DB 102,68,15,56,48,217 ; pmovzxbw %xmm1,%xmm11
DB 102,65,15,104,201 ; punpckhbw %xmm9,%xmm1
- DB 102,15,56,0,61,11,15,0,0 ; pshufb 0xf0b(%rip),%xmm7 # 1f70 <_sk_xor__sse41_8bit+0x2d8>
+ DB 102,15,56,0,61,123,16,0,0 ; pshufb 0x107b(%rip),%xmm7 # 20e0 <_sk_darken_sse41_8bit+0x300>
DB 102,68,15,111,231 ; movdqa %xmm7,%xmm12
DB 102,69,15,213,227 ; pmullw %xmm11,%xmm12
DB 102,68,15,111,239 ; movdqa %xmm7,%xmm13
@@ -43217,11 +43323,11 @@ _sk_lerp_u8_sse41_8bit LABEL PROC
DB 77,133,201 ; test %r9,%r9
DB 15,133,46,1,0,0 ; jne 128d <_sk_lerp_u8_sse41_8bit+0x14f>
DB 102,66,15,56,48,60,2 ; pmovzxbw (%rdx,%r8,1),%xmm7
- DB 102,15,219,61,18,14,0,0 ; pand 0xe12(%rip),%xmm7 # 1f80 <_sk_xor__sse41_8bit+0x2e8>
+ DB 102,15,219,61,130,15,0,0 ; pand 0xf82(%rip),%xmm7 # 20f0 <_sk_darken_sse41_8bit+0x310>
DB 102,69,15,239,192 ; pxor %xmm8,%xmm8
DB 102,15,111,247 ; movdqa %xmm7,%xmm6
- DB 102,15,56,0,53,16,14,0,0 ; pshufb 0xe10(%rip),%xmm6 # 1f90 <_sk_xor__sse41_8bit+0x2f8>
- DB 102,15,56,0,61,23,14,0,0 ; pshufb 0xe17(%rip),%xmm7 # 1fa0 <_sk_xor__sse41_8bit+0x308>
+ DB 102,15,56,0,53,128,15,0,0 ; pshufb 0xf80(%rip),%xmm6 # 2100 <_sk_darken_sse41_8bit+0x320>
+ DB 102,15,56,0,61,135,15,0,0 ; pshufb 0xf87(%rip),%xmm7 # 2110 <_sk_darken_sse41_8bit+0x330>
DB 102,68,15,56,48,200 ; pmovzxbw %xmm0,%xmm9
DB 102,65,15,104,192 ; punpckhbw %xmm8,%xmm0
DB 102,68,15,56,48,209 ; pmovzxbw %xmm1,%xmm10
@@ -43316,7 +43422,7 @@ _sk_lerp_u8_sse41_8bit LABEL PROC
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
- DB 233,255,255,255,222 ; jmpq ffffffffdf001338 <_sk_xor__sse41_8bit+0xffffffffdefff6a0>
+ DB 233,255,255,255,222 ; jmpq ffffffffdf001338 <_sk_darken_sse41_8bit+0xffffffffdefff558>
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,211 ; callq *%rbx
@@ -43344,7 +43450,7 @@ _sk_move_dst_src_sse41_8bit LABEL PROC
PUBLIC _sk_black_color_sse41_8bit
_sk_black_color_sse41_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 15,40,5,79,12,0,0 ; movaps 0xc4f(%rip),%xmm0 # 1fb0 <_sk_xor__sse41_8bit+0x318>
+ DB 15,40,5,191,13,0,0 ; movaps 0xdbf(%rip),%xmm0 # 2120 <_sk_darken_sse41_8bit+0x340>
DB 15,40,200 ; movaps %xmm0,%xmm1
DB 255,224 ; jmpq *%rax
@@ -43364,7 +43470,7 @@ _sk_clear_sse41_8bit LABEL PROC
PUBLIC _sk_srcatop_sse41_8bit
_sk_srcatop_sse41_8bit LABEL PROC
- DB 102,68,15,111,21,59,12,0,0 ; movdqa 0xc3b(%rip),%xmm10 # 1fc0 <_sk_xor__sse41_8bit+0x328>
+ DB 102,68,15,111,21,171,13,0,0 ; movdqa 0xdab(%rip),%xmm10 # 2130 <_sk_darken_sse41_8bit+0x350>
DB 102,68,15,111,219 ; movdqa %xmm3,%xmm11
DB 102,68,15,56,48,195 ; pmovzxbw %xmm3,%xmm8
DB 102,15,111,235 ; movdqa %xmm3,%xmm5
@@ -43432,7 +43538,7 @@ _sk_srcatop_sse41_8bit LABEL PROC
PUBLIC _sk_dstatop_sse41_8bit
_sk_dstatop_sse41_8bit LABEL PROC
- DB 102,68,15,111,29,16,11,0,0 ; movdqa 0xb10(%rip),%xmm11 # 1fd0 <_sk_xor__sse41_8bit+0x338>
+ DB 102,68,15,111,29,128,12,0,0 ; movdqa 0xc80(%rip),%xmm11 # 2140 <_sk_darken_sse41_8bit+0x360>
DB 102,68,15,111,233 ; movdqa %xmm1,%xmm13
DB 102,69,15,56,0,235 ; pshufb %xmm11,%xmm13
DB 102,68,15,111,248 ; movdqa %xmm0,%xmm15
@@ -43502,7 +43608,7 @@ PUBLIC _sk_srcin_sse41_8bit
_sk_srcin_sse41_8bit LABEL PROC
DB 102,15,111,225 ; movdqa %xmm1,%xmm4
DB 102,15,111,232 ; movdqa %xmm0,%xmm5
- DB 102,15,111,5,215,9,0,0 ; movdqa 0x9d7(%rip),%xmm0 # 1fe0 <_sk_xor__sse41_8bit+0x348>
+ DB 102,15,111,5,71,11,0,0 ; movdqa 0xb47(%rip),%xmm0 # 2150 <_sk_darken_sse41_8bit+0x370>
DB 102,15,111,243 ; movdqa %xmm3,%xmm6
DB 102,15,56,0,240 ; pshufb %xmm0,%xmm6
DB 102,15,111,250 ; movdqa %xmm2,%xmm7
@@ -43535,7 +43641,7 @@ _sk_srcin_sse41_8bit LABEL PROC
PUBLIC _sk_dstin_sse41_8bit
_sk_dstin_sse41_8bit LABEL PROC
- DB 102,15,111,37,90,9,0,0 ; movdqa 0x95a(%rip),%xmm4 # 1ff0 <_sk_xor__sse41_8bit+0x358>
+ DB 102,15,111,37,202,10,0,0 ; movdqa 0xaca(%rip),%xmm4 # 2160 <_sk_darken_sse41_8bit+0x380>
DB 102,15,56,0,204 ; pshufb %xmm4,%xmm1
DB 102,15,56,0,196 ; pshufb %xmm4,%xmm0
DB 102,69,15,239,210 ; pxor %xmm10,%xmm10
@@ -43572,7 +43678,7 @@ PUBLIC _sk_srcout_sse41_8bit
_sk_srcout_sse41_8bit LABEL PROC
DB 102,15,111,225 ; movdqa %xmm1,%xmm4
DB 102,15,111,232 ; movdqa %xmm0,%xmm5
- DB 102,15,111,5,205,8,0,0 ; movdqa 0x8cd(%rip),%xmm0 # 2000 <_sk_xor__sse41_8bit+0x368>
+ DB 102,15,111,5,61,10,0,0 ; movdqa 0xa3d(%rip),%xmm0 # 2170 <_sk_darken_sse41_8bit+0x390>
DB 102,15,111,250 ; movdqa %xmm2,%xmm7
DB 102,15,56,0,248 ; pshufb %xmm0,%xmm7
DB 102,15,111,243 ; movdqa %xmm3,%xmm6
@@ -43608,7 +43714,7 @@ _sk_srcout_sse41_8bit LABEL PROC
PUBLIC _sk_dstout_sse41_8bit
_sk_dstout_sse41_8bit LABEL PROC
- DB 102,15,111,37,68,8,0,0 ; movdqa 0x844(%rip),%xmm4 # 2010 <_sk_xor__sse41_8bit+0x378>
+ DB 102,15,111,37,180,9,0,0 ; movdqa 0x9b4(%rip),%xmm4 # 2180 <_sk_darken_sse41_8bit+0x3a0>
DB 102,15,56,0,196 ; pshufb %xmm4,%xmm0
DB 102,15,56,0,204 ; pshufb %xmm4,%xmm1
DB 102,15,118,228 ; pcmpeqd %xmm4,%xmm4
@@ -43646,7 +43752,7 @@ _sk_dstout_sse41_8bit LABEL PROC
PUBLIC _sk_srcover_sse41_8bit
_sk_srcover_sse41_8bit LABEL PROC
- DB 102,15,111,53,179,7,0,0 ; movdqa 0x7b3(%rip),%xmm6 # 2020 <_sk_xor__sse41_8bit+0x388>
+ DB 102,15,111,53,35,9,0,0 ; movdqa 0x923(%rip),%xmm6 # 2190 <_sk_darken_sse41_8bit+0x3b0>
DB 102,68,15,111,217 ; movdqa %xmm1,%xmm11
DB 102,68,15,56,0,222 ; pshufb %xmm6,%xmm11
DB 102,15,111,232 ; movdqa %xmm0,%xmm5
@@ -43685,7 +43791,7 @@ _sk_srcover_sse41_8bit LABEL PROC
PUBLIC _sk_dstover_sse41_8bit
_sk_dstover_sse41_8bit LABEL PROC
- DB 102,68,15,111,5,19,7,0,0 ; movdqa 0x713(%rip),%xmm8 # 2030 <_sk_xor__sse41_8bit+0x398>
+ DB 102,68,15,111,5,131,8,0,0 ; movdqa 0x883(%rip),%xmm8 # 21a0 <_sk_darken_sse41_8bit+0x3c0>
DB 102,68,15,111,209 ; movdqa %xmm1,%xmm10
DB 102,68,15,56,48,201 ; pmovzxbw %xmm1,%xmm9
DB 102,15,252,203 ; paddb %xmm3,%xmm1
@@ -43760,7 +43866,7 @@ _sk_multiply_sse41_8bit LABEL PROC
DB 102,15,111,218 ; movdqa %xmm2,%xmm3
DB 102,15,111,209 ; movdqa %xmm1,%xmm2
DB 102,15,111,200 ; movdqa %xmm0,%xmm1
- DB 102,68,15,111,53,225,5,0,0 ; movdqa 0x5e1(%rip),%xmm14 # 2040 <_sk_xor__sse41_8bit+0x3a8>
+ DB 102,68,15,111,53,81,7,0,0 ; movdqa 0x751(%rip),%xmm14 # 21b0 <_sk_darken_sse41_8bit+0x3d0>
DB 102,68,15,111,195 ; movdqa %xmm3,%xmm8
DB 102,15,111,235 ; movdqa %xmm3,%xmm5
DB 102,65,15,56,0,238 ; pshufb %xmm14,%xmm5
@@ -43885,7 +43991,7 @@ _sk_screen_sse41_8bit LABEL PROC
PUBLIC _sk_xor__sse41_8bit
_sk_xor__sse41_8bit LABEL PROC
- DB 102,68,15,111,21,175,3,0,0 ; movdqa 0x3af(%rip),%xmm10 # 2050 <_sk_xor__sse41_8bit+0x3b8>
+ DB 102,68,15,111,21,31,5,0,0 ; movdqa 0x51f(%rip),%xmm10 # 21c0 <_sk_darken_sse41_8bit+0x3e0>
DB 102,68,15,111,226 ; movdqa %xmm2,%xmm12
DB 102,68,15,56,48,194 ; pmovzxbw %xmm2,%xmm8
DB 102,15,111,234 ; movdqa %xmm2,%xmm5
@@ -43953,13 +44059,90 @@ _sk_xor__sse41_8bit LABEL PROC
DB 102,15,111,205 ; movdqa %xmm5,%xmm1
DB 255,224 ; jmpq *%rax
+PUBLIC _sk_darken_sse41_8bit
+_sk_darken_sse41_8bit LABEL PROC
+ DB 102,68,15,111,240 ; movdqa %xmm0,%xmm14
+ DB 102,68,15,111,5,226,3,0,0 ; movdqa 0x3e2(%rip),%xmm8 # 21d0 <_sk_darken_sse41_8bit+0x3f0>
+ DB 102,68,15,111,219 ; movdqa %xmm3,%xmm11
+ DB 102,68,15,56,48,203 ; pmovzxbw %xmm3,%xmm9
+ DB 102,15,111,243 ; movdqa %xmm3,%xmm6
+ DB 102,65,15,56,0,240 ; pshufb %xmm8,%xmm6
+ DB 102,68,15,111,234 ; movdqa %xmm2,%xmm13
+ DB 102,68,15,56,48,210 ; pmovzxbw %xmm2,%xmm10
+ DB 102,15,111,234 ; movdqa %xmm2,%xmm5
+ DB 102,65,15,56,0,232 ; pshufb %xmm8,%xmm5
+ DB 102,69,15,239,228 ; pxor %xmm12,%xmm12
+ DB 102,65,15,104,196 ; punpckhbw %xmm12,%xmm0
+ DB 102,15,111,225 ; movdqa %xmm1,%xmm4
+ DB 102,65,15,104,228 ; punpckhbw %xmm12,%xmm4
+ DB 102,69,15,56,48,254 ; pmovzxbw %xmm14,%xmm15
+ DB 102,15,56,48,253 ; pmovzxbw %xmm5,%xmm7
+ DB 102,65,15,104,236 ; punpckhbw %xmm12,%xmm5
+ DB 102,15,213,232 ; pmullw %xmm0,%xmm5
+ DB 102,15,253,232 ; paddw %xmm0,%xmm5
+ DB 102,15,56,48,198 ; pmovzxbw %xmm6,%xmm0
+ DB 102,65,15,104,244 ; punpckhbw %xmm12,%xmm6
+ DB 102,15,213,244 ; pmullw %xmm4,%xmm6
+ DB 102,15,253,244 ; paddw %xmm4,%xmm6
+ DB 102,65,15,111,230 ; movdqa %xmm14,%xmm4
+ DB 102,68,15,252,242 ; paddb %xmm2,%xmm14
+ DB 102,65,15,213,255 ; pmullw %xmm15,%xmm7
+ DB 102,65,15,253,255 ; paddw %xmm15,%xmm7
+ DB 102,68,15,56,48,249 ; pmovzxbw %xmm1,%xmm15
+ DB 102,65,15,213,199 ; pmullw %xmm15,%xmm0
+ DB 102,65,15,253,199 ; paddw %xmm15,%xmm0
+ DB 102,68,15,111,249 ; movdqa %xmm1,%xmm15
+ DB 102,15,252,203 ; paddb %xmm3,%xmm1
+ DB 102,15,113,213,8 ; psrlw $0x8,%xmm5
+ DB 102,15,113,215,8 ; psrlw $0x8,%xmm7
+ DB 102,15,103,253 ; packuswb %xmm5,%xmm7
+ DB 102,15,113,214,8 ; psrlw $0x8,%xmm6
+ DB 102,15,113,208,8 ; psrlw $0x8,%xmm0
+ DB 102,15,103,198 ; packuswb %xmm6,%xmm0
+ DB 102,69,15,56,0,248 ; pshufb %xmm8,%xmm15
+ DB 102,65,15,56,0,224 ; pshufb %xmm8,%xmm4
+ DB 102,69,15,104,236 ; punpckhbw %xmm12,%xmm13
+ DB 102,69,15,104,220 ; punpckhbw %xmm12,%xmm11
+ DB 102,15,56,48,236 ; pmovzxbw %xmm4,%xmm5
+ DB 102,65,15,56,48,247 ; pmovzxbw %xmm15,%xmm6
+ DB 102,65,15,104,228 ; punpckhbw %xmm12,%xmm4
+ DB 102,69,15,104,252 ; punpckhbw %xmm12,%xmm15
+ DB 102,69,15,213,251 ; pmullw %xmm11,%xmm15
+ DB 102,65,15,213,229 ; pmullw %xmm13,%xmm4
+ DB 102,65,15,213,241 ; pmullw %xmm9,%xmm6
+ DB 102,65,15,213,234 ; pmullw %xmm10,%xmm5
+ DB 102,65,15,253,229 ; paddw %xmm13,%xmm4
+ DB 102,69,15,253,251 ; paddw %xmm11,%xmm15
+ DB 102,65,15,253,234 ; paddw %xmm10,%xmm5
+ DB 102,65,15,253,241 ; paddw %xmm9,%xmm6
+ DB 102,65,15,113,215,8 ; psrlw $0x8,%xmm15
+ DB 102,15,113,212,8 ; psrlw $0x8,%xmm4
+ DB 102,15,113,214,8 ; psrlw $0x8,%xmm6
+ DB 102,15,113,213,8 ; psrlw $0x8,%xmm5
+ DB 102,15,103,236 ; packuswb %xmm4,%xmm5
+ DB 102,65,15,103,247 ; packuswb %xmm15,%xmm6
+ DB 102,15,222,253 ; pmaxub %xmm5,%xmm7
+ DB 102,15,222,198 ; pmaxub %xmm6,%xmm0
+ DB 102,68,15,111,193 ; movdqa %xmm1,%xmm8
+ DB 102,68,15,248,192 ; psubb %xmm0,%xmm8
+ DB 102,65,15,111,230 ; movdqa %xmm14,%xmm4
+ DB 102,15,248,231 ; psubb %xmm7,%xmm4
+ DB 102,15,248,206 ; psubb %xmm6,%xmm1
+ DB 102,68,15,248,245 ; psubb %xmm5,%xmm14
+ DB 15,40,5,166,2,0,0 ; movaps 0x2a6(%rip),%xmm0 # 21e0 <_sk_darken_sse41_8bit+0x400>
+ DB 102,68,15,56,16,244 ; pblendvb %xmm0,%xmm4,%xmm14
+ DB 102,65,15,56,16,200 ; pblendvb %xmm0,%xmm8,%xmm1
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 102,65,15,111,198 ; movdqa %xmm14,%xmm0
+ DB 255,224 ; jmpq *%rax
+
ALIGN 4
DB 0,0 ; add %al,(%rax)
- DB 127,67 ; jg 1e27 <_sk_xor__sse41_8bit+0x18f>
+ DB 127,67 ; jg 1f97 <_sk_darken_sse41_8bit+0x1b7>
DB 0,0 ; add %al,(%rax)
- DB 127,67 ; jg 1e2b <_sk_xor__sse41_8bit+0x193>
+ DB 127,67 ; jg 1f9b <_sk_darken_sse41_8bit+0x1bb>
DB 0,0 ; add %al,(%rax)
- DB 127,67 ; jg 1e2f <_sk_xor__sse41_8bit+0x197>
+ DB 127,67 ; jg 1f9f <_sk_darken_sse41_8bit+0x1bf>
ALIGN 16
DB 0,0 ; add %al,(%rax)
@@ -44307,9 +44490,31 @@ ALIGN 16
DB 11,11 ; or (%rbx),%ecx
DB 11,11 ; or (%rbx),%ecx
DB 15 ; (bad)
- DB 15 ; .byte 0xf
- DB 15 ; .byte 0xf
- DB 15 ; .byte 0xf
+ DB 15 ; (bad)
+ DB 15 ; (bad)
+ DB 15,3,3 ; lsl (%rbx),%eax
+ DB 3,3 ; add (%rbx),%eax
+ DB 7 ; (bad)
+ DB 7 ; (bad)
+ DB 7 ; (bad)
+ DB 7 ; (bad)
+ DB 11,11 ; or (%rbx),%ecx
+ DB 11,11 ; or (%rbx),%ecx
+ DB 15 ; (bad)
+ DB 15 ; (bad)
+ DB 15 ; (bad)
+ DB 15,255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,0 ; incl (%rax)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,0 ; incl (%rax)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,0 ; incl (%rax)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,0 ; incl (%rax)
ALIGN 32
PUBLIC _sk_start_pipeline_sse2_8bit
@@ -44410,7 +44615,7 @@ _sk_uniform_color_sse2_8bit LABEL PROC
PUBLIC _sk_set_rgb_sse2_8bit
_sk_set_rgb_sse2_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 243,15,16,37,8,34,0,0 ; movss 0x2208(%rip),%xmm4 # 2388 <_sk_xor__sse2_8bit+0x1a5>
+ DB 243,15,16,37,244,35,0,0 ; movss 0x23f4(%rip),%xmm4 # 2574 <_sk_darken_sse2_8bit+0x1ec>
DB 243,15,16,40 ; movss (%rax),%xmm5
DB 243,15,89,236 ; mulss %xmm4,%xmm5
DB 243,72,15,44,205 ; cvttss2si %xmm5,%rcx
@@ -44425,7 +44630,7 @@ _sk_set_rgb_sse2_8bit LABEL PROC
DB 9,208 ; or %edx,%eax
DB 102,15,110,224 ; movd %eax,%xmm4
DB 102,15,112,228,0 ; pshufd $0x0,%xmm4,%xmm4
- DB 102,15,111,45,224,33,0,0 ; movdqa 0x21e0(%rip),%xmm5 # 23a0 <_sk_xor__sse2_8bit+0x1bd>
+ DB 102,15,111,45,192,35,0,0 ; movdqa 0x23c0(%rip),%xmm5 # 2580 <_sk_darken_sse2_8bit+0x1f8>
DB 102,15,219,205 ; pand %xmm5,%xmm1
DB 102,15,219,197 ; pand %xmm5,%xmm0
DB 102,15,235,196 ; por %xmm4,%xmm0
@@ -44448,7 +44653,7 @@ _sk_premul_sse2_8bit LABEL PROC
DB 102,15,96,192 ; punpcklbw %xmm0,%xmm0
DB 242,15,112,192,95 ; pshuflw $0x5f,%xmm0,%xmm0
DB 243,15,112,248,95 ; pshufhw $0x5f,%xmm0,%xmm7
- DB 102,15,111,5,148,33,0,0 ; movdqa 0x2194(%rip),%xmm0 # 23b0 <_sk_xor__sse2_8bit+0x1cd>
+ DB 102,15,111,5,116,35,0,0 ; movdqa 0x2374(%rip),%xmm0 # 2590 <_sk_darken_sse2_8bit+0x208>
DB 102,15,235,248 ; por %xmm0,%xmm7
DB 102,15,235,240 ; por %xmm0,%xmm6
DB 102,69,15,239,201 ; pxor %xmm9,%xmm9
@@ -44950,7 +45155,7 @@ _sk_load_a8_sse2_8bit LABEL PROC
DB 117,48 ; jne 8b9 <_sk_load_a8_sse2_8bit+0x4d>
DB 243,66,15,126,4,2 ; movq (%rdx,%r8,1),%xmm0
DB 102,15,96,192 ; punpcklbw %xmm0,%xmm0
- DB 102,15,84,5,37,27,0,0 ; andpd 0x1b25(%rip),%xmm0 # 23c0 <_sk_xor__sse2_8bit+0x1dd>
+ DB 102,15,84,5,5,29,0,0 ; andpd 0x1d05(%rip),%xmm0 # 25a0 <_sk_darken_sse2_8bit+0x218>
DB 102,15,239,228 ; pxor %xmm4,%xmm4
DB 102,15,40,200 ; movapd %xmm0,%xmm1
DB 102,15,105,204 ; punpckhwd %xmm4,%xmm1
@@ -45025,7 +45230,7 @@ _sk_load_a8_dst_sse2_8bit LABEL PROC
DB 117,48 ; jne 9ad <_sk_load_a8_dst_sse2_8bit+0x4d>
DB 243,66,15,126,20,2 ; movq (%rdx,%r8,1),%xmm2
DB 102,15,96,208 ; punpcklbw %xmm0,%xmm2
- DB 102,15,84,21,65,26,0,0 ; andpd 0x1a41(%rip),%xmm2 # 23d0 <_sk_xor__sse2_8bit+0x1ed>
+ DB 102,15,84,21,33,28,0,0 ; andpd 0x1c21(%rip),%xmm2 # 25b0 <_sk_darken_sse2_8bit+0x228>
DB 102,15,239,228 ; pxor %xmm4,%xmm4
DB 102,15,40,218 ; movapd %xmm2,%xmm3
DB 102,15,105,220 ; punpckhwd %xmm4,%xmm3
@@ -45108,7 +45313,7 @@ _sk_store_a8_sse2_8bit LABEL PROC
DB 102,15,107,229 ; packssdw %xmm5,%xmm4
DB 77,133,201 ; test %r9,%r9
DB 117,26 ; jne ab9 <_sk_store_a8_sse2_8bit+0x65>
- DB 102,15,219,37,57,25,0,0 ; pand 0x1939(%rip),%xmm4 # 23e0 <_sk_xor__sse2_8bit+0x1fd>
+ DB 102,15,219,37,25,27,0,0 ; pand 0x1b19(%rip),%xmm4 # 25c0 <_sk_darken_sse2_8bit+0x238>
DB 102,15,103,228 ; packuswb %xmm4,%xmm4
DB 102,66,15,214,36,2 ; movq %xmm4,(%rdx,%r8,1)
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -45130,7 +45335,7 @@ _sk_store_a8_sse2_8bit LABEL PROC
DB 102,15,127,100,36,16 ; movdqa %xmm4,0x10(%rsp)
DB 138,68,36,20 ; mov 0x14(%rsp),%al
DB 66,136,68,2,2 ; mov %al,0x2(%rdx,%r8,1)
- DB 102,15,219,37,225,24,0,0 ; pand 0x18e1(%rip),%xmm4 # 23e0 <_sk_xor__sse2_8bit+0x1fd>
+ DB 102,15,219,37,193,26,0,0 ; pand 0x1ac1(%rip),%xmm4 # 25c0 <_sk_darken_sse2_8bit+0x238>
DB 102,15,103,228 ; packuswb %xmm4,%xmm4
DB 102,15,126,224 ; movd %xmm4,%eax
DB 102,66,137,4,2 ; mov %ax,(%rdx,%r8,1)
@@ -45144,7 +45349,7 @@ _sk_store_a8_sse2_8bit LABEL PROC
DB 102,15,127,100,36,32 ; movdqa %xmm4,0x20(%rsp)
DB 138,68,36,40 ; mov 0x28(%rsp),%al
DB 66,136,68,2,4 ; mov %al,0x4(%rdx,%r8,1)
- DB 102,15,219,37,157,24,0,0 ; pand 0x189d(%rip),%xmm4 # 23e0 <_sk_xor__sse2_8bit+0x1fd>
+ DB 102,15,219,37,125,26,0,0 ; pand 0x1a7d(%rip),%xmm4 # 25c0 <_sk_darken_sse2_8bit+0x238>
DB 102,15,103,228 ; packuswb %xmm4,%xmm4
DB 102,66,15,126,36,2 ; movd %xmm4,(%rdx,%r8,1)
DB 233,95,255,255,255 ; jmpq ab1 <_sk_store_a8_sse2_8bit+0x5d>
@@ -45182,12 +45387,12 @@ _sk_load_g8_sse2_8bit LABEL PROC
DB 117,116 ; jne c01 <_sk_load_g8_sse2_8bit+0x91>
DB 243,66,15,126,4,2 ; movq (%rdx,%r8,1),%xmm0
DB 102,15,96,192 ; punpcklbw %xmm0,%xmm0
- DB 102,15,84,5,81,24,0,0 ; andpd 0x1851(%rip),%xmm0 # 23f0 <_sk_xor__sse2_8bit+0x20d>
+ DB 102,15,84,5,49,26,0,0 ; andpd 0x1a31(%rip),%xmm0 # 25d0 <_sk_darken_sse2_8bit+0x248>
DB 102,15,239,201 ; pxor %xmm1,%xmm1
DB 102,15,40,224 ; movapd %xmm0,%xmm4
DB 102,15,97,225 ; punpcklwd %xmm1,%xmm4
DB 102,15,105,193 ; punpckhwd %xmm1,%xmm0
- DB 102,15,111,45,73,24,0,0 ; movdqa 0x1849(%rip),%xmm5 # 2400 <_sk_xor__sse2_8bit+0x21d>
+ DB 102,15,111,45,41,26,0,0 ; movdqa 0x1a29(%rip),%xmm5 # 25e0 <_sk_darken_sse2_8bit+0x258>
DB 102,15,112,240,245 ; pshufd $0xf5,%xmm0,%xmm6
DB 102,15,244,197 ; pmuludq %xmm5,%xmm0
DB 102,15,112,200,232 ; pshufd $0xe8,%xmm0,%xmm1
@@ -45200,7 +45405,7 @@ _sk_load_g8_sse2_8bit LABEL PROC
DB 102,15,244,245 ; pmuludq %xmm5,%xmm6
DB 102,15,112,230,232 ; pshufd $0xe8,%xmm6,%xmm4
DB 102,15,98,196 ; punpckldq %xmm4,%xmm0
- DB 102,15,111,37,27,24,0,0 ; movdqa 0x181b(%rip),%xmm4 # 2410 <_sk_xor__sse2_8bit+0x22d>
+ DB 102,15,111,37,251,25,0,0 ; movdqa 0x19fb(%rip),%xmm4 # 25f0 <_sk_darken_sse2_8bit+0x268>
DB 102,15,235,196 ; por %xmm4,%xmm0
DB 102,15,235,204 ; por %xmm4,%xmm1
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -45272,12 +45477,12 @@ _sk_load_g8_dst_sse2_8bit LABEL PROC
DB 117,116 ; jne d41 <_sk_load_g8_dst_sse2_8bit+0x91>
DB 243,66,15,126,20,2 ; movq (%rdx,%r8,1),%xmm2
DB 102,15,96,208 ; punpcklbw %xmm0,%xmm2
- DB 102,15,84,21,65,23,0,0 ; andpd 0x1741(%rip),%xmm2 # 2420 <_sk_xor__sse2_8bit+0x23d>
+ DB 102,15,84,21,33,25,0,0 ; andpd 0x1921(%rip),%xmm2 # 2600 <_sk_darken_sse2_8bit+0x278>
DB 102,15,239,219 ; pxor %xmm3,%xmm3
DB 102,15,40,226 ; movapd %xmm2,%xmm4
DB 102,15,97,227 ; punpcklwd %xmm3,%xmm4
DB 102,15,105,211 ; punpckhwd %xmm3,%xmm2
- DB 102,15,111,45,57,23,0,0 ; movdqa 0x1739(%rip),%xmm5 # 2430 <_sk_xor__sse2_8bit+0x24d>
+ DB 102,15,111,45,25,25,0,0 ; movdqa 0x1919(%rip),%xmm5 # 2610 <_sk_darken_sse2_8bit+0x288>
DB 102,15,112,242,245 ; pshufd $0xf5,%xmm2,%xmm6
DB 102,15,244,213 ; pmuludq %xmm5,%xmm2
DB 102,15,112,218,232 ; pshufd $0xe8,%xmm2,%xmm3
@@ -45290,7 +45495,7 @@ _sk_load_g8_dst_sse2_8bit LABEL PROC
DB 102,15,244,245 ; pmuludq %xmm5,%xmm6
DB 102,15,112,230,232 ; pshufd $0xe8,%xmm6,%xmm4
DB 102,15,98,212 ; punpckldq %xmm4,%xmm2
- DB 102,15,111,37,11,23,0,0 ; movdqa 0x170b(%rip),%xmm4 # 2440 <_sk_xor__sse2_8bit+0x25d>
+ DB 102,15,111,37,235,24,0,0 ; movdqa 0x18eb(%rip),%xmm4 # 2620 <_sk_darken_sse2_8bit+0x298>
DB 102,15,235,212 ; por %xmm4,%xmm2
DB 102,15,235,220 ; por %xmm4,%xmm3
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -45503,7 +45708,7 @@ _sk_scale_1_float_sse2_8bit LABEL PROC
DB 102,68,15,111,200 ; movdqa %xmm0,%xmm9
DB 72,173 ; lods %ds:(%rsi),%rax
DB 243,15,16,0 ; movss (%rax),%xmm0
- DB 243,15,89,5,56,19,0,0 ; mulss 0x1338(%rip),%xmm0 # 238c <_sk_xor__sse2_8bit+0x1a9>
+ DB 243,15,89,5,36,21,0,0 ; mulss 0x1524(%rip),%xmm0 # 2578 <_sk_darken_sse2_8bit+0x1f0>
DB 243,15,44,192 ; cvttss2si %xmm0,%eax
DB 102,15,239,246 ; pxor %xmm6,%xmm6
DB 102,65,15,111,193 ; movdqa %xmm9,%xmm0
@@ -45515,7 +45720,7 @@ _sk_scale_1_float_sse2_8bit LABEL PROC
DB 102,15,96,246 ; punpcklbw %xmm6,%xmm6
DB 242,15,112,246,0 ; pshuflw $0x0,%xmm6,%xmm6
DB 102,15,112,246,80 ; pshufd $0x50,%xmm6,%xmm6
- DB 102,15,219,53,195,19,0,0 ; pand 0x13c3(%rip),%xmm6 # 2450 <_sk_xor__sse2_8bit+0x26d>
+ DB 102,15,219,53,163,21,0,0 ; pand 0x15a3(%rip),%xmm6 # 2630 <_sk_darken_sse2_8bit+0x2a8>
DB 102,15,111,254 ; movdqa %xmm6,%xmm7
DB 102,65,15,213,248 ; pmullw %xmm8,%xmm7
DB 102,15,111,230 ; movdqa %xmm6,%xmm4
@@ -45549,7 +45754,7 @@ _sk_scale_u8_sse2_8bit LABEL PROC
DB 15,133,239,0,0,0 ; jne 11ed <_sk_scale_u8_sse2_8bit+0x110>
DB 243,66,15,126,36,2 ; movq (%rdx,%r8,1),%xmm4
DB 102,15,96,224 ; punpcklbw %xmm0,%xmm4
- DB 102,15,84,37,80,19,0,0 ; andpd 0x1350(%rip),%xmm4 # 2460 <_sk_xor__sse2_8bit+0x27d>
+ DB 102,15,84,37,48,21,0,0 ; andpd 0x1530(%rip),%xmm4 # 2640 <_sk_darken_sse2_8bit+0x2b8>
DB 102,69,15,239,192 ; pxor %xmm8,%xmm8
DB 102,15,40,236 ; movapd %xmm4,%xmm5
DB 102,65,15,105,232 ; punpckhwd %xmm8,%xmm5
@@ -45656,7 +45861,7 @@ PUBLIC _sk_lerp_1_float_sse2_8bit
_sk_lerp_1_float_sse2_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 243,15,16,32 ; movss (%rax),%xmm4
- DB 243,15,89,37,226,16,0,0 ; mulss 0x10e2(%rip),%xmm4 # 2390 <_sk_xor__sse2_8bit+0x1ad>
+ DB 243,15,89,37,206,18,0,0 ; mulss 0x12ce(%rip),%xmm4 # 257c <_sk_darken_sse2_8bit+0x1f4>
DB 243,15,44,196 ; cvttss2si %xmm4,%eax
DB 102,15,110,224 ; movd %eax,%xmm4
DB 102,15,96,228 ; punpcklbw %xmm4,%xmm4
@@ -45669,7 +45874,7 @@ _sk_lerp_1_float_sse2_8bit LABEL PROC
DB 102,68,15,111,217 ; movdqa %xmm1,%xmm11
DB 102,69,15,96,217 ; punpcklbw %xmm9,%xmm11
DB 102,65,15,104,201 ; punpckhbw %xmm9,%xmm1
- DB 102,15,111,53,129,17,0,0 ; movdqa 0x1181(%rip),%xmm6 # 2470 <_sk_xor__sse2_8bit+0x28d>
+ DB 102,15,111,53,97,19,0,0 ; movdqa 0x1361(%rip),%xmm6 # 2650 <_sk_darken_sse2_8bit+0x2c8>
DB 102,65,15,219,240 ; pand %xmm8,%xmm6
DB 102,15,111,230 ; movdqa %xmm6,%xmm4
DB 102,15,213,225 ; pmullw %xmm1,%xmm4
@@ -45735,7 +45940,7 @@ _sk_lerp_u8_sse2_8bit LABEL PROC
DB 15,133,141,1,0,0 ; jne 1584 <_sk_lerp_u8_sse2_8bit+0x1ae>
DB 243,66,15,126,44,2 ; movq (%rdx,%r8,1),%xmm5
DB 102,15,96,232 ; punpcklbw %xmm0,%xmm5
- DB 102,15,84,45,119,16,0,0 ; andpd 0x1077(%rip),%xmm5 # 2480 <_sk_xor__sse2_8bit+0x29d>
+ DB 102,15,84,45,87,18,0,0 ; andpd 0x1257(%rip),%xmm5 # 2660 <_sk_darken_sse2_8bit+0x2d8>
DB 102,69,15,239,192 ; pxor %xmm8,%xmm8
DB 102,15,40,229 ; movapd %xmm5,%xmm4
DB 102,65,15,105,224 ; punpckhwd %xmm8,%xmm4
@@ -45887,7 +46092,7 @@ _sk_move_dst_src_sse2_8bit LABEL PROC
PUBLIC _sk_black_color_sse2_8bit
_sk_black_color_sse2_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
- DB 15,40,5,63,14,0,0 ; movaps 0xe3f(%rip),%xmm0 # 2490 <_sk_xor__sse2_8bit+0x2ad>
+ DB 15,40,5,31,16,0,0 ; movaps 0x101f(%rip),%xmm0 # 2670 <_sk_darken_sse2_8bit+0x2e8>
DB 15,40,200 ; movaps %xmm0,%xmm1
DB 255,224 ; jmpq *%rax
@@ -46658,13 +46863,119 @@ _sk_xor__sse2_8bit LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
+PUBLIC _sk_darken_sse2_8bit
+_sk_darken_sse2_8bit LABEL PROC
+ DB 242,15,112,227,231 ; pshuflw $0xe7,%xmm3,%xmm4
+ DB 243,15,112,228,231 ; pshufhw $0xe7,%xmm4,%xmm4
+ DB 102,15,112,228,232 ; pshufd $0xe8,%xmm4,%xmm4
+ DB 102,15,96,228 ; punpcklbw %xmm4,%xmm4
+ DB 242,15,112,228,95 ; pshuflw $0x5f,%xmm4,%xmm4
+ DB 243,15,112,228,95 ; pshufhw $0x5f,%xmm4,%xmm4
+ DB 242,15,112,234,231 ; pshuflw $0xe7,%xmm2,%xmm5
+ DB 243,15,112,237,231 ; pshufhw $0xe7,%xmm5,%xmm5
+ DB 102,15,112,237,232 ; pshufd $0xe8,%xmm5,%xmm5
+ DB 102,15,96,237 ; punpcklbw %xmm5,%xmm5
+ DB 242,15,112,237,95 ; pshuflw $0x5f,%xmm5,%xmm5
+ DB 243,15,112,253,95 ; pshufhw $0x5f,%xmm5,%xmm7
+ DB 102,69,15,239,192 ; pxor %xmm8,%xmm8
+ DB 102,15,111,240 ; movdqa %xmm0,%xmm6
+ DB 102,65,15,96,240 ; punpcklbw %xmm8,%xmm6
+ DB 102,68,15,111,200 ; movdqa %xmm0,%xmm9
+ DB 102,69,15,104,200 ; punpckhbw %xmm8,%xmm9
+ DB 102,68,15,111,217 ; movdqa %xmm1,%xmm11
+ DB 102,69,15,96,216 ; punpcklbw %xmm8,%xmm11
+ DB 102,68,15,111,225 ; movdqa %xmm1,%xmm12
+ DB 102,69,15,104,224 ; punpckhbw %xmm8,%xmm12
+ DB 102,68,15,111,215 ; movdqa %xmm7,%xmm10
+ DB 102,69,15,96,208 ; punpcklbw %xmm8,%xmm10
+ DB 102,65,15,104,248 ; punpckhbw %xmm8,%xmm7
+ DB 102,68,15,111,236 ; movdqa %xmm4,%xmm13
+ DB 102,69,15,96,232 ; punpcklbw %xmm8,%xmm13
+ DB 102,65,15,104,224 ; punpckhbw %xmm8,%xmm4
+ DB 102,65,15,213,228 ; pmullw %xmm12,%xmm4
+ DB 102,69,15,213,235 ; pmullw %xmm11,%xmm13
+ DB 102,65,15,213,249 ; pmullw %xmm9,%xmm7
+ DB 102,68,15,213,214 ; pmullw %xmm6,%xmm10
+ DB 102,68,15,253,214 ; paddw %xmm6,%xmm10
+ DB 102,65,15,253,249 ; paddw %xmm9,%xmm7
+ DB 102,69,15,253,235 ; paddw %xmm11,%xmm13
+ DB 102,65,15,253,228 ; paddw %xmm12,%xmm4
+ DB 102,15,113,212,8 ; psrlw $0x8,%xmm4
+ DB 102,65,15,113,213,8 ; psrlw $0x8,%xmm13
+ DB 102,15,113,215,8 ; psrlw $0x8,%xmm7
+ DB 102,65,15,113,210,8 ; psrlw $0x8,%xmm10
+ DB 102,68,15,103,215 ; packuswb %xmm7,%xmm10
+ DB 102,68,15,103,236 ; packuswb %xmm4,%xmm13
+ DB 242,15,112,225,231 ; pshuflw $0xe7,%xmm1,%xmm4
+ DB 243,15,112,228,231 ; pshufhw $0xe7,%xmm4,%xmm4
+ DB 102,15,112,228,232 ; pshufd $0xe8,%xmm4,%xmm4
+ DB 102,15,96,228 ; punpcklbw %xmm4,%xmm4
+ DB 242,15,112,228,95 ; pshuflw $0x5f,%xmm4,%xmm4
+ DB 243,68,15,112,220,95 ; pshufhw $0x5f,%xmm4,%xmm11
+ DB 242,15,112,224,231 ; pshuflw $0xe7,%xmm0,%xmm4
+ DB 243,15,112,228,231 ; pshufhw $0xe7,%xmm4,%xmm4
+ DB 102,15,112,228,232 ; pshufd $0xe8,%xmm4,%xmm4
+ DB 102,15,96,228 ; punpcklbw %xmm4,%xmm4
+ DB 242,15,112,228,95 ; pshuflw $0x5f,%xmm4,%xmm4
+ DB 243,68,15,112,228,95 ; pshufhw $0x5f,%xmm4,%xmm12
+ DB 102,68,15,111,202 ; movdqa %xmm2,%xmm9
+ DB 102,15,252,194 ; paddb %xmm2,%xmm0
+ DB 102,15,111,242 ; movdqa %xmm2,%xmm6
+ DB 102,65,15,96,240 ; punpcklbw %xmm8,%xmm6
+ DB 102,69,15,104,200 ; punpckhbw %xmm8,%xmm9
+ DB 102,68,15,111,243 ; movdqa %xmm3,%xmm14
+ DB 102,15,252,203 ; paddb %xmm3,%xmm1
+ DB 102,15,111,235 ; movdqa %xmm3,%xmm5
+ DB 102,65,15,96,232 ; punpcklbw %xmm8,%xmm5
+ DB 102,69,15,104,240 ; punpckhbw %xmm8,%xmm14
+ DB 102,65,15,111,228 ; movdqa %xmm12,%xmm4
+ DB 102,65,15,96,224 ; punpcklbw %xmm8,%xmm4
+ DB 102,69,15,104,224 ; punpckhbw %xmm8,%xmm12
+ DB 102,65,15,111,251 ; movdqa %xmm11,%xmm7
+ DB 102,65,15,96,248 ; punpcklbw %xmm8,%xmm7
+ DB 102,69,15,104,216 ; punpckhbw %xmm8,%xmm11
+ DB 102,69,15,213,222 ; pmullw %xmm14,%xmm11
+ DB 102,15,213,253 ; pmullw %xmm5,%xmm7
+ DB 102,69,15,213,225 ; pmullw %xmm9,%xmm12
+ DB 102,15,213,230 ; pmullw %xmm6,%xmm4
+ DB 102,15,253,230 ; paddw %xmm6,%xmm4
+ DB 102,69,15,253,225 ; paddw %xmm9,%xmm12
+ DB 102,15,253,253 ; paddw %xmm5,%xmm7
+ DB 102,69,15,253,222 ; paddw %xmm14,%xmm11
+ DB 102,65,15,113,211,8 ; psrlw $0x8,%xmm11
+ DB 102,15,113,215,8 ; psrlw $0x8,%xmm7
+ DB 102,65,15,113,212,8 ; psrlw $0x8,%xmm12
+ DB 102,15,113,212,8 ; psrlw $0x8,%xmm4
+ DB 102,65,15,103,228 ; packuswb %xmm12,%xmm4
+ DB 102,65,15,103,251 ; packuswb %xmm11,%xmm7
+ DB 102,68,15,222,212 ; pmaxub %xmm4,%xmm10
+ DB 102,68,15,222,239 ; pmaxub %xmm7,%xmm13
+ DB 102,15,111,241 ; movdqa %xmm1,%xmm6
+ DB 102,65,15,248,245 ; psubb %xmm13,%xmm6
+ DB 102,15,111,232 ; movdqa %xmm0,%xmm5
+ DB 102,65,15,248,234 ; psubb %xmm10,%xmm5
+ DB 102,15,248,207 ; psubb %xmm7,%xmm1
+ DB 102,15,248,196 ; psubb %xmm4,%xmm0
+ DB 102,15,111,37,52,1,0,0 ; movdqa 0x134(%rip),%xmm4 # 2680 <_sk_darken_sse2_8bit+0x2f8>
+ DB 102,15,219,236 ; pand %xmm4,%xmm5
+ DB 102,15,111,252 ; movdqa %xmm4,%xmm7
+ DB 102,15,223,248 ; pandn %xmm0,%xmm7
+ DB 102,15,235,239 ; por %xmm7,%xmm5
+ DB 102,15,219,244 ; pand %xmm4,%xmm6
+ DB 102,15,223,225 ; pandn %xmm1,%xmm4
+ DB 102,15,235,244 ; por %xmm4,%xmm6
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 102,15,111,197 ; movdqa %xmm5,%xmm0
+ DB 102,15,111,206 ; movdqa %xmm6,%xmm1
+ DB 255,224 ; jmpq *%rax
+
ALIGN 4
DB 0,0 ; add %al,(%rax)
- DB 127,67 ; jg 23cf <_sk_xor__sse2_8bit+0x1ec>
+ DB 127,67 ; jg 25bb <_sk_darken_sse2_8bit+0x233>
DB 0,0 ; add %al,(%rax)
- DB 127,67 ; jg 23d3 <_sk_xor__sse2_8bit+0x1f0>
+ DB 127,67 ; jg 25bf <_sk_darken_sse2_8bit+0x237>
DB 0,0 ; add %al,(%rax)
- DB 127,67 ; jg 23d7 <_sk_xor__sse2_8bit+0x1f4>
+ DB 127,67 ; jg 25c3 <_sk_darken_sse2_8bit+0x23b>
ALIGN 16
DB 0,0 ; add %al,(%rax)
@@ -46795,6 +47106,18 @@ ALIGN 16
DB 0,255 ; add %bh,%bh
DB 0,0 ; add %al,(%rax)
DB 0,255 ; add %bh,%bh
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,0 ; incl (%rax)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,0 ; incl (%rax)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,0 ; incl (%rax)
+ DB 255 ; (bad)
+ DB 255 ; (bad)
+ DB 255,0 ; incl (%rax)
ELSE
.MODEL FLAT,C
_text32 SEGMENT ALIGN(32) 'CODE'
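
For readers not fluent in the generated SSE2, the new _sk_darken_sse2_8bit body above follows the same pattern as the other 8-bit blend stages: splat each pixel's alpha across its bytes, widen bytes to 16-bit lanes, approximate x*y/255 as (x*y + x) >> 8 (the pmullw / paddw / psrlw $8 runs), narrow back with packuswb, and take the per-byte maximum with pmaxub. The sketch below is illustrative only -- approx_mul_255 and darken_rgb are made-up names, and the alpha splatting is assumed to be done by the caller. It covers just the color-channel math; the real stage also recomputes the alpha lane and re-selects it with the 0xff000000 pand/pandn mask seen at the end of the listing.

#include <emmintrin.h>

// Approximate x*y/255 as (x*y + x) >> 8 on eight 16-bit lanes, matching the
// pmullw / paddw / psrlw $8 sequences in the listing above.
static inline __m128i approx_mul_255(__m128i x, __m128i y) {
    return _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(x, y), x), 8);
}

// s, d: sixteen premultiplied color bytes; sa, da: the matching alphas already
// splatted across every byte of their pixel (the pshuflw/pshufhw work above).
static inline __m128i darken_rgb(__m128i s, __m128i d, __m128i sa, __m128i da) {
    const __m128i zero = _mm_setzero_si128();

    // s*da and d*sa, computed in 16-bit halves and packed back to bytes.
    __m128i sda = _mm_packus_epi16(
        approx_mul_255(_mm_unpacklo_epi8(s, zero), _mm_unpacklo_epi8(da, zero)),
        approx_mul_255(_mm_unpackhi_epi8(s, zero), _mm_unpackhi_epi8(da, zero)));
    __m128i dsa = _mm_packus_epi16(
        approx_mul_255(_mm_unpacklo_epi8(d, zero), _mm_unpacklo_epi8(sa, zero)),
        approx_mul_255(_mm_unpackhi_epi8(d, zero), _mm_unpackhi_epi8(sa, zero)));

    // darken: s + d - max(s*da, d*sa), wrapping mod 256 like paddb/psubb.
    return _mm_sub_epi8(_mm_add_epi8(s, d), _mm_max_epu8(sda, dsa));
}

The arithmetic wraps mod 256 (paddb/psubb in the generated code); for valid premultiplied inputs the darken result stays within 0..255, so no saturation is needed.
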
diff --git a/src/jumper/SkJumper_stages_8bit.cpp b/src/jumper/SkJumper_stages_8bit.cpp
index 1d8c2fcb61..bceb872747 100644
--- a/src/jumper/SkJumper_stages_8bit.cpp
+++ b/src/jumper/SkJumper_stages_8bit.cpp
@@ -140,6 +140,11 @@ SI V swap_rb(V v) {
#endif
}
+SI V max(V a, V b) {
+ auto gt = a.u8x4 > b.u8x4;
+ return (a.u8x4 & gt) | (b.u8x4 &~gt);
+}
+
struct Params {
size_t x,y,tail;
};
@@ -401,3 +406,9 @@ STAGE(modulate) { src = src*dst; }
STAGE(multiply) { src = src*inv(alpha(dst)) + dst*inv(alpha(src)) + src*dst; }
STAGE(screen) { src = src + inv(src)*dst; }
STAGE(xor_) { src = src*inv(alpha(dst)) + dst*inv(alpha(src)); }
+
+STAGE(darken) {
+ V rgb = src + (dst - max(src*alpha(dst), dst*alpha(src)));
+ V a = src + (dst - dst*alpha(src));
+ src = (rgb.u32 & 0x00ffffff) | (a.u32 & 0xff000000);
+}
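
For reference, here is a minimal scalar sketch of what the new stage computes per 8888 pixel. mul255 and darken_pixel are hypothetical names, not part of Skia, and an exact rounded divide is used here instead of the pipeline's (x*y + x) >> 8 approximation.

#include <algorithm>
#include <cstdint>

// Exact x*a/255 with rounding; the SIMD stages approximate this instead.
static inline uint32_t mul255(uint32_t x, uint32_t a) {
    return (x * a + 127) / 255;
}

// src, dst: premultiplied 8888 pixels with alpha in the top byte; the three
// color channels sit in the low 24 bits, whatever their order.
static inline uint32_t darken_pixel(uint32_t src, uint32_t dst) {
    uint32_t sa = src >> 24,
             da = dst >> 24;
    uint32_t rgb = 0;
    for (int shift = 0; shift < 24; shift += 8) {   // three color channels
        uint32_t s = (src >> shift) & 0xff,
                 d = (dst >> shift) & 0xff;
        // darken: s + d - max(s*da, d*sa); stays in 0..255 for premul inputs.
        rgb |= (s + d - std::max(mul255(s, da), mul255(d, sa))) << shift;
    }
    uint32_t a = sa + da - mul255(da, sa);          // source-over alpha
    return (rgb & 0x00ffffff) | (a << 24);
}

The rgb/a split mirrors the masking in the stage: darken applies the max() term only to the color channels, while the alpha byte keeps plain source-over coverage (sa + da - sa*da).
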