diff options
author | 2017-06-05 14:30:08 -0400 | |
---|---|---|
committer | 2017-06-05 19:25:51 +0000 | |
commit | 9653d3aa84505c30aa5440b5629cdb25525666c3 (patch) | |
tree | 3cbafc789364b65b74d3b69ab6eced7546f1f290 /src | |
parent | d12a67626da4f5919b48c513fee80974f603473e (diff) |
more lowp blend modes
Change-Id: Id62e989d4278f273c040b159ed4d2fd6a2f209e0
Reviewed-on: https://skia-review.googlesource.com/18627
Reviewed-by: Herb Derby <herb@google.com>
Reviewed-by: Mike Reed <reed@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
Diffstat (limited to 'src')
-rw-r--r-- | src/jumper/SkJumper.cpp | 16 | ||||
-rw-r--r-- | src/jumper/SkJumper_generated.S | 355 | ||||
-rw-r--r-- | src/jumper/SkJumper_generated_win.S | 329 | ||||
-rw-r--r-- | src/jumper/SkJumper_stages_lowp.cpp | 17 |
4 files changed, 683 insertions, 34 deletions
diff --git a/src/jumper/SkJumper.cpp b/src/jumper/SkJumper.cpp index 8912499d4f..f536f4c741 100644 --- a/src/jumper/SkJumper.cpp +++ b/src/jumper/SkJumper.cpp @@ -83,7 +83,20 @@ using StartPipelineFn = void(size_t,size_t,size_t,void**,K*); M(swap) \ M(move_src_dst) \ M(move_dst_src) \ - M(srcover) + M(clear) \ + M(srcatop) \ + M(dstatop) \ + M(srcin) \ + M(dstin) \ + M(srcout) \ + M(dstout) \ + M(srcover) \ + M(dstover) \ + M(modulate) \ + M(multiply) \ + M(plus_) \ + M(screen) \ + M(xor_) extern "C" { @@ -232,6 +245,7 @@ StartPipelineFn* SkRasterPipeline::build_pipeline(void** ip) const { #define M(st) case SkRasterPipeline::st: fn = ASM(st, ssse3_lowp); break; LOWP_STAGES(M) #undef M + case SkRasterPipeline::clamp_0: continue; // clamp_0 is a no-op in lowp. default: log_missing(st->stage); ip = reset_point; diff --git a/src/jumper/SkJumper_generated.S b/src/jumper/SkJumper_generated.S index c9f498b330..526c10d728 100644 --- a/src/jumper/SkJumper_generated.S +++ b/src/jumper/SkJumper_generated.S @@ -36948,7 +36948,7 @@ HIDDEN _sk_constant_color_ssse3_lowp FUNCTION(_sk_constant_color_ssse3_lowp) _sk_constant_color_ssse3_lowp: .byte 72,173 // lods %ds:(%rsi),%rax - .byte 243,15,16,29,122,9,0,0 // movss 0x97a(%rip),%xmm3 # a30 <_sk_srcover_ssse3_lowp+0x65> + .byte 243,15,16,29,34,14,0,0 // movss 0xe22(%rip),%xmm3 # ed8 <_sk_xor__ssse3_lowp+0x9b> .byte 243,15,16,0 // movss (%rax),%xmm0 .byte 243,15,89,195 // mulss %xmm3,%xmm0 .byte 243,68,15,44,200 // cvttss2si %xmm0,%r9d @@ -36985,7 +36985,7 @@ _sk_load_8888_ssse3_lowp: .byte 117,113 // jne 1a2 <_sk_load_8888_ssse3_lowp+0x7b> .byte 69,15,16,76,147,16 // movups 0x10(%r11,%rdx,4),%xmm9 .byte 69,15,16,4,147 // movups (%r11,%rdx,4),%xmm8 - .byte 102,15,111,5,252,8,0,0 // movdqa 0x8fc(%rip),%xmm0 # a40 <_sk_srcover_ssse3_lowp+0x75> + .byte 102,15,111,5,172,13,0,0 // movdqa 0xdac(%rip),%xmm0 # ef0 <_sk_xor__ssse3_lowp+0xb3> .byte 102,68,15,56,0,192 // pshufb %xmm0,%xmm8 .byte 102,68,15,56,0,200 // pshufb %xmm0,%xmm9 .byte 102,65,15,111,208 // movdqa %xmm8,%xmm2 @@ -36999,7 +36999,7 @@ _sk_load_8888_ssse3_lowp: .byte 102,15,239,210 // pxor %xmm2,%xmm2 .byte 102,65,15,96,208 // punpcklbw %xmm8,%xmm2 .byte 102,65,15,104,216 // punpckhbw %xmm8,%xmm3 - .byte 102,68,15,111,5,198,8,0,0 // movdqa 0x8c6(%rip),%xmm8 # a50 <_sk_srcover_ssse3_lowp+0x85> + .byte 102,68,15,111,5,118,13,0,0 // movdqa 0xd76(%rip),%xmm8 # f00 <_sk_xor__ssse3_lowp+0xc3> .byte 102,65,15,228,192 // pmulhuw %xmm8,%xmm0 .byte 102,65,15,228,200 // pmulhuw %xmm8,%xmm1 .byte 102,65,15,228,208 // pmulhuw %xmm8,%xmm2 @@ -37141,7 +37141,7 @@ _sk_srcover_rgba_8888_ssse3_lowp: .byte 69,15,16,76,147,16 // movups 0x10(%r11,%rdx,4),%xmm9 .byte 69,15,16,4,147 // movups (%r11,%rdx,4),%xmm8 .byte 77,133,192 // test %r8,%r8 - .byte 102,15,111,37,216,6,0,0 // movdqa 0x6d8(%rip),%xmm4 # a60 <_sk_srcover_ssse3_lowp+0x95> + .byte 102,15,111,37,136,11,0,0 // movdqa 0xb88(%rip),%xmm4 # f10 <_sk_xor__ssse3_lowp+0xd3> .byte 102,68,15,56,0,196 // pshufb %xmm4,%xmm8 .byte 102,68,15,56,0,204 // pshufb %xmm4,%xmm9 .byte 102,65,15,111,240 // movdqa %xmm8,%xmm6 @@ -37155,12 +37155,12 @@ _sk_srcover_rgba_8888_ssse3_lowp: .byte 102,15,239,246 // pxor %xmm6,%xmm6 .byte 102,65,15,96,240 // punpcklbw %xmm8,%xmm6 .byte 102,65,15,104,248 // punpckhbw %xmm8,%xmm7 - .byte 102,68,15,111,5,162,6,0,0 // movdqa 0x6a2(%rip),%xmm8 # a70 <_sk_srcover_ssse3_lowp+0xa5> + .byte 102,68,15,111,5,82,11,0,0 // movdqa 0xb52(%rip),%xmm8 # f20 <_sk_xor__ssse3_lowp+0xe3> .byte 102,65,15,228,224 // pmulhuw %xmm8,%xmm4 .byte 102,65,15,228,232 // pmulhuw %xmm8,%xmm5 .byte 102,65,15,228,240 // pmulhuw %xmm8,%xmm6 .byte 102,65,15,228,248 // pmulhuw %xmm8,%xmm7 - .byte 102,68,15,111,29,149,6,0,0 // movdqa 0x695(%rip),%xmm11 # a80 <_sk_srcover_ssse3_lowp+0xb5> + .byte 102,68,15,111,29,69,11,0,0 // movdqa 0xb45(%rip),%xmm11 # f30 <_sk_xor__ssse3_lowp+0xf3> .byte 102,68,15,249,219 // psubw %xmm3,%xmm11 .byte 102,68,15,111,196 // movdqa %xmm4,%xmm8 .byte 102,69,15,56,11,195 // pmulhrsw %xmm11,%xmm8 @@ -37298,7 +37298,7 @@ FUNCTION(_sk_scale_1_float_ssse3_lowp) _sk_scale_1_float_ssse3_lowp: .byte 72,173 // lods %ds:(%rsi),%rax .byte 243,68,15,16,0 // movss (%rax),%xmm8 - .byte 243,68,15,89,5,24,4,0,0 // mulss 0x418(%rip),%xmm8 # a34 <_sk_srcover_ssse3_lowp+0x69> + .byte 243,68,15,89,5,192,8,0,0 // mulss 0x8c0(%rip),%xmm8 # edc <_sk_xor__ssse3_lowp+0x9f> .byte 243,65,15,44,192 // cvttss2si %xmm8,%eax .byte 102,68,15,110,192 // movd %eax,%xmm8 .byte 242,69,15,112,192,0 // pshuflw $0x0,%xmm8,%xmm8 @@ -37325,7 +37325,7 @@ _sk_scale_u8_ssse3_lowp: .byte 243,69,15,126,4,19 // movq (%r11,%rdx,1),%xmm8 .byte 102,68,15,96,192 // punpcklbw %xmm0,%xmm8 .byte 102,65,15,113,240,8 // psllw $0x8,%xmm8 - .byte 102,68,15,228,5,10,4,0,0 // pmulhuw 0x40a(%rip),%xmm8 # a90 <_sk_srcover_ssse3_lowp+0xc5> + .byte 102,68,15,228,5,186,8,0,0 // pmulhuw 0x8ba(%rip),%xmm8 # f40 <_sk_xor__ssse3_lowp+0x103> .byte 102,65,15,56,11,192 // pmulhrsw %xmm8,%xmm0 .byte 102,15,56,29,192 // pabsw %xmm0,%xmm0 .byte 102,65,15,56,11,200 // pmulhrsw %xmm8,%xmm1 @@ -37387,14 +37387,14 @@ FUNCTION(_sk_lerp_1_float_ssse3_lowp) _sk_lerp_1_float_ssse3_lowp: .byte 72,173 // lods %ds:(%rsi),%rax .byte 243,68,15,16,0 // movss (%rax),%xmm8 - .byte 243,68,15,89,5,208,2,0,0 // mulss 0x2d0(%rip),%xmm8 # a38 <_sk_srcover_ssse3_lowp+0x6d> + .byte 243,68,15,89,5,120,7,0,0 // mulss 0x778(%rip),%xmm8 # ee0 <_sk_xor__ssse3_lowp+0xa3> .byte 243,65,15,44,192 // cvttss2si %xmm8,%eax .byte 102,68,15,110,192 // movd %eax,%xmm8 .byte 242,69,15,112,192,0 // pshuflw $0x0,%xmm8,%xmm8 .byte 102,69,15,112,192,80 // pshufd $0x50,%xmm8,%xmm8 .byte 102,65,15,56,11,192 // pmulhrsw %xmm8,%xmm0 .byte 102,68,15,56,29,200 // pabsw %xmm0,%xmm9 - .byte 102,68,15,111,21,13,3,0,0 // movdqa 0x30d(%rip),%xmm10 # aa0 <_sk_srcover_ssse3_lowp+0xd5> + .byte 102,68,15,111,21,189,7,0,0 // movdqa 0x7bd(%rip),%xmm10 # f50 <_sk_xor__ssse3_lowp+0x113> .byte 102,69,15,249,208 // psubw %xmm8,%xmm10 .byte 102,15,111,196 // movdqa %xmm4,%xmm0 .byte 102,65,15,56,11,194 // pmulhrsw %xmm10,%xmm0 @@ -37431,10 +37431,10 @@ _sk_lerp_u8_ssse3_lowp: .byte 243,69,15,126,4,19 // movq (%r11,%rdx,1),%xmm8 .byte 102,68,15,96,192 // punpcklbw %xmm0,%xmm8 .byte 102,65,15,113,240,8 // psllw $0x8,%xmm8 - .byte 102,68,15,228,5,123,2,0,0 // pmulhuw 0x27b(%rip),%xmm8 # ab0 <_sk_srcover_ssse3_lowp+0xe5> + .byte 102,68,15,228,5,43,7,0,0 // pmulhuw 0x72b(%rip),%xmm8 # f60 <_sk_xor__ssse3_lowp+0x123> .byte 102,65,15,56,11,192 // pmulhrsw %xmm8,%xmm0 .byte 102,68,15,56,29,200 // pabsw %xmm0,%xmm9 - .byte 102,68,15,111,21,118,2,0,0 // movdqa 0x276(%rip),%xmm10 # ac0 <_sk_srcover_ssse3_lowp+0xf5> + .byte 102,68,15,111,21,38,7,0,0 // movdqa 0x726(%rip),%xmm10 # f70 <_sk_xor__ssse3_lowp+0x133> .byte 102,69,15,249,208 // psubw %xmm8,%xmm10 .byte 102,15,111,196 // movdqa %xmm4,%xmm0 .byte 102,65,15,56,11,194 // pmulhrsw %xmm10,%xmm0 @@ -37560,11 +37560,157 @@ _sk_move_dst_src_ssse3_lowp: .byte 15,40,223 // movaps %xmm7,%xmm3 .byte 255,224 // jmpq *%rax +HIDDEN _sk_clear_ssse3_lowp +.globl _sk_clear_ssse3_lowp +FUNCTION(_sk_clear_ssse3_lowp) +_sk_clear_ssse3_lowp: + .byte 72,173 // lods %ds:(%rsi),%rax + .byte 15,87,192 // xorps %xmm0,%xmm0 + .byte 15,87,201 // xorps %xmm1,%xmm1 + .byte 15,87,210 // xorps %xmm2,%xmm2 + .byte 15,87,219 // xorps %xmm3,%xmm3 + .byte 255,224 // jmpq *%rax + +HIDDEN _sk_srcatop_ssse3_lowp +.globl _sk_srcatop_ssse3_lowp +FUNCTION(_sk_srcatop_ssse3_lowp) +_sk_srcatop_ssse3_lowp: + .byte 102,15,56,11,199 // pmulhrsw %xmm7,%xmm0 + .byte 102,68,15,56,29,192 // pabsw %xmm0,%xmm8 + .byte 102,68,15,111,13,145,5,0,0 // movdqa 0x591(%rip),%xmm9 # f80 <_sk_xor__ssse3_lowp+0x143> + .byte 102,68,15,249,203 // psubw %xmm3,%xmm9 + .byte 102,15,111,196 // movdqa %xmm4,%xmm0 + .byte 102,65,15,56,11,193 // pmulhrsw %xmm9,%xmm0 + .byte 102,15,56,29,192 // pabsw %xmm0,%xmm0 + .byte 102,65,15,253,192 // paddw %xmm8,%xmm0 + .byte 102,15,56,11,207 // pmulhrsw %xmm7,%xmm1 + .byte 102,68,15,56,29,193 // pabsw %xmm1,%xmm8 + .byte 102,15,111,205 // movdqa %xmm5,%xmm1 + .byte 102,65,15,56,11,201 // pmulhrsw %xmm9,%xmm1 + .byte 102,15,56,29,201 // pabsw %xmm1,%xmm1 + .byte 102,65,15,253,200 // paddw %xmm8,%xmm1 + .byte 102,15,56,11,215 // pmulhrsw %xmm7,%xmm2 + .byte 102,68,15,56,29,194 // pabsw %xmm2,%xmm8 + .byte 102,15,111,214 // movdqa %xmm6,%xmm2 + .byte 102,65,15,56,11,209 // pmulhrsw %xmm9,%xmm2 + .byte 102,15,56,29,210 // pabsw %xmm2,%xmm2 + .byte 102,65,15,253,208 // paddw %xmm8,%xmm2 + .byte 102,15,56,11,223 // pmulhrsw %xmm7,%xmm3 + .byte 102,68,15,56,29,195 // pabsw %xmm3,%xmm8 + .byte 102,68,15,56,11,207 // pmulhrsw %xmm7,%xmm9 + .byte 102,65,15,56,29,217 // pabsw %xmm9,%xmm3 + .byte 102,65,15,253,216 // paddw %xmm8,%xmm3 + .byte 72,173 // lods %ds:(%rsi),%rax + .byte 255,224 // jmpq *%rax + +HIDDEN _sk_dstatop_ssse3_lowp +.globl _sk_dstatop_ssse3_lowp +FUNCTION(_sk_dstatop_ssse3_lowp) +_sk_dstatop_ssse3_lowp: + .byte 102,68,15,111,196 // movdqa %xmm4,%xmm8 + .byte 102,68,15,56,11,195 // pmulhrsw %xmm3,%xmm8 + .byte 102,69,15,56,29,192 // pabsw %xmm8,%xmm8 + .byte 102,68,15,111,13,16,5,0,0 // movdqa 0x510(%rip),%xmm9 # f90 <_sk_xor__ssse3_lowp+0x153> + .byte 102,68,15,249,207 // psubw %xmm7,%xmm9 + .byte 102,65,15,56,11,193 // pmulhrsw %xmm9,%xmm0 + .byte 102,15,56,29,192 // pabsw %xmm0,%xmm0 + .byte 102,65,15,253,192 // paddw %xmm8,%xmm0 + .byte 102,68,15,111,197 // movdqa %xmm5,%xmm8 + .byte 102,68,15,56,11,195 // pmulhrsw %xmm3,%xmm8 + .byte 102,69,15,56,29,192 // pabsw %xmm8,%xmm8 + .byte 102,65,15,56,11,201 // pmulhrsw %xmm9,%xmm1 + .byte 102,15,56,29,201 // pabsw %xmm1,%xmm1 + .byte 102,65,15,253,200 // paddw %xmm8,%xmm1 + .byte 102,68,15,111,198 // movdqa %xmm6,%xmm8 + .byte 102,68,15,56,11,195 // pmulhrsw %xmm3,%xmm8 + .byte 102,69,15,56,29,192 // pabsw %xmm8,%xmm8 + .byte 102,65,15,56,11,209 // pmulhrsw %xmm9,%xmm2 + .byte 102,15,56,29,210 // pabsw %xmm2,%xmm2 + .byte 102,65,15,253,208 // paddw %xmm8,%xmm2 + .byte 102,68,15,111,199 // movdqa %xmm7,%xmm8 + .byte 102,68,15,56,11,195 // pmulhrsw %xmm3,%xmm8 + .byte 102,69,15,56,29,192 // pabsw %xmm8,%xmm8 + .byte 102,68,15,56,11,203 // pmulhrsw %xmm3,%xmm9 + .byte 102,65,15,56,29,217 // pabsw %xmm9,%xmm3 + .byte 102,65,15,253,216 // paddw %xmm8,%xmm3 + .byte 72,173 // lods %ds:(%rsi),%rax + .byte 255,224 // jmpq *%rax + +HIDDEN _sk_srcin_ssse3_lowp +.globl _sk_srcin_ssse3_lowp +FUNCTION(_sk_srcin_ssse3_lowp) +_sk_srcin_ssse3_lowp: + .byte 102,15,56,11,199 // pmulhrsw %xmm7,%xmm0 + .byte 102,15,56,29,192 // pabsw %xmm0,%xmm0 + .byte 102,15,56,11,207 // pmulhrsw %xmm7,%xmm1 + .byte 102,15,56,29,201 // pabsw %xmm1,%xmm1 + .byte 102,15,56,11,215 // pmulhrsw %xmm7,%xmm2 + .byte 102,15,56,29,210 // pabsw %xmm2,%xmm2 + .byte 102,15,56,11,223 // pmulhrsw %xmm7,%xmm3 + .byte 102,15,56,29,219 // pabsw %xmm3,%xmm3 + .byte 72,173 // lods %ds:(%rsi),%rax + .byte 255,224 // jmpq *%rax + +HIDDEN _sk_dstin_ssse3_lowp +.globl _sk_dstin_ssse3_lowp +FUNCTION(_sk_dstin_ssse3_lowp) +_sk_dstin_ssse3_lowp: + .byte 102,15,111,196 // movdqa %xmm4,%xmm0 + .byte 102,15,56,11,195 // pmulhrsw %xmm3,%xmm0 + .byte 102,15,56,29,192 // pabsw %xmm0,%xmm0 + .byte 102,15,111,205 // movdqa %xmm5,%xmm1 + .byte 102,15,56,11,203 // pmulhrsw %xmm3,%xmm1 + .byte 102,15,56,29,201 // pabsw %xmm1,%xmm1 + .byte 102,15,111,214 // movdqa %xmm6,%xmm2 + .byte 102,15,56,11,211 // pmulhrsw %xmm3,%xmm2 + .byte 102,15,56,29,210 // pabsw %xmm2,%xmm2 + .byte 102,15,56,11,223 // pmulhrsw %xmm7,%xmm3 + .byte 102,15,56,29,219 // pabsw %xmm3,%xmm3 + .byte 72,173 // lods %ds:(%rsi),%rax + .byte 255,224 // jmpq *%rax + +HIDDEN _sk_srcout_ssse3_lowp +.globl _sk_srcout_ssse3_lowp +FUNCTION(_sk_srcout_ssse3_lowp) +_sk_srcout_ssse3_lowp: + .byte 102,68,15,111,5,54,4,0,0 // movdqa 0x436(%rip),%xmm8 # fa0 <_sk_xor__ssse3_lowp+0x163> + .byte 102,68,15,249,199 // psubw %xmm7,%xmm8 + .byte 102,65,15,56,11,192 // pmulhrsw %xmm8,%xmm0 + .byte 102,15,56,29,192 // pabsw %xmm0,%xmm0 + .byte 102,65,15,56,11,200 // pmulhrsw %xmm8,%xmm1 + .byte 102,15,56,29,201 // pabsw %xmm1,%xmm1 + .byte 102,65,15,56,11,208 // pmulhrsw %xmm8,%xmm2 + .byte 102,15,56,29,210 // pabsw %xmm2,%xmm2 + .byte 102,68,15,56,11,195 // pmulhrsw %xmm3,%xmm8 + .byte 102,65,15,56,29,216 // pabsw %xmm8,%xmm3 + .byte 72,173 // lods %ds:(%rsi),%rax + .byte 255,224 // jmpq *%rax + +HIDDEN _sk_dstout_ssse3_lowp +.globl _sk_dstout_ssse3_lowp +FUNCTION(_sk_dstout_ssse3_lowp) +_sk_dstout_ssse3_lowp: + .byte 102,68,15,111,5,7,4,0,0 // movdqa 0x407(%rip),%xmm8 # fb0 <_sk_xor__ssse3_lowp+0x173> + .byte 102,68,15,249,195 // psubw %xmm3,%xmm8 + .byte 102,15,111,196 // movdqa %xmm4,%xmm0 + .byte 102,65,15,56,11,192 // pmulhrsw %xmm8,%xmm0 + .byte 102,15,56,29,192 // pabsw %xmm0,%xmm0 + .byte 102,15,111,205 // movdqa %xmm5,%xmm1 + .byte 102,65,15,56,11,200 // pmulhrsw %xmm8,%xmm1 + .byte 102,15,56,29,201 // pabsw %xmm1,%xmm1 + .byte 102,15,111,214 // movdqa %xmm6,%xmm2 + .byte 102,65,15,56,11,208 // pmulhrsw %xmm8,%xmm2 + .byte 102,15,56,29,210 // pabsw %xmm2,%xmm2 + .byte 102,68,15,56,11,199 // pmulhrsw %xmm7,%xmm8 + .byte 102,65,15,56,29,216 // pabsw %xmm8,%xmm3 + .byte 72,173 // lods %ds:(%rsi),%rax + .byte 255,224 // jmpq *%rax + HIDDEN _sk_srcover_ssse3_lowp .globl _sk_srcover_ssse3_lowp FUNCTION(_sk_srcover_ssse3_lowp) _sk_srcover_ssse3_lowp: - .byte 102,68,15,111,5,252,0,0,0 // movdqa 0xfc(%rip),%xmm8 # ad0 <_sk_srcover_ssse3_lowp+0x105> + .byte 102,68,15,111,5,204,3,0,0 // movdqa 0x3cc(%rip),%xmm8 # fc0 <_sk_xor__ssse3_lowp+0x183> .byte 102,68,15,249,195 // psubw %xmm3,%xmm8 .byte 102,68,15,111,204 // movdqa %xmm4,%xmm9 .byte 102,69,15,56,11,200 // pmulhrsw %xmm8,%xmm9 @@ -37584,6 +37730,163 @@ _sk_srcover_ssse3_lowp: .byte 72,173 // lods %ds:(%rsi),%rax .byte 255,224 // jmpq *%rax +HIDDEN _sk_dstover_ssse3_lowp +.globl _sk_dstover_ssse3_lowp +FUNCTION(_sk_dstover_ssse3_lowp) +_sk_dstover_ssse3_lowp: + .byte 102,68,15,111,5,119,3,0,0 // movdqa 0x377(%rip),%xmm8 # fd0 <_sk_xor__ssse3_lowp+0x193> + .byte 102,68,15,249,199 // psubw %xmm7,%xmm8 + .byte 102,65,15,56,11,192 // pmulhrsw %xmm8,%xmm0 + .byte 102,15,56,29,192 // pabsw %xmm0,%xmm0 + .byte 102,15,253,196 // paddw %xmm4,%xmm0 + .byte 102,65,15,56,11,200 // pmulhrsw %xmm8,%xmm1 + .byte 102,15,56,29,201 // pabsw %xmm1,%xmm1 + .byte 102,15,253,205 // paddw %xmm5,%xmm1 + .byte 102,65,15,56,11,208 // pmulhrsw %xmm8,%xmm2 + .byte 102,15,56,29,210 // pabsw %xmm2,%xmm2 + .byte 102,15,253,214 // paddw %xmm6,%xmm2 + .byte 102,68,15,56,11,195 // pmulhrsw %xmm3,%xmm8 + .byte 102,65,15,56,29,216 // pabsw %xmm8,%xmm3 + .byte 102,15,253,223 // paddw %xmm7,%xmm3 + .byte 72,173 // lods %ds:(%rsi),%rax + .byte 255,224 // jmpq *%rax + +HIDDEN _sk_modulate_ssse3_lowp +.globl _sk_modulate_ssse3_lowp +FUNCTION(_sk_modulate_ssse3_lowp) +_sk_modulate_ssse3_lowp: + .byte 102,15,56,11,196 // pmulhrsw %xmm4,%xmm0 + .byte 102,15,56,29,192 // pabsw %xmm0,%xmm0 + .byte 102,15,56,11,205 // pmulhrsw %xmm5,%xmm1 + .byte 102,15,56,29,201 // pabsw %xmm1,%xmm1 + .byte 102,15,56,11,214 // pmulhrsw %xmm6,%xmm2 + .byte 102,15,56,29,210 // pabsw %xmm2,%xmm2 + .byte 102,15,56,11,223 // pmulhrsw %xmm7,%xmm3 + .byte 102,15,56,29,219 // pabsw %xmm3,%xmm3 + .byte 72,173 // lods %ds:(%rsi),%rax + .byte 255,224 // jmpq *%rax + +HIDDEN _sk_multiply_ssse3_lowp +.globl _sk_multiply_ssse3_lowp +FUNCTION(_sk_multiply_ssse3_lowp) +_sk_multiply_ssse3_lowp: + .byte 102,68,15,111,5,12,3,0,0 // movdqa 0x30c(%rip),%xmm8 # fe0 <_sk_xor__ssse3_lowp+0x1a3> + .byte 102,69,15,111,200 // movdqa %xmm8,%xmm9 + .byte 102,68,15,249,207 // psubw %xmm7,%xmm9 + .byte 102,68,15,111,208 // movdqa %xmm0,%xmm10 + .byte 102,69,15,56,11,209 // pmulhrsw %xmm9,%xmm10 + .byte 102,69,15,56,29,210 // pabsw %xmm10,%xmm10 + .byte 102,68,15,249,195 // psubw %xmm3,%xmm8 + .byte 102,15,56,11,196 // pmulhrsw %xmm4,%xmm0 + .byte 102,68,15,111,220 // movdqa %xmm4,%xmm11 + .byte 102,69,15,56,11,216 // pmulhrsw %xmm8,%xmm11 + .byte 102,69,15,56,29,219 // pabsw %xmm11,%xmm11 + .byte 102,69,15,253,218 // paddw %xmm10,%xmm11 + .byte 102,15,56,29,192 // pabsw %xmm0,%xmm0 + .byte 102,65,15,253,195 // paddw %xmm11,%xmm0 + .byte 102,68,15,111,209 // movdqa %xmm1,%xmm10 + .byte 102,69,15,56,11,209 // pmulhrsw %xmm9,%xmm10 + .byte 102,69,15,56,29,210 // pabsw %xmm10,%xmm10 + .byte 102,15,56,11,205 // pmulhrsw %xmm5,%xmm1 + .byte 102,68,15,111,221 // movdqa %xmm5,%xmm11 + .byte 102,69,15,56,11,216 // pmulhrsw %xmm8,%xmm11 + .byte 102,69,15,56,29,219 // pabsw %xmm11,%xmm11 + .byte 102,69,15,253,218 // paddw %xmm10,%xmm11 + .byte 102,15,56,29,201 // pabsw %xmm1,%xmm1 + .byte 102,65,15,253,203 // paddw %xmm11,%xmm1 + .byte 102,68,15,111,210 // movdqa %xmm2,%xmm10 + .byte 102,69,15,56,11,209 // pmulhrsw %xmm9,%xmm10 + .byte 102,69,15,56,29,210 // pabsw %xmm10,%xmm10 + .byte 102,15,56,11,214 // pmulhrsw %xmm6,%xmm2 + .byte 102,68,15,111,222 // movdqa %xmm6,%xmm11 + .byte 102,69,15,56,11,216 // pmulhrsw %xmm8,%xmm11 + .byte 102,69,15,56,29,219 // pabsw %xmm11,%xmm11 + .byte 102,69,15,253,218 // paddw %xmm10,%xmm11 + .byte 102,15,56,29,210 // pabsw %xmm2,%xmm2 + .byte 102,65,15,253,211 // paddw %xmm11,%xmm2 + .byte 102,68,15,56,11,203 // pmulhrsw %xmm3,%xmm9 + .byte 102,69,15,56,29,201 // pabsw %xmm9,%xmm9 + .byte 102,68,15,56,11,199 // pmulhrsw %xmm7,%xmm8 + .byte 102,69,15,56,29,192 // pabsw %xmm8,%xmm8 + .byte 102,69,15,253,193 // paddw %xmm9,%xmm8 + .byte 102,15,56,11,223 // pmulhrsw %xmm7,%xmm3 + .byte 102,15,56,29,219 // pabsw %xmm3,%xmm3 + .byte 102,65,15,253,216 // paddw %xmm8,%xmm3 + .byte 72,173 // lods %ds:(%rsi),%rax + .byte 255,224 // jmpq *%rax + +HIDDEN _sk_plus__ssse3_lowp +.globl _sk_plus__ssse3_lowp +FUNCTION(_sk_plus__ssse3_lowp) +_sk_plus__ssse3_lowp: + .byte 102,15,253,196 // paddw %xmm4,%xmm0 + .byte 102,15,253,205 // paddw %xmm5,%xmm1 + .byte 102,15,253,214 // paddw %xmm6,%xmm2 + .byte 102,15,253,223 // paddw %xmm7,%xmm3 + .byte 72,173 // lods %ds:(%rsi),%rax + .byte 255,224 // jmpq *%rax + +HIDDEN _sk_screen_ssse3_lowp +.globl _sk_screen_ssse3_lowp +FUNCTION(_sk_screen_ssse3_lowp) +_sk_screen_ssse3_lowp: + .byte 102,68,15,111,5,30,2,0,0 // movdqa 0x21e(%rip),%xmm8 # ff0 <_sk_xor__ssse3_lowp+0x1b3> + .byte 102,69,15,111,200 // movdqa %xmm8,%xmm9 + .byte 102,68,15,249,200 // psubw %xmm0,%xmm9 + .byte 102,68,15,56,11,204 // pmulhrsw %xmm4,%xmm9 + .byte 102,69,15,56,29,201 // pabsw %xmm9,%xmm9 + .byte 102,65,15,253,193 // paddw %xmm9,%xmm0 + .byte 102,69,15,111,200 // movdqa %xmm8,%xmm9 + .byte 102,68,15,249,201 // psubw %xmm1,%xmm9 + .byte 102,68,15,56,11,205 // pmulhrsw %xmm5,%xmm9 + .byte 102,69,15,56,29,201 // pabsw %xmm9,%xmm9 + .byte 102,65,15,253,201 // paddw %xmm9,%xmm1 + .byte 102,69,15,111,200 // movdqa %xmm8,%xmm9 + .byte 102,68,15,249,202 // psubw %xmm2,%xmm9 + .byte 102,68,15,56,11,206 // pmulhrsw %xmm6,%xmm9 + .byte 102,69,15,56,29,201 // pabsw %xmm9,%xmm9 + .byte 102,65,15,253,209 // paddw %xmm9,%xmm2 + .byte 102,68,15,249,195 // psubw %xmm3,%xmm8 + .byte 102,68,15,56,11,199 // pmulhrsw %xmm7,%xmm8 + .byte 102,69,15,56,29,192 // pabsw %xmm8,%xmm8 + .byte 102,65,15,253,216 // paddw %xmm8,%xmm3 + .byte 72,173 // lods %ds:(%rsi),%rax + .byte 255,224 // jmpq *%rax + +HIDDEN _sk_xor__ssse3_lowp +.globl _sk_xor__ssse3_lowp +FUNCTION(_sk_xor__ssse3_lowp) +_sk_xor__ssse3_lowp: + .byte 102,68,15,111,5,186,1,0,0 // movdqa 0x1ba(%rip),%xmm8 # 1000 <_sk_xor__ssse3_lowp+0x1c3> + .byte 102,69,15,111,200 // movdqa %xmm8,%xmm9 + .byte 102,68,15,249,207 // psubw %xmm7,%xmm9 + .byte 102,65,15,56,11,193 // pmulhrsw %xmm9,%xmm0 + .byte 102,68,15,56,29,208 // pabsw %xmm0,%xmm10 + .byte 102,68,15,249,195 // psubw %xmm3,%xmm8 + .byte 102,15,111,196 // movdqa %xmm4,%xmm0 + .byte 102,65,15,56,11,192 // pmulhrsw %xmm8,%xmm0 + .byte 102,15,56,29,192 // pabsw %xmm0,%xmm0 + .byte 102,65,15,253,194 // paddw %xmm10,%xmm0 + .byte 102,65,15,56,11,201 // pmulhrsw %xmm9,%xmm1 + .byte 102,68,15,56,29,209 // pabsw %xmm1,%xmm10 + .byte 102,15,111,205 // movdqa %xmm5,%xmm1 + .byte 102,65,15,56,11,200 // pmulhrsw %xmm8,%xmm1 + .byte 102,15,56,29,201 // pabsw %xmm1,%xmm1 + .byte 102,65,15,253,202 // paddw %xmm10,%xmm1 + .byte 102,65,15,56,11,209 // pmulhrsw %xmm9,%xmm2 + .byte 102,68,15,56,29,210 // pabsw %xmm2,%xmm10 + .byte 102,15,111,214 // movdqa %xmm6,%xmm2 + .byte 102,65,15,56,11,208 // pmulhrsw %xmm8,%xmm2 + .byte 102,15,56,29,210 // pabsw %xmm2,%xmm2 + .byte 102,65,15,253,210 // paddw %xmm10,%xmm2 + .byte 102,68,15,56,11,203 // pmulhrsw %xmm3,%xmm9 + .byte 102,69,15,56,29,201 // pabsw %xmm9,%xmm9 + .byte 102,68,15,56,11,199 // pmulhrsw %xmm7,%xmm8 + .byte 102,65,15,56,29,216 // pabsw %xmm8,%xmm3 + .byte 102,65,15,253,217 // paddw %xmm9,%xmm3 + .byte 72,173 // lods %ds:(%rsi),%rax + .byte 255,224 // jmpq *%rax + BALIGN4 .byte 0,0 // add %al,(%rax) .byte 0,71,0 // add %al,0x0(%rdi) @@ -37601,7 +37904,7 @@ BALIGN16 .byte 11,15 // or (%rdi),%ecx .byte 129,128,129,128,129,128,129,128,129,128// addl $0x80818081,-0x7f7e7f7f(%rax) .byte 129,128,129,128,129,128,0,4,8,12 // addl $0xc080400,-0x7f7e7f7f(%rax) - .byte 1,5,9,13,2,6 // add %eax,0x6020d09(%rip) # 6021773 <_sk_srcover_ssse3_lowp+0x6020da8> + .byte 1,5,9,13,2,6 // add %eax,0x6020d09(%rip) # 6021c23 <_sk_xor__ssse3_lowp+0x6020de6> .byte 10,14 // or (%rsi),%cl .byte 3,7 // add (%rdi),%eax .byte 11,15 // or (%rdi),%ecx @@ -37619,6 +37922,26 @@ BALIGN16 .byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax) .byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax) .byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax) - .byte 0 // .byte 0x0 - .byte 128,0,128 // addb $0x80,(%rax) + .byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax) + .byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax) + .byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax) + .byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax) + .byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax) + .byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax) + .byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax) + .byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax) + .byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax) + .byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax) + .byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax) + .byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax) + .byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax) + .byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax) + .byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax) + .byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax) + .byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax) + .byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax) + .byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax) + .byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax) + .byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax) + .byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax) #endif diff --git a/src/jumper/SkJumper_generated_win.S b/src/jumper/SkJumper_generated_win.S index e75d982b53..1368f2b8ce 100644 --- a/src/jumper/SkJumper_generated_win.S +++ b/src/jumper/SkJumper_generated_win.S @@ -26361,7 +26361,7 @@ _sk_just_return_ssse3_lowp LABEL PROC PUBLIC _sk_constant_color_ssse3_lowp _sk_constant_color_ssse3_lowp LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax - DB 243,15,16,29,130,9,0,0 ; movss 0x982(%rip),%xmm3 # ac8 <_sk_srcover_ssse3_lowp+0x65> + DB 243,15,16,29,42,14,0,0 ; movss 0xe2a(%rip),%xmm3 # f70 <_sk_xor__ssse3_lowp+0x9b> DB 243,15,16,0 ; movss (%rax),%xmm0 DB 243,15,89,195 ; mulss %xmm3,%xmm0 DB 243,68,15,44,200 ; cvttss2si %xmm0,%r9d @@ -26396,7 +26396,7 @@ _sk_load_8888_ssse3_lowp LABEL PROC DB 117,113 ; jne 232 <_sk_load_8888_ssse3_lowp+0x7b> DB 69,15,16,76,147,16 ; movups 0x10(%r11,%rdx,4),%xmm9 DB 69,15,16,4,147 ; movups (%r11,%rdx,4),%xmm8 - DB 102,15,111,5,12,9,0,0 ; movdqa 0x90c(%rip),%xmm0 # ae0 <_sk_srcover_ssse3_lowp+0x7d> + DB 102,15,111,5,172,13,0,0 ; movdqa 0xdac(%rip),%xmm0 # f80 <_sk_xor__ssse3_lowp+0xab> DB 102,68,15,56,0,192 ; pshufb %xmm0,%xmm8 DB 102,68,15,56,0,200 ; pshufb %xmm0,%xmm9 DB 102,65,15,111,208 ; movdqa %xmm8,%xmm2 @@ -26410,7 +26410,7 @@ _sk_load_8888_ssse3_lowp LABEL PROC DB 102,15,239,210 ; pxor %xmm2,%xmm2 DB 102,65,15,96,208 ; punpcklbw %xmm8,%xmm2 DB 102,65,15,104,216 ; punpckhbw %xmm8,%xmm3 - DB 102,68,15,111,5,214,8,0,0 ; movdqa 0x8d6(%rip),%xmm8 # af0 <_sk_srcover_ssse3_lowp+0x8d> + DB 102,68,15,111,5,118,13,0,0 ; movdqa 0xd76(%rip),%xmm8 # f90 <_sk_xor__ssse3_lowp+0xbb> DB 102,65,15,228,192 ; pmulhuw %xmm8,%xmm0 DB 102,65,15,228,200 ; pmulhuw %xmm8,%xmm1 DB 102,65,15,228,208 ; pmulhuw %xmm8,%xmm2 @@ -26548,7 +26548,7 @@ _sk_srcover_rgba_8888_ssse3_lowp LABEL PROC DB 69,15,16,76,147,16 ; movups 0x10(%r11,%rdx,4),%xmm9 DB 69,15,16,4,147 ; movups (%r11,%rdx,4),%xmm8 DB 77,133,192 ; test %r8,%r8 - DB 102,15,111,37,232,6,0,0 ; movdqa 0x6e8(%rip),%xmm4 # b00 <_sk_srcover_ssse3_lowp+0x9d> + DB 102,15,111,37,136,11,0,0 ; movdqa 0xb88(%rip),%xmm4 # fa0 <_sk_xor__ssse3_lowp+0xcb> DB 102,68,15,56,0,196 ; pshufb %xmm4,%xmm8 DB 102,68,15,56,0,204 ; pshufb %xmm4,%xmm9 DB 102,65,15,111,240 ; movdqa %xmm8,%xmm6 @@ -26562,12 +26562,12 @@ _sk_srcover_rgba_8888_ssse3_lowp LABEL PROC DB 102,15,239,246 ; pxor %xmm6,%xmm6 DB 102,65,15,96,240 ; punpcklbw %xmm8,%xmm6 DB 102,65,15,104,248 ; punpckhbw %xmm8,%xmm7 - DB 102,68,15,111,5,178,6,0,0 ; movdqa 0x6b2(%rip),%xmm8 # b10 <_sk_srcover_ssse3_lowp+0xad> + DB 102,68,15,111,5,82,11,0,0 ; movdqa 0xb52(%rip),%xmm8 # fb0 <_sk_xor__ssse3_lowp+0xdb> DB 102,65,15,228,224 ; pmulhuw %xmm8,%xmm4 DB 102,65,15,228,232 ; pmulhuw %xmm8,%xmm5 DB 102,65,15,228,240 ; pmulhuw %xmm8,%xmm6 DB 102,65,15,228,248 ; pmulhuw %xmm8,%xmm7 - DB 102,68,15,111,29,165,6,0,0 ; movdqa 0x6a5(%rip),%xmm11 # b20 <_sk_srcover_ssse3_lowp+0xbd> + DB 102,68,15,111,29,69,11,0,0 ; movdqa 0xb45(%rip),%xmm11 # fc0 <_sk_xor__ssse3_lowp+0xeb> DB 102,68,15,249,219 ; psubw %xmm3,%xmm11 DB 102,68,15,111,196 ; movdqa %xmm4,%xmm8 DB 102,69,15,56,11,195 ; pmulhrsw %xmm11,%xmm8 @@ -26703,7 +26703,7 @@ PUBLIC _sk_scale_1_float_ssse3_lowp _sk_scale_1_float_ssse3_lowp LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 243,68,15,16,0 ; movss (%rax),%xmm8 - DB 243,68,15,89,5,32,4,0,0 ; mulss 0x420(%rip),%xmm8 # acc <_sk_srcover_ssse3_lowp+0x69> + DB 243,68,15,89,5,200,8,0,0 ; mulss 0x8c8(%rip),%xmm8 # f74 <_sk_xor__ssse3_lowp+0x9f> DB 243,65,15,44,192 ; cvttss2si %xmm8,%eax DB 102,68,15,110,192 ; movd %eax,%xmm8 DB 242,69,15,112,192,0 ; pshuflw $0x0,%xmm8,%xmm8 @@ -26729,7 +26729,7 @@ _sk_scale_u8_ssse3_lowp LABEL PROC DB 243,69,15,126,4,19 ; movq (%r11,%rdx,1),%xmm8 DB 102,68,15,96,192 ; punpcklbw %xmm0,%xmm8 DB 102,65,15,113,240,8 ; psllw $0x8,%xmm8 - DB 102,68,15,228,5,25,4,0,0 ; pmulhuw 0x419(%rip),%xmm8 # b30 <_sk_srcover_ssse3_lowp+0xcd> + DB 102,68,15,228,5,185,8,0,0 ; pmulhuw 0x8b9(%rip),%xmm8 # fd0 <_sk_xor__ssse3_lowp+0xfb> DB 102,65,15,56,11,192 ; pmulhrsw %xmm8,%xmm0 DB 102,15,56,29,192 ; pabsw %xmm0,%xmm0 DB 102,65,15,56,11,200 ; pmulhrsw %xmm8,%xmm1 @@ -26795,14 +26795,14 @@ PUBLIC _sk_lerp_1_float_ssse3_lowp _sk_lerp_1_float_ssse3_lowp LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 243,68,15,16,0 ; movss (%rax),%xmm8 - DB 243,68,15,89,5,212,2,0,0 ; mulss 0x2d4(%rip),%xmm8 # ad0 <_sk_srcover_ssse3_lowp+0x6d> + DB 243,68,15,89,5,124,7,0,0 ; mulss 0x77c(%rip),%xmm8 # f78 <_sk_xor__ssse3_lowp+0xa3> DB 243,65,15,44,192 ; cvttss2si %xmm8,%eax DB 102,68,15,110,192 ; movd %eax,%xmm8 DB 242,69,15,112,192,0 ; pshuflw $0x0,%xmm8,%xmm8 DB 102,69,15,112,192,80 ; pshufd $0x50,%xmm8,%xmm8 DB 102,65,15,56,11,192 ; pmulhrsw %xmm8,%xmm0 DB 102,68,15,56,29,200 ; pabsw %xmm0,%xmm9 - DB 102,68,15,111,21,25,3,0,0 ; movdqa 0x319(%rip),%xmm10 # b40 <_sk_srcover_ssse3_lowp+0xdd> + DB 102,68,15,111,21,185,7,0,0 ; movdqa 0x7b9(%rip),%xmm10 # fe0 <_sk_xor__ssse3_lowp+0x10b> DB 102,69,15,249,208 ; psubw %xmm8,%xmm10 DB 102,15,111,196 ; movdqa %xmm4,%xmm0 DB 102,65,15,56,11,194 ; pmulhrsw %xmm10,%xmm0 @@ -26838,10 +26838,10 @@ _sk_lerp_u8_ssse3_lowp LABEL PROC DB 243,69,15,126,4,19 ; movq (%r11,%rdx,1),%xmm8 DB 102,68,15,96,192 ; punpcklbw %xmm0,%xmm8 DB 102,65,15,113,240,8 ; psllw $0x8,%xmm8 - DB 102,68,15,228,5,134,2,0,0 ; pmulhuw 0x286(%rip),%xmm8 # b50 <_sk_srcover_ssse3_lowp+0xed> + DB 102,68,15,228,5,38,7,0,0 ; pmulhuw 0x726(%rip),%xmm8 # ff0 <_sk_xor__ssse3_lowp+0x11b> DB 102,65,15,56,11,192 ; pmulhrsw %xmm8,%xmm0 DB 102,68,15,56,29,200 ; pabsw %xmm0,%xmm9 - DB 102,68,15,111,21,129,2,0,0 ; movdqa 0x281(%rip),%xmm10 # b60 <_sk_srcover_ssse3_lowp+0xfd> + DB 102,68,15,111,21,33,7,0,0 ; movdqa 0x721(%rip),%xmm10 # 1000 <_sk_xor__ssse3_lowp+0x12b> DB 102,69,15,249,208 ; psubw %xmm8,%xmm10 DB 102,15,111,196 ; movdqa %xmm4,%xmm0 DB 102,65,15,56,11,194 ; pmulhrsw %xmm10,%xmm0 @@ -26960,9 +26960,141 @@ _sk_move_dst_src_ssse3_lowp LABEL PROC DB 15,40,223 ; movaps %xmm7,%xmm3 DB 255,224 ; jmpq *%rax +PUBLIC _sk_clear_ssse3_lowp +_sk_clear_ssse3_lowp LABEL PROC + DB 72,173 ; lods %ds:(%rsi),%rax + DB 15,87,192 ; xorps %xmm0,%xmm0 + DB 15,87,201 ; xorps %xmm1,%xmm1 + DB 15,87,210 ; xorps %xmm2,%xmm2 + DB 15,87,219 ; xorps %xmm3,%xmm3 + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_srcatop_ssse3_lowp +_sk_srcatop_ssse3_lowp LABEL PROC + DB 102,15,56,11,199 ; pmulhrsw %xmm7,%xmm0 + DB 102,68,15,56,29,192 ; pabsw %xmm0,%xmm8 + DB 102,68,15,111,13,137,5,0,0 ; movdqa 0x589(%rip),%xmm9 # 1010 <_sk_xor__ssse3_lowp+0x13b> + DB 102,68,15,249,203 ; psubw %xmm3,%xmm9 + DB 102,15,111,196 ; movdqa %xmm4,%xmm0 + DB 102,65,15,56,11,193 ; pmulhrsw %xmm9,%xmm0 + DB 102,15,56,29,192 ; pabsw %xmm0,%xmm0 + DB 102,65,15,253,192 ; paddw %xmm8,%xmm0 + DB 102,15,56,11,207 ; pmulhrsw %xmm7,%xmm1 + DB 102,68,15,56,29,193 ; pabsw %xmm1,%xmm8 + DB 102,15,111,205 ; movdqa %xmm5,%xmm1 + DB 102,65,15,56,11,201 ; pmulhrsw %xmm9,%xmm1 + DB 102,15,56,29,201 ; pabsw %xmm1,%xmm1 + DB 102,65,15,253,200 ; paddw %xmm8,%xmm1 + DB 102,15,56,11,215 ; pmulhrsw %xmm7,%xmm2 + DB 102,68,15,56,29,194 ; pabsw %xmm2,%xmm8 + DB 102,15,111,214 ; movdqa %xmm6,%xmm2 + DB 102,65,15,56,11,209 ; pmulhrsw %xmm9,%xmm2 + DB 102,15,56,29,210 ; pabsw %xmm2,%xmm2 + DB 102,65,15,253,208 ; paddw %xmm8,%xmm2 + DB 102,15,56,11,223 ; pmulhrsw %xmm7,%xmm3 + DB 102,68,15,56,29,195 ; pabsw %xmm3,%xmm8 + DB 102,68,15,56,11,207 ; pmulhrsw %xmm7,%xmm9 + DB 102,65,15,56,29,217 ; pabsw %xmm9,%xmm3 + DB 102,65,15,253,216 ; paddw %xmm8,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_dstatop_ssse3_lowp +_sk_dstatop_ssse3_lowp LABEL PROC + DB 102,68,15,111,196 ; movdqa %xmm4,%xmm8 + DB 102,68,15,56,11,195 ; pmulhrsw %xmm3,%xmm8 + DB 102,69,15,56,29,192 ; pabsw %xmm8,%xmm8 + DB 102,68,15,111,13,8,5,0,0 ; movdqa 0x508(%rip),%xmm9 # 1020 <_sk_xor__ssse3_lowp+0x14b> + DB 102,68,15,249,207 ; psubw %xmm7,%xmm9 + DB 102,65,15,56,11,193 ; pmulhrsw %xmm9,%xmm0 + DB 102,15,56,29,192 ; pabsw %xmm0,%xmm0 + DB 102,65,15,253,192 ; paddw %xmm8,%xmm0 + DB 102,68,15,111,197 ; movdqa %xmm5,%xmm8 + DB 102,68,15,56,11,195 ; pmulhrsw %xmm3,%xmm8 + DB 102,69,15,56,29,192 ; pabsw %xmm8,%xmm8 + DB 102,65,15,56,11,201 ; pmulhrsw %xmm9,%xmm1 + DB 102,15,56,29,201 ; pabsw %xmm1,%xmm1 + DB 102,65,15,253,200 ; paddw %xmm8,%xmm1 + DB 102,68,15,111,198 ; movdqa %xmm6,%xmm8 + DB 102,68,15,56,11,195 ; pmulhrsw %xmm3,%xmm8 + DB 102,69,15,56,29,192 ; pabsw %xmm8,%xmm8 + DB 102,65,15,56,11,209 ; pmulhrsw %xmm9,%xmm2 + DB 102,15,56,29,210 ; pabsw %xmm2,%xmm2 + DB 102,65,15,253,208 ; paddw %xmm8,%xmm2 + DB 102,68,15,111,199 ; movdqa %xmm7,%xmm8 + DB 102,68,15,56,11,195 ; pmulhrsw %xmm3,%xmm8 + DB 102,69,15,56,29,192 ; pabsw %xmm8,%xmm8 + DB 102,68,15,56,11,203 ; pmulhrsw %xmm3,%xmm9 + DB 102,65,15,56,29,217 ; pabsw %xmm9,%xmm3 + DB 102,65,15,253,216 ; paddw %xmm8,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_srcin_ssse3_lowp +_sk_srcin_ssse3_lowp LABEL PROC + DB 102,15,56,11,199 ; pmulhrsw %xmm7,%xmm0 + DB 102,15,56,29,192 ; pabsw %xmm0,%xmm0 + DB 102,15,56,11,207 ; pmulhrsw %xmm7,%xmm1 + DB 102,15,56,29,201 ; pabsw %xmm1,%xmm1 + DB 102,15,56,11,215 ; pmulhrsw %xmm7,%xmm2 + DB 102,15,56,29,210 ; pabsw %xmm2,%xmm2 + DB 102,15,56,11,223 ; pmulhrsw %xmm7,%xmm3 + DB 102,15,56,29,219 ; pabsw %xmm3,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_dstin_ssse3_lowp +_sk_dstin_ssse3_lowp LABEL PROC + DB 102,15,111,196 ; movdqa %xmm4,%xmm0 + DB 102,15,56,11,195 ; pmulhrsw %xmm3,%xmm0 + DB 102,15,56,29,192 ; pabsw %xmm0,%xmm0 + DB 102,15,111,205 ; movdqa %xmm5,%xmm1 + DB 102,15,56,11,203 ; pmulhrsw %xmm3,%xmm1 + DB 102,15,56,29,201 ; pabsw %xmm1,%xmm1 + DB 102,15,111,214 ; movdqa %xmm6,%xmm2 + DB 102,15,56,11,211 ; pmulhrsw %xmm3,%xmm2 + DB 102,15,56,29,210 ; pabsw %xmm2,%xmm2 + DB 102,15,56,11,223 ; pmulhrsw %xmm7,%xmm3 + DB 102,15,56,29,219 ; pabsw %xmm3,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_srcout_ssse3_lowp +_sk_srcout_ssse3_lowp LABEL PROC + DB 102,68,15,111,5,46,4,0,0 ; movdqa 0x42e(%rip),%xmm8 # 1030 <_sk_xor__ssse3_lowp+0x15b> + DB 102,68,15,249,199 ; psubw %xmm7,%xmm8 + DB 102,65,15,56,11,192 ; pmulhrsw %xmm8,%xmm0 + DB 102,15,56,29,192 ; pabsw %xmm0,%xmm0 + DB 102,65,15,56,11,200 ; pmulhrsw %xmm8,%xmm1 + DB 102,15,56,29,201 ; pabsw %xmm1,%xmm1 + DB 102,65,15,56,11,208 ; pmulhrsw %xmm8,%xmm2 + DB 102,15,56,29,210 ; pabsw %xmm2,%xmm2 + DB 102,68,15,56,11,195 ; pmulhrsw %xmm3,%xmm8 + DB 102,65,15,56,29,216 ; pabsw %xmm8,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_dstout_ssse3_lowp +_sk_dstout_ssse3_lowp LABEL PROC + DB 102,68,15,111,5,255,3,0,0 ; movdqa 0x3ff(%rip),%xmm8 # 1040 <_sk_xor__ssse3_lowp+0x16b> + DB 102,68,15,249,195 ; psubw %xmm3,%xmm8 + DB 102,15,111,196 ; movdqa %xmm4,%xmm0 + DB 102,65,15,56,11,192 ; pmulhrsw %xmm8,%xmm0 + DB 102,15,56,29,192 ; pabsw %xmm0,%xmm0 + DB 102,15,111,205 ; movdqa %xmm5,%xmm1 + DB 102,65,15,56,11,200 ; pmulhrsw %xmm8,%xmm1 + DB 102,15,56,29,201 ; pabsw %xmm1,%xmm1 + DB 102,15,111,214 ; movdqa %xmm6,%xmm2 + DB 102,65,15,56,11,208 ; pmulhrsw %xmm8,%xmm2 + DB 102,15,56,29,210 ; pabsw %xmm2,%xmm2 + DB 102,68,15,56,11,199 ; pmulhrsw %xmm7,%xmm8 + DB 102,65,15,56,29,216 ; pabsw %xmm8,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + PUBLIC _sk_srcover_ssse3_lowp _sk_srcover_ssse3_lowp LABEL PROC - DB 102,68,15,111,5,4,1,0,0 ; movdqa 0x104(%rip),%xmm8 # b70 <_sk_srcover_ssse3_lowp+0x10d> + DB 102,68,15,111,5,196,3,0,0 ; movdqa 0x3c4(%rip),%xmm8 # 1050 <_sk_xor__ssse3_lowp+0x17b> DB 102,68,15,249,195 ; psubw %xmm3,%xmm8 DB 102,68,15,111,204 ; movdqa %xmm4,%xmm9 DB 102,69,15,56,11,200 ; pmulhrsw %xmm8,%xmm9 @@ -26982,6 +27114,151 @@ _sk_srcover_ssse3_lowp LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 255,224 ; jmpq *%rax +PUBLIC _sk_dstover_ssse3_lowp +_sk_dstover_ssse3_lowp LABEL PROC + DB 102,68,15,111,5,111,3,0,0 ; movdqa 0x36f(%rip),%xmm8 # 1060 <_sk_xor__ssse3_lowp+0x18b> + DB 102,68,15,249,199 ; psubw %xmm7,%xmm8 + DB 102,65,15,56,11,192 ; pmulhrsw %xmm8,%xmm0 + DB 102,15,56,29,192 ; pabsw %xmm0,%xmm0 + DB 102,15,253,196 ; paddw %xmm4,%xmm0 + DB 102,65,15,56,11,200 ; pmulhrsw %xmm8,%xmm1 + DB 102,15,56,29,201 ; pabsw %xmm1,%xmm1 + DB 102,15,253,205 ; paddw %xmm5,%xmm1 + DB 102,65,15,56,11,208 ; pmulhrsw %xmm8,%xmm2 + DB 102,15,56,29,210 ; pabsw %xmm2,%xmm2 + DB 102,15,253,214 ; paddw %xmm6,%xmm2 + DB 102,68,15,56,11,195 ; pmulhrsw %xmm3,%xmm8 + DB 102,65,15,56,29,216 ; pabsw %xmm8,%xmm3 + DB 102,15,253,223 ; paddw %xmm7,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_modulate_ssse3_lowp +_sk_modulate_ssse3_lowp LABEL PROC + DB 102,15,56,11,196 ; pmulhrsw %xmm4,%xmm0 + DB 102,15,56,29,192 ; pabsw %xmm0,%xmm0 + DB 102,15,56,11,205 ; pmulhrsw %xmm5,%xmm1 + DB 102,15,56,29,201 ; pabsw %xmm1,%xmm1 + DB 102,15,56,11,214 ; pmulhrsw %xmm6,%xmm2 + DB 102,15,56,29,210 ; pabsw %xmm2,%xmm2 + DB 102,15,56,11,223 ; pmulhrsw %xmm7,%xmm3 + DB 102,15,56,29,219 ; pabsw %xmm3,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_multiply_ssse3_lowp +_sk_multiply_ssse3_lowp LABEL PROC + DB 102,68,15,111,5,4,3,0,0 ; movdqa 0x304(%rip),%xmm8 # 1070 <_sk_xor__ssse3_lowp+0x19b> + DB 102,69,15,111,200 ; movdqa %xmm8,%xmm9 + DB 102,68,15,249,207 ; psubw %xmm7,%xmm9 + DB 102,68,15,111,208 ; movdqa %xmm0,%xmm10 + DB 102,69,15,56,11,209 ; pmulhrsw %xmm9,%xmm10 + DB 102,69,15,56,29,210 ; pabsw %xmm10,%xmm10 + DB 102,68,15,249,195 ; psubw %xmm3,%xmm8 + DB 102,15,56,11,196 ; pmulhrsw %xmm4,%xmm0 + DB 102,68,15,111,220 ; movdqa %xmm4,%xmm11 + DB 102,69,15,56,11,216 ; pmulhrsw %xmm8,%xmm11 + DB 102,69,15,56,29,219 ; pabsw %xmm11,%xmm11 + DB 102,69,15,253,218 ; paddw %xmm10,%xmm11 + DB 102,15,56,29,192 ; pabsw %xmm0,%xmm0 + DB 102,65,15,253,195 ; paddw %xmm11,%xmm0 + DB 102,68,15,111,209 ; movdqa %xmm1,%xmm10 + DB 102,69,15,56,11,209 ; pmulhrsw %xmm9,%xmm10 + DB 102,69,15,56,29,210 ; pabsw %xmm10,%xmm10 + DB 102,15,56,11,205 ; pmulhrsw %xmm5,%xmm1 + DB 102,68,15,111,221 ; movdqa %xmm5,%xmm11 + DB 102,69,15,56,11,216 ; pmulhrsw %xmm8,%xmm11 + DB 102,69,15,56,29,219 ; pabsw %xmm11,%xmm11 + DB 102,69,15,253,218 ; paddw %xmm10,%xmm11 + DB 102,15,56,29,201 ; pabsw %xmm1,%xmm1 + DB 102,65,15,253,203 ; paddw %xmm11,%xmm1 + DB 102,68,15,111,210 ; movdqa %xmm2,%xmm10 + DB 102,69,15,56,11,209 ; pmulhrsw %xmm9,%xmm10 + DB 102,69,15,56,29,210 ; pabsw %xmm10,%xmm10 + DB 102,15,56,11,214 ; pmulhrsw %xmm6,%xmm2 + DB 102,68,15,111,222 ; movdqa %xmm6,%xmm11 + DB 102,69,15,56,11,216 ; pmulhrsw %xmm8,%xmm11 + DB 102,69,15,56,29,219 ; pabsw %xmm11,%xmm11 + DB 102,69,15,253,218 ; paddw %xmm10,%xmm11 + DB 102,15,56,29,210 ; pabsw %xmm2,%xmm2 + DB 102,65,15,253,211 ; paddw %xmm11,%xmm2 + DB 102,68,15,56,11,203 ; pmulhrsw %xmm3,%xmm9 + DB 102,69,15,56,29,201 ; pabsw %xmm9,%xmm9 + DB 102,68,15,56,11,199 ; pmulhrsw %xmm7,%xmm8 + DB 102,69,15,56,29,192 ; pabsw %xmm8,%xmm8 + DB 102,69,15,253,193 ; paddw %xmm9,%xmm8 + DB 102,15,56,11,223 ; pmulhrsw %xmm7,%xmm3 + DB 102,15,56,29,219 ; pabsw %xmm3,%xmm3 + DB 102,65,15,253,216 ; paddw %xmm8,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_plus__ssse3_lowp +_sk_plus__ssse3_lowp LABEL PROC + DB 102,15,253,196 ; paddw %xmm4,%xmm0 + DB 102,15,253,205 ; paddw %xmm5,%xmm1 + DB 102,15,253,214 ; paddw %xmm6,%xmm2 + DB 102,15,253,223 ; paddw %xmm7,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_screen_ssse3_lowp +_sk_screen_ssse3_lowp LABEL PROC + DB 102,68,15,111,5,22,2,0,0 ; movdqa 0x216(%rip),%xmm8 # 1080 <_sk_xor__ssse3_lowp+0x1ab> + DB 102,69,15,111,200 ; movdqa %xmm8,%xmm9 + DB 102,68,15,249,200 ; psubw %xmm0,%xmm9 + DB 102,68,15,56,11,204 ; pmulhrsw %xmm4,%xmm9 + DB 102,69,15,56,29,201 ; pabsw %xmm9,%xmm9 + DB 102,65,15,253,193 ; paddw %xmm9,%xmm0 + DB 102,69,15,111,200 ; movdqa %xmm8,%xmm9 + DB 102,68,15,249,201 ; psubw %xmm1,%xmm9 + DB 102,68,15,56,11,205 ; pmulhrsw %xmm5,%xmm9 + DB 102,69,15,56,29,201 ; pabsw %xmm9,%xmm9 + DB 102,65,15,253,201 ; paddw %xmm9,%xmm1 + DB 102,69,15,111,200 ; movdqa %xmm8,%xmm9 + DB 102,68,15,249,202 ; psubw %xmm2,%xmm9 + DB 102,68,15,56,11,206 ; pmulhrsw %xmm6,%xmm9 + DB 102,69,15,56,29,201 ; pabsw %xmm9,%xmm9 + DB 102,65,15,253,209 ; paddw %xmm9,%xmm2 + DB 102,68,15,249,195 ; psubw %xmm3,%xmm8 + DB 102,68,15,56,11,199 ; pmulhrsw %xmm7,%xmm8 + DB 102,69,15,56,29,192 ; pabsw %xmm8,%xmm8 + DB 102,65,15,253,216 ; paddw %xmm8,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + +PUBLIC _sk_xor__ssse3_lowp +_sk_xor__ssse3_lowp LABEL PROC + DB 102,68,15,111,5,178,1,0,0 ; movdqa 0x1b2(%rip),%xmm8 # 1090 <_sk_xor__ssse3_lowp+0x1bb> + DB 102,69,15,111,200 ; movdqa %xmm8,%xmm9 + DB 102,68,15,249,207 ; psubw %xmm7,%xmm9 + DB 102,65,15,56,11,193 ; pmulhrsw %xmm9,%xmm0 + DB 102,68,15,56,29,208 ; pabsw %xmm0,%xmm10 + DB 102,68,15,249,195 ; psubw %xmm3,%xmm8 + DB 102,15,111,196 ; movdqa %xmm4,%xmm0 + DB 102,65,15,56,11,192 ; pmulhrsw %xmm8,%xmm0 + DB 102,15,56,29,192 ; pabsw %xmm0,%xmm0 + DB 102,65,15,253,194 ; paddw %xmm10,%xmm0 + DB 102,65,15,56,11,201 ; pmulhrsw %xmm9,%xmm1 + DB 102,68,15,56,29,209 ; pabsw %xmm1,%xmm10 + DB 102,15,111,205 ; movdqa %xmm5,%xmm1 + DB 102,65,15,56,11,200 ; pmulhrsw %xmm8,%xmm1 + DB 102,15,56,29,201 ; pabsw %xmm1,%xmm1 + DB 102,65,15,253,202 ; paddw %xmm10,%xmm1 + DB 102,65,15,56,11,209 ; pmulhrsw %xmm9,%xmm2 + DB 102,68,15,56,29,210 ; pabsw %xmm2,%xmm10 + DB 102,15,111,214 ; movdqa %xmm6,%xmm2 + DB 102,65,15,56,11,208 ; pmulhrsw %xmm8,%xmm2 + DB 102,15,56,29,210 ; pabsw %xmm2,%xmm2 + DB 102,65,15,253,210 ; paddw %xmm10,%xmm2 + DB 102,68,15,56,11,203 ; pmulhrsw %xmm3,%xmm9 + DB 102,69,15,56,29,201 ; pabsw %xmm9,%xmm9 + DB 102,68,15,56,11,199 ; pmulhrsw %xmm7,%xmm8 + DB 102,65,15,56,29,216 ; pabsw %xmm8,%xmm3 + DB 102,65,15,253,217 ; paddw %xmm9,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + ALIGN 4 DB 0,0 ; add %al,(%rax) DB 0,71,0 ; add %al,0x0(%rdi) @@ -26999,7 +27276,7 @@ ALIGN 16 DB 11,15 ; or (%rdi),%ecx DB 129,128,129,128,129,128,129,128,129,128; addl $0x80818081,-0x7f7e7f7f(%rax) DB 129,128,129,128,129,128,0,4,8,12 ; addl $0xc080400,-0x7f7e7f7f(%rax) - DB 1,5,9,13,2,6 ; add %eax,0x6020d09(%rip) # 6021813 <_sk_srcover_ssse3_lowp+0x6020db0> + DB 1,5,9,13,2,6 ; add %eax,0x6020d09(%rip) # 6021cb3 <_sk_xor__ssse3_lowp+0x6020dde> DB 10,14 ; or (%rsi),%cl DB 3,7 ; add (%rdi),%eax DB 11,15 ; or (%rdi),%ecx @@ -27017,7 +27294,27 @@ ALIGN 16 DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax) DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax) DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax) - DB 0 ; .byte 0x0 - DB 128,0,128 ; addb $0x80,(%rax) + DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax) + DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax) + DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax) + DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax) + DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax) + DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax) + DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax) + DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax) + DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax) + DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax) + DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax) + DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax) + DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax) + DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax) + DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax) + DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax) + DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax) + DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax) + DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax) + DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax) + DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax) + DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax) ENDIF END diff --git a/src/jumper/SkJumper_stages_lowp.cpp b/src/jumper/SkJumper_stages_lowp.cpp index d78de209fc..38273e1dea 100644 --- a/src/jumper/SkJumper_stages_lowp.cpp +++ b/src/jumper/SkJumper_stages_lowp.cpp @@ -39,6 +39,7 @@ SI F operator-(F x, F y) { return x.vec - y.vec; } SI F operator*(F x, F y) { return _mm_abs_epi16(_mm_mulhrs_epi16(x.vec, y.vec)); } SI F mad(F f, F m, F a) { return f*m+a; } SI F inv(F v) { return 1.0f - v; } +SI F two(F v) { return v + v; } SI F lerp(F from, F to, F t) { return to*t + from*inv(t); } SI F operator<<(F x, int bits) { return x.vec << bits; } @@ -283,6 +284,20 @@ STAGE(move_dst_src) { } \ SI F name##_channel(F s, F d, F sa, F da) -BLEND_MODE(srcover) { return mad(d, inv(sa), s); } +BLEND_MODE(clear) { return 0.0f; } +BLEND_MODE(srcatop) { return s*da + d*inv(sa); } +BLEND_MODE(dstatop) { return d*sa + s*inv(da); } +BLEND_MODE(srcin) { return s * da; } +BLEND_MODE(dstin) { return d * sa; } +BLEND_MODE(srcout) { return s * inv(da); } +BLEND_MODE(dstout) { return d * inv(sa); } +BLEND_MODE(srcover) { return mad(d, inv(sa), s); } +BLEND_MODE(dstover) { return mad(s, inv(da), d); } + +BLEND_MODE(modulate) { return s*d; } +BLEND_MODE(multiply) { return s*inv(da) + d*inv(sa) + s*d; } +BLEND_MODE(plus_) { return s + d; } +BLEND_MODE(screen) { return s + inv(s)*d; } +BLEND_MODE(xor_) { return s*inv(da) + d*inv(sa); } #undef BLEND_MODE |