diff options
author | Mike Klein <mtklein@chromium.org> | 2017-06-05 12:20:56 -0400 |
---|---|---|
committer | Skia Commit-Bot <skia-commit-bot@chromium.org> | 2017-06-05 17:23:05 +0000 |
commit | 727b09c8984b5c972ccde7f8f94d404b221eda6d (patch) | |
tree | 6ef2a9474e611724df8a9e0e0422103aaa1fae60 /src/jumper | |
parent | 0e022297fee80add8d2939145f65d3ee56827d03 (diff) |
lowp: add constant_color, swap, move_dst_src
This is enough for us to do some really simple draws.
Also add some debug tools to help prioritize porting.
Change-Id: I334f8fd2133be1aeec3f3406371a81aa6c184776
Reviewed-on: https://skia-review.googlesource.com/18597
Commit-Queue: Mike Klein <mtklein@chromium.org>
Reviewed-by: Herb Derby <herb@google.com>
Diffstat (limited to 'src/jumper')
-rw-r--r-- | src/jumper/SkJumper.cpp | 64 | ||||
-rw-r--r-- | src/jumper/SkJumper_generated.S | 100 | ||||
-rw-r--r-- | src/jumper/SkJumper_generated_win.S | 94 | ||||
-rw-r--r-- | src/jumper/SkJumper_stages_lowp.cpp | 25 |
4 files changed, 234 insertions, 49 deletions
diff --git a/src/jumper/SkJumper.cpp b/src/jumper/SkJumper.cpp index 535e4a883b..4244bfda9f 100644 --- a/src/jumper/SkJumper.cpp +++ b/src/jumper/SkJumper.cpp @@ -26,6 +26,37 @@ static K kConstants = { {0,1,2,3,4,5,6,7}, }; +#define M(st) +1 +static const int kNumStages = SK_RASTER_PIPELINE_STAGES(M); +#undef M + +#if !__has_feature(memory_sanitizer) && (defined(__x86_64__) || defined(_M_X64)) + #if 0 + #include <atomic> + + #define M(st) #st, + static const char* kStageNames[] = { SK_RASTER_PIPELINE_STAGES(M) }; + #undef M + + static std::atomic<int> gMissingStageCounters[kNumStages]; + + static void log_missing(SkRasterPipeline::StockStage st) { + static SkOnce once; + once([] { atexit([] { + for (int i = 0; i < kNumStages; i++) { + if (int count = gMissingStageCounters[i].load()) { + SkDebugf("%7d\t%s\n", count, kStageNames[i]); + } + } + }); }); + + gMissingStageCounters[st]++; + } + #else + static void log_missing(SkRasterPipeline::StockStage) {} + #endif +#endif + // We can't express the real types of most stage functions portably, so we use a stand-in. // We'll only ever call start_pipeline(), which then chains into the rest for us. using StageFn = void(void); @@ -38,6 +69,17 @@ using StartPipelineFn = void(size_t,size_t,size_t,void**,K*); #define ASM(name, suffix) _sk_##name##_##suffix #endif +// Some stages have low-precision (~15 bit) versions from SkJumper_stages_lowp.cpp. +#define LOWP_STAGES(M) \ + M(constant_color) \ + M(load_8888) \ + M(store_8888) \ + M(swap_rb) \ + M(swap) \ + M(move_src_dst) \ + M(move_dst_src) \ + M(srcover) + extern "C" { #if __has_feature(memory_sanitizer) @@ -83,11 +125,9 @@ extern "C" { SK_RASTER_PIPELINE_STAGES(M) #undef M - StageFn ASM(load_8888, ssse3_lowp), - ASM(store_8888, ssse3_lowp), - ASM(swap_rb, ssse3_lowp), - ASM(move_src_dst, ssse3_lowp), - ASM(srcover, ssse3_lowp); + #define M(st) StageFn ASM(st,ssse3_lowp); + LOWP_STAGES(M) + #undef M #endif // Portable, single-pixel stages. @@ -98,10 +138,6 @@ extern "C" { #undef M } -#define M(st) +1 -static const int kNumStages = SK_RASTER_PIPELINE_STAGES(M); -#undef M - // Engines comprise everything we need to run SkRasterPipelines. struct SkJumper_Engine { StageFn* stages[kNumStages]; @@ -188,13 +224,11 @@ StartPipelineFn* SkRasterPipeline::build_pipeline(void** ip) const { for (const StageList* st = fStages; st; st = st->prev) { StageFn* fn = nullptr; switch (st->stage) { - case SkRasterPipeline::load_8888: fn = ASM(load_8888, ssse3_lowp); break; - case SkRasterPipeline::store_8888: fn = ASM(store_8888, ssse3_lowp); break; - case SkRasterPipeline::swap_rb: fn = ASM(swap_rb, ssse3_lowp); break; - case SkRasterPipeline::move_src_dst: fn = ASM(move_src_dst, ssse3_lowp); break; - case SkRasterPipeline::srcover: fn = ASM(srcover, ssse3_lowp); break; + #define M(st) case SkRasterPipeline::st: fn = ASM(st, ssse3_lowp); break; + LOWP_STAGES(M) + #undef M default: - //SkDebugf("can't %d\n", st->stage); + log_missing(st->stage); ip = reset_point; } if (ip == reset_point) { diff --git a/src/jumper/SkJumper_generated.S b/src/jumper/SkJumper_generated.S index de0fb731a0..eba1759a3f 100644 --- a/src/jumper/SkJumper_generated.S +++ b/src/jumper/SkJumper_generated.S @@ -36943,6 +36943,38 @@ FUNCTION(_sk_just_return_ssse3_lowp) _sk_just_return_ssse3_lowp: .byte 195 // retq +HIDDEN _sk_constant_color_ssse3_lowp +.globl _sk_constant_color_ssse3_lowp +FUNCTION(_sk_constant_color_ssse3_lowp) +_sk_constant_color_ssse3_lowp: + .byte 72,173 // lods %ds:(%rsi),%rax + .byte 243,15,16,29,114,3,0,0 // movss 0x372(%rip),%xmm3 # 428 <_sk_srcover_ssse3_lowp+0x65> + .byte 243,15,16,0 // movss (%rax),%xmm0 + .byte 243,15,89,195 // mulss %xmm3,%xmm0 + .byte 243,68,15,44,200 // cvttss2si %xmm0,%r9d + .byte 102,65,15,110,193 // movd %r9d,%xmm0 + .byte 242,15,112,192,0 // pshuflw $0x0,%xmm0,%xmm0 + .byte 102,15,112,192,80 // pshufd $0x50,%xmm0,%xmm0 + .byte 243,15,16,72,4 // movss 0x4(%rax),%xmm1 + .byte 243,15,89,203 // mulss %xmm3,%xmm1 + .byte 243,68,15,44,201 // cvttss2si %xmm1,%r9d + .byte 102,65,15,110,201 // movd %r9d,%xmm1 + .byte 242,15,112,201,0 // pshuflw $0x0,%xmm1,%xmm1 + .byte 102,15,112,201,80 // pshufd $0x50,%xmm1,%xmm1 + .byte 243,15,16,80,8 // movss 0x8(%rax),%xmm2 + .byte 243,15,89,211 // mulss %xmm3,%xmm2 + .byte 243,68,15,44,202 // cvttss2si %xmm2,%r9d + .byte 102,65,15,110,209 // movd %r9d,%xmm2 + .byte 242,15,112,210,0 // pshuflw $0x0,%xmm2,%xmm2 + .byte 102,15,112,210,80 // pshufd $0x50,%xmm2,%xmm2 + .byte 243,15,89,88,12 // mulss 0xc(%rax),%xmm3 + .byte 243,15,44,195 // cvttss2si %xmm3,%eax + .byte 102,15,110,216 // movd %eax,%xmm3 + .byte 242,15,112,219,0 // pshuflw $0x0,%xmm3,%xmm3 + .byte 102,15,112,219,80 // pshufd $0x50,%xmm3,%xmm3 + .byte 72,173 // lods %ds:(%rsi),%rax + .byte 255,224 // jmpq *%rax + HIDDEN _sk_load_8888_ssse3_lowp .globl _sk_load_8888_ssse3_lowp FUNCTION(_sk_load_8888_ssse3_lowp) @@ -36950,10 +36982,10 @@ _sk_load_8888_ssse3_lowp: .byte 72,173 // lods %ds:(%rsi),%rax .byte 76,139,24 // mov (%rax),%r11 .byte 77,133,192 // test %r8,%r8 - .byte 117,113 // jne 127 <_sk_load_8888_ssse3_lowp+0x7b> + .byte 117,113 // jne 1a2 <_sk_load_8888_ssse3_lowp+0x7b> .byte 69,15,16,76,147,16 // movups 0x10(%r11,%rdx,4),%xmm9 .byte 69,15,16,4,147 // movups (%r11,%rdx,4),%xmm8 - .byte 102,15,111,5,167,2,0,0 // movdqa 0x2a7(%rip),%xmm0 # 370 <_sk_srcover_ssse3_lowp+0x65> + .byte 102,15,111,5,236,2,0,0 // movdqa 0x2ec(%rip),%xmm0 # 430 <_sk_srcover_ssse3_lowp+0x6d> .byte 102,68,15,56,0,192 // pshufb %xmm0,%xmm8 .byte 102,68,15,56,0,200 // pshufb %xmm0,%xmm9 .byte 102,65,15,111,208 // movdqa %xmm8,%xmm2 @@ -36967,7 +36999,7 @@ _sk_load_8888_ssse3_lowp: .byte 102,15,239,210 // pxor %xmm2,%xmm2 .byte 102,65,15,96,208 // punpcklbw %xmm8,%xmm2 .byte 102,65,15,104,216 // punpckhbw %xmm8,%xmm3 - .byte 102,68,15,111,5,113,2,0,0 // movdqa 0x271(%rip),%xmm8 # 380 <_sk_srcover_ssse3_lowp+0x75> + .byte 102,68,15,111,5,182,2,0,0 // movdqa 0x2b6(%rip),%xmm8 # 440 <_sk_srcover_ssse3_lowp+0x7d> .byte 102,65,15,228,192 // pmulhuw %xmm8,%xmm0 .byte 102,65,15,228,200 // pmulhuw %xmm8,%xmm1 .byte 102,65,15,228,208 // pmulhuw %xmm8,%xmm2 @@ -36980,9 +37012,9 @@ _sk_load_8888_ssse3_lowp: .byte 69,15,87,192 // xorps %xmm8,%xmm8 .byte 65,254,201 // dec %r9b .byte 65,128,249,6 // cmp $0x6,%r9b - .byte 119,129 // ja c1 <_sk_load_8888_ssse3_lowp+0x15> + .byte 119,129 // ja 13c <_sk_load_8888_ssse3_lowp+0x15> .byte 69,15,182,201 // movzbl %r9b,%r9d - .byte 76,141,21,133,0,0,0 // lea 0x85(%rip),%r10 # 1d0 <_sk_load_8888_ssse3_lowp+0x124> + .byte 76,141,21,130,0,0,0 // lea 0x82(%rip),%r10 # 248 <_sk_load_8888_ssse3_lowp+0x121> .byte 75,99,4,138 // movslq (%r10,%r9,4),%rax .byte 76,1,208 // add %r10,%rax .byte 255,224 // jmpq *%rax @@ -37007,21 +37039,20 @@ _sk_load_8888_ssse3_lowp: .byte 68,15,40,192 // movaps %xmm0,%xmm8 .byte 243,65,15,16,4,147 // movss (%r11,%rdx,4),%xmm0 .byte 243,68,15,16,192 // movss %xmm0,%xmm8 - .byte 233,244,254,255,255 // jmpq c1 <_sk_load_8888_ssse3_lowp+0x15> - .byte 15,31,0 // nopl (%rax) - .byte 237 // in (%dx),%eax + .byte 233,244,254,255,255 // jmpq 13c <_sk_load_8888_ssse3_lowp+0x15> + .byte 240,255 // lock (bad) .byte 255 // (bad) .byte 255 // (bad) + .byte 219,255 // (bad) .byte 255 // (bad) - .byte 216,255 // fdivr %st(7),%st + .byte 255,202 // dec %edx .byte 255 // (bad) - .byte 255,199 // inc %edi .byte 255 // (bad) .byte 255 // (bad) - .byte 255,182,255,255,255,170 // pushq -0x55000001(%rsi) + .byte 185,255,255,255,173 // mov $0xadffffff,%ecx .byte 255 // (bad) .byte 255 // (bad) - .byte 255,149,255,255,255,132 // callq *-0x7b000001(%rbp) + .byte 255,152,255,255,255,135 // lcall *-0x78000001(%rax) .byte 255 // (bad) .byte 255 // (bad) .byte 255 // .byte 0xff @@ -37049,7 +37080,7 @@ _sk_store_8888_ssse3_lowp: .byte 102,69,15,97,194 // punpcklwd %xmm10,%xmm8 .byte 102,69,15,105,202 // punpckhwd %xmm10,%xmm9 .byte 77,133,192 // test %r8,%r8 - .byte 117,17 // jne 25b <_sk_store_8888_ssse3_lowp+0x6f> + .byte 117,17 // jne 2d3 <_sk_store_8888_ssse3_lowp+0x6f> .byte 243,69,15,127,76,147,16 // movdqu %xmm9,0x10(%r11,%rdx,4) .byte 243,69,15,127,4,147 // movdqu %xmm8,(%r11,%rdx,4) .byte 72,173 // lods %ds:(%rsi),%rax @@ -37058,9 +37089,9 @@ _sk_store_8888_ssse3_lowp: .byte 65,128,225,7 // and $0x7,%r9b .byte 65,254,201 // dec %r9b .byte 65,128,249,6 // cmp $0x6,%r9b - .byte 119,236 // ja 257 <_sk_store_8888_ssse3_lowp+0x6b> + .byte 119,236 // ja 2cf <_sk_store_8888_ssse3_lowp+0x6b> .byte 69,15,182,201 // movzbl %r9b,%r9d - .byte 76,141,21,90,0,0,0 // lea 0x5a(%rip),%r10 # 2d0 <_sk_store_8888_ssse3_lowp+0xe4> + .byte 76,141,21,90,0,0,0 // lea 0x5a(%rip),%r10 # 348 <_sk_store_8888_ssse3_lowp+0xe4> .byte 75,99,4,138 // movslq (%r10,%r9,4),%rax .byte 76,1,208 // add %r10,%rax .byte 255,224 // jmpq *%rax @@ -37076,7 +37107,7 @@ _sk_store_8888_ssse3_lowp: .byte 102,69,15,112,200,229 // pshufd $0xe5,%xmm8,%xmm9 .byte 102,69,15,126,76,147,4 // movd %xmm9,0x4(%r11,%rdx,4) .byte 102,69,15,126,4,147 // movd %xmm8,(%r11,%rdx,4) - .byte 235,136 // jmp 257 <_sk_store_8888_ssse3_lowp+0x6b> + .byte 235,136 // jmp 2cf <_sk_store_8888_ssse3_lowp+0x6b> .byte 144 // nop .byte 247,255 // idiv %edi .byte 255 // (bad) @@ -37109,6 +37140,25 @@ _sk_swap_rb_ssse3_lowp: .byte 65,15,40,208 // movaps %xmm8,%xmm2 .byte 255,224 // jmpq *%rax +HIDDEN _sk_swap_ssse3_lowp +.globl _sk_swap_ssse3_lowp +FUNCTION(_sk_swap_ssse3_lowp) +_sk_swap_ssse3_lowp: + .byte 68,15,40,195 // movaps %xmm3,%xmm8 + .byte 68,15,40,202 // movaps %xmm2,%xmm9 + .byte 68,15,40,209 // movaps %xmm1,%xmm10 + .byte 68,15,40,216 // movaps %xmm0,%xmm11 + .byte 72,173 // lods %ds:(%rsi),%rax + .byte 15,40,196 // movaps %xmm4,%xmm0 + .byte 15,40,205 // movaps %xmm5,%xmm1 + .byte 15,40,214 // movaps %xmm6,%xmm2 + .byte 15,40,223 // movaps %xmm7,%xmm3 + .byte 65,15,40,227 // movaps %xmm11,%xmm4 + .byte 65,15,40,234 // movaps %xmm10,%xmm5 + .byte 65,15,40,241 // movaps %xmm9,%xmm6 + .byte 65,15,40,248 // movaps %xmm8,%xmm7 + .byte 255,224 // jmpq *%rax + HIDDEN _sk_move_src_dst_ssse3_lowp .globl _sk_move_src_dst_ssse3_lowp FUNCTION(_sk_move_src_dst_ssse3_lowp) @@ -37120,11 +37170,22 @@ _sk_move_src_dst_ssse3_lowp: .byte 15,40,251 // movaps %xmm3,%xmm7 .byte 255,224 // jmpq *%rax +HIDDEN _sk_move_dst_src_ssse3_lowp +.globl _sk_move_dst_src_ssse3_lowp +FUNCTION(_sk_move_dst_src_ssse3_lowp) +_sk_move_dst_src_ssse3_lowp: + .byte 72,173 // lods %ds:(%rsi),%rax + .byte 15,40,196 // movaps %xmm4,%xmm0 + .byte 15,40,205 // movaps %xmm5,%xmm1 + .byte 15,40,214 // movaps %xmm6,%xmm2 + .byte 15,40,223 // movaps %xmm7,%xmm3 + .byte 255,224 // jmpq *%rax + HIDDEN _sk_srcover_ssse3_lowp .globl _sk_srcover_ssse3_lowp FUNCTION(_sk_srcover_ssse3_lowp) _sk_srcover_ssse3_lowp: - .byte 102,68,15,111,5,124,0,0,0 // movdqa 0x7c(%rip),%xmm8 # 390 <_sk_srcover_ssse3_lowp+0x85> + .byte 102,68,15,111,5,132,0,0,0 // movdqa 0x84(%rip),%xmm8 # 450 <_sk_srcover_ssse3_lowp+0x8d> .byte 102,68,15,249,195 // psubw %xmm3,%xmm8 .byte 102,68,15,111,204 // movdqa %xmm4,%xmm9 .byte 102,69,15,56,11,200 // pmulhrsw %xmm8,%xmm9 @@ -37144,6 +37205,11 @@ _sk_srcover_ssse3_lowp: .byte 72,173 // lods %ds:(%rsi),%rax .byte 255,224 // jmpq *%rax +BALIGN4 + .byte 0,0 // add %al,(%rax) + .byte 0 // .byte 0x0 + .byte 71 // rex.RXB + BALIGN16 .byte 0,4,8 // add %al,(%rax,%rcx,1) .byte 12,1 // or $0x1,%al diff --git a/src/jumper/SkJumper_generated_win.S b/src/jumper/SkJumper_generated_win.S index 67c9b86a2b..a7848d3706 100644 --- a/src/jumper/SkJumper_generated_win.S +++ b/src/jumper/SkJumper_generated_win.S @@ -26358,15 +26358,45 @@ PUBLIC _sk_just_return_ssse3_lowp _sk_just_return_ssse3_lowp LABEL PROC DB 195 ; retq +PUBLIC _sk_constant_color_ssse3_lowp +_sk_constant_color_ssse3_lowp LABEL PROC + DB 72,173 ; lods %ds:(%rsi),%rax + DB 243,15,16,29,114,3,0,0 ; movss 0x372(%rip),%xmm3 # 4b8 <_sk_srcover_ssse3_lowp+0x65> + DB 243,15,16,0 ; movss (%rax),%xmm0 + DB 243,15,89,195 ; mulss %xmm3,%xmm0 + DB 243,68,15,44,200 ; cvttss2si %xmm0,%r9d + DB 102,65,15,110,193 ; movd %r9d,%xmm0 + DB 242,15,112,192,0 ; pshuflw $0x0,%xmm0,%xmm0 + DB 102,15,112,192,80 ; pshufd $0x50,%xmm0,%xmm0 + DB 243,15,16,72,4 ; movss 0x4(%rax),%xmm1 + DB 243,15,89,203 ; mulss %xmm3,%xmm1 + DB 243,68,15,44,201 ; cvttss2si %xmm1,%r9d + DB 102,65,15,110,201 ; movd %r9d,%xmm1 + DB 242,15,112,201,0 ; pshuflw $0x0,%xmm1,%xmm1 + DB 102,15,112,201,80 ; pshufd $0x50,%xmm1,%xmm1 + DB 243,15,16,80,8 ; movss 0x8(%rax),%xmm2 + DB 243,15,89,211 ; mulss %xmm3,%xmm2 + DB 243,68,15,44,202 ; cvttss2si %xmm2,%r9d + DB 102,65,15,110,209 ; movd %r9d,%xmm2 + DB 242,15,112,210,0 ; pshuflw $0x0,%xmm2,%xmm2 + DB 102,15,112,210,80 ; pshufd $0x50,%xmm2,%xmm2 + DB 243,15,89,88,12 ; mulss 0xc(%rax),%xmm3 + DB 243,15,44,195 ; cvttss2si %xmm3,%eax + DB 102,15,110,216 ; movd %eax,%xmm3 + DB 242,15,112,219,0 ; pshuflw $0x0,%xmm3,%xmm3 + DB 102,15,112,219,80 ; pshufd $0x50,%xmm3,%xmm3 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 255,224 ; jmpq *%rax + PUBLIC _sk_load_8888_ssse3_lowp _sk_load_8888_ssse3_lowp LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 76,139,24 ; mov (%rax),%r11 DB 77,133,192 ; test %r8,%r8 - DB 117,113 ; jne 1b7 <_sk_load_8888_ssse3_lowp+0x7b> + DB 117,113 ; jne 232 <_sk_load_8888_ssse3_lowp+0x7b> DB 69,15,16,76,147,16 ; movups 0x10(%r11,%rdx,4),%xmm9 DB 69,15,16,4,147 ; movups (%r11,%rdx,4),%xmm8 - DB 102,15,111,5,167,2,0,0 ; movdqa 0x2a7(%rip),%xmm0 # 400 <_sk_srcover_ssse3_lowp+0x65> + DB 102,15,111,5,236,2,0,0 ; movdqa 0x2ec(%rip),%xmm0 # 4c0 <_sk_srcover_ssse3_lowp+0x6d> DB 102,68,15,56,0,192 ; pshufb %xmm0,%xmm8 DB 102,68,15,56,0,200 ; pshufb %xmm0,%xmm9 DB 102,65,15,111,208 ; movdqa %xmm8,%xmm2 @@ -26380,7 +26410,7 @@ _sk_load_8888_ssse3_lowp LABEL PROC DB 102,15,239,210 ; pxor %xmm2,%xmm2 DB 102,65,15,96,208 ; punpcklbw %xmm8,%xmm2 DB 102,65,15,104,216 ; punpckhbw %xmm8,%xmm3 - DB 102,68,15,111,5,113,2,0,0 ; movdqa 0x271(%rip),%xmm8 # 410 <_sk_srcover_ssse3_lowp+0x75> + DB 102,68,15,111,5,182,2,0,0 ; movdqa 0x2b6(%rip),%xmm8 # 4d0 <_sk_srcover_ssse3_lowp+0x7d> DB 102,65,15,228,192 ; pmulhuw %xmm8,%xmm0 DB 102,65,15,228,200 ; pmulhuw %xmm8,%xmm1 DB 102,65,15,228,208 ; pmulhuw %xmm8,%xmm2 @@ -26393,9 +26423,9 @@ _sk_load_8888_ssse3_lowp LABEL PROC DB 69,15,87,192 ; xorps %xmm8,%xmm8 DB 65,254,201 ; dec %r9b DB 65,128,249,6 ; cmp $0x6,%r9b - DB 119,129 ; ja 151 <_sk_load_8888_ssse3_lowp+0x15> + DB 119,129 ; ja 1cc <_sk_load_8888_ssse3_lowp+0x15> DB 69,15,182,201 ; movzbl %r9b,%r9d - DB 76,141,21,133,0,0,0 ; lea 0x85(%rip),%r10 # 260 <_sk_load_8888_ssse3_lowp+0x124> + DB 76,141,21,130,0,0,0 ; lea 0x82(%rip),%r10 # 2d8 <_sk_load_8888_ssse3_lowp+0x121> DB 75,99,4,138 ; movslq (%r10,%r9,4),%rax DB 76,1,208 ; add %r10,%rax DB 255,224 ; jmpq *%rax @@ -26420,21 +26450,20 @@ _sk_load_8888_ssse3_lowp LABEL PROC DB 68,15,40,192 ; movaps %xmm0,%xmm8 DB 243,65,15,16,4,147 ; movss (%r11,%rdx,4),%xmm0 DB 243,68,15,16,192 ; movss %xmm0,%xmm8 - DB 233,244,254,255,255 ; jmpq 151 <_sk_load_8888_ssse3_lowp+0x15> - DB 15,31,0 ; nopl (%rax) - DB 237 ; in (%dx),%eax + DB 233,244,254,255,255 ; jmpq 1cc <_sk_load_8888_ssse3_lowp+0x15> + DB 240,255 ; lock (bad) DB 255 ; (bad) DB 255 ; (bad) + DB 219,255 ; (bad) DB 255 ; (bad) - DB 216,255 ; fdivr %st(7),%st + DB 255,202 ; dec %edx DB 255 ; (bad) - DB 255,199 ; inc %edi DB 255 ; (bad) DB 255 ; (bad) - DB 255,182,255,255,255,170 ; pushq -0x55000001(%rsi) + DB 185,255,255,255,173 ; mov $0xadffffff,%ecx DB 255 ; (bad) DB 255 ; (bad) - DB 255,149,255,255,255,132 ; callq *-0x7b000001(%rbp) + DB 255,152,255,255,255,135 ; lcall *-0x78000001(%rax) DB 255 ; (bad) DB 255 ; (bad) DB 255 ; .byte 0xff @@ -26460,7 +26489,7 @@ _sk_store_8888_ssse3_lowp LABEL PROC DB 102,69,15,97,194 ; punpcklwd %xmm10,%xmm8 DB 102,69,15,105,202 ; punpckhwd %xmm10,%xmm9 DB 77,133,192 ; test %r8,%r8 - DB 117,17 ; jne 2eb <_sk_store_8888_ssse3_lowp+0x6f> + DB 117,17 ; jne 363 <_sk_store_8888_ssse3_lowp+0x6f> DB 243,69,15,127,76,147,16 ; movdqu %xmm9,0x10(%r11,%rdx,4) DB 243,69,15,127,4,147 ; movdqu %xmm8,(%r11,%rdx,4) DB 72,173 ; lods %ds:(%rsi),%rax @@ -26469,9 +26498,9 @@ _sk_store_8888_ssse3_lowp LABEL PROC DB 65,128,225,7 ; and $0x7,%r9b DB 65,254,201 ; dec %r9b DB 65,128,249,6 ; cmp $0x6,%r9b - DB 119,236 ; ja 2e7 <_sk_store_8888_ssse3_lowp+0x6b> + DB 119,236 ; ja 35f <_sk_store_8888_ssse3_lowp+0x6b> DB 69,15,182,201 ; movzbl %r9b,%r9d - DB 76,141,21,90,0,0,0 ; lea 0x5a(%rip),%r10 # 360 <_sk_store_8888_ssse3_lowp+0xe4> + DB 76,141,21,90,0,0,0 ; lea 0x5a(%rip),%r10 # 3d8 <_sk_store_8888_ssse3_lowp+0xe4> DB 75,99,4,138 ; movslq (%r10,%r9,4),%rax DB 76,1,208 ; add %r10,%rax DB 255,224 ; jmpq *%rax @@ -26487,7 +26516,7 @@ _sk_store_8888_ssse3_lowp LABEL PROC DB 102,69,15,112,200,229 ; pshufd $0xe5,%xmm8,%xmm9 DB 102,69,15,126,76,147,4 ; movd %xmm9,0x4(%r11,%rdx,4) DB 102,69,15,126,4,147 ; movd %xmm8,(%r11,%rdx,4) - DB 235,136 ; jmp 2e7 <_sk_store_8888_ssse3_lowp+0x6b> + DB 235,136 ; jmp 35f <_sk_store_8888_ssse3_lowp+0x6b> DB 144 ; nop DB 247,255 ; idiv %edi DB 255 ; (bad) @@ -26518,6 +26547,23 @@ _sk_swap_rb_ssse3_lowp LABEL PROC DB 65,15,40,208 ; movaps %xmm8,%xmm2 DB 255,224 ; jmpq *%rax +PUBLIC _sk_swap_ssse3_lowp +_sk_swap_ssse3_lowp LABEL PROC + DB 68,15,40,195 ; movaps %xmm3,%xmm8 + DB 68,15,40,202 ; movaps %xmm2,%xmm9 + DB 68,15,40,209 ; movaps %xmm1,%xmm10 + DB 68,15,40,216 ; movaps %xmm0,%xmm11 + DB 72,173 ; lods %ds:(%rsi),%rax + DB 15,40,196 ; movaps %xmm4,%xmm0 + DB 15,40,205 ; movaps %xmm5,%xmm1 + DB 15,40,214 ; movaps %xmm6,%xmm2 + DB 15,40,223 ; movaps %xmm7,%xmm3 + DB 65,15,40,227 ; movaps %xmm11,%xmm4 + DB 65,15,40,234 ; movaps %xmm10,%xmm5 + DB 65,15,40,241 ; movaps %xmm9,%xmm6 + DB 65,15,40,248 ; movaps %xmm8,%xmm7 + DB 255,224 ; jmpq *%rax + PUBLIC _sk_move_src_dst_ssse3_lowp _sk_move_src_dst_ssse3_lowp LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax @@ -26527,9 +26573,18 @@ _sk_move_src_dst_ssse3_lowp LABEL PROC DB 15,40,251 ; movaps %xmm3,%xmm7 DB 255,224 ; jmpq *%rax +PUBLIC _sk_move_dst_src_ssse3_lowp +_sk_move_dst_src_ssse3_lowp LABEL PROC + DB 72,173 ; lods %ds:(%rsi),%rax + DB 15,40,196 ; movaps %xmm4,%xmm0 + DB 15,40,205 ; movaps %xmm5,%xmm1 + DB 15,40,214 ; movaps %xmm6,%xmm2 + DB 15,40,223 ; movaps %xmm7,%xmm3 + DB 255,224 ; jmpq *%rax + PUBLIC _sk_srcover_ssse3_lowp _sk_srcover_ssse3_lowp LABEL PROC - DB 102,68,15,111,5,124,0,0,0 ; movdqa 0x7c(%rip),%xmm8 # 420 <_sk_srcover_ssse3_lowp+0x85> + DB 102,68,15,111,5,132,0,0,0 ; movdqa 0x84(%rip),%xmm8 # 4e0 <_sk_srcover_ssse3_lowp+0x8d> DB 102,68,15,249,195 ; psubw %xmm3,%xmm8 DB 102,68,15,111,204 ; movdqa %xmm4,%xmm9 DB 102,69,15,56,11,200 ; pmulhrsw %xmm8,%xmm9 @@ -26549,6 +26604,11 @@ _sk_srcover_ssse3_lowp LABEL PROC DB 72,173 ; lods %ds:(%rsi),%rax DB 255,224 ; jmpq *%rax +ALIGN 4 + DB 0,0 ; add %al,(%rax) + DB 0 ; .byte 0x0 + DB 71 ; rex.RXB + ALIGN 16 DB 0,4,8 ; add %al,(%rax,%rcx,1) DB 12,1 ; or $0x1,%al diff --git a/src/jumper/SkJumper_stages_lowp.cpp b/src/jumper/SkJumper_stages_lowp.cpp index 139382377d..987bfa6518 100644 --- a/src/jumper/SkJumper_stages_lowp.cpp +++ b/src/jumper/SkJumper_stages_lowp.cpp @@ -165,6 +165,14 @@ SI U32 to_8888(F r, F g, F b, F a) { // Stages! +STAGE(constant_color) { + auto rgba = (const float*)ctx; + r = rgba[0]; + g = rgba[1]; + b = rgba[2]; + a = rgba[3]; +} + STAGE(load_8888) { auto ptr = *(const uint32_t**)ctx + x; from_8888(load<U32>(ptr, tail), &r,&g,&b,&a); @@ -180,12 +188,29 @@ STAGE(swap_rb) { b = tmp; } +STAGE(swap) { + auto swap = [](F& v, F& dv) { + auto tmp = v; + v = dv; + dv = tmp; + }; + swap(r, dr); + swap(g, dg); + swap(b, db); + swap(a, da); +} STAGE(move_src_dst) { dr = r; dg = g; db = b; da = a; } +STAGE(move_dst_src) { + r = dr; + g = dg; + b = db; + a = da; +} // Most blend modes apply the same logic to each channel. #define BLEND_MODE(name) \ |