aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
author: Mike Klein <mtklein@chromium.org> 2017-06-05 12:20:56 -0400
committer: Skia Commit-Bot <skia-commit-bot@chromium.org> 2017-06-05 17:23:05 +0000
commit: 727b09c8984b5c972ccde7f8f94d404b221eda6d (patch)
tree: 6ef2a9474e611724df8a9e0e0422103aaa1fae60 /src
parent: 0e022297fee80add8d2939145f65d3ee56827d03 (diff)
lowp: add constant_color, swap, move_dst_src
This is enough for us to do some really simple draws. Also add some debug tools to help prioritize porting.

Change-Id: I334f8fd2133be1aeec3f3406371a81aa6c184776
Reviewed-on: https://skia-review.googlesource.com/18597
Commit-Queue: Mike Klein <mtklein@chromium.org>
Reviewed-by: Herb Derby <herb@google.com>
Diffstat (limited to 'src')
-rw-r--r-- src/jumper/SkJumper.cpp 64
-rw-r--r-- src/jumper/SkJumper_generated.S 100
-rw-r--r-- src/jumper/SkJumper_generated_win.S 94
-rw-r--r-- src/jumper/SkJumper_stages_lowp.cpp 25
4 files changed, 234 insertions, 49 deletions
diff --git a/src/jumper/SkJumper.cpp b/src/jumper/SkJumper.cpp
index 535e4a883b..4244bfda9f 100644
--- a/src/jumper/SkJumper.cpp
+++ b/src/jumper/SkJumper.cpp
@@ -26,6 +26,37 @@ static K kConstants = {
{0,1,2,3,4,5,6,7},
};
+#define M(st) +1
+static const int kNumStages = SK_RASTER_PIPELINE_STAGES(M);  // every stage expands to "+1", so this sums to the stage count (assumes the list macro applies M once per stage)
+#undef M
+
+#if !__has_feature(memory_sanitizer) && (defined(__x86_64__) || defined(_M_X64))
+ #if 0  // Debug aid, compiled out by default; change to 1 to count the stages handed to log_missing().
+ #include <atomic>
+
+ #define M(st) #st,
+ static const char* kStageNames[] = { SK_RASTER_PIPELINE_STAGES(M) };  // stringified stage names, in list order
+ #undef M
+
+ static std::atomic<int> gMissingStageCounters[kNumStages];  // one tally per stage, indexed by the StockStage value
+
+ static void log_missing(SkRasterPipeline::StockStage st) {  // Bumps the tally for st; the totals are printed by an atexit handler.
+ static SkOnce once;
+ once([] { atexit([] {  // report is registered through SkOnce, so presumably only a single time
+ for (int i = 0; i < kNumStages; i++) {
+ if (int count = gMissingStageCounters[i].load()) {  // stages with a zero tally are not printed
+ SkDebugf("%7d\t%s\n", count, kStageNames[i]);
+ }
+ }
+ }); });
+
+ gMissingStageCounters[st]++;
+ }
+ #else
+ static void log_missing(SkRasterPipeline::StockStage) {}  // empty stand-in, so callers compile the same either way
+ #endif
+#endif
+
// We can't express the real types of most stage functions portably, so we use a stand-in.
// We'll only ever call start_pipeline(), which then chains into the rest for us.
using StageFn = void(void);
@@ -38,6 +69,17 @@ using StartPipelineFn = void(size_t,size_t,size_t,void**,K*);
#define ASM(name, suffix) _sk_##name##_##suffix
#endif
+// Some stages have low-precision (~15 bit) versions from SkJumper_stages_lowp.cpp. LOWP_STAGES(M) applies M to each of those stage names.
+#define LOWP_STAGES(M) \
+ M(constant_color) \
+ M(load_8888) \
+ M(store_8888) \
+ M(swap_rb) \
+ M(swap) \
+ M(move_src_dst) \
+ M(move_dst_src) \
+ M(srcover)
+
extern "C" {
#if __has_feature(memory_sanitizer)
@@ -83,11 +125,9 @@ extern "C" {
SK_RASTER_PIPELINE_STAGES(M)
#undef M
- StageFn ASM(load_8888, ssse3_lowp),
- ASM(store_8888, ssse3_lowp),
- ASM(swap_rb, ssse3_lowp),
- ASM(move_src_dst, ssse3_lowp),
- ASM(srcover, ssse3_lowp);
+ #define M(st) StageFn ASM(st,ssse3_lowp);
+ LOWP_STAGES(M)
+ #undef M
#endif
// Portable, single-pixel stages.
@@ -98,10 +138,6 @@ extern "C" {
#undef M
}
-#define M(st) +1
-static const int kNumStages = SK_RASTER_PIPELINE_STAGES(M);
-#undef M
-
// Engines comprise everything we need to run SkRasterPipelines.
struct SkJumper_Engine {
StageFn* stages[kNumStages];
@@ -188,13 +224,11 @@ StartPipelineFn* SkRasterPipeline::build_pipeline(void** ip) const {
for (const StageList* st = fStages; st; st = st->prev) {
StageFn* fn = nullptr;
switch (st->stage) {
- case SkRasterPipeline::load_8888: fn = ASM(load_8888, ssse3_lowp); break;
- case SkRasterPipeline::store_8888: fn = ASM(store_8888, ssse3_lowp); break;
- case SkRasterPipeline::swap_rb: fn = ASM(swap_rb, ssse3_lowp); break;
- case SkRasterPipeline::move_src_dst: fn = ASM(move_src_dst, ssse3_lowp); break;
- case SkRasterPipeline::srcover: fn = ASM(srcover, ssse3_lowp); break;
+ #define M(st) case SkRasterPipeline::st: fn = ASM(st, ssse3_lowp); break;
+ LOWP_STAGES(M)
+ #undef M
default:
- //SkDebugf("can't %d\n", st->stage);
+ log_missing(st->stage);
ip = reset_point;
}
if (ip == reset_point) {
diff --git a/src/jumper/SkJumper_generated.S b/src/jumper/SkJumper_generated.S
index de0fb731a0..eba1759a3f 100644
--- a/src/jumper/SkJumper_generated.S
+++ b/src/jumper/SkJumper_generated.S
@@ -36943,6 +36943,38 @@ FUNCTION(_sk_just_return_ssse3_lowp)
_sk_just_return_ssse3_lowp:
.byte 195 // retq
+HIDDEN _sk_constant_color_ssse3_lowp
+.globl _sk_constant_color_ssse3_lowp
+FUNCTION(_sk_constant_color_ssse3_lowp)
+_sk_constant_color_ssse3_lowp:
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 243,15,16,29,114,3,0,0 // movss 0x372(%rip),%xmm3 # 428 <_sk_srcover_ssse3_lowp+0x65>
+ .byte 243,15,16,0 // movss (%rax),%xmm0
+ .byte 243,15,89,195 // mulss %xmm3,%xmm0
+ .byte 243,68,15,44,200 // cvttss2si %xmm0,%r9d
+ .byte 102,65,15,110,193 // movd %r9d,%xmm0
+ .byte 242,15,112,192,0 // pshuflw $0x0,%xmm0,%xmm0
+ .byte 102,15,112,192,80 // pshufd $0x50,%xmm0,%xmm0
+ .byte 243,15,16,72,4 // movss 0x4(%rax),%xmm1
+ .byte 243,15,89,203 // mulss %xmm3,%xmm1
+ .byte 243,68,15,44,201 // cvttss2si %xmm1,%r9d
+ .byte 102,65,15,110,201 // movd %r9d,%xmm1
+ .byte 242,15,112,201,0 // pshuflw $0x0,%xmm1,%xmm1
+ .byte 102,15,112,201,80 // pshufd $0x50,%xmm1,%xmm1
+ .byte 243,15,16,80,8 // movss 0x8(%rax),%xmm2
+ .byte 243,15,89,211 // mulss %xmm3,%xmm2
+ .byte 243,68,15,44,202 // cvttss2si %xmm2,%r9d
+ .byte 102,65,15,110,209 // movd %r9d,%xmm2
+ .byte 242,15,112,210,0 // pshuflw $0x0,%xmm2,%xmm2
+ .byte 102,15,112,210,80 // pshufd $0x50,%xmm2,%xmm2
+ .byte 243,15,89,88,12 // mulss 0xc(%rax),%xmm3
+ .byte 243,15,44,195 // cvttss2si %xmm3,%eax
+ .byte 102,15,110,216 // movd %eax,%xmm3
+ .byte 242,15,112,219,0 // pshuflw $0x0,%xmm3,%xmm3
+ .byte 102,15,112,219,80 // pshufd $0x50,%xmm3,%xmm3
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 255,224 // jmpq *%rax
+
HIDDEN _sk_load_8888_ssse3_lowp
.globl _sk_load_8888_ssse3_lowp
FUNCTION(_sk_load_8888_ssse3_lowp)
@@ -36950,10 +36982,10 @@ _sk_load_8888_ssse3_lowp:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,24 // mov (%rax),%r11
.byte 77,133,192 // test %r8,%r8
- .byte 117,113 // jne 127 <_sk_load_8888_ssse3_lowp+0x7b>
+ .byte 117,113 // jne 1a2 <_sk_load_8888_ssse3_lowp+0x7b>
.byte 69,15,16,76,147,16 // movups 0x10(%r11,%rdx,4),%xmm9
.byte 69,15,16,4,147 // movups (%r11,%rdx,4),%xmm8
- .byte 102,15,111,5,167,2,0,0 // movdqa 0x2a7(%rip),%xmm0 # 370 <_sk_srcover_ssse3_lowp+0x65>
+ .byte 102,15,111,5,236,2,0,0 // movdqa 0x2ec(%rip),%xmm0 # 430 <_sk_srcover_ssse3_lowp+0x6d>
.byte 102,68,15,56,0,192 // pshufb %xmm0,%xmm8
.byte 102,68,15,56,0,200 // pshufb %xmm0,%xmm9
.byte 102,65,15,111,208 // movdqa %xmm8,%xmm2
@@ -36967,7 +36999,7 @@ _sk_load_8888_ssse3_lowp:
.byte 102,15,239,210 // pxor %xmm2,%xmm2
.byte 102,65,15,96,208 // punpcklbw %xmm8,%xmm2
.byte 102,65,15,104,216 // punpckhbw %xmm8,%xmm3
- .byte 102,68,15,111,5,113,2,0,0 // movdqa 0x271(%rip),%xmm8 # 380 <_sk_srcover_ssse3_lowp+0x75>
+ .byte 102,68,15,111,5,182,2,0,0 // movdqa 0x2b6(%rip),%xmm8 # 440 <_sk_srcover_ssse3_lowp+0x7d>
.byte 102,65,15,228,192 // pmulhuw %xmm8,%xmm0
.byte 102,65,15,228,200 // pmulhuw %xmm8,%xmm1
.byte 102,65,15,228,208 // pmulhuw %xmm8,%xmm2
@@ -36980,9 +37012,9 @@ _sk_load_8888_ssse3_lowp:
.byte 69,15,87,192 // xorps %xmm8,%xmm8
.byte 65,254,201 // dec %r9b
.byte 65,128,249,6 // cmp $0x6,%r9b
- .byte 119,129 // ja c1 <_sk_load_8888_ssse3_lowp+0x15>
+ .byte 119,129 // ja 13c <_sk_load_8888_ssse3_lowp+0x15>
.byte 69,15,182,201 // movzbl %r9b,%r9d
- .byte 76,141,21,133,0,0,0 // lea 0x85(%rip),%r10 # 1d0 <_sk_load_8888_ssse3_lowp+0x124>
+ .byte 76,141,21,130,0,0,0 // lea 0x82(%rip),%r10 # 248 <_sk_load_8888_ssse3_lowp+0x121>
.byte 75,99,4,138 // movslq (%r10,%r9,4),%rax
.byte 76,1,208 // add %r10,%rax
.byte 255,224 // jmpq *%rax
@@ -37007,21 +37039,20 @@ _sk_load_8888_ssse3_lowp:
.byte 68,15,40,192 // movaps %xmm0,%xmm8
.byte 243,65,15,16,4,147 // movss (%r11,%rdx,4),%xmm0
.byte 243,68,15,16,192 // movss %xmm0,%xmm8
- .byte 233,244,254,255,255 // jmpq c1 <_sk_load_8888_ssse3_lowp+0x15>
- .byte 15,31,0 // nopl (%rax)
- .byte 237 // in (%dx),%eax
+ .byte 233,244,254,255,255 // jmpq 13c <_sk_load_8888_ssse3_lowp+0x15>
+ .byte 240,255 // lock (bad)
.byte 255 // (bad)
.byte 255 // (bad)
+ .byte 219,255 // (bad)
.byte 255 // (bad)
- .byte 216,255 // fdivr %st(7),%st
+ .byte 255,202 // dec %edx
.byte 255 // (bad)
- .byte 255,199 // inc %edi
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,182,255,255,255,170 // pushq -0x55000001(%rsi)
+ .byte 185,255,255,255,173 // mov $0xadffffff,%ecx
.byte 255 // (bad)
.byte 255 // (bad)
- .byte 255,149,255,255,255,132 // callq *-0x7b000001(%rbp)
+ .byte 255,152,255,255,255,135 // lcall *-0x78000001(%rax)
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // .byte 0xff
@@ -37049,7 +37080,7 @@ _sk_store_8888_ssse3_lowp:
.byte 102,69,15,97,194 // punpcklwd %xmm10,%xmm8
.byte 102,69,15,105,202 // punpckhwd %xmm10,%xmm9
.byte 77,133,192 // test %r8,%r8
- .byte 117,17 // jne 25b <_sk_store_8888_ssse3_lowp+0x6f>
+ .byte 117,17 // jne 2d3 <_sk_store_8888_ssse3_lowp+0x6f>
.byte 243,69,15,127,76,147,16 // movdqu %xmm9,0x10(%r11,%rdx,4)
.byte 243,69,15,127,4,147 // movdqu %xmm8,(%r11,%rdx,4)
.byte 72,173 // lods %ds:(%rsi),%rax
@@ -37058,9 +37089,9 @@ _sk_store_8888_ssse3_lowp:
.byte 65,128,225,7 // and $0x7,%r9b
.byte 65,254,201 // dec %r9b
.byte 65,128,249,6 // cmp $0x6,%r9b
- .byte 119,236 // ja 257 <_sk_store_8888_ssse3_lowp+0x6b>
+ .byte 119,236 // ja 2cf <_sk_store_8888_ssse3_lowp+0x6b>
.byte 69,15,182,201 // movzbl %r9b,%r9d
- .byte 76,141,21,90,0,0,0 // lea 0x5a(%rip),%r10 # 2d0 <_sk_store_8888_ssse3_lowp+0xe4>
+ .byte 76,141,21,90,0,0,0 // lea 0x5a(%rip),%r10 # 348 <_sk_store_8888_ssse3_lowp+0xe4>
.byte 75,99,4,138 // movslq (%r10,%r9,4),%rax
.byte 76,1,208 // add %r10,%rax
.byte 255,224 // jmpq *%rax
@@ -37076,7 +37107,7 @@ _sk_store_8888_ssse3_lowp:
.byte 102,69,15,112,200,229 // pshufd $0xe5,%xmm8,%xmm9
.byte 102,69,15,126,76,147,4 // movd %xmm9,0x4(%r11,%rdx,4)
.byte 102,69,15,126,4,147 // movd %xmm8,(%r11,%rdx,4)
- .byte 235,136 // jmp 257 <_sk_store_8888_ssse3_lowp+0x6b>
+ .byte 235,136 // jmp 2cf <_sk_store_8888_ssse3_lowp+0x6b>
.byte 144 // nop
.byte 247,255 // idiv %edi
.byte 255 // (bad)
@@ -37109,6 +37140,25 @@ _sk_swap_rb_ssse3_lowp:
.byte 65,15,40,208 // movaps %xmm8,%xmm2
.byte 255,224 // jmpq *%rax
+HIDDEN _sk_swap_ssse3_lowp
+.globl _sk_swap_ssse3_lowp
+FUNCTION(_sk_swap_ssse3_lowp)
+_sk_swap_ssse3_lowp:
+ .byte 68,15,40,195 // movaps %xmm3,%xmm8
+ .byte 68,15,40,202 // movaps %xmm2,%xmm9
+ .byte 68,15,40,209 // movaps %xmm1,%xmm10
+ .byte 68,15,40,216 // movaps %xmm0,%xmm11
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 15,40,196 // movaps %xmm4,%xmm0
+ .byte 15,40,205 // movaps %xmm5,%xmm1
+ .byte 15,40,214 // movaps %xmm6,%xmm2
+ .byte 15,40,223 // movaps %xmm7,%xmm3
+ .byte 65,15,40,227 // movaps %xmm11,%xmm4
+ .byte 65,15,40,234 // movaps %xmm10,%xmm5
+ .byte 65,15,40,241 // movaps %xmm9,%xmm6
+ .byte 65,15,40,248 // movaps %xmm8,%xmm7
+ .byte 255,224 // jmpq *%rax
+
HIDDEN _sk_move_src_dst_ssse3_lowp
.globl _sk_move_src_dst_ssse3_lowp
FUNCTION(_sk_move_src_dst_ssse3_lowp)
@@ -37120,11 +37170,22 @@ _sk_move_src_dst_ssse3_lowp:
.byte 15,40,251 // movaps %xmm3,%xmm7
.byte 255,224 // jmpq *%rax
+HIDDEN _sk_move_dst_src_ssse3_lowp
+.globl _sk_move_dst_src_ssse3_lowp
+FUNCTION(_sk_move_dst_src_ssse3_lowp)
+_sk_move_dst_src_ssse3_lowp:
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 15,40,196 // movaps %xmm4,%xmm0
+ .byte 15,40,205 // movaps %xmm5,%xmm1
+ .byte 15,40,214 // movaps %xmm6,%xmm2
+ .byte 15,40,223 // movaps %xmm7,%xmm3
+ .byte 255,224 // jmpq *%rax
+
HIDDEN _sk_srcover_ssse3_lowp
.globl _sk_srcover_ssse3_lowp
FUNCTION(_sk_srcover_ssse3_lowp)
_sk_srcover_ssse3_lowp:
- .byte 102,68,15,111,5,124,0,0,0 // movdqa 0x7c(%rip),%xmm8 # 390 <_sk_srcover_ssse3_lowp+0x85>
+ .byte 102,68,15,111,5,132,0,0,0 // movdqa 0x84(%rip),%xmm8 # 450 <_sk_srcover_ssse3_lowp+0x8d>
.byte 102,68,15,249,195 // psubw %xmm3,%xmm8
.byte 102,68,15,111,204 // movdqa %xmm4,%xmm9
.byte 102,69,15,56,11,200 // pmulhrsw %xmm8,%xmm9
@@ -37144,6 +37205,11 @@ _sk_srcover_ssse3_lowp:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
+BALIGN4
+ .byte 0,0 // add %al,(%rax)
+ .byte 0 // .byte 0x0
+ .byte 71 // rex.RXB
+
BALIGN16
.byte 0,4,8 // add %al,(%rax,%rcx,1)
.byte 12,1 // or $0x1,%al
diff --git a/src/jumper/SkJumper_generated_win.S b/src/jumper/SkJumper_generated_win.S
index 67c9b86a2b..a7848d3706 100644
--- a/src/jumper/SkJumper_generated_win.S
+++ b/src/jumper/SkJumper_generated_win.S
@@ -26358,15 +26358,45 @@ PUBLIC _sk_just_return_ssse3_lowp
_sk_just_return_ssse3_lowp LABEL PROC
DB 195 ; retq
+PUBLIC _sk_constant_color_ssse3_lowp
+_sk_constant_color_ssse3_lowp LABEL PROC
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 243,15,16,29,114,3,0,0 ; movss 0x372(%rip),%xmm3 # 4b8 <_sk_srcover_ssse3_lowp+0x65>
+ DB 243,15,16,0 ; movss (%rax),%xmm0
+ DB 243,15,89,195 ; mulss %xmm3,%xmm0
+ DB 243,68,15,44,200 ; cvttss2si %xmm0,%r9d
+ DB 102,65,15,110,193 ; movd %r9d,%xmm0
+ DB 242,15,112,192,0 ; pshuflw $0x0,%xmm0,%xmm0
+ DB 102,15,112,192,80 ; pshufd $0x50,%xmm0,%xmm0
+ DB 243,15,16,72,4 ; movss 0x4(%rax),%xmm1
+ DB 243,15,89,203 ; mulss %xmm3,%xmm1
+ DB 243,68,15,44,201 ; cvttss2si %xmm1,%r9d
+ DB 102,65,15,110,201 ; movd %r9d,%xmm1
+ DB 242,15,112,201,0 ; pshuflw $0x0,%xmm1,%xmm1
+ DB 102,15,112,201,80 ; pshufd $0x50,%xmm1,%xmm1
+ DB 243,15,16,80,8 ; movss 0x8(%rax),%xmm2
+ DB 243,15,89,211 ; mulss %xmm3,%xmm2
+ DB 243,68,15,44,202 ; cvttss2si %xmm2,%r9d
+ DB 102,65,15,110,209 ; movd %r9d,%xmm2
+ DB 242,15,112,210,0 ; pshuflw $0x0,%xmm2,%xmm2
+ DB 102,15,112,210,80 ; pshufd $0x50,%xmm2,%xmm2
+ DB 243,15,89,88,12 ; mulss 0xc(%rax),%xmm3
+ DB 243,15,44,195 ; cvttss2si %xmm3,%eax
+ DB 102,15,110,216 ; movd %eax,%xmm3
+ DB 242,15,112,219,0 ; pshuflw $0x0,%xmm3,%xmm3
+ DB 102,15,112,219,80 ; pshufd $0x50,%xmm3,%xmm3
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 255,224 ; jmpq *%rax
+
PUBLIC _sk_load_8888_ssse3_lowp
_sk_load_8888_ssse3_lowp LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,24 ; mov (%rax),%r11
DB 77,133,192 ; test %r8,%r8
- DB 117,113 ; jne 1b7 <_sk_load_8888_ssse3_lowp+0x7b>
+ DB 117,113 ; jne 232 <_sk_load_8888_ssse3_lowp+0x7b>
DB 69,15,16,76,147,16 ; movups 0x10(%r11,%rdx,4),%xmm9
DB 69,15,16,4,147 ; movups (%r11,%rdx,4),%xmm8
- DB 102,15,111,5,167,2,0,0 ; movdqa 0x2a7(%rip),%xmm0 # 400 <_sk_srcover_ssse3_lowp+0x65>
+ DB 102,15,111,5,236,2,0,0 ; movdqa 0x2ec(%rip),%xmm0 # 4c0 <_sk_srcover_ssse3_lowp+0x6d>
DB 102,68,15,56,0,192 ; pshufb %xmm0,%xmm8
DB 102,68,15,56,0,200 ; pshufb %xmm0,%xmm9
DB 102,65,15,111,208 ; movdqa %xmm8,%xmm2
@@ -26380,7 +26410,7 @@ _sk_load_8888_ssse3_lowp LABEL PROC
DB 102,15,239,210 ; pxor %xmm2,%xmm2
DB 102,65,15,96,208 ; punpcklbw %xmm8,%xmm2
DB 102,65,15,104,216 ; punpckhbw %xmm8,%xmm3
- DB 102,68,15,111,5,113,2,0,0 ; movdqa 0x271(%rip),%xmm8 # 410 <_sk_srcover_ssse3_lowp+0x75>
+ DB 102,68,15,111,5,182,2,0,0 ; movdqa 0x2b6(%rip),%xmm8 # 4d0 <_sk_srcover_ssse3_lowp+0x7d>
DB 102,65,15,228,192 ; pmulhuw %xmm8,%xmm0
DB 102,65,15,228,200 ; pmulhuw %xmm8,%xmm1
DB 102,65,15,228,208 ; pmulhuw %xmm8,%xmm2
@@ -26393,9 +26423,9 @@ _sk_load_8888_ssse3_lowp LABEL PROC
DB 69,15,87,192 ; xorps %xmm8,%xmm8
DB 65,254,201 ; dec %r9b
DB 65,128,249,6 ; cmp $0x6,%r9b
- DB 119,129 ; ja 151 <_sk_load_8888_ssse3_lowp+0x15>
+ DB 119,129 ; ja 1cc <_sk_load_8888_ssse3_lowp+0x15>
DB 69,15,182,201 ; movzbl %r9b,%r9d
- DB 76,141,21,133,0,0,0 ; lea 0x85(%rip),%r10 # 260 <_sk_load_8888_ssse3_lowp+0x124>
+ DB 76,141,21,130,0,0,0 ; lea 0x82(%rip),%r10 # 2d8 <_sk_load_8888_ssse3_lowp+0x121>
DB 75,99,4,138 ; movslq (%r10,%r9,4),%rax
DB 76,1,208 ; add %r10,%rax
DB 255,224 ; jmpq *%rax
@@ -26420,21 +26450,20 @@ _sk_load_8888_ssse3_lowp LABEL PROC
DB 68,15,40,192 ; movaps %xmm0,%xmm8
DB 243,65,15,16,4,147 ; movss (%r11,%rdx,4),%xmm0
DB 243,68,15,16,192 ; movss %xmm0,%xmm8
- DB 233,244,254,255,255 ; jmpq 151 <_sk_load_8888_ssse3_lowp+0x15>
- DB 15,31,0 ; nopl (%rax)
- DB 237 ; in (%dx),%eax
+ DB 233,244,254,255,255 ; jmpq 1cc <_sk_load_8888_ssse3_lowp+0x15>
+ DB 240,255 ; lock (bad)
DB 255 ; (bad)
DB 255 ; (bad)
+ DB 219,255 ; (bad)
DB 255 ; (bad)
- DB 216,255 ; fdivr %st(7),%st
+ DB 255,202 ; dec %edx
DB 255 ; (bad)
- DB 255,199 ; inc %edi
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,182,255,255,255,170 ; pushq -0x55000001(%rsi)
+ DB 185,255,255,255,173 ; mov $0xadffffff,%ecx
DB 255 ; (bad)
DB 255 ; (bad)
- DB 255,149,255,255,255,132 ; callq *-0x7b000001(%rbp)
+ DB 255,152,255,255,255,135 ; lcall *-0x78000001(%rax)
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; .byte 0xff
@@ -26460,7 +26489,7 @@ _sk_store_8888_ssse3_lowp LABEL PROC
DB 102,69,15,97,194 ; punpcklwd %xmm10,%xmm8
DB 102,69,15,105,202 ; punpckhwd %xmm10,%xmm9
DB 77,133,192 ; test %r8,%r8
- DB 117,17 ; jne 2eb <_sk_store_8888_ssse3_lowp+0x6f>
+ DB 117,17 ; jne 363 <_sk_store_8888_ssse3_lowp+0x6f>
DB 243,69,15,127,76,147,16 ; movdqu %xmm9,0x10(%r11,%rdx,4)
DB 243,69,15,127,4,147 ; movdqu %xmm8,(%r11,%rdx,4)
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -26469,9 +26498,9 @@ _sk_store_8888_ssse3_lowp LABEL PROC
DB 65,128,225,7 ; and $0x7,%r9b
DB 65,254,201 ; dec %r9b
DB 65,128,249,6 ; cmp $0x6,%r9b
- DB 119,236 ; ja 2e7 <_sk_store_8888_ssse3_lowp+0x6b>
+ DB 119,236 ; ja 35f <_sk_store_8888_ssse3_lowp+0x6b>
DB 69,15,182,201 ; movzbl %r9b,%r9d
- DB 76,141,21,90,0,0,0 ; lea 0x5a(%rip),%r10 # 360 <_sk_store_8888_ssse3_lowp+0xe4>
+ DB 76,141,21,90,0,0,0 ; lea 0x5a(%rip),%r10 # 3d8 <_sk_store_8888_ssse3_lowp+0xe4>
DB 75,99,4,138 ; movslq (%r10,%r9,4),%rax
DB 76,1,208 ; add %r10,%rax
DB 255,224 ; jmpq *%rax
@@ -26487,7 +26516,7 @@ _sk_store_8888_ssse3_lowp LABEL PROC
DB 102,69,15,112,200,229 ; pshufd $0xe5,%xmm8,%xmm9
DB 102,69,15,126,76,147,4 ; movd %xmm9,0x4(%r11,%rdx,4)
DB 102,69,15,126,4,147 ; movd %xmm8,(%r11,%rdx,4)
- DB 235,136 ; jmp 2e7 <_sk_store_8888_ssse3_lowp+0x6b>
+ DB 235,136 ; jmp 35f <_sk_store_8888_ssse3_lowp+0x6b>
DB 144 ; nop
DB 247,255 ; idiv %edi
DB 255 ; (bad)
@@ -26518,6 +26547,23 @@ _sk_swap_rb_ssse3_lowp LABEL PROC
DB 65,15,40,208 ; movaps %xmm8,%xmm2
DB 255,224 ; jmpq *%rax
+PUBLIC _sk_swap_ssse3_lowp
+_sk_swap_ssse3_lowp LABEL PROC
+ DB 68,15,40,195 ; movaps %xmm3,%xmm8
+ DB 68,15,40,202 ; movaps %xmm2,%xmm9
+ DB 68,15,40,209 ; movaps %xmm1,%xmm10
+ DB 68,15,40,216 ; movaps %xmm0,%xmm11
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 15,40,196 ; movaps %xmm4,%xmm0
+ DB 15,40,205 ; movaps %xmm5,%xmm1
+ DB 15,40,214 ; movaps %xmm6,%xmm2
+ DB 15,40,223 ; movaps %xmm7,%xmm3
+ DB 65,15,40,227 ; movaps %xmm11,%xmm4
+ DB 65,15,40,234 ; movaps %xmm10,%xmm5
+ DB 65,15,40,241 ; movaps %xmm9,%xmm6
+ DB 65,15,40,248 ; movaps %xmm8,%xmm7
+ DB 255,224 ; jmpq *%rax
+
PUBLIC _sk_move_src_dst_ssse3_lowp
_sk_move_src_dst_ssse3_lowp LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
@@ -26527,9 +26573,18 @@ _sk_move_src_dst_ssse3_lowp LABEL PROC
DB 15,40,251 ; movaps %xmm3,%xmm7
DB 255,224 ; jmpq *%rax
+PUBLIC _sk_move_dst_src_ssse3_lowp
+_sk_move_dst_src_ssse3_lowp LABEL PROC
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 15,40,196 ; movaps %xmm4,%xmm0
+ DB 15,40,205 ; movaps %xmm5,%xmm1
+ DB 15,40,214 ; movaps %xmm6,%xmm2
+ DB 15,40,223 ; movaps %xmm7,%xmm3
+ DB 255,224 ; jmpq *%rax
+
PUBLIC _sk_srcover_ssse3_lowp
_sk_srcover_ssse3_lowp LABEL PROC
- DB 102,68,15,111,5,124,0,0,0 ; movdqa 0x7c(%rip),%xmm8 # 420 <_sk_srcover_ssse3_lowp+0x85>
+ DB 102,68,15,111,5,132,0,0,0 ; movdqa 0x84(%rip),%xmm8 # 4e0 <_sk_srcover_ssse3_lowp+0x8d>
DB 102,68,15,249,195 ; psubw %xmm3,%xmm8
DB 102,68,15,111,204 ; movdqa %xmm4,%xmm9
DB 102,69,15,56,11,200 ; pmulhrsw %xmm8,%xmm9
@@ -26549,6 +26604,11 @@ _sk_srcover_ssse3_lowp LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
+ALIGN 4
+ DB 0,0 ; add %al,(%rax)
+ DB 0 ; .byte 0x0
+ DB 71 ; rex.RXB
+
ALIGN 16
DB 0,4,8 ; add %al,(%rax,%rcx,1)
DB 12,1 ; or $0x1,%al
diff --git a/src/jumper/SkJumper_stages_lowp.cpp b/src/jumper/SkJumper_stages_lowp.cpp
index 139382377d..987bfa6518 100644
--- a/src/jumper/SkJumper_stages_lowp.cpp
+++ b/src/jumper/SkJumper_stages_lowp.cpp
@@ -165,6 +165,14 @@ SI U32 to_8888(F r, F g, F b, F a) {
// Stages!
+STAGE(constant_color) {  // Overwrites r, g, b, a with the four consecutive floats that ctx points to.
+ auto rgba = (const float*)ctx;  // ctx is read as an array of floats in r, g, b, a order
+ r = rgba[0];
+ g = rgba[1];
+ b = rgba[2];
+ a = rgba[3];
+}
+
STAGE(load_8888) {
auto ptr = *(const uint32_t**)ctx + x;
from_8888(load<U32>(ptr, tail), &r,&g,&b,&a);
@@ -180,12 +188,29 @@ STAGE(swap_rb) {
b = tmp;
}
+STAGE(swap) {  // Exchanges each of r, g, b, a with its counterpart dr, dg, db, da.
+ auto swap = [](F& v, F& dv) {  // local helper: exchanges two F values through a temporary
+ auto tmp = v;
+ v = dv;
+ dv = tmp;
+ };
+ swap(r, dr);
+ swap(g, dg);
+ swap(b, db);
+ swap(a, da);
+}
STAGE(move_src_dst) {
dr = r;
dg = g;
db = b;
da = a;
}
+STAGE(move_dst_src) {  // Copies dr, dg, db, da into r, g, b, a; the d* values are left unchanged.
+ r = dr;
+ g = dg;
+ b = db;
+ a = da;
+}
// Most blend modes apply the same logic to each channel.
#define BLEND_MODE(name) \