about summary refs log tree commit diff homepage
path: root/src
diff options
context:
space:
mode:
author Mike Klein <mtklein@chromium.org> 2017-06-05 12:09:47 -0400
committer Skia Commit-Bot <skia-commit-bot@chromium.org> 2017-06-05 16:32:54 +0000
commit 5adb01bf0d190b20abe50fac608f396c65993424 (patch)
tree 70086fbd3072a448eed3352d1641ad4fc6b5a7ca /src
parent 1f29bf093f01e9c9bf79cbd2ac27da62efc8e3a4 (diff)
lowp: add move_src_dst and srcover
This is enough to run the bench SkRasterPipeline_compile.

$ ninja -C out monobench; and out/monobench SkRasterPipeline_compile 300

Before: 300 SkRasterPipeline_compile 48.4858ns
After: 300 SkRasterPipeline_compile 37.5801ns

Change-Id: Icb80348908dfb016826700a44566222c9f7a853c
Reviewed-on: https://skia-review.googlesource.com/18595
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
Diffstat (limited to 'src')
-rw-r--r-- src/jumper/SkJumper.cpp 16
-rw-r--r-- src/jumper/SkJumper_generated.S 48
-rw-r--r-- src/jumper/SkJumper_generated_win.S 44
-rw-r--r-- src/jumper/SkJumper_stages_lowp.cpp 25
4 files changed, 110 insertions, 23 deletions
diff --git a/src/jumper/SkJumper.cpp b/src/jumper/SkJumper.cpp
index 7059822647..535e4a883b 100644
--- a/src/jumper/SkJumper.cpp
+++ b/src/jumper/SkJumper.cpp
@@ -83,9 +83,11 @@ extern "C" {
SK_RASTER_PIPELINE_STAGES(M)
#undef M
- StageFn ASM(load_8888, ssse3_lowp),
- ASM(store_8888, ssse3_lowp),
- ASM(swap_rb, ssse3_lowp);
+ StageFn ASM(load_8888, ssse3_lowp),
+ ASM(store_8888, ssse3_lowp),
+ ASM(swap_rb, ssse3_lowp),
+ ASM(move_src_dst, ssse3_lowp),
+ ASM(srcover, ssse3_lowp);
#endif
// Portable, single-pixel stages.
@@ -186,9 +188,11 @@ StartPipelineFn* SkRasterPipeline::build_pipeline(void** ip) const {
for (const StageList* st = fStages; st; st = st->prev) {
StageFn* fn = nullptr;
switch (st->stage) {
- case SkRasterPipeline::load_8888: fn = ASM(load_8888, ssse3_lowp); break;
- case SkRasterPipeline::store_8888: fn = ASM(store_8888,ssse3_lowp); break;
- case SkRasterPipeline::swap_rb: fn = ASM(swap_rb, ssse3_lowp); break;
+ case SkRasterPipeline::load_8888: fn = ASM(load_8888, ssse3_lowp); break;
+ case SkRasterPipeline::store_8888: fn = ASM(store_8888, ssse3_lowp); break;
+ case SkRasterPipeline::swap_rb: fn = ASM(swap_rb, ssse3_lowp); break;
+ case SkRasterPipeline::move_src_dst: fn = ASM(move_src_dst, ssse3_lowp); break;
+ case SkRasterPipeline::srcover: fn = ASM(srcover, ssse3_lowp); break;
default:
//SkDebugf("can't %d\n", st->stage);
ip = reset_point;
diff --git a/src/jumper/SkJumper_generated.S b/src/jumper/SkJumper_generated.S
index 52bf029be2..de0fb731a0 100644
--- a/src/jumper/SkJumper_generated.S
+++ b/src/jumper/SkJumper_generated.S
@@ -36953,7 +36953,7 @@ _sk_load_8888_ssse3_lowp:
.byte 117,113 // jne 127 <_sk_load_8888_ssse3_lowp+0x7b>
.byte 69,15,16,76,147,16 // movups 0x10(%r11,%rdx,4),%xmm9
.byte 69,15,16,4,147 // movups (%r11,%rdx,4),%xmm8
- .byte 102,15,111,5,55,2,0,0 // movdqa 0x237(%rip),%xmm0 # 300 <_sk_swap_rb_ssse3_lowp+0x14>
+ .byte 102,15,111,5,167,2,0,0 // movdqa 0x2a7(%rip),%xmm0 # 370 <_sk_srcover_ssse3_lowp+0x65>
.byte 102,68,15,56,0,192 // pshufb %xmm0,%xmm8
.byte 102,68,15,56,0,200 // pshufb %xmm0,%xmm9
.byte 102,65,15,111,208 // movdqa %xmm8,%xmm2
@@ -36967,7 +36967,7 @@ _sk_load_8888_ssse3_lowp:
.byte 102,15,239,210 // pxor %xmm2,%xmm2
.byte 102,65,15,96,208 // punpcklbw %xmm8,%xmm2
.byte 102,65,15,104,216 // punpckhbw %xmm8,%xmm3
- .byte 102,68,15,111,5,1,2,0,0 // movdqa 0x201(%rip),%xmm8 # 310 <_sk_swap_rb_ssse3_lowp+0x24>
+ .byte 102,68,15,111,5,113,2,0,0 // movdqa 0x271(%rip),%xmm8 # 380 <_sk_srcover_ssse3_lowp+0x75>
.byte 102,65,15,228,192 // pmulhuw %xmm8,%xmm0
.byte 102,65,15,228,200 // pmulhuw %xmm8,%xmm1
.byte 102,65,15,228,208 // pmulhuw %xmm8,%xmm2
@@ -37109,6 +37109,41 @@ _sk_swap_rb_ssse3_lowp:
.byte 65,15,40,208 // movaps %xmm8,%xmm2
.byte 255,224 // jmpq *%rax
+HIDDEN _sk_move_src_dst_ssse3_lowp
+.globl _sk_move_src_dst_ssse3_lowp
+FUNCTION(_sk_move_src_dst_ssse3_lowp)
+_sk_move_src_dst_ssse3_lowp:
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 15,40,224 // movaps %xmm0,%xmm4
+ .byte 15,40,233 // movaps %xmm1,%xmm5
+ .byte 15,40,242 // movaps %xmm2,%xmm6
+ .byte 15,40,251 // movaps %xmm3,%xmm7
+ .byte 255,224 // jmpq *%rax
+
+HIDDEN _sk_srcover_ssse3_lowp
+.globl _sk_srcover_ssse3_lowp
+FUNCTION(_sk_srcover_ssse3_lowp)
+_sk_srcover_ssse3_lowp:
+ .byte 102,68,15,111,5,124,0,0,0 // movdqa 0x7c(%rip),%xmm8 # 390 <_sk_srcover_ssse3_lowp+0x85>
+ .byte 102,68,15,249,195 // psubw %xmm3,%xmm8
+ .byte 102,68,15,111,204 // movdqa %xmm4,%xmm9
+ .byte 102,69,15,56,11,200 // pmulhrsw %xmm8,%xmm9
+ .byte 102,69,15,56,29,201 // pabsw %xmm9,%xmm9
+ .byte 102,65,15,253,193 // paddw %xmm9,%xmm0
+ .byte 102,68,15,111,205 // movdqa %xmm5,%xmm9
+ .byte 102,69,15,56,11,200 // pmulhrsw %xmm8,%xmm9
+ .byte 102,69,15,56,29,201 // pabsw %xmm9,%xmm9
+ .byte 102,65,15,253,201 // paddw %xmm9,%xmm1
+ .byte 102,68,15,111,206 // movdqa %xmm6,%xmm9
+ .byte 102,69,15,56,11,200 // pmulhrsw %xmm8,%xmm9
+ .byte 102,69,15,56,29,201 // pabsw %xmm9,%xmm9
+ .byte 102,65,15,253,209 // paddw %xmm9,%xmm2
+ .byte 102,68,15,56,11,199 // pmulhrsw %xmm7,%xmm8
+ .byte 102,69,15,56,29,192 // pabsw %xmm8,%xmm8
+ .byte 102,65,15,253,216 // paddw %xmm8,%xmm3
+ .byte 72,173 // lods %ds:(%rsi),%rax
+ .byte 255,224 // jmpq *%rax
+
BALIGN16
.byte 0,4,8 // add %al,(%rax,%rcx,1)
.byte 12,1 // or $0x1,%al
@@ -37117,10 +37152,7 @@ BALIGN16
.byte 3,7 // add (%rdi),%eax
.byte 11,15 // or (%rdi),%ecx
.byte 129,128,129,128,129,128,129,128,129,128// addl $0x80818081,-0x7f7e7f7f(%rax)
- .byte 129 // .byte 0x81
- .byte 128 // .byte 0x80
- .byte 129 // .byte 0x81
- .byte 128 // .byte 0x80
- .byte 129 // .byte 0x81
- .byte 128 // .byte 0x80
+ .byte 129,128,129,128,129,128,0,128,0,128 // addl $0x80008000,-0x7f7e7f7f(%rax)
+ .byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax)
+ .byte 0,128,0,128,0,128 // add %al,-0x7fff8000(%rax)
#endif
diff --git a/src/jumper/SkJumper_generated_win.S b/src/jumper/SkJumper_generated_win.S
index ecbffaa201..67c9b86a2b 100644
--- a/src/jumper/SkJumper_generated_win.S
+++ b/src/jumper/SkJumper_generated_win.S
@@ -26366,7 +26366,7 @@ _sk_load_8888_ssse3_lowp LABEL PROC
DB 117,113 ; jne 1b7 <_sk_load_8888_ssse3_lowp+0x7b>
DB 69,15,16,76,147,16 ; movups 0x10(%r11,%rdx,4),%xmm9
DB 69,15,16,4,147 ; movups (%r11,%rdx,4),%xmm8
- DB 102,15,111,5,55,2,0,0 ; movdqa 0x237(%rip),%xmm0 # 390 <_sk_swap_rb_ssse3_lowp+0x14>
+ DB 102,15,111,5,167,2,0,0 ; movdqa 0x2a7(%rip),%xmm0 # 400 <_sk_srcover_ssse3_lowp+0x65>
DB 102,68,15,56,0,192 ; pshufb %xmm0,%xmm8
DB 102,68,15,56,0,200 ; pshufb %xmm0,%xmm9
DB 102,65,15,111,208 ; movdqa %xmm8,%xmm2
@@ -26380,7 +26380,7 @@ _sk_load_8888_ssse3_lowp LABEL PROC
DB 102,15,239,210 ; pxor %xmm2,%xmm2
DB 102,65,15,96,208 ; punpcklbw %xmm8,%xmm2
DB 102,65,15,104,216 ; punpckhbw %xmm8,%xmm3
- DB 102,68,15,111,5,1,2,0,0 ; movdqa 0x201(%rip),%xmm8 # 3a0 <_sk_swap_rb_ssse3_lowp+0x24>
+ DB 102,68,15,111,5,113,2,0,0 ; movdqa 0x271(%rip),%xmm8 # 410 <_sk_srcover_ssse3_lowp+0x75>
DB 102,65,15,228,192 ; pmulhuw %xmm8,%xmm0
DB 102,65,15,228,200 ; pmulhuw %xmm8,%xmm1
DB 102,65,15,228,208 ; pmulhuw %xmm8,%xmm2
@@ -26518,6 +26518,37 @@ _sk_swap_rb_ssse3_lowp LABEL PROC
DB 65,15,40,208 ; movaps %xmm8,%xmm2
DB 255,224 ; jmpq *%rax
+PUBLIC _sk_move_src_dst_ssse3_lowp
+_sk_move_src_dst_ssse3_lowp LABEL PROC
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 15,40,224 ; movaps %xmm0,%xmm4
+ DB 15,40,233 ; movaps %xmm1,%xmm5
+ DB 15,40,242 ; movaps %xmm2,%xmm6
+ DB 15,40,251 ; movaps %xmm3,%xmm7
+ DB 255,224 ; jmpq *%rax
+
+PUBLIC _sk_srcover_ssse3_lowp
+_sk_srcover_ssse3_lowp LABEL PROC
+ DB 102,68,15,111,5,124,0,0,0 ; movdqa 0x7c(%rip),%xmm8 # 420 <_sk_srcover_ssse3_lowp+0x85>
+ DB 102,68,15,249,195 ; psubw %xmm3,%xmm8
+ DB 102,68,15,111,204 ; movdqa %xmm4,%xmm9
+ DB 102,69,15,56,11,200 ; pmulhrsw %xmm8,%xmm9
+ DB 102,69,15,56,29,201 ; pabsw %xmm9,%xmm9
+ DB 102,65,15,253,193 ; paddw %xmm9,%xmm0
+ DB 102,68,15,111,205 ; movdqa %xmm5,%xmm9
+ DB 102,69,15,56,11,200 ; pmulhrsw %xmm8,%xmm9
+ DB 102,69,15,56,29,201 ; pabsw %xmm9,%xmm9
+ DB 102,65,15,253,201 ; paddw %xmm9,%xmm1
+ DB 102,68,15,111,206 ; movdqa %xmm6,%xmm9
+ DB 102,69,15,56,11,200 ; pmulhrsw %xmm8,%xmm9
+ DB 102,69,15,56,29,201 ; pabsw %xmm9,%xmm9
+ DB 102,65,15,253,209 ; paddw %xmm9,%xmm2
+ DB 102,68,15,56,11,199 ; pmulhrsw %xmm7,%xmm8
+ DB 102,69,15,56,29,192 ; pabsw %xmm8,%xmm8
+ DB 102,65,15,253,216 ; paddw %xmm8,%xmm3
+ DB 72,173 ; lods %ds:(%rsi),%rax
+ DB 255,224 ; jmpq *%rax
+
ALIGN 16
DB 0,4,8 ; add %al,(%rax,%rcx,1)
DB 12,1 ; or $0x1,%al
@@ -26526,11 +26557,8 @@ ALIGN 16
DB 3,7 ; add (%rdi),%eax
DB 11,15 ; or (%rdi),%ecx
DB 129,128,129,128,129,128,129,128,129,128; addl $0x80818081,-0x7f7e7f7f(%rax)
- DB 129 ; .byte 0x81
- DB 128 ; .byte 0x80
- DB 129 ; .byte 0x81
- DB 128 ; .byte 0x80
- DB 129 ; .byte 0x81
- DB 128 ; .byte 0x80
+ DB 129,128,129,128,129,128,0,128,0,128 ; addl $0x80008000,-0x7f7e7f7f(%rax)
+ DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax)
+ DB 0,128,0,128,0,128 ; add %al,-0x7fff8000(%rax)
ENDIF
END
diff --git a/src/jumper/SkJumper_stages_lowp.cpp b/src/jumper/SkJumper_stages_lowp.cpp
index 4d0df94af6..139382377d 100644
--- a/src/jumper/SkJumper_stages_lowp.cpp
+++ b/src/jumper/SkJumper_stages_lowp.cpp
@@ -28,7 +28,7 @@ struct F {
U16 vec;
F() = default;
- F(uint16_t bits) : vec(bits) {}
+ F(float f) : vec((uint16_t)(f * 0x8000)) {}
F(U16 v) : vec(v) {}
operator U16() const { return vec; }
@@ -38,6 +38,7 @@ SI F operator+(F x, F y) { return x.vec + y.vec; }
SI F operator-(F x, F y) { return x.vec - y.vec; }
SI F operator*(F x, F y) { return _mm_abs_epi16(_mm_mulhrs_epi16(x.vec, y.vec)); }
SI F mad(F f, F m, F a) { return f*m+a; }
+SI F inv(F v) { return 1.0f - v; }
SI F operator<<(F x, int bits) { return x.vec << bits; }
SI F operator>>(F x, int bits) { return x.vec >> bits; }
@@ -178,3 +179,25 @@ STAGE(swap_rb) {
r = b;
b = tmp;
}
+
+STAGE(move_src_dst) {
+ dr = r;
+ dg = g;
+ db = b;
+ da = a;
+}
+
+// Most blend modes apply the same logic to each channel.
+#define BLEND_MODE(name) \
+ SI F name##_channel(F s, F d, F sa, F da); \
+ STAGE(name) { \
+ r = name##_channel(r,dr,a,da); \
+ g = name##_channel(g,dg,a,da); \
+ b = name##_channel(b,db,a,da); \
+ a = name##_channel(a,da,a,da); \
+ } \
+ SI F name##_channel(F s, F d, F sa, F da)
+
+BLEND_MODE(srcover) { return mad(d, inv(sa), s); }
+
+#undef BLEND_MODE