aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
author: Mike Klein <mtklein@chromium.org> 2016-09-28 11:54:46 -0400
committer: Skia Commit-Bot <skia-commit-bot@chromium.org> 2016-09-28 17:36:20 +0000
commit: e08c1d35bab4a3995234f2a226da9eb9775a98ad (patch)
tree: b08720ca01233149734cc8d83c9ca5cc18a1110b /src
parent: 47bf4c0009f1f2c86e831447e69475e50cbfc958 (diff)
SkRasterPipeline: fuse clamp_01 into stores.
This is a less generally applicable trick than I had previously hoped. The need to thread through contexts into each stage really means you can only include one context-dependent stage in each fused batch. We can still manually fuse these, of course, as you can see in SkRasterPipelineBench. It's just that we can't really write a generic compile-time template to do it except for context-free stages. And since we can't write a generic version, and I have only this one specific use case right now, I've kept it quite specific to that use case. This does work pretty well for this use case, though.

Here's the fused clamp-then-store-565:

    +0x00 pushq %rbp
    +0x01 movq %rsp, %rbp
    +0x04 movq 8(%rdi), %rax
    +0x08 xorps %xmm4, %xmm4
    +0x0b maxps %xmm4, %xmm3
    +0x0e maxps %xmm4, %xmm0
    +0x11 maxps %xmm4, %xmm1
    +0x14 maxps %xmm4, %xmm2
    +0x17 minps 4262818(%rip), %xmm3
    +0x1e minps %xmm3, %xmm0
    +0x21 minps %xmm3, %xmm1
    +0x24 minps %xmm3, %xmm2
    +0x27 movaps 4965378(%rip), %xmm3
    +0x2e mulps %xmm3, %xmm0
    +0x31 cvtps2dq %xmm0, %xmm0
    +0x35 pslld $11, %xmm0
    +0x3a mulps 4965375(%rip), %xmm1
    +0x41 cvtps2dq %xmm1, %xmm1
    +0x45 pslld $5, %xmm1
    +0x4a mulps %xmm3, %xmm2
    +0x4d cvtps2dq %xmm2, %xmm2
    +0x51 orpd %xmm0, %xmm2
    +0x55 orpd %xmm1, %xmm2
    +0x59 pshufb 4474510(%rip), %xmm2
    +0x62 movq %xmm2, (%rax,%rsi,2)
    +0x67 popq %rbp
    +0x68 retq

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2745
Change-Id: Ia7d66aecc6cbff154158d2600d7874feed1a76f6
Reviewed-on: https://skia-review.googlesource.com/2745
Reviewed-by: Mike Reed <reed@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
Diffstat (limited to 'src')
-rw-r--r-- src/core/SkRasterPipelineBlitter.cpp 15
1 files changed, 11 insertions, 4 deletions
diff --git a/src/core/SkRasterPipelineBlitter.cpp b/src/core/SkRasterPipelineBlitter.cpp
index d618e8b961..46820d3379 100644
--- a/src/core/SkRasterPipelineBlitter.cpp
+++ b/src/core/SkRasterPipelineBlitter.cpp
@@ -325,21 +325,28 @@ void SkRasterPipelineBlitter::append_load_d(SkRasterPipeline* p, const void* dst
}
}
+template <SkRasterPipeline::EasyFn fn>
+static void clamp_01_premul_then(void* ctx, size_t x, size_t tail,
+ Sk4f& r, Sk4f& g, Sk4f& b, Sk4f& a,
+ Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f& da) {
+ clamp_01_premul(nullptr, x,tail, r,g,b,a, dr,dg,db,da);
+ fn( ctx, x,tail, r,g,b,a, dr,dg,db,da);
+}
+
void SkRasterPipelineBlitter::append_store(SkRasterPipeline* p, void* dst) const {
SkASSERT(supported(fDst.info()));
- p->append<clamp_01_premul>();
switch (fDst.info().colorType()) {
case kN32_SkColorType:
if (fDst.info().gammaCloseToSRGB()) {
- p->last<store_srgb>(dst);
+ p->last<clamp_01_premul_then<store_srgb>>(dst);
}
break;
case kRGBA_F16_SkColorType:
- p->last<store_f16>(dst);
+ p->last<clamp_01_premul_then<store_f16>>(dst);
break;
case kRGB_565_SkColorType:
- p->last<store_565>(dst);
+ p->last<clamp_01_premul_then<store_565>>(dst);
break;
default: break;
}