 bench/SkRasterPipelineBench.cpp      | 121
 src/core/SkRasterPipeline.h          |  35
 src/core/SkRasterPipelineBlitter.cpp | 172
 tests/SkRasterPipelineTest.cpp       |  71
 4 files changed, 180 insertions(+), 219 deletions(-)
diff --git a/bench/SkRasterPipelineBench.cpp b/bench/SkRasterPipelineBench.cpp
index 0243940a10..a5263d770f 100644
--- a/bench/SkRasterPipelineBench.cpp
+++ b/bench/SkRasterPipelineBench.cpp
@@ -23,10 +23,8 @@ static uint8_t mask[N];
 //   - store src back as srgb
 // Every stage except for srcover interacts with memory, and so will need _tail variants.
 
-static void SK_VECTORCALL load_s_srgb(SkRasterPipeline::Stage* st, size_t x,
-                                      Sk4f r, Sk4f g, Sk4f b, Sk4f a,
-                                      Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-    auto ptr = st->ctx<const uint32_t*>() + x;
+SK_RASTER_STAGE(load_s_srgb) {
+    auto ptr = (const uint32_t*)ctx + x;
 
     r = Sk4f{ sk_linear_from_srgb[(ptr[0] >>  0) & 0xff],
               sk_linear_from_srgb[(ptr[1] >>  0) & 0xff],
@@ -44,27 +42,19 @@ static void SK_VECTORCALL load_s_srgb(SkRasterPipeline::Stage* st, size_t x,
               sk_linear_from_srgb[(ptr[3] >> 16) & 0xff] };
 
     a = SkNx_cast<float>((Sk4i::Load(ptr) >> 24) & 0xff) * (1/255.0f);
-
-    st->next(x, r,g,b,a, dr,dg,db,da);
 }
 
-static void SK_VECTORCALL load_s_srgb_tail(SkRasterPipeline::Stage* st, size_t x,
-                                           Sk4f r, Sk4f g, Sk4f b, Sk4f a,
-                                           Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-    auto ptr = st->ctx<const uint32_t*>() + x;
+SK_RASTER_STAGE(load_s_srgb_tail) {
+    auto ptr = (const uint32_t*)ctx + x;
 
     r = Sk4f{ sk_linear_from_srgb[(*ptr >>  0) & 0xff], 0,0,0 };
     g = Sk4f{ sk_linear_from_srgb[(*ptr >>  8) & 0xff], 0,0,0 };
     b = Sk4f{ sk_linear_from_srgb[(*ptr >> 16) & 0xff], 0,0,0 };
     a = Sk4f{                     (*ptr >> 24) * (1/255.0f), 0,0,0 };
-
-    st->next(x, r,g,b,a, dr,dg,db,da);
 }
 
-static void SK_VECTORCALL load_d_srgb(SkRasterPipeline::Stage* st, size_t x,
-                                      Sk4f r, Sk4f g, Sk4f b, Sk4f a,
-                                      Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-    auto ptr = st->ctx<const uint32_t*>() + x;
+SK_RASTER_STAGE(load_d_srgb) {
+    auto ptr = (const uint32_t*)ctx + x;
 
     dr = Sk4f{ sk_linear_from_srgb[(ptr[0] >>  0) & 0xff],
               sk_linear_from_srgb[(ptr[1] >>  0) & 0xff],
@@ -82,67 +72,47 @@ static void SK_VECTORCALL load_d_srgb(SkRasterPipeline::Stage* st, size_t x,
               sk_linear_from_srgb[(ptr[3] >> 16) & 0xff] };
 
     da = SkNx_cast<float>((Sk4i::Load(ptr) >> 24) & 0xff) * (1/255.0f);
-
-    st->next(x, r,g,b,a, dr,dg,db,da);
 }
 
-static void SK_VECTORCALL load_d_srgb_tail(SkRasterPipeline::Stage* st, size_t x,
-                                           Sk4f r, Sk4f g, Sk4f b, Sk4f a,
-                                           Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-    auto ptr = st->ctx<const uint32_t*>() + x;
+SK_RASTER_STAGE(load_d_srgb_tail) {
+    auto ptr = (const uint32_t*)ctx + x;
 
     dr = Sk4f{ sk_linear_from_srgb[(*ptr >>  0) & 0xff], 0,0,0 };
     dg = Sk4f{ sk_linear_from_srgb[(*ptr >>  8) & 0xff], 0,0,0 };
     db = Sk4f{ sk_linear_from_srgb[(*ptr >> 16) & 0xff], 0,0,0 };
     da = Sk4f{                     (*ptr >> 24) * (1/255.0f), 0,0,0 };
-
-    st->next(x, r,g,b,a, dr,dg,db,da);
 }
 
-static void SK_VECTORCALL scale_u8(SkRasterPipeline::Stage* st, size_t x,
-                                   Sk4f r, Sk4f g, Sk4f b, Sk4f a,
-                                   Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-    auto ptr = st->ctx<const uint8_t*>() + x;
+SK_RASTER_STAGE(scale_u8) {
+    auto ptr = (const uint8_t*)ctx + x;
 
     auto c = SkNx_cast<float>(Sk4b::Load(ptr)) * (1/255.0f);
     r *= c;
     g *= c;
    b *= c;
     a *= c;
-
-    st->next(x, r,g,b,a, dr,dg,db,da);
 }
 
-static void SK_VECTORCALL scale_u8_tail(SkRasterPipeline::Stage* st, size_t x,
-                                        Sk4f r, Sk4f g, Sk4f b, Sk4f a,
-                                        Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-    auto ptr = st->ctx<const uint8_t*>() + x;
+SK_RASTER_STAGE(scale_u8_tail) {
+    auto ptr = (const uint8_t*)ctx + x;
 
     auto c = *ptr * (1/255.0f);
     r *= c;
     g *= c;
     b *= c;
     a *= c;
-
-    st->next(x, r,g,b,a, dr,dg,db,da);
 }
 
-static void SK_VECTORCALL srcover(SkRasterPipeline::Stage* st, size_t x,
-                                  Sk4f r, Sk4f g, Sk4f b, Sk4f a,
-                                  Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
+SK_RASTER_STAGE(srcover) {
     auto A = 1.0f - a;
     r += dr * A;
     g += dg * A;
     b += db * A;
     a += da * A;
-
-    st->next(x, r,g,b,a, dr,dg,db,da);
 }
 
-static void SK_VECTORCALL store_srgb(SkRasterPipeline::Stage* st, size_t x,
-                                     Sk4f r, Sk4f g, Sk4f b, Sk4f a,
-                                     Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-    auto ptr = st->ctx<uint32_t*>() + x;
+SK_RASTER_STAGE(store_srgb) {
+    auto ptr = (uint32_t*)ctx + x;
 
     (  sk_linear_to_srgb(r)
      | sk_linear_to_srgb(g) <<  8
@@ -150,10 +120,8 @@ static void SK_VECTORCALL store_srgb(SkRasterPipeline::Stage* st, size_t x,
      | Sk4f_round(255.0f*a) << 24).store(ptr);
 }
 
-static void SK_VECTORCALL store_srgb_tail(SkRasterPipeline::Stage* st, size_t x,
-                                          Sk4f r, Sk4f g, Sk4f b, Sk4f a,
-                                          Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-    auto ptr = st->ctx<uint32_t*>() + x;
+SK_RASTER_STAGE(store_srgb_tail) {
+    auto ptr = (uint32_t*)ctx + x;
 
     Sk4i rgba = sk_linear_to_srgb({r[0], g[0], b[0], 0});
     rgba = {rgba[0], rgba[1], rgba[2], (int)(255.0f * a[0] + 0.5f)};
@@ -163,23 +131,56 @@ static void SK_VECTORCALL store_srgb_tail(SkRasterPipeline::Stage* st, size_t x,
 class SkRasterPipelineBench : public Benchmark {
 public:
-    SkRasterPipelineBench() {}
+    SkRasterPipelineBench(bool fused) : fFused(fused) {}
 
     bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; }
-    const char* onGetName() override { return "SkRasterPipelineBench"; }
+    const char* onGetName() override { return fFused ? "SkRasterPipelineBench_fused"
+                                                     : "SkRasterPipelineBench_pipeline"; }
 
     void onDraw(int loops, SkCanvas*) override {
-        SkRasterPipeline p;
-        p.append(load_s_srgb, load_s_srgb_tail,  src);
-        p.append(   scale_u8,    scale_u8_tail, mask);
-        p.append(load_d_srgb, load_d_srgb_tail,  dst);
-        p.append(srcover);
-        p.append( store_srgb,  store_srgb_tail,  dst);
-
         while (loops --> 0) {
-            p.run(N);
+            fFused ? this->runFused() : this->runPipeline();
         }
     }
+
+    void runFused() {
+        Sk4f r,g,b,a, dr,dg,db,da;
+        size_t x = 0, n = N;
+        while (n >= 4) {
+            load_s_srgb(src ,    x, r,g,b,a, dr,dg,db,da);
+            scale_u8   (mask,    x, r,g,b,a, dr,dg,db,da);
+            load_d_srgb(dst ,    x, r,g,b,a, dr,dg,db,da);
+            srcover    (nullptr, x, r,g,b,a, dr,dg,db,da);
+            store_srgb (dst ,    x, r,g,b,a, dr,dg,db,da);
+
+            x += 4;
+            n -= 4;
+        }
+        while (n > 0) {
+            load_s_srgb_tail(src ,    x, r,g,b,a, dr,dg,db,da);
+            scale_u8_tail   (mask,    x, r,g,b,a, dr,dg,db,da);
+            load_d_srgb_tail(dst ,    x, r,g,b,a, dr,dg,db,da);
+            srcover         (nullptr, x, r,g,b,a, dr,dg,db,da);
+            store_srgb_tail (dst ,    x, r,g,b,a, dr,dg,db,da);
+
+            x += 1;
+            n -= 1;
+        }
+    }
+
+    void runPipeline() {
+        SkRasterPipeline p;
+        p.append<load_s_srgb, load_s_srgb_tail>( src);
+        p.append<   scale_u8,    scale_u8_tail>(mask);
+        p.append<load_d_srgb, load_d_srgb_tail>( dst);
+        p.append<srcover>();
+        p.append< store_srgb,  store_srgb_tail>( dst);
+
+        p.run(N);
+    }
+
+    bool fFused;
 };
 
-DEF_BENCH( return new SkRasterPipelineBench; )
+DEF_BENCH( return new SkRasterPipelineBench(true); )
+DEF_BENCH( return new SkRasterPipelineBench(false); )
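A note on the two paths above: runFused() hand-inlines the five stages, while runPipeline() dispatches the same work through SkRasterPipeline's stored function pointers, so comparing the two benches isolates the per-stage call overhead. The direct calls are possible because an EasyFn stage is just a static inline function that mutates its Sk4f arguments through references. A minimal sketch of the same pattern, using a hypothetical fill_white stage (illustration only, not part of this CL):

    SK_RASTER_STAGE(fill_white) {   // hypothetical stage; ctx, x, and dr..da go unused
        r = g = b = a = 1.0f;
    }

    Sk4f r,g,b,a, dr,dg,db,da;
    fill_white(nullptr, 0, r,g,b,a, dr,dg,db,da);   // direct, fused call: no Stage*, no next()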
diff --git a/src/core/SkRasterPipeline.h b/src/core/SkRasterPipeline.h
index acbabcbadc..475f517b18 100644
--- a/src/core/SkRasterPipeline.h
+++ b/src/core/SkRasterPipeline.h
@@ -45,6 +45,8 @@
  *
  * Some obvious stages that typically return are those that write a color to a destination pointer,
  * but any stage can short-circuit the rest of the pipeline by returning instead of calling next().
+ *
+ * TODO: explain EasyFn and SK_RASTER_STAGE
  */
 
 class SkRasterPipeline {
@@ -52,6 +54,9 @@ public:
     struct Stage;
     using Fn = void(SK_VECTORCALL *)(Stage*, size_t, Sk4f,Sk4f,Sk4f,Sk4f,
                                                      Sk4f,Sk4f,Sk4f,Sk4f);
+    using EasyFn = void(void*, size_t, Sk4f&, Sk4f&, Sk4f&, Sk4f&,
+                                       Sk4f&, Sk4f&, Sk4f&, Sk4f&);
+
     struct Stage {
         template <typename T>
         T ctx() { return static_cast<T>(fCtx); }
@@ -94,6 +99,21 @@ public:
         this->append(body, ctx, tail, ctx);
     }
 
+    // Versions of append that can be used with static EasyFns (see SK_RASTER_STAGE).
+    template <EasyFn body, EasyFn tail>
+    void append(const void* body_ctx, const void* tail_ctx) {
+        this->append(Easy<body>, body_ctx,
+                     Easy<tail>, tail_ctx);
+    }
+
+    template <EasyFn fn>
+    void append(const void* ctx = nullptr) { this->append<fn, fn>(ctx, ctx); }
+
+    template <EasyFn body, EasyFn tail>
+    void append(const void* ctx = nullptr) { this->append<body, tail>(ctx, ctx); }
+
+    // Append all stages to this pipeline.
     void extend(const SkRasterPipeline&);
 
@@ -106,10 +126,25 @@ private:
     static void SK_VECTORCALL JustReturn(Stage*, size_t, Sk4f,Sk4f,Sk4f,Sk4f,
                                                          Sk4f,Sk4f,Sk4f,Sk4f);
 
+    template <EasyFn kernel>
+    static void SK_VECTORCALL Easy(SkRasterPipeline::Stage* st, size_t x,
+                                   Sk4f  r, Sk4f  g, Sk4f  b, Sk4f  a,
+                                   Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
+        kernel(st->ctx<void*>(), x, r,g,b,a, dr,dg,db,da);
+        st->next(x, r,g,b,a, dr,dg,db,da);
+    }
+
     Stages fBody,
            fTail;
     Fn fBodyStart = &JustReturn,
        fTailStart = &JustReturn;
 };
 
+// These are always static, and we _really_ want them to inline.
+// If you find yourself wanting a non-inline stage, write a SkRasterPipeline::Fn directly.
+#define SK_RASTER_STAGE(name)                                                  \
+    static SK_ALWAYS_INLINE void name(void* ctx, size_t x,                     \
+                                      Sk4f&  r, Sk4f&  g, Sk4f&  b, Sk4f&  a,  \
+                                      Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f& da)
+
 #endif//SkRasterPipeline_DEFINED
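Filling in the TODO above, briefly: SK_RASTER_STAGE(name) declares name as a static, always-inline function with the EasyFn signature. Modulo whitespace, SK_RASTER_STAGE(srcover) expands to:

    static SK_ALWAYS_INLINE void srcover(void* ctx, size_t x,
                                         Sk4f&  r, Sk4f&  g, Sk4f&  b, Sk4f&  a,
                                         Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f& da)

The templated append<fn>() then instantiates Easy<fn>, a real SK_VECTORCALL Fn that unpacks the stage's context, runs the kernel with the color registers passed by reference, and calls next() itself. EasyFn stages therefore never touch pipeline control flow; the tradeoff is that, unlike hand-written Fns, they cannot short-circuit the rest of the pipeline by returning early.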
diff --git a/src/core/SkRasterPipelineBlitter.cpp b/src/core/SkRasterPipelineBlitter.cpp
index f3fc76ee79..0fae6dd5a2 100644
--- a/src/core/SkRasterPipelineBlitter.cpp
+++ b/src/core/SkRasterPipelineBlitter.cpp
@@ -58,9 +58,7 @@ SkBlitter* SkCreateRasterPipelineBlitter(const SkPixmap& dst,
 }
 
 // Clamp colors into [0,1] premul (e.g. just before storing back to memory).
-static void SK_VECTORCALL clamp_01_premul(SkRasterPipeline::Stage* st, size_t x,
-                                          Sk4f r, Sk4f g, Sk4f b, Sk4f a,
-                                          Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
+SK_RASTER_STAGE(clamp_01_premul) {
     a = Sk4f::Max(a, 0.0f);
     r = Sk4f::Max(r, 0.0f);
     g = Sk4f::Max(g, 0.0f);
@@ -70,32 +68,23 @@ static void SK_VECTORCALL clamp_01_premul(SkRasterPipeline::Stage* st, size_t x,
     r = Sk4f::Min(r, a);
     g = Sk4f::Min(g, a);
     b = Sk4f::Min(b, a);
-
-    st->next(x, r,g,b,a, dr,dg,db,da);
 }
 
 // The default shader produces a constant color (from the SkPaint).
-static void SK_VECTORCALL constant_color(SkRasterPipeline::Stage* st, size_t x,
-                                         Sk4f r, Sk4f g, Sk4f b, Sk4f a,
-                                         Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-    auto color = st->ctx<const SkPM4f*>();
+SK_RASTER_STAGE(constant_color) {
+    auto color = (const SkPM4f*)ctx;
     r = color->r();
     g = color->g();
     b = color->b();
     a = color->a();
-    st->next(x, r,g,b,a, dr,dg,db,da);
 }
 
 // The default transfer mode is srcover, s' = s + d*(1-sa).
-static void SK_VECTORCALL srcover(SkRasterPipeline::Stage* st, size_t x,
-                                  Sk4f r, Sk4f g, Sk4f b, Sk4f a,
-                                  Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-    auto A = 1.0f - a;
-    r += dr*A;
-    g += dg*A;
-    b += db*A;
-    a += da*A;
-    st->next(x, r,g,b,a, dr,dg,db,da);
+SK_RASTER_STAGE(srcover) {
+    r += dr*(1.0f - a);
+    g += dg*(1.0f - a);
+    b += db*(1.0f - a);
+    a += da*(1.0f - a);
 }
 
 static Sk4f lerp(const Sk4f& from, const Sk4f& to, const Sk4f& cov) {
@@ -103,44 +92,35 @@ static Sk4f lerp(const Sk4f& from, const Sk4f& to, const Sk4f& cov) {
 }
 
 // s' = d(1-c) + sc, for a constant c.
-static void SK_VECTORCALL lerp_constant_float(SkRasterPipeline::Stage* st, size_t x,
-                                              Sk4f r, Sk4f g, Sk4f b, Sk4f a,
-                                              Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-    Sk4f c = *st->ctx<const float*>();
+SK_RASTER_STAGE(lerp_constant_float) {
+    Sk4f c = *(const float*)ctx;
 
     r = lerp(dr, r, c);
     g = lerp(dg, g, c);
     b = lerp(db, b, c);
     a = lerp(da, a, c);
-    st->next(x, r,g,b,a, dr,dg,db,da);
 }
 
 // s' = d(1-c) + sc, 4 pixels at a time for 8-bit coverage.
-static void SK_VECTORCALL lerp_a8(SkRasterPipeline::Stage* st, size_t x,
-                                  Sk4f r, Sk4f g, Sk4f b, Sk4f a,
-                                  Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-    auto ptr = st->ctx<const uint8_t*>() + x;
+SK_RASTER_STAGE(lerp_a8) {
+    auto ptr = (const uint8_t*)ctx + x;
     Sk4f c = SkNx_cast<float>(Sk4b::Load(ptr)) * (1/255.0f);
 
     r = lerp(dr, r, c);
     g = lerp(dg, g, c);
     b = lerp(db, b, c);
     a = lerp(da, a, c);
-    st->next(x, r,g,b,a, dr,dg,db,da);
 }
 
 // Tail variant of lerp_a8() handling 1 pixel at a time.
-static void SK_VECTORCALL lerp_a8_1(SkRasterPipeline::Stage* st, size_t x,
-                                    Sk4f r, Sk4f g, Sk4f b, Sk4f a,
-                                    Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-    auto ptr = st->ctx<const uint8_t*>() + x;
+SK_RASTER_STAGE(lerp_a8_1) {
+    auto ptr = (const uint8_t*)ctx + x;
     Sk4f c = *ptr * (1/255.0f);
 
     r = lerp(dr, r, c);
     g = lerp(dg, g, c);
     b = lerp(db, b, c);
     a = lerp(da, a, c);
-    st->next(x, r,g,b,a, dr,dg,db,da);
 }
 
 static void from_565(const Sk4h& _565, Sk4f* r, Sk4f* g, Sk4f* b) {
@@ -158,10 +138,8 @@ static Sk4h to_565(const Sk4f& r, const Sk4f& g, const Sk4f& b) {
 }
 
 // s' = d(1-c) + sc, 4 pixels at a time for 565 coverage.
-static void SK_VECTORCALL lerp_lcd16(SkRasterPipeline::Stage* st, size_t x,
-                                     Sk4f r, Sk4f g, Sk4f b, Sk4f a,
-                                     Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-    auto ptr = st->ctx<const uint16_t*>() + x;
+SK_RASTER_STAGE(lerp_lcd16) {
+    auto ptr = (const uint16_t*)ctx + x;
     Sk4f cr, cg, cb;
     from_565(Sk4h::Load(ptr), &cr, &cg, &cb);
 
@@ -169,14 +147,11 @@ static void SK_VECTORCALL lerp_lcd16(SkRasterPipeline::Stage* st, size_t x,
     g = lerp(dg, g, cg);
     b = lerp(db, b, cb);
     a = 1.0f;
-    st->next(x, r,g,b,a, dr,dg,db,da);
 }
 
 // Tail variant of lerp_lcd16() handling 1 pixel at a time.
-static void SK_VECTORCALL lerp_lcd16_1(SkRasterPipeline::Stage* st, size_t x,
-                                       Sk4f r, Sk4f g, Sk4f b, Sk4f a,
-                                       Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-    auto ptr = st->ctx<const uint16_t*>() + x;
+SK_RASTER_STAGE(lerp_lcd16_1) {
+    auto ptr = (const uint16_t*)ctx + x;
     Sk4f cr, cg, cb;
     from_565({*ptr,0,0,0}, &cr, &cg, &cb);
 
@@ -184,52 +159,39 @@ static void SK_VECTORCALL lerp_lcd16_1(SkRasterPipeline::Stage* st, size_t x,
     g = lerp(dg, g, cg);
     b = lerp(db, b, cb);
     a = 1.0f;
-    st->next(x, r,g,b,a, dr,dg,db,da);
 }
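All the lerp_* stages above apply the coverage equation from their comments, s' = d(1-c) + sc, channel by channel. lerp()'s body sits outside these hunks, so take the equation rather than any particular implementation as the contract; a scalar sanity check with concrete numbers:

    // With d = 0.5, s = 1.0, c = 0.25:
    float s_new = 0.5f*(1 - 0.25f) + 1.0f*0.25f;   // = 0.625, a quarter of the way from d toward s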
 // Load 4 565 dst pixels.
-static void SK_VECTORCALL load_d_565(SkRasterPipeline::Stage* st, size_t x,
-                                     Sk4f r, Sk4f g, Sk4f b, Sk4f a,
-                                     Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-    auto ptr = st->ctx<const uint16_t*>() + x;
+SK_RASTER_STAGE(load_d_565) {
+    auto ptr = (const uint16_t*)ctx + x;
 
     from_565(Sk4h::Load(ptr), &dr,&dg,&db);
     da = 1.0f;
-    st->next(x, r,g,b,a, dr,dg,db,da);
 }
 
 // Load 1 565 dst pixel.
-static void SK_VECTORCALL load_d_565_1(SkRasterPipeline::Stage* st, size_t x,
-                                       Sk4f r, Sk4f g, Sk4f b, Sk4f a,
-                                       Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-    auto ptr = st->ctx<const uint16_t*>() + x;
+SK_RASTER_STAGE(load_d_565_1) {
+    auto ptr = (const uint16_t*)ctx + x;
 
     from_565({*ptr,0,0,0}, &dr,&dg,&db);
     da = 1.0f;
-    st->next(x, r,g,b,a, dr,dg,db,da);
 }
 
 // Store 4 565 pixels.
-static void SK_VECTORCALL store_565(SkRasterPipeline::Stage* st, size_t x,
-                                    Sk4f r, Sk4f g, Sk4f b, Sk4f a,
-                                    Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-    auto ptr = st->ctx<uint16_t*>() + x;
+SK_RASTER_STAGE(store_565) {
+    auto ptr = (uint16_t*)ctx + x;
     to_565(r,g,b).store(ptr);
 }
 
 // Store 1 565 pixel.
-static void SK_VECTORCALL store_565_1(SkRasterPipeline::Stage* st, size_t x,
-                                      Sk4f r, Sk4f g, Sk4f b, Sk4f a,
-                                      Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-    auto ptr = st->ctx<uint16_t*>() + x;
+SK_RASTER_STAGE(store_565_1) {
+    auto ptr = (uint16_t*)ctx + x;
     *ptr = to_565(r,g,b)[0];
 }
 
 // Load 4 F16 pixels.
-static void SK_VECTORCALL load_d_f16(SkRasterPipeline::Stage* st, size_t x,
-                                     Sk4f r, Sk4f g, Sk4f b, Sk4f a,
-                                     Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-    auto ptr = st->ctx<const uint64_t*>() + x;
+SK_RASTER_STAGE(load_d_f16) {
+    auto ptr = (const uint64_t*)ctx + x;
 
     Sk4h rh, gh, bh, ah;
     Sk4h_load4(ptr, &rh, &gh, &bh, &ah);
@@ -238,49 +200,37 @@ static void SK_VECTORCALL load_d_f16(SkRasterPipeline::Stage* st, size_t x,
     dg = SkHalfToFloat_finite(gh);
     db = SkHalfToFloat_finite(bh);
     da = SkHalfToFloat_finite(ah);
-
-    st->next(x, r,g,b,a, dr,dg,db,da);
 }
 
 // Load 1 F16 pixel.
-static void SK_VECTORCALL load_d_f16_1(SkRasterPipeline::Stage* st, size_t x,
-                                       Sk4f r, Sk4f g, Sk4f b, Sk4f a,
-                                       Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-    auto ptr = st->ctx<const uint64_t*>() + x;
+SK_RASTER_STAGE(load_d_f16_1) {
+    auto ptr = (const uint64_t*)ctx + x;
 
     auto p0 = SkHalfToFloat_finite(ptr[0]);
     dr = { p0[0],0,0,0 };
     dg = { p0[1],0,0,0 };
     db = { p0[2],0,0,0 };
     da = { p0[3],0,0,0 };
-
-    st->next(x, r,g,b,a, dr,dg,db,da);
 }
 
 // Store 4 F16 pixels.
-static void SK_VECTORCALL store_f16(SkRasterPipeline::Stage* st, size_t x,
-                                    Sk4f r, Sk4f g, Sk4f b, Sk4f a,
-                                    Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-    auto ptr = st->ctx<uint64_t*>() + x;
+SK_RASTER_STAGE(store_f16) {
+    auto ptr = (uint64_t*)ctx + x;
 
     Sk4h_store4(ptr, SkFloatToHalf_finite(r),
                      SkFloatToHalf_finite(g),
                      SkFloatToHalf_finite(b),
                      SkFloatToHalf_finite(a));
 }
 
 // Store 1 F16 pixel.
-static void SK_VECTORCALL store_f16_1(SkRasterPipeline::Stage* st, size_t x,
-                                      Sk4f r, Sk4f g, Sk4f b, Sk4f a,
-                                      Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-    auto ptr = st->ctx<uint64_t*>() + x;
+SK_RASTER_STAGE(store_f16_1) {
+    auto ptr = (uint64_t*)ctx + x;
 
     SkFloatToHalf_finite({r[0], g[0], b[0], a[0]}).store(ptr);
 }
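from_565() and to_565() also sit outside these hunks. For orientation: kRGB_565 packs a pixel into 16 bits as 5 bits of red (bits 15..11), 6 of green (bits 10..5), and 5 of blue (bits 4..0), which is why the 565 load stages above synthesize da = 1.0f; the format has no alpha. A standalone sketch of that packing (an illustration of the layout, not Skia's actual helpers):

    #include <stdint.h>

    // r, g, b in [0,1] -> RRRRRGGGGGGBBBBB
    static inline uint16_t pack565(float r, float g, float b) {
        return (uint16_t)( (unsigned)(r*31 + 0.5f) << 11
                         | (unsigned)(g*63 + 0.5f) <<  5
                         | (unsigned)(b*31 + 0.5f) <<  0 );
    }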
 // Load 4 8-bit sRGB pixels from SkPMColor order to RGBA.
-static void SK_VECTORCALL load_d_srgb(SkRasterPipeline::Stage* st, size_t x,
-                                      Sk4f r, Sk4f g, Sk4f b, Sk4f a,
-                                      Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-    auto ptr = st->ctx<const uint32_t*>() + x;
+SK_RASTER_STAGE(load_d_srgb) {
+    auto ptr = (const uint32_t*)ctx + x;
 
     dr = { sk_linear_from_srgb[(ptr[0] >> SK_R32_SHIFT) & 0xff],
            sk_linear_from_srgb[(ptr[1] >> SK_R32_SHIFT) & 0xff],
@@ -298,46 +248,36 @@ static void SK_VECTORCALL load_d_srgb(SkRasterPipeline::Stage* st, size_t x,
            sk_linear_from_srgb[(ptr[3] >> SK_B32_SHIFT) & 0xff] };
 
     da = SkNx_cast<float>(Sk4u::Load(ptr) >> SK_A32_SHIFT) * (1/255.0f);
-
-    st->next(x, r,g,b,a, dr,dg,db,da);
 }
 
 // Tail variant of load_d_srgb() handling 1 pixel at a time.
-static void SK_VECTORCALL load_d_srgb_1(SkRasterPipeline::Stage* st, size_t x,
-                                        Sk4f r, Sk4f g, Sk4f b, Sk4f a,
-                                        Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-    auto ptr = st->ctx<const uint32_t*>() + x;
+SK_RASTER_STAGE(load_d_srgb_1) {
+    auto ptr = (const uint32_t*)ctx + x;
 
     dr = { sk_linear_from_srgb[(*ptr >> SK_R32_SHIFT) & 0xff], 0,0,0 };
     dg = { sk_linear_from_srgb[(*ptr >> SK_G32_SHIFT) & 0xff], 0,0,0 };
     db = { sk_linear_from_srgb[(*ptr >> SK_B32_SHIFT) & 0xff], 0,0,0 };
     da = { (1/255.0f) * (*ptr >> SK_A32_SHIFT), 0,0,0 };
-
-    st->next(x, r,g,b,a, dr,dg,db,da);
 }
 
 // Write out 4 pixels as 8-bit SkPMColor-order sRGB.
-static void SK_VECTORCALL store_srgb(SkRasterPipeline::Stage* st, size_t x,
-                                     Sk4f r, Sk4f g, Sk4f b, Sk4f a,
-                                     Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-    auto dst = st->ctx<uint32_t*>() + x;
+SK_RASTER_STAGE(store_srgb) {
+    auto ptr = (uint32_t*)ctx + x;
 
     (          sk_linear_to_srgb_noclamp(r) << SK_R32_SHIFT
      |         sk_linear_to_srgb_noclamp(g) << SK_G32_SHIFT
      |         sk_linear_to_srgb_noclamp(b) << SK_B32_SHIFT
-     | Sk4f_round(255.0f * a)               << SK_A32_SHIFT).store(dst);
+     | Sk4f_round(255.0f * a)               << SK_A32_SHIFT).store(ptr);
 }
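The sRGB stages above are table-driven: sk_linear_from_srgb maps each 8-bit sRGB channel straight to its linear float value, so a gamma decode costs one table lookup per color channel, while alpha (already linear) needs only a multiply by 1/255. A sketch of how such a 256-entry table can be built from the standard sRGB transfer function (an assumption about the table's contents; Skia's actual generator is not part of this diff):

    #include <math.h>

    static float linear_from_srgb[256];   // stand-in for sk_linear_from_srgb

    static void build_table() {
        for (int i = 0; i < 256; i++) {
            float c = i * (1/255.0f);     // normalized 8-bit sRGB code
            linear_from_srgb[i] = c <= 0.04045f ? c * (1/12.92f)
                                                : powf((c + 0.055f) * (1/1.055f), 2.4f);
        }
    }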
 // Tail variant of store_srgb() handling 1 pixel at a time.
-static void SK_VECTORCALL store_srgb_1(SkRasterPipeline::Stage* st, size_t x,
-                                       Sk4f r, Sk4f g, Sk4f b, Sk4f a,
-                                       Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
-    auto dst = st->ctx<uint32_t*>() + x;
+SK_RASTER_STAGE(store_srgb_1) {
+    auto ptr = (uint32_t*)ctx + x;
 
     Sk4i rgb = sk_linear_to_srgb_noclamp(swizzle_rb_if_bgra({ r[0], g[0], b[0], 0.0f }));
 
     uint32_t rgba;
     SkNx_cast<uint8_t>(rgb).store(&rgba);
     rgba |= (uint32_t)(255.0f * a[0] + 0.5f) << 24;
-    *dst = rgba;
+    *ptr = rgba;
 }
 
 static bool supported(const SkImageInfo& info) {
@@ -386,10 +326,10 @@ SkBlitter* SkRasterPipelineBlitter::Create(const SkPixmap& dst,
                                  color.premul());
 
     if (!paint.getShader()) {
-        blitter->fShader.append(constant_color, &blitter->fPaintColor);
+        blitter->fShader.append<constant_color>(&blitter->fPaintColor);
     }
     if (!paint.getXfermode()) {
-        blitter->fXfermode.append(srcover);
+        blitter->fXfermode.append<srcover>();
     }
 
     return blitter;
@@ -401,14 +341,14 @@ void SkRasterPipelineBlitter::append_load_d(SkRasterPipeline* p, const void* dst
     switch (fDst.info().colorType()) {
         case kN32_SkColorType:
             if (fDst.info().gammaCloseToSRGB()) {
-                p->append(load_d_srgb, load_d_srgb_1, dst);
+                p->append<load_d_srgb, load_d_srgb_1>(dst);
             }
             break;
         case kRGBA_F16_SkColorType:
-            p->append(load_d_f16, load_d_f16_1, dst);
+            p->append<load_d_f16, load_d_f16_1>(dst);
             break;
         case kRGB_565_SkColorType:
-            p->append(load_d_565, load_d_565_1, dst);
+            p->append<load_d_565, load_d_565_1>(dst);
             break;
         default: break;
     }
@@ -417,18 +357,18 @@ void SkRasterPipelineBlitter::append_store(SkRasterPipeline* p, void* dst) const
     SkASSERT(supported(fDst.info()));
 
-    p->append(clamp_01_premul);
+    p->append<clamp_01_premul>();
     switch (fDst.info().colorType()) {
         case kN32_SkColorType:
             if (fDst.info().gammaCloseToSRGB()) {
-                p->append(store_srgb, store_srgb_1, dst);
+                p->append<store_srgb, store_srgb_1>(dst);
             }
             break;
         case kRGBA_F16_SkColorType:
-            p->append(store_f16, store_f16_1, dst);
+            p->append<store_f16, store_f16_1>(dst);
             break;
         case kRGB_565_SkColorType:
-            p->append(store_565, store_565_1, dst);
+            p->append<store_565, store_565_1>(dst);
             break;
         default: break;
     }
@@ -456,7 +396,7 @@ void SkRasterPipelineBlitter::blitAntiH(int x, int y, const SkAlpha aa[], const
     p.extend(fColorFilter);
     this->append_load_d(&p, dst);
     p.extend(fXfermode);
-    p.append(lerp_constant_float, &coverage);
+    p.append<lerp_constant_float>(&coverage);
     this->append_store(&p, dst);
 
     for (int16_t run = *runs; run > 0; run = *runs) {
@@ -486,10 +426,10 @@ void SkRasterPipelineBlitter::blitMask(const SkMask& mask, const SkIRect& clip)
     p.extend(fXfermode);
     switch (mask.fFormat) {
         case SkMask::kA8_Format:
-            p.append(lerp_a8, lerp_a8_1, mask.getAddr8(x,y)-x);
+            p.append<lerp_a8, lerp_a8_1>(mask.getAddr8(x,y)-x);
             break;
         case SkMask::kLCD16_Format:
-            p.append(lerp_lcd16, lerp_lcd16_1, mask.getAddrLCD16(x,y)-x);
+            p.append<lerp_lcd16, lerp_lcd16_1>(mask.getAddrLCD16(x,y)-x);
             break;
         default: break;
     }
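A design note on the blitMask() hunk above: every memory stage computes its pointer as (T*)ctx + x, so the blitter biases the context by the starting column, passing mask.getAddr8(x,y) - x. The stage's own x then cancels the bias:

    // stage:    auto ptr = (const uint8_t*)ctx + x;
    // blitter:  ctx = mask.getAddr8(x,y) - x;
    // => ptr == mask.getAddr8(x,y) at the first pixel, and tracks x from there.

This keeps the stage signature down to one context pointer plus x, with no separate per-run offset threaded through the pipeline.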
diff --git a/tests/SkRasterPipelineTest.cpp b/tests/SkRasterPipelineTest.cpp
index beb517b3f7..29fe59d31b 100644
--- a/tests/SkRasterPipelineTest.cpp
+++ b/tests/SkRasterPipelineTest.cpp
@@ -9,55 +9,40 @@
 #include "SkRasterPipeline.h"
 
 // load needs two variants, one to load 4 values...
-static void SK_VECTORCALL load(SkRasterPipeline::Stage* st, size_t x,
-                               Sk4f v0, Sk4f v1, Sk4f v2, Sk4f v3,
-                               Sk4f v4, Sk4f v5, Sk4f v6, Sk4f v7) {
-    auto ptr = st->ctx<const float*>();
-    v0 = Sk4f{ptr[x+0]};
-    v1 = Sk4f{ptr[x+1]};
-    v2 = Sk4f{ptr[x+2]};
-    v3 = Sk4f{ptr[x+3]};
-
-    st->next(x, v0,v1,v2,v3, v4,v5,v6,v7);
+SK_RASTER_STAGE(load) {
+    auto ptr = (const float*)ctx + x;
+    r = Sk4f{ptr[0]};
+    g = Sk4f{ptr[1]};
+    b = Sk4f{ptr[2]};
+    a = Sk4f{ptr[3]};
 }
 
 // ...and one to load a single value.
-static void SK_VECTORCALL load_tail(SkRasterPipeline::Stage* st, size_t x,
-                                    Sk4f v0, Sk4f v1, Sk4f v2, Sk4f v3,
-                                    Sk4f v4, Sk4f v5, Sk4f v6, Sk4f v7) {
-    auto ptr = st->ctx<const float*>();
-    v0 = Sk4f{ptr[x]};
-
-    st->next(x, v0,v1,v2,v3, v4,v5,v6,v7);
+SK_RASTER_STAGE(load_tail) {
+    auto ptr = (const float*)ctx + x;
+    r = Sk4f{*ptr};
 }
 
 // square doesn't really care how many of its inputs are active, nor does it need a context.
-static void SK_VECTORCALL square(SkRasterPipeline::Stage* st, size_t x,
-                                 Sk4f v0, Sk4f v1, Sk4f v2, Sk4f v3,
-                                 Sk4f v4, Sk4f v5, Sk4f v6, Sk4f v7) {
-    v0 *= v0;
-    v1 *= v1;
-    v2 *= v2;
-    v3 *= v3;
-    st->next(x, v0,v1,v2,v3, v4,v5,v6,v7);
+SK_RASTER_STAGE(square) {
+    r *= r;
+    g *= g;
+    b *= b;
+    a *= a;
 }
 
-// Like load, store has a _tail variant. It ends the pipeline by returning.
-static void SK_VECTORCALL store(SkRasterPipeline::Stage* st, size_t x,
-                                Sk4f v0, Sk4f v1, Sk4f v2, Sk4f v3,
-                                Sk4f v4, Sk4f v5, Sk4f v6, Sk4f v7) {
-    auto ptr = st->ctx<float*>();
-    ptr[x+0] = v0[0];
-    ptr[x+1] = v1[0];
-    ptr[x+2] = v2[0];
-    ptr[x+3] = v3[0];
+// Like load, store has a _tail variant.
+SK_RASTER_STAGE(store) {
+    auto ptr = (float*)ctx + x;
+    ptr[0] = r[0];
+    ptr[1] = g[0];
+    ptr[2] = b[0];
+    ptr[3] = a[0];
 }
 
-static void SK_VECTORCALL store_tail(SkRasterPipeline::Stage* st, size_t x,
-                                     Sk4f v0, Sk4f v1, Sk4f v2, Sk4f v3,
-                                     Sk4f v4, Sk4f v5, Sk4f v6, Sk4f v7) {
-    auto ptr = st->ctx<float*>();
-    ptr[x+0] = v0[0];
+SK_RASTER_STAGE(store_tail) {
+    auto ptr = (float*)ctx + x;
+    *ptr = r[0];
 }
 
 DEF_TEST(SkRasterPipeline, r) {
@@ -73,9 +58,9 @@ DEF_TEST(SkRasterPipeline, r) {
     float dst_vals[] = { 0,0,0,0,0 };
 
     SkRasterPipeline p;
-    p.append(load, load_tail, src_vals);
-    p.append(square);
-    p.append(store, store_tail, dst_vals);
+    p.append<load, load_tail>(src_vals);
+    p.append<square>();
+    p.append<store, store_tail>(dst_vals);
 
     p.run(5);
 
@@ -96,6 +81,6 @@ DEF_TEST(SkRasterPipeline_nonsense, r) {
     // No asserts... just a test that this is safe to run and terminates.
     // square() always calls st->next(); this makes sure we've always got something there to call.
     SkRasterPipeline p;
-    p.append(square);
+    p.append<square>();
     p.run(20);
 }
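The test uses 5 values deliberately: run(5) must take the 4-wide body path once and the 1-wide tail path once. A sketch of the dispatch run(n) performs, inferred from the fused bench loop above rather than from run()'s own source (which is outside this diff):

    size_t x = 0;
    while (n >= 4) { body(x); x += 4; n -= 4; }   // here: x = 0, load/square/store 4 floats
    while (n >  0) { tail(x); x += 1; n -= 1; }   // here: x = 4, the one leftover float

(One context line above is now stale: the comment saying square() always calls st->next() describes the old scheme; with EasyFns it is the Easy<> wrapper, not the stage itself, that calls next().)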