diff options
author | Mike Klein <mtklein@chromium.org> | 2016-10-25 10:27:33 -0400 |
---|---|---|
committer | Mike Klein <mtklein@chromium.org> | 2016-10-25 17:30:38 +0000 |
commit | aebfb45104eeb6dab5dbbedda13c2eaa7b7f7868 (patch) | |
tree | 7dcbf9bf86bd1b9fad0048dccbabb11e47a1d3fb /src | |
parent | cc813ae9a0afe4259f12b655d9336662a6e2c100 (diff) |
Move SkRasterPipeline further into SkOpts.
The portable code now becomes entirely focused on enum+ptr descriptions, leaving the concrete implementation of the pipeline to SkOpts::run_pipeline().
The concrete implementation is basically the same as before, with a little more type safety.
Speed is essentially unchanged on my laptop, even though run_pipeline() currently rebuilds its concrete state on every call. There's room for improvement there if we split this into a compile_pipeline() / run_pipeline() pair, which is my next planned CL.
BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=3920
CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot
Change-Id: Ie4c554f51040426de7c5c144afa5d9d9d8938012
Reviewed-on: https://skia-review.googlesource.com/3920
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
Diffstat (limited to 'src')
-rw-r--r-- | src/core/SkOpts.cpp | 102 | ||||
-rw-r--r-- | src/core/SkOpts.h | 8 | ||||
-rw-r--r-- | src/core/SkRasterPipeline.cpp | 36 | ||||
-rw-r--r-- | src/core/SkRasterPipeline.h | 98 | ||||
-rw-r--r-- | src/opts/SkOpts_hsw.cpp | 56 | ||||
-rw-r--r-- | src/opts/SkOpts_sse41.cpp | 56 | ||||
-rw-r--r-- | src/opts/SkRasterPipeline_opts.h | 783 |
7 files changed, 456 insertions, 683 deletions
diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp index 16389484b4..c082b160ee 100644 --- a/src/core/SkOpts.cpp +++ b/src/core/SkOpts.cpp @@ -92,108 +92,6 @@ namespace SkOpts { DEFINE_DEFAULT(run_pipeline); #undef DEFINE_DEFAULT - SkOpts::VoidFn body[] = { - (SkOpts::VoidFn)SK_OPTS_NS::just_return, - (SkOpts::VoidFn)SK_OPTS_NS::swap_src_dst, - - (SkOpts::VoidFn)SK_OPTS_NS::store_565, - (SkOpts::VoidFn)SK_OPTS_NS::store_srgb, - (SkOpts::VoidFn)SK_OPTS_NS::store_f16, - - (SkOpts::VoidFn)SK_OPTS_NS::load_s_565, - (SkOpts::VoidFn)SK_OPTS_NS::load_s_srgb, - (SkOpts::VoidFn)SK_OPTS_NS::load_s_f16, - - (SkOpts::VoidFn)SK_OPTS_NS::load_d_565, - (SkOpts::VoidFn)SK_OPTS_NS::load_d_srgb, - (SkOpts::VoidFn)SK_OPTS_NS::load_d_f16, - - (SkOpts::VoidFn)SK_OPTS_NS::scale_u8, - - (SkOpts::VoidFn)SK_OPTS_NS::lerp_u8, - (SkOpts::VoidFn)SK_OPTS_NS::lerp_565, - (SkOpts::VoidFn)SK_OPTS_NS::lerp_constant_float, - - (SkOpts::VoidFn)SK_OPTS_NS::constant_color, - - (SkOpts::VoidFn)SK_OPTS_NS::dst, - (SkOpts::VoidFn)SK_OPTS_NS::dstatop, - (SkOpts::VoidFn)SK_OPTS_NS::dstin, - (SkOpts::VoidFn)SK_OPTS_NS::dstout, - (SkOpts::VoidFn)SK_OPTS_NS::dstover, - (SkOpts::VoidFn)SK_OPTS_NS::srcatop, - (SkOpts::VoidFn)SK_OPTS_NS::srcin, - (SkOpts::VoidFn)SK_OPTS_NS::srcout, - (SkOpts::VoidFn)SK_OPTS_NS::srcover, - (SkOpts::VoidFn)SK_OPTS_NS::clear, - (SkOpts::VoidFn)SK_OPTS_NS::modulate, - (SkOpts::VoidFn)SK_OPTS_NS::multiply, - (SkOpts::VoidFn)SK_OPTS_NS::plus_, - (SkOpts::VoidFn)SK_OPTS_NS::screen, - (SkOpts::VoidFn)SK_OPTS_NS::xor_, - (SkOpts::VoidFn)SK_OPTS_NS::colorburn, - (SkOpts::VoidFn)SK_OPTS_NS::colordodge, - (SkOpts::VoidFn)SK_OPTS_NS::darken, - (SkOpts::VoidFn)SK_OPTS_NS::difference, - (SkOpts::VoidFn)SK_OPTS_NS::exclusion, - (SkOpts::VoidFn)SK_OPTS_NS::hardlight, - (SkOpts::VoidFn)SK_OPTS_NS::lighten, - (SkOpts::VoidFn)SK_OPTS_NS::overlay, - (SkOpts::VoidFn)SK_OPTS_NS::softlight, - }; - static_assert(SK_ARRAY_COUNT(body) == SkRasterPipeline::kNumStockStages, ""); - - SkOpts::VoidFn 
tail[] = { - (SkOpts::VoidFn)SK_OPTS_NS::just_return_tail, - (SkOpts::VoidFn)SK_OPTS_NS::swap_src_dst_tail, - - (SkOpts::VoidFn)SK_OPTS_NS::store_565_tail, - (SkOpts::VoidFn)SK_OPTS_NS::store_srgb_tail, - (SkOpts::VoidFn)SK_OPTS_NS::store_f16_tail, - - (SkOpts::VoidFn)SK_OPTS_NS::load_s_565_tail, - (SkOpts::VoidFn)SK_OPTS_NS::load_s_srgb_tail, - (SkOpts::VoidFn)SK_OPTS_NS::load_s_f16_tail, - - (SkOpts::VoidFn)SK_OPTS_NS::load_d_565_tail, - (SkOpts::VoidFn)SK_OPTS_NS::load_d_srgb_tail, - (SkOpts::VoidFn)SK_OPTS_NS::load_d_f16_tail, - - (SkOpts::VoidFn)SK_OPTS_NS::scale_u8_tail, - - (SkOpts::VoidFn)SK_OPTS_NS::lerp_u8_tail, - (SkOpts::VoidFn)SK_OPTS_NS::lerp_565_tail, - (SkOpts::VoidFn)SK_OPTS_NS::lerp_constant_float_tail, - - (SkOpts::VoidFn)SK_OPTS_NS::constant_color_tail, - - (SkOpts::VoidFn)SK_OPTS_NS::dst_tail, - (SkOpts::VoidFn)SK_OPTS_NS::dstatop_tail, - (SkOpts::VoidFn)SK_OPTS_NS::dstin_tail, - (SkOpts::VoidFn)SK_OPTS_NS::dstout_tail, - (SkOpts::VoidFn)SK_OPTS_NS::dstover_tail, - (SkOpts::VoidFn)SK_OPTS_NS::srcatop_tail, - (SkOpts::VoidFn)SK_OPTS_NS::srcin_tail, - (SkOpts::VoidFn)SK_OPTS_NS::srcout_tail, - (SkOpts::VoidFn)SK_OPTS_NS::srcover_tail, - (SkOpts::VoidFn)SK_OPTS_NS::clear_tail, - (SkOpts::VoidFn)SK_OPTS_NS::modulate_tail, - (SkOpts::VoidFn)SK_OPTS_NS::multiply_tail, - (SkOpts::VoidFn)SK_OPTS_NS::plus__tail, - (SkOpts::VoidFn)SK_OPTS_NS::screen_tail, - (SkOpts::VoidFn)SK_OPTS_NS::xor__tail, - (SkOpts::VoidFn)SK_OPTS_NS::colorburn_tail, - (SkOpts::VoidFn)SK_OPTS_NS::colordodge_tail, - (SkOpts::VoidFn)SK_OPTS_NS::darken_tail, - (SkOpts::VoidFn)SK_OPTS_NS::difference_tail, - (SkOpts::VoidFn)SK_OPTS_NS::exclusion_tail, - (SkOpts::VoidFn)SK_OPTS_NS::hardlight_tail, - (SkOpts::VoidFn)SK_OPTS_NS::lighten_tail, - (SkOpts::VoidFn)SK_OPTS_NS::overlay_tail, - (SkOpts::VoidFn)SK_OPTS_NS::softlight_tail, - }; - static_assert(SK_ARRAY_COUNT(tail) == SkRasterPipeline::kNumStockStages, ""); - // Each Init_foo() is defined in src/opts/SkOpts_foo.cpp. 
void Init_ssse3(); void Init_sse41(); diff --git a/src/core/SkOpts.h b/src/core/SkOpts.h index 4685d86691..7283030068 100644 --- a/src/core/SkOpts.h +++ b/src/core/SkOpts.h @@ -73,13 +73,7 @@ namespace SkOpts { return hash_fn(data, bytes, seed); } - // SkRasterPipeline::Fn has different types in different files (notably, in SkOpts_hsw.cpp - // they're all in terms of Sk8f.) We store them with a type everyone can agree on, void(*)(). - using VoidFn = void(*)(); - extern VoidFn body[SkRasterPipeline::kNumStockStages], - tail[SkRasterPipeline::kNumStockStages]; - extern void (*run_pipeline)(size_t, size_t, void(*)(), SkRasterPipeline::Stage*, - void(*)(), SkRasterPipeline::Stage*); + extern void (*run_pipeline)(size_t, size_t, const SkRasterPipeline::Stage*, int); } #endif//SkOpts_DEFINED diff --git a/src/core/SkRasterPipeline.cpp b/src/core/SkRasterPipeline.cpp index bc7feaccc7..707a33ae6f 100644 --- a/src/core/SkRasterPipeline.cpp +++ b/src/core/SkRasterPipeline.cpp @@ -8,40 +8,20 @@ #include "SkOpts.h" #include "SkRasterPipeline.h" -SkRasterPipeline::SkRasterPipeline() { - fBodyStart = SkOpts::body[just_return]; - fTailStart = SkOpts::tail[just_return]; -} - -void SkRasterPipeline::append(void (*body)(), void (*tail)(), void* ctx) { - // Each stage holds its own context and the next function to call. - // So the pipeline itself has to hold onto the first function that starts the pipeline. - (fBody.empty() ? fBodyStart : fBody.back().fNext) = body; - (fTail.empty() ? fTailStart : fTail.back().fNext) = tail; - - // Each last stage starts with its next function set to JustReturn as a safety net. - // It'll be overwritten by the next call to append(). 
- fBody.push_back({ SkOpts::body[just_return], ctx }); - fTail.push_back({ SkOpts::tail[just_return], ctx }); -} +SkRasterPipeline::SkRasterPipeline() {} void SkRasterPipeline::append(StockStage stage, void* ctx) { - this->append(SkOpts::body[stage], SkOpts::tail[stage], ctx); + SkASSERT(fNum < (int)SK_ARRAY_COUNT(fStages)); + fStages[fNum++] = { stage, ctx }; } void SkRasterPipeline::extend(const SkRasterPipeline& src) { - SkASSERT(src.fBody.count() == src.fTail.count()); - - auto body = src.fBodyStart, - tail = src.fTailStart; - for (int i = 0; i < src.fBody.count(); i++) { - SkASSERT(src.fBody[i].fCtx == src.fTail[i].fCtx); - this->append(body, tail, src.fBody[i].fCtx); - body = src.fBody[i].fNext; - tail = src.fTail[i].fNext; + for (int i = 0; i < src.fNum; i++) { + const Stage& s = src.fStages[i]; + this->append(s.stage, s.ctx); } } -void SkRasterPipeline::run(size_t x, size_t n) { - SkOpts::run_pipeline(x,n, fBodyStart,fBody.begin(), fTailStart,fTail.begin()); +void SkRasterPipeline::run(size_t x, size_t n) const { + SkOpts::run_pipeline(x,n, fStages, fNum); } diff --git a/src/core/SkRasterPipeline.h b/src/core/SkRasterPipeline.h index 525a8dbe72..60279dd168 100644 --- a/src/core/SkRasterPipeline.h +++ b/src/core/SkRasterPipeline.h @@ -53,72 +53,31 @@ // TODO: There may be a better place to stuff tail, e.g. in the bottom alignment bits of // the Stage*. This mostly matters on 64-bit Windows where every register is precious. 
+#define SK_RASTER_PIPELINE_STAGES(M) \ + M(swap_src_dst) M(constant_color) \ + M(load_s_565) M(load_d_565) M(store_565) \ + M(load_s_srgb) M(load_d_srgb) M(store_srgb) \ + M(load_s_f16) M(load_d_f16) M(store_f16) \ + M(scale_u8) \ + M(lerp_u8) M(lerp_565) M(lerp_constant_float) \ + M(dst) \ + M(dstatop) M(dstin) M(dstout) M(dstover) \ + M(srcatop) M(srcin) M(srcout) M(srcover) \ + M(clear) M(modulate) M(multiply) M(plus_) M(screen) M(xor_) \ + M(colorburn) M(colordodge) M(darken) M(difference) \ + M(exclusion) M(hardlight) M(lighten) M(overlay) M(softlight) + class SkRasterPipeline { public: - struct Stage { - // It makes next() a good bit cheaper if we hold the next function to call here, - // rather than logically simpler choice of the function implementing this stage. - void (*fNext)(); - void* fCtx; - }; + // No pipeline may be more than kMaxStages long. + static const int kMaxStages = 32; SkRasterPipeline(); - // Run the pipeline constructed with append(), walking x through [x,x+n), - // generally in 4-pixel steps, with perhaps one jagged tail step. - void run(size_t x, size_t n); - void run(size_t n) { this->run(0, n); } - enum StockStage { - just_return, - swap_src_dst, - - store_565, - store_srgb, - store_f16, - - load_s_565, - load_s_srgb, - load_s_f16, - - load_d_565, - load_d_srgb, - load_d_f16, - - scale_u8, - - lerp_u8, - lerp_565, - lerp_constant_float, - - constant_color, - - dst, - dstatop, - dstin, - dstout, - dstover, - srcatop, - srcin, - srcout, - srcover, - clear, - modulate, - multiply, - plus_, - screen, - xor_, - colorburn, - colordodge, - darken, - difference, - exclusion, - hardlight, - lighten, - overlay, - softlight, - - kNumStockStages, + #define M(stage) stage, + SK_RASTER_PIPELINE_STAGES(M) + #undef M }; void append(StockStage, void* = nullptr); void append(StockStage stage, const void* ctx) { this->append(stage, const_cast<void*>(ctx)); } @@ -126,15 +85,20 @@ public: // Append all stages to this pipeline. 
void extend(const SkRasterPipeline&); -private: - using Stages = SkSTArray<10, Stage, /*MEM_COPY=*/true>; + // Run the pipeline constructed with append(), walking x through [x,x+n), + // generally in 4-pixel steps, with perhaps one jagged tail step. + void run(size_t x, size_t n) const; + void run(size_t n) const { this->run(0, n); } - void append(void (*body)(), void (*tail)(), void*); - Stages fBody, - fTail; - void (*fBodyStart)() = nullptr; - void (*fTailStart)() = nullptr; + struct Stage { + StockStage stage; + void* ctx; + }; + +private: + int fNum = 0; + Stage fStages[kMaxStages]; }; #endif//SkRasterPipeline_DEFINED diff --git a/src/opts/SkOpts_hsw.cpp b/src/opts/SkOpts_hsw.cpp index 3c5d19d47c..55fe045a56 100644 --- a/src/opts/SkOpts_hsw.cpp +++ b/src/opts/SkOpts_hsw.cpp @@ -12,61 +12,7 @@ namespace SkOpts { void Init_hsw() { - - run_pipeline = SK_OPTS_NS::run_pipeline; - - #define STAGE(stage) \ - body[SkRasterPipeline::stage] = (SkOpts::VoidFn)SK_OPTS_NS::stage; \ - tail[SkRasterPipeline::stage] = (SkOpts::VoidFn)SK_OPTS_NS::stage##_tail - - STAGE(store_565); - STAGE(store_srgb); - STAGE(store_f16); - - STAGE(load_s_565); - STAGE(load_s_srgb); - STAGE(load_s_f16); - - STAGE(load_d_565); - STAGE(load_d_srgb); - STAGE(load_d_f16); - - STAGE(scale_u8); - - STAGE(lerp_u8); - STAGE(lerp_565); - - STAGE(just_return); - STAGE(swap_src_dst); - STAGE(lerp_constant_float); - STAGE(constant_color); - - STAGE(dst); - STAGE(dstatop); - STAGE(dstin); - STAGE(dstout); - STAGE(dstover); - STAGE(srcatop); - STAGE(srcin); - STAGE(srcout); - STAGE(srcover); - STAGE(clear); - STAGE(modulate); - STAGE(multiply); - STAGE(plus_); - STAGE(screen); - STAGE(xor_); - STAGE(colorburn); - STAGE(colordodge); - STAGE(darken); - STAGE(difference); - STAGE(exclusion); - STAGE(hardlight); - STAGE(lighten); - STAGE(overlay); - STAGE(softlight); - #undef STAGE - + run_pipeline = hsw::run_pipeline; } } diff --git a/src/opts/SkOpts_sse41.cpp b/src/opts/SkOpts_sse41.cpp index 
b7f1bdd7c6..7a90f7683e 100644 --- a/src/opts/SkOpts_sse41.cpp +++ b/src/opts/SkOpts_sse41.cpp @@ -20,60 +20,6 @@ namespace SkOpts { box_blur_yx = sse41::box_blur_yx; srcover_srgb_srgb = sse41::srcover_srgb_srgb; blit_row_s32a_opaque = sse41::blit_row_s32a_opaque; - - run_pipeline = SK_OPTS_NS::run_pipeline; - - #define STAGE(stage) \ - body[SkRasterPipeline::stage] = (SkOpts::VoidFn)SK_OPTS_NS::stage; \ - tail[SkRasterPipeline::stage] = (SkOpts::VoidFn)SK_OPTS_NS::stage##_tail - - STAGE(store_565); - STAGE(store_srgb); - STAGE(store_f16); - - STAGE(load_s_565); - STAGE(load_s_srgb); - STAGE(load_s_f16); - - STAGE(load_d_565); - STAGE(load_d_srgb); - STAGE(load_d_f16); - - STAGE(scale_u8); - - STAGE(lerp_u8); - STAGE(lerp_565); - - STAGE(just_return); - STAGE(swap_src_dst); - STAGE(lerp_constant_float); - STAGE(constant_color); - - STAGE(dst); - STAGE(dstatop); - STAGE(dstin); - STAGE(dstout); - STAGE(dstover); - STAGE(srcatop); - STAGE(srcin); - STAGE(srcout); - STAGE(srcover); - STAGE(clear); - STAGE(modulate); - STAGE(multiply); - STAGE(plus_); - STAGE(screen); - STAGE(xor_); - STAGE(colorburn); - STAGE(colordodge); - STAGE(darken); - STAGE(difference); - STAGE(exclusion); - STAGE(hardlight); - STAGE(lighten); - STAGE(overlay); - STAGE(softlight); - #undef STAGE - + run_pipeline = sse41::run_pipeline; } } diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h index f072f843bd..2e1e3a2a47 100644 --- a/src/opts/SkRasterPipeline_opts.h +++ b/src/opts/SkRasterPipeline_opts.h @@ -14,6 +14,8 @@ #include "SkSRGB.h" #include <utility> +namespace { + #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2 static constexpr int N = 8; #else @@ -24,438 +26,481 @@ using SkNf = SkNx<N, float>; using SkNi = SkNx<N, int>; using SkNh = SkNx<N, uint16_t>; -using Body = void(SK_VECTORCALL *)(SkRasterPipeline::Stage*, size_t, - SkNf,SkNf,SkNf,SkNf, - SkNf,SkNf,SkNf,SkNf); -using Tail = void(SK_VECTORCALL *)(SkRasterPipeline::Stage*, size_t, size_t, - 
SkNf,SkNf,SkNf,SkNf, - SkNf,SkNf,SkNf,SkNf); + struct BodyStage; + struct TailStage; + + using Body = void(SK_VECTORCALL *)(BodyStage*, size_t, SkNf,SkNf,SkNf,SkNf, + SkNf,SkNf,SkNf,SkNf); + using Tail = void(SK_VECTORCALL *)(TailStage*, size_t, size_t, SkNf,SkNf,SkNf,SkNf, + SkNf,SkNf,SkNf,SkNf); + struct BodyStage { Body next; void* ctx; }; + struct TailStage { Tail next; void* ctx; }; + +} // namespace #define SI static inline // Stages are logically a pipeline, and physically are contiguous in an array. // To get to the next stage, we just increment our pointer to the next array element. -SI void SK_VECTORCALL next_body(SkRasterPipeline::Stage* st, size_t x, - SkNf r, SkNf g, SkNf b, SkNf a, - SkNf dr, SkNf dg, SkNf db, SkNf da) { - ((Body)st->fNext)(st+1, x, r,g,b,a, dr,dg,db,da); +SI void SK_VECTORCALL next(BodyStage* st, size_t x, + SkNf r, SkNf g, SkNf b, SkNf a, + SkNf dr, SkNf dg, SkNf db, SkNf da) { + st->next(st+1, x, r,g,b,a, dr,dg,db,da); } -SI void SK_VECTORCALL next_tail(SkRasterPipeline::Stage* st, size_t x, size_t tail, - SkNf r, SkNf g, SkNf b, SkNf a, - SkNf dr, SkNf dg, SkNf db, SkNf da) { - ((Tail)st->fNext)(st+1, x,tail, r,g,b,a, dr,dg,db,da); +SI void SK_VECTORCALL next(TailStage* st, size_t x, size_t tail, + SkNf r, SkNf g, SkNf b, SkNf a, + SkNf dr, SkNf dg, SkNf db, SkNf da) { + st->next(st+1, x,tail, r,g,b,a, dr,dg,db,da); } -#define STAGE(name, kCallNext) \ - template <bool kIsTail> \ - static SK_ALWAYS_INLINE void name##_kernel(void* ctx, size_t x, size_t tail, \ - SkNf& r, SkNf& g, SkNf& b, SkNf& a, \ - SkNf& dr, SkNf& dg, SkNf& db, SkNf& da); \ - SI void SK_VECTORCALL name(SkRasterPipeline::Stage* st, size_t x, \ - SkNf r, SkNf g, SkNf b, SkNf a, \ - SkNf dr, SkNf dg, SkNf db, SkNf da) { \ - name##_kernel<false>(st->fCtx, x,0, r,g,b,a, dr,dg,db,da); \ - if (kCallNext) { \ - next_body(st, x, r,g,b,a, dr,dg,db,da); \ - } \ - } \ - SI void SK_VECTORCALL name##_tail(SkRasterPipeline::Stage* st, size_t x, size_t tail, \ - SkNf r, SkNf g, 
SkNf b, SkNf a, \ - SkNf dr, SkNf dg, SkNf db, SkNf da) { \ - name##_kernel<true>(st->fCtx, x,tail, r,g,b,a, dr,dg,db,da); \ - if (kCallNext) { \ - next_tail(st, x,tail, r,g,b,a, dr,dg,db,da); \ - } \ - } \ - template <bool kIsTail> \ - static SK_ALWAYS_INLINE void name##_kernel(void* ctx, size_t x, size_t tail, \ - SkNf& r, SkNf& g, SkNf& b, SkNf& a, \ +#define STAGE(name, kCallNext) \ + template <bool kIsTail> \ + static SK_ALWAYS_INLINE void name##_kernel(void* ctx, size_t x, size_t tail, \ + SkNf& r, SkNf& g, SkNf& b, SkNf& a, \ + SkNf& dr, SkNf& dg, SkNf& db, SkNf& da); \ + SI void SK_VECTORCALL name(BodyStage* st, size_t x, \ + SkNf r, SkNf g, SkNf b, SkNf a, \ + SkNf dr, SkNf dg, SkNf db, SkNf da) { \ + name##_kernel<false>(st->ctx, x,0, r,g,b,a, dr,dg,db,da); \ + if (kCallNext) { \ + next(st, x, r,g,b,a, dr,dg,db,da); \ + } \ + } \ + SI void SK_VECTORCALL name(TailStage* st, size_t x, size_t tail, \ + SkNf r, SkNf g, SkNf b, SkNf a, \ + SkNf dr, SkNf dg, SkNf db, SkNf da) { \ + name##_kernel<true>(st->ctx, x,tail, r,g,b,a, dr,dg,db,da); \ + if (kCallNext) { \ + next(st, x,tail, r,g,b,a, dr,dg,db,da); \ + } \ + } \ + template <bool kIsTail> \ + static SK_ALWAYS_INLINE void name##_kernel(void* ctx, size_t x, size_t tail, \ + SkNf& r, SkNf& g, SkNf& b, SkNf& a, \ SkNf& dr, SkNf& dg, SkNf& db, SkNf& da) // Many xfermodes apply the same logic to each channel. 
-#define RGBA_XFERMODE(name) \ - static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \ - const SkNf& d, const SkNf& da); \ - SI void SK_VECTORCALL name(SkRasterPipeline::Stage* st, size_t x, \ - SkNf r, SkNf g, SkNf b, SkNf a, \ - SkNf dr, SkNf dg, SkNf db, SkNf da) { \ - r = name##_kernel(r,a,dr,da); \ - g = name##_kernel(g,a,dg,da); \ - b = name##_kernel(b,a,db,da); \ - a = name##_kernel(a,a,da,da); \ - next_body(st, x, r,g,b,a, dr,dg,db,da); \ - } \ - SI void SK_VECTORCALL name##_tail(SkRasterPipeline::Stage* st, size_t x, size_t tail, \ - SkNf r, SkNf g, SkNf b, SkNf a, \ - SkNf dr, SkNf dg, SkNf db, SkNf da) { \ - r = name##_kernel(r,a,dr,da); \ - g = name##_kernel(g,a,dg,da); \ - b = name##_kernel(b,a,db,da); \ - a = name##_kernel(a,a,da,da); \ - next_tail(st, x,tail, r,g,b,a, dr,dg,db,da); \ - } \ - static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \ +#define RGBA_XFERMODE(name) \ + static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \ + const SkNf& d, const SkNf& da); \ + SI void SK_VECTORCALL name(BodyStage* st, size_t x, \ + SkNf r, SkNf g, SkNf b, SkNf a, \ + SkNf dr, SkNf dg, SkNf db, SkNf da) { \ + r = name##_kernel(r,a,dr,da); \ + g = name##_kernel(g,a,dg,da); \ + b = name##_kernel(b,a,db,da); \ + a = name##_kernel(a,a,da,da); \ + next(st, x, r,g,b,a, dr,dg,db,da); \ + } \ + SI void SK_VECTORCALL name(TailStage* st, size_t x, size_t tail, \ + SkNf r, SkNf g, SkNf b, SkNf a, \ + SkNf dr, SkNf dg, SkNf db, SkNf da) { \ + r = name##_kernel(r,a,dr,da); \ + g = name##_kernel(g,a,dg,da); \ + b = name##_kernel(b,a,db,da); \ + a = name##_kernel(a,a,da,da); \ + next(st, x,tail, r,g,b,a, dr,dg,db,da); \ + } \ + static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \ const SkNf& d, const SkNf& da) // Most of the rest apply the same logic to color channels and use srcover's alpha logic. 
-#define RGB_XFERMODE(name) \ - static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \ - const SkNf& d, const SkNf& da); \ - SI void SK_VECTORCALL name(SkRasterPipeline::Stage* st, size_t x, \ - SkNf r, SkNf g, SkNf b, SkNf a, \ - SkNf dr, SkNf dg, SkNf db, SkNf da) { \ - r = name##_kernel(r,a,dr,da); \ - g = name##_kernel(g,a,dg,da); \ - b = name##_kernel(b,a,db,da); \ - a = a + (da * (1.0f-a)); \ - next_body(st, x, r,g,b,a, dr,dg,db,da); \ - } \ - SI void SK_VECTORCALL name##_tail(SkRasterPipeline::Stage* st, size_t x, size_t tail, \ - SkNf r, SkNf g, SkNf b, SkNf a, \ - SkNf dr, SkNf dg, SkNf db, SkNf da) { \ - r = name##_kernel(r,a,dr,da); \ - g = name##_kernel(g,a,dg,da); \ - b = name##_kernel(b,a,db,da); \ - a = a + (da * (1.0f-a)); \ - next_tail(st, x,tail, r,g,b,a, dr,dg,db,da); \ - } \ - static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \ +#define RGB_XFERMODE(name) \ + static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \ + const SkNf& d, const SkNf& da); \ + SI void SK_VECTORCALL name(BodyStage* st, size_t x, \ + SkNf r, SkNf g, SkNf b, SkNf a, \ + SkNf dr, SkNf dg, SkNf db, SkNf da) { \ + r = name##_kernel(r,a,dr,da); \ + g = name##_kernel(g,a,dg,da); \ + b = name##_kernel(b,a,db,da); \ + a = a + (da * (1.0f-a)); \ + next(st, x, r,g,b,a, dr,dg,db,da); \ + } \ + SI void SK_VECTORCALL name(TailStage* st, size_t x, size_t tail, \ + SkNf r, SkNf g, SkNf b, SkNf a, \ + SkNf dr, SkNf dg, SkNf db, SkNf da) { \ + r = name##_kernel(r,a,dr,da); \ + g = name##_kernel(g,a,dg,da); \ + b = name##_kernel(b,a,db,da); \ + a = a + (da * (1.0f-a)); \ + next(st, x,tail, r,g,b,a, dr,dg,db,da); \ + } \ + static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \ const SkNf& d, const SkNf& da) +// Clamp colors into [0,1] premul (e.g. just before storing back to memory). 
+SI void clamp_01_premul(SkNf& r, SkNf& g, SkNf& b, SkNf& a) { + a = SkNf::Max(a, 0.0f); + r = SkNf::Max(r, 0.0f); + g = SkNf::Max(g, 0.0f); + b = SkNf::Max(b, 0.0f); + + a = SkNf::Min(a, 1.0f); + r = SkNf::Min(r, a); + g = SkNf::Min(g, a); + b = SkNf::Min(b, a); +} -namespace SK_OPTS_NS { +SI SkNf inv(const SkNf& x) { return 1.0f - x; } - SI void run_pipeline(size_t x, size_t n, - void (*vBodyStart)(), SkRasterPipeline::Stage* body, - void (*vTailStart)(), SkRasterPipeline::Stage* tail) { - auto bodyStart = (Body)vBodyStart; - auto tailStart = (Tail)vTailStart; - SkNf v; // Fastest to start uninitialized. - while (n >= N) { - bodyStart(body, x, v,v,v,v, v,v,v,v); - x += N; - n -= N; - } - if (n > 0) { - tailStart(tail, x,n, v,v,v,v, v,v,v,v); +SI SkNf lerp(const SkNf& from, const SkNf& to, const SkNf& cov) { + return SkNx_fma(to-from, cov, from); +} + +template <bool kIsTail, typename T> +SI SkNx<N,T> load(size_t tail, const T* src) { + SkASSERT(kIsTail == (tail > 0)); + // TODO: maskload for 32- and 64-bit T + if (kIsTail) { + T buf[8] = {0}; + switch (tail & (N-1)) { + case 7: buf[6] = src[6]; + case 6: buf[5] = src[5]; + case 5: buf[4] = src[4]; + case 4: buf[3] = src[3]; + case 3: buf[2] = src[2]; + case 2: buf[1] = src[1]; } + buf[0] = src[0]; + return SkNx<N,T>::Load(buf); } + return SkNx<N,T>::Load(src); +} - // Clamp colors into [0,1] premul (e.g. just before storing back to memory). 
- SI void clamp_01_premul(SkNf& r, SkNf& g, SkNf& b, SkNf& a) { - a = SkNf::Max(a, 0.0f); - r = SkNf::Max(r, 0.0f); - g = SkNf::Max(g, 0.0f); - b = SkNf::Max(b, 0.0f); - - a = SkNf::Min(a, 1.0f); - r = SkNf::Min(r, a); - g = SkNf::Min(g, a); - b = SkNf::Min(b, a); +template <bool kIsTail, typename T> +SI void store(size_t tail, const SkNx<N,T>& v, T* dst) { + SkASSERT(kIsTail == (tail > 0)); + // TODO: maskstore for 32- and 64-bit T + if (kIsTail) { + switch (tail & (N-1)) { + case 7: dst[6] = v[6]; + case 6: dst[5] = v[5]; + case 5: dst[4] = v[4]; + case 4: dst[3] = v[3]; + case 3: dst[2] = v[2]; + case 2: dst[1] = v[1]; + } + dst[0] = v[0]; + return; } + v.store(dst); +} - SI SkNf inv(const SkNf& x) { return 1.0f - x; } +SI void from_565(const SkNh& _565, SkNf* r, SkNf* g, SkNf* b) { + auto _32_bit = SkNx_cast<int>(_565); - SI SkNf lerp(const SkNf& from, const SkNf& to, const SkNf& cov) { - return SkNx_fma(to-from, cov, from); - } + *r = SkNx_cast<float>(_32_bit & SK_R16_MASK_IN_PLACE) * (1.0f / SK_R16_MASK_IN_PLACE); + *g = SkNx_cast<float>(_32_bit & SK_G16_MASK_IN_PLACE) * (1.0f / SK_G16_MASK_IN_PLACE); + *b = SkNx_cast<float>(_32_bit & SK_B16_MASK_IN_PLACE) * (1.0f / SK_B16_MASK_IN_PLACE); +} - template <bool kIsTail, typename T> - SI SkNx<N,T> load(size_t tail, const T* src) { - SkASSERT(kIsTail == (tail > 0)); - // TODO: maskload for 32- and 64-bit T - if (kIsTail) { - T buf[8] = {0}; - switch (tail & (N-1)) { - case 7: buf[6] = src[6]; - case 6: buf[5] = src[5]; - case 5: buf[4] = src[4]; - case 4: buf[3] = src[3]; - case 3: buf[2] = src[2]; - case 2: buf[1] = src[1]; - } - buf[0] = src[0]; - return SkNx<N,T>::Load(buf); - } - return SkNx<N,T>::Load(src); - } +SI SkNh to_565(const SkNf& r, const SkNf& g, const SkNf& b) { + return SkNx_cast<uint16_t>( SkNx_cast<int>(r * SK_R16_MASK + 0.5f) << SK_R16_SHIFT + | SkNx_cast<int>(g * SK_G16_MASK + 0.5f) << SK_G16_SHIFT + | SkNx_cast<int>(b * SK_B16_MASK + 0.5f) << SK_B16_SHIFT); +} - template <bool kIsTail, 
typename T> - SI void store(size_t tail, const SkNx<N,T>& v, T* dst) { - SkASSERT(kIsTail == (tail > 0)); - // TODO: maskstore for 32- and 64-bit T - if (kIsTail) { - switch (tail & (N-1)) { - case 7: dst[6] = v[6]; - case 6: dst[5] = v[5]; - case 5: dst[4] = v[4]; - case 4: dst[3] = v[3]; - case 3: dst[2] = v[2]; - case 2: dst[1] = v[1]; - } - dst[0] = v[0]; - return; - } - v.store(dst); - } +STAGE(just_return, false) { } - SI void from_565(const SkNh& _565, SkNf* r, SkNf* g, SkNf* b) { - auto _32_bit = SkNx_cast<int>(_565); +STAGE(swap_src_dst, true) { + SkTSwap(r,dr); + SkTSwap(g,dg); + SkTSwap(b,db); + SkTSwap(a,da); +} - *r = SkNx_cast<float>(_32_bit & SK_R16_MASK_IN_PLACE) * (1.0f / SK_R16_MASK_IN_PLACE); - *g = SkNx_cast<float>(_32_bit & SK_G16_MASK_IN_PLACE) * (1.0f / SK_G16_MASK_IN_PLACE); - *b = SkNx_cast<float>(_32_bit & SK_B16_MASK_IN_PLACE) * (1.0f / SK_B16_MASK_IN_PLACE); - } +// The default shader produces a constant color (from the SkPaint). +STAGE(constant_color, true) { + auto color = (const SkPM4f*)ctx; + r = color->r(); + g = color->g(); + b = color->b(); + a = color->a(); +} - SI SkNh to_565(const SkNf& r, const SkNf& g, const SkNf& b) { - return SkNx_cast<uint16_t>( SkNx_cast<int>(r * SK_R16_MASK + 0.5f) << SK_R16_SHIFT - | SkNx_cast<int>(g * SK_G16_MASK + 0.5f) << SK_G16_SHIFT - | SkNx_cast<int>(b * SK_B16_MASK + 0.5f) << SK_B16_SHIFT); - } +// s' = d(1-c) + sc, for a constant c. +STAGE(lerp_constant_float, true) { + SkNf c = *(const float*)ctx; - STAGE(just_return, false) { } + r = lerp(dr, r, c); + g = lerp(dg, g, c); + b = lerp(db, b, c); + a = lerp(da, a, c); +} - STAGE(swap_src_dst, true) { - SkTSwap(r,dr); - SkTSwap(g,dg); - SkTSwap(b,db); - SkTSwap(a,da); - } +// s' = sc for 8-bit c. +STAGE(scale_u8, true) { + auto ptr = (const uint8_t*)ctx + x; - // The default shader produces a constant color (from the SkPaint). 
- STAGE(constant_color, true) { - auto color = (const SkPM4f*)ctx; - r = color->r(); - g = color->g(); - b = color->b(); - a = color->a(); - } + SkNf c = SkNx_cast<float>(load<kIsTail>(tail, ptr)) * (1/255.0f); + r = r*c; + g = g*c; + b = b*c; + a = a*c; +} - // s' = d(1-c) + sc, for a constant c. - STAGE(lerp_constant_float, true) { - SkNf c = *(const float*)ctx; +// s' = d(1-c) + sc for 8-bit c. +STAGE(lerp_u8, true) { + auto ptr = (const uint8_t*)ctx + x; - r = lerp(dr, r, c); - g = lerp(dg, g, c); - b = lerp(db, b, c); - a = lerp(da, a, c); - } + SkNf c = SkNx_cast<float>(load<kIsTail>(tail, ptr)) * (1/255.0f); + r = lerp(dr, r, c); + g = lerp(dg, g, c); + b = lerp(db, b, c); + a = lerp(da, a, c); +} - // s' = sc for 8-bit c. - STAGE(scale_u8, true) { - auto ptr = (const uint8_t*)ctx + x; +// s' = d(1-c) + sc for 565 c. +STAGE(lerp_565, true) { + auto ptr = (const uint16_t*)ctx + x; + SkNf cr, cg, cb; + from_565(load<kIsTail>(tail, ptr), &cr, &cg, &cb); - SkNf c = SkNx_cast<float>(load<kIsTail>(tail, ptr)) * (1/255.0f); - r = r*c; - g = g*c; - b = b*c; - a = a*c; - } + r = lerp(dr, r, cr); + g = lerp(dg, g, cg); + b = lerp(db, b, cb); + a = 1.0f; +} - // s' = d(1-c) + sc for 8-bit c. - STAGE(lerp_u8, true) { - auto ptr = (const uint8_t*)ctx + x; +STAGE(load_d_565, true) { + auto ptr = (const uint16_t*)ctx + x; + from_565(load<kIsTail>(tail, ptr), &dr,&dg,&db); + da = 1.0f; +} - SkNf c = SkNx_cast<float>(load<kIsTail>(tail, ptr)) * (1/255.0f); - r = lerp(dr, r, c); - g = lerp(dg, g, c); - b = lerp(db, b, c); - a = lerp(da, a, c); - } +STAGE(load_s_565, true) { + auto ptr = (const uint16_t*)ctx + x; + from_565(load<kIsTail>(tail, ptr), &r,&g,&b); + a = 1.0f; +} - // s' = d(1-c) + sc for 565 c. 
- STAGE(lerp_565, true) { - auto ptr = (const uint16_t*)ctx + x; - SkNf cr, cg, cb; - from_565(load<kIsTail>(tail, ptr), &cr, &cg, &cb); +STAGE(store_565, false) { + clamp_01_premul(r,g,b,a); + auto ptr = (uint16_t*)ctx + x; + store<kIsTail>(tail, to_565(r,g,b), ptr); +} - r = lerp(dr, r, cr); - g = lerp(dg, g, cg); - b = lerp(db, b, cb); - a = 1.0f; +STAGE(load_d_f16, true) { + auto ptr = (const uint64_t*)ctx + x; + + SkNh rh, gh, bh, ah; + if (kIsTail) { + uint64_t buf[8] = {0}; + switch (tail & (N-1)) { + case 7: buf[6] = ptr[6]; + case 6: buf[5] = ptr[5]; + case 5: buf[4] = ptr[4]; + case 4: buf[3] = ptr[3]; + case 3: buf[2] = ptr[2]; + case 2: buf[1] = ptr[1]; + } + buf[0] = ptr[0]; + SkNh::Load4(buf, &rh, &gh, &bh, &ah); + } else { + SkNh::Load4(ptr, &rh, &gh, &bh, &ah); } - STAGE(load_d_565, true) { - auto ptr = (const uint16_t*)ctx + x; - from_565(load<kIsTail>(tail, ptr), &dr,&dg,&db); - da = 1.0f; - } + dr = SkHalfToFloat_finite_ftz(rh); + dg = SkHalfToFloat_finite_ftz(gh); + db = SkHalfToFloat_finite_ftz(bh); + da = SkHalfToFloat_finite_ftz(ah); +} - STAGE(load_s_565, true) { - auto ptr = (const uint16_t*)ctx + x; - from_565(load<kIsTail>(tail, ptr), &r,&g,&b); - a = 1.0f; +STAGE(load_s_f16, true) { + auto ptr = (const uint64_t*)ctx + x; + + SkNh rh, gh, bh, ah; + if (kIsTail) { + uint64_t buf[8] = {0}; + switch (tail & (N-1)) { + case 7: buf[6] = ptr[6]; + case 6: buf[5] = ptr[5]; + case 5: buf[4] = ptr[4]; + case 4: buf[3] = ptr[3]; + case 3: buf[2] = ptr[2]; + case 2: buf[1] = ptr[1]; + } + buf[0] = ptr[0]; + SkNh::Load4(buf, &rh, &gh, &bh, &ah); + } else { + SkNh::Load4(ptr, &rh, &gh, &bh, &ah); } - STAGE(store_565, false) { - clamp_01_premul(r,g,b,a); - auto ptr = (uint16_t*)ctx + x; - store<kIsTail>(tail, to_565(r,g,b), ptr); - } + r = SkHalfToFloat_finite_ftz(rh); + g = SkHalfToFloat_finite_ftz(gh); + b = SkHalfToFloat_finite_ftz(bh); + a = SkHalfToFloat_finite_ftz(ah); +} - STAGE(load_d_f16, true) { - auto ptr = (const uint64_t*)ctx + x; - - SkNh 
rh, gh, bh, ah; - if (kIsTail) { - uint64_t buf[8] = {0}; - switch (tail & (N-1)) { - case 7: buf[6] = ptr[6]; - case 6: buf[5] = ptr[5]; - case 5: buf[4] = ptr[4]; - case 4: buf[3] = ptr[3]; - case 3: buf[2] = ptr[2]; - case 2: buf[1] = ptr[1]; - } - buf[0] = ptr[0]; - SkNh::Load4(buf, &rh, &gh, &bh, &ah); - } else { - SkNh::Load4(ptr, &rh, &gh, &bh, &ah); +STAGE(store_f16, false) { + clamp_01_premul(r,g,b,a); + auto ptr = (uint64_t*)ctx + x; + + uint64_t buf[8]; + SkNh::Store4(kIsTail ? buf : ptr, SkFloatToHalf_finite_ftz(r), + SkFloatToHalf_finite_ftz(g), + SkFloatToHalf_finite_ftz(b), + SkFloatToHalf_finite_ftz(a)); + if (kIsTail) { + switch (tail & (N-1)) { + case 7: ptr[6] = buf[6]; + case 6: ptr[5] = buf[5]; + case 5: ptr[4] = buf[4]; + case 4: ptr[3] = buf[3]; + case 3: ptr[2] = buf[2]; + case 2: ptr[1] = buf[1]; } - - dr = SkHalfToFloat_finite_ftz(rh); - dg = SkHalfToFloat_finite_ftz(gh); - db = SkHalfToFloat_finite_ftz(bh); - da = SkHalfToFloat_finite_ftz(ah); + ptr[0] = buf[0]; } +} - STAGE(load_s_f16, true) { - auto ptr = (const uint64_t*)ctx + x; - - SkNh rh, gh, bh, ah; - if (kIsTail) { - uint64_t buf[8] = {0}; - switch (tail & (N-1)) { - case 7: buf[6] = ptr[6]; - case 6: buf[5] = ptr[5]; - case 5: buf[4] = ptr[4]; - case 4: buf[3] = ptr[3]; - case 3: buf[2] = ptr[2]; - case 2: buf[1] = ptr[1]; - } - buf[0] = ptr[0]; - SkNh::Load4(buf, &rh, &gh, &bh, &ah); - } else { - SkNh::Load4(ptr, &rh, &gh, &bh, &ah); - } - r = SkHalfToFloat_finite_ftz(rh); - g = SkHalfToFloat_finite_ftz(gh); - b = SkHalfToFloat_finite_ftz(bh); - a = SkHalfToFloat_finite_ftz(ah); +// Load 8-bit SkPMColor-order sRGB. 
+STAGE(load_d_srgb, true) { + auto ptr = (const uint32_t*)ctx + x; + + auto px = load<kIsTail>(tail, ptr); + auto to_int = [](const SkNx<N, uint32_t>& v) { return SkNi::Load(&v); }; + dr = sk_linear_from_srgb_math(to_int((px >> SK_R32_SHIFT) & 0xff)); + dg = sk_linear_from_srgb_math(to_int((px >> SK_G32_SHIFT) & 0xff)); + db = sk_linear_from_srgb_math(to_int((px >> SK_B32_SHIFT) & 0xff)); + da = (1/255.0f)*SkNx_cast<float>(to_int( px >> SK_A32_SHIFT )); +} + +STAGE(load_s_srgb, true) { + auto ptr = (const uint32_t*)ctx + x; + + auto px = load<kIsTail>(tail, ptr); + auto to_int = [](const SkNx<N, uint32_t>& v) { return SkNi::Load(&v); }; + r = sk_linear_from_srgb_math(to_int((px >> SK_R32_SHIFT) & 0xff)); + g = sk_linear_from_srgb_math(to_int((px >> SK_G32_SHIFT) & 0xff)); + b = sk_linear_from_srgb_math(to_int((px >> SK_B32_SHIFT) & 0xff)); + a = (1/255.0f)*SkNx_cast<float>(to_int( px >> SK_A32_SHIFT )); +} + +STAGE(store_srgb, false) { + clamp_01_premul(r,g,b,a); + auto ptr = (uint32_t*)ctx + x; + store<kIsTail>(tail, ( sk_linear_to_srgb_noclamp(r) << SK_R32_SHIFT + | sk_linear_to_srgb_noclamp(g) << SK_G32_SHIFT + | sk_linear_to_srgb_noclamp(b) << SK_B32_SHIFT + | SkNx_cast<int>(255.0f * a + 0.5f) << SK_A32_SHIFT ), (int*)ptr); +} + +RGBA_XFERMODE(clear) { return 0.0f; } +//RGBA_XFERMODE(src) { return s; } // This would be a no-op stage, so we just omit it. 
+RGBA_XFERMODE(dst) { return d; } + +RGBA_XFERMODE(srcatop) { return s*da + d*inv(sa); } +RGBA_XFERMODE(srcin) { return s * da; } +RGBA_XFERMODE(srcout) { return s * inv(da); } +RGBA_XFERMODE(srcover) { return SkNx_fma(d, inv(sa), s); } +RGBA_XFERMODE(dstatop) { return srcatop_kernel(d,da,s,sa); } +RGBA_XFERMODE(dstin) { return srcin_kernel (d,da,s,sa); } +RGBA_XFERMODE(dstout) { return srcout_kernel (d,da,s,sa); } +RGBA_XFERMODE(dstover) { return srcover_kernel(d,da,s,sa); } + +RGBA_XFERMODE(modulate) { return s*d; } +RGBA_XFERMODE(multiply) { return s*inv(da) + d*inv(sa) + s*d; } +RGBA_XFERMODE(plus_) { return s + d; } +RGBA_XFERMODE(screen) { return s + d - s*d; } +RGBA_XFERMODE(xor_) { return s*inv(da) + d*inv(sa); } + +RGB_XFERMODE(colorburn) { + return (d == da ).thenElse(d + s*inv(da), + (s == 0.0f).thenElse(s + d*inv(sa), + sa*(da - SkNf::Min(da, (da-d)*sa/s)) + s*inv(da) + d*inv(sa))); +} +RGB_XFERMODE(colordodge) { + return (d == 0.0f).thenElse(d + s*inv(da), + (s == sa ).thenElse(s + d*inv(sa), + sa*SkNf::Min(da, (d*sa)/(sa - s)) + s*inv(da) + d*inv(sa))); +} +RGB_XFERMODE(darken) { return s + d - SkNf::Max(s*da, d*sa); } +RGB_XFERMODE(difference) { return s + d - 2.0f*SkNf::Min(s*da,d*sa); } +RGB_XFERMODE(exclusion) { return s + d - 2.0f*s*d; } +RGB_XFERMODE(hardlight) { + return s*inv(da) + d*inv(sa) + + (2.0f*s <= sa).thenElse(2.0f*s*d, sa*da - 2.0f*(da-d)*(sa-s)); +} +RGB_XFERMODE(lighten) { return s + d - SkNf::Min(s*da, d*sa); } +RGB_XFERMODE(overlay) { return hardlight_kernel(d,da,s,sa); } +RGB_XFERMODE(softlight) { + SkNf m = (da > 0.0f).thenElse(d / da, 0.0f), + s2 = 2.0f*s, + m4 = 4.0f*m; + + // The logic forks three ways: + // 1. dark src? + // 2. light src, dark dst? + // 3. light src, light dst? + SkNf darkSrc = d*(sa + (s2 - sa)*(1.0f - m)), // Used in case 1. + darkDst = (m4*m4 + m4)*(m - 1.0f) + 7.0f*m, // Used in case 2. + liteDst = m.rsqrt().invert() - m, // Used in case 3. 
+ liteSrc = d*sa + da*(s2 - sa) * (4.0f*d <= da).thenElse(darkDst, liteDst); // 2 or 3? + return s*inv(da) + d*inv(sa) + (s2 <= sa).thenElse(darkSrc, liteSrc); // 1 or (2 or 3)? +} + + +template <typename Fn> +SI Fn enum_to_Fn(SkRasterPipeline::StockStage st) { + switch (st) { + #define M(stage) case SkRasterPipeline::stage: return stage; + SK_RASTER_PIPELINE_STAGES(M) + #undef M } + SkASSERT(false); + return just_return; +} - STAGE(store_f16, false) { - clamp_01_premul(r,g,b,a); - auto ptr = (uint64_t*)ctx + x; - - uint64_t buf[8]; - SkNh::Store4(kIsTail ? buf : ptr, SkFloatToHalf_finite_ftz(r), - SkFloatToHalf_finite_ftz(g), - SkFloatToHalf_finite_ftz(b), - SkFloatToHalf_finite_ftz(a)); - if (kIsTail) { - switch (tail & (N-1)) { - case 7: ptr[6] = buf[6]; - case 6: ptr[5] = buf[5]; - case 5: ptr[4] = buf[4]; - case 4: ptr[3] = buf[3]; - case 3: ptr[2] = buf[2]; - case 2: ptr[1] = buf[1]; - } - ptr[0] = buf[0]; +namespace SK_OPTS_NS { + + SI void run_pipeline(size_t x, size_t n, + const SkRasterPipeline::Stage* stages, int nstages) { + SkASSERT(nstages <= SkRasterPipeline::kMaxStages); + if (nstages == 0) { + return; } - } + SkNf v; // Fastest to start uninitialized. - // Load 8-bit SkPMColor-order sRGB. 
- STAGE(load_d_srgb, true) { - auto ptr = (const uint32_t*)ctx + x; + if (n >= N) { + BodyStage body[SkRasterPipeline::kMaxStages]; - auto px = load<kIsTail>(tail, ptr); - auto to_int = [](const SkNx<N, uint32_t>& v) { return SkNi::Load(&v); }; - dr = sk_linear_from_srgb_math(to_int((px >> SK_R32_SHIFT) & 0xff)); - dg = sk_linear_from_srgb_math(to_int((px >> SK_G32_SHIFT) & 0xff)); - db = sk_linear_from_srgb_math(to_int((px >> SK_B32_SHIFT) & 0xff)); - da = (1/255.0f)*SkNx_cast<float>(to_int( px >> SK_A32_SHIFT )); - } + Body start = enum_to_Fn<Body>(stages[0].stage); + for (int i = 0; i < nstages-1; i++) { + body[i].next = enum_to_Fn<Body>(stages[i+1].stage); + body[i].ctx = stages[i].ctx; + } + body[nstages-1].next = just_return; + body[nstages-1].ctx = stages[nstages-1].ctx; + + do { + start(body, x, v,v,v,v, v,v,v,v); + x += N; + n -= N; + } while (n >= N); + } - STAGE(load_s_srgb, true) { - auto ptr = (const uint32_t*)ctx + x; + if (n > 0) { + TailStage tail[SkRasterPipeline::kMaxStages]; - auto px = load<kIsTail>(tail, ptr); - auto to_int = [](const SkNx<N, uint32_t>& v) { return SkNi::Load(&v); }; - r = sk_linear_from_srgb_math(to_int((px >> SK_R32_SHIFT) & 0xff)); - g = sk_linear_from_srgb_math(to_int((px >> SK_G32_SHIFT) & 0xff)); - b = sk_linear_from_srgb_math(to_int((px >> SK_B32_SHIFT) & 0xff)); - a = (1/255.0f)*SkNx_cast<float>(to_int( px >> SK_A32_SHIFT )); - } + Tail start = enum_to_Fn<Tail>(stages[0].stage); + for (int i = 0; i < nstages-1; i++) { + tail[i].next = enum_to_Fn<Tail>(stages[i+1].stage); + tail[i].ctx = stages[i].ctx; + } + tail[nstages-1].next = just_return; + tail[nstages-1].ctx = stages[nstages-1].ctx; - STAGE(store_srgb, false) { - clamp_01_premul(r,g,b,a); - auto ptr = (uint32_t*)ctx + x; - store<kIsTail>(tail, ( sk_linear_to_srgb_noclamp(r) << SK_R32_SHIFT - | sk_linear_to_srgb_noclamp(g) << SK_G32_SHIFT - | sk_linear_to_srgb_noclamp(b) << SK_B32_SHIFT - | SkNx_cast<int>(255.0f * a + 0.5f) << SK_A32_SHIFT ), (int*)ptr); + 
start(tail, x,n, v,v,v,v, v,v,v,v); + } } - RGBA_XFERMODE(clear) { return 0.0f; } - //RGBA_XFERMODE(src) { return s; } // This would be a no-op stage, so we just omit it. - RGBA_XFERMODE(dst) { return d; } - - RGBA_XFERMODE(srcatop) { return s*da + d*inv(sa); } - RGBA_XFERMODE(srcin) { return s * da; } - RGBA_XFERMODE(srcout) { return s * inv(da); } - RGBA_XFERMODE(srcover) { return SkNx_fma(d, inv(sa), s); } - RGBA_XFERMODE(dstatop) { return srcatop_kernel(d,da,s,sa); } - RGBA_XFERMODE(dstin) { return srcin_kernel (d,da,s,sa); } - RGBA_XFERMODE(dstout) { return srcout_kernel (d,da,s,sa); } - RGBA_XFERMODE(dstover) { return srcover_kernel(d,da,s,sa); } - - RGBA_XFERMODE(modulate) { return s*d; } - RGBA_XFERMODE(multiply) { return s*inv(da) + d*inv(sa) + s*d; } - RGBA_XFERMODE(plus_) { return s + d; } - RGBA_XFERMODE(screen) { return s + d - s*d; } - RGBA_XFERMODE(xor_) { return s*inv(da) + d*inv(sa); } - - RGB_XFERMODE(colorburn) { - return (d == da ).thenElse(d + s*inv(da), - (s == 0.0f).thenElse(s + d*inv(sa), - sa*(da - SkNf::Min(da, (da-d)*sa/s)) + s*inv(da) + d*inv(sa))); - } - RGB_XFERMODE(colordodge) { - return (d == 0.0f).thenElse(d + s*inv(da), - (s == sa ).thenElse(s + d*inv(sa), - sa*SkNf::Min(da, (d*sa)/(sa - s)) + s*inv(da) + d*inv(sa))); - } - RGB_XFERMODE(darken) { return s + d - SkNf::Max(s*da, d*sa); } - RGB_XFERMODE(difference) { return s + d - 2.0f*SkNf::Min(s*da,d*sa); } - RGB_XFERMODE(exclusion) { return s + d - 2.0f*s*d; } - RGB_XFERMODE(hardlight) { - return s*inv(da) + d*inv(sa) - + (2.0f*s <= sa).thenElse(2.0f*s*d, sa*da - 2.0f*(da-d)*(sa-s)); - } - RGB_XFERMODE(lighten) { return s + d - SkNf::Min(s*da, d*sa); } - RGB_XFERMODE(overlay) { return hardlight_kernel(d,da,s,sa); } - RGB_XFERMODE(softlight) { - SkNf m = (da > 0.0f).thenElse(d / da, 0.0f), - s2 = 2.0f*s, - m4 = 4.0f*m; - - // The logic forks three ways: - // 1. dark src? - // 2. light src, dark dst? - // 3. light src, light dst? 
- SkNf darkSrc = d*(sa + (s2 - sa)*(1.0f - m)), // Used in case 1. - darkDst = (m4*m4 + m4)*(m - 1.0f) + 7.0f*m, // Used in case 2. - liteDst = m.rsqrt().invert() - m, // Used in case 3. - liteSrc = d*sa + da*(s2 - sa) * (4.0f*d <= da).thenElse(darkDst, liteDst); // 2 or 3? - return s*inv(da) + d*inv(sa) + (s2 <= sa).thenElse(darkSrc, liteSrc); // 1 or (2 or 3)? - } -} +} // namespace SK_OPTS_NS #undef SI #undef STAGE |