aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
author: Mike Klein <mtklein@chromium.org> 2016-10-25 10:27:33 -0400
committer: Mike Klein <mtklein@chromium.org> 2016-10-25 17:30:38 +0000
commit: aebfb45104eeb6dab5dbbedda13c2eaa7b7f7868 (patch)
tree: 7dcbf9bf86bd1b9fad0048dccbabb11e47a1d3fb /src
parent: cc813ae9a0afe4259f12b655d9336662a6e2c100 (diff)
Move SkRasterPipeline further into SkOpts.
The portable code now becomes entirely focused on enum+ptr descriptions, leaving the concrete implementation of the pipeline to SkOpts::run_pipeline().

As implemented, the concrete implementation is basically the same, with a little more type safety. Speed is essentially unchanged on my laptop, and that's having run_pipeline() rebuild its concrete state every call. There's room for improvement there if we split this into a compile_pipeline() / run_pipeline() sort of thing, which is my next planned CL.

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=3920
CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot
Change-Id: Ie4c554f51040426de7c5c144afa5d9d9d8938012
Reviewed-on: https://skia-review.googlesource.com/3920
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
Diffstat (limited to 'src')
-rw-r--r-- src/core/SkOpts.cpp 102
-rw-r--r-- src/core/SkOpts.h 8
-rw-r--r-- src/core/SkRasterPipeline.cpp 36
-rw-r--r-- src/core/SkRasterPipeline.h 98
-rw-r--r-- src/opts/SkOpts_hsw.cpp 56
-rw-r--r-- src/opts/SkOpts_sse41.cpp 56
-rw-r--r-- src/opts/SkRasterPipeline_opts.h 783
7 files changed, 456 insertions, 683 deletions
diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp
index 16389484b4..c082b160ee 100644
--- a/src/core/SkOpts.cpp
+++ b/src/core/SkOpts.cpp
@@ -92,108 +92,6 @@ namespace SkOpts {
DEFINE_DEFAULT(run_pipeline);
#undef DEFINE_DEFAULT
- SkOpts::VoidFn body[] = {
- (SkOpts::VoidFn)SK_OPTS_NS::just_return,
- (SkOpts::VoidFn)SK_OPTS_NS::swap_src_dst,
-
- (SkOpts::VoidFn)SK_OPTS_NS::store_565,
- (SkOpts::VoidFn)SK_OPTS_NS::store_srgb,
- (SkOpts::VoidFn)SK_OPTS_NS::store_f16,
-
- (SkOpts::VoidFn)SK_OPTS_NS::load_s_565,
- (SkOpts::VoidFn)SK_OPTS_NS::load_s_srgb,
- (SkOpts::VoidFn)SK_OPTS_NS::load_s_f16,
-
- (SkOpts::VoidFn)SK_OPTS_NS::load_d_565,
- (SkOpts::VoidFn)SK_OPTS_NS::load_d_srgb,
- (SkOpts::VoidFn)SK_OPTS_NS::load_d_f16,
-
- (SkOpts::VoidFn)SK_OPTS_NS::scale_u8,
-
- (SkOpts::VoidFn)SK_OPTS_NS::lerp_u8,
- (SkOpts::VoidFn)SK_OPTS_NS::lerp_565,
- (SkOpts::VoidFn)SK_OPTS_NS::lerp_constant_float,
-
- (SkOpts::VoidFn)SK_OPTS_NS::constant_color,
-
- (SkOpts::VoidFn)SK_OPTS_NS::dst,
- (SkOpts::VoidFn)SK_OPTS_NS::dstatop,
- (SkOpts::VoidFn)SK_OPTS_NS::dstin,
- (SkOpts::VoidFn)SK_OPTS_NS::dstout,
- (SkOpts::VoidFn)SK_OPTS_NS::dstover,
- (SkOpts::VoidFn)SK_OPTS_NS::srcatop,
- (SkOpts::VoidFn)SK_OPTS_NS::srcin,
- (SkOpts::VoidFn)SK_OPTS_NS::srcout,
- (SkOpts::VoidFn)SK_OPTS_NS::srcover,
- (SkOpts::VoidFn)SK_OPTS_NS::clear,
- (SkOpts::VoidFn)SK_OPTS_NS::modulate,
- (SkOpts::VoidFn)SK_OPTS_NS::multiply,
- (SkOpts::VoidFn)SK_OPTS_NS::plus_,
- (SkOpts::VoidFn)SK_OPTS_NS::screen,
- (SkOpts::VoidFn)SK_OPTS_NS::xor_,
- (SkOpts::VoidFn)SK_OPTS_NS::colorburn,
- (SkOpts::VoidFn)SK_OPTS_NS::colordodge,
- (SkOpts::VoidFn)SK_OPTS_NS::darken,
- (SkOpts::VoidFn)SK_OPTS_NS::difference,
- (SkOpts::VoidFn)SK_OPTS_NS::exclusion,
- (SkOpts::VoidFn)SK_OPTS_NS::hardlight,
- (SkOpts::VoidFn)SK_OPTS_NS::lighten,
- (SkOpts::VoidFn)SK_OPTS_NS::overlay,
- (SkOpts::VoidFn)SK_OPTS_NS::softlight,
- };
- static_assert(SK_ARRAY_COUNT(body) == SkRasterPipeline::kNumStockStages, "");
-
- SkOpts::VoidFn tail[] = {
- (SkOpts::VoidFn)SK_OPTS_NS::just_return_tail,
- (SkOpts::VoidFn)SK_OPTS_NS::swap_src_dst_tail,
-
- (SkOpts::VoidFn)SK_OPTS_NS::store_565_tail,
- (SkOpts::VoidFn)SK_OPTS_NS::store_srgb_tail,
- (SkOpts::VoidFn)SK_OPTS_NS::store_f16_tail,
-
- (SkOpts::VoidFn)SK_OPTS_NS::load_s_565_tail,
- (SkOpts::VoidFn)SK_OPTS_NS::load_s_srgb_tail,
- (SkOpts::VoidFn)SK_OPTS_NS::load_s_f16_tail,
-
- (SkOpts::VoidFn)SK_OPTS_NS::load_d_565_tail,
- (SkOpts::VoidFn)SK_OPTS_NS::load_d_srgb_tail,
- (SkOpts::VoidFn)SK_OPTS_NS::load_d_f16_tail,
-
- (SkOpts::VoidFn)SK_OPTS_NS::scale_u8_tail,
-
- (SkOpts::VoidFn)SK_OPTS_NS::lerp_u8_tail,
- (SkOpts::VoidFn)SK_OPTS_NS::lerp_565_tail,
- (SkOpts::VoidFn)SK_OPTS_NS::lerp_constant_float_tail,
-
- (SkOpts::VoidFn)SK_OPTS_NS::constant_color_tail,
-
- (SkOpts::VoidFn)SK_OPTS_NS::dst_tail,
- (SkOpts::VoidFn)SK_OPTS_NS::dstatop_tail,
- (SkOpts::VoidFn)SK_OPTS_NS::dstin_tail,
- (SkOpts::VoidFn)SK_OPTS_NS::dstout_tail,
- (SkOpts::VoidFn)SK_OPTS_NS::dstover_tail,
- (SkOpts::VoidFn)SK_OPTS_NS::srcatop_tail,
- (SkOpts::VoidFn)SK_OPTS_NS::srcin_tail,
- (SkOpts::VoidFn)SK_OPTS_NS::srcout_tail,
- (SkOpts::VoidFn)SK_OPTS_NS::srcover_tail,
- (SkOpts::VoidFn)SK_OPTS_NS::clear_tail,
- (SkOpts::VoidFn)SK_OPTS_NS::modulate_tail,
- (SkOpts::VoidFn)SK_OPTS_NS::multiply_tail,
- (SkOpts::VoidFn)SK_OPTS_NS::plus__tail,
- (SkOpts::VoidFn)SK_OPTS_NS::screen_tail,
- (SkOpts::VoidFn)SK_OPTS_NS::xor__tail,
- (SkOpts::VoidFn)SK_OPTS_NS::colorburn_tail,
- (SkOpts::VoidFn)SK_OPTS_NS::colordodge_tail,
- (SkOpts::VoidFn)SK_OPTS_NS::darken_tail,
- (SkOpts::VoidFn)SK_OPTS_NS::difference_tail,
- (SkOpts::VoidFn)SK_OPTS_NS::exclusion_tail,
- (SkOpts::VoidFn)SK_OPTS_NS::hardlight_tail,
- (SkOpts::VoidFn)SK_OPTS_NS::lighten_tail,
- (SkOpts::VoidFn)SK_OPTS_NS::overlay_tail,
- (SkOpts::VoidFn)SK_OPTS_NS::softlight_tail,
- };
- static_assert(SK_ARRAY_COUNT(tail) == SkRasterPipeline::kNumStockStages, "");
-
// Each Init_foo() is defined in src/opts/SkOpts_foo.cpp.
void Init_ssse3();
void Init_sse41();
diff --git a/src/core/SkOpts.h b/src/core/SkOpts.h
index 4685d86691..7283030068 100644
--- a/src/core/SkOpts.h
+++ b/src/core/SkOpts.h
@@ -73,13 +73,7 @@ namespace SkOpts {
return hash_fn(data, bytes, seed);
}
- // SkRasterPipeline::Fn has different types in different files (notably, in SkOpts_hsw.cpp
- // they're all in terms of Sk8f.) We store them with a type everyone can agree on, void(*)().
- using VoidFn = void(*)();
- extern VoidFn body[SkRasterPipeline::kNumStockStages],
- tail[SkRasterPipeline::kNumStockStages];
- extern void (*run_pipeline)(size_t, size_t, void(*)(), SkRasterPipeline::Stage*,
- void(*)(), SkRasterPipeline::Stage*);
+ extern void (*run_pipeline)(size_t, size_t, const SkRasterPipeline::Stage*, int);
}
#endif//SkOpts_DEFINED
diff --git a/src/core/SkRasterPipeline.cpp b/src/core/SkRasterPipeline.cpp
index bc7feaccc7..707a33ae6f 100644
--- a/src/core/SkRasterPipeline.cpp
+++ b/src/core/SkRasterPipeline.cpp
@@ -8,40 +8,20 @@
#include "SkOpts.h"
#include "SkRasterPipeline.h"
-SkRasterPipeline::SkRasterPipeline() {
- fBodyStart = SkOpts::body[just_return];
- fTailStart = SkOpts::tail[just_return];
-}
-
-void SkRasterPipeline::append(void (*body)(), void (*tail)(), void* ctx) {
- // Each stage holds its own context and the next function to call.
- // So the pipeline itself has to hold onto the first function that starts the pipeline.
- (fBody.empty() ? fBodyStart : fBody.back().fNext) = body;
- (fTail.empty() ? fTailStart : fTail.back().fNext) = tail;
-
- // Each last stage starts with its next function set to JustReturn as a safety net.
- // It'll be overwritten by the next call to append().
- fBody.push_back({ SkOpts::body[just_return], ctx });
- fTail.push_back({ SkOpts::tail[just_return], ctx });
-}
+SkRasterPipeline::SkRasterPipeline() {}
void SkRasterPipeline::append(StockStage stage, void* ctx) {
- this->append(SkOpts::body[stage], SkOpts::tail[stage], ctx);
+ SkASSERT(fNum < (int)SK_ARRAY_COUNT(fStages));
+ fStages[fNum++] = { stage, ctx };
}
void SkRasterPipeline::extend(const SkRasterPipeline& src) {
- SkASSERT(src.fBody.count() == src.fTail.count());
-
- auto body = src.fBodyStart,
- tail = src.fTailStart;
- for (int i = 0; i < src.fBody.count(); i++) {
- SkASSERT(src.fBody[i].fCtx == src.fTail[i].fCtx);
- this->append(body, tail, src.fBody[i].fCtx);
- body = src.fBody[i].fNext;
- tail = src.fTail[i].fNext;
+ for (int i = 0; i < src.fNum; i++) {
+ const Stage& s = src.fStages[i];
+ this->append(s.stage, s.ctx);
}
}
-void SkRasterPipeline::run(size_t x, size_t n) {
- SkOpts::run_pipeline(x,n, fBodyStart,fBody.begin(), fTailStart,fTail.begin());
+void SkRasterPipeline::run(size_t x, size_t n) const {
+ SkOpts::run_pipeline(x,n, fStages, fNum);
}
diff --git a/src/core/SkRasterPipeline.h b/src/core/SkRasterPipeline.h
index 525a8dbe72..60279dd168 100644
--- a/src/core/SkRasterPipeline.h
+++ b/src/core/SkRasterPipeline.h
@@ -53,72 +53,31 @@
// TODO: There may be a better place to stuff tail, e.g. in the bottom alignment bits of
// the Stage*. This mostly matters on 64-bit Windows where every register is precious.
+#define SK_RASTER_PIPELINE_STAGES(M) \
+ M(swap_src_dst) M(constant_color) \
+ M(load_s_565) M(load_d_565) M(store_565) \
+ M(load_s_srgb) M(load_d_srgb) M(store_srgb) \
+ M(load_s_f16) M(load_d_f16) M(store_f16) \
+ M(scale_u8) \
+ M(lerp_u8) M(lerp_565) M(lerp_constant_float) \
+ M(dst) \
+ M(dstatop) M(dstin) M(dstout) M(dstover) \
+ M(srcatop) M(srcin) M(srcout) M(srcover) \
+ M(clear) M(modulate) M(multiply) M(plus_) M(screen) M(xor_) \
+ M(colorburn) M(colordodge) M(darken) M(difference) \
+ M(exclusion) M(hardlight) M(lighten) M(overlay) M(softlight)
+
class SkRasterPipeline {
public:
- struct Stage {
- // It makes next() a good bit cheaper if we hold the next function to call here,
- // rather than logically simpler choice of the function implementing this stage.
- void (*fNext)();
- void* fCtx;
- };
+ // No pipeline may be more than kMaxStages long.
+ static const int kMaxStages = 32;
SkRasterPipeline();
- // Run the pipeline constructed with append(), walking x through [x,x+n),
- // generally in 4-pixel steps, with perhaps one jagged tail step.
- void run(size_t x, size_t n);
- void run(size_t n) { this->run(0, n); }
-
enum StockStage {
- just_return,
- swap_src_dst,
-
- store_565,
- store_srgb,
- store_f16,
-
- load_s_565,
- load_s_srgb,
- load_s_f16,
-
- load_d_565,
- load_d_srgb,
- load_d_f16,
-
- scale_u8,
-
- lerp_u8,
- lerp_565,
- lerp_constant_float,
-
- constant_color,
-
- dst,
- dstatop,
- dstin,
- dstout,
- dstover,
- srcatop,
- srcin,
- srcout,
- srcover,
- clear,
- modulate,
- multiply,
- plus_,
- screen,
- xor_,
- colorburn,
- colordodge,
- darken,
- difference,
- exclusion,
- hardlight,
- lighten,
- overlay,
- softlight,
-
- kNumStockStages,
+ #define M(stage) stage,
+ SK_RASTER_PIPELINE_STAGES(M)
+ #undef M
};
void append(StockStage, void* = nullptr);
void append(StockStage stage, const void* ctx) { this->append(stage, const_cast<void*>(ctx)); }
@@ -126,15 +85,20 @@ public:
// Append all stages to this pipeline.
void extend(const SkRasterPipeline&);
-private:
- using Stages = SkSTArray<10, Stage, /*MEM_COPY=*/true>;
+ // Run the pipeline constructed with append(), walking x through [x,x+n),
+ // generally in 4-pixel steps, with perhaps one jagged tail step.
+ void run(size_t x, size_t n) const;
+ void run(size_t n) const { this->run(0, n); }
- void append(void (*body)(), void (*tail)(), void*);
- Stages fBody,
- fTail;
- void (*fBodyStart)() = nullptr;
- void (*fTailStart)() = nullptr;
+ struct Stage {
+ StockStage stage;
+ void* ctx;
+ };
+
+private:
+ int fNum = 0;
+ Stage fStages[kMaxStages];
};
#endif//SkRasterPipeline_DEFINED
diff --git a/src/opts/SkOpts_hsw.cpp b/src/opts/SkOpts_hsw.cpp
index 3c5d19d47c..55fe045a56 100644
--- a/src/opts/SkOpts_hsw.cpp
+++ b/src/opts/SkOpts_hsw.cpp
@@ -12,61 +12,7 @@
namespace SkOpts {
void Init_hsw() {
-
- run_pipeline = SK_OPTS_NS::run_pipeline;
-
- #define STAGE(stage) \
- body[SkRasterPipeline::stage] = (SkOpts::VoidFn)SK_OPTS_NS::stage; \
- tail[SkRasterPipeline::stage] = (SkOpts::VoidFn)SK_OPTS_NS::stage##_tail
-
- STAGE(store_565);
- STAGE(store_srgb);
- STAGE(store_f16);
-
- STAGE(load_s_565);
- STAGE(load_s_srgb);
- STAGE(load_s_f16);
-
- STAGE(load_d_565);
- STAGE(load_d_srgb);
- STAGE(load_d_f16);
-
- STAGE(scale_u8);
-
- STAGE(lerp_u8);
- STAGE(lerp_565);
-
- STAGE(just_return);
- STAGE(swap_src_dst);
- STAGE(lerp_constant_float);
- STAGE(constant_color);
-
- STAGE(dst);
- STAGE(dstatop);
- STAGE(dstin);
- STAGE(dstout);
- STAGE(dstover);
- STAGE(srcatop);
- STAGE(srcin);
- STAGE(srcout);
- STAGE(srcover);
- STAGE(clear);
- STAGE(modulate);
- STAGE(multiply);
- STAGE(plus_);
- STAGE(screen);
- STAGE(xor_);
- STAGE(colorburn);
- STAGE(colordodge);
- STAGE(darken);
- STAGE(difference);
- STAGE(exclusion);
- STAGE(hardlight);
- STAGE(lighten);
- STAGE(overlay);
- STAGE(softlight);
- #undef STAGE
-
+ run_pipeline = hsw::run_pipeline;
}
}
diff --git a/src/opts/SkOpts_sse41.cpp b/src/opts/SkOpts_sse41.cpp
index b7f1bdd7c6..7a90f7683e 100644
--- a/src/opts/SkOpts_sse41.cpp
+++ b/src/opts/SkOpts_sse41.cpp
@@ -20,60 +20,6 @@ namespace SkOpts {
box_blur_yx = sse41::box_blur_yx;
srcover_srgb_srgb = sse41::srcover_srgb_srgb;
blit_row_s32a_opaque = sse41::blit_row_s32a_opaque;
-
- run_pipeline = SK_OPTS_NS::run_pipeline;
-
- #define STAGE(stage) \
- body[SkRasterPipeline::stage] = (SkOpts::VoidFn)SK_OPTS_NS::stage; \
- tail[SkRasterPipeline::stage] = (SkOpts::VoidFn)SK_OPTS_NS::stage##_tail
-
- STAGE(store_565);
- STAGE(store_srgb);
- STAGE(store_f16);
-
- STAGE(load_s_565);
- STAGE(load_s_srgb);
- STAGE(load_s_f16);
-
- STAGE(load_d_565);
- STAGE(load_d_srgb);
- STAGE(load_d_f16);
-
- STAGE(scale_u8);
-
- STAGE(lerp_u8);
- STAGE(lerp_565);
-
- STAGE(just_return);
- STAGE(swap_src_dst);
- STAGE(lerp_constant_float);
- STAGE(constant_color);
-
- STAGE(dst);
- STAGE(dstatop);
- STAGE(dstin);
- STAGE(dstout);
- STAGE(dstover);
- STAGE(srcatop);
- STAGE(srcin);
- STAGE(srcout);
- STAGE(srcover);
- STAGE(clear);
- STAGE(modulate);
- STAGE(multiply);
- STAGE(plus_);
- STAGE(screen);
- STAGE(xor_);
- STAGE(colorburn);
- STAGE(colordodge);
- STAGE(darken);
- STAGE(difference);
- STAGE(exclusion);
- STAGE(hardlight);
- STAGE(lighten);
- STAGE(overlay);
- STAGE(softlight);
- #undef STAGE
-
+ run_pipeline = sse41::run_pipeline;
}
}
diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h
index f072f843bd..2e1e3a2a47 100644
--- a/src/opts/SkRasterPipeline_opts.h
+++ b/src/opts/SkRasterPipeline_opts.h
@@ -14,6 +14,8 @@
#include "SkSRGB.h"
#include <utility>
+namespace {
+
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
static constexpr int N = 8;
#else
@@ -24,438 +26,481 @@ using SkNf = SkNx<N, float>;
using SkNi = SkNx<N, int>;
using SkNh = SkNx<N, uint16_t>;
-using Body = void(SK_VECTORCALL *)(SkRasterPipeline::Stage*, size_t,
- SkNf,SkNf,SkNf,SkNf,
- SkNf,SkNf,SkNf,SkNf);
-using Tail = void(SK_VECTORCALL *)(SkRasterPipeline::Stage*, size_t, size_t,
- SkNf,SkNf,SkNf,SkNf,
- SkNf,SkNf,SkNf,SkNf);
+ struct BodyStage;
+ struct TailStage;
+
+ using Body = void(SK_VECTORCALL *)(BodyStage*, size_t, SkNf,SkNf,SkNf,SkNf,
+ SkNf,SkNf,SkNf,SkNf);
+ using Tail = void(SK_VECTORCALL *)(TailStage*, size_t, size_t, SkNf,SkNf,SkNf,SkNf,
+ SkNf,SkNf,SkNf,SkNf);
+ struct BodyStage { Body next; void* ctx; };
+ struct TailStage { Tail next; void* ctx; };
+
+} // namespace
#define SI static inline
// Stages are logically a pipeline, and physically are contiguous in an array.
// To get to the next stage, we just increment our pointer to the next array element.
-SI void SK_VECTORCALL next_body(SkRasterPipeline::Stage* st, size_t x,
- SkNf r, SkNf g, SkNf b, SkNf a,
- SkNf dr, SkNf dg, SkNf db, SkNf da) {
- ((Body)st->fNext)(st+1, x, r,g,b,a, dr,dg,db,da);
+SI void SK_VECTORCALL next(BodyStage* st, size_t x,
+ SkNf r, SkNf g, SkNf b, SkNf a,
+ SkNf dr, SkNf dg, SkNf db, SkNf da) {
+ st->next(st+1, x, r,g,b,a, dr,dg,db,da);
}
-SI void SK_VECTORCALL next_tail(SkRasterPipeline::Stage* st, size_t x, size_t tail,
- SkNf r, SkNf g, SkNf b, SkNf a,
- SkNf dr, SkNf dg, SkNf db, SkNf da) {
- ((Tail)st->fNext)(st+1, x,tail, r,g,b,a, dr,dg,db,da);
+SI void SK_VECTORCALL next(TailStage* st, size_t x, size_t tail,
+ SkNf r, SkNf g, SkNf b, SkNf a,
+ SkNf dr, SkNf dg, SkNf db, SkNf da) {
+ st->next(st+1, x,tail, r,g,b,a, dr,dg,db,da);
}
-#define STAGE(name, kCallNext) \
- template <bool kIsTail> \
- static SK_ALWAYS_INLINE void name##_kernel(void* ctx, size_t x, size_t tail, \
- SkNf& r, SkNf& g, SkNf& b, SkNf& a, \
- SkNf& dr, SkNf& dg, SkNf& db, SkNf& da); \
- SI void SK_VECTORCALL name(SkRasterPipeline::Stage* st, size_t x, \
- SkNf r, SkNf g, SkNf b, SkNf a, \
- SkNf dr, SkNf dg, SkNf db, SkNf da) { \
- name##_kernel<false>(st->fCtx, x,0, r,g,b,a, dr,dg,db,da); \
- if (kCallNext) { \
- next_body(st, x, r,g,b,a, dr,dg,db,da); \
- } \
- } \
- SI void SK_VECTORCALL name##_tail(SkRasterPipeline::Stage* st, size_t x, size_t tail, \
- SkNf r, SkNf g, SkNf b, SkNf a, \
- SkNf dr, SkNf dg, SkNf db, SkNf da) { \
- name##_kernel<true>(st->fCtx, x,tail, r,g,b,a, dr,dg,db,da); \
- if (kCallNext) { \
- next_tail(st, x,tail, r,g,b,a, dr,dg,db,da); \
- } \
- } \
- template <bool kIsTail> \
- static SK_ALWAYS_INLINE void name##_kernel(void* ctx, size_t x, size_t tail, \
- SkNf& r, SkNf& g, SkNf& b, SkNf& a, \
+#define STAGE(name, kCallNext) \
+ template <bool kIsTail> \
+ static SK_ALWAYS_INLINE void name##_kernel(void* ctx, size_t x, size_t tail, \
+ SkNf& r, SkNf& g, SkNf& b, SkNf& a, \
+ SkNf& dr, SkNf& dg, SkNf& db, SkNf& da); \
+ SI void SK_VECTORCALL name(BodyStage* st, size_t x, \
+ SkNf r, SkNf g, SkNf b, SkNf a, \
+ SkNf dr, SkNf dg, SkNf db, SkNf da) { \
+ name##_kernel<false>(st->ctx, x,0, r,g,b,a, dr,dg,db,da); \
+ if (kCallNext) { \
+ next(st, x, r,g,b,a, dr,dg,db,da); \
+ } \
+ } \
+ SI void SK_VECTORCALL name(TailStage* st, size_t x, size_t tail, \
+ SkNf r, SkNf g, SkNf b, SkNf a, \
+ SkNf dr, SkNf dg, SkNf db, SkNf da) { \
+ name##_kernel<true>(st->ctx, x,tail, r,g,b,a, dr,dg,db,da); \
+ if (kCallNext) { \
+ next(st, x,tail, r,g,b,a, dr,dg,db,da); \
+ } \
+ } \
+ template <bool kIsTail> \
+ static SK_ALWAYS_INLINE void name##_kernel(void* ctx, size_t x, size_t tail, \
+ SkNf& r, SkNf& g, SkNf& b, SkNf& a, \
SkNf& dr, SkNf& dg, SkNf& db, SkNf& da)
// Many xfermodes apply the same logic to each channel.
-#define RGBA_XFERMODE(name) \
- static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \
- const SkNf& d, const SkNf& da); \
- SI void SK_VECTORCALL name(SkRasterPipeline::Stage* st, size_t x, \
- SkNf r, SkNf g, SkNf b, SkNf a, \
- SkNf dr, SkNf dg, SkNf db, SkNf da) { \
- r = name##_kernel(r,a,dr,da); \
- g = name##_kernel(g,a,dg,da); \
- b = name##_kernel(b,a,db,da); \
- a = name##_kernel(a,a,da,da); \
- next_body(st, x, r,g,b,a, dr,dg,db,da); \
- } \
- SI void SK_VECTORCALL name##_tail(SkRasterPipeline::Stage* st, size_t x, size_t tail, \
- SkNf r, SkNf g, SkNf b, SkNf a, \
- SkNf dr, SkNf dg, SkNf db, SkNf da) { \
- r = name##_kernel(r,a,dr,da); \
- g = name##_kernel(g,a,dg,da); \
- b = name##_kernel(b,a,db,da); \
- a = name##_kernel(a,a,da,da); \
- next_tail(st, x,tail, r,g,b,a, dr,dg,db,da); \
- } \
- static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \
+#define RGBA_XFERMODE(name) \
+ static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \
+ const SkNf& d, const SkNf& da); \
+ SI void SK_VECTORCALL name(BodyStage* st, size_t x, \
+ SkNf r, SkNf g, SkNf b, SkNf a, \
+ SkNf dr, SkNf dg, SkNf db, SkNf da) { \
+ r = name##_kernel(r,a,dr,da); \
+ g = name##_kernel(g,a,dg,da); \
+ b = name##_kernel(b,a,db,da); \
+ a = name##_kernel(a,a,da,da); \
+ next(st, x, r,g,b,a, dr,dg,db,da); \
+ } \
+ SI void SK_VECTORCALL name(TailStage* st, size_t x, size_t tail, \
+ SkNf r, SkNf g, SkNf b, SkNf a, \
+ SkNf dr, SkNf dg, SkNf db, SkNf da) { \
+ r = name##_kernel(r,a,dr,da); \
+ g = name##_kernel(g,a,dg,da); \
+ b = name##_kernel(b,a,db,da); \
+ a = name##_kernel(a,a,da,da); \
+ next(st, x,tail, r,g,b,a, dr,dg,db,da); \
+ } \
+ static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \
const SkNf& d, const SkNf& da)
// Most of the rest apply the same logic to color channels and use srcover's alpha logic.
-#define RGB_XFERMODE(name) \
- static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \
- const SkNf& d, const SkNf& da); \
- SI void SK_VECTORCALL name(SkRasterPipeline::Stage* st, size_t x, \
- SkNf r, SkNf g, SkNf b, SkNf a, \
- SkNf dr, SkNf dg, SkNf db, SkNf da) { \
- r = name##_kernel(r,a,dr,da); \
- g = name##_kernel(g,a,dg,da); \
- b = name##_kernel(b,a,db,da); \
- a = a + (da * (1.0f-a)); \
- next_body(st, x, r,g,b,a, dr,dg,db,da); \
- } \
- SI void SK_VECTORCALL name##_tail(SkRasterPipeline::Stage* st, size_t x, size_t tail, \
- SkNf r, SkNf g, SkNf b, SkNf a, \
- SkNf dr, SkNf dg, SkNf db, SkNf da) { \
- r = name##_kernel(r,a,dr,da); \
- g = name##_kernel(g,a,dg,da); \
- b = name##_kernel(b,a,db,da); \
- a = a + (da * (1.0f-a)); \
- next_tail(st, x,tail, r,g,b,a, dr,dg,db,da); \
- } \
- static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \
+#define RGB_XFERMODE(name) \
+ static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \
+ const SkNf& d, const SkNf& da); \
+ SI void SK_VECTORCALL name(BodyStage* st, size_t x, \
+ SkNf r, SkNf g, SkNf b, SkNf a, \
+ SkNf dr, SkNf dg, SkNf db, SkNf da) { \
+ r = name##_kernel(r,a,dr,da); \
+ g = name##_kernel(g,a,dg,da); \
+ b = name##_kernel(b,a,db,da); \
+ a = a + (da * (1.0f-a)); \
+ next(st, x, r,g,b,a, dr,dg,db,da); \
+ } \
+ SI void SK_VECTORCALL name(TailStage* st, size_t x, size_t tail, \
+ SkNf r, SkNf g, SkNf b, SkNf a, \
+ SkNf dr, SkNf dg, SkNf db, SkNf da) { \
+ r = name##_kernel(r,a,dr,da); \
+ g = name##_kernel(g,a,dg,da); \
+ b = name##_kernel(b,a,db,da); \
+ a = a + (da * (1.0f-a)); \
+ next(st, x,tail, r,g,b,a, dr,dg,db,da); \
+ } \
+ static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \
const SkNf& d, const SkNf& da)
+// Clamp colors into [0,1] premul (e.g. just before storing back to memory).
+SI void clamp_01_premul(SkNf& r, SkNf& g, SkNf& b, SkNf& a) {
+ a = SkNf::Max(a, 0.0f);
+ r = SkNf::Max(r, 0.0f);
+ g = SkNf::Max(g, 0.0f);
+ b = SkNf::Max(b, 0.0f);
+
+ a = SkNf::Min(a, 1.0f);
+ r = SkNf::Min(r, a);
+ g = SkNf::Min(g, a);
+ b = SkNf::Min(b, a);
+}
-namespace SK_OPTS_NS {
+SI SkNf inv(const SkNf& x) { return 1.0f - x; }
- SI void run_pipeline(size_t x, size_t n,
- void (*vBodyStart)(), SkRasterPipeline::Stage* body,
- void (*vTailStart)(), SkRasterPipeline::Stage* tail) {
- auto bodyStart = (Body)vBodyStart;
- auto tailStart = (Tail)vTailStart;
- SkNf v; // Fastest to start uninitialized.
- while (n >= N) {
- bodyStart(body, x, v,v,v,v, v,v,v,v);
- x += N;
- n -= N;
- }
- if (n > 0) {
- tailStart(tail, x,n, v,v,v,v, v,v,v,v);
+SI SkNf lerp(const SkNf& from, const SkNf& to, const SkNf& cov) {
+ return SkNx_fma(to-from, cov, from);
+}
+
+template <bool kIsTail, typename T>
+SI SkNx<N,T> load(size_t tail, const T* src) {
+ SkASSERT(kIsTail == (tail > 0));
+ // TODO: maskload for 32- and 64-bit T
+ if (kIsTail) {
+ T buf[8] = {0};
+ switch (tail & (N-1)) {
+ case 7: buf[6] = src[6];
+ case 6: buf[5] = src[5];
+ case 5: buf[4] = src[4];
+ case 4: buf[3] = src[3];
+ case 3: buf[2] = src[2];
+ case 2: buf[1] = src[1];
}
+ buf[0] = src[0];
+ return SkNx<N,T>::Load(buf);
}
+ return SkNx<N,T>::Load(src);
+}
- // Clamp colors into [0,1] premul (e.g. just before storing back to memory).
- SI void clamp_01_premul(SkNf& r, SkNf& g, SkNf& b, SkNf& a) {
- a = SkNf::Max(a, 0.0f);
- r = SkNf::Max(r, 0.0f);
- g = SkNf::Max(g, 0.0f);
- b = SkNf::Max(b, 0.0f);
-
- a = SkNf::Min(a, 1.0f);
- r = SkNf::Min(r, a);
- g = SkNf::Min(g, a);
- b = SkNf::Min(b, a);
+template <bool kIsTail, typename T>
+SI void store(size_t tail, const SkNx<N,T>& v, T* dst) {
+ SkASSERT(kIsTail == (tail > 0));
+ // TODO: maskstore for 32- and 64-bit T
+ if (kIsTail) {
+ switch (tail & (N-1)) {
+ case 7: dst[6] = v[6];
+ case 6: dst[5] = v[5];
+ case 5: dst[4] = v[4];
+ case 4: dst[3] = v[3];
+ case 3: dst[2] = v[2];
+ case 2: dst[1] = v[1];
+ }
+ dst[0] = v[0];
+ return;
}
+ v.store(dst);
+}
- SI SkNf inv(const SkNf& x) { return 1.0f - x; }
+SI void from_565(const SkNh& _565, SkNf* r, SkNf* g, SkNf* b) {
+ auto _32_bit = SkNx_cast<int>(_565);
- SI SkNf lerp(const SkNf& from, const SkNf& to, const SkNf& cov) {
- return SkNx_fma(to-from, cov, from);
- }
+ *r = SkNx_cast<float>(_32_bit & SK_R16_MASK_IN_PLACE) * (1.0f / SK_R16_MASK_IN_PLACE);
+ *g = SkNx_cast<float>(_32_bit & SK_G16_MASK_IN_PLACE) * (1.0f / SK_G16_MASK_IN_PLACE);
+ *b = SkNx_cast<float>(_32_bit & SK_B16_MASK_IN_PLACE) * (1.0f / SK_B16_MASK_IN_PLACE);
+}
- template <bool kIsTail, typename T>
- SI SkNx<N,T> load(size_t tail, const T* src) {
- SkASSERT(kIsTail == (tail > 0));
- // TODO: maskload for 32- and 64-bit T
- if (kIsTail) {
- T buf[8] = {0};
- switch (tail & (N-1)) {
- case 7: buf[6] = src[6];
- case 6: buf[5] = src[5];
- case 5: buf[4] = src[4];
- case 4: buf[3] = src[3];
- case 3: buf[2] = src[2];
- case 2: buf[1] = src[1];
- }
- buf[0] = src[0];
- return SkNx<N,T>::Load(buf);
- }
- return SkNx<N,T>::Load(src);
- }
+SI SkNh to_565(const SkNf& r, const SkNf& g, const SkNf& b) {
+ return SkNx_cast<uint16_t>( SkNx_cast<int>(r * SK_R16_MASK + 0.5f) << SK_R16_SHIFT
+ | SkNx_cast<int>(g * SK_G16_MASK + 0.5f) << SK_G16_SHIFT
+ | SkNx_cast<int>(b * SK_B16_MASK + 0.5f) << SK_B16_SHIFT);
+}
- template <bool kIsTail, typename T>
- SI void store(size_t tail, const SkNx<N,T>& v, T* dst) {
- SkASSERT(kIsTail == (tail > 0));
- // TODO: maskstore for 32- and 64-bit T
- if (kIsTail) {
- switch (tail & (N-1)) {
- case 7: dst[6] = v[6];
- case 6: dst[5] = v[5];
- case 5: dst[4] = v[4];
- case 4: dst[3] = v[3];
- case 3: dst[2] = v[2];
- case 2: dst[1] = v[1];
- }
- dst[0] = v[0];
- return;
- }
- v.store(dst);
- }
+STAGE(just_return, false) { }
- SI void from_565(const SkNh& _565, SkNf* r, SkNf* g, SkNf* b) {
- auto _32_bit = SkNx_cast<int>(_565);
+STAGE(swap_src_dst, true) {
+ SkTSwap(r,dr);
+ SkTSwap(g,dg);
+ SkTSwap(b,db);
+ SkTSwap(a,da);
+}
- *r = SkNx_cast<float>(_32_bit & SK_R16_MASK_IN_PLACE) * (1.0f / SK_R16_MASK_IN_PLACE);
- *g = SkNx_cast<float>(_32_bit & SK_G16_MASK_IN_PLACE) * (1.0f / SK_G16_MASK_IN_PLACE);
- *b = SkNx_cast<float>(_32_bit & SK_B16_MASK_IN_PLACE) * (1.0f / SK_B16_MASK_IN_PLACE);
- }
+// The default shader produces a constant color (from the SkPaint).
+STAGE(constant_color, true) {
+ auto color = (const SkPM4f*)ctx;
+ r = color->r();
+ g = color->g();
+ b = color->b();
+ a = color->a();
+}
- SI SkNh to_565(const SkNf& r, const SkNf& g, const SkNf& b) {
- return SkNx_cast<uint16_t>( SkNx_cast<int>(r * SK_R16_MASK + 0.5f) << SK_R16_SHIFT
- | SkNx_cast<int>(g * SK_G16_MASK + 0.5f) << SK_G16_SHIFT
- | SkNx_cast<int>(b * SK_B16_MASK + 0.5f) << SK_B16_SHIFT);
- }
+// s' = d(1-c) + sc, for a constant c.
+STAGE(lerp_constant_float, true) {
+ SkNf c = *(const float*)ctx;
- STAGE(just_return, false) { }
+ r = lerp(dr, r, c);
+ g = lerp(dg, g, c);
+ b = lerp(db, b, c);
+ a = lerp(da, a, c);
+}
- STAGE(swap_src_dst, true) {
- SkTSwap(r,dr);
- SkTSwap(g,dg);
- SkTSwap(b,db);
- SkTSwap(a,da);
- }
+// s' = sc for 8-bit c.
+STAGE(scale_u8, true) {
+ auto ptr = (const uint8_t*)ctx + x;
- // The default shader produces a constant color (from the SkPaint).
- STAGE(constant_color, true) {
- auto color = (const SkPM4f*)ctx;
- r = color->r();
- g = color->g();
- b = color->b();
- a = color->a();
- }
+ SkNf c = SkNx_cast<float>(load<kIsTail>(tail, ptr)) * (1/255.0f);
+ r = r*c;
+ g = g*c;
+ b = b*c;
+ a = a*c;
+}
- // s' = d(1-c) + sc, for a constant c.
- STAGE(lerp_constant_float, true) {
- SkNf c = *(const float*)ctx;
+// s' = d(1-c) + sc for 8-bit c.
+STAGE(lerp_u8, true) {
+ auto ptr = (const uint8_t*)ctx + x;
- r = lerp(dr, r, c);
- g = lerp(dg, g, c);
- b = lerp(db, b, c);
- a = lerp(da, a, c);
- }
+ SkNf c = SkNx_cast<float>(load<kIsTail>(tail, ptr)) * (1/255.0f);
+ r = lerp(dr, r, c);
+ g = lerp(dg, g, c);
+ b = lerp(db, b, c);
+ a = lerp(da, a, c);
+}
- // s' = sc for 8-bit c.
- STAGE(scale_u8, true) {
- auto ptr = (const uint8_t*)ctx + x;
+// s' = d(1-c) + sc for 565 c.
+STAGE(lerp_565, true) {
+ auto ptr = (const uint16_t*)ctx + x;
+ SkNf cr, cg, cb;
+ from_565(load<kIsTail>(tail, ptr), &cr, &cg, &cb);
- SkNf c = SkNx_cast<float>(load<kIsTail>(tail, ptr)) * (1/255.0f);
- r = r*c;
- g = g*c;
- b = b*c;
- a = a*c;
- }
+ r = lerp(dr, r, cr);
+ g = lerp(dg, g, cg);
+ b = lerp(db, b, cb);
+ a = 1.0f;
+}
- // s' = d(1-c) + sc for 8-bit c.
- STAGE(lerp_u8, true) {
- auto ptr = (const uint8_t*)ctx + x;
+STAGE(load_d_565, true) {
+ auto ptr = (const uint16_t*)ctx + x;
+ from_565(load<kIsTail>(tail, ptr), &dr,&dg,&db);
+ da = 1.0f;
+}
- SkNf c = SkNx_cast<float>(load<kIsTail>(tail, ptr)) * (1/255.0f);
- r = lerp(dr, r, c);
- g = lerp(dg, g, c);
- b = lerp(db, b, c);
- a = lerp(da, a, c);
- }
+STAGE(load_s_565, true) {
+ auto ptr = (const uint16_t*)ctx + x;
+ from_565(load<kIsTail>(tail, ptr), &r,&g,&b);
+ a = 1.0f;
+}
- // s' = d(1-c) + sc for 565 c.
- STAGE(lerp_565, true) {
- auto ptr = (const uint16_t*)ctx + x;
- SkNf cr, cg, cb;
- from_565(load<kIsTail>(tail, ptr), &cr, &cg, &cb);
+STAGE(store_565, false) {
+ clamp_01_premul(r,g,b,a);
+ auto ptr = (uint16_t*)ctx + x;
+ store<kIsTail>(tail, to_565(r,g,b), ptr);
+}
- r = lerp(dr, r, cr);
- g = lerp(dg, g, cg);
- b = lerp(db, b, cb);
- a = 1.0f;
+STAGE(load_d_f16, true) {
+ auto ptr = (const uint64_t*)ctx + x;
+
+ SkNh rh, gh, bh, ah;
+ if (kIsTail) {
+ uint64_t buf[8] = {0};
+ switch (tail & (N-1)) {
+ case 7: buf[6] = ptr[6];
+ case 6: buf[5] = ptr[5];
+ case 5: buf[4] = ptr[4];
+ case 4: buf[3] = ptr[3];
+ case 3: buf[2] = ptr[2];
+ case 2: buf[1] = ptr[1];
+ }
+ buf[0] = ptr[0];
+ SkNh::Load4(buf, &rh, &gh, &bh, &ah);
+ } else {
+ SkNh::Load4(ptr, &rh, &gh, &bh, &ah);
}
- STAGE(load_d_565, true) {
- auto ptr = (const uint16_t*)ctx + x;
- from_565(load<kIsTail>(tail, ptr), &dr,&dg,&db);
- da = 1.0f;
- }
+ dr = SkHalfToFloat_finite_ftz(rh);
+ dg = SkHalfToFloat_finite_ftz(gh);
+ db = SkHalfToFloat_finite_ftz(bh);
+ da = SkHalfToFloat_finite_ftz(ah);
+}
- STAGE(load_s_565, true) {
- auto ptr = (const uint16_t*)ctx + x;
- from_565(load<kIsTail>(tail, ptr), &r,&g,&b);
- a = 1.0f;
+STAGE(load_s_f16, true) {
+ auto ptr = (const uint64_t*)ctx + x;
+
+ SkNh rh, gh, bh, ah;
+ if (kIsTail) {
+ uint64_t buf[8] = {0};
+ switch (tail & (N-1)) {
+ case 7: buf[6] = ptr[6];
+ case 6: buf[5] = ptr[5];
+ case 5: buf[4] = ptr[4];
+ case 4: buf[3] = ptr[3];
+ case 3: buf[2] = ptr[2];
+ case 2: buf[1] = ptr[1];
+ }
+ buf[0] = ptr[0];
+ SkNh::Load4(buf, &rh, &gh, &bh, &ah);
+ } else {
+ SkNh::Load4(ptr, &rh, &gh, &bh, &ah);
}
- STAGE(store_565, false) {
- clamp_01_premul(r,g,b,a);
- auto ptr = (uint16_t*)ctx + x;
- store<kIsTail>(tail, to_565(r,g,b), ptr);
- }
+ r = SkHalfToFloat_finite_ftz(rh);
+ g = SkHalfToFloat_finite_ftz(gh);
+ b = SkHalfToFloat_finite_ftz(bh);
+ a = SkHalfToFloat_finite_ftz(ah);
+}
- STAGE(load_d_f16, true) {
- auto ptr = (const uint64_t*)ctx + x;
-
- SkNh rh, gh, bh, ah;
- if (kIsTail) {
- uint64_t buf[8] = {0};
- switch (tail & (N-1)) {
- case 7: buf[6] = ptr[6];
- case 6: buf[5] = ptr[5];
- case 5: buf[4] = ptr[4];
- case 4: buf[3] = ptr[3];
- case 3: buf[2] = ptr[2];
- case 2: buf[1] = ptr[1];
- }
- buf[0] = ptr[0];
- SkNh::Load4(buf, &rh, &gh, &bh, &ah);
- } else {
- SkNh::Load4(ptr, &rh, &gh, &bh, &ah);
+STAGE(store_f16, false) {
+ clamp_01_premul(r,g,b,a);
+ auto ptr = (uint64_t*)ctx + x;
+
+ uint64_t buf[8];
+ SkNh::Store4(kIsTail ? buf : ptr, SkFloatToHalf_finite_ftz(r),
+ SkFloatToHalf_finite_ftz(g),
+ SkFloatToHalf_finite_ftz(b),
+ SkFloatToHalf_finite_ftz(a));
+ if (kIsTail) {
+ switch (tail & (N-1)) {
+ case 7: ptr[6] = buf[6];
+ case 6: ptr[5] = buf[5];
+ case 5: ptr[4] = buf[4];
+ case 4: ptr[3] = buf[3];
+ case 3: ptr[2] = buf[2];
+ case 2: ptr[1] = buf[1];
}
-
- dr = SkHalfToFloat_finite_ftz(rh);
- dg = SkHalfToFloat_finite_ftz(gh);
- db = SkHalfToFloat_finite_ftz(bh);
- da = SkHalfToFloat_finite_ftz(ah);
+ ptr[0] = buf[0];
}
+}
- STAGE(load_s_f16, true) {
- auto ptr = (const uint64_t*)ctx + x;
-
- SkNh rh, gh, bh, ah;
- if (kIsTail) {
- uint64_t buf[8] = {0};
- switch (tail & (N-1)) {
- case 7: buf[6] = ptr[6];
- case 6: buf[5] = ptr[5];
- case 5: buf[4] = ptr[4];
- case 4: buf[3] = ptr[3];
- case 3: buf[2] = ptr[2];
- case 2: buf[1] = ptr[1];
- }
- buf[0] = ptr[0];
- SkNh::Load4(buf, &rh, &gh, &bh, &ah);
- } else {
- SkNh::Load4(ptr, &rh, &gh, &bh, &ah);
- }
- r = SkHalfToFloat_finite_ftz(rh);
- g = SkHalfToFloat_finite_ftz(gh);
- b = SkHalfToFloat_finite_ftz(bh);
- a = SkHalfToFloat_finite_ftz(ah);
+// Load 8-bit SkPMColor-order sRGB.
+STAGE(load_d_srgb, true) {  // Reads 32-bit pixels from ctx at offset x and fills dr,dg,db,da.
+    auto ptr = (const uint32_t*)ctx + x;
+
+    auto px = load<kIsTail>(tail, ptr);  // load<> is defined outside this chunk.
+    auto to_int = [](const SkNx<N, uint32_t>& v) { return SkNi::Load(&v); };  // Builds an SkNi by loading from the address of the uint32_t vector.
+    dr = sk_linear_from_srgb_math(to_int((px >> SK_R32_SHIFT) & 0xff));  // Each color channel: shift, mask to 8 bits, then sk_linear_from_srgb_math.
+    dg = sk_linear_from_srgb_math(to_int((px >> SK_G32_SHIFT) & 0xff));
+    db = sk_linear_from_srgb_math(to_int((px >> SK_B32_SHIFT) & 0xff));
+    da = (1/255.0f)*SkNx_cast<float>(to_int( px >> SK_A32_SHIFT ));  // Alpha is only scaled by 1/255; it is not masked and skips sk_linear_from_srgb_math.
+}
+
+STAGE(load_s_srgb, true) {  // Reads 32-bit pixels from ctx at offset x and fills r,g,b,a.
+    auto ptr = (const uint32_t*)ctx + x;
+
+    auto px = load<kIsTail>(tail, ptr);  // load<> is defined outside this chunk.
+    auto to_int = [](const SkNx<N, uint32_t>& v) { return SkNi::Load(&v); };  // Builds an SkNi by loading from the address of the uint32_t vector.
+    r = sk_linear_from_srgb_math(to_int((px >> SK_R32_SHIFT) & 0xff));  // Each color channel: shift, mask to 8 bits, then sk_linear_from_srgb_math.
+    g = sk_linear_from_srgb_math(to_int((px >> SK_G32_SHIFT) & 0xff));
+    b = sk_linear_from_srgb_math(to_int((px >> SK_B32_SHIFT) & 0xff));
+    a = (1/255.0f)*SkNx_cast<float>(to_int( px >> SK_A32_SHIFT ));  // Alpha is only scaled by 1/255; it is not masked and skips sk_linear_from_srgb_math.
+}
+
+STAGE(store_srgb, false) {  // Packs r,g,b,a into 32-bit pixels and writes them to ctx at offset x.
+    clamp_01_premul(r,g,b,a);  // Clamp happens before encoding; the color channels then use the _noclamp conversion.
+    auto ptr = (uint32_t*)ctx + x;
+    store<kIsTail>(tail, ( sk_linear_to_srgb_noclamp(r) << SK_R32_SHIFT  // store<> is defined outside this chunk.
+                         | sk_linear_to_srgb_noclamp(g) << SK_G32_SHIFT
+                         | sk_linear_to_srgb_noclamp(b) << SK_B32_SHIFT
+                         | SkNx_cast<int>(255.0f * a + 0.5f) << SK_A32_SHIFT ), (int*)ptr);  // Alpha: scaled by 255 with 0.5 added before the int cast; no sRGB conversion.
+}
+
+RGBA_XFERMODE(clear) { return 0.0f; }  // NOTE(review): RGBA_XFERMODE is defined outside this chunk; the bodies here read s, d, sa, da - presumably a source/destination channel and the two alphas (confirm).
+//RGBA_XFERMODE(src) { return s; } // This would be a no-op stage, so we just omit it.
+RGBA_XFERMODE(dst) { return d; }
+
+RGBA_XFERMODE(srcatop) { return s*da + d*inv(sa); }  // inv() is defined outside this chunk; presumably 1 - x (confirm).
+RGBA_XFERMODE(srcin) { return s * da; }
+RGBA_XFERMODE(srcout) { return s * inv(da); }
+RGBA_XFERMODE(srcover) { return SkNx_fma(d, inv(sa), s); }  // Presumably d*inv(sa) + s as a fused multiply-add (confirm SkNx_fma argument order).
+RGBA_XFERMODE(dstatop) { return srcatop_kernel(d,da,s,sa); }  // The four dst* modes call the matching src* kernel with (d,da) passed ahead of (s,sa).
+RGBA_XFERMODE(dstin) { return srcin_kernel (d,da,s,sa); }  // NOTE(review): presumably this swaps the src/dst roles - the kernel parameter order comes from the macro, not visible here.
+RGBA_XFERMODE(dstout) { return srcout_kernel (d,da,s,sa); }
+RGBA_XFERMODE(dstover) { return srcover_kernel(d,da,s,sa); }
+
+RGBA_XFERMODE(modulate) { return s*d; }
+RGBA_XFERMODE(multiply) { return s*inv(da) + d*inv(sa) + s*d; }
+RGBA_XFERMODE(plus_) { return s + d; }  // The sum is not clamped here.
+RGBA_XFERMODE(screen) { return s + d - s*d; }
+RGBA_XFERMODE(xor_) { return s*inv(da) + d*inv(sa); }
+
+RGB_XFERMODE(colorburn) {  // Two special cases are tested before the general formula; thenElse presumably selects per lane (confirm).
+    return (d == da ).thenElse(d + s*inv(da),  // Case d == da.
+           (s == 0.0f).thenElse(s + d*inv(sa),  // Case s == 0: this branch is chosen, so the division by s below is not the selected result.
+                                sa*(da - SkNf::Min(da, (da-d)*sa/s)) + s*inv(da) + d*inv(sa)));  // General case.
+}
+RGB_XFERMODE(colordodge) {  // Two special cases are tested before the general formula; thenElse presumably selects per lane (confirm).
+    return (d == 0.0f).thenElse(d + s*inv(da),  // Case d == 0.
+           (s == sa ).thenElse(s + d*inv(sa),  // Case s == sa: this branch is chosen, so the division by (sa - s) below is not the selected result.
+                                sa*SkNf::Min(da, (d*sa)/(sa - s)) + s*inv(da) + d*inv(sa)));  // General case.
+}
+RGB_XFERMODE(darken) { return s + d - SkNf::Max(s*da, d*sa); }  // Subtracts the larger of the two cross products.
+RGB_XFERMODE(difference) { return s + d - 2.0f*SkNf::Min(s*da,d*sa); }  // Subtracts twice the smaller cross product.
+RGB_XFERMODE(exclusion) { return s + d - 2.0f*s*d; }
+RGB_XFERMODE(hardlight) {  // Common s*inv(da) + d*inv(sa) term plus one of two formulas chosen by comparing 2*s against sa.
+    return s*inv(da) + d*inv(sa)
+         + (2.0f*s <= sa).thenElse(2.0f*s*d, sa*da - 2.0f*(da-d)*(sa-s));  // First formula when 2*s <= sa, second otherwise.
+}
+RGB_XFERMODE(lighten) { return s + d - SkNf::Min(s*da, d*sa); }  // Same shape as darken, with Min in place of Max.
+RGB_XFERMODE(overlay) { return hardlight_kernel(d,da,s,sa); }  // Calls hardlight's kernel with (d,da) passed ahead of (s,sa); presumably swaps src/dst roles (confirm against the macro).
+RGB_XFERMODE(softlight) {  // Computes all three candidate terms, then picks among them with thenElse.
+    SkNf m = (da > 0.0f).thenElse(d / da, 0.0f),  // m is d/da where da > 0, and 0 otherwise.
+         s2 = 2.0f*s,
+         m4 = 4.0f*m;
+
+    // The logic forks three ways:
+    // 1. dark src?
+    // 2. light src, dark dst?
+    // 3. light src, light dst?
+    SkNf darkSrc = d*(sa + (s2 - sa)*(1.0f - m)), // Used in case 1.
+         darkDst = (m4*m4 + m4)*(m - 1.0f) + 7.0f*m, // Used in case 2.
+         liteDst = m.rsqrt().invert() - m, // Used in case 3; rsqrt().invert() presumably approximates sqrt(m) (confirm).
+         liteSrc = d*sa + da*(s2 - sa) * (4.0f*d <= da).thenElse(darkDst, liteDst); // 2 or 3? "Dark dst" means 4*d <= da.
+    return s*inv(da) + d*inv(sa) + (s2 <= sa).thenElse(darkSrc, liteSrc); // 1 or (2 or 3)? "Dark src" means 2*s <= sa.
+}
+
+
+template <typename Fn>
+SI Fn enum_to_Fn(SkRasterPipeline::StockStage st) {  // Maps a StockStage enum value to the same-named stage function, returned as Fn.
+    switch (st) {  // M expands to one case per entry of SK_RASTER_PIPELINE_STAGES (list defined outside this chunk).
+    #define M(stage) case SkRasterPipeline::stage: return stage;
+        SK_RASTER_PIPELINE_STAGES(M)
+    #undef M
     }
+    SkASSERT(false);  // Reached only when st matched none of the cases above.
+    return just_return;  // Fallback return value placed after the assert.
+}
- STAGE(store_f16, false) {
- clamp_01_premul(r,g,b,a);
- auto ptr = (uint64_t*)ctx + x;
-
- uint64_t buf[8];
- SkNh::Store4(kIsTail ? buf : ptr, SkFloatToHalf_finite_ftz(r),
- SkFloatToHalf_finite_ftz(g),
- SkFloatToHalf_finite_ftz(b),
- SkFloatToHalf_finite_ftz(a));
- if (kIsTail) {
- switch (tail & (N-1)) {
- case 7: ptr[6] = buf[6];
- case 6: ptr[5] = buf[5];
- case 5: ptr[4] = buf[4];
- case 4: ptr[3] = buf[3];
- case 3: ptr[2] = buf[2];
- case 2: ptr[1] = buf[1];
- }
- ptr[0] = buf[0];
+namespace SK_OPTS_NS {
+
+    SI void run_pipeline(size_t x, size_t n,  // Runs the stage list over n items starting at x: steps of N first, then one tail call for any remainder.
+                         const SkRasterPipeline::Stage* stages, int nstages) {
+        SkASSERT(nstages <= SkRasterPipeline::kMaxStages);  // The local arrays below hold kMaxStages entries.
+        if (nstages == 0) {  // Nothing to run.
+            return;
         }
-    }
+        SkNf v; // Fastest to start uninitialized.
-    // Load 8-bit SkPMColor-order sRGB.
-    STAGE(load_d_srgb, true) {
-        auto ptr = (const uint32_t*)ctx + x;
+        if (n >= N) {  // At least one full step of N.
+            BodyStage body[SkRasterPipeline::kMaxStages];  // Built from the enum+ctx descriptions on every call.
-        auto px = load<kIsTail>(tail, ptr);
-        auto to_int = [](const SkNx<N, uint32_t>& v) { return SkNi::Load(&v); };
-        dr = sk_linear_from_srgb_math(to_int((px >> SK_R32_SHIFT) & 0xff));
-        dg = sk_linear_from_srgb_math(to_int((px >> SK_G32_SHIFT) & 0xff));
-        db = sk_linear_from_srgb_math(to_int((px >> SK_B32_SHIFT) & 0xff));
-        da = (1/255.0f)*SkNx_cast<float>(to_int( px >> SK_A32_SHIFT ));
-    }
+            Body start = enum_to_Fn<Body>(stages[0].stage);  // Stage 0's function is called directly.
+            for (int i = 0; i < nstages-1; i++) {  // Entry i pairs stage i's ctx with the function of stage i+1.
+                body[i].next = enum_to_Fn<Body>(stages[i+1].stage);
+                body[i].ctx = stages[i].ctx;
+            }
+            body[nstages-1].next = just_return;  // The last entry chains to just_return.
+            body[nstages-1].ctx = stages[nstages-1].ctx;
+
+            do {
+                start(body, x, v,v,v,v, v,v,v,v);  // v is passed for all eight vector arguments.
+                x += N;
+                n -= N;
+            } while (n >= N);
+        }
-    STAGE(load_s_srgb, true) {
-        auto ptr = (const uint32_t*)ctx + x;
+        if (n > 0) {  // Fewer than N items remain at this point.
+            TailStage tail[SkRasterPipeline::kMaxStages];  // Same construction as body above, using the Tail function type.
-        auto px = load<kIsTail>(tail, ptr);
-        auto to_int = [](const SkNx<N, uint32_t>& v) { return SkNi::Load(&v); };
-        r = sk_linear_from_srgb_math(to_int((px >> SK_R32_SHIFT) & 0xff));
-        g = sk_linear_from_srgb_math(to_int((px >> SK_G32_SHIFT) & 0xff));
-        b = sk_linear_from_srgb_math(to_int((px >> SK_B32_SHIFT) & 0xff));
-        a = (1/255.0f)*SkNx_cast<float>(to_int( px >> SK_A32_SHIFT ));
-    }
+            Tail start = enum_to_Fn<Tail>(stages[0].stage);
+            for (int i = 0; i < nstages-1; i++) {  // Entry i pairs stage i's ctx with the function of stage i+1.
+                tail[i].next = enum_to_Fn<Tail>(stages[i+1].stage);
+                tail[i].ctx = stages[i].ctx;
+            }
+            tail[nstages-1].next = just_return;  // The last entry chains to just_return.
+            tail[nstages-1].ctx = stages[nstages-1].ctx;
-    STAGE(store_srgb, false) {
-        clamp_01_premul(r,g,b,a);
-        auto ptr = (uint32_t*)ctx + x;
-        store<kIsTail>(tail, ( sk_linear_to_srgb_noclamp(r) << SK_R32_SHIFT
-                             | sk_linear_to_srgb_noclamp(g) << SK_G32_SHIFT
-                             | sk_linear_to_srgb_noclamp(b) << SK_B32_SHIFT
-                             | SkNx_cast<int>(255.0f * a + 0.5f) << SK_A32_SHIFT ), (int*)ptr);
+            start(tail, x,n, v,v,v,v, v,v,v,v);  // One call covers the remainder; n is passed as an extra argument after x.
+        }
     }
- RGBA_XFERMODE(clear) { return 0.0f; }
- //RGBA_XFERMODE(src) { return s; } // This would be a no-op stage, so we just omit it.
- RGBA_XFERMODE(dst) { return d; }
-
- RGBA_XFERMODE(srcatop) { return s*da + d*inv(sa); }
- RGBA_XFERMODE(srcin) { return s * da; }
- RGBA_XFERMODE(srcout) { return s * inv(da); }
- RGBA_XFERMODE(srcover) { return SkNx_fma(d, inv(sa), s); }
- RGBA_XFERMODE(dstatop) { return srcatop_kernel(d,da,s,sa); }
- RGBA_XFERMODE(dstin) { return srcin_kernel (d,da,s,sa); }
- RGBA_XFERMODE(dstout) { return srcout_kernel (d,da,s,sa); }
- RGBA_XFERMODE(dstover) { return srcover_kernel(d,da,s,sa); }
-
- RGBA_XFERMODE(modulate) { return s*d; }
- RGBA_XFERMODE(multiply) { return s*inv(da) + d*inv(sa) + s*d; }
- RGBA_XFERMODE(plus_) { return s + d; }
- RGBA_XFERMODE(screen) { return s + d - s*d; }
- RGBA_XFERMODE(xor_) { return s*inv(da) + d*inv(sa); }
-
- RGB_XFERMODE(colorburn) {
- return (d == da ).thenElse(d + s*inv(da),
- (s == 0.0f).thenElse(s + d*inv(sa),
- sa*(da - SkNf::Min(da, (da-d)*sa/s)) + s*inv(da) + d*inv(sa)));
- }
- RGB_XFERMODE(colordodge) {
- return (d == 0.0f).thenElse(d + s*inv(da),
- (s == sa ).thenElse(s + d*inv(sa),
- sa*SkNf::Min(da, (d*sa)/(sa - s)) + s*inv(da) + d*inv(sa)));
- }
- RGB_XFERMODE(darken) { return s + d - SkNf::Max(s*da, d*sa); }
- RGB_XFERMODE(difference) { return s + d - 2.0f*SkNf::Min(s*da,d*sa); }
- RGB_XFERMODE(exclusion) { return s + d - 2.0f*s*d; }
- RGB_XFERMODE(hardlight) {
- return s*inv(da) + d*inv(sa)
- + (2.0f*s <= sa).thenElse(2.0f*s*d, sa*da - 2.0f*(da-d)*(sa-s));
- }
- RGB_XFERMODE(lighten) { return s + d - SkNf::Min(s*da, d*sa); }
- RGB_XFERMODE(overlay) { return hardlight_kernel(d,da,s,sa); }
- RGB_XFERMODE(softlight) {
- SkNf m = (da > 0.0f).thenElse(d / da, 0.0f),
- s2 = 2.0f*s,
- m4 = 4.0f*m;
-
- // The logic forks three ways:
- // 1. dark src?
- // 2. light src, dark dst?
- // 3. light src, light dst?
- SkNf darkSrc = d*(sa + (s2 - sa)*(1.0f - m)), // Used in case 1.
- darkDst = (m4*m4 + m4)*(m - 1.0f) + 7.0f*m, // Used in case 2.
- liteDst = m.rsqrt().invert() - m, // Used in case 3.
- liteSrc = d*sa + da*(s2 - sa) * (4.0f*d <= da).thenElse(darkDst, liteDst); // 2 or 3?
- return s*inv(da) + d*inv(sa) + (s2 <= sa).thenElse(darkSrc, liteSrc); // 1 or (2 or 3)?
- }
-}
+} // namespace SK_OPTS_NS
#undef SI
#undef STAGE