about | summary | refs | log | tree | commit | diff | homepage
path: root/src/jumper/SkJumper.cpp
diff options
context:
space:
mode:
author: Mike Klein <mtklein@chromium.org> 2018-02-27 10:37:40 -0500
committer: Skia Commit-Bot <skia-commit-bot@chromium.org> 2018-03-07 14:40:10 +0000
commit: 22e536e3a1a09405d1c0e6f071717a726d86e8d4 (patch)
tree: 1fbcfdd6a6c60e612e22038770ae7eeb8e306ee4 /src/jumper/SkJumper.cpp
parent: 25d07fc354f9150f6d2292be27554db4fc454ad6 (diff)
make SkJumper stages normal Skia code
Enough clients are using Clang now that we can say, use Clang to build if you want these software pipeline stages to go fast.

This lets us drop the offline build aspect of SkJumper stages, instead building as part of Skia using the SkOpts framework.

I think everything should work, except I've (temporarily) removed AVX-512 support. I will put this back in a follow-up.

I have had to drop Windows down to __vectorcall and our narrower stage calling convention that keeps the d-registers on the stack. I tried forcing sysv_abi, but that crashed Clang. :/

Added a TODO to use the same narrower stage calling convention for lowp stages... we just *don't* today, for no good reason.

Change-Id: Iaaa792ffe4deab3508d2dc5d0008c163c24b3383
Reviewed-on: https://skia-review.googlesource.com/110641
Commit-Queue: Mike Klein <mtklein@chromium.org>
Reviewed-by: Herb Derby <herb@google.com>
Reviewed-by: Florin Malita <fmalita@chromium.org>
Diffstat (limited to 'src/jumper/SkJumper.cpp')
-rw-r--r-- src/jumper/SkJumper.cpp 451
1 files changed, 14 insertions, 437 deletions
diff --git a/src/jumper/SkJumper.cpp b/src/jumper/SkJumper.cpp
index 63d8ff1c9c..d14d83a776 100644
--- a/src/jumper/SkJumper.cpp
+++ b/src/jumper/SkJumper.cpp
@@ -5,468 +5,46 @@
* found in the LICENSE file.
*/
-#include "SkColorData.h"
-#include "SkCpu.h"
#include "SkJumper.h"
-#include "SkOnce.h"
+#include "SkOpts.h"
#include "SkRasterPipeline.h"
#include "SkTemplates.h"
-#if !defined(SK_JUMPER_USE_ASSEMBLY)
- // We'll use __has_feature(memory_sanitizer) to detect MSAN.
- // SkJumper_generated.S is not compiled with MSAN, so MSAN would yell really loud.
- #if !defined(__has_feature)
- #define __has_feature(x) 0
- #endif
-
- #if 0 || __has_feature(memory_sanitizer)
- #define SK_JUMPER_USE_ASSEMBLY 0
- #else
- #define SK_JUMPER_USE_ASSEMBLY 1
- #endif
-#endif
-
-#define M(st) +1
-static const int kNumStages = SK_RASTER_PIPELINE_STAGES(M);
-#undef M
-
-#ifndef SK_JUMPER_DISABLE_8BIT
- // Intentionally commented out; optional logging for local debugging.
- #if 0 && SK_JUMPER_USE_ASSEMBLY && (defined(__x86_64__) || defined(_M_X64))
- #include <atomic>
-
- #define M(st) #st,
- static const char* kStageNames[] = { SK_RASTER_PIPELINE_STAGES(M) };
- #undef M
-
- static std::atomic<int> gMissingStageCounters[kNumStages];
-
- static void log_missing(SkRasterPipeline::StockStage st) {
- static SkOnce once;
- once([] { atexit([] {
- int total = 0;
- for (int i = 0; i < kNumStages; i++) {
- if (int count = gMissingStageCounters[i].load()) {
- SkDebugf("%7d\t%s\n", count, kStageNames[i]);
- total += count;
- }
- }
- SkDebugf("%7d total\n", total);
- }); });
-
- gMissingStageCounters[st]++;
- }
- #else
- static void log_missing(SkRasterPipeline::StockStage) {}
- #endif
-#endif
-
-// We can't express the real types of most stage functions portably, so we use a stand-in.
-// We'll only ever call start_pipeline(), which then chains into the rest.
-using StageFn = void(void);
-using StartPipelineFn = void(size_t,size_t,size_t,size_t, void**);
-
-// Some platforms expect C "name" maps to asm "_name", others to "name".
-#if defined(__APPLE__)
- #define ASM(name, suffix) sk_##name##_##suffix
-#else
- #define ASM(name, suffix) _sk_##name##_##suffix
-#endif
-
-extern "C" {
-
-#if !SK_JUMPER_USE_ASSEMBLY
- // We'll just run baseline code.
-
-#elif defined(__x86_64__) || defined(_M_X64)
- StartPipelineFn ASM(start_pipeline, skx),
- ASM(start_pipeline, hsw),
- ASM(start_pipeline, avx),
- ASM(start_pipeline, sse41),
- ASM(start_pipeline, sse2),
- ASM(start_pipeline, hsw_lowp),
- ASM(start_pipeline,sse41_lowp),
- ASM(start_pipeline, sse2_lowp);
-
- StageFn ASM(just_return, skx),
- ASM(just_return, hsw),
- ASM(just_return, avx),
- ASM(just_return, sse41),
- ASM(just_return, sse2),
- ASM(just_return, hsw_lowp),
- ASM(just_return,sse41_lowp),
- ASM(just_return, sse2_lowp);
-
- #define M(st) StageFn ASM(st, skx), \
- ASM(st, hsw), \
- ASM(st, avx), \
- ASM(st,sse41), \
- ASM(st, sse2), \
- ASM(st, hsw_lowp), \
- ASM(st,sse41_lowp), \
- ASM(st, sse2_lowp);
- SK_RASTER_PIPELINE_STAGES(M)
- #undef M
-
-#elif defined(__i386__) || defined(_M_IX86)
- StartPipelineFn ASM(start_pipeline,sse2),
- ASM(start_pipeline,sse2_lowp);
- StageFn ASM(just_return,sse2),
- ASM(just_return,sse2_lowp);
- #define M(st) StageFn ASM(st,sse2), \
- ASM(st,sse2_lowp);
- SK_RASTER_PIPELINE_STAGES(M)
- #undef M
-
-#endif
-
- // Baseline code compiled as a normal part of Skia.
- StartPipelineFn sk_start_pipeline;
- StageFn sk_just_return;
- #define M(st) StageFn sk_##st;
- SK_RASTER_PIPELINE_STAGES(M)
- #undef M
-
-#if defined(JUMPER_HAS_NEON_LOWP)
- StartPipelineFn sk_start_pipeline_lowp;
- StageFn sk_just_return_lowp;
- #define M(st) StageFn sk_##st##_lowp;
- SK_RASTER_PIPELINE_STAGES(M)
- #undef M
-#endif
-
-}
-
-#if SK_JUMPER_USE_ASSEMBLY
- #if defined(__x86_64__) || defined(_M_X64)
- template <SkRasterPipeline::StockStage st>
- static constexpr StageFn* hsw_lowp();
-
- template <SkRasterPipeline::StockStage st>
- static constexpr StageFn* sse41_lowp();
-
- template <SkRasterPipeline::StockStage st>
- static constexpr StageFn* sse2_lowp();
-
- #define LOWP(st) \
- template <> constexpr StageFn* hsw_lowp<SkRasterPipeline::st>() { \
- return ASM(st,hsw_lowp); \
- } \
- template <> constexpr StageFn* sse41_lowp<SkRasterPipeline::st>() { \
- return ASM(st,sse41_lowp); \
- } \
- template <> constexpr StageFn* sse2_lowp<SkRasterPipeline::st>() { \
- return ASM(st,sse2_lowp); \
- }
- #define NOPE(st) \
- template <> constexpr StageFn* hsw_lowp<SkRasterPipeline::st>() { \
- return nullptr; \
- } \
- template <> constexpr StageFn* sse41_lowp<SkRasterPipeline::st>() { \
- return nullptr; \
- } \
- template <> constexpr StageFn* sse2_lowp<SkRasterPipeline::st>() { \
- return nullptr; \
- }
-
- #elif defined(__i386__) || defined(_M_IX86)
- template <SkRasterPipeline::StockStage st>
- static constexpr StageFn* sse2_lowp();
-
- #define LOWP(st) \
- template <> constexpr StageFn* sse2_lowp<SkRasterPipeline::st>() { \
- return ASM(st,sse2_lowp); \
- }
- #define NOPE(st) \
- template <> constexpr StageFn* sse2_lowp<SkRasterPipeline::st>() { \
- return nullptr; \
- }
-
- #elif defined(JUMPER_HAS_NEON_LOWP)
- template <SkRasterPipeline::StockStage st>
- static constexpr StageFn* neon_lowp();
-
- #define LOWP(st) \
- template <> constexpr StageFn* neon_lowp<SkRasterPipeline::st>() { \
- return sk_##st##_lowp; \
- }
- #define NOPE(st) \
- template <> constexpr StageFn* neon_lowp<SkRasterPipeline::st>() { \
- return nullptr; \
- }
-
- #else
- #define LOWP(st)
- #define NOPE(st)
-
- #endif
-
- #define TODO(st) NOPE(st) // stages that should be implemented in lowp, but aren't.
-
- NOPE(callback)
- LOWP(move_src_dst) LOWP(move_dst_src)
- NOPE(clamp_0) NOPE(clamp_1) LOWP(clamp_a) LOWP(clamp_a_dst)
- NOPE(unpremul) LOWP(premul) LOWP(premul_dst)
- LOWP(force_opaque) LOWP(force_opaque_dst)
- LOWP(set_rgb) LOWP(swap_rb) LOWP(invert)
- NOPE(from_srgb) NOPE(from_srgb_dst) NOPE(to_srgb)
- LOWP(black_color) LOWP(white_color) LOWP(uniform_color)
- LOWP(seed_shader) NOPE(dither)
- LOWP(load_a8) LOWP(load_a8_dst) LOWP(store_a8) LOWP(gather_a8)
- LOWP(load_g8) LOWP(load_g8_dst) LOWP(gather_g8)
- LOWP(load_565) LOWP(load_565_dst) LOWP(store_565) LOWP(gather_565)
- LOWP(load_4444) LOWP(load_4444_dst) LOWP(store_4444) LOWP(gather_4444)
- NOPE(load_f16) NOPE(load_f16_dst) NOPE(store_f16) NOPE(gather_f16)
- NOPE(load_f32) NOPE(load_f32_dst) NOPE(store_f32)
- LOWP(load_8888) LOWP(load_8888_dst) LOWP(store_8888) LOWP(gather_8888)
- LOWP(load_bgra) LOWP(load_bgra_dst) LOWP(store_bgra) LOWP(gather_bgra)
- NOPE(load_1010102) NOPE(load_1010102_dst) NOPE(store_1010102) NOPE(gather_1010102)
- TODO(bilerp_clamp_8888)
- TODO(load_u16_be) TODO(load_rgb_u16_be) TODO(store_u16_be)
- NOPE(load_tables_u16_be) NOPE(load_tables_rgb_u16_be) NOPE(load_tables)
- NOPE(load_rgba) NOPE(store_rgba)
- LOWP(scale_u8) LOWP(scale_565) LOWP(scale_1_float)
- LOWP( lerp_u8) LOWP( lerp_565) LOWP( lerp_1_float)
- LOWP(dstatop) LOWP(dstin) LOWP(dstout) LOWP(dstover)
- LOWP(srcatop) LOWP(srcin) LOWP(srcout) LOWP(srcover)
- LOWP(clear) LOWP(modulate) LOWP(multiply) LOWP(plus_) LOWP(screen) LOWP(xor_)
- NOPE(colorburn) NOPE(colordodge) LOWP(darken) LOWP(difference)
- LOWP(exclusion) LOWP(hardlight) LOWP(lighten) LOWP(overlay) NOPE(softlight)
- NOPE(hue) NOPE(saturation) NOPE(color) NOPE(luminosity)
- LOWP(srcover_rgba_8888) LOWP(srcover_bgra_8888)
- LOWP(luminance_to_alpha)
- LOWP(matrix_translate) LOWP(matrix_scale_translate)
- LOWP(matrix_2x3) NOPE(matrix_3x4) TODO(matrix_4x5) TODO(matrix_4x3)
- LOWP(matrix_perspective)
- NOPE(parametric_r) NOPE(parametric_g) NOPE(parametric_b)
- NOPE(parametric_a) NOPE(gamma) NOPE(gamma_dst)
- NOPE(table_r) NOPE(table_g) NOPE(table_b) NOPE(table_a)
- NOPE(lab_to_xyz)
- TODO(mirror_x) TODO(repeat_x)
- TODO(mirror_y) TODO(repeat_y)
- LOWP(decal_x) LOWP(decal_y) LOWP(decal_x_and_y)
- LOWP(check_decal_mask)
- TODO(bilinear_nx) TODO(bilinear_px) TODO(bilinear_ny) TODO(bilinear_py)
- TODO(bicubic_n3x) TODO(bicubic_n1x) TODO(bicubic_p1x) TODO(bicubic_p3x)
- TODO(bicubic_n3y) TODO(bicubic_n1y) TODO(bicubic_p1y) TODO(bicubic_p3y)
- TODO(save_xy) TODO(accumulate)
- LOWP(clamp_x_1) LOWP(mirror_x_1) LOWP(repeat_x_1)
- LOWP(evenly_spaced_gradient)
- LOWP(gradient)
- LOWP(evenly_spaced_2_stop_gradient)
- LOWP(xy_to_unit_angle)
- LOWP(xy_to_radius)
- TODO(negate_x)
- TODO(xy_to_2pt_conical_strip)
- TODO(xy_to_2pt_conical_focal_on_circle)
- TODO(xy_to_2pt_conical_well_behaved)
- TODO(xy_to_2pt_conical_greater)
- TODO(xy_to_2pt_conical_smaller)
- TODO(alter_2pt_conical_compensate_focal)
- TODO(alter_2pt_conical_unswap)
- TODO(mask_2pt_conical_nan) TODO(mask_2pt_conical_degenerates) TODO(apply_vector_mask)
- TODO(byte_tables) TODO(byte_tables_rgb)
- NOPE(rgb_to_hsl) NOPE(hsl_to_rgb)
- NOPE(clut_3D) NOPE(clut_4D)
- NOPE(gauss_a_to_rgba)
-
- #undef LOWP
- #undef TODO
- #undef NOPE
-#endif
-
-// Engines comprise everything we need to run SkRasterPipelines.
-struct SkJumper_Engine {
- StageFn* stages[kNumStages];
- StartPipelineFn* start_pipeline;
- StageFn* just_return;
-};
-
-// We'll default to this baseline engine, but try to choose a better one at runtime.
-static const SkJumper_Engine kBaseline = {
-#define M(stage) sk_##stage,
- { SK_RASTER_PIPELINE_STAGES(M) },
-#undef M
- sk_start_pipeline,
- sk_just_return,
-};
-static SkJumper_Engine gEngine = kBaseline;
-static SkOnce gChooseEngineOnce;
-
-static SkJumper_Engine choose_engine() {
-#if !SK_JUMPER_USE_ASSEMBLY
- // We'll just run baseline code.
-
-#elif defined(__x86_64__) || defined(_M_X64)
- #if !defined(_MSC_VER) // No _skx stages for Windows yet.
- if (1 && SkCpu::Supports(SkCpu::SKX)) {
- return {
- #define M(stage) ASM(stage, skx),
- { SK_RASTER_PIPELINE_STAGES(M) },
- M(start_pipeline)
- M(just_return)
- #undef M
- };
- }
- #endif
- if (1 && SkCpu::Supports(SkCpu::HSW)) {
- return {
- #define M(stage) ASM(stage, hsw),
- { SK_RASTER_PIPELINE_STAGES(M) },
- M(start_pipeline)
- M(just_return)
- #undef M
- };
- }
- if (1 && SkCpu::Supports(SkCpu::AVX)) {
- return {
- #define M(stage) ASM(stage, avx),
- { SK_RASTER_PIPELINE_STAGES(M) },
- M(start_pipeline)
- M(just_return)
- #undef M
- };
- }
- if (1 && SkCpu::Supports(SkCpu::SSE41)) {
- return {
- #define M(stage) ASM(stage, sse41),
- { SK_RASTER_PIPELINE_STAGES(M) },
- M(start_pipeline)
- M(just_return)
- #undef M
- };
- }
- if (1 && SkCpu::Supports(SkCpu::SSE2)) {
- return {
- #define M(stage) ASM(stage, sse2),
- { SK_RASTER_PIPELINE_STAGES(M) },
- M(start_pipeline)
- M(just_return)
- #undef M
- };
- }
-
-#elif defined(__i386__) || defined(_M_IX86)
- if (1 && SkCpu::Supports(SkCpu::SSE2)) {
- return {
- #define M(stage) ASM(stage, sse2),
- { SK_RASTER_PIPELINE_STAGES(M) },
- M(start_pipeline)
- M(just_return)
- #undef M
- };
- }
-
-#endif
- return kBaseline;
-}
-
+SkRasterPipeline::StartPipelineFn SkRasterPipeline::build_pipeline(void** ip) const {
#ifndef SK_JUMPER_DISABLE_8BIT
- static const SkJumper_Engine kNone = {
- #define M(stage) nullptr,
- { SK_RASTER_PIPELINE_STAGES(M) },
- #undef M
- nullptr,
- nullptr,
- };
- static SkJumper_Engine gLowp = kNone;
- static SkOnce gChooseLowpOnce;
-
- static SkJumper_Engine choose_lowp() {
- #if SK_JUMPER_USE_ASSEMBLY
- #if defined(__x86_64__) || defined(_M_X64)
- if (1 && SkCpu::Supports(SkCpu::HSW)) {
- return {
- #define M(st) hsw_lowp<SkRasterPipeline::st>(),
- { SK_RASTER_PIPELINE_STAGES(M) },
- ASM(start_pipeline,hsw_lowp),
- ASM(just_return ,hsw_lowp),
- #undef M
- };
- }
- if (1 && SkCpu::Supports(SkCpu::SSE41)) {
- return {
- #define M(st) sse41_lowp<SkRasterPipeline::st>(),
- { SK_RASTER_PIPELINE_STAGES(M) },
- ASM(start_pipeline,sse41_lowp),
- ASM(just_return ,sse41_lowp),
- #undef M
- };
- }
- if (1 && SkCpu::Supports(SkCpu::SSE2)) {
- return {
- #define M(st) sse2_lowp<SkRasterPipeline::st>(),
- { SK_RASTER_PIPELINE_STAGES(M) },
- ASM(start_pipeline,sse2_lowp),
- ASM(just_return ,sse2_lowp),
- #undef M
- };
- }
- #elif defined(__i386__) || defined(_M_IX86)
- if (1 && SkCpu::Supports(SkCpu::SSE2)) {
- return {
- #define M(st) sse2_lowp<SkRasterPipeline::st>(),
- { SK_RASTER_PIPELINE_STAGES(M) },
- ASM(start_pipeline,sse2_lowp),
- ASM(just_return ,sse2_lowp),
- #undef M
- };
- }
-
- #elif defined(JUMPER_HAS_NEON_LOWP)
- return {
- #define M(st) neon_lowp<SkRasterPipeline::st>(),
- { SK_RASTER_PIPELINE_STAGES(M) },
- sk_start_pipeline_lowp,
- sk_just_return_lowp,
- #undef M
- };
- #endif
- #endif
- return kNone;
- }
-#endif
-
-const SkJumper_Engine& SkRasterPipeline::build_pipeline(void** ip) const {
-#ifndef SK_JUMPER_DISABLE_8BIT
- gChooseLowpOnce([]{ gLowp = choose_lowp(); });
-
- // First try to build a lowp pipeline. If that fails, fall back to normal float gEngine.
+ // We'll try to build a lowp pipeline, but if that fails fallback to a highp float pipeline.
void** reset_point = ip;
- *--ip = (void*)gLowp.just_return;
+
+ // Stages are stored backwards in fStages, so we reverse here, back to front.
+ *--ip = (void*)SkOpts::just_return_lowp;
for (const StageList* st = fStages; st; st = st->prev) {
if (st->stage == SkRasterPipeline::clamp_0 ||
st->stage == SkRasterPipeline::clamp_1) {
continue; // No-ops in lowp.
}
- if (StageFn* fn = gLowp.stages[st->stage]) {
+ if (auto fn = SkOpts::stages_lowp[st->stage]) {
if (st->ctx) {
*--ip = st->ctx;
}
*--ip = (void*)fn;
} else {
- log_missing(st->stage);
ip = reset_point;
break;
}
}
if (ip != reset_point) {
- return gLowp;
+ return SkOpts::start_pipeline_lowp;
}
#endif
- gChooseEngineOnce([]{ gEngine = choose_engine(); });
- // We're building the pipeline backwards, so we start with the final stage just_return.
- *--ip = (void*)gEngine.just_return;
-
- // Still going backwards, each stage's context pointer then its StageFn.
+ *--ip = (void*)SkOpts::just_return_highp;
for (const StageList* st = fStages; st; st = st->prev) {
if (st->ctx) {
*--ip = st->ctx;
}
- *--ip = (void*)gEngine.stages[st->stage];
+ *--ip = (void*)SkOpts::stages_highp[st->stage];
}
- return gEngine;
+ return SkOpts::start_pipeline_highp;
}
void SkRasterPipeline::run(size_t x, size_t y, size_t w, size_t h) const {
@@ -477,8 +55,8 @@ void SkRasterPipeline::run(size_t x, size_t y, size_t w, size_t h) const {
// Best to not use fAlloc here... we can't bound how often run() will be called.
SkAutoSTMalloc<64, void*> program(fSlotsNeeded);
- const SkJumper_Engine& engine = this->build_pipeline(program.get() + fSlotsNeeded);
- engine.start_pipeline(x,y,x+w,y+h, program.get());
+ auto start_pipeline = this->build_pipeline(program.get() + fSlotsNeeded);
+ start_pipeline(x,y,x+w,y+h, program.get());
}
std::function<void(size_t, size_t, size_t, size_t)> SkRasterPipeline::compile() const {
@@ -487,9 +65,8 @@ std::function<void(size_t, size_t, size_t, size_t)> SkRasterPipeline::compile()
}
void** program = fAlloc->makeArray<void*>(fSlotsNeeded);
- const SkJumper_Engine& engine = this->build_pipeline(program + fSlotsNeeded);
- auto start_pipeline = engine.start_pipeline;
+ auto start_pipeline = this->build_pipeline(program + fSlotsNeeded);
return [=](size_t x, size_t y, size_t w, size_t h) {
start_pipeline(x,y,x+w,y+h, program);
};