diff options
Diffstat (limited to 'src/jumper/SkJumper.cpp')
-rw-r--r-- | src/jumper/SkJumper.cpp | 215 |
1 files changed, 85 insertions, 130 deletions
diff --git a/src/jumper/SkJumper.cpp b/src/jumper/SkJumper.cpp index 2ed95a1e0e..ea7da5473d 100644 --- a/src/jumper/SkJumper.cpp +++ b/src/jumper/SkJumper.cpp @@ -7,13 +7,14 @@ #include "SkCpu.h" #include "SkJumper.h" -#include "SkJumper_generated.h" #include "SkRasterPipeline.h" #include "SkTemplates.h" + // Stages expect these constants to be set to these values. // It's fine to rearrange and add new ones if you update SkJumper_constants. -static const SkJumper_constants kConstants = { +using K = const SkJumper_constants; +static K kConstants = { 1.0f, 0.5f, 255.0f, 1/255.0f, 0x000000ff, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}, 0.0025f, 0.6975f, 0.3000f, 1/12.92f, 0.055f, // from_srgb @@ -50,171 +51,125 @@ static const SkJumper_constants kConstants = { M(clamp_y) \ M(linear_gradient_2stops) -// Declare the portable, single pixel stages that are linked into Skia from SkJumper_stages.o. +// We can't express the real types of most stage functions portably, so we use a stand-in. +// We'll only ever call start_pipeline(), which then chains into the rest for us. +using StageFn = void(void); + extern "C" { - void sk_start_pipeline(size_t, void**, const SkJumper_constants*); - - // We use void() as a convenient stand-in for the real stage function type. - // We never call these directly, so we don't really need to know their real types. - void sk_just_return(void); -#define M(st) void sk_##st(void); - STAGES(M) -#undef M -} -// Translate SkRasterPipeline's enum to pointers to our portable, single pixel stages. -static void* portable_lookup(SkRasterPipeline::StockStage st) { - switch (st) { - default: return nullptr; - #define M(st) case SkRasterPipeline::st: return (void*)sk_##st; +#if defined(__x86_64__) || defined(_M_X64) + void sk_start_pipeline_hsw (size_t, void**, K*); + void sk_start_pipeline_sse41(size_t, void**, K*); + void sk_start_pipeline_sse2 (size_t, void**, K*); + + StageFn sk_just_return_hsw, + sk_just_return_sse41, + sk_just_return_sse2; + + #define M(st) StageFn sk_##st##_hsw; + STAGES(M) + #undef M + #define M(st) StageFn sk_##st##_sse41; + STAGES(M) + #undef M + #define M(st) StageFn sk_##st##_sse2; + STAGES(M) + #undef M +#endif + + // Portable, single-pixel stages. + void sk_start_pipeline(size_t, void**, K*); + StageFn sk_just_return; + #define M(st) StageFn sk_##st; STAGES(M) #undef M - } } -// The non-portable options are pre-compiled static data arrays pulled in from SkJumper_generated.h. -#if defined(__aarch64__) - static void* aarch64_lookup(SkRasterPipeline::StockStage st) { - switch (st) { - default: return nullptr; - #define M(st) case SkRasterPipeline::st: return (void*)aarch64_sk_##st; - STAGES(M) - #undef M - } - } -#elif defined(__ARM_NEON__) - static void* armv7_lookup(SkRasterPipeline::StockStage st) { - switch (st) { - default: return nullptr; - #define M(st) case SkRasterPipeline::st: return (void*)armv7_sk_##st; - STAGES(M) - #undef M - } - } -#elif defined(__x86_64__) || defined(_M_X64) - static void* sse2_lookup(SkRasterPipeline::StockStage st) { +// Translate SkRasterPipeline's StockStage enum to StageFn function pointers. + +#if defined(__x86_64__) || defined(_M_X64) + static StageFn* lookup_hsw(SkRasterPipeline::StockStage st) { switch (st) { default: return nullptr; - #define M(st) case SkRasterPipeline::st: return (void*)sse2_sk_##st; + #define M(st) case SkRasterPipeline::st: return sk_##st##_hsw; STAGES(M) #undef M } } - static void* sse41_lookup(SkRasterPipeline::StockStage st) { + static StageFn* lookup_sse41(SkRasterPipeline::StockStage st) { switch (st) { default: return nullptr; - #define M(st) case SkRasterPipeline::st: return (void*)sse41_sk_##st; + #define M(st) case SkRasterPipeline::st: return sk_##st##_sse41; STAGES(M) #undef M } } - static void* hsw_lookup(SkRasterPipeline::StockStage st) { + static StageFn* lookup_sse2(SkRasterPipeline::StockStage st) { switch (st) { default: return nullptr; - #define M(st) case SkRasterPipeline::st: return (void*)hsw_sk_##st; + #define M(st) case SkRasterPipeline::st: return sk_##st##_sse2; STAGES(M) #undef M } } #endif -bool SkRasterPipeline::run_with_jumper(size_t x, size_t n) const { - // We'll look for the best vector instruction set and stride we can use. - size_t stride = 0; - void* (*lookup)(SkRasterPipeline::StockStage) = nullptr; - void* start_pipeline = nullptr; - void* just_return = nullptr; - -#if defined(__aarch64__) - stride = 4; - lookup = aarch64_lookup; - start_pipeline = (void*)aarch64_sk_start_pipeline; - just_return = (void*)aarch64_sk_just_return; - -#elif defined(__ARM_NEON__) - if (SkCpu::Supports(SkCpu::NEON|SkCpu::NEON_FMA|SkCpu::VFP_FP16)) { - stride = 2; - lookup = armv7_lookup; - start_pipeline = (void*)armv7_sk_start_pipeline; - just_return = (void*)armv7_sk_just_return; - } - -#elif defined(__x86_64__) || defined(_M_X64) - stride = 4; - lookup = sse2_lookup; - start_pipeline = (void*)sse2_sk_start_pipeline; - just_return = (void*)sse2_sk_just_return; - if (SkCpu::Supports(SkCpu::SSE41)) { - stride = 4; - lookup = sse41_lookup; - start_pipeline = (void*)sse41_sk_start_pipeline; - just_return = (void*)sse41_sk_just_return; - } - if (SkCpu::Supports(SkCpu::HSW)) { - stride = 8; - lookup = hsw_lookup; - start_pipeline = (void*)hsw_sk_start_pipeline; - just_return = (void*)hsw_sk_just_return; - } -#endif - -#if defined(_MSC_VER) - if (start_pipeline == (void*)sse2_sk_start_pipeline) { - start_pipeline = (void*)sse2_sk_start_pipeline_ms; - } - if (start_pipeline == (void*)sse41_sk_start_pipeline) { - start_pipeline = (void*)sse41_sk_start_pipeline_ms; - } - if (start_pipeline == (void*)hsw_sk_start_pipeline) { - start_pipeline = (void*)hsw_sk_start_pipeline_ms; +static StageFn* lookup_portable(SkRasterPipeline::StockStage st) { + switch (st) { + default: return nullptr; + #define M(st) case SkRasterPipeline::st: return sk_##st; + STAGES(M) + #undef M } -#endif +} +bool SkRasterPipeline::run_with_jumper(size_t x, size_t n) const { SkAutoSTMalloc<64, void*> program(2*fStages.size() + 1); - - // If possible, build and run a program to run at full vector stride. const size_t limit = x+n; - if (stride) { - void** ip = program.get(); - for (auto&& st : fStages) { - auto fn = lookup(st.stage); - if (!fn) { - return false; + auto build_and_run = [&](size_t stride, + StageFn* (*lookup)(SkRasterPipeline::StockStage), + StageFn* just_return, + void (*start_pipeline)(size_t, void**, K*)) { + if (x + stride <= limit) { + void** ip = program.get(); + for (auto&& st : fStages) { + auto fn = lookup(st.stage); + if (!fn) { + return false; + } + *ip++ = (void*)fn; + *ip++ = st.ctx; } - *ip++ = fn; - *ip++ = st.ctx; - } - *ip = (void*)just_return; + *ip = (void*)just_return; - auto start = (decltype(&sk_start_pipeline))start_pipeline; - while (x + stride <= limit) { - start(x, program.get(), &kConstants); - x += stride; + while (x + stride <= limit) { + start_pipeline(x, program.get(), &kConstants); + x += stride; + } + } + return true; + }; + + // While possible, build and run at full vector stride. +#if defined(__x86_64__) || defined(_M_X64) + if (1 && SkCpu::Supports(SkCpu::HSW)) { + if (!build_and_run(8, lookup_hsw, sk_just_return_hsw, sk_start_pipeline_hsw)) { + return false; } } - - // If there's any leftover, build and run stride=1 portable code. - if (x < limit) { - stride = 1; - - void** ip = program.get(); - for (auto&& st : fStages) { - auto fn = portable_lookup(st.stage); - if (!fn) { - return false; - } - *ip++ = fn; - *ip++ = st.ctx; + if (1 && SkCpu::Supports(SkCpu::SSE41)) { + if (!build_and_run(4, lookup_sse41, sk_just_return_sse41, sk_start_pipeline_sse41)) { + return false; } - *ip = (void*)sk_just_return; - - auto start = sk_start_pipeline; - while (x + stride <= limit) { - start(x, program.get(), &kConstants); - x += stride; + } + if (1 && SkCpu::Supports(SkCpu::SSE2)) { + if (!build_and_run(4, lookup_sse2, sk_just_return_sse2, sk_start_pipeline_sse2)) { + return false; } } +#endif - return true; + // Finish up any leftover with portable code one pixel at a time. + return build_and_run(1, lookup_portable, sk_just_return, sk_start_pipeline); } |