diff options
author | Derek Sollenberger <djsollen@google.com> | 2017-08-29 12:37:50 +0000 |
---|---|---|
committer | Skia Commit-Bot <skia-commit-bot@chromium.org> | 2017-08-29 12:38:02 +0000 |
commit | 6d13575108299951ecdfba6d85c915fcec2bc028 (patch) | |
tree | 0cae7b1da7b606d9f68b4263a51dfe5495285a72 | |
parent | 6b47c7d19fcc95d2c3dbce582a8d68bb3bf6ba2a (diff) |
Revert "8-bit jumper on armv8"
This reverts commit 08133583d5e1cdfdcc41b4bb078fcfb64137f058.
Reason for revert: Blocking Android Autoroller on compile error.
Original change's description:
> 8-bit jumper on armv8
>
> The GM diffs are all minor and what you'd expect.
>
> I did a quick performance sanity check, which also looks fine.
>
> $ out/ok bench rp filter:search=Modulate
> [blendmode_rect_Modulate] 30.2ms @0 32ms @95 32ms @100
> [blendmode_mask_Modulate] 12.6ms @0 12.6ms @95 14.5ms @100
> ~~~>
> [blendmode_rect_Modulate] 11.2ms @0 11.7ms @95 12.4ms @100
> [blendmode_mask_Modulate] 10.5ms @0 23.6ms @95 23.9ms @100
>
> This isn't even really the fastest we can make 8-bit go on ARMv8;
> it's actually much more natural to work de-interlaced there. Lots
> of room to follow up.
>
> Change-Id: I86b1099f6742bcb0b8b4fa153e85eaba9567cbf7
> Reviewed-on: https://skia-review.googlesource.com/39740
> Reviewed-by: Florin Malita <fmalita@chromium.org>
> Commit-Queue: Mike Klein <mtklein@chromium.org>
TBR=mtklein@chromium.org,herb@google.com,fmalita@chromium.org,reed@google.com
Change-Id: I71425d8b7fbb66be5cb50025871dd81358111da4
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://skia-review.googlesource.com/39980
Reviewed-by: Derek Sollenberger <djsollen@google.com>
Commit-Queue: Derek Sollenberger <djsollen@google.com>
-rw-r--r-- | gn/core.gni | 1 |
-rw-r--r-- | public.bzl | 3 |
-rw-r--r-- | src/jumper/SkJumper.cpp | 42 |
-rw-r--r-- | src/jumper/SkJumper_stages_8bit.cpp | 37 |
4 files changed, 22 insertions, 61 deletions
diff --git a/gn/core.gni b/gn/core.gni index 5cb3e094b2..65b08db282 100644 --- a/gn/core.gni +++ b/gn/core.gni @@ -515,7 +515,6 @@ skia_core_sources = [ skia_core_sources += [ "$_src/jumper/SkJumper.cpp", "$_src/jumper/SkJumper_stages.cpp", - "$_src/jumper/SkJumper_stages_8bit.cpp", ] if (is_win) { skia_core_sources += [ "$_src/jumper/SkJumper_generated_win.S" ] diff --git a/public.bzl b/public.bzl index 8a37141467..80042730c9 100644 --- a/public.bzl +++ b/public.bzl @@ -116,6 +116,9 @@ BASE_SRCS_ALL = struct( # Defines main. "src/sksl/SkSLMain.cpp", + + # Only pre-compiled into SkJumper_generated.S. + "src/jumper/SkJumper_stages_8bit.cpp", ], ) diff --git a/src/jumper/SkJumper.cpp b/src/jumper/SkJumper.cpp index 315110faf2..9f8e970f32 100644 --- a/src/jumper/SkJumper.cpp +++ b/src/jumper/SkJumper.cpp @@ -110,7 +110,7 @@ using StartPipelineFn = void(size_t,size_t,size_t,size_t, void**,K*); extern "C" { #if __has_feature(memory_sanitizer) - // We'll just run baseline code. + // We'll just run portable code. #elif defined(__arm__) StartPipelineFn ASM(start_pipeline,vfp4); @@ -168,22 +168,12 @@ extern "C" { #endif - // Baseline code compiled as a normal part of Skia. + // Portable, single-pixel stages. StartPipelineFn sk_start_pipeline; StageFn sk_just_return; #define M(st) StageFn sk_##st; SK_RASTER_PIPELINE_STAGES(M) #undef M - -#if defined(__clang__) && defined(__aarch64__) - // We also compile 8-bit stages on ARMv8 as a normal part of Skia when compiled with Clang. 
- StartPipelineFn sk_start_pipeline_8bit; - StageFn sk_just_return_8bit; - #define M(st) StageFn sk_##st##_8bit; - SK_RASTER_PIPELINE_STAGES(M) - #undef M -#endif - } #if !__has_feature(memory_sanitizer) && (defined(__x86_64__) || defined(_M_X64)) @@ -208,16 +198,6 @@ extern "C" { } LOWP_STAGES(M) #undef M -#elif defined(__clang__) && defined(__aarch64__) - template <SkRasterPipeline::StockStage st> - static constexpr StageFn* aarch64_8bit() { return nullptr; } - - #define M(st) \ - template <> constexpr StageFn* aarch64_8bit<SkRasterPipeline::st>() { \ - return sk_##st##_8bit; \ - } - LOWP_STAGES(M) - #undef M #endif // Engines comprise everything we need to run SkRasterPipelines. @@ -227,20 +207,20 @@ struct SkJumper_Engine { StageFn* just_return; }; -// We'll default to this baseline engine, but try to choose a better one at runtime. -static const SkJumper_Engine kBaseline = { +// We'll default to this portable engine, but try to choose a better one at runtime. +static const SkJumper_Engine kPortable = { #define M(stage) sk_##stage, { SK_RASTER_PIPELINE_STAGES(M) }, #undef M sk_start_pipeline, sk_just_return, }; -static SkJumper_Engine gEngine = kBaseline; +static SkJumper_Engine gEngine = kPortable; static SkOnce gChooseEngineOnce; static SkJumper_Engine choose_engine() { #if __has_feature(memory_sanitizer) - // We'll just run baseline code. + // We'll just run portable code. 
#elif defined(__arm__) if (1 && SkCpu::Supports(SkCpu::NEON|SkCpu::NEON_FMA|SkCpu::VFP_FP16)) { @@ -303,7 +283,7 @@ static SkJumper_Engine choose_engine() { } #endif - return kBaseline; + return kPortable; } #ifndef SK_JUMPER_DISABLE_8BIT @@ -346,14 +326,6 @@ static SkJumper_Engine choose_engine() { #undef M }; } - #elif defined(__clang__) && defined(__aarch64__) - return { - #define M(st) aarch64_8bit<SkRasterPipeline::st>(), - { SK_RASTER_PIPELINE_STAGES(M) }, - sk_start_pipeline_8bit, - sk_just_return_8bit, - #undef M - }; #endif return kNone; } diff --git a/src/jumper/SkJumper_stages_8bit.cpp b/src/jumper/SkJumper_stages_8bit.cpp index edd6689c8c..5c73ea8cbe 100644 --- a/src/jumper/SkJumper_stages_8bit.cpp +++ b/src/jumper/SkJumper_stages_8bit.cpp @@ -5,27 +5,23 @@ * found in the LICENSE file. */ -// This restricted SkJumper backend works on 8-bit per channel interlaced -// pixels. This is the natural format for kN32_SkColorType buffers, and we -// hope the stages in this file can replace many custom legacy routines. - #include "SkJumper.h" #include "SkJumper_misc.h" -// As an experiment we bake ARMv8 8-bit code in as normally compiled Skia code. -// Any other platform (so far) is offline-only. -#if defined(JUMPER_IS_OFFLINE) || (defined(__clang__) && defined(__aarch64__)) - -#if defined(__aarch64__) - #include <arm_neon.h> -#else +#if defined(__SSE2__) #include <immintrin.h> #endif +// This restricted SkJumper backend works on 8-bit per channel interlaced +// pixels. This is the natural format for kN32_SkColorType buffers, and we +// hope the stages in this file can replace many custom legacy routines. + #if !defined(JUMPER_IS_OFFLINE) - #define WRAP(name) sk_##name##_8bit + #error "This file must be pre-compiled." 
#elif defined(__aarch64__) #define WRAP(name) sk_##name##_aarch64_8bit +#elif defined(__arm__) + #define WRAP(name) sk_##name##_vfp4_8bit #elif defined(__AVX2__) #define WRAP(name) sk_##name##_hsw_8bit #elif defined(__SSE4_1__) @@ -116,7 +112,7 @@ SI V operator*(V x, V y) { template <typename T> SI T inv(T v) { return 0xff - v; } - +SI V two(V v) { return v + v; } SI V lerp(V from, V to, V t) { return to*t + from*inv(t); } SI V alpha(V v) { @@ -166,13 +162,10 @@ SI V saturated_add(V a, V b) { b_lo, b_hi; split(a.u8x4, &a_lo, &a_hi); split(b.u8x4, &b_lo, &b_hi); -#if defined(__aarch64__) - return join(vqaddq_u8(a_lo, b_lo), - vqaddq_u8(a_hi, b_hi)); -#elif defined(__AVX2__) +#if defined(__AVX2__) return join(_mm256_adds_epu8(a_lo, b_lo), _mm256_adds_epu8(a_hi, b_hi)); -#elif defined(__SSE2__) +#else return join(_mm_adds_epu8(a_lo, b_lo), _mm_adds_epu8(a_hi, b_hi)); #endif @@ -192,11 +185,7 @@ using Stage = void(const Params* params, void** program, R src_lo, R src_hi, R d MAYBE_MSABI extern "C" void WRAP(start_pipeline)(size_t x, size_t y, size_t xlimit, size_t ylimit, void** program, const SkJumper_constants*) { -#if defined(JUMPER_IS_OFFLINE) - R r; // Fastest to start uninitialized. -#else - R r{}; // Next best is zero'd for compilers that will complain about uninitialized values. -#endif + R r; auto start = (Stage*)load_and_inc(program); for (; y < ylimit; y++) { Params params = { x,y,0 }; @@ -472,5 +461,3 @@ STAGE(overlay) { // colorburn | // colordodge > these involve division, which makes them (much) slower than the float stages. // softlight | - -#endif |