aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author Derek Sollenberger <djsollen@google.com> 2017-08-29 12:37:50 +0000
committer Skia Commit-Bot <skia-commit-bot@chromium.org> 2017-08-29 12:38:02 +0000
commit 6d13575108299951ecdfba6d85c915fcec2bc028 (patch)
tree 0cae7b1da7b606d9f68b4263a51dfe5495285a72
parent 6b47c7d19fcc95d2c3dbce582a8d68bb3bf6ba2a (diff)
Revert "8-bit jumper on armv8"
This reverts commit 08133583d5e1cdfdcc41b4bb078fcfb64137f058.

Reason for revert: Blocking Android Autoroller on compile error.

Original change's description:
> 8-bit jumper on armv8
>
> The GM diffs are all minor and what you'd expect.
>
> I did a quick performance sanity check, which also looks fine.
>
> $ out/ok bench rp filter:search=Modulate
> [blendmode_rect_Modulate] 30.2ms @0 32ms @95 32ms @100
> [blendmode_mask_Modulate] 12.6ms @0 12.6ms @95 14.5ms @100
> ~~~>
> [blendmode_rect_Modulate] 11.2ms @0 11.7ms @95 12.4ms @100
> [blendmode_mask_Modulate] 10.5ms @0 23.6ms @95 23.9ms @100
>
> This isn't even really the fastest we can make 8-bit go on ARMv8;
> it's actually much more natural to work de-interlaced there. Lots
> of room to follow up.
>
> Change-Id: I86b1099f6742bcb0b8b4fa153e85eaba9567cbf7
> Reviewed-on: https://skia-review.googlesource.com/39740
> Reviewed-by: Florin Malita <fmalita@chromium.org>
> Commit-Queue: Mike Klein <mtklein@chromium.org>

TBR=mtklein@chromium.org,herb@google.com,fmalita@chromium.org,reed@google.com

Change-Id: I71425d8b7fbb66be5cb50025871dd81358111da4
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://skia-review.googlesource.com/39980
Reviewed-by: Derek Sollenberger <djsollen@google.com>
Commit-Queue: Derek Sollenberger <djsollen@google.com>
-rw-r--r-- gn/core.gni 1
-rw-r--r-- public.bzl 3
-rw-r--r-- src/jumper/SkJumper.cpp 42
-rw-r--r-- src/jumper/SkJumper_stages_8bit.cpp 37
4 files changed, 22 insertions, 61 deletions
diff --git a/gn/core.gni b/gn/core.gni
index 5cb3e094b2..65b08db282 100644
--- a/gn/core.gni
+++ b/gn/core.gni
@@ -515,7 +515,6 @@ skia_core_sources = [
skia_core_sources += [
"$_src/jumper/SkJumper.cpp",
"$_src/jumper/SkJumper_stages.cpp",
- "$_src/jumper/SkJumper_stages_8bit.cpp",
]
if (is_win) {
skia_core_sources += [ "$_src/jumper/SkJumper_generated_win.S" ]
diff --git a/public.bzl b/public.bzl
index 8a37141467..80042730c9 100644
--- a/public.bzl
+++ b/public.bzl
@@ -116,6 +116,9 @@ BASE_SRCS_ALL = struct(
# Defines main.
"src/sksl/SkSLMain.cpp",
+
+ # Only pre-compiled into SkJumper_generated.S.
+ "src/jumper/SkJumper_stages_8bit.cpp",
],
)
diff --git a/src/jumper/SkJumper.cpp b/src/jumper/SkJumper.cpp
index 315110faf2..9f8e970f32 100644
--- a/src/jumper/SkJumper.cpp
+++ b/src/jumper/SkJumper.cpp
@@ -110,7 +110,7 @@ using StartPipelineFn = void(size_t,size_t,size_t,size_t, void**,K*);
extern "C" {
#if __has_feature(memory_sanitizer)
- // We'll just run baseline code.
+ // We'll just run portable code.
#elif defined(__arm__)
StartPipelineFn ASM(start_pipeline,vfp4);
@@ -168,22 +168,12 @@ extern "C" {
#endif
- // Baseline code compiled as a normal part of Skia.
+ // Portable, single-pixel stages.
StartPipelineFn sk_start_pipeline;
StageFn sk_just_return;
#define M(st) StageFn sk_##st;
SK_RASTER_PIPELINE_STAGES(M)
#undef M
-
-#if defined(__clang__) && defined(__aarch64__)
- // We also compile 8-bit stages on ARMv8 as a normal part of Skia when compiled with Clang.
- StartPipelineFn sk_start_pipeline_8bit;
- StageFn sk_just_return_8bit;
- #define M(st) StageFn sk_##st##_8bit;
- SK_RASTER_PIPELINE_STAGES(M)
- #undef M
-#endif
-
}
#if !__has_feature(memory_sanitizer) && (defined(__x86_64__) || defined(_M_X64))
@@ -208,16 +198,6 @@ extern "C" {
}
LOWP_STAGES(M)
#undef M
-#elif defined(__clang__) && defined(__aarch64__)
- template <SkRasterPipeline::StockStage st>
- static constexpr StageFn* aarch64_8bit() { return nullptr; }
-
- #define M(st) \
- template <> constexpr StageFn* aarch64_8bit<SkRasterPipeline::st>() { \
- return sk_##st##_8bit; \
- }
- LOWP_STAGES(M)
- #undef M
#endif
// Engines comprise everything we need to run SkRasterPipelines.
@@ -227,20 +207,20 @@ struct SkJumper_Engine {
StageFn* just_return;
};
-// We'll default to this baseline engine, but try to choose a better one at runtime.
-static const SkJumper_Engine kBaseline = {
+// We'll default to this portable engine, but try to choose a better one at runtime.
+static const SkJumper_Engine kPortable = {
#define M(stage) sk_##stage,
{ SK_RASTER_PIPELINE_STAGES(M) },
#undef M
sk_start_pipeline,
sk_just_return,
};
-static SkJumper_Engine gEngine = kBaseline;
+static SkJumper_Engine gEngine = kPortable;
static SkOnce gChooseEngineOnce;
static SkJumper_Engine choose_engine() {
#if __has_feature(memory_sanitizer)
- // We'll just run baseline code.
+ // We'll just run portable code.
#elif defined(__arm__)
if (1 && SkCpu::Supports(SkCpu::NEON|SkCpu::NEON_FMA|SkCpu::VFP_FP16)) {
@@ -303,7 +283,7 @@ static SkJumper_Engine choose_engine() {
}
#endif
- return kBaseline;
+ return kPortable;
}
#ifndef SK_JUMPER_DISABLE_8BIT
@@ -346,14 +326,6 @@ static SkJumper_Engine choose_engine() {
#undef M
};
}
- #elif defined(__clang__) && defined(__aarch64__)
- return {
- #define M(st) aarch64_8bit<SkRasterPipeline::st>(),
- { SK_RASTER_PIPELINE_STAGES(M) },
- sk_start_pipeline_8bit,
- sk_just_return_8bit,
- #undef M
- };
#endif
return kNone;
}
diff --git a/src/jumper/SkJumper_stages_8bit.cpp b/src/jumper/SkJumper_stages_8bit.cpp
index edd6689c8c..5c73ea8cbe 100644
--- a/src/jumper/SkJumper_stages_8bit.cpp
+++ b/src/jumper/SkJumper_stages_8bit.cpp
@@ -5,27 +5,23 @@
* found in the LICENSE file.
*/
-// This restricted SkJumper backend works on 8-bit per channel interlaced
-// pixels. This is the natural format for kN32_SkColorType buffers, and we
-// hope the stages in this file can replace many custom legacy routines.
-
#include "SkJumper.h"
#include "SkJumper_misc.h"
-// As an experiment we bake ARMv8 8-bit code in as normally compiled Skia code.
-// Any other platform (so far) is offline-only.
-#if defined(JUMPER_IS_OFFLINE) || (defined(__clang__) && defined(__aarch64__))
-
-#if defined(__aarch64__)
- #include <arm_neon.h>
-#else
+#if defined(__SSE2__)
#include <immintrin.h>
#endif
+// This restricted SkJumper backend works on 8-bit per channel interlaced
+// pixels. This is the natural format for kN32_SkColorType buffers, and we
+// hope the stages in this file can replace many custom legacy routines.
+
#if !defined(JUMPER_IS_OFFLINE)
- #define WRAP(name) sk_##name##_8bit
+ #error "This file must be pre-compiled."
#elif defined(__aarch64__)
#define WRAP(name) sk_##name##_aarch64_8bit
+#elif defined(__arm__)
+ #define WRAP(name) sk_##name##_vfp4_8bit
#elif defined(__AVX2__)
#define WRAP(name) sk_##name##_hsw_8bit
#elif defined(__SSE4_1__)
@@ -116,7 +112,7 @@ SI V operator*(V x, V y) {
template <typename T>
SI T inv(T v) { return 0xff - v; }
-
+SI V two(V v) { return v + v; }
SI V lerp(V from, V to, V t) { return to*t + from*inv(t); }
SI V alpha(V v) {
@@ -166,13 +162,10 @@ SI V saturated_add(V a, V b) {
b_lo, b_hi;
split(a.u8x4, &a_lo, &a_hi);
split(b.u8x4, &b_lo, &b_hi);
-#if defined(__aarch64__)
- return join(vqaddq_u8(a_lo, b_lo),
- vqaddq_u8(a_hi, b_hi));
-#elif defined(__AVX2__)
+#if defined(__AVX2__)
return join(_mm256_adds_epu8(a_lo, b_lo),
_mm256_adds_epu8(a_hi, b_hi));
-#elif defined(__SSE2__)
+#else
return join(_mm_adds_epu8(a_lo, b_lo),
_mm_adds_epu8(a_hi, b_hi));
#endif
@@ -192,11 +185,7 @@ using Stage = void(const Params* params, void** program, R src_lo, R src_hi, R d
MAYBE_MSABI
extern "C" void WRAP(start_pipeline)(size_t x, size_t y, size_t xlimit, size_t ylimit,
void** program, const SkJumper_constants*) {
-#if defined(JUMPER_IS_OFFLINE)
- R r; // Fastest to start uninitialized.
-#else
- R r{}; // Next best is zero'd for compilers that will complain about uninitialized values.
-#endif
+ R r;
auto start = (Stage*)load_and_inc(program);
for (; y < ylimit; y++) {
Params params = { x,y,0 };
@@ -472,5 +461,3 @@ STAGE(overlay) {
// colorburn |
// colordodge > these involve division, which makes them (much) slower than the float stages.
// softlight |
-
-#endif