aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/core/SkOpts.cpp
diff options
context:
space:
mode:
authorGravatar Mike Klein <mtklein@chromium.org>2016-10-06 15:06:38 -0400
committerGravatar Skia Commit-Bot <skia-commit-bot@chromium.org>2016-10-07 12:52:29 +0000
commit 1aebdaee0e2aa4324509fd3ad4c40c21703ae4a2 (patch)
tree c5ffae6c59217f3d228891177e1d50d7f784801a /src/core/SkOpts.cpp
parent 2766cc567d5c939730fadd2d865e4bdf05477263 (diff)
SkRasterPipeline: 8x pipelines
Bench runtime changes: sRGB: 7194 -> 3735 = 1.93x faster; F16: 6531 -> 2559 = 2.55x faster. Instead of building 4x and 1-3x pipelines and then maybe 8x and 1-7x, build either the short ones or the long ones, but not both. If we just take care to use a compatible run_pipeline(), there's some cross-module type disagreement but everything works out in the end. Oddly, a few places that looked like they'd be faster using SkNx_fma() or Sk4f_round()/Sk8f_round() are actually faster the long way, e.g. multiply, add 0.5, truncate. Curious! In all the other places you see here that I've used SkNx_fma(), it's been a significant speedup. This folds in a couple of refactors and cleanups that I've been meaning to do. Hope you don't mind... I find the new code considerably easier to read than the old code. BUG=skia: GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2990 CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot Change-Id: I1c82e5755d8e44cc0b9c6673d04b117f85d71a3a Reviewed-on: https://skia-review.googlesource.com/2990 Reviewed-by: Matt Sarett <msarett@google.com> Commit-Queue: Mike Klein <mtklein@chromium.org>
Diffstat (limited to 'src/core/SkOpts.cpp')
-rw-r--r--src/core/SkOpts.cpp70
1 files changed, 37 insertions, 33 deletions
diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp
index 7784e7fcf1..a57808da60 100644
--- a/src/core/SkOpts.cpp
+++ b/src/core/SkOpts.cpp
@@ -88,30 +88,32 @@ namespace SkOpts {
DEFINE_DEFAULT(srcover_srgb_srgb);
DEFINE_DEFAULT(hash_fn);
+
+ DEFINE_DEFAULT(run_pipeline);
#undef DEFINE_DEFAULT
- // TODO: might be nice to only create one instance of tail-insensitive stages.
+ SkRasterPipeline::Fn body[] = {
+ SK_OPTS_NS::just_return,
- SkRasterPipeline::Fn stages_4[] = {
- stage_4<SK_OPTS_NS::store_565 , false>,
- stage_4<SK_OPTS_NS::store_srgb, false>,
- stage_4<SK_OPTS_NS::store_f16 , false>,
+ SK_OPTS_NS::store_565,
+ SK_OPTS_NS::store_srgb,
+ SK_OPTS_NS::store_f16,
- stage_4<SK_OPTS_NS::load_s_565 , true>,
- stage_4<SK_OPTS_NS::load_s_srgb, true>,
- stage_4<SK_OPTS_NS::load_s_f16 , true>,
+ SK_OPTS_NS::load_s_565,
+ SK_OPTS_NS::load_s_srgb,
+ SK_OPTS_NS::load_s_f16,
- stage_4<SK_OPTS_NS::load_d_565 , true>,
- stage_4<SK_OPTS_NS::load_d_srgb, true>,
- stage_4<SK_OPTS_NS::load_d_f16 , true>,
+ SK_OPTS_NS::load_d_565,
+ SK_OPTS_NS::load_d_srgb,
+ SK_OPTS_NS::load_d_f16,
- stage_4<SK_OPTS_NS::scale_u8, true>,
+ SK_OPTS_NS::scale_u8,
- stage_4<SK_OPTS_NS::lerp_u8 , true>,
- stage_4<SK_OPTS_NS::lerp_565 , true>,
- stage_4<SK_OPTS_NS::lerp_constant_float, true>,
+ SK_OPTS_NS::lerp_u8,
+ SK_OPTS_NS::lerp_565,
+ SK_OPTS_NS::lerp_constant_float,
- stage_4<SK_OPTS_NS::constant_color, true>,
+ SK_OPTS_NS::constant_color,
SK_OPTS_NS::dst,
SK_OPTS_NS::dstatop,
@@ -138,28 +140,30 @@ namespace SkOpts {
SK_OPTS_NS::overlay,
SK_OPTS_NS::softlight,
};
- static_assert(SK_ARRAY_COUNT(stages_4) == SkRasterPipeline::kNumStockStages, "");
+ static_assert(SK_ARRAY_COUNT(body) == SkRasterPipeline::kNumStockStages, "");
+
+ SkRasterPipeline::Fn tail[] = {
+ SK_OPTS_NS::just_return,
- SkRasterPipeline::Fn stages_1_3[] = {
- stage_1_3<SK_OPTS_NS::store_565 , false>,
- stage_1_3<SK_OPTS_NS::store_srgb, false>,
- stage_1_3<SK_OPTS_NS::store_f16 , false>,
+ SK_OPTS_NS::store_565_tail,
+ SK_OPTS_NS::store_srgb_tail,
+ SK_OPTS_NS::store_f16_tail,
- stage_1_3<SK_OPTS_NS::load_s_565 , true>,
- stage_1_3<SK_OPTS_NS::load_s_srgb, true>,
- stage_1_3<SK_OPTS_NS::load_s_f16 , true>,
+ SK_OPTS_NS::load_s_565_tail,
+ SK_OPTS_NS::load_s_srgb_tail,
+ SK_OPTS_NS::load_s_f16_tail,
- stage_1_3<SK_OPTS_NS::load_d_565 , true>,
- stage_1_3<SK_OPTS_NS::load_d_srgb, true>,
- stage_1_3<SK_OPTS_NS::load_d_f16 , true>,
+ SK_OPTS_NS::load_d_565_tail,
+ SK_OPTS_NS::load_d_srgb_tail,
+ SK_OPTS_NS::load_d_f16_tail,
- stage_1_3<SK_OPTS_NS::scale_u8, true>,
+ SK_OPTS_NS::scale_u8_tail,
- stage_1_3<SK_OPTS_NS::lerp_u8 , true>,
- stage_1_3<SK_OPTS_NS::lerp_565 , true>,
- stage_1_3<SK_OPTS_NS::lerp_constant_float, true>,
+ SK_OPTS_NS::lerp_u8_tail,
+ SK_OPTS_NS::lerp_565_tail,
+ SK_OPTS_NS::lerp_constant_float,
- stage_1_3<SK_OPTS_NS::constant_color, true>,
+ SK_OPTS_NS::constant_color,
SK_OPTS_NS::dst,
SK_OPTS_NS::dstatop,
@@ -186,7 +190,7 @@ namespace SkOpts {
SK_OPTS_NS::overlay,
SK_OPTS_NS::softlight,
};
- static_assert(SK_ARRAY_COUNT(stages_1_3) == SkRasterPipeline::kNumStockStages, "");
+ static_assert(SK_ARRAY_COUNT(tail) == SkRasterPipeline::kNumStockStages, "");
// Each Init_foo() is defined in src/opts/SkOpts_foo.cpp.
void Init_ssse3();