diff options
author | Mike Klein <mtklein@chromium.org> | 2016-10-06 15:06:38 -0400 |
---|---|---|
committer | Skia Commit-Bot <skia-commit-bot@chromium.org> | 2016-10-07 12:52:29 +0000 |
commit | 1aebdaee0e2aa4324509fd3ad4c40c21703ae4a2 (patch) | |
tree | c5ffae6c59217f3d228891177e1d50d7f784801a /src/core/SkOpts.cpp | |
parent | 2766cc567d5c939730fadd2d865e4bdf05477263 (diff) |
SkRasterPipeline: 8x pipelines
Bench runtime changes:
sRGB: 7194 -> 3735 = 1.93x faster
F16: 6531 -> 2559 = 2.55x faster
Instead of building 4x and 1-3x pipelines and then maybe 8x and 1-7x, build either the short ones or the long ones, but not both. If we just take care to use a compatible run_pipeline(), there's some cross-module type disagreement, but everything works out in the end.
Oddly, a few places that looked like they'd be faster using SkNx_fma() or Sk4f_round()/Sk8f_round() are actually faster the long way, e.g. multiply, add 0.5, truncate. Curious! In all the other places you see here that I've used SkNx_fma(), it's been a significant speedup.
This folds in a couple refactors and cleanups that I've been meaning to do. Hope you don't mind... I find the new code considerably easier to read than the old code.
BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2990
CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot
Change-Id: I1c82e5755d8e44cc0b9c6673d04b117f85d71a3a
Reviewed-on: https://skia-review.googlesource.com/2990
Reviewed-by: Matt Sarett <msarett@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
Diffstat (limited to 'src/core/SkOpts.cpp')
-rw-r--r-- | src/core/SkOpts.cpp | 70 |
1 files changed, 37 insertions, 33 deletions
diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp index 7784e7fcf1..a57808da60 100644 --- a/src/core/SkOpts.cpp +++ b/src/core/SkOpts.cpp @@ -88,30 +88,32 @@ namespace SkOpts { DEFINE_DEFAULT(srcover_srgb_srgb); DEFINE_DEFAULT(hash_fn); + + DEFINE_DEFAULT(run_pipeline); #undef DEFINE_DEFAULT - // TODO: might be nice to only create one instance of tail-insensitive stages. + SkRasterPipeline::Fn body[] = { + SK_OPTS_NS::just_return, - SkRasterPipeline::Fn stages_4[] = { - stage_4<SK_OPTS_NS::store_565 , false>, - stage_4<SK_OPTS_NS::store_srgb, false>, - stage_4<SK_OPTS_NS::store_f16 , false>, + SK_OPTS_NS::store_565, + SK_OPTS_NS::store_srgb, + SK_OPTS_NS::store_f16, - stage_4<SK_OPTS_NS::load_s_565 , true>, - stage_4<SK_OPTS_NS::load_s_srgb, true>, - stage_4<SK_OPTS_NS::load_s_f16 , true>, + SK_OPTS_NS::load_s_565, + SK_OPTS_NS::load_s_srgb, + SK_OPTS_NS::load_s_f16, - stage_4<SK_OPTS_NS::load_d_565 , true>, - stage_4<SK_OPTS_NS::load_d_srgb, true>, - stage_4<SK_OPTS_NS::load_d_f16 , true>, + SK_OPTS_NS::load_d_565, + SK_OPTS_NS::load_d_srgb, + SK_OPTS_NS::load_d_f16, - stage_4<SK_OPTS_NS::scale_u8, true>, + SK_OPTS_NS::scale_u8, - stage_4<SK_OPTS_NS::lerp_u8 , true>, - stage_4<SK_OPTS_NS::lerp_565 , true>, - stage_4<SK_OPTS_NS::lerp_constant_float, true>, + SK_OPTS_NS::lerp_u8, + SK_OPTS_NS::lerp_565, + SK_OPTS_NS::lerp_constant_float, - stage_4<SK_OPTS_NS::constant_color, true>, + SK_OPTS_NS::constant_color, SK_OPTS_NS::dst, SK_OPTS_NS::dstatop, @@ -138,28 +140,30 @@ namespace SkOpts { SK_OPTS_NS::overlay, SK_OPTS_NS::softlight, }; - static_assert(SK_ARRAY_COUNT(stages_4) == SkRasterPipeline::kNumStockStages, ""); + static_assert(SK_ARRAY_COUNT(body) == SkRasterPipeline::kNumStockStages, ""); + + SkRasterPipeline::Fn tail[] = { + SK_OPTS_NS::just_return, - SkRasterPipeline::Fn stages_1_3[] = { - stage_1_3<SK_OPTS_NS::store_565 , false>, - stage_1_3<SK_OPTS_NS::store_srgb, false>, - stage_1_3<SK_OPTS_NS::store_f16 , false>, + SK_OPTS_NS::store_565_tail, + 
SK_OPTS_NS::store_srgb_tail, + SK_OPTS_NS::store_f16_tail, - stage_1_3<SK_OPTS_NS::load_s_565 , true>, - stage_1_3<SK_OPTS_NS::load_s_srgb, true>, - stage_1_3<SK_OPTS_NS::load_s_f16 , true>, + SK_OPTS_NS::load_s_565_tail, + SK_OPTS_NS::load_s_srgb_tail, + SK_OPTS_NS::load_s_f16_tail, - stage_1_3<SK_OPTS_NS::load_d_565 , true>, - stage_1_3<SK_OPTS_NS::load_d_srgb, true>, - stage_1_3<SK_OPTS_NS::load_d_f16 , true>, + SK_OPTS_NS::load_d_565_tail, + SK_OPTS_NS::load_d_srgb_tail, + SK_OPTS_NS::load_d_f16_tail, - stage_1_3<SK_OPTS_NS::scale_u8, true>, + SK_OPTS_NS::scale_u8_tail, - stage_1_3<SK_OPTS_NS::lerp_u8 , true>, - stage_1_3<SK_OPTS_NS::lerp_565 , true>, - stage_1_3<SK_OPTS_NS::lerp_constant_float, true>, + SK_OPTS_NS::lerp_u8_tail, + SK_OPTS_NS::lerp_565_tail, + SK_OPTS_NS::lerp_constant_float, - stage_1_3<SK_OPTS_NS::constant_color, true>, + SK_OPTS_NS::constant_color, SK_OPTS_NS::dst, SK_OPTS_NS::dstatop, @@ -186,7 +190,7 @@ namespace SkOpts { SK_OPTS_NS::overlay, SK_OPTS_NS::softlight, }; - static_assert(SK_ARRAY_COUNT(stages_1_3) == SkRasterPipeline::kNumStockStages, ""); + static_assert(SK_ARRAY_COUNT(tail) == SkRasterPipeline::kNumStockStages, ""); // Each Init_foo() is defined in src/opts/SkOpts_foo.cpp. void Init_ssse3(); |