SkRasterPipeline: 8x pipelines

Bench runtime changes: sRGB: 7194 -> 3735 = 1.93x faster; F16: 6531 -> 2559 = 2.55x faster. Instead of building 4x and 1-3x pipelines and then maybe 8x and 1-7x, build either the short ones or the long ones, but not both. If we just take care to use a compatible run_pipeline(), there's some cross-module type disagreement but everything works out in the end. Oddly, a few places that looked like they'd be faster using SkNx_fma() or Sk4f_round()/Sk8f_round() are actually faster the long way, e.g. multiply, add 0.5, truncate. Curious! In all the other places you see here that I've used SkNx_fma(), it's been a significant speedup. This folds in a couple refactors and cleanups that I've been meaning to do. Hope you don't mind... I find the new code considerably easier to read than the old code. BUG=skia: GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2990 CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot Change-Id: I1c82e5755d8e44cc0b9c6673d04b117f85d71a3a Reviewed-on: https://skia-review.googlesource.com/2990 Reviewed-by: Matt Sarett <msarett@google.com> Commit-Queue: Mike Klein <mtklein@chromium.org>
author: Mike Klein <mtklein@chromium.org> 2016-10-06 15:06:38 -0400
committer: Skia Commit-Bot <skia-commit-bot@chromium.org> 2016-10-07 12:52:29 +0000
commit: 1aebdaee0e2aa4324509fd3ad4c40c21703ae4a2 (patch)
tree: c5ffae6c59217f3d228891177e1d50d7f784801a /src/opts/SkOpts_hsw.cpp
parent: 2766cc567d5c939730fadd2d865e4bdf05477263 (diff)
1 files changed, 73 insertions, 1 deletions
diff --git a/src/opts/SkOpts_hsw.cpp b/src/opts/SkOpts_hsw.cpp
index 53e2e5acdd..c994bf6534 100644
--- a/src/opts/SkOpts_hsw.cpp
+++ b/src/opts/SkOpts_hsw.cpp
@@ -7,9 +7,81 @@
 
 #include "SkOpts.h"
 
+
 #define SK_OPTS_NS hsw
+#include "SkRasterPipeline_opts.h"
 
 namespace SkOpts {
-    void Init_hsw() { }
+    void Init_hsw() {
+
+// The 32-bit MSVC __vectorcall ABI mangles type information into the names of
+// SkOpts::body, SkOpts::tail, and SkOpts::run_pipeline, so that this code will
+// not link as written: they're all defined in a file where SkRasterPipeline::V
+// is Sk4f, but here we're seeing it as Sk8f.
+//
+// We can work around this by storing those pointers as some generic function
+// pointer type like void(*)(), but it's even simpler to just not do any of this
+// when targeting 32-bit Windows.
+#if !defined(_M_IX86)
+
+        run_pipeline = SK_OPTS_NS::run_pipeline;
+
+    #define STAGE(stage)                                        \
+        body[SkRasterPipeline::stage] = SK_OPTS_NS::stage;      \
+        tail[SkRasterPipeline::stage] = SK_OPTS_NS::stage##_tail
+
+        STAGE(store_565);
+        STAGE(store_srgb);
+        STAGE(store_f16);
+
+        STAGE(load_s_565);
+        STAGE(load_s_srgb);
+        STAGE(load_s_f16);
+
+        STAGE(load_d_565);
+        STAGE(load_d_srgb);
+        STAGE(load_d_f16);
+
+        STAGE(scale_u8);
+
+        STAGE(lerp_u8);
+        STAGE(lerp_565);
+    #undef STAGE
+
+    #define STAGE(stage) \
+        body[SkRasterPipeline::stage] = SK_OPTS_NS::stage; \
+        tail[SkRasterPipeline::stage] = SK_OPTS_NS::stage
+
+        STAGE(lerp_constant_float);
+        STAGE(constant_color);
+
+        STAGE(dst);
+        STAGE(dstatop);
+        STAGE(dstin);
+        STAGE(dstout);
+        STAGE(dstover);
+        STAGE(srcatop);
+        STAGE(srcin);
+        STAGE(srcout);
+        STAGE(srcover);
+        STAGE(clear);
+        STAGE(modulate);
+        STAGE(multiply);
+        STAGE(plus_);
+        STAGE(screen);
+        STAGE(xor_);
+        STAGE(colorburn);
+        STAGE(colordodge);
+        STAGE(darken);
+        STAGE(difference);
+        STAGE(exclusion);
+        STAGE(hardlight);
+        STAGE(lighten);
+        STAGE(overlay);
+        STAGE(softlight);
+    #undef STAGE
+
+#endif // !defined(_M_IX86)
+    }
 }
author	Mike Klein <mtklein@chromium.org>	2016-10-06 15:06:38 -0400
committer	Skia Commit-Bot <skia-commit-bot@chromium.org>	2016-10-07 12:52:29 +0000
commit	1aebdaee0e2aa4324509fd3ad4c40c21703ae4a2 (patch)
tree	c5ffae6c59217f3d228891177e1d50d7f784801a /src/opts/SkOpts_hsw.cpp
parent	2766cc567d5c939730fadd2d865e4bdf05477263 (diff)