SkRasterPipeline: 8x pipelines, attempt 2

Original review here: https://skia-review.googlesource.com/c/2990/

Changes since:
  - simpler implementations of load_tail() / store_tail(): slower, but more obviously correct to all compilers
  - fleshed out math ops on Sk8i and Sk8u to make unit tests happy on -Fast bot (where we always have AVX2)
  - now storing stage functions as void(*)() to avoid undefined behavior and/or linker problems. This restores 32-bit Windows.
  - all AVX2 Sk8x methods are marked always-inline, to avoid linking the "wrong" version on Debug builds.

CQ_INCLUDE_TRYBOTS=master.client.skia:Perf-Ubuntu-Clang-GCE-CPU-AVX2-x86_64-Debug-ASAN-Trybot,Perf-Ubuntu-Clang-GCE-CPU-AVX2-x86_64-Debug-GN,Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot,Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-Fast-Trybot;master.client.skia.compile:Build-Win-MSVC-x86_64-Debug-Trybot

GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=3064

Change-Id: Id0ba250037e271a9475fe2f0989d64f0aa909bae
Reviewed-on: https://skia-review.googlesource.com/3064
Reviewed-by: Mike Klein <mtklein@chromium.org>
Commit-Queue: Mike Klein <mtklein@chromium.org>
author: Mike Klein <mtklein@chromium.org> 2016-10-07 11:21:06 -0400
committer: Mike Klein <mtklein@chromium.org> 2016-10-07 16:28:16 +0000
commit: a71e151c6f0be68dc96ad2d169bbc31edca8f946 (patch)
tree: 56c67a12eb299f814bb3d1f197e21512a38e3d82 /src/core/SkRasterPipeline.h
parent: 49df8d17c56ee08ecf860289d501913d356f67dc (diff)
1 files changed, 17 insertions, 15 deletions
diff --git a/src/core/SkRasterPipeline.h b/src/core/SkRasterPipeline.h
index 996c7838e3..3ef8c50d9f 100644
--- a/src/core/SkRasterPipeline.h
+++ b/src/core/SkRasterPipeline.h
@@ -56,22 +56,28 @@
 class SkRasterPipeline {
 public:
     struct Stage;
-    using Fn = void(SK_VECTORCALL *)(Stage*, size_t, size_t, Sk4f,Sk4f,Sk4f,Sk4f,
-                                                             Sk4f,Sk4f,Sk4f,Sk4f);
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
+    using V = Sk8f;
+#else
+    using V = Sk4f;
+#endif
+    using Fn = void(SK_VECTORCALL *)(Stage*, size_t, size_t, V,V,V,V,
+                                                             V,V,V,V);
+
     struct Stage {
         template <typename T>
         T ctx() { return static_cast<T>(fCtx); }
 
-        void SK_VECTORCALL next(size_t x, size_t tail, Sk4f v0, Sk4f v1, Sk4f v2, Sk4f v3,
-                                                       Sk4f v4, Sk4f v5, Sk4f v6, Sk4f v7) {
+        void SK_VECTORCALL next(size_t x, size_t tail, V v0, V v1, V v2, V v3,
+                                                       V v4, V v5, V v6, V v7) {
             // Stages are logically a pipeline, and physically are contiguous in an array.
             // To get to the next stage, we just increment our pointer to the next array element.
-            fNext(this+1, x,tail, v0,v1,v2,v3, v4,v5,v6,v7);
+            ((Fn)fNext)(this+1, x,tail, v0,v1,v2,v3, v4,v5,v6,v7);
         }
 
         // It makes next() a good bit cheaper if we hold the next function to call here,
         // rather than logically simpler choice of the function implementing this stage.
-        Fn fNext;
+        void (*fNext)();
         void* fCtx;
     };
 
@@ -84,6 +90,8 @@ public:
     void run(size_t n) { this->run(0, n); }
 
     enum StockStage {
+        just_return,
+
         store_565,
         store_srgb,
         store_f16,
@@ -134,24 +142,18 @@ public:
     void append(StockStage, void* = nullptr);
     void append(StockStage stage, const void* ctx) { this->append(stage, const_cast<void*>(ctx)); }
 
-
     // Append all stages to this pipeline.
     void extend(const SkRasterPipeline&);
 
 private:
     using Stages = SkSTArray<10, Stage, /*MEM_COPY=*/true>;
 
-    void append(Fn body, Fn tail, void*);
+    void append(void (*body)(), void (*tail)(), void*);
 
-    // This no-op default makes fBodyStart and fTailStart unconditionally safe to call,
-    // and is always the last stage's fNext as a sort of safety net to make sure even a
-    // buggy pipeline can't walk off its own end.
-    static void SK_VECTORCALL JustReturn(Stage*, size_t, size_t, Sk4f,Sk4f,Sk4f,Sk4f,
-                                                                 Sk4f,Sk4f,Sk4f,Sk4f);
     Stages fBody,
            fTail;
-    Fn fBodyStart = &JustReturn,
-       fTailStart = &JustReturn;
+    void (*fBodyStart)() = nullptr;
+    void (*fTailStart)() = nullptr;
 };
 
 #endif//SkRasterPipeline_DEFINED
author	Mike Klein <mtklein@chromium.org>	2016-10-07 11:21:06 -0400
committer	Mike Klein <mtklein@chromium.org>	2016-10-07 16:28:16 +0000
commit	a71e151c6f0be68dc96ad2d169bbc31edca8f946 (patch)
tree	56c67a12eb299f814bb3d1f197e21512a38e3d82 /src/core/SkRasterPipeline.h
parent	49df8d17c56ee08ecf860289d501913d356f67dc (diff)