aboutsummaryrefslogtreecommitdiffhomepage
path: root/include/private/SkOnce.h
diff options
context:
space:
mode:
authorGravatar mtklein <mtklein@chromium.org>2016-04-20 13:49:15 -0700
committerGravatar Commit bot <commit-bot@chromium.org>2016-04-20 13:49:15 -0700
commit650f9e9a2630026d578970c6d031d7d4644f63b4 (patch)
tree05443d78fb7d26fefeddd0efa5919b3b962553fe /include/private/SkOnce.h
parent9134686fd9ce384e9163b44942538d033b5bff11 (diff)
SkOnce: 2 bytes -> 1 byte
This uses the same logic we worked out for SkOncePtr to reduce the memory
footprint of SkOnce from a done byte and lock byte to a single 3-state byte:
  - NotStarted: no thread has tried to run fn() yet
  - Active:     a thread is running fn()
  - Done:       fn() is complete
Threads which see Done return immediately. Threads which see NotStarted try
to move to Active, run fn(), then move to Done. Threads which see Active
spin until the active thread moves to Done.

This additionally fixes a too-weak memory order bug in SkOncePtr, and adds
a big note to explain.

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1904483003

Committed: https://skia.googlesource.com/skia/+/df02d338be8e3c1c50b48a3a9faa582703a39c07

Review URL: https://codereview.chromium.org/1904483003
Diffstat (limited to 'include/private/SkOnce.h')
-rw-r--r--include/private/SkOnce.h48
1 files changed, 39 insertions, 9 deletions
diff --git a/include/private/SkOnce.h b/include/private/SkOnce.h
index d83b63f283..1c68fb7da1 100644
--- a/include/private/SkOnce.h
+++ b/include/private/SkOnce.h
@@ -8,9 +8,9 @@
#ifndef SkOnce_DEFINED
#define SkOnce_DEFINED
-#include "../private/SkSpinlock.h"
#include <atomic>
#include <utility>
+#include "SkTypes.h"
// SkOnce provides call-once guarantees for Skia, much like std::once_flag/std::call_once().
//
@@ -21,20 +21,50 @@ class SkOnce {
public:
template <typename Fn, typename... Args>
void operator()(Fn&& fn, Args&&... args) {
- // Vanilla double-checked locking.
- if (!fDone.load(std::memory_order_acquire)) {
- fLock.acquire();
- if (!fDone.load(std::memory_order_relaxed)) {
+ auto state = fState.load(std::memory_order_acquire);
+
+ if (state == Done) {
+ return;
+ }
+
+ if (state == NotStarted) {
+ // Try to claim the job of calling fn() by swapping from NotStarted to Calling.
+ // See [1] below for why we use std::memory_order_acquire instead of relaxed.
+ if (fState.compare_exchange_strong(state, Calling, std::memory_order_acquire)) {
+ // Claimed! Call fn(), then mark this SkOnce as Done.
fn(std::forward<Args>(args)...);
- fDone.store(true, std::memory_order_release);
+ return fState.store(Done, std::memory_order_release);
}
- fLock.release();
}
+
+ while (state == Calling) {
+ // Some other thread is calling fn(). Wait for them to finish.
+ state = fState.load(std::memory_order_acquire);
+ }
+ SkASSERT(state == Done);
}
private:
- std::atomic<bool> fDone{false};
- SkSpinlock fLock;
+ enum State : uint8_t { NotStarted, Calling, Done};
+ std::atomic<uint8_t> fState{NotStarted};
};
+/* [1] Why do we compare_exchange_strong() with std::memory_order_acquire instead of relaxed?
+ *
+ * If we succeed, we really only need a relaxed compare_exchange_strong()... we're the ones
+ * who are about to do a release store, so there's certainly nothing yet for an acquire to
+ * synchronize with.
+ *
+ * If that compare_exchange_strong() fails, we're either in Calling or Done state.
+ * Again, if we're in Calling state, relaxed would have been fine: the spin loop will
+ * acquire up to the Calling thread's release store.
+ *
+ * But if that compare_exchange_strong() fails and we find ourselves in the Done state,
+ * we've never done an acquire load to sync up to the store of that Done state.
+ *
+ * So on failure we need an acquire load. Generally the failure memory order cannot be
+ * stronger than the success memory order, so we need acquire on success too. The single
+ * memory order version of compare_exchange_strong() uses the same acquire order for both.
+ */
+
#endif // SkOnce_DEFINED