From 2460bbdfbb1d55ef307c3189c661e65de1a7affb Mon Sep 17 00:00:00 2001
From: mtklein <mtklein@google.com>
Date: Wed, 3 Sep 2014 14:17:48 -0700
Subject: Revert of SkThreadPool ~~> SkTaskGroup (patchset #4 id:60001 of
 https://codereview.chromium.org/531653002/)

Reason for revert:
Leaks, leaks, leaks.

Original issue's description:
> SkThreadPool ~~> SkTaskGroup
>
> SkTaskGroup is like SkThreadPool except the threads stay in
> one global pool. Each SkTaskGroup itself is tiny (4 bytes)
> and its wait() method applies only to tasks add()ed to that
> instance, not the whole thread pool.
>
> This means we don't need to bring up new thread pools when
> tests themselves want to use multithreading (e.g. pathops,
> quilt). We just create a new SkTaskGroup and wait for that
> to complete. This should be more efficient, and allow us
> to expand where we use threads to really latency sensitive
> places. E.g. we can probably now use these in nanobench
> for CPU .skp rendering.
>
> Now that all threads are sharing the same pool, I think we
> can remove most of the custom mechanism pathops tests use
> to control threading. They'll just ride on the global pool
> with all other tests now.
>
> This (temporarily?) removes the GPU multithreading feature
> from DM, which we don't use.
>
> On my desktop, DM runs a little faster (57s -> 55s) in
> Debug, and a lot faster in Release (36s -> 24s). The bots
> show speedups of similar proportions, cutting more than a
> minute off the N4/Release and Win7/Debug runtimes.
>
> BUG=skia:
>
> Committed: https://skia.googlesource.com/skia/+/9c7207b5dc71dc5a96a2eb107d401133333d5b6f

R=caryclark@google.com, bsalomon@google.com, bungeman@google.com, reed@google.com, mtklein@chromium.org
TBR=bsalomon@google.com, bungeman@google.com, caryclark@google.com, mtklein@chromium.org, reed@google.com
NOTREECHECKS=true
NOTRY=true
BUG=skia:

Author: mtklein@google.com

Review URL: https://codereview.chromium.org/533393002
---
 src/utils/SkRunnable.h    |  13 ++-
 src/utils/SkTaskGroup.cpp | 137 ----------------------------
 src/utils/SkTaskGroup.h   |  34 -------
 src/utils/SkThreadPool.h  | 221 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 232 insertions(+), 173 deletions(-)
 delete mode 100644 src/utils/SkTaskGroup.cpp
 delete mode 100644 src/utils/SkTaskGroup.h
 create mode 100644 src/utils/SkThreadPool.h

diff --git a/src/utils/SkRunnable.h b/src/utils/SkRunnable.h
index 7a93b60c89..5acf4dbc61 100644
--- a/src/utils/SkRunnable.h
+++ b/src/utils/SkRunnable.h
@@ -8,9 +8,18 @@
 #ifndef SkRunnable_DEFINED
 #define SkRunnable_DEFINED
 
-struct SkRunnable {
-    virtual ~SkRunnable() {};
+template <typename T>
+struct SkTRunnable {
+    virtual ~SkTRunnable() {};
+    virtual void run(T&) = 0;
+};
+
+template <>
+struct SkTRunnable<void> {
+    virtual ~SkTRunnable() {};
     virtual void run() = 0;
 };
 
+typedef SkTRunnable<void> SkRunnable;
+
 #endif
diff --git a/src/utils/SkTaskGroup.cpp b/src/utils/SkTaskGroup.cpp
deleted file mode 100644
index a42c0a43af..0000000000
--- a/src/utils/SkTaskGroup.cpp
+++ /dev/null
@@ -1,137 +0,0 @@
-#include "SkTaskGroup.h"
-
-#include "SkCondVar.h"
-#include "SkLazyPtr.h"
-#include "SkTDArray.h"
-#include "SkThread.h"
-#include "SkThreadUtils.h"
-
-#if defined(SK_BUILD_FOR_WIN32)
-    static inline int num_cores() {
-        SYSTEM_INFO sysinfo;
-        GetSystemInfo(&sysinfo);
-        return sysinfo.dwNumberOfProcessors;
-    }
-#else
-    #include <unistd.h>
-    static inline int num_cores() {
-        return (int) sysconf(_SC_NPROCESSORS_ONLN);
-    }
-#endif
-
-namespace {
-
-static int gThreadCount = 0;
-
-class ThreadPool : SkNoncopyable {
-public:
-    static void Add(SkRunnable* task, int32_t* pending) {
-        Global()->add(task, pending);
-    }
-
-    static void Wait(int32_t* pending) {
-        while (sk_acquire_load(pending) > 0) {  // Pairs with sk_atomic_dec here or in Loop.
-            // Lend a hand until our SkTaskGroup of interest is done.
-            ThreadPool* pool = Global();
-            Work work;
-            {
-                AutoLock lock(&pool->fReady);
-                if (pool->fWork.isEmpty()) {
-                    // Someone has picked up all the work (including ours). How nice of them!
-                    // (They may still be working on it, so we can't assert *pending == 0 here.)
-                    continue;
-                }
-                pool->fWork.pop(&work);
-            }
-            // This Work isn't necessarily part of our SkTaskGroup of interest, but that's fine.
-            // We threads gotta stick together. We're always making forward progress.
-            work.task->run();
-            sk_atomic_dec(work.pending);  // Release pairs with the sk_acquire_load() just above.
-        }
-    }
-
-private:
-    struct AutoLock {
-        AutoLock(SkCondVar* c) : fC(c) { fC->lock(); }
-        ~AutoLock() { fC->unlock(); }
-    private:
-        SkCondVar* fC;
-    };
-
-    struct Work {
-        SkRunnable* task;  // A task to ->run(),
-        int32_t* pending;  // then sk_atomic_dec(pending) afterwards.
-    };
-
-    static ThreadPool* Create() { return SkNEW(ThreadPool); }
-    static void Destroy(ThreadPool* p) { SkDELETE(p); }
-    static ThreadPool* Global() {
-        SK_DECLARE_STATIC_LAZY_PTR(ThreadPool, global, Create, Destroy);
-        return global.get();
-    }
-
-    ThreadPool() : fDraining(false) {
-        const int threads = gThreadCount ? gThreadCount : num_cores();
-        for (int i = 0; i < threads; i++) {
-            fThreads.push(SkNEW_ARGS(SkThread, (&ThreadPool::Loop, this)));
-            fThreads.top()->start();
-        }
-    }
-
-    ~ThreadPool() {
-        SkASSERT(fWork.isEmpty());  // All SkTaskGroups should be destroyed by now.
-        {
-            AutoLock lock(&fReady);
-            fDraining = true;
-            fReady.broadcast();
-        }
-        for (int i = 0; i < fThreads.count(); i++) {
-            fThreads[i]->join();
-        }
-        SkASSERT(fWork.isEmpty());  // Can't hurt to double check.
-        fThreads.deleteAll();
-    }
-
-    void add(SkRunnable* task, int32_t* pending) {
-        Work work = { task, pending };
-        sk_atomic_inc(pending);  // No barrier needed.
-        {
-            AutoLock lock(&fReady);
-            fWork.push(work);
-            fReady.signal();
-        }
-    }
-
-    static void Loop(void* arg) {
-        ThreadPool* pool = (ThreadPool*)arg;
-        Work work;
-        while (true) {
-            {
-                AutoLock lock(&pool->fReady);
-                while (pool->fWork.isEmpty()) {
-                    if (pool->fDraining) {
-                        return;
-                    }
-                    pool->fReady.wait();
-                }
-                pool->fWork.pop(&work);
-            }
-            work.task->run();
-            sk_atomic_dec(work.pending);  // Release pairs with sk_acquire_load() in Wait().
-        }
-    }
-
-    SkTDArray<Work>      fWork;
-    SkTDArray<SkThread*> fThreads;
-    SkCondVar            fReady;
-    bool                 fDraining;
-};
-
-}  // namespace
-
-void SkTaskGroup::SetThreadCount(int n) { gThreadCount = n; }
-
-SkTaskGroup::SkTaskGroup() : fPending(0) {}
-
-void SkTaskGroup::add(SkRunnable* task) { ThreadPool::Add(task, &fPending); }
-void SkTaskGroup::wait() { ThreadPool::Wait(&fPending); }
diff --git a/src/utils/SkTaskGroup.h b/src/utils/SkTaskGroup.h
deleted file mode 100644
index af4d47aa97..0000000000
--- a/src/utils/SkTaskGroup.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright 2014 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#ifndef SkTaskGroup_DEFINED
-#define SkTaskGroup_DEFINED
-
-#include "SkTypes.h"
-#include "SkRunnable.h"
-
-class SkTaskGroup : SkNoncopyable {
-public:
-    // Call before creating any SkTaskGroup to set the number of threads all SkTaskGroups share.
-    // If not called, we default to the number of system-reported cores.
-    static void SetThreadCount(int);
-
-    SkTaskGroup();
-    ~SkTaskGroup() { this->wait(); }
-
-    // Add a task to this SkTaskGroup. It will likely run() on another thread.
-    void add(SkRunnable*);
-
-    // Block until all Tasks previously add()ed to this SkTaskGroup have run().
-    // You may safely reuse this SkTaskGroup after wait() returns.
-    void wait();
-
-private:
-    /*atomic*/ int32_t fPending;
-};
-
-#endif//SkTaskGroup_DEFINED
diff --git a/src/utils/SkThreadPool.h b/src/utils/SkThreadPool.h
new file mode 100644
index 0000000000..c99c5c4188
--- /dev/null
+++ b/src/utils/SkThreadPool.h
@@ -0,0 +1,221 @@
+/*
+ * Copyright 2012 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkThreadPool_DEFINED
+#define SkThreadPool_DEFINED
+
+#include "SkCondVar.h"
+#include "SkRunnable.h"
+#include "SkTDArray.h"
+#include "SkTInternalLList.h"
+#include "SkThreadUtils.h"
+#include "SkTypes.h"
+
+#if defined(SK_BUILD_FOR_UNIX) || defined(SK_BUILD_FOR_MAC) || defined(SK_BUILD_FOR_ANDROID)
+# include <unistd.h>
+#endif
+
+// Returns the number of cores on this machine.
+static inline int num_cores() {
+#if defined(SK_BUILD_FOR_WIN32)
+    SYSTEM_INFO sysinfo;
+    GetSystemInfo(&sysinfo);
+    return sysinfo.dwNumberOfProcessors;
+#elif defined(SK_BUILD_FOR_UNIX) || defined(SK_BUILD_FOR_MAC) || defined(SK_BUILD_FOR_ANDROID)
+    return (int) sysconf(_SC_NPROCESSORS_ONLN);
+#else
+    return 1;
+#endif
+}
+
+template <typename T>
+class SkTThreadPool {
+public:
+    /**
+     * Create a threadpool with count threads, or one thread per core if kThreadPerCore.
+     */
+    static const int kThreadPerCore = -1;
+    explicit SkTThreadPool(int count);
+    ~SkTThreadPool();
+
+    /**
+     * Queues up an SkRunnable to run when a thread is available, or synchronously if count is 0.
+     * Does not take ownership. NULL is a safe no-op. If T is not void, the runnable will be
+     * passed a reference to a T on the thread's local stack.
+     */
+    void add(SkTRunnable<T>*);
+
+    /**
+     * Same as add, but adds the runnable as the very next to run rather than enqueueing it.
+     */
+    void addNext(SkTRunnable<T>*);
+
+    /**
+     * Block until all added SkRunnables have completed. Once called, calling add() is undefined.
+     */
+    void wait();
+
+ private:
+    struct LinkedRunnable {
+        SkTRunnable<T>* fRunnable;  // Unowned.
+        SK_DECLARE_INTERNAL_LLIST_INTERFACE(LinkedRunnable);
+    };
+
+    enum State {
+        kRunning_State,  // Normal case. We've been constructed and no one has called wait().
+        kWaiting_State,  // wait has been called, but there still might be work to do or being done.
+        kHalting_State,  // There's no work to do and no thread is busy. All threads can shut down.
+    };
+
+    void addSomewhere(SkTRunnable<T>* r,
+                      void (SkTInternalLList<LinkedRunnable>::*)(LinkedRunnable*));
+
+    SkTInternalLList<LinkedRunnable> fQueue;
+    SkCondVar                        fReady;
+    SkTDArray<SkThread*>             fThreads;
+    State                            fState;
+    int                              fBusyThreads;
+
+    static void Loop(void*);  // Static because we pass in this.
+};
+
+template <typename T>
+SkTThreadPool<T>::SkTThreadPool(int count) : fState(kRunning_State), fBusyThreads(0) {
+    if (count < 0) {
+        count = num_cores();
+    }
+    // Create count threads, all running SkTThreadPool::Loop.
+    for (int i = 0; i < count; i++) {
+        SkThread* thread = SkNEW_ARGS(SkThread, (&SkTThreadPool::Loop, this));
+        *fThreads.append() = thread;
+        thread->start();
+    }
+}
+
+template <typename T>
+SkTThreadPool<T>::~SkTThreadPool() {
+    if (kRunning_State == fState) {
+        this->wait();
+    }
+}
+
+namespace SkThreadPoolPrivate {
+
+template <typename T>
+struct ThreadLocal {
+    void run(SkTRunnable<T>* r) { r->run(data); }
+    T data;
+};
+
+template <>
+struct ThreadLocal<void> {
+    void run(SkTRunnable<void>* r) { r->run(); }
+};
+
+}  // namespace SkThreadPoolPrivate
+
+template <typename T>
+void SkTThreadPool<T>::addSomewhere(SkTRunnable<T>* r,
+                                    void (SkTInternalLList<LinkedRunnable>::* f)(LinkedRunnable*)) {
+    if (r == NULL) {
+        return;
+    }
+
+    if (fThreads.isEmpty()) {
+        SkThreadPoolPrivate::ThreadLocal<T> threadLocal;
+        threadLocal.run(r);
+        return;
+    }
+
+    LinkedRunnable* linkedRunnable = SkNEW(LinkedRunnable);
+    linkedRunnable->fRunnable = r;
+    fReady.lock();
+    SkASSERT(fState != kHalting_State);  // Shouldn't be able to add work when we're halting.
+    (fQueue.*f)(linkedRunnable);
+    fReady.signal();
+    fReady.unlock();
+}
+
+template <typename T>
+void SkTThreadPool<T>::add(SkTRunnable<T>* r) {
+    this->addSomewhere(r, &SkTInternalLList<LinkedRunnable>::addToTail);
+}
+
+template <typename T>
+void SkTThreadPool<T>::addNext(SkTRunnable<T>* r) {
+    this->addSomewhere(r, &SkTInternalLList<LinkedRunnable>::addToHead);
+}
+
+
+template <typename T>
+void SkTThreadPool<T>::wait() {
+    fReady.lock();
+    fState = kWaiting_State;
+    fReady.broadcast();
+    fReady.unlock();
+
+    // Wait for all threads to stop.
+    for (int i = 0; i < fThreads.count(); i++) {
+        fThreads[i]->join();
+        SkDELETE(fThreads[i]);
+    }
+    SkASSERT(fQueue.isEmpty());
+}
+
+template <typename T>
+/*static*/ void SkTThreadPool<T>::Loop(void* arg) {
+    // The SkTThreadPool passes itself as arg to each thread as they're created.
+    SkTThreadPool<T>* pool = static_cast<SkTThreadPool<T>*>(arg);
+    SkThreadPoolPrivate::ThreadLocal<T> threadLocal;
+
+    while (true) {
+        // We have to be holding the lock to read the queue and to call wait.
+        pool->fReady.lock();
+        while(pool->fQueue.isEmpty()) {
+            // Does the client want to stop and are all the threads ready to stop?
+            // If so, we move into the halting state, and whack all the threads so they notice.
+            if (kWaiting_State == pool->fState && pool->fBusyThreads == 0) {
+                pool->fState = kHalting_State;
+                pool->fReady.broadcast();
+            }
+            // Any time we find ourselves in the halting state, it's quitting time.
+            if (kHalting_State == pool->fState) {
+                pool->fReady.unlock();
+                return;
+            }
+            // wait yields the lock while waiting, but will have it again when awoken.
+            pool->fReady.wait();
+        }
+        // We've got the lock back here, no matter if we ran wait or not.
+
+        // The queue is not empty, so we have something to run. Claim it.
+        LinkedRunnable* r = pool->fQueue.head();
+
+        pool->fQueue.remove(r);
+
+        // Having claimed our SkRunnable, we now give up the lock while we run it.
+        // Otherwise, we'd only ever do work on one thread at a time, which rather
+        // defeats the point of this code.
+        pool->fBusyThreads++;
+        pool->fReady.unlock();
+
+        // OK, now really do the work.
+        threadLocal.run(r->fRunnable);
+        SkDELETE(r);
+
+        // Let everyone know we're not busy.
+        pool->fReady.lock();
+        pool->fBusyThreads--;
+        pool->fReady.unlock();
+    }
+
+    SkASSERT(false);  // Unreachable. The only exit happens when pool->fState is kHalting_State.
+}
+
+typedef SkTThreadPool<void> SkThreadPool;
+
+#endif
-- 
cgit v1.2.3
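
For readers skimming the revert, here is a minimal usage sketch of the restored API; it is not part of the patch above. CountRunnable and the example function are hypothetical, while SkThreadPool, kThreadPerCore, add(), and wait() come from the SkThreadPool.h recreated by this patch, and sk_atomic_inc() from SkThread.h is the same primitive the deleted SkTaskGroup.cpp relies on.

#include "SkThread.h"      // Assumed for sk_atomic_inc(), as in the deleted SkTaskGroup.cpp.
#include "SkThreadPool.h"

// Hypothetical task: atomically bumps a counter it does not own.
struct CountRunnable : SkRunnable {  // SkRunnable is SkTRunnable<void>.
    explicit CountRunnable(int32_t* counter) : fCounter(counter) {}
    virtual void run() { sk_atomic_inc(fCounter); }
    int32_t* fCounter;  // Unowned.
};

static void example_thread_pool() {
    SkThreadPool pool(SkThreadPool::kThreadPerCore);  // One worker per core.
    int32_t hits = 0;
    CountRunnable task(&hits);
    for (int i = 0; i < 100; i++) {
        pool.add(&task);  // No ownership taken; the same runnable may be queued repeatedly.
    }
    pool.wait();          // Blocks until every queued run() has completed.
    SkASSERT(100 == hits);
}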
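
The template parameter is the subtle part of this header: when T is not void, each worker thread default-constructs one T on its stack for its whole lifetime (see ThreadLocal above) and hands it to run() by reference, giving tasks per-thread scratch space without locking. A sketch under the same caveats, with Scratch and TypedTask hypothetical:

#include "SkTDArray.h"
#include "SkThreadPool.h"

struct Scratch {
    SkTDArray<int> fTemp;  // Hypothetical per-worker state, constructed once per thread.
};

struct TypedTask : SkTRunnable<Scratch> {
    virtual void run(Scratch& s) {
        s.fTemp.reset();   // Reuse this worker's scratch storage; no lock needed.
        // ... fill and use s.fTemp ...
    }
};

static void example_typed_pool() {
    SkTThreadPool<Scratch> pool(2);  // Two workers, each with its own Scratch.
    TypedTask task;
    pool.add(&task);
    pool.wait();
}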
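
For contrast, the quoted issue description and the deleted SkTaskGroup.h suggest the reverted API was used like the sketch below. The structural difference is that every SkTaskGroup shares one global pool, so wait() blocks only on tasks add()ed to that particular group (the group's 4 bytes are just its pending count) instead of joining and tearing down a whole pool.

#include "SkTaskGroup.h"  // The deleted header; shown only for comparison.

static void example_task_group() {
    int32_t hits = 0;
    CountRunnable task(&hits);  // Same hypothetical runnable as above.
    SkTaskGroup group;          // Tiny handle onto the shared global pool.
    for (int i = 0; i < 100; i++) {
        group.add(&task);       // Will likely run() on another thread.
    }
    group.wait();               // Waits only for this group's tasks;
    SkASSERT(100 == hits);      // the group may be reused after wait().
}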