From 2460bbdfbb1d55ef307c3189c661e65de1a7affb Mon Sep 17 00:00:00 2001
From: mtklein <mtklein@google.com>
Date: Wed, 3 Sep 2014 14:17:48 -0700
Subject: Revert of SkThreadPool ~~> SkTaskGroup (patchset #4 id:60001 of
 https://codereview.chromium.org/531653002/)

Reason for revert:
Leaks, leaks, leaks.

Original issue's description:
> SkThreadPool ~~> SkTaskGroup
>
> SkTaskGroup is like SkThreadPool except the threads stay in
> one global pool. Each SkTaskGroup itself is tiny (4 bytes)
> and its wait() method applies only to tasks add()ed to that
> instance, not the whole thread pool.
>
> This means we don't need to bring up new thread pools when
> tests themselves want to use multithreading (e.g. pathops,
> quilt). We just create a new SkTaskGroup and wait for that
> to complete. This should be more efficient, and allow us
> to expand where we use threads to really latency sensitive
> places. E.g. we can probably now use these in nanobench
> for CPU .skp rendering.
>
> Now that all threads are sharing the same pool, I think we
> can remove most of the custom mechanism pathops tests use
> to control threading. They'll just ride on the global pool
> with all other tests now.
>
> This (temporarily?) removes the GPU multithreading feature
> from DM, which we don't use.
>
> On my desktop, DM runs a little faster (57s -> 55s) in
> Debug, and a lot faster in Release (36s -> 24s). The bots
> show speedups of similar proportions, cutting more than a
> minute off the N4/Release and Win7/Debug runtimes.
>
> BUG=skia:
>
> Committed: https://skia.googlesource.com/skia/+/9c7207b5dc71dc5a96a2eb107d401133333d5b6f

R=caryclark@google.com, bsalomon@google.com, bungeman@google.com, reed@google.com, mtklein@chromium.org
TBR=bsalomon@google.com, bungeman@google.com, caryclark@google.com, mtklein@chromium.org, reed@google.com
NOTREECHECKS=true
NOTRY=true
BUG=skia:

Author: mtklein@google.com

Review URL: https://codereview.chromium.org/533393002
---
 src/utils/SkRunnable.h    |  13 ++-
 src/utils/SkTaskGroup.cpp | 137 ----------------------------
 src/utils/SkTaskGroup.h   |  34 -------
 src/utils/SkThreadPool.h  | 221 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 232 insertions(+), 173 deletions(-)
 delete mode 100644 src/utils/SkTaskGroup.cpp
 delete mode 100644 src/utils/SkTaskGroup.h
 create mode 100644 src/utils/SkThreadPool.h

diff --git a/src/utils/SkRunnable.h b/src/utils/SkRunnable.h
index 7a93b60c89..5acf4dbc61 100644
--- a/src/utils/SkRunnable.h
+++ b/src/utils/SkRunnable.h
@@ -8,9 +8,18 @@
 #ifndef SkRunnable_DEFINED
 #define SkRunnable_DEFINED
 
-struct SkRunnable {
-    virtual ~SkRunnable() {};
+template <typename T>
+struct SkTRunnable {
+    virtual ~SkTRunnable() {};
+    virtual void run(T&) = 0;
+};
+
+template <>
+struct SkTRunnable<void> {
+    virtual ~SkTRunnable() {};
     virtual void run() = 0;
 };
 
+typedef SkTRunnable<void> SkRunnable;
+
 #endif
diff --git a/src/utils/SkTaskGroup.cpp b/src/utils/SkTaskGroup.cpp
deleted file mode 100644
index a42c0a43af..0000000000
--- a/src/utils/SkTaskGroup.cpp
+++ /dev/null
@@ -1,137 +0,0 @@
-#include "SkTaskGroup.h"
-
-#include "SkCondVar.h"
-#include "SkLazyPtr.h"
-#include "SkTDArray.h"
-#include "SkThread.h"
-#include "SkThreadUtils.h"
-
-#if defined(SK_BUILD_FOR_WIN32)
-    static inline int num_cores() {
-        SYSTEM_INFO sysinfo;
-        GetSystemInfo(&sysinfo);
-        return sysinfo.dwNumberOfProcessors;
-    }
-#else
-    #include <unistd.h>
-    static inline int num_cores() {
-        return (int) sysconf(_SC_NPROCESSORS_ONLN);
-    }
-#endif
-
-namespace {
-
-static int gThreadCount = 0;
-
-class ThreadPool : SkNoncopyable {
-public:
-    static void Add(SkRunnable* task, int32_t* pending) {
-        Global()->add(task, pending);
-    }
-
-    static void Wait(int32_t* pending) {
-        while (sk_acquire_load(pending) > 0) {  // Pairs with sk_atomic_dec here or in Loop.
-            // Lend a hand until our SkTaskGroup of interest is done.
-            ThreadPool* pool = Global();
-            Work work;
-            {
-                AutoLock lock(&pool->fReady);
-                if (pool->fWork.isEmpty()) {
-                    // Someone has picked up all the work (including ours). How nice of them!
-                    // (They may still be working on it, so we can't assert *pending == 0 here.)
-                    continue;
-                }
-                pool->fWork.pop(&work);
-            }
-            // This Work isn't necessarily part of our SkTaskGroup of interest, but that's fine.
-            // We threads gotta stick together. We're always making forward progress.
-            work.task->run();
-            sk_atomic_dec(work.pending);  // Release pairs with the sk_acquire_load() just above.
-        }
-    }
-
-private:
-    struct AutoLock {
-        AutoLock(SkCondVar* c) : fC(c) { fC->lock(); }
-        ~AutoLock() { fC->unlock(); }
-    private:
-        SkCondVar* fC;
-    };
-
-    struct Work {
-        SkRunnable* task;  // A task to ->run(),
-        int32_t* pending;  // then sk_atomic_dec(pending) afterwards.
-    };
-
-    static ThreadPool* Create() { return SkNEW(ThreadPool); }
-    static void Destroy(ThreadPool* p) { SkDELETE(p); }
-    static ThreadPool* Global() {
-        SK_DECLARE_STATIC_LAZY_PTR(ThreadPool, global, Create, Destroy);
-        return global.get();
-    }
-
-    ThreadPool() : fDraining(false) {
-        const int threads = gThreadCount ? gThreadCount : num_cores();
-        for (int i = 0; i < threads; i++) {
-            fThreads.push(SkNEW_ARGS(SkThread, (&ThreadPool::Loop, this)));
-            fThreads.top()->start();
-        }
-    }
-
-    ~ThreadPool() {
-        SkASSERT(fWork.isEmpty());  // All SkTaskGroups should be destroyed by now.
-        {
-            AutoLock lock(&fReady);
-            fDraining = true;
-            fReady.broadcast();
-        }
-        for (int i = 0; i < fThreads.count(); i++) {
-            fThreads[i]->join();
-        }
-        SkASSERT(fWork.isEmpty());  // Can't hurt to double check.
-        fThreads.deleteAll();
-    }
-
-    void add(SkRunnable* task, int32_t* pending) {
-        Work work = { task, pending };
-        sk_atomic_inc(pending);  // No barrier needed.
-        {
-            AutoLock lock(&fReady);
-            fWork.push(work);
-            fReady.signal();
-        }
-    }
-
-    static void Loop(void* arg) {
-        ThreadPool* pool = (ThreadPool*)arg;
-        Work work;
-        while (true) {
-            {
-                AutoLock lock(&pool->fReady);
-                while (pool->fWork.isEmpty()) {
-                    if (pool->fDraining) {
-                        return;
-                    }
-                    pool->fReady.wait();
-                }
-                pool->fWork.pop(&work);
-            }
-            work.task->run();
-            sk_atomic_dec(work.pending);  // Release pairs with sk_acquire_load() in Wait().
-        }
-    }
-
-    SkTDArray<Work>      fWork;
-    SkTDArray<SkThread*> fThreads;
-    SkCondVar            fReady;
-    bool                 fDraining;
-};
-
-}  // namespace
-
-void SkTaskGroup::SetThreadCount(int n) { gThreadCount = n; }
-
-SkTaskGroup::SkTaskGroup() : fPending(0) {}
-
-void SkTaskGroup::add(SkRunnable* task) { ThreadPool::Add(task, &fPending); }
-void SkTaskGroup::wait() { ThreadPool::Wait(&fPending); }
diff --git a/src/utils/SkTaskGroup.h b/src/utils/SkTaskGroup.h
deleted file mode 100644
index af4d47aa97..0000000000
--- a/src/utils/SkTaskGroup.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright 2014 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#ifndef SkTaskGroup_DEFINED
-#define SkTaskGroup_DEFINED
-
-#include "SkTypes.h"
-#include "SkRunnable.h"
-
-class SkTaskGroup : SkNoncopyable {
-public:
-    // Call before creating any SkTaskGroup to set the number of threads all SkTaskGroups share.
-    // If not called, we default to the number of system-reported cores.
-    static void SetThreadCount(int);
-
-    SkTaskGroup();
-    ~SkTaskGroup() { this->wait(); }
-
-    // Add a task to this SkTaskGroup. It will likely run() on another thread.
-    void add(SkRunnable*);
-
-    // Block until all Tasks previously add()ed to this SkTaskGroup have run().
-    // You may safely reuse this SkTaskGroup after wait() returns.
-    void wait();
-
-private:
-    /*atomic*/ int32_t fPending;
-};
-
-#endif//SkTaskGroup_DEFINED
diff --git a/src/utils/SkThreadPool.h b/src/utils/SkThreadPool.h
new file mode 100644
index 0000000000..c99c5c4188
--- /dev/null
+++ b/src/utils/SkThreadPool.h
@@ -0,0 +1,221 @@
+/*
+ * Copyright 2012 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkThreadPool_DEFINED
+#define SkThreadPool_DEFINED
+
+#include "SkCondVar.h"
+#include "SkRunnable.h"
+#include "SkTDArray.h"
+#include "SkTInternalLList.h"
+#include "SkThreadUtils.h"
+#include "SkTypes.h"
+
+#if defined(SK_BUILD_FOR_UNIX) || defined(SK_BUILD_FOR_MAC) || defined(SK_BUILD_FOR_ANDROID)
+# include <unistd.h>
+#endif
+
+// Returns the number of cores on this machine.
+static inline int num_cores() {
+#if defined(SK_BUILD_FOR_WIN32)
+    SYSTEM_INFO sysinfo;
+    GetSystemInfo(&sysinfo);
+    return sysinfo.dwNumberOfProcessors;
+#elif defined(SK_BUILD_FOR_UNIX) || defined(SK_BUILD_FOR_MAC) || defined(SK_BUILD_FOR_ANDROID)
+    return (int) sysconf(_SC_NPROCESSORS_ONLN);
+#else
+    return 1;
+#endif
+}
+
+template <typename T>
+class SkTThreadPool {
+public:
+    /**
+     * Create a threadpool with count threads, or one thread per core if kThreadPerCore.
+     */
+    static const int kThreadPerCore = -1;
+    explicit SkTThreadPool(int count);
+    ~SkTThreadPool();
+
+    /**
+     * Queues up an SkRunnable to run when a thread is available, or synchronously if count is 0.
+     * Does not take ownership. NULL is a safe no-op. If T is not void, the runnable will be
+     * passed a reference to a T on the thread's local stack.
+     */
+    void add(SkTRunnable<T>*);
+
+    /**
+     * Same as add, but adds the runnable as the very next to run rather than enqueueing it.
+     */
+    void addNext(SkTRunnable<T>*);
+
+    /**
+     * Block until all added SkRunnables have completed. Once called, calling add() is undefined.
+     */
+    void wait();
+
+ private:
+    struct LinkedRunnable {
+        SkTRunnable<T>* fRunnable;  // Unowned.
+        SK_DECLARE_INTERNAL_LLIST_INTERFACE(LinkedRunnable);
+    };
+
+    enum State {
+        kRunning_State,  // Normal case. We've been constructed and no one has called wait().
+        kWaiting_State,  // wait has been called, but there still might be work to do or being done.
+        kHalting_State,  // There's no work to do and no thread is busy. All threads can shut down.
+    };
+
+    void addSomewhere(SkTRunnable<T>* r,
+                      void (SkTInternalLList<LinkedRunnable>::*)(LinkedRunnable*));
+
+    SkTInternalLList<LinkedRunnable> fQueue;
+    SkCondVar                        fReady;
+    SkTDArray<SkThread*>             fThreads;
+    State                            fState;
+    int                              fBusyThreads;
+
+    static void Loop(void*);  // Static because we pass in this.
+};
+
+template <typename T>
+SkTThreadPool<T>::SkTThreadPool(int count) : fState(kRunning_State), fBusyThreads(0) {
+    if (count < 0) {
+        count = num_cores();
+    }
+    // Create count threads, all running SkTThreadPool::Loop.
+    for (int i = 0; i < count; i++) {
+        SkThread* thread = SkNEW_ARGS(SkThread, (&SkTThreadPool::Loop, this));
+        *fThreads.append() = thread;
+        thread->start();
+    }
+}
+
+template <typename T>
+SkTThreadPool<T>::~SkTThreadPool() {
+    if (kRunning_State == fState) {
+        this->wait();
+    }
+}
+
+namespace SkThreadPoolPrivate {
+
+template <typename T>
+struct ThreadLocal {
+    void run(SkTRunnable<T>* r) { r->run(data); }
+    T data;
+};
+
+template <>
+struct ThreadLocal<void> {
+    void run(SkTRunnable<void>* r) { r->run(); }
+};
+
+}  // namespace SkThreadPoolPrivate
+
+template <typename T>
+void SkTThreadPool<T>::addSomewhere(SkTRunnable<T>* r,
+                                    void (SkTInternalLList<LinkedRunnable>::* f)(LinkedRunnable*)) {
+    if (r == NULL) {
+        return;
+    }
+
+    if (fThreads.isEmpty()) {
+        SkThreadPoolPrivate::ThreadLocal<T> threadLocal;
+        threadLocal.run(r);
+        return;
+    }
+
+    LinkedRunnable* linkedRunnable = SkNEW(LinkedRunnable);
+    linkedRunnable->fRunnable = r;
+    fReady.lock();
+    SkASSERT(fState != kHalting_State);  // Shouldn't be able to add work when we're halting.
+    (fQueue.*f)(linkedRunnable);
+    fReady.signal();
+    fReady.unlock();
+}
+
+template <typename T>
+void SkTThreadPool<T>::add(SkTRunnable<T>* r) {
+    this->addSomewhere(r, &SkTInternalLList<LinkedRunnable>::addToTail);
+}
+
+template <typename T>
+void SkTThreadPool<T>::addNext(SkTRunnable<T>* r) {
+    this->addSomewhere(r, &SkTInternalLList<LinkedRunnable>::addToHead);
+}
+
+
+template <typename T>
+void SkTThreadPool<T>::wait() {
+    fReady.lock();
+    fState = kWaiting_State;
+    fReady.broadcast();
+    fReady.unlock();
+
+    // Wait for all threads to stop.
+    for (int i = 0; i < fThreads.count(); i++) {
+        fThreads[i]->join();
+        SkDELETE(fThreads[i]);
+    }
+    SkASSERT(fQueue.isEmpty());
+}
+
+template <typename T>
+/*static*/ void SkTThreadPool<T>::Loop(void* arg) {
+    // The SkTThreadPool passes itself as arg to each thread as they're created.
+    SkTThreadPool<T>* pool = static_cast<SkTThreadPool<T>*>(arg);
+    SkThreadPoolPrivate::ThreadLocal<T> threadLocal;
+
+    while (true) {
+        // We have to be holding the lock to read the queue and to call wait.
+        pool->fReady.lock();
+        while(pool->fQueue.isEmpty()) {
+            // Does the client want to stop and are all the threads ready to stop?
+            // If so, we move into the halting state, and whack all the threads so they notice.
+            if (kWaiting_State == pool->fState && pool->fBusyThreads == 0) {
+                pool->fState = kHalting_State;
+                pool->fReady.broadcast();
+            }
+            // Any time we find ourselves in the halting state, it's quitting time.
+            if (kHalting_State == pool->fState) {
+                pool->fReady.unlock();
+                return;
+            }
+            // wait yields the lock while waiting, but will have it again when awoken.
+            pool->fReady.wait();
+        }
+        // We've got the lock back here, no matter if we ran wait or not.
+
+        // The queue is not empty, so we have something to run. Claim it.
+        LinkedRunnable* r = pool->fQueue.head();
+
+        pool->fQueue.remove(r);
+
+        // Having claimed our SkRunnable, we now give up the lock while we run it.
+        // Otherwise, we'd only ever do work on one thread at a time, which rather
+        // defeats the point of this code.
+        pool->fBusyThreads++;
+        pool->fReady.unlock();
+
+        // OK, now really do the work.
+        threadLocal.run(r->fRunnable);
+        SkDELETE(r);
+
+        // Let everyone know we're not busy.
+        pool->fReady.lock();
+        pool->fBusyThreads--;
+        pool->fReady.unlock();
+    }
+
+    SkASSERT(false);  // Unreachable. The only exit happens when pool->fState is kHalting_State.
+}
+
+typedef SkTThreadPool<void> SkThreadPool;
+
+#endif
-- 
cgit v1.2.3
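
For readers skimming the revert, here is a minimal usage sketch of the restored API; it is not part of the patch above. CountRunnable and the example function are hypothetical, while SkThreadPool, kThreadPerCore, add(), and wait() come from the SkThreadPool.h recreated by this patch, and sk_atomic_inc() from SkThread.h is the same primitive the deleted SkTaskGroup.cpp relies on.

#include "SkThread.h"      // Assumed for sk_atomic_inc(), as in the deleted SkTaskGroup.cpp.
#include "SkThreadPool.h"

// Hypothetical task: atomically bumps a counter it does not own.
struct CountRunnable : SkRunnable {  // SkRunnable is SkTRunnable<void>.
    explicit CountRunnable(int32_t* counter) : fCounter(counter) {}
    virtual void run() { sk_atomic_inc(fCounter); }
    int32_t* fCounter;  // Unowned.
};

static void example_thread_pool() {
    SkThreadPool pool(SkThreadPool::kThreadPerCore);  // One worker per core.
    int32_t hits = 0;
    CountRunnable task(&hits);
    for (int i = 0; i < 100; i++) {
        pool.add(&task);  // No ownership taken; the same runnable may be queued repeatedly.
    }
    pool.wait();          // Blocks until every queued run() has completed.
    SkASSERT(100 == hits);
}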
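
The template parameter is the subtle part of this header: when T is not void, each worker thread default-constructs one T on its stack for its whole lifetime (see ThreadLocal above) and hands it to run() by reference, giving tasks per-thread scratch space without locking. A sketch under the same caveats, with Scratch and TypedTask hypothetical:

#include "SkTDArray.h"
#include "SkThreadPool.h"

struct Scratch {
    SkTDArray<int> fTemp;  // Hypothetical per-worker state, constructed once per thread.
};

struct TypedTask : SkTRunnable<Scratch> {
    virtual void run(Scratch& s) {
        s.fTemp.reset();   // Reuse this worker's scratch storage; no lock needed.
        // ... fill and use s.fTemp ...
    }
};

static void example_typed_pool() {
    SkTThreadPool<Scratch> pool(2);  // Two workers, each with its own Scratch.
    TypedTask task;
    pool.add(&task);
    pool.wait();
}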
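
For contrast, the quoted issue description and the deleted SkTaskGroup.h suggest the reverted API was used like the sketch below. The structural difference is that every SkTaskGroup shares one global pool, so wait() blocks only on tasks add()ed to that particular group (the group's 4 bytes are just its pending count) instead of joining and tearing down a whole pool.

#include "SkTaskGroup.h"  // The deleted header; shown only for comparison.

static void example_task_group() {
    int32_t hits = 0;
    CountRunnable task(&hits);  // Same hypothetical runnable as above.
    SkTaskGroup group;          // Tiny handle onto the shared global pool.
    for (int i = 0; i < 100; i++) {
        group.add(&task);       // Will likely run() on another thread.
    }
    group.wait();               // Waits only for this group's tasks;
    SkASSERT(100 == hits);      // the group may be reused after wait().
}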