| author | 2014-09-03 14:06:47 -0700 |
|---|---|
| committer | 2014-09-03 14:06:48 -0700 |
| commit | 9c7207b5dc71dc5a96a2eb107d401133333d5b6f (patch) |
| tree | d4a19230e5516cb03513c5ad15ab9779dc3eeac0 /src/utils/SkTaskGroup.cpp |
| parent | 00b76bd750e668a6989dd497313e715d1b476fdc (diff) |
SkThreadPool ~~> SkTaskGroup
SkTaskGroup is like SkThreadPool except the threads stay in
one global pool. Each SkTaskGroup itself is tiny (4 bytes)
and its wait() method applies only to tasks add()ed to that
instance, not the whole thread pool.
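For illustration, a minimal usage sketch of those semantics:
CountTask is a hypothetical SkRunnable subclass, while
SkTaskGroup::add()/wait() and sk_atomic_inc() come from the
patch below.

```cpp
#include "SkRunnable.h"
#include "SkTaskGroup.h"
#include "SkThread.h"

// Hypothetical task: any SkRunnable whose run() does some work.
struct CountTask : public SkRunnable {
    explicit CountTask(int32_t* counter) : fCounter(counter) {}
    virtual void run() { sk_atomic_inc(fCounter); }
    int32_t* fCounter;
};

void example() {
    int32_t hits = 0;
    CountTask a(&hits), b(&hits);

    SkTaskGroup tg;   // Tiny: just a 4-byte pending-task counter.
    tg.add(&a);       // Both tasks run on the shared global pool.
    tg.add(&b);
    tg.wait();        // Returns once a and b are done; other groups'
                      // tasks may still be running on the pool.
}
```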
This means we don't need to bring up new thread pools when
tests themselves want to use multithreading (e.g. pathops,
quilt). We just create a new SkTaskGroup and wait for that
to complete. This should be more efficient, and it should
let us expand our use of threads into really latency-sensitive
places. E.g. we can probably now use these in nanobench
for CPU .skp rendering.
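A sketch of that nested case, with illustrative names
(PathOpsSuite/PathOpsCase are not from this patch): a task
already running on the pool builds its own SkTaskGroup and
wait()s on it, and ThreadPool::Wait() in the diff below has the
blocked caller run other queued work rather than idle, so no
second pool is ever brought up.

```cpp
#include "SkRunnable.h"
#include "SkTaskGroup.h"

// Illustrative leaf task.
struct PathOpsCase : public SkRunnable {
    virtual void run() { /* run one test case */ }
};

// Illustrative outer task: fans out onto the same global pool.
struct PathOpsSuite : public SkRunnable {
    virtual void run() {
        PathOpsCase cases[16];
        SkTaskGroup inner;     // No new threads are created here.
        for (int i = 0; i < 16; i++) {
            inner.add(&cases[i]);
        }
        inner.wait();          // Waits only on these 16 cases; the
                               // caller helps run queued work meanwhile.
    }
};
```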
Now that all threads are sharing the same pool, I think we
can remove most of the custom mechanism pathops tests use
to control threading. They'll just ride on the global pool
with all other tests now.
This (temporarily?) removes the GPU multithreading feature
from DM, which we don't use.
On my desktop, DM runs a little faster (57s -> 55s) in
Debug, and a lot faster in Release (36s -> 24s). The bots
show speedups of similar proportions, cutting more than a
minute off the N4/Release and Win7/Debug runtimes.
BUG=skia:
R=caryclark@google.com, bsalomon@google.com, bungeman@google.com, mtklein@google.com, reed@google.com
Author: mtklein@chromium.org
Review URL: https://codereview.chromium.org/531653002
Diffstat (limited to 'src/utils/SkTaskGroup.cpp')
| -rw-r--r-- | src/utils/SkTaskGroup.cpp | 137 |
1 file changed, 137 insertions, 0 deletions
```diff
diff --git a/src/utils/SkTaskGroup.cpp b/src/utils/SkTaskGroup.cpp
new file mode 100644
index 0000000000..a42c0a43af
--- /dev/null
+++ b/src/utils/SkTaskGroup.cpp
@@ -0,0 +1,137 @@
+#include "SkTaskGroup.h"
+
+#include "SkCondVar.h"
+#include "SkLazyPtr.h"
+#include "SkTDArray.h"
+#include "SkThread.h"
+#include "SkThreadUtils.h"
+
+#if defined(SK_BUILD_FOR_WIN32)
+    static inline int num_cores() {
+        SYSTEM_INFO sysinfo;
+        GetSystemInfo(&sysinfo);
+        return sysinfo.dwNumberOfProcessors;
+    }
+#else
+    #include <unistd.h>
+    static inline int num_cores() {
+        return (int) sysconf(_SC_NPROCESSORS_ONLN);
+    }
+#endif
+
+namespace {
+
+static int gThreadCount = 0;
+
+class ThreadPool : SkNoncopyable {
+public:
+    static void Add(SkRunnable* task, int32_t* pending) {
+        Global()->add(task, pending);
+    }
+
+    static void Wait(int32_t* pending) {
+        while (sk_acquire_load(pending) > 0) {  // Pairs with sk_atomic_dec here or in Loop.
+            // Lend a hand until our SkTaskGroup of interest is done.
+            ThreadPool* pool = Global();
+            Work work;
+            {
+                AutoLock lock(&pool->fReady);
+                if (pool->fWork.isEmpty()) {
+                    // Someone has picked up all the work (including ours).  How nice of them!
+                    // (They may still be working on it, so we can't assert *pending == 0 here.)
+                    continue;
+                }
+                pool->fWork.pop(&work);
+            }
+            // This Work isn't necessarily part of our SkTaskGroup of interest, but that's fine.
+            // We threads gotta stick together.  We're always making forward progress.
+            work.task->run();
+            sk_atomic_dec(work.pending);  // Release pairs with the sk_acquire_load() just above.
+        }
+    }
+
+private:
+    struct AutoLock {
+        AutoLock(SkCondVar* c) : fC(c) { fC->lock(); }
+        ~AutoLock() { fC->unlock(); }
+    private:
+        SkCondVar* fC;
+    };
+
+    struct Work {
+        SkRunnable* task;  // A task to ->run(),
+        int32_t* pending;  // then sk_atomic_dec(pending) afterwards.
+    };
+
+    static ThreadPool* Create() { return SkNEW(ThreadPool); }
+    static void Destroy(ThreadPool* p) { SkDELETE(p); }
+    static ThreadPool* Global() {
+        SK_DECLARE_STATIC_LAZY_PTR(ThreadPool, global, Create, Destroy);
+        return global.get();
+    }
+
+    ThreadPool() : fDraining(false) {
+        const int threads = gThreadCount ? gThreadCount : num_cores();
+        for (int i = 0; i < threads; i++) {
+            fThreads.push(SkNEW_ARGS(SkThread, (&ThreadPool::Loop, this)));
+            fThreads.top()->start();
+        }
+    }
+
+    ~ThreadPool() {
+        SkASSERT(fWork.isEmpty());  // All SkTaskGroups should be destroyed by now.
+        {
+            AutoLock lock(&fReady);
+            fDraining = true;
+            fReady.broadcast();
+        }
+        for (int i = 0; i < fThreads.count(); i++) {
+            fThreads[i]->join();
+        }
+        SkASSERT(fWork.isEmpty());  // Can't hurt to double check.
+        fThreads.deleteAll();
+    }
+
+    void add(SkRunnable* task, int32_t* pending) {
+        Work work = { task, pending };
+        sk_atomic_inc(pending);  // No barrier needed.
+        {
+            AutoLock lock(&fReady);
+            fWork.push(work);
+            fReady.signal();
+        }
+    }
+
+    static void Loop(void* arg) {
+        ThreadPool* pool = (ThreadPool*)arg;
+        Work work;
+        while (true) {
+            {
+                AutoLock lock(&pool->fReady);
+                while (pool->fWork.isEmpty()) {
+                    if (pool->fDraining) {
+                        return;
+                    }
+                    pool->fReady.wait();
+                }
+                pool->fWork.pop(&work);
+            }
+            work.task->run();
+            sk_atomic_dec(work.pending);  // Release pairs with sk_acquire_load() in Wait().
+        }
+    }
+
+    SkTDArray<Work> fWork;
+    SkTDArray<SkThread*> fThreads;
+    SkCondVar fReady;
+    bool fDraining;
+};
+
+}  // namespace
+
+void SkTaskGroup::SetThreadCount(int n) { gThreadCount = n; }
+
+SkTaskGroup::SkTaskGroup() : fPending(0) {}
+
+void SkTaskGroup::add(SkRunnable* task) { ThreadPool::Add(task, &fPending); }
+void SkTaskGroup::wait() { ThreadPool::Wait(&fPending); }
```