30 files changed, 388 insertions, 359 deletions
diff --git a/dm/DM.cpp b/dm/DM.cpp
index bf37d20a80..9c5c25371c 100644
--- a/dm/DM.cpp
+++ b/dm/DM.cpp
@@ -12,8 +12,8 @@
 #include "gm.h"
 
 #include "DMBenchTask.h"
-#include "DMCpuTask.h"
-#include "DMGpuTask.h"
+#include "DMCpuGMTask.h"
+#include "DMGpuGMTask.h"
 #include "DMReporter.h"
 #include "DMTask.h"
 #include "DMTaskRunner.h"
@@ -28,6 +28,7 @@ using skiatest::Test;
 using skiatest::TestRegistry;
 
 DEFINE_int32(threads, -1, "Threads for CPU work. Default NUM_CPUS.");
+DEFINE_int32(gpuThreads, 1, "Threads for GPU work.");
 DEFINE_string2(expectations, r, "",
                "If a directory, compare generated images against images under this path. "
                "If a file, compare generated images against JSON expectations at this path.");
@@ -86,15 +87,15 @@ static void kick_off_gms(const SkTDArray<GMRegistry::Factory>& gms,
     }
     for (int i = 0; i < gms.count(); i++) {
         for (int j = 0; j < configs.count(); j++) {
-            START("565",      CpuTask, kRGB_565_SkColorType);
-            START("8888",     CpuTask, kPMColor_SkColorType);
-            START("gpu",      GpuTask, native, 0);
-            START("msaa4",    GpuTask, native, 4);
-            START("msaa16",   GpuTask, native, 16);
-            START("gpunull",  GpuTask, null,   0);
-            START("gpudebug", GpuTask, debug,  0);
-            START("angle",    GpuTask, angle,  0);
-            START("mesa",     GpuTask, mesa,   0);
+            START("565",      CpuGMTask, kRGB_565_SkColorType);
+            START("8888",     CpuGMTask, kPMColor_SkColorType);
+            START("gpu",      GpuGMTask, native, 0);
+            START("msaa4",    GpuGMTask, native, 4);
+            START("msaa16",   GpuGMTask, native, 16);
+            START("gpunull",  GpuGMTask, null,   0);
+            START("gpudebug", GpuGMTask, debug,  0);
+            START("angle",    GpuGMTask, angle,  0);
+            START("mesa",     GpuGMTask, mesa,   0);
         }
     }
 #undef START
@@ -129,7 +130,12 @@ static void kick_off_tests(const SkTDArray<TestRegistry::Factory>& tests,
                            DM::Reporter* reporter,
                            DM::TaskRunner* tasks) {
     for (int i = 0; i < tests.count(); i++) {
-        tasks->add(SkNEW_ARGS(DM::TestTask, (reporter, tasks, tests[i])));
+        SkAutoTDelete<Test> test(tests[i](NULL));
+        if (test->isGPUTest()) {
+            tasks->add(SkNEW_ARGS(DM::GpuTestTask, (reporter, tasks, tests[i])));
+        } else {
+            tasks->add(SkNEW_ARGS(DM::CpuTestTask, (reporter, tasks, tests[i])));
+        }
     }
 }
 
@@ -201,7 +207,7 @@ int tool_main(int argc, char** argv) {
     SkDebugf("(%d GMs, %d benches) x %d configs, %d tests\n",
              gms.count(), benches.count(), configs.count(), tests.count());
     DM::Reporter reporter;
-    DM::TaskRunner tasks(FLAGS_threads);
+    DM::TaskRunner tasks(FLAGS_threads, FLAGS_gpuThreads);
     kick_off_gms(gms, configs, *expectations, &reporter, &tasks);
     kick_off_benches(benches, configs, &reporter, &tasks);
     kick_off_tests(tests, &reporter, &tasks);
diff --git a/dm/DMBenchTask.cpp b/dm/DMBenchTask.cpp
index 4e251de2e9..30561a407d 100644
--- a/dm/DMBenchTask.cpp
+++ b/dm/DMBenchTask.cpp
@@ -14,7 +14,7 @@ NonRenderingBenchTask::NonRenderingBenchTask(const char* config,
                                              Reporter* reporter,
                                              TaskRunner* tasks,
                                              BenchRegistry::Factory factory)
-    : Task(reporter, tasks)
+    : CpuTask(reporter, tasks)
     , fBench(factory(NULL))
     , fName(bench_name(fBench->getName(), config)) {}
 
@@ -23,7 +23,7 @@ CpuBenchTask::CpuBenchTask(const char* config,
                            TaskRunner* tasks,
                            BenchRegistry::Factory factory,
                            SkColorType colorType)
-    : Task(reporter, tasks)
+    : CpuTask(reporter, tasks)
     , fBench(factory(NULL))
     , fName(bench_name(fBench->getName(), config))
     , fColorType(colorType) {}
@@ -34,7 +34,7 @@ GpuBenchTask::GpuBenchTask(const char* config,
                            BenchRegistry::Factory factory,
                            GrContextFactory::GLContextType contextType,
                            int sampleCount)
-    : Task(reporter, tasks)
+    : GpuTask(reporter, tasks)
     , fBench(factory(NULL))
     , fName(bench_name(fBench->getName(), config))
     , fContextType(contextType)
@@ -70,13 +70,13 @@ void CpuBenchTask::draw() {
     draw_raster(fBench.get(), fColorType);
 }
 
-void GpuBenchTask::draw() {
+void GpuBenchTask::draw(GrContextFactory* grFactory) {
     SkImageInfo info = SkImageInfo::Make(fBench->getSize().x(),
                                          fBench->getSize().y(),
                                          kPMColor_SkColorType,
                                          kPremul_SkAlphaType);
     SkAutoTUnref<SkSurface> surface(SkSurface::NewRenderTarget(
-            this->getGrContextFactory()->get(fContextType), info, fSampleCount));
+            grFactory->get(fContextType), info, fSampleCount));
 
     fBench->preDraw();
     fBench->draw(1, surface->getCanvas());
diff --git a/dm/DMBenchTask.h b/dm/DMBenchTask.h
index 1e9bc9912e..ac7030b5f0 100644
--- a/dm/DMBenchTask.h
+++ b/dm/DMBenchTask.h
@@ -12,12 +12,11 @@
 
 namespace DM {
 
-class NonRenderingBenchTask : public Task {
+class NonRenderingBenchTask : public CpuTask {
 public:
     NonRenderingBenchTask(const char* config, Reporter*, TaskRunner*, BenchRegistry::Factory);
 
     virtual void draw() SK_OVERRIDE;
-    virtual bool usesGpu() const SK_OVERRIDE { return false; }
     virtual bool shouldSkip() const SK_OVERRIDE;
     virtual SkString name() const SK_OVERRIDE { return fName; }
 
@@ -26,12 +25,11 @@ private:
     const SkString fName;
 };
 
-class CpuBenchTask : public Task {
+class CpuBenchTask : public CpuTask {
 public:
     CpuBenchTask(const char* config, Reporter*, TaskRunner*, BenchRegistry::Factory, SkColorType);
 
     virtual void draw() SK_OVERRIDE;
-    virtual bool usesGpu() const SK_OVERRIDE { return false; }
     virtual bool shouldSkip() const SK_OVERRIDE;
     virtual SkString name() const SK_OVERRIDE { return fName; }
 
@@ -41,7 +39,7 @@ private:
     const SkColorType fColorType;
 };
 
-class GpuBenchTask : public Task {
+class GpuBenchTask : public GpuTask {
 public:
     GpuBenchTask(const char* config,
                  Reporter*,
@@ -50,8 +48,7 @@ public:
                  GrContextFactory::GLContextType,
                  int sampleCount);
 
-    virtual void draw() SK_OVERRIDE;
-    virtual bool usesGpu() const SK_OVERRIDE { return true; }
+    virtual void draw(GrContextFactory*) SK_OVERRIDE;
     virtual bool shouldSkip() const SK_OVERRIDE;
     virtual SkString name() const SK_OVERRIDE { return fName; }
 
diff --git a/dm/DMCpuTask.cpp b/dm/DMCpuGMTask.cpp
index acbe8d2185..6ab0014fd5 100644
--- a/dm/DMCpuTask.cpp
+++ b/dm/DMCpuGMTask.cpp
@@ -1,4 +1,4 @@
-#include "DMCpuTask.h"
+#include "DMCpuGMTask.h"
 #include "DMExpectationsTask.h"
 #include "DMPipeTask.h"
 #include "DMReplayTask.h"
@@ -9,13 +9,13 @@
 
 namespace DM {
 
-CpuTask::CpuTask(const char* config,
-                 Reporter* reporter,
-                 TaskRunner* taskRunner,
-                 const Expectations& expectations,
-                 skiagm::GMRegistry::Factory gmFactory,
-                 SkColorType colorType)
-    : Task(reporter, taskRunner)
+CpuGMTask::CpuGMTask(const char* config,
+                     Reporter* reporter,
+                     TaskRunner* taskRunner,
+                     const Expectations& expectations,
+                     skiagm::GMRegistry::Factory gmFactory,
+                     SkColorType colorType)
+    : CpuTask(reporter, taskRunner)
     , fGMFactory(gmFactory)
     , fGM(fGMFactory(NULL))
     , fName(UnderJoin(fGM->getName(), config))
@@ -23,7 +23,7 @@ CpuTask::CpuTask(const char* config,
     , fColorType(colorType)
     {}
 
-void CpuTask::draw() {
+void CpuGMTask::draw() {
     SkBitmap bitmap;
     SetupBitmap(fColorType, fGM.get(), &bitmap);
 
@@ -47,7 +47,7 @@ void CpuTask::draw() {
 #undef SPAWN
 }
 
-bool CpuTask::shouldSkip() const {
+bool CpuGMTask::shouldSkip() const {
     if (kRGB_565_SkColorType == fColorType && (fGM->getFlags() & skiagm::GM::kSkip565_Flag)) {
         return true;
     }
diff --git a/dm/DMCpuTask.h b/dm/DMCpuGMTask.h
index 0ae112f754..7712da88d0 100644
--- a/dm/DMCpuTask.h
+++ b/dm/DMCpuGMTask.h
@@ -1,5 +1,5 @@
-#ifndef DMCpuTask_DEFINED
-#define DMCpuTask_DEFINED
+#ifndef DMCpuGMTask_DEFINED
+#define DMCpuGMTask_DEFINED
 
 #include "DMExpectations.h"
 #include "DMReporter.h"
@@ -15,17 +15,16 @@
 
 namespace DM {
 
-class CpuTask : public Task {
+class CpuGMTask : public CpuTask {
 public:
-    CpuTask(const char* config,
-            Reporter*,
-            TaskRunner*,
-            const Expectations&,
-            skiagm::GMRegistry::Factory,
-            SkColorType);
+    CpuGMTask(const char* config,
+              Reporter*,
+              TaskRunner*,
+              const Expectations&,
+              skiagm::GMRegistry::Factory,
+              SkColorType);
 
     virtual void draw() SK_OVERRIDE;
-    virtual bool usesGpu() const SK_OVERRIDE { return false; }
     virtual bool shouldSkip() const SK_OVERRIDE;
     virtual SkString name() const SK_OVERRIDE { return fName; }
 
@@ -39,4 +38,4 @@ private:
 
 }  // namespace DM
 
-#endif // DMCpuTask_DEFINED
+#endif // DMCpuGMTask_DEFINED
diff --git a/dm/DMExpectationsTask.cpp b/dm/DMExpectationsTask.cpp
index cb92486269..e29257afbd 100644
--- a/dm/DMExpectationsTask.cpp
+++ b/dm/DMExpectationsTask.cpp
@@ -6,7 +6,7 @@ namespace DM {
 ExpectationsTask::ExpectationsTask(const Task& parent,
                                    const Expectations& expectations,
                                    SkBitmap bitmap)
-    : Task(parent)
+    : CpuTask(parent)
     , fName(parent.name())  // Masquerade as parent so failures are attributed to it.
     , fExpectations(expectations)
     , fBitmap(bitmap)
diff --git a/dm/DMExpectationsTask.h b/dm/DMExpectationsTask.h
index cf76fc8bdf..7000de4b68 100644
--- a/dm/DMExpectationsTask.h
+++ b/dm/DMExpectationsTask.h
@@ -10,12 +10,11 @@ namespace DM {
 
 // ExpectationsTask compares an SkBitmap against some Expectations.
 // Moving this off the GPU threadpool is a nice (~30%) runtime win.
-class ExpectationsTask : public Task {
+class ExpectationsTask : public CpuTask {
 public:
     ExpectationsTask(const Task& parent, const Expectations&, SkBitmap);
 
     virtual void draw() SK_OVERRIDE;
-    virtual bool usesGpu() const SK_OVERRIDE { return false; }
     virtual bool shouldSkip() const SK_OVERRIDE { return false; }
     virtual SkString name() const SK_OVERRIDE { return fName; }
 
diff --git a/dm/DMGpuTask.cpp b/dm/DMGpuGMTask.cpp
index c285d88fd7..cffa2291c5 100644
--- a/dm/DMGpuTask.cpp
+++ b/dm/DMGpuGMTask.cpp
@@ -1,4 +1,4 @@
-#include "DMGpuTask.h"
+#include "DMGpuGMTask.h"
 
 #include "DMExpectationsTask.h"
 #include "DMUtil.h"
@@ -9,14 +9,14 @@
 
 namespace DM {
 
-GpuTask::GpuTask(const char* config,
-                 Reporter* reporter,
-                 TaskRunner* taskRunner,
-                 const Expectations& expectations,
-                 skiagm::GMRegistry::Factory gmFactory,
-                 GrContextFactory::GLContextType contextType,
-                 int sampleCount)
-    : Task(reporter, taskRunner)
+GpuGMTask::GpuGMTask(const char* config,
+                     Reporter* reporter,
+                     TaskRunner* taskRunner,
+                     const Expectations& expectations,
+                     skiagm::GMRegistry::Factory gmFactory,
+                     GrContextFactory::GLContextType contextType,
+                     int sampleCount)
+    : GpuTask(reporter, taskRunner)
     , fGM(gmFactory(NULL))
     , fName(UnderJoin(fGM->getName(), config))
     , fExpectations(expectations)
@@ -24,13 +24,13 @@ GpuTask::GpuTask(const char* config,
     , fSampleCount(sampleCount)
     {}
 
-void GpuTask::draw() {
+void GpuGMTask::draw(GrContextFactory* grFactory) {
     SkImageInfo info = SkImageInfo::Make(SkScalarCeilToInt(fGM->width()),
                                          SkScalarCeilToInt(fGM->height()),
                                          kPMColor_SkColorType,
                                          kPremul_SkAlphaType);
     SkAutoTUnref<SkSurface> surface(SkSurface::NewRenderTarget(
-            this->getGrContextFactory()->get(fContextType), info, fSampleCount));
+            grFactory->get(fContextType), info, fSampleCount));
     SkCanvas* canvas = surface->getCanvas();
 
     canvas->concat(fGM->getInitialTransform());
@@ -49,7 +49,7 @@ void GpuTask::draw() {
     this->spawnChild(SkNEW_ARGS(WriteTask, (*this, bitmap)));
 }
 
-bool GpuTask::shouldSkip() const {
+bool GpuGMTask::shouldSkip() const {
     return SkToBool(fGM->getFlags() & skiagm::GM::kSkipGPU_Flag);
 }
 
diff --git a/dm/DMGpuTask.h b/dm/DMGpuGMTask.h
index f74950c088..141994e6d3 100644
--- a/dm/DMGpuTask.h
+++ b/dm/DMGpuGMTask.h
@@ -1,5 +1,5 @@
-#ifndef DMGpuTask_DEFINED
-#define DMGpuTask_DEFINED
+#ifndef DMGpuGMTask_DEFINED
+#define DMGpuGMTask_DEFINED
 
 #include "DMExpectations.h"
 #include "DMReporter.h"
@@ -15,18 +15,17 @@
 
 namespace DM {
 
-class GpuTask : public Task {
+class GpuGMTask : public GpuTask {
 public:
-    GpuTask(const char* config,
-            Reporter*,
-            TaskRunner*,
-            const Expectations&,
-            skiagm::GMRegistry::Factory,
-            GrContextFactory::GLContextType,
-            int sampleCount);
-
-    virtual void draw() SK_OVERRIDE;
-    virtual bool usesGpu() const SK_OVERRIDE { return true; }
+    GpuGMTask(const char* config,
+              Reporter*,
+              TaskRunner*,
+              const Expectations&,
+              skiagm::GMRegistry::Factory,
+              GrContextFactory::GLContextType,
+              int sampleCount);
+
+    virtual void draw(GrContextFactory*) SK_OVERRIDE;
     virtual bool shouldSkip() const SK_OVERRIDE;
     virtual SkString name() const SK_OVERRIDE { return fName; }
 
@@ -40,4 +39,4 @@ private:
 
 }  // namespace DM
 
-#endif  // DMGpuTask_DEFINED
+#endif  // DMGpuGMTask_DEFINED
diff --git a/dm/DMPipeTask.cpp b/dm/DMPipeTask.cpp
index 163f1e64db..513594e0ea 100644
--- a/dm/DMPipeTask.cpp
+++ b/dm/DMPipeTask.cpp
@@ -38,7 +38,7 @@ PipeTask::PipeTask(const Task& parent,
                    SkBitmap reference,
                    bool crossProcess,
                    bool sharedAddressSpace)
-    : Task(parent)
+    : CpuTask(parent)
     , fFlags(get_flags(crossProcess, sharedAddressSpace))
     , fName(UnderJoin(parent.name().c_str(), get_name(fFlags)))
     , fGM(gm)
diff --git a/dm/DMPipeTask.h b/dm/DMPipeTask.h
index 23bbfef114..c251d08328 100644
--- a/dm/DMPipeTask.h
+++ b/dm/DMPipeTask.h
@@ -11,7 +11,7 @@
 
 namespace DM {
 
-class PipeTask : public Task {
+class PipeTask : public CpuTask {
 
 public:
     PipeTask(const Task& parent,        // PipeTask must be a child task.  Pass its parent here.
@@ -21,7 +21,6 @@ public:
              bool sharedAddressSpace);  // If cross process, should it assume shared address space?
 
     virtual void draw() SK_OVERRIDE;
-    virtual bool usesGpu() const SK_OVERRIDE { return false; }
     virtual bool shouldSkip() const SK_OVERRIDE;
     virtual SkString name() const SK_OVERRIDE { return fName; }
 
diff --git a/dm/DMReplayTask.cpp b/dm/DMReplayTask.cpp
index 3d010358c2..c915be20d2 100644
--- a/dm/DMReplayTask.cpp
+++ b/dm/DMReplayTask.cpp
@@ -14,7 +14,7 @@ ReplayTask::ReplayTask(const Task& parent,
                        skiagm::GM* gm,
                        SkBitmap reference,
                        bool useRTree)
-    : Task(parent)
+    : CpuTask(parent)
     , fName(UnderJoin(parent.name().c_str(), useRTree ? "rtree" : "replay"))
     , fGM(gm)
     , fReference(reference)
diff --git a/dm/DMReplayTask.h b/dm/DMReplayTask.h
index 1245009923..78bef0dfd8 100644
--- a/dm/DMReplayTask.h
+++ b/dm/DMReplayTask.h
@@ -11,7 +11,7 @@
 
 namespace DM {
 
-class ReplayTask : public Task {
+class ReplayTask : public CpuTask {
 
 public:
     ReplayTask(const Task& parent,  // ReplayTask must be a child task.  Pass its parent here.
@@ -20,7 +20,6 @@ public:
                bool useRTree);      // Record with an RTree?
 
     virtual void draw() SK_OVERRIDE;
-    virtual bool usesGpu() const SK_OVERRIDE { return false; }
     virtual bool shouldSkip() const SK_OVERRIDE;
     virtual SkString name() const SK_OVERRIDE { return fName; }
 
diff --git a/dm/DMSerializeTask.cpp b/dm/DMSerializeTask.cpp
index 7e45d49be5..4f55de57d5 100644
--- a/dm/DMSerializeTask.cpp
+++ b/dm/DMSerializeTask.cpp
@@ -13,7 +13,7 @@ namespace DM {
 SerializeTask::SerializeTask(const Task& parent,
                              skiagm::GM* gm,
                              SkBitmap reference)
-    : Task(parent)
+    : CpuTask(parent)
     , fName(UnderJoin(parent.name().c_str(), "serialize"))
     , fGM(gm)
     , fReference(reference)
diff --git a/dm/DMSerializeTask.h b/dm/DMSerializeTask.h
index 30a4303b06..1f8b83632b 100644
--- a/dm/DMSerializeTask.h
+++ b/dm/DMSerializeTask.h
@@ -11,7 +11,7 @@
 
 namespace DM {
 
-class SerializeTask : public Task {
+class SerializeTask : public CpuTask {
 
 public:
     SerializeTask(const Task& parent,
@@ -19,7 +19,6 @@ public:
                   SkBitmap reference);
 
     virtual void draw() SK_OVERRIDE;
-    virtual bool usesGpu() const SK_OVERRIDE { return false; }
     virtual bool shouldSkip() const SK_OVERRIDE;
     virtual SkString name() const SK_OVERRIDE { return fName; }
 
diff --git a/dm/DMTask.cpp b/dm/DMTask.cpp
index d26971c890..1c4cc25693 100644
--- a/dm/DMTask.cpp
+++ b/dm/DMTask.cpp
@@ -1,53 +1,59 @@
 #include "DMTask.h"
-
 #include "DMTaskRunner.h"
-#include "DMUtil.h"
-#include "SkBitmap.h"
-#include "SkCommandLineFlags.h"
 
 namespace DM {
 
 Task::Task(Reporter* reporter, TaskRunner* taskRunner)
-    : fReporter(reporter), fTaskRunner(taskRunner), fDepth(0) {
+    : fReporter(reporter)
+    , fTaskRunner(taskRunner)
+    , fDepth(0) {
     fReporter->start();
 }
 
 Task::Task(const Task& parent)
-    : INHERITED(parent)
-    , fReporter(parent.fReporter)
+    : fReporter(parent.fReporter)
     , fTaskRunner(parent.fTaskRunner)
-    , fDepth(parent.depth()+1) {
+    , fDepth(parent.depth() + 1) {
     fReporter->start();
 }
 
-Task::~Task() {}
-
-void Task::run() {
-    if (!this->shouldSkip()) {
-        this->draw();
+void Task::fail(const char* msg) {
+    SkString failure(this->name());
+    if (msg) {
+        failure.appendf(": %s", msg);
     }
+    fReporter->fail(failure);
+}
+
+void Task::finish() {
     fReporter->finish(this->name());
-    delete this;
 }
 
-void Task::spawnChild(Task* task) {
-    if (!task->usesGpu()) {
-        fTaskRunner->add(task);
-    } else {
-        SkDEBUGFAIL("Sorry, we can't spawn GPU tasks. :(  See comment in TaskRunner::wait().");
-    }
+void Task::spawnChild(CpuTask* task) {
+    fTaskRunner->add(task);
 }
 
-void Task::fail(const char* msg) {
-    SkString failure(this->name());
-    if (msg) {
-        failure.appendf(": %s", msg);
+CpuTask::CpuTask(Reporter* reporter, TaskRunner* taskRunner) : Task(reporter, taskRunner) {}
+CpuTask::CpuTask(const Task& parent) : Task(parent) {}
+
+void CpuTask::run() {
+    if (!this->shouldSkip()) {
+        this->draw();
     }
-    fReporter->fail(failure);
+    this->finish();
+    SkDELETE(this);
 }
 
-GrContextFactory* Task::getGrContextFactory() const {
-    return fTaskRunner->getGrContextFactory();
+GpuTask::GpuTask(Reporter* reporter, TaskRunner* taskRunner) : Task(reporter, taskRunner) {}
+
+void GpuTask::run(GrContextFactory& factory) {
+    if (!this->shouldSkip()) {
+        this->draw(&factory);
+    }
+    this->finish();
+    SkDELETE(this);
 }
 
+
+
 }  // namespace DM
diff --git a/dm/DMTask.h b/dm/DMTask.h
index e8598df85e..cad8234c05 100644
--- a/dm/DMTask.h
+++ b/dm/DMTask.h
@@ -4,28 +4,21 @@
 #include "DMReporter.h"
 #include "GrContextFactory.h"
 #include "SkRunnable.h"
-#include "SkThreadPool.h"
 
-// DM will run() these tasks on one of two threadpools, depending on the result
-// of usesGpu().  The subclasses can call fail() to mark this task as failed,
-// or make any number of spawnChild() calls to kick off dependent tasks.
+// DM will run() these tasks on one of two threadpools.
+// Subclasses can call fail() to mark this task as failed, or make any number of spawnChild() calls
+// to kick off dependent tasks.
 //
-// Task deletes itself when run.
+// Tasks delete themselves when run.
 
 namespace DM {
 
 class TaskRunner;
 
-class Task : public SkRunnable {
-public:
-    Task(Reporter* reporter, TaskRunner* taskRunner);
-    Task(const Task& parent);
-    virtual ~Task();
-
-    void run() SK_OVERRIDE;
+class CpuTask;
 
-    virtual void draw() = 0;
-    virtual bool usesGpu() const = 0;
+class Task {
+public:
     virtual bool shouldSkip() const = 0;
     virtual SkString name() const = 0;
 
@@ -34,19 +27,37 @@ public:
     int depth() const { return fDepth; }
 
 protected:
-    void spawnChild(Task* task);
-    void fail(const char* msg = NULL);
+    Task(Reporter* reporter, TaskRunner* taskRunner);
+    Task(const Task& parent);
+    virtual ~Task() {}
 
-    // This can only be safely called from a GPU task's draw() method.
-    GrContextFactory* getGrContextFactory() const;
+    void fail(const char* msg = NULL);
+    void finish();
+    void spawnChild(CpuTask* task);  // For now we don't allow GPU child tasks.
 
 private:
-    // Both unowned.
-    Reporter* fReporter;
-    TaskRunner* fTaskRunner;
+    Reporter* fReporter;      // Unowned.
+    TaskRunner* fTaskRunner;  // Unowned.
     int fDepth;
+};
+
+class CpuTask : public Task, public SkRunnable {
+public:
+    CpuTask(Reporter* reporter, TaskRunner* taskRunner);
+    CpuTask(const Task& parent);  // For child tasks; delegates to Task(const Task& parent).
+    virtual ~CpuTask() {}
+
+    void run() SK_OVERRIDE;  // Calls draw() unless shouldSkip(), then finish() and self-delete.
+    virtual void draw() = 0;
+};
+
+class GpuTask : public Task, public SkTRunnable<GrContextFactory> {
+public:
+    GpuTask(Reporter* reporter, TaskRunner* taskRunner);
+    virtual ~GpuTask() {}
 
-    typedef SkRunnable INHERITED;
+    void run(GrContextFactory&) SK_OVERRIDE;
+    virtual void draw(GrContextFactory*) = 0;  // Receives the address of run()'s factory.
 };
 
 }  // namespace DM
diff --git a/dm/DMTaskRunner.cpp b/dm/DMTaskRunner.cpp
index bd53ce615a..e0bd977288 100644
--- a/dm/DMTaskRunner.cpp
+++ b/dm/DMTaskRunner.cpp
@@ -3,48 +3,19 @@
 
 namespace DM {
 
+TaskRunner::TaskRunner(int cpuThreads, int gpuThreads) : fCpu(cpuThreads), fGpu(gpuThreads) {}
 
-TaskRunner::TaskRunner(int cputhreads)
-    : fMain(cputhreads)
-    , fGpu(1) {
-    // Enqueue a task on the GPU thread to create a GrContextFactory.
-    struct Create : public SkRunnable {
-        Create(GrContextFactory** ptr) : fPtr(ptr) {}
-        void run() SK_OVERRIDE {
-            *fPtr = SkNEW(GrContextFactory);
-            delete this;
-        }
-        GrContextFactory** fPtr;
-    };
-    fGpu.add(SkNEW_ARGS(Create, (&fGrContextFactory)));
-}
+void TaskRunner::add(CpuTask* task) { fCpu.add(task); }
 
-void TaskRunner::add(Task* task) {
-    if (task->usesGpu()) {
-        fGpu.add(task);
-    } else {
-        fMain.add(task);
-    }
-}
+void TaskRunner::add(GpuTask* task) { fGpu.add(task); }
 
 void TaskRunner::wait() {
-    // Enqueue a task on the GPU thread to destroy the GrContextFactory.
-    struct Delete : public SkRunnable {
-        Delete(GrContextFactory* ptr) : fPtr(ptr) {}
-        void run() SK_OVERRIDE {
-            delete fPtr;
-            delete this;
-        }
-        GrContextFactory* fPtr;
-    };
-    fGpu.add(SkNEW_ARGS(Delete, (fGrContextFactory)));
-
-    // These wait calls block until the threadpool is done.  We don't allow
-    // children to spawn new GPU tasks so we can wait for that first knowing
-    // we'll never try to add to it later.  Same can't be said of fMain: fGpu
-    // and fMain can both add tasks to fMain, so we have to wait for that last.
+    // These wait calls block until each threadpool is done.  We don't allow
+    // spawning new child GPU tasks, so we can wait for that first knowing
+    // we'll never try to add to it later.  Same can't be said of the CPU pool:
+    // both CPU and GPU tasks can spawn off new CPU work, so we wait for that last.
     fGpu.wait();
-    fMain.wait();
+    fCpu.wait();
 }
 
 }  // namespace DM
diff --git a/dm/DMTaskRunner.h b/dm/DMTaskRunner.h
index 8af1b63719..c7b40588e9 100644
--- a/dm/DMTaskRunner.h
+++ b/dm/DMTaskRunner.h
@@ -5,26 +5,25 @@
 #include "SkThreadPool.h"
 #include "SkTypes.h"
 
-// TaskRunner runs Tasks on one of two threadpools depending on the Task's usesGpu() method.  This
-// lets us drive the GPU from a single thread while parallelizing CPU-bound work.
+// TaskRunner runs Tasks on one of two threadpools depending on the need for a GrContextFactory.
+// It's typically a good idea to run fewer GPU threads than CPU threads (go nuts with those).
 
 namespace DM {
 
-class Task;
+class CpuTask;
+class GpuTask;
 
 class TaskRunner : SkNoncopyable {
 public:
-    explicit TaskRunner(int cputhreads);
+    explicit TaskRunner(int cpuThreads, int gpuThreads);
 
-    void add(Task* task);
+    void add(CpuTask* task);
+    void add(GpuTask* task);
     void wait();
 
-    // This can only be safely called from a GPU task's draw() method.
-    GrContextFactory* getGrContextFactory() const { return fGrContextFactory; }
-
 private:
-    SkThreadPool fMain, fGpu;
-    GrContextFactory* fGrContextFactory;  // Created and destroyed on fGpu threadpool.
+    SkTThreadPool<void> fCpu;
+    SkTThreadPool<GrContextFactory> fGpu;
 };
 
 }  // namespace DM
diff --git a/dm/DMTestTask.cpp b/dm/DMTestTask.cpp
index 32a698c673..6c3fcedf54 100644
--- a/dm/DMTestTask.cpp
+++ b/dm/DMTestTask.cpp
@@ -8,23 +8,32 @@ DEFINE_bool2(pathOpsVerbose,      V, false, "Tell pathOps tests to be verbose.")
 
 namespace DM {
 
+bool TestReporter::allowExtendedTest() const { return FLAGS_pathOpsExtended; }
+bool TestReporter::allowThreaded()     const { return !FLAGS_pathOpsSingleThread; }
+bool TestReporter::verbose()           const { return FLAGS_pathOpsVerbose; }
+
 static SkString test_name(const char* name) {
     SkString result("test ");
     result.append(name);
     return result;
 }
 
-TestTask::TestTask(Reporter* reporter,
-                   TaskRunner* taskRunner,
-                   skiatest::TestRegistry::Factory factory)
-    : Task(reporter, taskRunner)
+CpuTestTask::CpuTestTask(Reporter* reporter,
+                         TaskRunner* taskRunner,
+                         skiatest::TestRegistry::Factory factory)
+    : CpuTask(reporter, taskRunner)
     , fTest(factory(NULL))
     , fName(test_name(fTest->getName())) {}
 
-void TestTask::draw() {
-    if (this->usesGpu()) {
-        fTest->setGrContextFactory(this->getGrContextFactory());
-    }
+GpuTestTask::GpuTestTask(Reporter* reporter,
+                         TaskRunner* taskRunner,
+                         skiatest::TestRegistry::Factory factory)
+    : GpuTask(reporter, taskRunner)
+    , fTest(factory(NULL))
+    , fName(test_name(fTest->getName())) {}
+
+
+void CpuTestTask::draw() {
     fTest->setReporter(&fTestReporter);
     fTest->run();
     if (!fTest->passed()) {
@@ -32,8 +41,13 @@ void TestTask::draw() {
     }
 }
 
-bool TestTask::TestReporter::allowExtendedTest() const { return FLAGS_pathOpsExtended; }
-bool TestTask::TestReporter::allowThreaded()     const { return !FLAGS_pathOpsSingleThread; }
-bool TestTask::TestReporter::verbose()           const { return FLAGS_pathOpsVerbose; }
+void GpuTestTask::draw(GrContextFactory* grFactory) {
+    // Give the test its context factory and reporter, run it, then report a failure if any.
+    fTest->setGrContextFactory(grFactory);
+    fTest->setReporter(&fTestReporter);
+    fTest->run();
+    const bool passed = fTest->passed();
+    if (!passed) { this->fail(fTestReporter.failure()); }
+}
 
 }  // namespace DM
diff --git a/dm/DMTestTask.h b/dm/DMTestTask.h
index 49a8e773b6..87f59209b2 100644
--- a/dm/DMTestTask.h
+++ b/dm/DMTestTask.h
@@ -11,34 +11,47 @@
 // Runs a unit test.
 namespace DM {
 
-class TestTask : public Task {
+class TestReporter : public skiatest::Reporter {
 public:
-    TestTask(Reporter*, TaskRunner*, skiatest::TestRegistry::Factory);
+    TestReporter() {}
+
+    const char* failure() const { return fFailure.c_str(); }  // Latest failure text.
+
+private:
+    virtual bool allowExtendedTest() const SK_OVERRIDE;
+    virtual bool allowThreaded()     const SK_OVERRIDE;
+    virtual bool verbose()           const SK_OVERRIDE;
+
+    virtual void onReportFailed(const SkString& desc) SK_OVERRIDE {
+        fFailure = desc;  // Overwrites any earlier failure; only the latest is kept.
+    }
+
+    SkString fFailure;
+};
+
+class CpuTestTask : public CpuTask {
+public:
+    CpuTestTask(Reporter*, TaskRunner*, skiatest::TestRegistry::Factory);
 
     virtual void draw() SK_OVERRIDE;
-    virtual bool usesGpu() const SK_OVERRIDE { return fTest->isGPUTest(); }
     virtual bool shouldSkip() const SK_OVERRIDE { return false; }
     virtual SkString name() const SK_OVERRIDE { return fName; }
 
 private:
-    class TestReporter : public skiatest::Reporter {
-    public:
-      TestReporter() {}
-
-      const char* failure() const { return fFailure.c_str(); }
-
-    private:
-      virtual bool allowExtendedTest() const SK_OVERRIDE;
-      virtual bool allowThreaded()     const SK_OVERRIDE;
-      virtual bool verbose()           const SK_OVERRIDE;
+    TestReporter fTestReporter;
+    SkAutoTDelete<skiatest::Test> fTest;
+    const SkString fName;
+};
 
-      virtual void onReportFailed(const SkString& desc) SK_OVERRIDE {
-          fFailure = desc;
-      }
+class GpuTestTask : public GpuTask {
+public:
+    GpuTestTask(Reporter*, TaskRunner*, skiatest::TestRegistry::Factory);
 
-      SkString fFailure;
-    };
+    virtual void draw(GrContextFactory*) SK_OVERRIDE;
+    virtual bool shouldSkip() const SK_OVERRIDE { return false; }
+    virtual SkString name() const SK_OVERRIDE { return fName; }
 
+private:
     TestReporter fTestReporter;
     SkAutoTDelete<skiatest::Test> fTest;
     const SkString fName;
diff --git a/dm/DMTileGridTask.cpp b/dm/DMTileGridTask.cpp
index beffbb0371..f9cac07de3 100644
--- a/dm/DMTileGridTask.cpp
+++ b/dm/DMTileGridTask.cpp
@@ -12,7 +12,7 @@ DEFINE_bool(tileGrid, false, "If true, run picture replay tests with a tile grid
 namespace DM {
 
 TileGridTask::TileGridTask(const Task& parent, skiagm::GM* gm, SkBitmap reference, SkISize tileSize)
-    : Task(parent)
+    : CpuTask(parent)
     , fName(UnderJoin(parent.name().c_str(), "tilegrid"))
     , fGM(gm)
     , fReference(reference)
diff --git a/dm/DMTileGridTask.h b/dm/DMTileGridTask.h
index 4a522b956c..911a1c52a1 100644
--- a/dm/DMTileGridTask.h
+++ b/dm/DMTileGridTask.h
@@ -11,7 +11,7 @@
 
 namespace DM {
 
-class TileGridTask : public Task {
+class TileGridTask : public CpuTask {
 
 public:
     TileGridTask(const Task& parent,  // TileGridTask must be a child task.  Pass its parent here.
@@ -20,7 +20,6 @@ public:
                  SkISize tileSize);   // Tile size to use.
 
     virtual void draw() SK_OVERRIDE;
-    virtual bool usesGpu() const SK_OVERRIDE { return false; }
     virtual bool shouldSkip() const SK_OVERRIDE;
     virtual SkString name() const SK_OVERRIDE { return fName; }
 
diff --git a/dm/DMWriteTask.cpp b/dm/DMWriteTask.cpp
index 5adb1d0437..e30cbdbf85 100644
--- a/dm/DMWriteTask.cpp
+++ b/dm/DMWriteTask.cpp
@@ -26,7 +26,7 @@ static int split_suffixes(int N, const char* name, SkTArray<SkString>* out) {
     return consumed;
 }
 
-WriteTask::WriteTask(const Task& parent, SkBitmap bitmap) : Task(parent), fBitmap(bitmap) {
+WriteTask::WriteTask(const Task& parent, SkBitmap bitmap) : CpuTask(parent), fBitmap(bitmap) {
     const int suffixes = parent.depth() + 1;
     const SkString& name = parent.name();
     const int totalSuffixLength = split_suffixes(suffixes, name.c_str(), &fSuffixes);
diff --git a/dm/DMWriteTask.h b/dm/DMWriteTask.h
index 49a5c746a6..839abd7ef1 100644
--- a/dm/DMWriteTask.h
+++ b/dm/DMWriteTask.h
@@ -12,14 +12,13 @@
 
 namespace DM {
 
-class WriteTask : public Task {
+class WriteTask : public CpuTask {
 
 public:
     WriteTask(const Task& parent,  // WriteTask must be a child Task.  Pass its parent here.
               SkBitmap bitmap);    // Bitmap to write.
 
     virtual void draw() SK_OVERRIDE;
-    virtual bool usesGpu() const SK_OVERRIDE { return false; }
     virtual bool shouldSkip() const SK_OVERRIDE;
     virtual SkString name() const SK_OVERRIDE;
 
diff --git a/gyp/dm.gyp b/gyp/dm.gyp
index db509938db..c96281aef6 100644
--- a/gyp/dm.gyp
+++ b/gyp/dm.gyp
@@ -29,9 +29,9 @@
         'sources': [
             '../dm/DM.cpp',
             '../dm/DMBenchTask.cpp',
-            '../dm/DMCpuTask.cpp',
+            '../dm/DMCpuGMTask.cpp',
             '../dm/DMExpectationsTask.cpp',
-            '../dm/DMGpuTask.cpp',
+            '../dm/DMGpuGMTask.cpp',
             '../dm/DMPipeTask.cpp',
             '../dm/DMReplayTask.cpp',
             '../dm/DMReporter.cpp',
diff --git a/gyp/utils.gyp b/gyp/utils.gyp
index 3f6e5b5bf1..0d6c2ac9e1 100644
--- a/gyp/utils.gyp
+++ b/gyp/utils.gyp
@@ -30,7 +30,6 @@
         '../include/utils/SkThreadPool.h',
         '../src/utils/SkCondVar.cpp',
         '../src/utils/SkCountdown.cpp',
-        '../src/utils/SkThreadPool.cpp',
 
         '../include/utils/SkBoundaryPatch.h',
         '../include/utils/SkFrontBufferedStream.h',
@@ -227,6 +226,7 @@
       'direct_dependent_settings': {
         'include_dirs': [
           '../include/utils',
+          '../src/utils',
         ],
       },
     },
diff --git a/include/utils/SkRunnable.h b/include/utils/SkRunnable.h
index 84e43750f6..5acf4dbc61 100644
--- a/include/utils/SkRunnable.h
+++ b/include/utils/SkRunnable.h
@@ -8,10 +8,24 @@
 #ifndef SkRunnable_DEFINED
 #define SkRunnable_DEFINED
 
-class SkRunnable {
-public:
-    virtual ~SkRunnable() {};
+/**
+ * Interface for a unit of work.  run() is handed a reference to a T supplied by whoever
+ * executes the runnable.
+ */
+template <typename T>
+struct SkTRunnable {
+    virtual ~SkTRunnable() {}
+    virtual void run(T&) = 0;
+};
+
+/** Specialization for work that needs no argument: run() takes nothing. */
+template <>
+struct SkTRunnable<void> {
+    virtual ~SkTRunnable() {}
     virtual void run() = 0;
 };
 
+/** The argument-less runnable, still available under the name SkRunnable. */
+typedef SkTRunnable<void> SkRunnable;
+
 #endif
diff --git a/include/utils/SkThreadPool.h b/include/utils/SkThreadPool.h
index 0aa7c08ad5..a75bed8be4 100644
--- a/include/utils/SkThreadPool.h
+++ b/include/utils/SkThreadPool.h
@@ -12,24 +12,42 @@
 #include "SkRunnable.h"
 #include "SkTDArray.h"
 #include "SkTInternalLList.h"
+#include "SkThreadUtils.h"
+#include "SkTypes.h"
 
-class SkThread;
+#if defined(SK_BUILD_FOR_UNIX) || defined(SK_BUILD_FOR_MAC) || defined(SK_BUILD_FOR_ANDROID)
+#    include <unistd.h>
+#endif
 
-class SkThreadPool {
+// Returns the number of cores on this machine, or 1 on platforms we don't know how to query.
+static inline int num_cores() {
+#if defined(SK_BUILD_FOR_WIN32)
+    SYSTEM_INFO sysinfo;
+    GetSystemInfo(&sysinfo);
+    return sysinfo.dwNumberOfProcessors;
+#elif defined(SK_BUILD_FOR_UNIX) || defined(SK_BUILD_FOR_MAC) || defined(SK_BUILD_FOR_ANDROID)
+    return sysconf(_SC_NPROCESSORS_ONLN);
+#else
+    return 1;
+#endif
+}
 
+template <typename T>
+class SkTThreadPool {
 public:
     /**
      * Create a threadpool with count threads, or one thread per core if kThreadPerCore.
      */
     static const int kThreadPerCore = -1;
-    explicit SkThreadPool(int count);
-    ~SkThreadPool();
+    explicit SkTThreadPool(int count);
+    ~SkTThreadPool();
 
     /**
-     * Queues up an SkRunnable to run when a thread is available, or immediately if
-     * count is 0.  NULL is a safe no-op.  Does not take ownership.
+     * Queues up an SkRunnable to run when a thread is available, or synchronously if count is 0.
+     * Does not take ownership. NULL is a safe no-op.  If T is not void, the runnable will be passed
+     * a reference to a T on the thread's local stack.
      */
-    void add(SkRunnable*);
+    void add(SkTRunnable<T>*);
 
     /**
      * Block until all added SkRunnables have completed.  Once called, calling add() is undefined.
@@ -38,10 +56,7 @@ public:
 
  private:
     struct LinkedRunnable {
-        // Unowned pointer.
-        SkRunnable* fRunnable;
-
-    private:
+        SkTRunnable<T>* fRunnable;  // Unowned.
         SK_DECLARE_INTERNAL_LLIST_INTERFACE(LinkedRunnable);
     };
 
@@ -60,4 +75,128 @@ public:
     static void Loop(void*);  // Static because we pass in this.
 };
 
+template <typename T>
+SkTThreadPool<T>::SkTThreadPool(int count) : fState(kRunning_State), fBusyThreads(0) {
+    // Any negative count (kThreadPerCore is -1) means one thread per core.
+    const int numThreads = count < 0 ? num_cores() : count;
+    // Start numThreads threads.  Each runs SkTThreadPool::Loop and is handed this pool as its
+    // argument; the pool keeps the SkThread pointers in fThreads.
+    for (int t = 0; t < numThreads; ++t) {
+        SkThread* worker = SkNEW_ARGS(SkThread, (&SkTThreadPool::Loop, this));
+        *fThreads.append() = worker;
+        worker->start();
+    }
+}
+
+template <typename T>
+SkTThreadPool<T>::~SkTThreadPool() {
+    // wait() moves fState off kRunning_State, so this only fires if nobody has called it yet.
+    if (kRunning_State != fState) { return; }
+    this->wait();
+}
+
+namespace SkThreadPoolPrivate {
+// Holds the T passed by reference to every runnable executed through one ThreadLocal<T>.
+template <typename T>
+struct ThreadLocal {
+    ThreadLocal() : data() {}  // Value-initialize so scalar/POD T never starts indeterminate.
+    void run(SkTRunnable<T>* r) { r->run(data); }
+    T data;
+};
+// void carries no data; run() simply forwards to the runnable.
+template <>
+struct ThreadLocal<void> {
+    void run(SkTRunnable<void>* r) { r->run(); }
+};
+}  // namespace SkThreadPoolPrivate
+
+template <typename T>
+void SkTThreadPool<T>::add(SkTRunnable<T>* r) {
+    if (NULL == r) {  // NULL is documented as a safe no-op.
+        return;
+    }
+    // No worker threads: run it synchronously here, with a ThreadLocal<T> scoped to this call.
+    if (fThreads.isEmpty()) {
+        SkThreadPoolPrivate::ThreadLocal<T> callerLocal;
+        callerLocal.run(r);
+        return;
+    }
+    // Build the list node before taking the lock, then push it onto the head of the queue.
+    LinkedRunnable* node = SkNEW(LinkedRunnable);
+    node->fRunnable = r;
+    fReady.lock();
+    SkASSERT(kHalting_State != fState);  // Work must never arrive once the pool is halting.
+    fQueue.addToHead(node);
+    fReady.signal();
+    fReady.unlock();
+}
+
+template <typename T>
+void SkTThreadPool<T>::wait() {
+    // Move to the waiting state and broadcast so every thread rechecks the pool's state.
+    fReady.lock();
+    fState = kWaiting_State;
+    fReady.broadcast();
+    fReady.unlock();
+    // Join and free every thread, then empty fThreads so no deleted pointers are left behind.
+    for (int i = 0; i < fThreads.count(); i++) {
+        fThreads[i]->join();
+        SkDELETE(fThreads[i]);
+    }
+    fThreads.reset();
+    SkASSERT(fQueue.isEmpty());
+}
+
+template <typename T>
+/*static*/ void SkTThreadPool<T>::Loop(void* arg) {
+    // The SkTThreadPool passes itself as arg to each thread as they're created.
+    SkTThreadPool<T>* pool = static_cast<SkTThreadPool<T>*>(arg);
+    SkThreadPoolPrivate::ThreadLocal<T> threadLocal;  // One per thread, reused for each runnable.
+
+    while (true) {
+        // We have to be holding the lock to read the queue and to call wait.
+        pool->fReady.lock();
+        while(pool->fQueue.isEmpty()) {
+            // Does the client want to stop and are all the threads ready to stop?
+            // If so, we move into the halting state, and whack all the threads so they notice.
+            if (kWaiting_State == pool->fState && pool->fBusyThreads == 0) {
+                pool->fState = kHalting_State;
+                pool->fReady.broadcast();
+            }
+            // Any time we find ourselves in the halting state, it's quitting time.
+            if (kHalting_State == pool->fState) {
+                pool->fReady.unlock();
+                return;
+            }
+            // wait yields the lock while waiting, but will have it again when awoken.
+            pool->fReady.wait();
+        }
+        // We've got the lock back here, no matter if we ran wait or not.
+
+        // Not empty: add() pushes at the head, so the tail is the oldest runnable.  Claim it.
+        LinkedRunnable* r = pool->fQueue.tail();
+
+        pool->fQueue.remove(r);
+
+        // Having claimed our SkRunnable, we now give up the lock while we run it.
+        // Otherwise, we'd only ever do work on one thread at a time, which rather
+        // defeats the point of this code.
+        pool->fBusyThreads++;
+        pool->fReady.unlock();
+
+        // OK, now really do the work.
+        threadLocal.run(r->fRunnable);
+        SkDELETE(r);
+
+        // Record that we're no longer busy.  No signal is sent; the next pass rechecks the state.
+        pool->fReady.lock();
+        pool->fBusyThreads--;
+        pool->fReady.unlock();
+    }
+
+    SkASSERT(false); // Unreachable.  The only exit happens when pool->fState is kHalting_State.
+}
+
+typedef SkTThreadPool<void> SkThreadPool;
+
 #endif
diff --git a/src/utils/SkThreadPool.cpp b/src/utils/SkThreadPool.cpp
deleted file mode 100644
index 125a5d9b6a..0000000000
--- a/src/utils/SkThreadPool.cpp
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Copyright 2012 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#include "SkRunnable.h"
-#include "SkThreadPool.h"
-#include "SkThreadUtils.h"
-#include "SkTypes.h"
-
-#if defined(SK_BUILD_FOR_UNIX) || defined(SK_BUILD_FOR_MAC) || defined(SK_BUILD_FOR_ANDROID)
-#include <unistd.h>
-#endif
-
-// Returns the number of cores on this machine.
-static int num_cores() {
-#if defined(SK_BUILD_FOR_WIN32)
-    SYSTEM_INFO sysinfo;
-    GetSystemInfo(&sysinfo);
-    return sysinfo.dwNumberOfProcessors;
-#elif defined(SK_BUILD_FOR_UNIX) || defined(SK_BUILD_FOR_MAC) || defined(SK_BUILD_FOR_ANDROID)
-    return sysconf(_SC_NPROCESSORS_ONLN);
-#else
-    return 1;
-#endif
-}
-
-SkThreadPool::SkThreadPool(int count)
-: fState(kRunning_State), fBusyThreads(0) {
-    if (count < 0) count = num_cores();
-    // Create count threads, all running SkThreadPool::Loop.
-    for (int i = 0; i < count; i++) {
-        SkThread* thread = SkNEW_ARGS(SkThread, (&SkThreadPool::Loop, this));
-        *fThreads.append() = thread;
-        thread->start();
-    }
-}
-
-SkThreadPool::~SkThreadPool() {
-    if (kRunning_State == fState) {
-        this->wait();
-    }
-}
-
-void SkThreadPool::wait() {
-    fReady.lock();
-    fState = kWaiting_State;
-    fReady.broadcast();
-    fReady.unlock();
-
-    // Wait for all threads to stop.
-    for (int i = 0; i < fThreads.count(); i++) {
-        fThreads[i]->join();
-        SkDELETE(fThreads[i]);
-    }
-    SkASSERT(fQueue.isEmpty());
-}
-
-/*static*/ void SkThreadPool::Loop(void* arg) {
-    // The SkThreadPool passes itself as arg to each thread as they're created.
-    SkThreadPool* pool = static_cast<SkThreadPool*>(arg);
-
-    while (true) {
-        // We have to be holding the lock to read the queue and to call wait.
-        pool->fReady.lock();
-        while(pool->fQueue.isEmpty()) {
-            // Does the client want to stop and are all the threads ready to stop?
-            // If so, we move into the halting state, and whack all the threads so they notice.
-            if (kWaiting_State == pool->fState && pool->fBusyThreads == 0) {
-                pool->fState = kHalting_State;
-                pool->fReady.broadcast();
-            }
-            // Any time we find ourselves in the halting state, it's quitting time.
-            if (kHalting_State == pool->fState) {
-                pool->fReady.unlock();
-                return;
-            }
-            // wait yields the lock while waiting, but will have it again when awoken.
-            pool->fReady.wait();
-        }
-        // We've got the lock back here, no matter if we ran wait or not.
-
-        // The queue is not empty, so we have something to run.  Claim it.
-        LinkedRunnable* r = pool->fQueue.tail();
-
-        pool->fQueue.remove(r);
-
-        // Having claimed our SkRunnable, we now give up the lock while we run it.
-        // Otherwise, we'd only ever do work on one thread at a time, which rather
-        // defeats the point of this code.
-        pool->fBusyThreads++;
-        pool->fReady.unlock();
-
-        // OK, now really do the work.
-        r->fRunnable->run();
-        SkDELETE(r);
-
-        // Let everyone know we're not busy.
-        pool->fReady.lock();
-        pool->fBusyThreads--;
-        pool->fReady.unlock();
-    }
-
-    SkASSERT(false); // Unreachable.  The only exit happens when pool->fState is kHalting_State.
-}
-
-void SkThreadPool::add(SkRunnable* r) {
-    if (NULL == r) {
-        return;
-    }
-
-    // If we don't have any threads, obligingly just run the thing now.
-    if (fThreads.isEmpty()) {
-        return r->run();
-    }
-
-    // We have some threads.  Queue it up!
-    fReady.lock();
-    SkASSERT(fState != kHalting_State);  // Shouldn't be able to add work when we're halting.
-    LinkedRunnable* linkedRunnable = SkNEW(LinkedRunnable);
-    linkedRunnable->fRunnable = r;
-    fQueue.addToHead(linkedRunnable);
-    fReady.signal();
-    fReady.unlock();
-}