Diffstat (limited to 'src/core/SkThreadedBMPDevice.cpp')
-rw-r--r--  src/core/SkThreadedBMPDevice.cpp  232
1 file changed, 214 insertions(+), 18 deletions(-)
diff --git a/src/core/SkThreadedBMPDevice.cpp b/src/core/SkThreadedBMPDevice.cpp
index 1cf7fe449a..0e45b9fbf6 100644
--- a/src/core/SkThreadedBMPDevice.cpp
+++ b/src/core/SkThreadedBMPDevice.cpp
@@ -11,28 +11,222 @@
#include "SkTaskGroup.h"
#include "SkVertices.h"
-SkThreadedBMPDevice::SkThreadedBMPDevice(const SkBitmap& bitmap, int threads)
+#include <mutex>
+#include <vector>
+
+constexpr int MAX_CACHE_LINE = 64;
+
+// Some basic logic and data structures shared across the current experimental schedulers.
+class TiledDrawSchedulerBase : public TiledDrawScheduler {
+public:
+ TiledDrawSchedulerBase(int tiles, WorkFunc work)
+ : fTileCnt(tiles), fIsFinishing(false), fDrawCnt(0), fWork(work) {}
+
+ void signal() override {
+ fDrawCnt++;
+ }
+ void finish() override {
+ fIsFinishing.store(true, std::memory_order_relaxed);
+ }
+
+protected:
+ const int fTileCnt;
+ std::atomic<bool> fIsFinishing;
+ std::atomic<int> fDrawCnt;
+ WorkFunc fWork;
+};
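
The schedulers above all implement the same producer/consumer contract: the device calls signal() once per queued draw, worker threads loop on next(), and finish() lets the workers drain the remaining draws and then return false. A minimal standalone sketch of that contract, assuming only the TiledDrawScheduler interface (signal/finish/next) that this file implements against; the harness itself is hypothetical and not part of this CL:

    #include <thread>
    #include <vector>

    void runSchedulerSketch(TiledDrawScheduler& scheduler, int tiles, int draws) {
        std::vector<std::thread> workers;
        for (int i = 0; i < tiles; ++i) {
            workers.emplace_back([&scheduler, i] {
                int tileIndex = i;                    // each worker starts on its own tile
                while (scheduler.next(tileIndex)) {}  // false only after finish() + drain
            });
        }
        for (int d = 0; d < draws; ++d) {
            scheduler.signal();                       // one signal per queued draw
        }
        scheduler.finish();                           // allow workers to exit
        for (auto& w : workers) { w.join(); }
    }
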
+
+class TiledDrawSchedulerBySpinning : public TiledDrawSchedulerBase {
+public:
+ TiledDrawSchedulerBySpinning(int tiles, WorkFunc work)
+ : TiledDrawSchedulerBase(tiles, work), fScheduleData(tiles) {}
+
+ void signal() final { this->TiledDrawSchedulerBase::signal(); }
+ void finish() final { this->TiledDrawSchedulerBase::finish(); }
+
+ bool next(int& tileIndex) final {
+ int& drawIndex = fScheduleData[tileIndex].fDrawIndex;
+ SkASSERT(drawIndex <= fDrawCnt);
+ while (true) {
+ bool isFinishing = fIsFinishing.load(std::memory_order_relaxed);
+ if (isFinishing && drawIndex >= fDrawCnt) {
+ return false;
+ } else if (drawIndex < fDrawCnt) {
+ fWork(tileIndex, drawIndex++);
+ return true;
+ }
+ }
+ }
+
+private:
+    // alignas(MAX_CACHE_LINE) to avoid false sharing across cache lines
+ struct alignas(MAX_CACHE_LINE) TileScheduleData {
+ TileScheduleData() : fDrawIndex(0) {}
+
+ int fDrawIndex; // next draw index for this tile
+ };
+
+ std::vector<TileScheduleData> fScheduleData;
+};
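
The alignas(MAX_CACHE_LINE) padding matters because each worker thread increments its own tile's fDrawIndex; unpadded, adjacent counters would share a 64-byte cache line and ping-pong between cores. A standalone illustration (not part of this CL) of what the attribute guarantees for the vector's element layout:

    struct alignas(64) PaddedCounter {  // 64 == MAX_CACHE_LINE above
        int fValue = 0;
    };
    // alignas rounds the struct's size up to its alignment, so consecutive
    // std::vector elements land on distinct cache lines.
    static_assert(sizeof(PaddedCounter) == 64, "one counter per cache line");
    static_assert(alignof(PaddedCounter) == 64, "elements start on line boundaries");
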
+
+class TiledDrawSchedulerFlexible : public TiledDrawSchedulerBase {
+public:
+ TiledDrawSchedulerFlexible(int tiles, WorkFunc work)
+ : TiledDrawSchedulerBase(tiles, work), fScheduleData(tiles) {}
+
+ void signal() final { this->TiledDrawSchedulerBase::signal(); }
+ void finish() final { this->TiledDrawSchedulerBase::finish(); }
+
+ bool next(int& tileIndex) final {
+ int failCnt = 0;
+ while (true) {
+ TileScheduleData& scheduleData = fScheduleData[tileIndex];
+ bool locked = scheduleData.fMutex.try_lock();
+ bool processed = false;
+
+ if (locked) {
+ if (scheduleData.fDrawIndex < fDrawCnt) {
+ fWork(tileIndex, scheduleData.fDrawIndex++);
+ processed = true;
+ } else {
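+                // fIsFinishing loads as 0 or 1, so a drained tile only counts
+                // toward giving up once finish() has been called.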
+ failCnt += fIsFinishing.load(std::memory_order_relaxed);
+ }
+ scheduleData.fMutex.unlock();
+ }
+
+ if (processed) {
+ return true;
+ } else {
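+                // Return false only after fTileCnt drained observations while
+                // finishing; the rotation below has then covered every tile.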
+ if (failCnt >= fTileCnt) {
+ return false;
+ }
+ tileIndex = (tileIndex + 1) % fTileCnt;
+ }
+ }
+ }
+
+private:
+    // alignas(MAX_CACHE_LINE) to avoid false sharing across cache lines
+ struct alignas(MAX_CACHE_LINE) TileScheduleData {
+ TileScheduleData() : fDrawIndex(0) {}
+
+ int fDrawIndex; // next draw index for this tile
+ std::mutex fMutex; // the mutex for the thread to acquire
+ };
+
+ std::vector<TileScheduleData> fScheduleData;
+};
+
+class TiledDrawSchedulerBySemaphores : public TiledDrawSchedulerBase {
+public:
+ TiledDrawSchedulerBySemaphores(int tiles, WorkFunc work)
+ : TiledDrawSchedulerBase(tiles, work), fScheduleData(tiles) {}
+
+ void signal() final {
+ this->TiledDrawSchedulerBase::signal();
+ signalRoot();
+ }
+
+ void finish() final {
+ this->TiledDrawSchedulerBase::finish();
+ signalRoot();
+ }
+
+ bool next(int& tileIndex) final {
+ SkASSERT(tileIndex >= 0 && tileIndex < fTileCnt);
+ TileScheduleData& scheduleData = fScheduleData[tileIndex];
+ while (true) {
+ scheduleData.fSemaphore.wait();
+ int leftChild = (tileIndex + 1) * 2 - 1;
+ int rightChild = leftChild + 1;
+ if (leftChild < fTileCnt) {
+ fScheduleData[leftChild].fSemaphore.signal();
+ }
+ if (rightChild < fTileCnt) {
+ fScheduleData[rightChild].fSemaphore.signal();
+ }
+
+ bool isFinishing = fIsFinishing.load(std::memory_order_relaxed);
+ if (isFinishing && scheduleData.fDrawIndex >= fDrawCnt) {
+ return false;
+ } else {
+ SkASSERT(scheduleData.fDrawIndex < fDrawCnt);
+ fWork(tileIndex, scheduleData.fDrawIndex++);
+ return true;
+ }
+ }
+ }
+
+private:
+    // alignas(MAX_CACHE_LINE) to avoid false sharing across cache lines
+ struct alignas(MAX_CACHE_LINE) TileScheduleData {
+ TileScheduleData() : fDrawIndex(0) {}
+
+ int fDrawIndex;
+ SkSemaphore fSemaphore;
+ };
+
+ void signalRoot() {
+ SkASSERT(fTileCnt > 0);
+ fScheduleData[0].fSemaphore.signal();
+ }
+
+ std::vector<TileScheduleData> fScheduleData;
+};
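
TiledDrawSchedulerBySemaphores arranges the tiles as an implicit binary heap: every signal() or finish() pokes tile 0's semaphore, and each tile that wakes forwards the wake-up to its two heap children, so a single signal fans out to all fTileCnt tiles, each exactly once. A standalone sketch of the same child-index arithmetic (the tile count of 7 is an arbitrary example value):

    #include <cstdio>

    int main() {
        const int tileCnt = 7;  // arbitrary example
        for (int i = 0; i < tileCnt; ++i) {
            int leftChild = (i + 1) * 2 - 1;  // == 2*i + 1, as in next() above
            int rightChild = leftChild + 1;   // == 2*i + 2
            std::printf("tile %d wakes:", i);
            if (leftChild < tileCnt) { std::printf(" %d", leftChild); }
            if (rightChild < tileCnt) { std::printf(" %d", rightChild); }
            std::printf("\n");
        }
        return 0;
    }
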
+
+void SkThreadedBMPDevice::startThreads() {
+ SkASSERT(fThreadFutures.count() == 0);
+ SkASSERT(fQueueSize == 0);
+
+ TiledDrawScheduler::WorkFunc work = [this](int tileIndex, int drawIndex){
+ auto& element = fQueue[drawIndex];
+ if (SkIRect::Intersects(fTileBounds[tileIndex], element.fDrawBounds)) {
+ element.fDrawFn(fTileBounds[tileIndex]);
+ }
+ };
+
+ // using Scheduler = TiledDrawSchedulerBySemaphores;
+ // using Scheduler = TiledDrawSchedulerBySpinning;
+ using Scheduler = TiledDrawSchedulerFlexible;
+ fScheduler.reset(new Scheduler(fTileCnt, work));
+ for(int i = 0; i < fThreadCnt; ++i) {
+ fThreadFutures.push_back(std::async(std::launch::async, [this, i]() {
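+            // Seed each worker with its own tile index; TiledDrawSchedulerFlexible
+            // may rotate it across tiles, while the other schedulers keep it fixed.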
+ int tileIndex = i;
+ while (fScheduler->next(tileIndex)) {}
+ }));
+ }
+}
+
+void SkThreadedBMPDevice::finishThreads() {
+ fScheduler->finish();
+ for(auto& future : fThreadFutures) {
+ future.wait();
+ }
+ fThreadFutures.reset();
+ fQueueSize = 0;
+ fScheduler.reset(nullptr);
+}
+
+SkThreadedBMPDevice::SkThreadedBMPDevice(const SkBitmap& bitmap, int tiles, int threads)
: INHERITED(bitmap)
- , fThreadCnt(threads)
+ , fTileCnt(tiles)
+ , fThreadCnt(threads <= 0 ? tiles : threads)
{
// Tiling using stripes for now; we'll explore better tiling in the future.
- int h = (bitmap.height() + fThreadCnt - 1) / SkTMax(fThreadCnt, 1);
+ int h = (bitmap.height() + fTileCnt - 1) / SkTMax(fTileCnt, 1);
int w = bitmap.width();
int top = 0;
- for(int tid = 0; tid < fThreadCnt; ++tid, top += h) {
- fThreadBounds.push_back(SkIRect::MakeLTRB(0, top, w, top + h));
+ for(int tid = 0; tid < fTileCnt; ++tid, top += h) {
+ fTileBounds.push_back(SkIRect::MakeLTRB(0, top, w, top + h));
}
+ fQueueSize = 0;
+ startThreads();
}
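
The stripe height above is the ceiling of height over tile count, so the last stripe's bottom may extend past the bitmap when the division is not exact. A standalone sketch of the same arithmetic (the 1080-pixel height and 7 tiles are made-up example values):

    #include <algorithm>
    #include <cstdio>

    int main() {
        const int height = 1080, tileCnt = 7;  // made-up example values
        const int h = (height + tileCnt - 1) / std::max(tileCnt, 1);  // ceil(1080 / 7) = 155
        for (int tid = 0, top = 0; tid < tileCnt; ++tid, top += h) {
            std::printf("tile %d: rows [%d, %d)\n", tid, top, top + h);  // last stripe ends at 1085
        }
        return 0;
    }
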
void SkThreadedBMPDevice::flush() {
- SkTaskGroup().batch(fThreadCnt, [this](int i) {
- for(auto& element : fQueue) {
- if (SkIRect::Intersects(fThreadBounds[i], element.fDrawBounds)) {
- element.fDrawFn(fThreadBounds[i]);
- }
- }
- });
- fQueue.reset();
+ finishThreads();
+ startThreads();
}
// Having this captured in a lambda seems to be faster than saving this in DrawElement
@@ -75,14 +269,16 @@ SkIRect SkThreadedBMPDevice::transformDrawBounds(const SkRect& drawBounds) const
#define THREADED_DRAW(drawBounds, actualDrawCall) \
do { \
DrawState ds(this); \
- fQueue.push_back({ \
+ SkASSERT(fQueueSize < MAX_QUEUE_SIZE); \
+ fQueue[fQueueSize++] = { \
this->transformDrawBounds(drawBounds), \
- [=](const SkIRect& threadBounds) { \
- SkRasterClip threadRC; \
- SkDraw draw = ds.getThreadDraw(threadRC, threadBounds); \
+ [=](const SkIRect& tileBounds) { \
+ SkRasterClip tileRC; \
+ SkDraw draw = ds.getThreadDraw(tileRC, tileBounds); \
draw.actualDrawCall; \
}, \
- }); \
+ }; \
+ fScheduler->signal(); \
} while (false)
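
For reference, a call site such as THREADED_DRAW(r, drawRect(r, paint)) (a hypothetical example) expands to roughly the following: the draw is recorded into the fixed-size queue as a bounds rectangle plus a lambda, and the scheduler is signalled so idle workers can pick it up:

    do {
        DrawState ds(this);
        SkASSERT(fQueueSize < MAX_QUEUE_SIZE);
        fQueue[fQueueSize++] = {
            this->transformDrawBounds(r),
            [=](const SkIRect& tileBounds) {
                SkRasterClip tileRC;
                SkDraw draw = ds.getThreadDraw(tileRC, tileBounds);
                draw.drawRect(r, paint);  // the actualDrawCall argument
            },
        };
        fScheduler->signal();  // wake workers for the newly queued draw
    } while (false);
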
static inline SkRect get_fast_bounds(const SkRect& r, const SkPaint& p) {