/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "SkThreadedBMPDevice.h"

#include "SkPath.h"
#include "SkTaskGroup.h"
#include "SkVertices.h"

#include <atomic>
#include <future>
#include <mutex>
#include <vector>

constexpr int MAX_CACHE_LINE = 64;

// Some basic logic and data structures that are shared across the current experimental
// schedulers.
//
// The recording thread calls signal() once per queued draw and finish() once no further draws
// will be queued; worker threads repeatedly call next(), which each subclass implements.
class TiledDrawSchedulerBase : public TiledDrawScheduler {
public:
    TiledDrawSchedulerBase(int tiles, WorkFunc work)
            : fTileCnt(tiles), fIsFinishing(false), fDrawCnt(0), fWork(std::move(work)) {}

    // Publishes one more queued draw to the workers.
    void signal() override {
        fDrawCnt++;
    }

    // Tells the workers that no more draws will be signalled. The release store pairs with the
    // acquire loads in the next() implementations so that a worker that observes the flag also
    // observes every signal() that preceded it.
    void finish() override {
        fIsFinishing.store(true, std::memory_order_release);
    }

protected:
    const int         fTileCnt;      // number of tiles
    std::atomic<bool> fIsFinishing;  // set once by finish()
    std::atomic<int>  fDrawCnt;      // number of draws signalled so far
    WorkFunc          fWork;         // invoked as fWork(tileIndex, drawIndex)
};

// Scheduler in which each thread stays on the tile it was given and busy-waits for new draws.
class TiledDrawSchedulerBySpinning : public TiledDrawSchedulerBase {
public:
    TiledDrawSchedulerBySpinning(int tiles, WorkFunc work)
            : TiledDrawSchedulerBase(tiles, std::move(work)), fScheduleData(tiles) {}

    void signal() final { this->TiledDrawSchedulerBase::signal(); }
    void finish() final { this->TiledDrawSchedulerBase::finish(); }

    // Runs the next pending draw for tileIndex and returns true, or returns false once finish()
    // has been called and this tile has consumed every signalled draw. Spins in between.
    bool next(int& tileIndex) final {
        int& drawIndex = fScheduleData[tileIndex].fDrawIndex;
        SkASSERT(drawIndex <= fDrawCnt);
        while (true) {
            // The flag must be read before the counter: if it is set, the counter read below is
            // guaranteed to include every draw that was signalled before finish().
            bool isFinishing = fIsFinishing.load(std::memory_order_acquire);
            if (isFinishing && drawIndex >= fDrawCnt) {
                return false;
            } else if (drawIndex < fDrawCnt) {
                fWork(tileIndex, drawIndex++);
                return true;
            }
        }
    }

private:
    // alignas(MAX_CACHE_LINE) to avoid false sharing by cache lines
    struct alignas(MAX_CACHE_LINE) TileScheduleData {
        TileScheduleData() : fDrawIndex(0) {}

        int fDrawIndex;  // next draw index for this tile
    };

    std::vector<TileScheduleData> fScheduleData;
};

// Scheduler in which a thread may work on any tile: it tries to lock the current tile, runs one
// pending draw if there is one, and otherwise moves on to the next tile (round robin).
class TiledDrawSchedulerFlexible : public TiledDrawSchedulerBase {
public:
    TiledDrawSchedulerFlexible(int tiles, WorkFunc work)
            : TiledDrawSchedulerBase(tiles, std::move(work)), fScheduleData(tiles) {}

    void signal() final { this->TiledDrawSchedulerBase::signal(); }
    void finish() final { this->TiledDrawSchedulerBase::finish(); }

    // Runs one pending draw on the first tile (starting at tileIndex) that can be locked and has
    // work, leaving tileIndex on that tile, and returns true. Returns false after finish() has
    // been called and fTileCnt locked tiles in a row had nothing left to draw.
    bool next(int& tileIndex) final {
        int failCnt = 0;
        while (true) {
            TileScheduleData& scheduleData = fScheduleData[tileIndex];
            bool processed = false;
            {
                // The lock is held while the draw runs so that draws on one tile never overlap.
                std::unique_lock<std::mutex> lock(scheduleData.fMutex, std::try_to_lock);
                if (lock.owns_lock()) {
                    // Read the flag BEFORE the counter. Reading it afterwards lets a signal() +
                    // finish() pair slip in between the two reads, which would count a tile with
                    // a pending draw as finished and could drop that draw.
                    const bool isFinishing = fIsFinishing.load(std::memory_order_acquire);
                    if (scheduleData.fDrawIndex < fDrawCnt) {
                        fWork(tileIndex, scheduleData.fDrawIndex++);
                        processed = true;
                    } else if (isFinishing) {
                        failCnt++;
                    }
                }
            }

            if (processed) {
                return true;
            }
            if (failCnt >= fTileCnt) {
                return false;
            }
            tileIndex = (tileIndex + 1) % fTileCnt;
        }
    }

private:
    // alignas(MAX_CACHE_LINE) to avoid false sharing by cache lines
    struct alignas(MAX_CACHE_LINE) TileScheduleData {
        TileScheduleData() : fDrawIndex(0) {}

        int        fDrawIndex;  // next draw index for this tile
        std::mutex fMutex;      // the mutex for the thread to acquire
    };

    std::vector<TileScheduleData> fScheduleData;
};

// Scheduler in which each tile owns a semaphore. signal() and finish() wake tile 0 only; every
// tile that wakes up forwards the wake-up to tiles 2*i+1 and 2*i+2, so the tiles form an implicit
// binary tree rooted at tile 0.
class TiledDrawSchedulerBySemaphores : public TiledDrawSchedulerBase {
public:
    TiledDrawSchedulerBySemaphores(int tiles, WorkFunc work)
            : TiledDrawSchedulerBase(tiles, std::move(work)), fScheduleData(tiles) {}

    void signal() final {
        this->TiledDrawSchedulerBase::signal();
        signalRoot();
    }

    void finish() final {
        this->TiledDrawSchedulerBase::finish();
        signalRoot();
    }

    // Blocks on this tile's semaphore, forwards the wake-up to the child tiles, then either runs
    // the next draw for this tile (returns true) or, if finish() was called and nothing is left,
    // returns false.
    bool next(int& tileIndex) final {
        SkASSERT(tileIndex >= 0 && tileIndex < fTileCnt);
        TileScheduleData& scheduleData = fScheduleData[tileIndex];
        // Every path through the body returns, so the body runs exactly once per call.
        while (true) {
            scheduleData.fSemaphore.wait();
            int leftChild = (tileIndex + 1) * 2 - 1;
            int rightChild = leftChild + 1;
            if (leftChild < fTileCnt) {
                fScheduleData[leftChild].fSemaphore.signal();
            }
            if (rightChild < fTileCnt) {
                fScheduleData[rightChild].fSemaphore.signal();
            }

            bool isFinishing = fIsFinishing.load(std::memory_order_acquire);
            if (isFinishing && scheduleData.fDrawIndex >= fDrawCnt) {
                return false;
            } else {
                SkASSERT(scheduleData.fDrawIndex < fDrawCnt);
                fWork(tileIndex, scheduleData.fDrawIndex++);
                return true;
            }
        }
    }

private:
    // alignas(MAX_CACHE_LINE) to avoid false sharing by cache lines
    struct alignas(MAX_CACHE_LINE) TileScheduleData {
        TileScheduleData() : fDrawIndex(0) {}

        int         fDrawIndex;  // next draw index for this tile
        SkSemaphore fSemaphore;  // signalled once per wake-up forwarded to this tile
    };

    void signalRoot() {
        SkASSERT(fTileCnt > 0);
        fScheduleData[0].fSemaphore.signal();
    }

    std::vector<TileScheduleData> fScheduleData;
};

// Creates the scheduler and launches fThreadCnt worker tasks. Worker i starts on tile i and keeps
// calling next() until it returns false. Must only be called with an empty queue and no running
// workers.
void SkThreadedBMPDevice::startThreads() {
    SkASSERT(fThreadFutures.count() == 0);
    SkASSERT(fQueueSize == 0);

    // Executes queued draw #drawIndex on tile #tileIndex, skipping tiles the draw cannot touch.
    TiledDrawScheduler::WorkFunc work = [this](int tileIndex, int drawIndex){
        auto& element = fQueue[drawIndex];
        if (SkIRect::Intersects(fTileBounds[tileIndex], element.fDrawBounds)) {
            element.fDrawFn(fTileBounds[tileIndex]);
        }
    };

    // using Scheduler = TiledDrawSchedulerBySemaphores;
    // using Scheduler = TiledDrawSchedulerBySpinning;
    using Scheduler = TiledDrawSchedulerFlexible;
    fScheduler.reset(new Scheduler(fTileCnt, work));
    for (int i = 0; i < fThreadCnt; ++i) {
        fThreadFutures.push_back(std::async(std::launch::async, [this, i]() {
            int tileIndex = i;
            while (fScheduler->next(tileIndex)) {}
        }));
    }
}

// Tells the scheduler no more draws are coming, waits for every worker to drain the queue, then
// clears the queue and destroys the scheduler.
void SkThreadedBMPDevice::finishThreads() {
    fScheduler->finish();
    for (auto& future : fThreadFutures) {
        future.wait();
    }
    fThreadFutures.reset();
    fQueueSize = 0;
    fScheduler.reset(nullptr);
}

// Splits the bitmap into `tiles` horizontal stripes and starts the workers.
// threads <= 0 means "one thread per tile".
SkThreadedBMPDevice::SkThreadedBMPDevice(const SkBitmap& bitmap, int tiles, int threads)
        : INHERITED(bitmap)
        , fTileCnt(tiles)
        , fThreadCnt(threads <= 0 ? tiles : threads) {
    // Tiling using stripes for now; we'll explore better tiling in the future.
    // The stripe height is rounded up, so the last stripe may extend below the bitmap.
    int h = (bitmap.height() + fTileCnt - 1) / SkTMax(fTileCnt, 1);
    int w = bitmap.width();
    int top = 0;
    for (int tid = 0; tid < fTileCnt; ++tid, top += h) {
        fTileBounds.push_back(SkIRect::MakeLTRB(0, top, w, top + h));
    }
    fQueueSize = 0;
    startThreads();
}

// Waits for all queued draws to complete, then restarts the workers with an empty queue.
void SkThreadedBMPDevice::flush() {
    finishThreads();
    startThreads();
}

// Snapshot of the device state (destination pixmap, CTM, raster clip) taken when a draw is
// recorded, so the draw can be replayed later on a worker thread.
// Having this captured in lambda seems to be faster than saving this in DrawElement
struct SkThreadedBMPDevice::DrawState {
    SkPixmap     fDst;
    SkMatrix     fMatrix;
    SkRasterClip fRC;

    explicit DrawState(SkThreadedBMPDevice* dev) {
        // we need fDst to be set, and if we're actually drawing, to dirty the genID
        if (!dev->accessPixels(&fDst)) {
            // NoDrawDevice uses us (why?) so we have to catch this case w/ no pixels
            fDst.reset(dev->imageInfo(), nullptr, 0);
        }
        fMatrix = dev->ctm();
        fRC = dev->fRCStack.rc();
    }

    // Builds an SkDraw for one tile. threadRC receives the snapshot clip intersected with
    // threadBounds; the returned SkDraw points at threadRC and at this DrawState's matrix, so
    // both must outlive it.
    SkDraw getThreadDraw(SkRasterClip& threadRC, const SkIRect& threadBounds) const {
        SkDraw draw;
        draw.fDst = fDst;
        draw.fMatrix = &fMatrix;
        threadRC = fRC;
        threadRC.op(threadBounds, SkRegion::kIntersect_Op);
        draw.fRC = &threadRC;
        return draw;
    }
};

// Maps drawBounds through the current CTM and rounds out to integer device bounds. The "largest"
// rect is passed through unchanged as the largest SkIRect.
SkIRect SkThreadedBMPDevice::transformDrawBounds(const SkRect& drawBounds) const {
    if (drawBounds.isLargest()) {
        return SkIRect::MakeLargest();
    }
    SkRect transformedBounds;
    this->ctm().mapRect(&transformedBounds, drawBounds);
    return transformedBounds.roundOut();
}

// Records one draw: snapshots the device state, appends {device-space bounds, replay lambda} to
// fQueue, and signals the scheduler. The lambda replays `draw.actualDrawCall` for a single tile.
//
// If the queue is already full it is flushed first; otherwise the append below would write past
// the end of fQueue in builds where SkASSERT does not stop execution.
//
// NOTE(review): [=] copies pointer arguments (pts, text, xpos, vertices, prePathMatrix) as
// pointers only; presumably callers must keep the pointed-to data alive until the draw has
// executed -- confirm.
//
// The do {...} while (false) is to enforce trailing semicolon as suggested by mtklein@
#define THREADED_DRAW(drawBounds, actualDrawCall)                                                  \
    do {                                                                                           \
        if (fQueueSize >= MAX_QUEUE_SIZE) {                                                        \
            this->flush();                                                                         \
        }                                                                                          \
        DrawState ds(this);                                                                        \
        SkASSERT(fQueueSize < MAX_QUEUE_SIZE);                                                     \
        fQueue[fQueueSize++] = {                                                                   \
            this->transformDrawBounds(drawBounds),                                                 \
            [=](const SkIRect& tileBounds) {                                                       \
                SkRasterClip tileRC;                                                               \
                SkDraw draw = ds.getThreadDraw(tileRC, tileBounds);                                \
                draw.actualDrawCall;                                                               \
            },                                                                                     \
        };                                                                                         \
        fScheduler->signal();                                                                      \
    } while (false)

// Returns the paint's fast bounds for r, or the largest rect when the paint cannot compute them.
static inline SkRect get_fast_bounds(const SkRect& r, const SkPaint& p) {
    SkRect result;
    if (p.canComputeFastBounds()) {
        result = p.computeFastBounds(r, &result);
    } else {
        result = SkRect::MakeLargest();
    }
    return result;
}

void SkThreadedBMPDevice::drawPaint(const SkPaint& paint) {
    THREADED_DRAW(SkRect::MakeLargest(), drawPaint(paint));
}

void SkThreadedBMPDevice::drawPoints(SkCanvas::PointMode mode, size_t count,
                                     const SkPoint pts[], const SkPaint& paint) {
    // TODO tighter drawBounds
    SkRect drawBounds = SkRect::MakeLargest();
    THREADED_DRAW(drawBounds, drawPoints(mode, count, pts, paint, nullptr));
}

void SkThreadedBMPDevice::drawRect(const SkRect& r, const SkPaint& paint) {
    SkRect drawBounds = get_fast_bounds(r, paint);
    THREADED_DRAW(drawBounds, drawRect(r, paint));
}

void SkThreadedBMPDevice::drawRRect(const SkRRect& rrect, const SkPaint& paint) {
#ifdef SK_IGNORE_BLURRED_RRECT_OPT
    SkPath path;

    path.addRRect(rrect);
    // call the VIRTUAL version, so any subclasses who do handle drawPath aren't
    // required to override drawRRect.
    this->drawPath(path, paint, nullptr, false);
#else
    SkRect drawBounds = get_fast_bounds(rrect.getBounds(), paint);
    THREADED_DRAW(drawBounds, drawRRect(rrect, paint));
#endif
}

void SkThreadedBMPDevice::drawPath(const SkPath& path, const SkPaint& paint,
                                   const SkMatrix* prePathMatrix, bool pathIsMutable) {
    // An inverse-filled path can touch everything outside its own bounds.
    SkRect drawBounds = path.isInverseFillType() ? SkRect::MakeLargest()
                                                 : get_fast_bounds(path.getBounds(), paint);
    // For thread safety, make path immutable (pathIsMutable is deliberately ignored)
    THREADED_DRAW(drawBounds, drawPath(path, paint, prePathMatrix, false));
}

void SkThreadedBMPDevice::drawBitmap(const SkBitmap& bitmap, SkScalar x, SkScalar y,
                                     const SkPaint& paint) {
    SkMatrix matrix = SkMatrix::MakeTrans(x, y);
    LogDrawScaleFactor(SkMatrix::Concat(this->ctm(), matrix), paint.getFilterQuality());
    SkRect drawBounds = SkRect::MakeWH(bitmap.width(), bitmap.height());
    matrix.mapRect(&drawBounds);
    THREADED_DRAW(drawBounds, drawBitmap(bitmap, matrix, nullptr, paint));
}

void SkThreadedBMPDevice::drawSprite(const SkBitmap& bitmap, int x, int y, const SkPaint& paint) {
    SkRect drawBounds = SkRect::MakeXYWH(x, y, bitmap.width(), bitmap.height());
    THREADED_DRAW(drawBounds, drawSprite(bitmap, x, y, paint));
}

void SkThreadedBMPDevice::drawText(const void* text, size_t len, SkScalar x, SkScalar y,
                                   const SkPaint& paint) {
    SkRect drawBounds = SkRect::MakeLargest();  // TODO tighter drawBounds
    THREADED_DRAW(drawBounds, drawText((const char*)text, len, x, y, paint,
                                       &this->surfaceProps()));
}

void SkThreadedBMPDevice::drawPosText(const void* text, size_t len, const SkScalar xpos[],
                                      int scalarsPerPos, const SkPoint& offset,
                                      const SkPaint& paint) {
    SkRect drawBounds = SkRect::MakeLargest();  // TODO tighter drawBounds
    THREADED_DRAW(drawBounds, drawPosText((const char*)text, len, xpos, scalarsPerPos, offset,
                                          paint, &surfaceProps()));
}

void SkThreadedBMPDevice::drawVertices(const SkVertices* vertices, SkBlendMode bmode,
                                       const SkPaint& paint) {
    SkRect drawBounds = SkRect::MakeLargest();  // TODO tighter drawBounds
    THREADED_DRAW(drawBounds, drawVertices(vertices->mode(), vertices->vertexCount(),
                                           vertices->positions(), vertices->texCoords(),
                                           vertices->colors(), bmode, vertices->indices(),
                                           vertices->indexCount(), paint));
}

void SkThreadedBMPDevice::drawDevice(SkBaseDevice* device, int x, int y, const SkPaint& paint) {
    SkASSERT(!paint.getImageFilter());
    SkRect drawBounds = SkRect::MakeXYWH(x, y, device->width(), device->height());
    // The source device is drawn as a sprite from its backing bitmap.
    THREADED_DRAW(drawBounds,
                  drawSprite(static_cast<SkBitmapDevice*>(device)->fBitmap, x, y, paint));
}