/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "GrCCPathParser.h"

#include "GrCaps.h"
#include "GrGpuCommandBuffer.h"
#include "GrOnFlushResourceProvider.h"
#include "GrOpFlushState.h"
#include "SkMathPriv.h"
#include "SkPath.h"
#include "SkPathPriv.h"
#include "SkPoint.h"
#include "ccpr/GrCCGeometry.h"

using TriangleInstance = GrCCCoverageProcessor::TriangleInstance;
using CubicInstance = GrCCCoverageProcessor::CubicInstance;

GrCCPathParser::GrCCPathParser(int maxTotalPaths, int maxPathPoints, int numSkPoints,
                               int numSkVerbs)
        : fLocalDevPtsBuffer(maxPathPoints + 1)  // Overallocate by one point to accommodate
                                                 // overflow with Sk4f. (See parsePath.)
        , fGeometry(numSkPoints, numSkVerbs)
        , fPathsInfo(maxTotalPaths)
        , fScissorSubBatches(maxTotalPaths)
        , fTotalPrimitiveCounts{PrimitiveTallies(), PrimitiveTallies()} {
    // Batches decide what to draw by looking where the previous one ended. Define initial batches
    // that "end" at the beginning of the data. These will not be drawn, but will only be read by
    // the first actual batch.
    fScissorSubBatches.push_back() = {PrimitiveTallies(), SkIRect::MakeEmpty()};
    fCoverageCountBatches.push_back() = {PrimitiveTallies(), fScissorSubBatches.count()};
}

void GrCCPathParser::parsePath(const SkMatrix& m, const SkPath& path, SkRect* devBounds,
                               SkRect* devBounds45) {
    const SkPoint* pts = SkPathPriv::PointData(path);
    int numPts = path.countPoints();
    SkASSERT(numPts + 1 <= fLocalDevPtsBuffer.count());

    if (!numPts) {
        devBounds->setEmpty();
        devBounds45->setEmpty();
        this->parsePath(path, nullptr);
        return;
    }

    // m45 transforms path points into "45 degree" device space. A bounding box in this space
    // gives the circumscribing octagon's diagonals. We could use SK_ScalarRoot2Over2, but an
    // orthonormal transform is not necessary as long as the shader uses the correct inverse.
    SkMatrix m45;
    m45.setSinCos(1, 1);
    m45.preConcat(m);

    // X,Y,T are two parallel view matrices that accumulate two bounding boxes as they map points:
    // device-space bounds and "45 degree" device-space bounds (| 1 -1 | * devCoords).
    //                                                          | 1  1 |
    Sk4f X = Sk4f(m.getScaleX(), m.getSkewY(), m45.getScaleX(), m45.getSkewY());
    Sk4f Y = Sk4f(m.getSkewX(), m.getScaleY(), m45.getSkewX(), m45.getScaleY());
    Sk4f T = Sk4f(m.getTranslateX(), m.getTranslateY(), m45.getTranslateX(), m45.getTranslateY());

    // Map the path's points to device space and accumulate bounding boxes.
    Sk4f devPt = SkNx_fma(Y, Sk4f(pts[0].y()), T);
    devPt = SkNx_fma(X, Sk4f(pts[0].x()), devPt);
    Sk4f topLeft = devPt;
    Sk4f bottomRight = devPt;

    // Store all 4 values [dev.x, dev.y, dev45.x, dev45.y]. We are only interested in the first
    // two, and will overwrite [dev45.x, dev45.y] with the next point. This is why the dst buffer
    // must be at least one larger than the number of points.
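    // For example, with numPts == 3 the final 16-byte Sk4f store (two SkPoint slots) writes
    // fLocalDevPtsBuffer[2] and fLocalDevPtsBuffer[3]; the "+ 1" slot allocated in the
    // constructor absorbs that trailing overwrite.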
    devPt.store(&fLocalDevPtsBuffer[0]);

    for (int i = 1; i < numPts; ++i) {
        devPt = SkNx_fma(Y, Sk4f(pts[i].y()), T);
        devPt = SkNx_fma(X, Sk4f(pts[i].x()), devPt);
        topLeft = Sk4f::Min(topLeft, devPt);
        bottomRight = Sk4f::Max(bottomRight, devPt);
        devPt.store(&fLocalDevPtsBuffer[i]);
    }

    SkPoint topLeftPts[2], bottomRightPts[2];
    topLeft.store(topLeftPts);
    bottomRight.store(bottomRightPts);
    devBounds->setLTRB(topLeftPts[0].x(), topLeftPts[0].y(), bottomRightPts[0].x(),
                       bottomRightPts[0].y());
    devBounds45->setLTRB(topLeftPts[1].x(), topLeftPts[1].y(), bottomRightPts[1].x(),
                         bottomRightPts[1].y());

    this->parsePath(path, fLocalDevPtsBuffer.get());
}

void GrCCPathParser::parseDeviceSpacePath(const SkPath& deviceSpacePath) {
    this->parsePath(deviceSpacePath, SkPathPriv::PointData(deviceSpacePath));
}

void GrCCPathParser::parsePath(const SkPath& path, const SkPoint* deviceSpacePts) {
    SkASSERT(!fInstanceBuffer);  // Can't call after finalize().
    SkASSERT(!fParsingPath);  // Call saveParsedPath() or discardParsedPath() for the last one
                              // first.
    SkDEBUGCODE(fParsingPath = true);
    SkASSERT(path.isEmpty() || deviceSpacePts);

    fCurrPathPointsIdx = fGeometry.points().count();
    fCurrPathVerbsIdx = fGeometry.verbs().count();
    fCurrPathPrimitiveCounts = PrimitiveTallies();

    fGeometry.beginPath();

    if (path.isEmpty()) {
        return;
    }

    int ptsIdx = 0;
    bool insideContour = false;

    for (SkPath::Verb verb : SkPathPriv::Verbs(path)) {
        switch (verb) {
            case SkPath::kMove_Verb:
                this->endContourIfNeeded(insideContour);
                fGeometry.beginContour(deviceSpacePts[ptsIdx]);
                ++ptsIdx;
                insideContour = true;
                continue;
            case SkPath::kClose_Verb:
                this->endContourIfNeeded(insideContour);
                insideContour = false;
                continue;
            case SkPath::kLine_Verb:
                fGeometry.lineTo(deviceSpacePts[ptsIdx]);
                ++ptsIdx;
                continue;
            case SkPath::kQuad_Verb:
                fGeometry.quadraticTo(deviceSpacePts[ptsIdx], deviceSpacePts[ptsIdx + 1]);
                ptsIdx += 2;
                continue;
            case SkPath::kCubic_Verb:
                fGeometry.cubicTo(deviceSpacePts[ptsIdx], deviceSpacePts[ptsIdx + 1],
                                  deviceSpacePts[ptsIdx + 2]);
                ptsIdx += 3;
                continue;
            case SkPath::kConic_Verb:
                SK_ABORT("Conics are not supported.");
            default:
                SK_ABORT("Unexpected path verb.");
        }
    }

    this->endContourIfNeeded(insideContour);
}

void GrCCPathParser::endContourIfNeeded(bool insideContour) {
    if (insideContour) {
        fCurrPathPrimitiveCounts += fGeometry.endContour();
    }
}

void GrCCPathParser::saveParsedPath(ScissorMode scissorMode, const SkIRect& clippedDevIBounds,
                                    int16_t atlasOffsetX, int16_t atlasOffsetY) {
    SkASSERT(fParsingPath);

    fPathsInfo.push_back() = {scissorMode, atlasOffsetX, atlasOffsetY};
    fTotalPrimitiveCounts[(int)scissorMode] += fCurrPathPrimitiveCounts;

    if (ScissorMode::kScissored == scissorMode) {
        fScissorSubBatches.push_back() = {fTotalPrimitiveCounts[(int)ScissorMode::kScissored],
                                          clippedDevIBounds.makeOffset(atlasOffsetX,
                                                                       atlasOffsetY)};
    }

    SkDEBUGCODE(fParsingPath = false);
}

void GrCCPathParser::discardParsedPath() {
    SkASSERT(fParsingPath);
    fGeometry.resize_back(fCurrPathPointsIdx, fCurrPathVerbsIdx);
    SkDEBUGCODE(fParsingPath = false);
}

GrCCPathParser::CoverageCountBatchID GrCCPathParser::closeCurrentBatch() {
    SkASSERT(!fInstanceBuffer);
    SkASSERT(!fCoverageCountBatches.empty());

    int maxMeshes = 1 + fScissorSubBatches.count() -
                    fCoverageCountBatches.back().fEndScissorSubBatchIdx;
    fMaxMeshesPerDraw = SkTMax(fMaxMeshesPerDraw, maxMeshes);

    fCoverageCountBatches.push_back() = {
        fTotalPrimitiveCounts[(int)ScissorMode::kNonScissored],
        fScissorSubBatches.count()
    };
    return fCoverageCountBatches.count() - 1;
}

// Emits a contour's triangle fan.
//
// Classic Redbook fanning would be the triangles: [0  1  2], [0  2  3], ..., [0  n-2  n-1].
//
// This function emits the triangle: [0  n/3  n*2/3], and then recurses on all three sides. The
// advantage to this approach is that for a convex-ish contour, it generates larger triangles.
// Classic fanning tends to generate long, skinny triangles, which are expensive to draw since
// they have a longer perimeter to rasterize and antialias.
//
// The indices array indexes the fan's points (think: glDrawElements), and must have at least
// log3(indexCount) elements past the end for this method to use as scratch space.
//
// Returns the next triangle instance after the final one emitted.
static TriangleInstance* emit_recursive_fan(const SkTArray<SkPoint, true>& pts,
                                            SkTArray<int32_t, true>& indices, int firstIndex,
                                            int indexCount, const Sk2f& atlasOffset,
                                            TriangleInstance out[]) {
    if (indexCount < 3) {
        return out;
    }

    int32_t oneThirdCount = indexCount / 3;
    int32_t twoThirdsCount = (2 * indexCount) / 3;
    out++->set(pts[indices[firstIndex]], pts[indices[firstIndex + oneThirdCount]],
               pts[indices[firstIndex + twoThirdsCount]], atlasOffset);

    out = emit_recursive_fan(pts, indices, firstIndex, oneThirdCount + 1, atlasOffset, out);
    out = emit_recursive_fan(pts, indices, firstIndex + oneThirdCount,
                             twoThirdsCount - oneThirdCount + 1, atlasOffset, out);

    int endIndex = firstIndex + indexCount;
    int32_t oldValue = indices[endIndex];
    indices[endIndex] = indices[firstIndex];
    out = emit_recursive_fan(pts, indices, firstIndex + twoThirdsCount,
                             indexCount - twoThirdsCount + 1, atlasOffset, out);
    indices[endIndex] = oldValue;

    return out;
}

bool GrCCPathParser::finalize(GrOnFlushResourceProvider* onFlushRP) {
    SkASSERT(!fParsingPath);  // Call saveParsedPath() or discardParsedPath().
    SkASSERT(fCoverageCountBatches.back().fEndNonScissorIndices ==  // Call closeCurrentBatch().
             fTotalPrimitiveCounts[(int)ScissorMode::kNonScissored]);
    SkASSERT(fCoverageCountBatches.back().fEndScissorSubBatchIdx == fScissorSubBatches.count());

    // Here we build a single instance buffer to share with every internal batch.
    //
    // CCPR processes 3 different types of primitives: triangles, quadratics, cubics. Each
    // primitive type is further divided into instances that require a scissor and those that
    // don't. This leaves us with 3*2 = 6 independent instance arrays to build for the GPU.
    //
    // Rather than place each instance array in its own GPU buffer, we allocate a single
    // megabuffer and lay them all out side-by-side. We can offset the "baseInstance" parameter
    // in our draw calls to direct the GPU to the applicable elements within a given array.
    //
    // We already know how big to make each of the 6 arrays from fTotalPrimitiveCounts, so layout
    // is straightforward. Start with triangles and quadratics. They both view the instance
    // buffer as an array of TriangleInstance[], so we can begin at zero and lay them out one
    // after the other.
    fBaseInstances[0].fTriangles = 0;
    fBaseInstances[1].fTriangles = fBaseInstances[0].fTriangles +
                                   fTotalPrimitiveCounts[0].fTriangles;
    fBaseInstances[0].fQuadratics = fBaseInstances[1].fTriangles +
                                    fTotalPrimitiveCounts[1].fTriangles;
    fBaseInstances[1].fQuadratics = fBaseInstances[0].fQuadratics +
                                    fTotalPrimitiveCounts[0].fQuadratics;
    int triEndIdx = fBaseInstances[1].fQuadratics + fTotalPrimitiveCounts[1].fQuadratics;

    // Cubics view the same instance buffer as an array of CubicInstance[]. So, reinterpreting
    // the instance data as CubicInstance[], we start them on the first index that will not
    // overwrite previous TriangleInstance data.
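    // Worked example with hypothetical struct sizes, just to illustrate the index conversion:
    // if sizeof(TriangleInstance) were 32 bytes and sizeof(CubicInstance) were 40 bytes, a
    // triEndIdx of 10 would occupy 320 bytes, so the first CubicInstance index that clears the
    // triangle/quadratic data would be ceil(320 / 40) = 8 -- exactly what the round-up division
    // below computes.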
    int cubicBaseIdx = GR_CT_DIV_ROUND_UP(triEndIdx * sizeof(TriangleInstance),
                                          sizeof(CubicInstance));
    fBaseInstances[0].fCubics = cubicBaseIdx;
    fBaseInstances[1].fCubics = fBaseInstances[0].fCubics + fTotalPrimitiveCounts[0].fCubics;
    int cubicEndIdx = fBaseInstances[1].fCubics + fTotalPrimitiveCounts[1].fCubics;

    fInstanceBuffer = onFlushRP->makeBuffer(kVertex_GrBufferType,
                                            cubicEndIdx * sizeof(CubicInstance));
    if (!fInstanceBuffer) {
        return false;
    }

    TriangleInstance* triangleInstanceData =
            static_cast<TriangleInstance*>(fInstanceBuffer->map());
    CubicInstance* cubicInstanceData = reinterpret_cast<CubicInstance*>(triangleInstanceData);
    SkASSERT(cubicInstanceData);

    PathInfo* currPathInfo = fPathsInfo.begin();
    float atlasOffsetX = 0.0, atlasOffsetY = 0.0;
    Sk2f atlasOffset;
    int ptsIdx = -1;
    PrimitiveTallies instanceIndices[2] = {fBaseInstances[0], fBaseInstances[1]};
    PrimitiveTallies* currIndices = nullptr;
    SkSTArray<256, int32_t, true> currFan;

    const SkTArray<SkPoint, true>& pts = fGeometry.points();

    // Expand the ccpr verbs into GPU instance buffers.
    for (GrCCGeometry::Verb verb : fGeometry.verbs()) {
        switch (verb) {
            case GrCCGeometry::Verb::kBeginPath:
                SkASSERT(currFan.empty());
                currIndices = &instanceIndices[(int)currPathInfo->fScissorMode];
                atlasOffsetX = static_cast<float>(currPathInfo->fAtlasOffsetX);
                atlasOffsetY = static_cast<float>(currPathInfo->fAtlasOffsetY);
                atlasOffset = {atlasOffsetX, atlasOffsetY};
                ++currPathInfo;
                continue;

            case GrCCGeometry::Verb::kBeginContour:
                SkASSERT(currFan.empty());
                currFan.push_back(++ptsIdx);
                continue;

            case GrCCGeometry::Verb::kLineTo:
                SkASSERT(!currFan.empty());
                currFan.push_back(++ptsIdx);
                continue;

            case GrCCGeometry::Verb::kMonotonicQuadraticTo:
                SkASSERT(!currFan.empty());
                triangleInstanceData[currIndices->fQuadratics++].set(&pts[ptsIdx], atlasOffset);
                currFan.push_back(ptsIdx += 2);
                continue;

            case GrCCGeometry::Verb::kMonotonicCubicTo:
                SkASSERT(!currFan.empty());
                cubicInstanceData[currIndices->fCubics++].set(&pts[ptsIdx], atlasOffsetX,
                                                              atlasOffsetY);
                currFan.push_back(ptsIdx += 3);
                continue;

            case GrCCGeometry::Verb::kEndClosedContour:  // endPt == startPt.
                SkASSERT(!currFan.empty());
                currFan.pop_back();
                // fallthru.
            case GrCCGeometry::Verb::kEndOpenContour:  // endPt != startPt.
                if (currFan.count() >= 3) {
                    int fanSize = currFan.count();
                    // Reserve space for emit_recursive_fan. Technically this can grow to
                    // fanSize + log3(fanSize), but we approximate with log2.
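                    // (SkNextLog2 rounds up to ceil(log2(fanSize)), and log2(n) >= log3(n) for
                    // all n >= 1, so this always reserves at least the log3(fanSize) scratch
                    // entries mentioned above.)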
                    currFan.push_back_n(SkNextLog2(fanSize));
                    SkDEBUGCODE(TriangleInstance* end =)
                            emit_recursive_fan(pts, currFan, 0, fanSize, atlasOffset,
                                               triangleInstanceData + currIndices->fTriangles);
                    currIndices->fTriangles += fanSize - 2;
                    SkASSERT(triangleInstanceData + currIndices->fTriangles == end);
                }
                currFan.reset();
                continue;
        }
    }

    fInstanceBuffer->unmap();

    SkASSERT(currPathInfo == fPathsInfo.end());
    SkASSERT(ptsIdx == pts.count() - 1);
    SkASSERT(instanceIndices[0].fTriangles == fBaseInstances[1].fTriangles);
    SkASSERT(instanceIndices[1].fTriangles == fBaseInstances[0].fQuadratics);
    SkASSERT(instanceIndices[0].fQuadratics == fBaseInstances[1].fQuadratics);
    SkASSERT(instanceIndices[1].fQuadratics == triEndIdx);
    SkASSERT(instanceIndices[0].fCubics == fBaseInstances[1].fCubics);
    SkASSERT(instanceIndices[1].fCubics == cubicEndIdx);

    fMeshesScratchBuffer.reserve(fMaxMeshesPerDraw);
    fDynamicStatesScratchBuffer.reserve(fMaxMeshesPerDraw);

    return true;
}

void GrCCPathParser::drawCoverageCount(GrOpFlushState* flushState, CoverageCountBatchID batchID,
                                       const SkIRect& drawBounds) const {
    using RenderPass = GrCCCoverageProcessor::RenderPass;

    SkASSERT(fInstanceBuffer);

    GrPipeline pipeline(flushState->drawOpArgs().fProxy, GrPipeline::ScissorState::kEnabled,
                        SkBlendMode::kPlus);

    // Triangles.
    this->drawRenderPass(flushState, pipeline, batchID, RenderPass::kTriangleHulls,
                         &PrimitiveTallies::fTriangles, drawBounds);
    this->drawRenderPass(flushState, pipeline, batchID, RenderPass::kTriangleEdges,
                         &PrimitiveTallies::fTriangles, drawBounds);  // Might get skipped.
    this->drawRenderPass(flushState, pipeline, batchID, RenderPass::kTriangleCorners,
                         &PrimitiveTallies::fTriangles, drawBounds);

    // Quadratics.
    this->drawRenderPass(flushState, pipeline, batchID, RenderPass::kQuadraticHulls,
                         &PrimitiveTallies::fQuadratics, drawBounds);
    this->drawRenderPass(flushState, pipeline, batchID, RenderPass::kQuadraticCorners,
                         &PrimitiveTallies::fQuadratics, drawBounds);

    // Cubics.
    this->drawRenderPass(flushState, pipeline, batchID, RenderPass::kCubicHulls,
                         &PrimitiveTallies::fCubics, drawBounds);
    this->drawRenderPass(flushState, pipeline, batchID, RenderPass::kCubicCorners,
                         &PrimitiveTallies::fCubics, drawBounds);
}

void GrCCPathParser::drawRenderPass(GrOpFlushState* flushState, const GrPipeline& pipeline,
                                    CoverageCountBatchID batchID,
                                    GrCCCoverageProcessor::RenderPass renderPass,
                                    int PrimitiveTallies::*instanceType,
                                    const SkIRect& drawBounds) const {
    SkASSERT(pipeline.getScissorState().enabled());

    if (!GrCCCoverageProcessor::DoesRenderPass(renderPass, flushState->caps())) {
        return;
    }

    // Don't call reset(), as that also resets the reserve count.
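    // Emptying via pop_back_n() keeps the capacity reserved in finalize(), so these scratch
    // arrays do not reallocate between draws.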
    fMeshesScratchBuffer.pop_back_n(fMeshesScratchBuffer.count());
    fDynamicStatesScratchBuffer.pop_back_n(fDynamicStatesScratchBuffer.count());

    GrCCCoverageProcessor proc(flushState->resourceProvider(), renderPass, flushState->caps());

    SkASSERT(batchID > 0);
    SkASSERT(batchID < fCoverageCountBatches.count());
    const CoverageCountBatch& previousBatch = fCoverageCountBatches[batchID - 1];
    const CoverageCountBatch& batch = fCoverageCountBatches[batchID];

    if (int instanceCount = batch.fEndNonScissorIndices.*instanceType -
                            previousBatch.fEndNonScissorIndices.*instanceType) {
        SkASSERT(instanceCount > 0);
        int baseInstance = fBaseInstances[(int)ScissorMode::kNonScissored].*instanceType +
                           previousBatch.fEndNonScissorIndices.*instanceType;
        proc.appendMesh(fInstanceBuffer.get(), instanceCount, baseInstance,
                        &fMeshesScratchBuffer);
        fDynamicStatesScratchBuffer.push_back().fScissorRect.setXYWH(0, 0, drawBounds.width(),
                                                                     drawBounds.height());
    }

    SkASSERT(previousBatch.fEndScissorSubBatchIdx > 0);
    SkASSERT(batch.fEndScissorSubBatchIdx <= fScissorSubBatches.count());
    int baseScissorInstance = fBaseInstances[(int)ScissorMode::kScissored].*instanceType;
    for (int i = previousBatch.fEndScissorSubBatchIdx; i < batch.fEndScissorSubBatchIdx; ++i) {
        const ScissorSubBatch& previousSubBatch = fScissorSubBatches[i - 1];
        const ScissorSubBatch& scissorSubBatch = fScissorSubBatches[i];
        int startIndex = previousSubBatch.fEndPrimitiveIndices.*instanceType;
        int instanceCount = scissorSubBatch.fEndPrimitiveIndices.*instanceType - startIndex;
        if (!instanceCount) {
            continue;
        }
        SkASSERT(instanceCount > 0);
        proc.appendMesh(fInstanceBuffer.get(), instanceCount, baseScissorInstance + startIndex,
                        &fMeshesScratchBuffer);
        fDynamicStatesScratchBuffer.push_back().fScissorRect = scissorSubBatch.fScissor;
    }

    SkASSERT(fMeshesScratchBuffer.count() == fDynamicStatesScratchBuffer.count());
    SkASSERT(fMeshesScratchBuffer.count() <= fMaxMeshesPerDraw);

    if (!fMeshesScratchBuffer.empty()) {
        SkASSERT(flushState->rtCommandBuffer());
        flushState->rtCommandBuffer()->draw(pipeline, proc, fMeshesScratchBuffer.begin(),
                                            fDynamicStatesScratchBuffer.begin(),
                                            fMeshesScratchBuffer.count(),
                                            SkRect::Make(drawBounds));
    }
}