Diffstat (limited to 'src/gpu')
-rw-r--r-- | src/gpu/GrCaps.cpp | 2
-rw-r--r-- | src/gpu/GrGeometryProcessor.h | 17
-rw-r--r-- | src/gpu/GrGpuCommandBuffer.cpp | 4
-rw-r--r-- | src/gpu/GrMesh.h | 100
-rw-r--r-- | src/gpu/GrPrimitiveProcessor.h | 82
-rw-r--r-- | src/gpu/GrShaderCaps.cpp | 2
-rw-r--r-- | src/gpu/gl/GrGLCaps.cpp | 37
-rw-r--r-- | src/gpu/gl/GrGLGpu.cpp | 80
-rw-r--r-- | src/gpu/gl/GrGLGpu.h | 15
-rw-r--r-- | src/gpu/gl/GrGLVertexArray.cpp | 12
-rw-r--r-- | src/gpu/gl/GrGLVertexArray.h | 11
-rw-r--r-- | src/gpu/ops/GrDefaultPathRenderer.cpp | 2
-rw-r--r-- | src/gpu/ops/GrDrawVerticesOp.cpp | 2
-rw-r--r-- | src/gpu/ops/GrMSAAPathRenderer.cpp | 8
-rw-r--r-- | src/gpu/ops/GrNonAAFillRectOp.cpp | 1
-rw-r--r-- | src/gpu/ops/GrNonAAStrokeRectOp.cpp | 2
-rw-r--r-- | src/gpu/ops/GrTessellatingPathRenderer.cpp | 2
-rw-r--r-- | src/gpu/vk/GrVkCaps.cpp | 2
-rw-r--r-- | src/gpu/vk/GrVkCommandBuffer.cpp | 6
-rw-r--r-- | src/gpu/vk/GrVkCommandBuffer.h | 26
-rw-r--r-- | src/gpu/vk/GrVkCopyManager.cpp | 2
-rw-r--r-- | src/gpu/vk/GrVkGpuCommandBuffer.cpp | 69
-rw-r--r-- | src/gpu/vk/GrVkPipeline.cpp | 51
24 files changed, 397 insertions, 168 deletions
diff --git a/src/gpu/GrCaps.cpp b/src/gpu/GrCaps.cpp index 5c04d190c5..9c8b1db19c 100644 --- a/src/gpu/GrCaps.cpp +++ b/src/gpu/GrCaps.cpp @@ -43,6 +43,7 @@ GrCaps::GrCaps(const GrContextOptions& options) { fTextureBarrierSupport = false; fSampleLocationsSupport = false; fMultisampleDisableSupport = false; + fInstanceAttribSupport = false; fUsesMixedSamples = false; fPreferClientSideDynamicBuffers = false; fFullClearIsFree = false; @@ -132,6 +133,7 @@ SkString GrCaps::dump() const { r.appendf("Texture Barrier Support : %s\n", gNY[fTextureBarrierSupport]); r.appendf("Sample Locations Support : %s\n", gNY[fSampleLocationsSupport]); r.appendf("Multisample disable support : %s\n", gNY[fMultisampleDisableSupport]); + r.appendf("Instance Attrib Support : %s\n", gNY[fInstanceAttribSupport]); r.appendf("Uses Mixed Samples : %s\n", gNY[fUsesMixedSamples]); r.appendf("Prefer client-side dynamic buffers : %s\n", gNY[fPreferClientSideDynamicBuffers]); r.appendf("Full screen clear is free : %s\n", gNY[fFullClearIsFree]); diff --git a/src/gpu/GrGeometryProcessor.h b/src/gpu/GrGeometryProcessor.h index e5222bf80a..086d9baa53 100644 --- a/src/gpu/GrGeometryProcessor.h +++ b/src/gpu/GrGeometryProcessor.h @@ -40,23 +40,6 @@ public: } protected: - /** - * Subclasses call this from their constructor to register vertex attributes. Attributes - * will be padded to the nearest 4 bytes for performance reasons. - * TODO After deferred geometry, we should do all of this inline in GenerateGeometry alongside - * the struct used to actually populate the attributes. This is all extremely fragile, vertex - * attributes have to be added in the order they will appear in the struct which maps memory. - * The processor key should reflect the vertex attributes, or there lack thereof in the - * GrGeometryProcessor. - */ - const Attribute& addVertexAttrib(const char* name, GrVertexAttribType type, - GrSLPrecision precision = kDefault_GrSLPrecision) { - precision = (kDefault_GrSLPrecision == precision) ? 
kMedium_GrSLPrecision : precision; - fAttribs.emplace_back(name, type, precision); - fVertexStride += fAttribs.back().fOffset; - return fAttribs.back(); - } - void setWillUseGeoShader() { fWillUseGeoShader = true; } /** diff --git a/src/gpu/GrGpuCommandBuffer.cpp b/src/gpu/GrGpuCommandBuffer.cpp index 0c7bb9704f..5570a5a65e 100644 --- a/src/gpu/GrGpuCommandBuffer.cpp +++ b/src/gpu/GrGpuCommandBuffer.cpp @@ -42,8 +42,10 @@ bool GrGpuCommandBuffer::draw(const GrPipeline& pipeline, int meshCount, const SkRect& bounds) { #ifdef SK_DEBUG + SkASSERT(!primProc.hasInstanceAttribs() || this->gpu()->caps()->instanceAttribSupport()); for (int i = 0; i < meshCount; ++i) { - SkASSERT(SkToBool(primProc.numAttribs()) == meshes[i].hasVertexData()); + SkASSERT(primProc.hasVertexAttribs() == meshes[i].hasVertexData()); + SkASSERT(primProc.hasInstanceAttribs() == meshes[i].isInstanced()); } #endif diff --git a/src/gpu/GrMesh.h b/src/gpu/GrMesh.h index 98f9911a6f..53854764f2 100644 --- a/src/gpu/GrMesh.h +++ b/src/gpu/GrMesh.h @@ -24,19 +24,26 @@ public: GrMesh(GrPrimitiveType primitiveType) : fPrimitiveType(primitiveType) , fBaseVertex(0) { - SkDEBUGCODE(fNonIndexData.fVertexCount = -1); + SkDEBUGCODE(fNonIndexNonInstanceData.fVertexCount = -1;) } GrPrimitiveType primitiveType() const { return fPrimitiveType; } bool isIndexed() const { return SkToBool(fIndexBuffer.get()); } + bool isInstanced() const { return SkToBool(fInstanceBuffer.get()); } bool hasVertexData() const { return SkToBool(fVertexBuffer.get()); } - void setNonIndexed(int vertexCount); + void setNonIndexedNonInstanced(int vertexCount); + void setIndexed(const GrBuffer* indexBuffer, int indexCount, int baseIndex, uint16_t minIndexValue, uint16_t maxIndexValue); void setIndexedPatterned(const GrBuffer* indexBuffer, int indexCount, int vertexCount, int patternRepeatCount, int maxPatternRepetitionsInIndexBuffer); + void setInstanced(const GrBuffer* instanceBuffer, int instanceCount, int baseInstance, + int vertexCount); + void setIndexedInstanced(const GrBuffer* indexBuffer, int indexCount, + const GrBuffer* instanceBuffer, int instanceCount, int baseInstance=0); + void setVertexData(const GrBuffer* vertexBuffer, int baseVertex = 0); class SendToGpuImpl { @@ -51,6 +58,17 @@ public: uint16_t maxIndexValue, const GrBuffer* vertexBuffer, int baseVertex) = 0; + virtual void sendInstancedMeshToGpu(const GrPrimitiveProcessor&, GrPrimitiveType, + const GrBuffer* vertexBuffer, int vertexCount, + int baseVertex, const GrBuffer* instanceBuffer, + int instanceCount, int baseInstance) = 0; + + virtual void sendIndexedInstancedMeshToGpu(const GrPrimitiveProcessor&, GrPrimitiveType, + const GrBuffer* indexBuffer, int indexCount, + int baseIndex, const GrBuffer* vertexBuffer, + int baseVertex, const GrBuffer* instanceBuffer, + int instanceCount, int baseInstance) = 0; + virtual ~SendToGpuImpl() {} }; @@ -63,15 +81,16 @@ private: GrPrimitiveType fPrimitiveType; PendingBuffer fIndexBuffer; + PendingBuffer fInstanceBuffer; PendingBuffer fVertexBuffer; int fBaseVertex; union { - struct { // When fIndexBuffer == nullptr. + struct { // When fIndexBuffer == nullptr and fInstanceBuffer == nullptr. int fVertexCount; - } fNonIndexData; + } fNonIndexNonInstanceData; - struct { // When fIndexBuffer != nullptr. + struct { // When fIndexBuffer != nullptr and fInstanceBuffer == nullptr. struct { int fIndexCount; int fPatternRepeatCount; @@ -90,12 +109,30 @@ private: } fPatternData; }; }; + + struct { // When fInstanceBuffer != nullptr. 
+ struct { + int fInstanceCount; + int fBaseInstance; + } fInstanceData; + + union { // When fIndexBuffer == nullptr. + struct { + int fVertexCount; + } fInstanceNonIndexData; + + struct { // When fIndexBuffer != nullptr. + int fIndexCount; + } fInstanceIndexData; + }; + }; }; }; -inline void GrMesh::setNonIndexed(int vertexCount) { +inline void GrMesh::setNonIndexedNonInstanced(int vertexCount) { fIndexBuffer.reset(nullptr); - fNonIndexData.fVertexCount = vertexCount; + fInstanceBuffer.reset(nullptr); + fNonIndexNonInstanceData.fVertexCount = vertexCount; } inline void GrMesh::setIndexed(const GrBuffer* indexBuffer, int indexCount, int baseIndex, @@ -105,6 +142,7 @@ inline void GrMesh::setIndexed(const GrBuffer* indexBuffer, int indexCount, int SkASSERT(baseIndex >= 0); SkASSERT(maxIndexValue > minIndexValue); fIndexBuffer.reset(indexBuffer); + fInstanceBuffer.reset(nullptr); fIndexData.fIndexCount = indexCount; fIndexData.fPatternRepeatCount = 0; fNonPatternIndexData.fBaseIndex = baseIndex; @@ -121,12 +159,40 @@ inline void GrMesh::setIndexedPatterned(const GrBuffer* indexBuffer, int indexCo SkASSERT(patternRepeatCount >= 1); SkASSERT(maxPatternRepetitionsInIndexBuffer >= 1); fIndexBuffer.reset(indexBuffer); + fInstanceBuffer.reset(nullptr); fIndexData.fIndexCount = indexCount; fIndexData.fPatternRepeatCount = patternRepeatCount; fPatternData.fVertexCount = vertexCount; fPatternData.fMaxPatternRepetitionsInIndexBuffer = maxPatternRepetitionsInIndexBuffer; } +inline void GrMesh::setInstanced(const GrBuffer* instanceBuffer, int instanceCount, + int baseInstance, int vertexCount) { + SkASSERT(instanceBuffer); + SkASSERT(instanceCount >= 1); + SkASSERT(baseInstance >= 0); + fIndexBuffer.reset(nullptr); + fInstanceBuffer.reset(instanceBuffer); + fInstanceData.fInstanceCount = instanceCount; + fInstanceData.fBaseInstance = baseInstance; + fInstanceNonIndexData.fVertexCount = vertexCount; +} + +inline void GrMesh::setIndexedInstanced(const GrBuffer* indexBuffer, int indexCount, + const GrBuffer* instanceBuffer, int instanceCount, + int baseInstance) { + SkASSERT(indexBuffer); + SkASSERT(indexCount >= 1); + SkASSERT(instanceBuffer); + SkASSERT(instanceCount >= 1); + SkASSERT(baseInstance >= 0); + fIndexBuffer.reset(indexBuffer); + fInstanceBuffer.reset(instanceBuffer); + fInstanceData.fInstanceCount = instanceCount; + fInstanceData.fBaseInstance = baseInstance; + fInstanceIndexData.fIndexCount = indexCount; +} + inline void GrMesh::setVertexData(const GrBuffer* vertexBuffer, int baseVertex) { SkASSERT(baseVertex >= 0); fVertexBuffer.reset(vertexBuffer); @@ -134,10 +200,26 @@ inline void GrMesh::setVertexData(const GrBuffer* vertexBuffer, int baseVertex) } inline void GrMesh::sendToGpu(const GrPrimitiveProcessor& primProc, SendToGpuImpl* impl) const { + if (this->isInstanced()) { + if (!this->isIndexed()) { + impl->sendInstancedMeshToGpu(primProc, fPrimitiveType, fVertexBuffer.get(), + fInstanceNonIndexData.fVertexCount, fBaseVertex, + fInstanceBuffer.get(), fInstanceData.fInstanceCount, + fInstanceData.fBaseInstance); + } else { + impl->sendIndexedInstancedMeshToGpu(primProc, fPrimitiveType, fIndexBuffer.get(), + fInstanceIndexData.fIndexCount, 0, + fVertexBuffer.get(), fBaseVertex, + fInstanceBuffer.get(), fInstanceData.fInstanceCount, + fInstanceData.fBaseInstance); + } + return; + } + if (!this->isIndexed()) { - SkASSERT(fNonIndexData.fVertexCount > 0); + SkASSERT(fNonIndexNonInstanceData.fVertexCount > 0); impl->sendMeshToGpu(primProc, fPrimitiveType, fVertexBuffer.get(), - 
fNonIndexData.fVertexCount, fBaseVertex); + fNonIndexNonInstanceData.fVertexCount, fBaseVertex); return; } diff --git a/src/gpu/GrPrimitiveProcessor.h b/src/gpu/GrPrimitiveProcessor.h index d078ac5072..63265f02a6 100644 --- a/src/gpu/GrPrimitiveProcessor.h +++ b/src/gpu/GrPrimitiveProcessor.h @@ -40,33 +40,45 @@ class GrGLSLPrimitiveProcessor; */ class GrPrimitiveProcessor : public GrResourceIOProcessor, public GrProgramElement { public: - // Only the GrGeometryProcessor subclass actually has a geo shader or vertex attributes, but - // we put these calls on the base class to prevent having to cast - virtual bool willUseGeoShader() const = 0; - struct Attribute { - Attribute() - : fName(nullptr) - , fType(kFloat_GrVertexAttribType) - , fOffset(0) {} - Attribute(const char* name, GrVertexAttribType type, GrSLPrecision precision) - : fName(name) - , fType(type) - , fOffset(SkAlign4(GrVertexAttribTypeSize(type))) - , fPrecision(precision) {} - const char* fName; - GrVertexAttribType fType; - size_t fOffset; - GrSLPrecision fPrecision; + enum class InputRate : bool { + kPerVertex, + kPerInstance + }; + + const char* fName; + GrVertexAttribType fType; + int fOffsetInRecord; + GrSLPrecision fPrecision; + InputRate fInputRate; }; int numAttribs() const { return fAttribs.count(); } const Attribute& getAttrib(int index) const { return fAttribs[index]; } - // Returns the vertex stride of the GP. A common use case is to request geometry from a - // GrOpList based off of the stride, and to populate this memory using an implicit array of - // structs. In this case, it is best to assert the vertexstride == sizeof(VertexStruct). - size_t getVertexStride() const { return fVertexStride; } + bool hasVertexAttribs() const { return SkToBool(fVertexStride); } + bool hasInstanceAttribs() const { return SkToBool(fInstanceStride); } + + /** + * These return the strides of the vertex and instance buffers. Attributes are expected to be + * laid out interleaved in their corresponding buffer (vertex or instance). fOffsetInRecord + * indicates an attribute's location in bytes relative to the first attribute. (These are padded + * to the nearest 4 bytes for performance reasons.) + * + * A common practice is to populate the buffer's memory using an implicit array of structs. In + * this case, it is best to assert: + * + * stride == sizeof(struct) and + * offsetof(struct, field[i]) == attrib[i].fOffsetInRecord + * + * NOTE: for instanced draws the vertex buffer has a single record that each instance reuses. + */ + int getVertexStride() const { return fVertexStride; } + int getInstanceStride() const { return fInstanceStride; } + + // Only the GrGeometryProcessor subclass actually has a geo shader or vertex attributes, but + // we put these calls on the base class to prevent having to cast + virtual bool willUseGeoShader() const = 0; /** * Computes a transformKey from an array of coord transforms. Will only look at the first @@ -107,11 +119,25 @@ public: virtual bool implementsDistanceVector() const { return false; } protected: - GrPrimitiveProcessor() : fVertexStride(0) {} - - enum { kPreallocAttribCnt = 8 }; - SkSTArray<kPreallocAttribCnt, Attribute> fAttribs; - size_t fVertexStride; + /** + * Subclasses call these from their constructor to register vertex and instance attributes. + */ + const Attribute& addVertexAttrib(const char* name, GrVertexAttribType type, + GrSLPrecision precision = kDefault_GrSLPrecision) { + precision = (kDefault_GrSLPrecision == precision) ? 
kMedium_GrSLPrecision : precision; + fAttribs.push_back() = {name, type, fVertexStride, precision, + Attribute::InputRate::kPerVertex}; + fVertexStride += static_cast<int>(SkAlign4(GrVertexAttribTypeSize(type))); + return fAttribs.back(); + } + const Attribute& addInstanceAttrib(const char* name, GrVertexAttribType type, + GrSLPrecision precision = kDefault_GrSLPrecision) { + precision = (kDefault_GrSLPrecision == precision) ? kMedium_GrSLPrecision : precision; + fAttribs.push_back() = {name, type, fInstanceStride, precision, + Attribute::InputRate::kPerInstance}; + fInstanceStride += static_cast<int>(SkAlign4(GrVertexAttribTypeSize(type))); + return fAttribs.back(); + } private: void addPendingIOs() const override { GrResourceIOProcessor::addPendingIOs(); } @@ -120,6 +146,10 @@ private: void notifyRefCntIsZero() const final {} virtual bool hasExplicitLocalCoords() const = 0; + SkSTArray<8, Attribute> fAttribs; + int fVertexStride = 0; + int fInstanceStride = 0; + typedef GrProcessor INHERITED; }; diff --git a/src/gpu/GrShaderCaps.cpp b/src/gpu/GrShaderCaps.cpp index cf29ab5976..6f5dc7f0d7 100644 --- a/src/gpu/GrShaderCaps.cpp +++ b/src/gpu/GrShaderCaps.cpp @@ -68,6 +68,7 @@ GrShaderCaps::GrShaderCaps(const GrContextOptions& options) { fSampleMaskOverrideCoverageSupport = false; fExternalTextureSupport = false; fTexelFetchSupport = false; + fVertexIDSupport = false; fVersionDeclString = nullptr; fShaderDerivativeExtensionString = nullptr; @@ -158,6 +159,7 @@ SkString GrShaderCaps::dump() const { "YES" : "NO")); r.appendf("External texture support: %s\n", (fExternalTextureSupport ? "YES" : "NO")); r.appendf("texelFetch support: %s\n", (fTexelFetchSupport ? "YES" : "NO")); + r.appendf("sk_VertexID support: %s\n", (fVertexIDSupport ? "YES" : "NO")); r.appendf("Max VS Samplers: %d\n", fMaxVertexSamplers); r.appendf("Max GS Samplers: %d\n", fMaxGeometrySamplers); r.appendf("Max FS Samplers: %d\n", fMaxFragmentSamplers); diff --git a/src/gpu/gl/GrGLCaps.cpp b/src/gpu/gl/GrGLCaps.cpp index df905a4f0f..a39b744574 100644 --- a/src/gpu/gl/GrGLCaps.cpp +++ b/src/gpu/gl/GrGLCaps.cpp @@ -38,7 +38,6 @@ GrGLCaps::GrGLCaps(const GrContextOptions& contextOptions, fDirectStateAccessSupport = false; fDebugSupport = false; fES2CompatibilitySupport = false; - fDrawInstancedSupport = false; fDrawIndirectSupport = false; fMultiDrawIndirectSupport = false; fBaseInstanceSupport = false; @@ -185,6 +184,20 @@ void GrGLCaps::init(const GrContextOptions& contextOptions, } if (kGL_GrGLStandard == standard) { + // 3.1 has draw_instanced but not instanced_arrays, for the time being we only care about + // instanced arrays, but we could make this more granular if we wanted + fInstanceAttribSupport = + version >= GR_GL_VER(3, 2) || + (ctxInfo.hasExtension("GL_ARB_draw_instanced") && + ctxInfo.hasExtension("GL_ARB_instanced_arrays")); + } else { + fInstanceAttribSupport = + version >= GR_GL_VER(3, 0) || + (ctxInfo.hasExtension("GL_EXT_draw_instanced") && + ctxInfo.hasExtension("GL_EXT_instanced_arrays")); + } + + if (kGL_GrGLStandard == standard) { if (version >= GR_GL_VER(3, 0)) { fBindFragDataLocationSupport = true; } @@ -529,20 +542,6 @@ void GrGLCaps::init(const GrContextOptions& contextOptions, } if (kGL_GrGLStandard == standard) { - // 3.1 has draw_instanced but not instanced_arrays, for the time being we only care about - // instanced arrays, but we could make this more granular if we wanted - fDrawInstancedSupport = - version >= GR_GL_VER(3, 2) || - (ctxInfo.hasExtension("GL_ARB_draw_instanced") && - 
ctxInfo.hasExtension("GL_ARB_instanced_arrays")); - } else { - fDrawInstancedSupport = - version >= GR_GL_VER(3, 0) || - (ctxInfo.hasExtension("GL_EXT_draw_instanced") && - ctxInfo.hasExtension("GL_EXT_instanced_arrays")); - } - - if (kGL_GrGLStandard == standard) { fDrawIndirectSupport = version >= GR_GL_VER(4,0) || ctxInfo.hasExtension("GL_ARB_draw_indirect"); fBaseInstanceSupport = version >= GR_GL_VER(4,2); @@ -850,6 +849,13 @@ void GrGLCaps::initGLSL(const GrGLContextInfo& ctxInfo) { } } + if (kGL_GrGLStandard == standard) { + shaderCaps->fVertexIDSupport = true; + } else { + // Desktop GLSL 3.30 == ES GLSL 3.00. + shaderCaps->fVertexIDSupport = ctxInfo.glslGeneration() >= k330_GrGLSLGeneration; + } + // The Tegra3 compiler will sometimes never return if we have min(abs(x), 1.0), so we must do // the abs first in a separate expression. if (kTegra3_GrGLRenderer == ctxInfo.renderer()) { @@ -1242,7 +1248,6 @@ SkString GrGLCaps::dump() const { r.appendf("Vertex array object support: %s\n", (fVertexArrayObjectSupport ? "YES": "NO")); r.appendf("Direct state access support: %s\n", (fDirectStateAccessSupport ? "YES": "NO")); r.appendf("Debug support: %s\n", (fDebugSupport ? "YES": "NO")); - r.appendf("Draw instanced support: %s\n", (fDrawInstancedSupport ? "YES" : "NO")); r.appendf("Draw indirect support: %s\n", (fDrawIndirectSupport ? "YES" : "NO")); r.appendf("Multi draw indirect support: %s\n", (fMultiDrawIndirectSupport ? "YES" : "NO")); r.appendf("Base instance support: %s\n", (fBaseInstanceSupport ? "YES" : "NO")); diff --git a/src/gpu/gl/GrGLGpu.cpp b/src/gpu/gl/GrGLGpu.cpp index 045804c668..c32d5d8fda 100644 --- a/src/gpu/gl/GrGLGpu.cpp +++ b/src/gpu/gl/GrGLGpu.cpp @@ -1779,33 +1779,46 @@ bool GrGLGpu::flushGLState(const GrPipeline& pipeline, const GrPrimitiveProcesso void GrGLGpu::setupGeometry(const GrPrimitiveProcessor& primProc, const GrBuffer* indexBuffer, const GrBuffer* vertexBuffer, - int baseVertex) { + int baseVertex, + const GrBuffer* instanceBuffer, + int baseInstance) { GrGLAttribArrayState* attribState; if (indexBuffer) { - SkASSERT(indexBuffer); - SkASSERT(!indexBuffer->isMapped()); + SkASSERT(indexBuffer && !indexBuffer->isMapped()); attribState = fHWVertexArrayState.bindInternalVertexArray(this, indexBuffer); } else { attribState = fHWVertexArrayState.bindInternalVertexArray(this); } - int vaCount = primProc.numAttribs(); - attribState->enableVertexArrays(this, vaCount); + struct { + const GrBuffer* fBuffer; + int fStride; + size_t fBufferOffset; + } bindings[2]; - if (vaCount > 0) { - SkASSERT(vertexBuffer); - SkASSERT(!vertexBuffer->isMapped()); + if (int vertexStride = primProc.getVertexStride()) { + SkASSERT(vertexBuffer && !vertexBuffer->isMapped()); + bindings[0].fBuffer = vertexBuffer; + bindings[0].fStride = vertexStride; + bindings[0].fBufferOffset = vertexBuffer->baseOffset() + baseVertex * vertexStride; + } + if (int instanceStride = primProc.getInstanceStride()) { + SkASSERT(instanceBuffer && !instanceBuffer->isMapped()); + bindings[1].fBuffer = instanceBuffer; + bindings[1].fStride = instanceStride; + bindings[1].fBufferOffset = instanceBuffer->baseOffset() + baseInstance * instanceStride; + } - GrGLsizei stride = static_cast<GrGLsizei>(primProc.getVertexStride()); - size_t vertexBufferOffsetInBytes = stride * baseVertex + vertexBuffer->baseOffset(); - size_t attribOffset = 0; + int numAttribs = primProc.numAttribs(); + attribState->enableVertexArrays(this, numAttribs); - for (int attribIndex = 0; attribIndex < vaCount; attribIndex++) { - const 
GrGeometryProcessor::Attribute& attrib = primProc.getAttrib(attribIndex); - attribState->set(this, attribIndex, vertexBuffer, attrib.fType, stride, - vertexBufferOffsetInBytes + attribOffset); - attribOffset += attrib.fOffset; - } + for (int i = 0; i < numAttribs; ++i) { + using InputRate = GrPrimitiveProcessor::Attribute::InputRate; + const GrGeometryProcessor::Attribute& attrib = primProc.getAttrib(i); + const int divisor = InputRate::kPerInstance == attrib.fInputRate ? 1 : 0; + const auto& binding = bindings[divisor]; + attribState->set(this, i, binding.fBuffer, attrib.fType, binding.fStride, + binding.fBufferOffset + attrib.fOffsetInRecord, divisor); } } @@ -2472,10 +2485,10 @@ void GrGLGpu::sendMeshToGpu(const GrPrimitiveProcessor& primProc, GrPrimitiveTyp const GrGLenum glPrimType = gPrimitiveType2GLMode[primitiveType]; if (this->glCaps().drawArraysBaseVertexIsBroken()) { - this->setupGeometry(primProc, nullptr, vertexBuffer, baseVertex); + this->setupGeometry(primProc, nullptr, vertexBuffer, baseVertex, nullptr, 0); GL_CALL(DrawArrays(glPrimType, 0, vertexCount)); } else { - this->setupGeometry(primProc, nullptr, vertexBuffer, 0); + this->setupGeometry(primProc, nullptr, vertexBuffer, 0, nullptr, 0); GL_CALL(DrawArrays(glPrimType, baseVertex, vertexCount)); } fStats.incNumDraws(); @@ -2490,7 +2503,7 @@ void GrGLGpu::sendIndexedMeshToGpu(const GrPrimitiveProcessor& primProc, GrGLvoid* const indices = reinterpret_cast<void*>(indexBuffer->baseOffset() + sizeof(uint16_t) * baseIndex); - this->setupGeometry(primProc, indexBuffer, vertexBuffer, baseVertex); + this->setupGeometry(primProc, indexBuffer, vertexBuffer, baseVertex, nullptr, 0); if (this->glCaps().drawRangeElementsSupport()) { GL_CALL(DrawRangeElements(glPrimType, minIndexValue, maxIndexValue, indexCount, @@ -2501,6 +2514,33 @@ void GrGLGpu::sendIndexedMeshToGpu(const GrPrimitiveProcessor& primProc, fStats.incNumDraws(); } +void GrGLGpu::sendInstancedMeshToGpu(const GrPrimitiveProcessor& primProc, GrPrimitiveType + primitiveType, const GrBuffer* vertexBuffer, + int vertexCount, int baseVertex, + const GrBuffer* instanceBuffer, int instanceCount, + int baseInstance) { + const GrGLenum glPrimType = gPrimitiveType2GLMode[primitiveType]; + this->setupGeometry(primProc, nullptr, vertexBuffer, 0, instanceBuffer, baseInstance); + GL_CALL(DrawArraysInstanced(glPrimType, baseVertex, vertexCount, instanceCount)); + fStats.incNumDraws(); +} + +void GrGLGpu::sendIndexedInstancedMeshToGpu(const GrPrimitiveProcessor& primProc, + GrPrimitiveType primitiveType, + const GrBuffer* indexBuffer, int indexCount, + int baseIndex, const GrBuffer* vertexBuffer, + int baseVertex, const GrBuffer* instanceBuffer, + int instanceCount, int baseInstance) { + const GrGLenum glPrimType = gPrimitiveType2GLMode[primitiveType]; + GrGLvoid* indices = reinterpret_cast<void*>(indexBuffer->baseOffset() + + sizeof(uint16_t) * baseIndex); + this->setupGeometry(primProc, indexBuffer, vertexBuffer, baseVertex, + instanceBuffer, baseInstance); + GL_CALL(DrawElementsInstanced(glPrimType, indexCount, GR_GL_UNSIGNED_SHORT, indices, + instanceCount)); + fStats.incNumDraws(); +} + void GrGLGpu::onResolveRenderTarget(GrRenderTarget* target) { GrGLRenderTarget* rt = static_cast<GrGLRenderTarget*>(target); if (rt->needsResolve()) { diff --git a/src/gpu/gl/GrGLGpu.h b/src/gpu/gl/GrGLGpu.h index c83ad85585..7b59c3b696 100644 --- a/src/gpu/gl/GrGLGpu.h +++ b/src/gpu/gl/GrGLGpu.h @@ -115,6 +115,17 @@ public: uint16_t minIndexValue, uint16_t maxIndexValue, const GrBuffer* 
vertexBuffer, int baseVertex) final; + void sendInstancedMeshToGpu(const GrPrimitiveProcessor&, GrPrimitiveType, + const GrBuffer* vertexBuffer, int vertexCount, int baseVertex, + const GrBuffer* instanceBuffer, int instanceCount, + int baseInstance) final; + + void sendIndexedInstancedMeshToGpu(const GrPrimitiveProcessor&, GrPrimitiveType, + const GrBuffer* indexBuffer, int indexCount, int baseIndex, + const GrBuffer* vertexBuffer, int baseVertex, + const GrBuffer* instanceBuffer, int instanceCount, + int baseInstance) final; + // The GrGLGpuCommandBuffer does not buffer up draws before submitting them to the gpu. // Thus this is the implementation of the clear call for the corresponding passthrough function // on GrGLGpuCommandBuffer. @@ -264,7 +275,9 @@ private: void setupGeometry(const GrPrimitiveProcessor&, const GrBuffer* indexBuffer, const GrBuffer* vertexBuffer, - int baseVertex); + int baseVertex, + const GrBuffer* instanceBuffer, + int baseInstance); void flushBlend(const GrXferProcessor::BlendInfo& blendInfo, const GrSwizzle&); diff --git a/src/gpu/gl/GrGLVertexArray.cpp b/src/gpu/gl/GrGLVertexArray.cpp index 74e609e9b8..59f2be9831 100644 --- a/src/gpu/gl/GrGLVertexArray.cpp +++ b/src/gpu/gl/GrGLVertexArray.cpp @@ -53,8 +53,10 @@ void GrGLAttribArrayState::set(GrGLGpu* gpu, const GrBuffer* vertexBuffer, GrVertexAttribType type, GrGLsizei stride, - size_t offsetInBytes) { + size_t offsetInBytes, + int divisor) { SkASSERT(index >= 0 && index < fAttribArrayStates.count()); + SkASSERT(0 == divisor || gpu->caps()->instanceAttribSupport()); AttribArrayState* array = &fAttribArrayStates[index]; if (array->fVertexBufferUniqueID != vertexBuffer->uniqueID() || array->fType != type || @@ -84,10 +86,18 @@ void GrGLAttribArrayState::set(GrGLGpu* gpu, array->fStride = stride; array->fOffset = offsetInBytes; } + if (gpu->caps()->instanceAttribSupport() && array->fDivisor != divisor) { + SkASSERT(0 == divisor || 1 == divisor); // not necessarily a requirement but what we expect. + GR_GL_CALL(gpu->glInterface(), VertexAttribDivisor(index, divisor)); + array->fDivisor = divisor; + } } void GrGLAttribArrayState::enableVertexArrays(const GrGLGpu* gpu, int enabledCount) { SkASSERT(enabledCount <= fAttribArrayStates.count()); + if (fEnabledCountIsValid && enabledCount == fNumEnabledArrays) { + return; + } int firstIdxToEnable = fEnabledCountIsValid ? fNumEnabledArrays : 0; for (int i = firstIdxToEnable; i < enabledCount; ++i) { diff --git a/src/gpu/gl/GrGLVertexArray.h b/src/gpu/gl/GrGLVertexArray.h index 1970e30425..553df4f3fd 100644 --- a/src/gpu/gl/GrGLVertexArray.h +++ b/src/gpu/gl/GrGLVertexArray.h @@ -42,7 +42,8 @@ public: const GrBuffer* vertexBuffer, GrVertexAttribType type, GrGLsizei stride, - size_t offsetInBytes); + size_t offsetInBytes, + int divisor = 0); /** * This function enables the first 'enabledCount' vertex arrays and disables the rest. @@ -63,16 +64,22 @@ public: int count() const { return fAttribArrayStates.count(); } private: + static constexpr int kInvalidDivisor = -1; + /** * Tracks the state of glVertexAttribArray for an attribute index. 
*/ struct AttribArrayState { - void invalidate() { fVertexBufferUniqueID.makeInvalid(); } + void invalidate() { + fVertexBufferUniqueID.makeInvalid(); + fDivisor = kInvalidDivisor; + } GrGpuResource::UniqueID fVertexBufferUniqueID; GrVertexAttribType fType; GrGLsizei fStride; size_t fOffset; + int fDivisor; }; SkSTArray<16, AttribArrayState, true> fAttribArrayStates; diff --git a/src/gpu/ops/GrDefaultPathRenderer.cpp b/src/gpu/ops/GrDefaultPathRenderer.cpp index 7633868c25..c282036971 100644 --- a/src/gpu/ops/GrDefaultPathRenderer.cpp +++ b/src/gpu/ops/GrDefaultPathRenderer.cpp @@ -249,7 +249,7 @@ private: GrMesh mesh(primitiveType); if (!isIndexed) { - mesh.setNonIndexed(vertexOffset); + mesh.setNonIndexedNonInstanced(vertexOffset); } else { mesh.setIndexed(indexBuffer, indexOffset, firstIndex, 0, vertexOffset - 1); } diff --git a/src/gpu/ops/GrDrawVerticesOp.cpp b/src/gpu/ops/GrDrawVerticesOp.cpp index e5b13831d2..a83b3e07b9 100644 --- a/src/gpu/ops/GrDrawVerticesOp.cpp +++ b/src/gpu/ops/GrDrawVerticesOp.cpp @@ -231,7 +231,7 @@ void GrDrawVerticesOp::onPrepareDraws(Target* target) const { GrMesh mesh(this->primitiveType()); if (!indices) { - mesh.setNonIndexed(fVertexCount); + mesh.setNonIndexedNonInstanced(fVertexCount); } else { mesh.setIndexed(indexBuffer, fIndexCount, firstIndex, 0, fVertexCount - 1); } diff --git a/src/gpu/ops/GrMSAAPathRenderer.cpp b/src/gpu/ops/GrMSAAPathRenderer.cpp index 36faf2f2c9..50a839d7b0 100644 --- a/src/gpu/ops/GrMSAAPathRenderer.cpp +++ b/src/gpu/ops/GrMSAAPathRenderer.cpp @@ -337,7 +337,7 @@ private: const GrBuffer* lineVertexBuffer; int firstLineVertex; MSAALineVertices lines; - size_t lineVertexStride = sizeof(MSAALineVertices::Vertex); + int lineVertexStride = sizeof(MSAALineVertices::Vertex); lines.vertices = (MSAALineVertices::Vertex*) target->makeVertexSpace(lineVertexStride, fMaxLineVertices, &lineVertexBuffer, @@ -350,7 +350,7 @@ private: SkDEBUGCODE(lines.verticesEnd = lines.vertices + fMaxLineVertices;) MSAAQuadVertices quads; - size_t quadVertexStride = sizeof(MSAAQuadVertices::Vertex); + int quadVertexStride = sizeof(MSAAQuadVertices::Vertex); SkAutoMalloc quadVertexPtr(fMaxQuadVertices * quadVertexStride); quads.vertices = (MSAAQuadVertices::Vertex*) quadVertexPtr.get(); quads.nextVertex = quads.vertices; @@ -412,7 +412,7 @@ private: GrMesh lineMeshes(primitiveType); if (!fIsIndexed) { - lineMeshes.setNonIndexed(lineVertexOffset); + lineMeshes.setNonIndexedNonInstanced(lineVertexOffset); } else { lineMeshes.setIndexed(lineIndexBuffer, lineIndexOffset, firstLineIndex, 0, lineVertexOffset - 1); @@ -439,7 +439,7 @@ private: memcpy(quadVertices, quads.vertices, quadVertexStride * quadVertexOffset); GrMesh quadMeshes(kTriangles_GrPrimitiveType); if (!fIsIndexed) { - quadMeshes.setNonIndexed(quadVertexOffset); + quadMeshes.setNonIndexedNonInstanced(quadVertexOffset); } else { const GrBuffer* quadIndexBuffer; int firstQuadIndex; diff --git a/src/gpu/ops/GrNonAAFillRectOp.cpp b/src/gpu/ops/GrNonAAFillRectOp.cpp index f50fcc0cc5..32e77da52b 100644 --- a/src/gpu/ops/GrNonAAFillRectOp.cpp +++ b/src/gpu/ops/GrNonAAFillRectOp.cpp @@ -119,6 +119,7 @@ public: const SkRect& rect, const SkRect* localRect, const SkMatrix* localMatrix, GrAAType aaType, const GrUserStencilSettings* stencilSettings) : INHERITED(ClassID()), fHelper(args, aaType, stencilSettings) { + SkASSERT(!viewMatrix.hasPerspective() && (!localMatrix || !localMatrix->hasPerspective())); RectInfo& info = fRects.push_back(); info.fColor = color; diff --git a/src/gpu/ops/GrNonAAStrokeRectOp.cpp 
b/src/gpu/ops/GrNonAAStrokeRectOp.cpp index f386984a30..eee8120eff 100644 --- a/src/gpu/ops/GrNonAAStrokeRectOp.cpp +++ b/src/gpu/ops/GrNonAAStrokeRectOp.cpp @@ -157,7 +157,7 @@ private: } GrMesh mesh(primType); - mesh.setNonIndexed(vertexCount); + mesh.setNonIndexedNonInstanced(vertexCount); mesh.setVertexData(vertexBuffer, firstVertex); target->draw(gp.get(), this->pipeline(), mesh); } diff --git a/src/gpu/ops/GrTessellatingPathRenderer.cpp b/src/gpu/ops/GrTessellatingPathRenderer.cpp index 9860d9c16d..27de8f2e5f 100644 --- a/src/gpu/ops/GrTessellatingPathRenderer.cpp +++ b/src/gpu/ops/GrTessellatingPathRenderer.cpp @@ -314,7 +314,7 @@ private: void drawVertices(Target* target, const GrGeometryProcessor* gp, const GrBuffer* vb, int firstVertex, int count) const { GrMesh mesh(TESSELLATOR_WIREFRAME ? kLines_GrPrimitiveType : kTriangles_GrPrimitiveType); - mesh.setNonIndexed(count); + mesh.setNonIndexedNonInstanced(count); mesh.setVertexData(vb, firstVertex); target->draw(gp, this->pipeline(), mesh); } diff --git a/src/gpu/vk/GrVkCaps.cpp b/src/gpu/vk/GrVkCaps.cpp index 16b46096e4..e8a397847c 100644 --- a/src/gpu/vk/GrVkCaps.cpp +++ b/src/gpu/vk/GrVkCaps.cpp @@ -32,6 +32,7 @@ GrVkCaps::GrVkCaps(const GrContextOptions& contextOptions, const GrVkInterface* fReuseScratchTextures = true; //TODO: figure this out fGpuTracingSupport = false; //TODO: figure this out fOversizedStencilSupport = false; //TODO: figure this out + fInstanceAttribSupport = true; fUseDrawInsteadOfClear = false; fFenceSyncSupport = true; // always available in Vulkan @@ -247,6 +248,7 @@ void GrVkCaps::initShaderCaps(const VkPhysicalDeviceProperties& properties, uint } shaderCaps->fIntegerSupport = true; + shaderCaps->fVertexIDSupport = true; // Assume the minimum precisions mandated by the SPIR-V spec. shaderCaps->fShaderPrecisionVaries = true; diff --git a/src/gpu/vk/GrVkCommandBuffer.cpp b/src/gpu/vk/GrVkCommandBuffer.cpp index cc219f5ec8..ea0a02cb34 100644 --- a/src/gpu/vk/GrVkCommandBuffer.cpp +++ b/src/gpu/vk/GrVkCommandBuffer.cpp @@ -18,10 +18,10 @@ #include "SkRect.h" void GrVkCommandBuffer::invalidateState() { - fBoundVertexBuffer = VK_NULL_HANDLE; - fBoundVertexBufferIsValid = false; + for (auto& boundInputBuffer : fBoundInputBuffers) { + boundInputBuffer = VK_NULL_HANDLE; + } fBoundIndexBuffer = VK_NULL_HANDLE; - fBoundIndexBufferIsValid = false; memset(&fCachedViewport, 0, sizeof(VkViewport)); fCachedViewport.width = - 1.0f; // Viewport must have a width greater than 0 diff --git a/src/gpu/vk/GrVkCommandBuffer.h b/src/gpu/vk/GrVkCommandBuffer.h index 7c54877ec2..1f3c4a50e5 100644 --- a/src/gpu/vk/GrVkCommandBuffer.h +++ b/src/gpu/vk/GrVkCommandBuffer.h @@ -40,33 +40,36 @@ public: BarrierType barrierType, void* barrier) const; - void bindVertexBuffer(GrVkGpu* gpu, const GrVkVertexBuffer* vbuffer) { + static constexpr uint32_t kMaxInputBuffers = 2; + + void bindInputBuffer(GrVkGpu* gpu, uint32_t binding, const GrVkVertexBuffer* vbuffer) { VkBuffer vkBuffer = vbuffer->buffer(); + SkASSERT(VK_NULL_HANDLE != vkBuffer); + SkASSERT(binding < kMaxInputBuffers); // TODO: once vbuffer->offset() no longer always returns 0, we will need to track the offset // to know if we can skip binding or not. 
- if (!fBoundVertexBufferIsValid || vkBuffer != fBoundVertexBuffer) { + if (vkBuffer != fBoundInputBuffers[binding]) { VkDeviceSize offset = vbuffer->offset(); GR_VK_CALL(gpu->vkInterface(), CmdBindVertexBuffers(fCmdBuffer, - 0, + binding, 1, &vkBuffer, &offset)); - fBoundVertexBufferIsValid = true; - fBoundVertexBuffer = vkBuffer; + fBoundInputBuffers[binding] = vkBuffer; addResource(vbuffer->resource()); } } void bindIndexBuffer(GrVkGpu* gpu, const GrVkIndexBuffer* ibuffer) { VkBuffer vkBuffer = ibuffer->buffer(); + SkASSERT(VK_NULL_HANDLE != vkBuffer); // TODO: once ibuffer->offset() no longer always returns 0, we will need to track the offset // to know if we can skip binding or not. - if (!fBoundIndexBufferIsValid || vkBuffer != fBoundIndexBuffer) { + if (vkBuffer != fBoundIndexBuffer) { GR_VK_CALL(gpu->vkInterface(), CmdBindIndexBuffer(fCmdBuffer, vkBuffer, ibuffer->offset(), VK_INDEX_TYPE_UINT16)); - fBoundIndexBufferIsValid = true; fBoundIndexBuffer = vkBuffer; addResource(ibuffer->resource()); } @@ -146,8 +149,6 @@ protected: : fIsActive(false) , fActiveRenderPass(rp) , fCmdBuffer(cmdBuffer) - , fBoundVertexBufferIsValid(false) - , fBoundIndexBufferIsValid(false) , fNumResets(0) { fTrackedResources.setReserve(kInitialTrackedResourcesCount); fTrackedRecycledResources.setReserve(kInitialTrackedResourcesCount); @@ -177,11 +178,8 @@ private: virtual void onReset(GrVkGpu* gpu) {} - VkBuffer fBoundVertexBuffer; - bool fBoundVertexBufferIsValid; - - VkBuffer fBoundIndexBuffer; - bool fBoundIndexBufferIsValid; + VkBuffer fBoundInputBuffers[kMaxInputBuffers]; + VkBuffer fBoundIndexBuffer; // When resetting the command buffer, we remove the tracked resources from their arrays, and // we prefer to not free all the memory every time so usually we just rewind. However, to avoid diff --git a/src/gpu/vk/GrVkCopyManager.cpp b/src/gpu/vk/GrVkCopyManager.cpp index 5301deae49..cd5dba4b18 100644 --- a/src/gpu/vk/GrVkCopyManager.cpp +++ b/src/gpu/vk/GrVkCopyManager.cpp @@ -377,7 +377,7 @@ bool GrVkCopyManager::copySurfaceAsDraw(GrVkGpu* gpu, scissor.offset.y = 0; cmdBuffer->setScissor(gpu, 0, 1, &scissor); - cmdBuffer->bindVertexBuffer(gpu, fVertexBuffer.get()); + cmdBuffer->bindInputBuffer(gpu, 0, fVertexBuffer.get()); cmdBuffer->draw(gpu, 4, 1, 0, 0); cmdBuffer->endRenderPass(gpu); diff --git a/src/gpu/vk/GrVkGpuCommandBuffer.cpp b/src/gpu/vk/GrVkGpuCommandBuffer.cpp index 2e9373b130..6f39cfa545 100644 --- a/src/gpu/vk/GrVkGpuCommandBuffer.cpp +++ b/src/gpu/vk/GrVkGpuCommandBuffer.cpp @@ -426,17 +426,35 @@ void GrVkGpuCommandBuffer::inlineUpload(GrOpFlushState* state, GrDrawOp::Deferre void GrVkGpuCommandBuffer::bindGeometry(const GrPrimitiveProcessor& primProc, const GrBuffer* indexBuffer, - const GrBuffer* vertexBuffer) { + const GrBuffer* vertexBuffer, + const GrBuffer* instanceBuffer) { GrVkSecondaryCommandBuffer* currCmdBuf = fCommandBufferInfos[fCurrentCmdInfo].currentCmdBuf(); // There is no need to put any memory barriers to make sure host writes have finished here. // When a command buffer is submitted to a queue, there is an implicit memory barrier that // occurs for all host writes. Additionally, BufferMemoryBarriers are not allowed inside of // an active RenderPass. - SkASSERT(vertexBuffer); - SkASSERT(!vertexBuffer->isCPUBacked()); - SkASSERT(!vertexBuffer->isMapped()); - currCmdBuf->bindVertexBuffer(fGpu, static_cast<const GrVkVertexBuffer*>(vertexBuffer)); + // Here our vertex and instance inputs need to match the same 0-based bindings they were + // assigned in GrVkPipeline. 
That is, vertex first (if any) followed by instance. + uint32_t binding = 0; + + if (primProc.hasVertexAttribs()) { + SkASSERT(vertexBuffer); + SkASSERT(!vertexBuffer->isCPUBacked()); + SkASSERT(!vertexBuffer->isMapped()); + + currCmdBuf->bindInputBuffer(fGpu, binding++, + static_cast<const GrVkVertexBuffer*>(vertexBuffer)); + } + + if (primProc.hasInstanceAttribs()) { + SkASSERT(instanceBuffer); + SkASSERT(!instanceBuffer->isCPUBacked()); + SkASSERT(!instanceBuffer->isMapped()); + + currCmdBuf->bindInputBuffer(fGpu, binding++, + static_cast<const GrVkVertexBuffer*>(instanceBuffer)); + } if (indexBuffer) { SkASSERT(indexBuffer); @@ -575,29 +593,34 @@ void GrVkGpuCommandBuffer::onDraw(const GrPipeline& pipeline, pipelineState->freeTempResources(fGpu); } -void GrVkGpuCommandBuffer::sendMeshToGpu(const GrPrimitiveProcessor& primProc, - GrPrimitiveType, - const GrBuffer* vertexBuffer, - int vertexCount, - int baseVertex) { +void GrVkGpuCommandBuffer::sendInstancedMeshToGpu(const GrPrimitiveProcessor& primProc, + GrPrimitiveType, + const GrBuffer* vertexBuffer, + int vertexCount, + int baseVertex, + const GrBuffer* instanceBuffer, + int instanceCount, + int baseInstance) { CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdInfo]; - this->bindGeometry(primProc, nullptr, vertexBuffer); - cbInfo.currentCmdBuf()->draw(fGpu, vertexCount, 1, baseVertex, 0); + this->bindGeometry(primProc, nullptr, vertexBuffer, instanceBuffer); + cbInfo.currentCmdBuf()->draw(fGpu, vertexCount, instanceCount, baseVertex, baseInstance); fGpu->stats()->incNumDraws(); } -void GrVkGpuCommandBuffer::sendIndexedMeshToGpu(const GrPrimitiveProcessor& primProc, - GrPrimitiveType, - const GrBuffer* indexBuffer, - int indexCount, - int baseIndex, - uint16_t /*minIndexValue*/, - uint16_t /*maxIndexValue*/, - const GrBuffer* vertexBuffer, - int baseVertex) { +void GrVkGpuCommandBuffer::sendIndexedInstancedMeshToGpu(const GrPrimitiveProcessor& primProc, + GrPrimitiveType, + const GrBuffer* indexBuffer, + int indexCount, + int baseIndex, + const GrBuffer* vertexBuffer, + int baseVertex, + const GrBuffer* instanceBuffer, + int instanceCount, + int baseInstance) { CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdInfo]; - this->bindGeometry(primProc, indexBuffer, vertexBuffer); - cbInfo.currentCmdBuf()->drawIndexed(fGpu, indexCount, 1, baseIndex, baseVertex, 0); + this->bindGeometry(primProc, indexBuffer, vertexBuffer, instanceBuffer); + cbInfo.currentCmdBuf()->drawIndexed(fGpu, indexCount, instanceCount, + baseIndex, baseVertex, baseInstance); fGpu->stats()->incNumDraws(); } diff --git a/src/gpu/vk/GrVkGpuCommandBuffer.h b/src/gpu/vk/GrVkGpuCommandBuffer.h index b1e96a3e11..6836fac0ca 100644 --- a/src/gpu/vk/GrVkGpuCommandBuffer.h +++ b/src/gpu/vk/GrVkGpuCommandBuffer.h @@ -48,7 +48,8 @@ private: // Bind vertex and index buffers void bindGeometry(const GrPrimitiveProcessor&, const GrBuffer* indexBuffer, - const GrBuffer* vertexBuffer); + const GrBuffer* vertexBuffer, + const GrBuffer* instanceBuffer); sk_sp<GrVkPipelineState> prepareDrawState(const GrPipeline&, const GrPrimitiveProcessor&, @@ -62,13 +63,30 @@ private: // GrMesh::SendToGpuImpl methods. These issue the actual Vulkan draw commands. // Marked final as a hint to the compiler to not use virtual dispatch. 
- void sendMeshToGpu(const GrPrimitiveProcessor&, GrPrimitiveType, - const GrBuffer* vertexBuffer, int vertexCount, int baseVertex) final; + void sendMeshToGpu(const GrPrimitiveProcessor& primProc, GrPrimitiveType primType, + const GrBuffer* vertexBuffer, int vertexCount, int baseVertex) final { + this->sendInstancedMeshToGpu(primProc, primType, vertexBuffer, vertexCount, baseVertex, + nullptr, 1, 0); + } - void sendIndexedMeshToGpu(const GrPrimitiveProcessor&, GrPrimitiveType, + void sendIndexedMeshToGpu(const GrPrimitiveProcessor& primProc, GrPrimitiveType primType, const GrBuffer* indexBuffer, int indexCount, int baseIndex, - uint16_t minIndexValue, uint16_t maxIndexValue, - const GrBuffer* vertexBuffer, int baseVertex) final; + uint16_t /*minIndexValue*/, uint16_t /*maxIndexValue*/, + const GrBuffer* vertexBuffer, int baseVertex) final { + this->sendIndexedInstancedMeshToGpu(primProc, primType, indexBuffer, indexCount, baseIndex, + vertexBuffer, baseVertex, nullptr, 1, 0); + } + + void sendInstancedMeshToGpu(const GrPrimitiveProcessor&, GrPrimitiveType, + const GrBuffer* vertexBuffer, int vertexCount, int baseVertex, + const GrBuffer* instanceBuffer, int instanceCount, + int baseInstance) final; + + void sendIndexedInstancedMeshToGpu(const GrPrimitiveProcessor&, GrPrimitiveType, + const GrBuffer* indexBuffer, int indexCount, int baseIndex, + const GrBuffer* vertexBuffer, int baseVertex, + const GrBuffer* instanceBuffer, int instanceCount, + int baseInstance) final; void onClear(GrRenderTarget*, const GrFixedClip&, GrColor color) override; diff --git a/src/gpu/vk/GrVkPipeline.cpp b/src/gpu/vk/GrVkPipeline.cpp index 47acb94266..2732c6fc80 100644 --- a/src/gpu/vk/GrVkPipeline.cpp +++ b/src/gpu/vk/GrVkPipeline.cpp @@ -46,30 +46,41 @@ static inline VkFormat attrib_type_to_vkformat(GrVertexAttribType type) { } static void setup_vertex_input_state(const GrPrimitiveProcessor& primProc, - VkPipelineVertexInputStateCreateInfo* vertexInputInfo, - VkVertexInputBindingDescription* bindingDesc, - int maxBindingDescCount, - VkVertexInputAttributeDescription* attributeDesc) { - // for now we have only one vertex buffer and one binding - memset(bindingDesc, 0, sizeof(VkVertexInputBindingDescription)); - bindingDesc->binding = 0; - bindingDesc->stride = (uint32_t)primProc.getVertexStride(); - bindingDesc->inputRate = VK_VERTEX_INPUT_RATE_VERTEX; + VkPipelineVertexInputStateCreateInfo* vertexInputInfo, + SkSTArray<2, VkVertexInputBindingDescription, true>* bindingDescs, + VkVertexInputAttributeDescription* attributeDesc) { + uint32_t vertexBinding, instanceBinding; + + if (primProc.hasVertexAttribs()) { + vertexBinding = bindingDescs->count(); + bindingDescs->push_back() = { + vertexBinding, + (uint32_t) primProc.getVertexStride(), + VK_VERTEX_INPUT_RATE_VERTEX + }; + } + + if (primProc.hasInstanceAttribs()) { + instanceBinding = bindingDescs->count(); + bindingDescs->push_back() = { + instanceBinding, + (uint32_t) primProc.getInstanceStride(), + VK_VERTEX_INPUT_RATE_INSTANCE + }; + } // setup attribute descriptions int vaCount = primProc.numAttribs(); if (vaCount > 0) { - size_t offset = 0; for (int attribIndex = 0; attribIndex < vaCount; attribIndex++) { + using InputRate = GrPrimitiveProcessor::Attribute::InputRate; const GrGeometryProcessor::Attribute& attrib = primProc.getAttrib(attribIndex); - GrVertexAttribType attribType = attrib.fType; - VkVertexInputAttributeDescription& vkAttrib = attributeDesc[attribIndex]; vkAttrib.location = attribIndex; // for now assume location = attribIndex - 
vkAttrib.binding = 0; // for now only one vertex buffer & binding - vkAttrib.format = attrib_type_to_vkformat(attribType); - vkAttrib.offset = static_cast<uint32_t>(offset); - offset += attrib.fOffset; + vkAttrib.binding = InputRate::kPerInstance == attrib.fInputRate ? instanceBinding + : vertexBinding; + vkAttrib.format = attrib_type_to_vkformat(attrib.fType); + vkAttrib.offset = attrib.fOffsetInRecord; } } @@ -77,8 +88,8 @@ static void setup_vertex_input_state(const GrPrimitiveProcessor& primProc, vertexInputInfo->sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; vertexInputInfo->pNext = nullptr; vertexInputInfo->flags = 0; - vertexInputInfo->vertexBindingDescriptionCount = 1; - vertexInputInfo->pVertexBindingDescriptions = bindingDesc; + vertexInputInfo->vertexBindingDescriptionCount = bindingDescs->count(); + vertexInputInfo->pVertexBindingDescriptions = bindingDescs->begin(); vertexInputInfo->vertexAttributeDescriptionCount = vaCount; vertexInputInfo->pVertexAttributeDescriptions = attributeDesc; } @@ -402,11 +413,11 @@ GrVkPipeline* GrVkPipeline::Create(GrVkGpu* gpu, const GrPipeline& pipeline, VkPipelineLayout layout, VkPipelineCache cache) { VkPipelineVertexInputStateCreateInfo vertexInputInfo; - VkVertexInputBindingDescription bindingDesc; + SkSTArray<2, VkVertexInputBindingDescription, true> bindingDescs; SkSTArray<16, VkVertexInputAttributeDescription> attributeDesc; SkASSERT(primProc.numAttribs() <= gpu->vkCaps().maxVertexAttributes()); VkVertexInputAttributeDescription* pAttribs = attributeDesc.push_back_n(primProc.numAttribs()); - setup_vertex_input_state(primProc, &vertexInputInfo, &bindingDesc, 1, pAttribs); + setup_vertex_input_state(primProc, &vertexInputInfo, &bindingDescs, pAttribs); VkPipelineInputAssemblyStateCreateInfo inputAssemblyInfo; setup_input_assembly_state(primitiveType, &inputAssemblyInfo); |
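
Usage note: a minimal sketch of how an op might feed the new GrMesh instancing entry points, based only on the signatures added above. Buffer creation and the non-instanced fallback are elided, and the buffer/count names (quadIndexBuffer, quadVertexBuffer, instanceBuffer, instanceCount) are hypothetical.

    // Draw instanceCount quads: 4 shared vertices referenced by 6 indices per
    // instance, plus one per-instance record each. On hardware where the caps
    // report instanceAttribSupport(), this can stand in for the pattern-based
    // setIndexedPatterned() path; callers are expected to check the cap and
    // fall back otherwise (GrGpuCommandBuffer::draw asserts it in debug builds).
    GrMesh mesh(kTriangles_GrPrimitiveType);
    mesh.setIndexedInstanced(quadIndexBuffer, /*indexCount=*/6,
                             instanceBuffer, instanceCount, /*baseInstance=*/0);
    mesh.setVertexData(quadVertexBuffer, /*baseVertex=*/0);
    target->draw(gp.get(), this->pipeline(), mesh);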
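
The addVertexAttrib()/addInstanceAttrib() registration and the stride/offset contract documented in GrPrimitiveProcessor.h lend themselves to the struct-mirroring asserts the new comment recommends. A sketch of that pattern, written as if inside a hypothetical GrGeometryProcessor subclass constructor (the required name/GLSL overrides are omitted, and the GrVertexAttribType enumerator names are assumed from this era of the tree):

    struct Vertex   { float fPosition[2]; };                   // per-vertex record
    struct Instance { float fTranslate[2]; GrColor fColor; };  // per-instance record

    const Attribute& pos   = this->addVertexAttrib("position", kVec2f_GrVertexAttribType);
    const Attribute& xlate = this->addInstanceAttrib("translate", kVec2f_GrVertexAttribType);
    const Attribute& color = this->addInstanceAttrib("color", kVec4ub_GrVertexAttribType);

    // Each attribute's fOffsetInRecord is its 4-byte-aligned offset within the
    // interleaved record of its buffer; the strides accumulate per buffer.
    SkASSERT(sizeof(Vertex)   == (size_t)this->getVertexStride());
    SkASSERT(sizeof(Instance) == (size_t)this->getInstanceStride());
    SkASSERT(offsetof(Vertex, fPosition)    == (size_t)pos.fOffsetInRecord);
    SkASSERT(offsetof(Instance, fTranslate) == (size_t)xlate.fOffsetInRecord);
    SkASSERT(offsetof(Instance, fColor)     == (size_t)color.fOffsetInRecord);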
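
On the GL backend, each attribute's InputRate maps onto glVertexAttribDivisor (divisor 0 for per-vertex, 1 for per-instance), and baseInstance is folded into the instance buffer's bind offset in setupGeometry() rather than relying on the GL 4.2 *BaseInstance draw calls. Stripped of Skia's state tracking, the call sequence is roughly the following sketch, assuming a GL 3.3+/ES 3.0+ context, one vec2 per-vertex attribute at location 0 and one vec2 per-instance attribute at location 1 (header/loader includes and error handling omitted):

    void drawIndexedInstanced(GLuint vertexVbo, GLuint instanceVbo, GLuint ibo,
                              GLsizei indexCount, GLsizei instanceCount) {
        glBindBuffer(GL_ARRAY_BUFFER, vertexVbo);
        glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 2 * sizeof(GLfloat), (void*)0);
        glEnableVertexAttribArray(0);
        glVertexAttribDivisor(0, 0);   // divisor 0: advance once per vertex

        glBindBuffer(GL_ARRAY_BUFFER, instanceVbo);
        glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 2 * sizeof(GLfloat), (void*)0);
        glEnableVertexAttribArray(1);
        glVertexAttribDivisor(1, 1);   // divisor 1: advance once per instance

        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ibo);
        glDrawElementsInstanced(GL_TRIANGLES, indexCount, GL_UNSIGNED_SHORT, (void*)0,
                                instanceCount);
    }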
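
On the Vulkan backend, vertex and instance inputs become two pipeline vertex-input bindings (vertex first, then instance, matching the bind order in GrVkGpuCommandBuffer::bindGeometry), while the instance count and base instance ride along on the existing draw commands. A plain Vulkan-API sketch of the equivalent recording, with hypothetical buffer handles and counts:

    #include <vulkan/vulkan.h>

    // Pipeline-creation side, mirroring setup_vertex_input_state() above:
    //   {0, vertexStride,   VK_VERTEX_INPUT_RATE_VERTEX}
    //   {1, instanceStride, VK_VERTEX_INPUT_RATE_INSTANCE}
    // with each VkVertexInputAttributeDescription's .binding chosen by input rate.

    static void recordIndexedInstancedDraw(VkCommandBuffer cmd,
                                           VkBuffer vertexBuf, VkBuffer instanceBuf,
                                           VkBuffer indexBuf, uint32_t indexCount,
                                           uint32_t instanceCount, uint32_t baseInstance) {
        VkBuffer buffers[2] = {vertexBuf, instanceBuf};  // bindings 0 and 1
        VkDeviceSize offsets[2] = {0, 0};
        vkCmdBindVertexBuffers(cmd, /*firstBinding=*/0, /*bindingCount=*/2, buffers, offsets);
        vkCmdBindIndexBuffer(cmd, indexBuf, /*offset=*/0, VK_INDEX_TYPE_UINT16);
        vkCmdDrawIndexed(cmd, indexCount, instanceCount, /*firstIndex=*/0,
                         /*vertexOffset=*/0, baseInstance);
    }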