| field | value | date |
|---|---|---|
| author | Greg Daniel <egdaniel@google.com> | 2018-05-30 14:51:53 -0400 |
| committer | Skia Commit-Bot <skia-commit-bot@chromium.org> | 2018-05-30 19:52:38 +0000 |
| commit | 331c266ed716526478a10885aff66181cec64486 (patch) | |
| tree | f36dea84cdc2f25e22d3cbdbad56cd84fd2bb4aa /src/gpu | |
| parent | 9febd0b9f18ccbfaa92735d77209bf77a13faa4f (diff) | |
Use GrVkMemoryAllocator for vulkan memory allocations in ganesh.
Besides using the new allocator, the big logical change is that map
and unmap calls from GrVkMemory are now specified to map the entire
GrVkAlloc instead of a specific offset and size as they did before.
As a consequence, the handling of non-coherent alignment for
flush/invalidate calls moves into GrVkMemory instead of the callers.
Bug: skia:
Change-Id: I794d713106602f27aa7e808c306bbb69fd2b67be
Reviewed-on: https://skia-review.googlesource.com/130021
Commit-Queue: Greg Daniel <egdaniel@google.com>
Reviewed-by: Jim Van Verth <jvanverth@google.com>
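
To make the new mapping contract concrete, here is a minimal sketch (not part of the patch) of how a caller such as GrVkBuffer uses the reworked GrVkMemory entry points after this change. The helper name `upload` and the omitted error handling are illustrative; the calls mirror the GrVkBuffer.cpp and GrVkMemory.cpp hunks below.

```cpp
// Illustrative only: a dynamic-buffer update through the reworked GrVkMemory API.
// MapAlloc maps the whole GrVkAlloc; flush offsets are relative to the alloc.
void upload(GrVkGpu* gpu, const GrVkAlloc& alloc, const void* src, size_t size) {
    SkASSERT(size <= alloc.fSize);
    void* mapPtr = GrVkMemory::MapAlloc(gpu, alloc);  // maps the entire GrVkAlloc
    if (!mapPtr) {
        return;
    }
    memcpy(mapPtr, src, size);
    // Offset 0 is relative to the GrVkAlloc; non-coherent alignment is handled
    // inside FlushMappedAlloc, not by the caller.
    GrVkMemory::FlushMappedAlloc(gpu, alloc, 0, size);
    GrVkMemory::UnmapAlloc(gpu, alloc);
}
```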
Diffstat (limited to 'src/gpu')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | src/gpu/vk/GrVkAMDMemoryAllocator.cpp | 44 |
| -rw-r--r-- | src/gpu/vk/GrVkBackendContext.cpp | 3 |
| -rw-r--r-- | src/gpu/vk/GrVkBuffer.cpp | 37 |
| -rw-r--r-- | src/gpu/vk/GrVkBuffer.h | 5 |
| -rw-r--r-- | src/gpu/vk/GrVkGpu.cpp | 83 |
| -rw-r--r-- | src/gpu/vk/GrVkGpu.h | 28 |
| -rw-r--r-- | src/gpu/vk/GrVkMemory.cpp | 661 |
| -rw-r--r-- | src/gpu/vk/GrVkMemory.h | 138 |
8 files changed, 184 insertions, 815 deletions
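
The non-coherent alignment handling that each caller used to duplicate is now centralized in GrVkMemory::GetNonCoherentMappedMemoryRange. As a rough sketch of what that helper does, per the GrVkAMDMemoryAllocator.cpp and GrVkMemory.cpp hunks below (the free function `make_range` is only illustrative): the alloc-relative offset is rebased onto the VkDeviceMemory, rounded down to a multiple of nonCoherentAtomSize, and the size is rounded up to the same granularity.

```cpp
// Illustrative only: the rounding performed when building a VkMappedMemoryRange
// for flushing/invalidating non-coherent memory. 'alignment' is
// VkPhysicalDeviceLimits::nonCoherentAtomSize and is assumed to be a power of two.
VkMappedMemoryRange make_range(const GrVkAlloc& alloc, VkDeviceSize offset,
                               VkDeviceSize size, VkDeviceSize alignment) {
    offset += alloc.fOffset;                            // rebase onto the VkDeviceMemory
    offset &= ~(alignment - 1);                         // round the start down to the atom size
    size = (size + alignment - 1) & ~(alignment - 1);   // round the length up

    VkMappedMemoryRange range = {};
    range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
    range.memory = alloc.fMemory;
    range.offset = offset;
    range.size = size;
    return range;
}
```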
diff --git a/src/gpu/vk/GrVkAMDMemoryAllocator.cpp b/src/gpu/vk/GrVkAMDMemoryAllocator.cpp index 0b838ece3a..93e2fff494 100644 --- a/src/gpu/vk/GrVkAMDMemoryAllocator.cpp +++ b/src/gpu/vk/GrVkAMDMemoryAllocator.cpp @@ -8,6 +8,7 @@ #include "GrVkAMDMemoryAllocator.h" #include "vk/GrVkInterface.h" +#include "GrVkMemory.h" #include "GrVkUtil.h" GrVkAMDMemoryAllocator::GrVkAMDMemoryAllocator(VkPhysicalDevice physicalDevice, @@ -42,7 +43,10 @@ GrVkAMDMemoryAllocator::GrVkAMDMemoryAllocator(VkPhysicalDevice physicalDevice, info.flags = 0; info.physicalDevice = physicalDevice; info.device = device; - info.preferredLargeHeapBlockSize = 0; + // Manually testing runs of dm using 64 here instead of the default 256 shows less memory usage + // on average. Also dm seems to run faster using 64 so it doesn't seem to be trading off speed + // for memory. + info.preferredLargeHeapBlockSize = 64*1024*1024; info.pAllocationCallbacks = nullptr; info.pDeviceMemoryCallbacks = nullptr; info.frameInUseCount = 0; @@ -198,24 +202,9 @@ void GrVkAMDMemoryAllocator::flushMappedMemory(const GrVkBackendMemory& memoryHa vmaGetPhysicalDeviceProperties(fAllocator, &physDevProps); VkDeviceSize alignment = physDevProps->limits.nonCoherentAtomSize; - offset = offset + info.fOffset; - VkDeviceSize offsetDiff = offset & (alignment -1); - offset = offset - offsetDiff; - size = (size + alignment - 1) & ~(alignment - 1); -#ifdef SK_DEBUG - SkASSERT(offset >= info.fOffset); - SkASSERT(offset + size <= info.fOffset + info.fSize); - SkASSERT(0 == (offset & (alignment-1))); - SkASSERT(size > 0); - SkASSERT(0 == (size & (alignment-1))); -#endif - VkMappedMemoryRange mappedMemoryRange; - memset(&mappedMemoryRange, 0, sizeof(VkMappedMemoryRange)); - mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; - mappedMemoryRange.memory = info.fMemory; - mappedMemoryRange.offset = offset; - mappedMemoryRange.size = size; + GrVkMemory::GetNonCoherentMappedMemoryRange(info, offset, size, alignment, + &mappedMemoryRange); GR_VK_CALL(fInterface, FlushMappedMemoryRanges(fDevice, 1, &mappedMemoryRange)); } } @@ -231,24 +220,9 @@ void GrVkAMDMemoryAllocator::invalidateMappedMemory(const GrVkBackendMemory& mem vmaGetPhysicalDeviceProperties(fAllocator, &physDevProps); VkDeviceSize alignment = physDevProps->limits.nonCoherentAtomSize; - offset = offset + info.fOffset; - VkDeviceSize offsetDiff = offset & (alignment -1); - offset = offset - offsetDiff; - size = (size + alignment - 1) & ~(alignment - 1); -#ifdef SK_DEBUG - SkASSERT(offset >= info.fOffset); - SkASSERT(offset + size <= info.fOffset + info.fSize); - SkASSERT(0 == (offset & (alignment-1))); - SkASSERT(size > 0); - SkASSERT(0 == (size & (alignment-1))); -#endif - VkMappedMemoryRange mappedMemoryRange; - memset(&mappedMemoryRange, 0, sizeof(VkMappedMemoryRange)); - mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; - mappedMemoryRange.memory = info.fMemory; - mappedMemoryRange.offset = offset; - mappedMemoryRange.size = size; + GrVkMemory::GetNonCoherentMappedMemoryRange(info, offset, size, alignment, + &mappedMemoryRange); GR_VK_CALL(fInterface, InvalidateMappedMemoryRanges(fDevice, 1, &mappedMemoryRange)); } } diff --git a/src/gpu/vk/GrVkBackendContext.cpp b/src/gpu/vk/GrVkBackendContext.cpp index 269a8911e4..196b141493 100644 --- a/src/gpu/vk/GrVkBackendContext.cpp +++ b/src/gpu/vk/GrVkBackendContext.cpp @@ -8,7 +8,7 @@ #include "SkAutoMalloc.h" #include "vk/GrVkBackendContext.h" #include "vk/GrVkExtensions.h" -#include "vk/GrVkInterface.h" +#include 
"vk/GrVkMemoryAllocator.h" #include "vk/GrVkUtil.h" //////////////////////////////////////////////////////////////////////////////// @@ -323,6 +323,7 @@ const GrVkBackendContext* GrVkBackendContext::Create(uint32_t* presentQueueIndex } GrVkBackendContext::~GrVkBackendContext() { + fMemoryAllocator.reset(); if (fInterface == nullptr || !fOwnsInstanceAndDevice) { return; } diff --git a/src/gpu/vk/GrVkBuffer.cpp b/src/gpu/vk/GrVkBuffer.cpp index f65b15ded0..b3c1d825aa 100644 --- a/src/gpu/vk/GrVkBuffer.cpp +++ b/src/gpu/vk/GrVkBuffer.cpp @@ -170,28 +170,10 @@ void GrVkBuffer::internalMap(GrVkGpu* gpu, size_t size, bool* createdNewBuffer) if (fDesc.fDynamic) { const GrVkAlloc& alloc = this->alloc(); SkASSERT(alloc.fSize > 0); + SkASSERT(alloc.fSize >= size); + SkASSERT(0 == fOffset); - // For Noncoherent buffers we want to make sure the range that we map, both offset and size, - // are aligned to the nonCoherentAtomSize limit. The offset should have been correctly - // aligned by our memory allocator. For size we pad out to make the range also aligned. - if (SkToBool(alloc.fFlags & GrVkAlloc::kNoncoherent_Flag)) { - // Currently we always have the internal offset as 0. - SkASSERT(0 == fOffset); - VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize; - SkASSERT(0 == (alloc.fOffset & (alignment - 1))); - - // Make size of the map aligned to nonCoherentAtomSize - size = (size + alignment - 1) & ~(alignment - 1); - fMappedSize = size; - } - SkASSERT(size + fOffset <= alloc.fSize); - VkResult err = VK_CALL(gpu, MapMemory(gpu->device(), alloc.fMemory, - alloc.fOffset + fOffset, - size, 0, &fMapPtr)); - if (err) { - fMapPtr = nullptr; - fMappedSize = 0; - } + fMapPtr = GrVkMemory::MapAlloc(gpu, alloc); } else { if (!fMapPtr) { fMapPtr = new unsigned char[this->size()]; @@ -206,16 +188,15 @@ void GrVkBuffer::internalUnmap(GrVkGpu* gpu, size_t size) { SkASSERT(this->vkIsMapped()); if (fDesc.fDynamic) { + const GrVkAlloc& alloc = this->alloc(); + SkASSERT(alloc.fSize > 0); + SkASSERT(alloc.fSize >= size); // We currently don't use fOffset SkASSERT(0 == fOffset); - VkDeviceSize flushOffset = this->alloc().fOffset + fOffset; - VkDeviceSize flushSize = gpu->vkCaps().canUseWholeSizeOnFlushMappedMemory() ? VK_WHOLE_SIZE - : fMappedSize; - GrVkMemory::FlushMappedAlloc(gpu, this->alloc(), flushOffset, flushSize); - VK_CALL(gpu, UnmapMemory(gpu->device(), this->alloc().fMemory)); + GrVkMemory::FlushMappedAlloc(gpu, alloc, 0, size); + GrVkMemory::UnmapAlloc(gpu, alloc); fMapPtr = nullptr; - fMappedSize = 0; } else { // vkCmdUpdateBuffer requires size < 64k and 4-byte alignment. 
// https://bugs.chromium.org/p/skia/issues/detail?id=7488 @@ -224,7 +205,7 @@ void GrVkBuffer::internalUnmap(GrVkGpu* gpu, size_t size) { } else { GrVkTransferBuffer* transferBuffer = GrVkTransferBuffer::Create(gpu, size, GrVkBuffer::kCopyRead_Type); - if(!transferBuffer) { + if (!transferBuffer) { return; } diff --git a/src/gpu/vk/GrVkBuffer.h b/src/gpu/vk/GrVkBuffer.h index 8d116a40f8..6d0c1fda9a 100644 --- a/src/gpu/vk/GrVkBuffer.h +++ b/src/gpu/vk/GrVkBuffer.h @@ -82,7 +82,7 @@ protected: const Desc& descriptor); GrVkBuffer(const Desc& desc, const GrVkBuffer::Resource* resource) - : fDesc(desc), fResource(resource), fOffset(0), fMapPtr(nullptr), fMappedSize(0) { + : fDesc(desc), fResource(resource), fOffset(0), fMapPtr(nullptr) { } void* vkMap(GrVkGpu* gpu) { @@ -115,9 +115,6 @@ private: const Resource* fResource; VkDeviceSize fOffset; void* fMapPtr; - // On certain Intel devices/drivers there is a bug if we try to flush non-coherent memory and - // pass in VK_WHOLE_SIZE. Thus we track our mapped size and explicitly set it when calling flush - VkDeviceSize fMappedSize; typedef SkNoncopyable INHERITED; }; diff --git a/src/gpu/vk/GrVkGpu.cpp b/src/gpu/vk/GrVkGpu.cpp index 56d0b95bd0..2525c5c16c 100644 --- a/src/gpu/vk/GrVkGpu.cpp +++ b/src/gpu/vk/GrVkGpu.cpp @@ -17,6 +17,7 @@ #include "GrRenderTargetPriv.h" #include "GrTexturePriv.h" +#include "GrVkAMDMemoryAllocator.h" #include "GrVkCommandBuffer.h" #include "GrVkGpuCommandBuffer.h" #include "GrVkImage.h" @@ -92,6 +93,7 @@ GrVkGpu::GrVkGpu(GrContext* context, const GrContextOptions& options, sk_sp<const GrVkBackendContext> backendCtx) : INHERITED(context) , fBackendContext(std::move(backendCtx)) + , fMemoryAllocator(fBackendContext->fMemoryAllocator) , fDevice(fBackendContext->fDevice) , fQueue(fBackendContext->fQueue) , fResourceProvider(this) @@ -118,6 +120,12 @@ GrVkGpu::GrVkGpu(GrContext* context, const GrContextOptions& options, } #endif + if (!fMemoryAllocator) { + // We were not given a memory allocator at creation + fMemoryAllocator.reset(new GrVkAMDMemoryAllocator(fBackendContext->fPhysicalDevice, + fDevice, fBackendContext->fInterface)); + } + fCompiler = new SkSL::Compiler(); fVkCaps.reset(new GrVkCaps(options, this->vkInterface(), fBackendContext->fPhysicalDevice, @@ -142,17 +150,6 @@ GrVkGpu::GrVkGpu(GrContext* context, const GrContextOptions& options, fCurrentCmdBuffer = fResourceProvider.findOrCreatePrimaryCommandBuffer(); SkASSERT(fCurrentCmdBuffer); fCurrentCmdBuffer->begin(this); - - // set up our heaps - fHeaps[kLinearImage_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 16*1024*1024)); - fHeaps[kOptimalImage_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 64*1024*1024)); - fHeaps[kSmallOptimalImage_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 2*1024*1024)); - fHeaps[kVertexBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSingleAlloc_Strategy, 0)); - fHeaps[kIndexBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSingleAlloc_Strategy, 0)); - fHeaps[kUniformBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 256*1024)); - fHeaps[kTexelBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSingleAlloc_Strategy, 0)); - fHeaps[kCopyReadBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSingleAlloc_Strategy, 0)); - fHeaps[kCopyWriteBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 16*1024*1024)); } void GrVkGpu::destroyResources() { @@ -562,7 +559,6 @@ bool GrVkGpu::uploadTexDataLinear(GrVkTexture* tex, GrSurfaceOrigin texOrigin, i 0, // arraySlice }; 
VkSubresourceLayout layout; - VkResult err; const GrVkInterface* interface = this->vkInterface(); @@ -573,28 +569,14 @@ bool GrVkGpu::uploadTexDataLinear(GrVkTexture* tex, GrSurfaceOrigin texOrigin, i int texTop = kBottomLeft_GrSurfaceOrigin == texOrigin ? tex->height() - top - height : top; const GrVkAlloc& alloc = tex->alloc(); - VkDeviceSize offset = alloc.fOffset + texTop*layout.rowPitch + left*bpp; - VkDeviceSize offsetDiff = 0; + VkDeviceSize offset = texTop*layout.rowPitch + left*bpp; VkDeviceSize size = height*layout.rowPitch; - // For Noncoherent buffers we want to make sure the range that we map, both offset and size, - // are aligned to the nonCoherentAtomSize limit. We may have to move the initial offset back to - // meet the alignment requirements. So we track how far we move back and then adjust the mapped - // ptr back up so that this is opaque to the caller. - if (SkToBool(alloc.fFlags & GrVkAlloc::kNoncoherent_Flag)) { - VkDeviceSize alignment = this->physicalDeviceProperties().limits.nonCoherentAtomSize; - offsetDiff = offset & (alignment - 1); - offset = offset - offsetDiff; - // Make size of the map aligned to nonCoherentAtomSize - size = (size + alignment - 1) & ~(alignment - 1); - } - SkASSERT(offset >= alloc.fOffset); - SkASSERT(size <= alloc.fOffset + alloc.fSize); - void* mapPtr; - err = GR_VK_CALL(interface, MapMemory(fDevice, alloc.fMemory, offset, size, 0, &mapPtr)); - if (err) { + SkASSERT(size + offset <= alloc.fSize); + void* mapPtr = GrVkMemory::MapAlloc(this, alloc); + if (!mapPtr) { return false; } - mapPtr = reinterpret_cast<char*>(mapPtr) + offsetDiff; + mapPtr = reinterpret_cast<char*>(mapPtr) + offset; if (kBottomLeft_GrSurfaceOrigin == texOrigin) { // copy into buffer by rows @@ -611,7 +593,7 @@ bool GrVkGpu::uploadTexDataLinear(GrVkTexture* tex, GrSurfaceOrigin texOrigin, i } GrVkMemory::FlushMappedAlloc(this, alloc, offset, size); - GR_VK_CALL(interface, UnmapMemory(fDevice, alloc.fMemory)); + GrVkMemory::UnmapAlloc(this, alloc); return true; } @@ -1147,33 +1129,14 @@ GrStencilAttachment* GrVkGpu::createStencilAttachmentForRenderTarget(const GrRen bool copy_testing_data(GrVkGpu* gpu, const void* srcData, const GrVkAlloc& alloc, size_t bufferOffset, size_t srcRowBytes, size_t dstRowBytes, int h) { - // For Noncoherent buffers we want to make sure the range that we map, both offset and size, - // are aligned to the nonCoherentAtomSize limit. We may have to move the initial offset back to - // meet the alignment requirements. So we track how far we move back and then adjust the mapped - // ptr back up so that this is opaque to the caller. 
- VkDeviceSize mapSize = dstRowBytes * h; - VkDeviceSize mapOffset = alloc.fOffset + bufferOffset; - VkDeviceSize offsetDiff = 0; - if (SkToBool(alloc.fFlags & GrVkAlloc::kNoncoherent_Flag)) { - VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize; - offsetDiff = mapOffset & (alignment - 1); - mapOffset = mapOffset - offsetDiff; - // Make size of the map aligned to nonCoherentAtomSize - mapSize = (mapSize + alignment - 1) & ~(alignment - 1); - } - SkASSERT(mapOffset >= alloc.fOffset); - SkASSERT(mapSize + mapOffset <= alloc.fOffset + alloc.fSize); - void* mapPtr; - VkResult err = GR_VK_CALL(gpu->vkInterface(), MapMemory(gpu->device(), - alloc.fMemory, - mapOffset, - mapSize, - 0, - &mapPtr)); - mapPtr = reinterpret_cast<char*>(mapPtr) + offsetDiff; - if (err) { + VkDeviceSize size = dstRowBytes * h; + VkDeviceSize offset = bufferOffset; + SkASSERT(size + offset <= alloc.fSize); + void* mapPtr = GrVkMemory::MapAlloc(gpu, alloc); + if (!mapPtr) { return false; } + mapPtr = reinterpret_cast<char*>(mapPtr) + offset; if (srcData) { // If there is no padding on dst we can do a single memcopy. @@ -1192,8 +1155,8 @@ bool copy_testing_data(GrVkGpu* gpu, const void* srcData, const GrVkAlloc& alloc } } } - GrVkMemory::FlushMappedAlloc(gpu, alloc, mapOffset, mapSize); - GR_VK_CALL(gpu->vkInterface(), UnmapMemory(gpu->device(), alloc.fMemory)); + GrVkMemory::FlushMappedAlloc(gpu, alloc, offset, size); + GrVkMemory::UnmapAlloc(gpu, alloc); return true; } @@ -2017,7 +1980,7 @@ bool GrVkGpu::onReadPixels(GrSurface* surface, GrSurfaceOrigin origin, int left, this->submitCommandBuffer(kForce_SyncQueue); void* mappedMemory = transferBuffer->map(); const GrVkAlloc& transAlloc = transferBuffer->alloc(); - GrVkMemory::InvalidateMappedAlloc(this, transAlloc, transAlloc.fOffset, VK_WHOLE_SIZE); + GrVkMemory::InvalidateMappedAlloc(this, transAlloc, 0, transAlloc.fSize); if (copyFromOrigin) { uint32_t skipRows = region.imageExtent.height - height; diff --git a/src/gpu/vk/GrVkGpu.h b/src/gpu/vk/GrVkGpu.h index 7bdfbeaab3..a44ea7230f 100644 --- a/src/gpu/vk/GrVkGpu.h +++ b/src/gpu/vk/GrVkGpu.h @@ -23,6 +23,7 @@ class GrPipeline; class GrVkBufferImpl; +class GrVkMemoryAllocator; class GrVkPipeline; class GrVkPipelineState; class GrVkPrimaryCommandBuffer; @@ -46,6 +47,8 @@ public: const GrVkInterface* vkInterface() const { return fBackendContext->fInterface.get(); } const GrVkCaps& vkCaps() const { return *fVkCaps; } + GrVkMemoryAllocator* memoryAllocator() const { return fMemoryAllocator.get(); } + VkDevice device() const { return fDevice; } VkQueue queue() const { return fQueue; } VkCommandPool cmdPool() const { return fCmdPool; } @@ -140,28 +143,6 @@ public: VkDeviceSize dstOffset, VkDeviceSize size); bool updateBuffer(GrVkBuffer* buffer, const void* src, VkDeviceSize offset, VkDeviceSize size); - // Heaps - enum Heap { - kLinearImage_Heap = 0, - // We separate out small (i.e., <= 16K) images to reduce fragmentation - // in the main heap. - kOptimalImage_Heap, - kSmallOptimalImage_Heap, - // We have separate vertex and image heaps, because it's possible that - // a given Vulkan driver may allocate them separately. 
- kVertexBuffer_Heap, - kIndexBuffer_Heap, - kUniformBuffer_Heap, - kTexelBuffer_Heap, - kCopyReadBuffer_Heap, - kCopyWriteBuffer_Heap, - - kLastHeap = kCopyWriteBuffer_Heap - }; - static const int kHeapCount = kLastHeap + 1; - - GrVkHeap* getHeap(Heap heap) const { return fHeaps[heap].get(); } - private: GrVkGpu(GrContext*, const GrContextOptions&, sk_sp<const GrVkBackendContext> backendContext); @@ -251,6 +232,7 @@ private: #endif sk_sp<const GrVkBackendContext> fBackendContext; + sk_sp<GrVkMemoryAllocator> fMemoryAllocator; sk_sp<GrVkCaps> fVkCaps; // These Vulkan objects are provided by the client, and also stored in fBackendContext. @@ -270,8 +252,6 @@ private: VkPhysicalDeviceProperties fPhysDevProps; VkPhysicalDeviceMemoryProperties fPhysDevMemProps; - std::unique_ptr<GrVkHeap> fHeaps[kHeapCount]; - GrVkCopyManager fCopyManager; #ifdef SK_ENABLE_VK_LAYERS diff --git a/src/gpu/vk/GrVkMemory.cpp b/src/gpu/vk/GrVkMemory.cpp index 4f619a3ef3..f999c26546 100644 --- a/src/gpu/vk/GrVkMemory.cpp +++ b/src/gpu/vk/GrVkMemory.cpp @@ -9,49 +9,26 @@ #include "GrVkGpu.h" #include "GrVkUtil.h" +#include "vk/GrVkMemoryAllocator.h" -#ifdef SK_DEBUG -// for simple tracking of how much we're using in each heap -// last counter is for non-subheap allocations -VkDeviceSize gHeapUsage[VK_MAX_MEMORY_HEAPS+1] = { 0 }; -#endif +using AllocationPropertyFlags = GrVkMemoryAllocator::AllocationPropertyFlags; +using BufferUsage = GrVkMemoryAllocator::BufferUsage; -static bool get_valid_memory_type_index(const VkPhysicalDeviceMemoryProperties& physDevMemProps, - uint32_t typeBits, - VkMemoryPropertyFlags requestedMemFlags, - uint32_t* typeIndex, - uint32_t* heapIndex) { - for (uint32_t i = 0; i < physDevMemProps.memoryTypeCount; ++i) { - if (typeBits & (1 << i)) { - uint32_t supportedFlags = physDevMemProps.memoryTypes[i].propertyFlags & - requestedMemFlags; - if (supportedFlags == requestedMemFlags) { - *typeIndex = i; - *heapIndex = physDevMemProps.memoryTypes[i].heapIndex; - return true; - } - } +static BufferUsage get_buffer_usage(GrVkBuffer::Type type, bool dynamic) { + switch (type) { + case GrVkBuffer::kVertex_Type: // fall through + case GrVkBuffer::kIndex_Type: // fall through + case GrVkBuffer::kTexel_Type: + return dynamic ? BufferUsage::kCpuWritesGpuReads : BufferUsage::kGpuOnly; + case GrVkBuffer::kUniform_Type: + SkASSERT(dynamic); + return BufferUsage::kCpuWritesGpuReads; + case GrVkBuffer::kCopyRead_Type: // fall through + case GrVkBuffer::kCopyWrite_Type: + return BufferUsage::kCpuOnly; } - return false; -} - -static GrVkGpu::Heap buffer_type_to_heap(GrVkBuffer::Type type) { - const GrVkGpu::Heap kBufferToHeap[]{ - GrVkGpu::kVertexBuffer_Heap, - GrVkGpu::kIndexBuffer_Heap, - GrVkGpu::kUniformBuffer_Heap, - GrVkGpu::kTexelBuffer_Heap, - GrVkGpu::kCopyReadBuffer_Heap, - GrVkGpu::kCopyWriteBuffer_Heap, - }; - GR_STATIC_ASSERT(0 == GrVkBuffer::kVertex_Type); - GR_STATIC_ASSERT(1 == GrVkBuffer::kIndex_Type); - GR_STATIC_ASSERT(2 == GrVkBuffer::kUniform_Type); - GR_STATIC_ASSERT(3 == GrVkBuffer::kTexel_Type); - GR_STATIC_ASSERT(4 == GrVkBuffer::kCopyRead_Type); - GR_STATIC_ASSERT(5 == GrVkBuffer::kCopyWrite_Type); - - return kBufferToHeap[type]; + SK_ABORT("Invalid GrVkBuffer::Type"); + return BufferUsage::kCpuOnly; // Just returning an arbitrary value. 
} bool GrVkMemory::AllocAndBindBufferMemory(const GrVkGpu* gpu, @@ -59,68 +36,23 @@ bool GrVkMemory::AllocAndBindBufferMemory(const GrVkGpu* gpu, GrVkBuffer::Type type, bool dynamic, GrVkAlloc* alloc) { - const GrVkInterface* iface = gpu->vkInterface(); - VkDevice device = gpu->device(); - - VkMemoryRequirements memReqs; - GR_VK_CALL(iface, GetBufferMemoryRequirements(device, buffer, &memReqs)); + GrVkMemoryAllocator* allocator = gpu->memoryAllocator(); + GrVkBackendMemory memory = 0; - uint32_t typeIndex = 0; - uint32_t heapIndex = 0; - const VkPhysicalDeviceMemoryProperties& phDevMemProps = gpu->physicalDeviceMemoryProperties(); - const VkPhysicalDeviceProperties& phDevProps = gpu->physicalDeviceProperties(); - if (dynamic) { - // try to get cached and ideally non-coherent memory first - if (!get_valid_memory_type_index(phDevMemProps, - memReqs.memoryTypeBits, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_CACHED_BIT, - &typeIndex, - &heapIndex)) { - // some sort of host-visible memory type should always be available for dynamic buffers - SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps, - memReqs.memoryTypeBits, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, - &typeIndex, - &heapIndex)); - } + GrVkMemoryAllocator::BufferUsage usage = get_buffer_usage(type, dynamic); - VkMemoryPropertyFlags mpf = phDevMemProps.memoryTypes[typeIndex].propertyFlags; - alloc->fFlags = mpf & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT ? 0x0 - : GrVkAlloc::kNoncoherent_Flag; - if (SkToBool(alloc->fFlags & GrVkAlloc::kNoncoherent_Flag)) { - SkASSERT(SkIsPow2(memReqs.alignment)); - SkASSERT(SkIsPow2(phDevProps.limits.nonCoherentAtomSize)); - memReqs.alignment = SkTMax(memReqs.alignment, phDevProps.limits.nonCoherentAtomSize); - } - } else { - // device-local memory should always be available for static buffers - SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps, - memReqs.memoryTypeBits, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - &typeIndex, - &heapIndex)); - alloc->fFlags = 0x0; - } - - GrVkHeap* heap = gpu->getHeap(buffer_type_to_heap(type)); - - if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) { - // if static, try to allocate from non-host-visible non-device-local memory instead - if (dynamic || - !get_valid_memory_type_index(phDevMemProps, memReqs.memoryTypeBits, - 0, &typeIndex, &heapIndex) || - !heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) { - SkDebugf("Failed to alloc buffer\n"); - return false; - } + if (!allocator->allocateMemoryForBuffer(buffer, usage, AllocationPropertyFlags::kNone, + &memory)) { + return false; } + allocator->getAllocInfo(memory, alloc); // Bind buffer - VkResult err = GR_VK_CALL(iface, BindBufferMemory(device, buffer, - alloc->fMemory, alloc->fOffset)); + VkResult err = GR_VK_CALL(gpu->vkInterface(), BindBufferMemory(gpu->device(), buffer, + alloc->fMemory, + alloc->fOffset)); if (err) { - SkASSERT_RELEASE(heap->free(*alloc)); + FreeBufferMemory(gpu, type, *alloc); return false; } @@ -129,503 +61,152 @@ bool GrVkMemory::AllocAndBindBufferMemory(const GrVkGpu* gpu, void GrVkMemory::FreeBufferMemory(const GrVkGpu* gpu, GrVkBuffer::Type type, const GrVkAlloc& alloc) { - - GrVkHeap* heap = gpu->getHeap(buffer_type_to_heap(type)); - SkASSERT_RELEASE(heap->free(alloc)); + if (alloc.fBackendMemory) { + GrVkMemoryAllocator* allocator = gpu->memoryAllocator(); + allocator->freeMemory(alloc.fBackendMemory); + } else { + GR_VK_CALL(gpu->vkInterface(), FreeMemory(gpu->device(), alloc.fMemory, nullptr)); + } } 
-// for debugging -static uint64_t gTotalImageMemory = 0; -static uint64_t gTotalImageMemoryFullPage = 0; - const VkDeviceSize kMaxSmallImageSize = 16 * 1024; -const VkDeviceSize kMinVulkanPageSize = 16 * 1024; - -static VkDeviceSize align_size(VkDeviceSize size, VkDeviceSize alignment) { - return (size + alignment - 1) & ~(alignment - 1); -} bool GrVkMemory::AllocAndBindImageMemory(const GrVkGpu* gpu, VkImage image, bool linearTiling, GrVkAlloc* alloc) { - const GrVkInterface* iface = gpu->vkInterface(); - VkDevice device = gpu->device(); + SkASSERT(!linearTiling); + GrVkMemoryAllocator* allocator = gpu->memoryAllocator(); + GrVkBackendMemory memory = 0; VkMemoryRequirements memReqs; - GR_VK_CALL(iface, GetImageMemoryRequirements(device, image, &memReqs)); + GR_VK_CALL(gpu->vkInterface(), GetImageMemoryRequirements(gpu->device(), image, &memReqs)); - uint32_t typeIndex = 0; - uint32_t heapIndex = 0; - GrVkHeap* heap; - const VkPhysicalDeviceMemoryProperties& phDevMemProps = gpu->physicalDeviceMemoryProperties(); - const VkPhysicalDeviceProperties& phDevProps = gpu->physicalDeviceProperties(); - if (linearTiling) { - VkMemoryPropertyFlags desiredMemProps = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_CACHED_BIT; - if (!get_valid_memory_type_index(phDevMemProps, - memReqs.memoryTypeBits, - desiredMemProps, - &typeIndex, - &heapIndex)) { - // some sort of host-visible memory type should always be available - SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps, - memReqs.memoryTypeBits, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, - &typeIndex, - &heapIndex)); - } - heap = gpu->getHeap(GrVkGpu::kLinearImage_Heap); - VkMemoryPropertyFlags mpf = phDevMemProps.memoryTypes[typeIndex].propertyFlags; - alloc->fFlags = mpf & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT ? 
0x0 - : GrVkAlloc::kNoncoherent_Flag; - if (SkToBool(alloc->fFlags & GrVkAlloc::kNoncoherent_Flag)) { - SkASSERT(SkIsPow2(memReqs.alignment)); - SkASSERT(SkIsPow2(phDevProps.limits.nonCoherentAtomSize)); - memReqs.alignment = SkTMax(memReqs.alignment, phDevProps.limits.nonCoherentAtomSize); - } + AllocationPropertyFlags propFlags; + if (memReqs.size <= kMaxSmallImageSize) { + propFlags = AllocationPropertyFlags::kNone; } else { - // this memory type should always be available - SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps, - memReqs.memoryTypeBits, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - &typeIndex, - &heapIndex)); - if (memReqs.size <= kMaxSmallImageSize) { - heap = gpu->getHeap(GrVkGpu::kSmallOptimalImage_Heap); - } else { - heap = gpu->getHeap(GrVkGpu::kOptimalImage_Heap); - } - alloc->fFlags = 0x0; + propFlags = AllocationPropertyFlags::kDedicatedAllocation; } - if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) { - // if optimal, try to allocate from non-host-visible non-device-local memory instead - if (linearTiling || - !get_valid_memory_type_index(phDevMemProps, memReqs.memoryTypeBits, - 0, &typeIndex, &heapIndex) || - !heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) { - SkDebugf("Failed to alloc image\n"); - return false; - } + if (!allocator->allocateMemoryForImage(image, AllocationPropertyFlags::kDedicatedAllocation, + &memory)) { + return false; } + allocator->getAllocInfo(memory, alloc); - // Bind image - VkResult err = GR_VK_CALL(iface, BindImageMemory(device, image, - alloc->fMemory, alloc->fOffset)); + // Bind buffer + VkResult err = GR_VK_CALL(gpu->vkInterface(), BindImageMemory(gpu->device(), image, + alloc->fMemory, alloc->fOffset)); if (err) { - SkASSERT_RELEASE(heap->free(*alloc)); + FreeImageMemory(gpu, linearTiling, *alloc); return false; } - gTotalImageMemory += alloc->fSize; - - VkDeviceSize pageAlignedSize = align_size(alloc->fSize, kMinVulkanPageSize); - gTotalImageMemoryFullPage += pageAlignedSize; - return true; } void GrVkMemory::FreeImageMemory(const GrVkGpu* gpu, bool linearTiling, const GrVkAlloc& alloc) { - GrVkHeap* heap; - if (linearTiling) { - heap = gpu->getHeap(GrVkGpu::kLinearImage_Heap); - } else if (alloc.fSize <= kMaxSmallImageSize) { - heap = gpu->getHeap(GrVkGpu::kSmallOptimalImage_Heap); + if (alloc.fBackendMemory) { + GrVkMemoryAllocator* allocator = gpu->memoryAllocator(); + allocator->freeMemory(alloc.fBackendMemory); } else { - heap = gpu->getHeap(GrVkGpu::kOptimalImage_Heap); - } - if (!heap->free(alloc)) { - // must be an adopted allocation GR_VK_CALL(gpu->vkInterface(), FreeMemory(gpu->device(), alloc.fMemory, nullptr)); - } else { - gTotalImageMemory -= alloc.fSize; - VkDeviceSize pageAlignedSize = align_size(alloc.fSize, kMinVulkanPageSize); - gTotalImageMemoryFullPage -= pageAlignedSize; } } -void GrVkMemory::FlushMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc, VkDeviceSize offset, - VkDeviceSize size) { - if (alloc.fFlags & GrVkAlloc::kNoncoherent_Flag) { +void* GrVkMemory::MapAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc) { + SkASSERT(GrVkAlloc::kMappable_Flag & alloc.fFlags); #ifdef SK_DEBUG - SkASSERT(offset >= alloc.fOffset); - VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize; - SkASSERT(0 == (offset & (alignment-1))); - if (size != VK_WHOLE_SIZE) { - SkASSERT(size > 0); - SkASSERT(0 == (size & (alignment-1)) || - (offset + size) == (alloc.fOffset + alloc.fSize)); - SkASSERT(offset + size <= alloc.fOffset + 
alloc.fSize); - } -#endif - - VkMappedMemoryRange mappedMemoryRange; - memset(&mappedMemoryRange, 0, sizeof(VkMappedMemoryRange)); - mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; - mappedMemoryRange.memory = alloc.fMemory; - mappedMemoryRange.offset = offset; - mappedMemoryRange.size = size; - GR_VK_CALL(gpu->vkInterface(), FlushMappedMemoryRanges(gpu->device(), - 1, &mappedMemoryRange)); - } -} - -void GrVkMemory::InvalidateMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc, - VkDeviceSize offset, VkDeviceSize size) { if (alloc.fFlags & GrVkAlloc::kNoncoherent_Flag) { -#ifdef SK_DEBUG - SkASSERT(offset >= alloc.fOffset); VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize; - SkASSERT(0 == (offset & (alignment-1))); - if (size != VK_WHOLE_SIZE) { - SkASSERT(size > 0); - SkASSERT(0 == (size & (alignment-1)) || - (offset + size) == (alloc.fOffset + alloc.fSize)); - SkASSERT(offset + size <= alloc.fOffset + alloc.fSize); - } -#endif - - VkMappedMemoryRange mappedMemoryRange; - memset(&mappedMemoryRange, 0, sizeof(VkMappedMemoryRange)); - mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; - mappedMemoryRange.memory = alloc.fMemory; - mappedMemoryRange.offset = offset; - mappedMemoryRange.size = size; - GR_VK_CALL(gpu->vkInterface(), InvalidateMappedMemoryRanges(gpu->device(), - 1, &mappedMemoryRange)); + SkASSERT(0 == (alloc.fOffset & (alignment-1))); + SkASSERT(0 == (alloc.fSize & (alignment-1))); } -} - -bool GrVkFreeListAlloc::alloc(VkDeviceSize requestedSize, - VkDeviceSize* allocOffset, VkDeviceSize* allocSize) { - VkDeviceSize alignedSize = align_size(requestedSize, fAlignment); - - // find the smallest block big enough for our allocation - FreeList::Iter iter = fFreeList.headIter(); - FreeList::Iter bestFitIter; - VkDeviceSize bestFitSize = fSize + 1; - VkDeviceSize secondLargestSize = 0; - VkDeviceSize secondLargestOffset = 0; - while (iter.get()) { - Block* block = iter.get(); - // need to adjust size to match desired alignment - SkASSERT(align_size(block->fOffset, fAlignment) - block->fOffset == 0); - if (block->fSize >= alignedSize && block->fSize < bestFitSize) { - bestFitIter = iter; - bestFitSize = block->fSize; - } - if (secondLargestSize < block->fSize && block->fOffset != fLargestBlockOffset) { - secondLargestSize = block->fSize; - secondLargestOffset = block->fOffset; - } - iter.next(); - } - SkASSERT(secondLargestSize <= fLargestBlockSize); - - Block* bestFit = bestFitIter.get(); - if (bestFit) { - SkASSERT(align_size(bestFit->fOffset, fAlignment) == bestFit->fOffset); - *allocOffset = bestFit->fOffset; - *allocSize = alignedSize; - // adjust or remove current block - VkDeviceSize originalBestFitOffset = bestFit->fOffset; - if (bestFit->fSize > alignedSize) { - bestFit->fOffset += alignedSize; - bestFit->fSize -= alignedSize; - if (fLargestBlockOffset == originalBestFitOffset) { - if (bestFit->fSize >= secondLargestSize) { - fLargestBlockSize = bestFit->fSize; - fLargestBlockOffset = bestFit->fOffset; - } else { - fLargestBlockSize = secondLargestSize; - fLargestBlockOffset = secondLargestOffset; - } - } -#ifdef SK_DEBUG - VkDeviceSize largestSize = 0; - iter = fFreeList.headIter(); - while (iter.get()) { - Block* block = iter.get(); - if (largestSize < block->fSize) { - largestSize = block->fSize; - } - iter.next(); - } - SkASSERT(largestSize == fLargestBlockSize); #endif - } else { - SkASSERT(bestFit->fSize == alignedSize); - if (fLargestBlockOffset == originalBestFitOffset) { - fLargestBlockSize = 
secondLargestSize; - fLargestBlockOffset = secondLargestOffset; - } - fFreeList.remove(bestFit); -#ifdef SK_DEBUG - VkDeviceSize largestSize = 0; - iter = fFreeList.headIter(); - while (iter.get()) { - Block* block = iter.get(); - if (largestSize < block->fSize) { - largestSize = block->fSize; - } - iter.next(); - } - SkASSERT(largestSize == fLargestBlockSize); -#endif - } - fFreeSize -= alignedSize; - SkASSERT(*allocSize > 0); - - return true; + if (alloc.fBackendMemory) { + GrVkMemoryAllocator* allocator = gpu->memoryAllocator(); + return allocator->mapMemory(alloc.fBackendMemory); } - SkDebugf("Can't allocate %d bytes, %d bytes available, largest free block %d\n", alignedSize, fFreeSize, fLargestBlockSize); - - return false; -} - -void GrVkFreeListAlloc::free(VkDeviceSize allocOffset, VkDeviceSize allocSize) { - // find the block right after this allocation - FreeList::Iter iter = fFreeList.headIter(); - FreeList::Iter prev; - while (iter.get() && iter.get()->fOffset < allocOffset) { - prev = iter; - iter.next(); - } - // we have four cases: - // we exactly follow the previous one - Block* block; - if (prev.get() && prev.get()->fOffset + prev.get()->fSize == allocOffset) { - block = prev.get(); - block->fSize += allocSize; - if (block->fOffset == fLargestBlockOffset) { - fLargestBlockSize = block->fSize; - } - // and additionally we may exactly precede the next one - if (iter.get() && iter.get()->fOffset == allocOffset + allocSize) { - block->fSize += iter.get()->fSize; - if (iter.get()->fOffset == fLargestBlockOffset) { - fLargestBlockOffset = block->fOffset; - fLargestBlockSize = block->fSize; - } - fFreeList.remove(iter.get()); - } - // or we only exactly proceed the next one - } else if (iter.get() && iter.get()->fOffset == allocOffset + allocSize) { - block = iter.get(); - block->fSize += allocSize; - if (block->fOffset == fLargestBlockOffset) { - fLargestBlockOffset = allocOffset; - fLargestBlockSize = block->fSize; - } - block->fOffset = allocOffset; - // or we fall somewhere in between, with gaps - } else { - block = fFreeList.addBefore(iter); - block->fOffset = allocOffset; - block->fSize = allocSize; - } - fFreeSize += allocSize; - if (block->fSize > fLargestBlockSize) { - fLargestBlockSize = block->fSize; - fLargestBlockOffset = block->fOffset; - } - -#ifdef SK_DEBUG - VkDeviceSize largestSize = 0; - iter = fFreeList.headIter(); - while (iter.get()) { - Block* block = iter.get(); - if (largestSize < block->fSize) { - largestSize = block->fSize; - } - iter.next(); + void* mapPtr; + VkResult err = GR_VK_CALL(gpu->vkInterface(), MapMemory(gpu->device(), alloc.fMemory, + alloc.fOffset, + alloc.fSize, 0, &mapPtr)); + if (err) { + mapPtr = nullptr; } - SkASSERT(fLargestBlockSize == largestSize); -#endif + return mapPtr; } -GrVkSubHeap::GrVkSubHeap(const GrVkGpu* gpu, uint32_t memoryTypeIndex, uint32_t heapIndex, - VkDeviceSize size, VkDeviceSize alignment) - : INHERITED(size, alignment) - , fGpu(gpu) -#ifdef SK_DEBUG - , fHeapIndex(heapIndex) -#endif - , fMemoryTypeIndex(memoryTypeIndex) { - - VkMemoryAllocateInfo allocInfo = { - VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // sType - nullptr, // pNext - size, // allocationSize - memoryTypeIndex, // memoryTypeIndex - }; - - VkResult err = GR_VK_CALL(gpu->vkInterface(), AllocateMemory(gpu->device(), - &allocInfo, - nullptr, - &fAlloc)); - if (VK_SUCCESS != err) { - this->reset(); - } -#ifdef SK_DEBUG - else { - gHeapUsage[heapIndex] += size; +void GrVkMemory::UnmapAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc) { + if 
(alloc.fBackendMemory) { + GrVkMemoryAllocator* allocator = gpu->memoryAllocator(); + allocator->unmapMemory(alloc.fBackendMemory); + } else { + GR_VK_CALL(gpu->vkInterface(), UnmapMemory(gpu->device(), alloc.fMemory)); } -#endif } -GrVkSubHeap::~GrVkSubHeap() { - const GrVkInterface* iface = fGpu->vkInterface(); - GR_VK_CALL(iface, FreeMemory(fGpu->device(), fAlloc, nullptr)); +void GrVkMemory::GetNonCoherentMappedMemoryRange(const GrVkAlloc& alloc, VkDeviceSize offset, + VkDeviceSize size, VkDeviceSize alignment, + VkMappedMemoryRange* range) { + SkASSERT(alloc.fFlags & GrVkAlloc::kNoncoherent_Flag); + offset = offset + alloc.fOffset; + VkDeviceSize offsetDiff = offset & (alignment -1); + offset = offset - offsetDiff; + size = (size + alignment - 1) & ~(alignment - 1); #ifdef SK_DEBUG - gHeapUsage[fHeapIndex] -= fSize; + SkASSERT(offset >= alloc.fOffset); + SkASSERT(offset + size <= alloc.fOffset + alloc.fSize); + SkASSERT(0 == (offset & (alignment-1))); + SkASSERT(size > 0); + SkASSERT(0 == (size & (alignment-1))); #endif -} -bool GrVkSubHeap::alloc(VkDeviceSize size, GrVkAlloc* alloc) { - alloc->fMemory = fAlloc; - return INHERITED::alloc(size, &alloc->fOffset, &alloc->fSize); + memset(range, 0, sizeof(VkMappedMemoryRange)); + range->sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + range->memory = alloc.fMemory; + range->offset = offset; + range->size = size; } -void GrVkSubHeap::free(const GrVkAlloc& alloc) { - SkASSERT(alloc.fMemory == fAlloc); - - INHERITED::free(alloc.fOffset, alloc.fSize); -} - -bool GrVkHeap::subAlloc(VkDeviceSize size, VkDeviceSize alignment, - uint32_t memoryTypeIndex, uint32_t heapIndex, GrVkAlloc* alloc) { - VkDeviceSize alignedSize = align_size(size, alignment); - - // if requested is larger than our subheap allocation, just alloc directly - if (alignedSize > fSubHeapSize) { - VkMemoryAllocateInfo allocInfo = { - VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // sType - nullptr, // pNext - alignedSize, // allocationSize - memoryTypeIndex, // memoryTypeIndex - }; - - VkResult err = GR_VK_CALL(fGpu->vkInterface(), AllocateMemory(fGpu->device(), - &allocInfo, - nullptr, - &alloc->fMemory)); - if (VK_SUCCESS != err) { - return false; - } - alloc->fOffset = 0; - alloc->fSize = alignedSize; - alloc->fUsesSystemHeap = true; -#ifdef SK_DEBUG - gHeapUsage[VK_MAX_MEMORY_HEAPS] += alignedSize; -#endif - - return true; - } - - // first try to find a subheap that fits our allocation request - int bestFitIndex = -1; - VkDeviceSize bestFitSize = 0x7FFFFFFF; - for (auto i = 0; i < fSubHeaps.count(); ++i) { - if (fSubHeaps[i]->memoryTypeIndex() == memoryTypeIndex && - fSubHeaps[i]->alignment() == alignment) { - VkDeviceSize heapSize = fSubHeaps[i]->largestBlockSize(); - if (heapSize >= alignedSize && heapSize < bestFitSize) { - bestFitIndex = i; - bestFitSize = heapSize; - } - } - } - - if (bestFitIndex >= 0) { - SkASSERT(fSubHeaps[bestFitIndex]->alignment() == alignment); - if (fSubHeaps[bestFitIndex]->alloc(size, alloc)) { - fUsedSize += alloc->fSize; - return true; - } - return false; - } - - // need to allocate a new subheap - std::unique_ptr<GrVkSubHeap>& subHeap = fSubHeaps.push_back(); - subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, heapIndex, fSubHeapSize, alignment)); - // try to recover from failed allocation by only allocating what we need - if (subHeap->size() == 0) { - VkDeviceSize alignedSize = align_size(size, alignment); - subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, heapIndex, alignedSize, alignment)); - if (subHeap->size() == 0) { - return false; - 
} - } - fAllocSize += fSubHeapSize; - if (subHeap->alloc(size, alloc)) { - fUsedSize += alloc->fSize; - return true; - } - - return false; -} - -bool GrVkHeap::singleAlloc(VkDeviceSize size, VkDeviceSize alignment, - uint32_t memoryTypeIndex, uint32_t heapIndex, GrVkAlloc* alloc) { - VkDeviceSize alignedSize = align_size(size, alignment); - - // first try to find an unallocated subheap that fits our allocation request - int bestFitIndex = -1; - VkDeviceSize bestFitSize = 0x7FFFFFFF; - for (auto i = 0; i < fSubHeaps.count(); ++i) { - if (fSubHeaps[i]->memoryTypeIndex() == memoryTypeIndex && - fSubHeaps[i]->alignment() == alignment && - fSubHeaps[i]->unallocated()) { - VkDeviceSize heapSize = fSubHeaps[i]->size(); - if (heapSize >= alignedSize && heapSize < bestFitSize) { - bestFitIndex = i; - bestFitSize = heapSize; - } - } - } - - if (bestFitIndex >= 0) { - SkASSERT(fSubHeaps[bestFitIndex]->alignment() == alignment); - if (fSubHeaps[bestFitIndex]->alloc(size, alloc)) { - fUsedSize += alloc->fSize; - return true; +void GrVkMemory::FlushMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc, VkDeviceSize offset, + VkDeviceSize size) { + if (alloc.fFlags & GrVkAlloc::kNoncoherent_Flag) { + SkASSERT(offset == 0); + SkASSERT(size <= alloc.fSize); + if (alloc.fBackendMemory) { + GrVkMemoryAllocator* allocator = gpu->memoryAllocator(); + allocator->flushMappedMemory(alloc.fBackendMemory, offset, size); + } else { + VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize; + VkMappedMemoryRange mappedMemoryRange; + GrVkMemory::GetNonCoherentMappedMemoryRange(alloc, offset, size, alignment, + &mappedMemoryRange); + GR_VK_CALL(gpu->vkInterface(), FlushMappedMemoryRanges(gpu->device(), 1, + &mappedMemoryRange)); } - return false; - } - - // need to allocate a new subheap - std::unique_ptr<GrVkSubHeap>& subHeap = fSubHeaps.push_back(); - subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, heapIndex, alignedSize, alignment)); - fAllocSize += alignedSize; - if (subHeap->alloc(size, alloc)) { - fUsedSize += alloc->fSize; - return true; } - - return false; } -bool GrVkHeap::free(const GrVkAlloc& alloc) { - // a size of 0 means we're using the system heap - if (alloc.fUsesSystemHeap) { - const GrVkInterface* iface = fGpu->vkInterface(); - GR_VK_CALL(iface, FreeMemory(fGpu->device(), alloc.fMemory, nullptr)); - return true; - } - - for (auto i = 0; i < fSubHeaps.count(); ++i) { - if (fSubHeaps[i]->memory() == alloc.fMemory) { - fSubHeaps[i]->free(alloc); - fUsedSize -= alloc.fSize; - return true; +void GrVkMemory::InvalidateMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc, + VkDeviceSize offset, VkDeviceSize size) { + if (alloc.fFlags & GrVkAlloc::kNoncoherent_Flag) { + SkASSERT(offset == 0); + SkASSERT(size <= alloc.fSize); + if (alloc.fBackendMemory) { + GrVkMemoryAllocator* allocator = gpu->memoryAllocator(); + allocator->invalidateMappedMemory(alloc.fBackendMemory, offset, size); + } else { + VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize; + VkMappedMemoryRange mappedMemoryRange; + GrVkMemory::GetNonCoherentMappedMemoryRange(alloc, offset, size, alignment, + &mappedMemoryRange); + GR_VK_CALL(gpu->vkInterface(), InvalidateMappedMemoryRanges(gpu->device(), 1, + &mappedMemoryRange)); } } - - return false; } - diff --git a/src/gpu/vk/GrVkMemory.h b/src/gpu/vk/GrVkMemory.h index bb6681435f..741bdaa8a0 100644 --- a/src/gpu/vk/GrVkMemory.h +++ b/src/gpu/vk/GrVkMemory.h @@ -34,133 +34,25 @@ namespace GrVkMemory { GrVkAlloc* alloc); void 
FreeImageMemory(const GrVkGpu* gpu, bool linearTiling, const GrVkAlloc& alloc); + // Maps the entire GrVkAlloc and returns a pointer to the start of the allocation. Underneath + // the hood, we may map more than the range of the GrVkAlloc (e.g. the entire VkDeviceMemory), + // but the pointer returned will always be to the start of the GrVkAlloc. The caller should also + // never assume more than the GrVkAlloc block has been mapped. + void* MapAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc); + void UnmapAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc); + + // For the Flush and Invalidate calls, the offset should be relative to the GrVkAlloc. Thus this + // will often be 0. The client does not need to make sure the offset and size are aligned to the + // nonCoherentAtomSize, the internal calls will handle that. void FlushMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc, VkDeviceSize offset, VkDeviceSize size); void InvalidateMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc, VkDeviceSize offset, VkDeviceSize size); -} - -class GrVkFreeListAlloc { -public: - GrVkFreeListAlloc(VkDeviceSize size, VkDeviceSize alignment) - : fSize(size) - , fAlignment(alignment) - , fFreeSize(size) - , fLargestBlockSize(size) - , fLargestBlockOffset(0) { - Block* block = fFreeList.addToTail(); - block->fOffset = 0; - block->fSize = fSize; - } - ~GrVkFreeListAlloc() { - this->reset(); - } - - VkDeviceSize size() const { return fSize; } - VkDeviceSize alignment() const { return fAlignment; } - VkDeviceSize freeSize() const { return fFreeSize; } - VkDeviceSize largestBlockSize() const { return fLargestBlockSize; } - - bool unallocated() const { return fSize == fFreeSize; } - -protected: - bool alloc(VkDeviceSize requestedSize, VkDeviceSize* allocOffset, VkDeviceSize* allocSize); - void free(VkDeviceSize allocOffset, VkDeviceSize allocSize); - - void reset() { - fSize = 0; - fAlignment = 0; - fFreeSize = 0; - fLargestBlockSize = 0; - fFreeList.reset(); - } - - struct Block { - VkDeviceSize fOffset; - VkDeviceSize fSize; - }; - typedef SkTLList<Block, 16> FreeList; - - VkDeviceSize fSize; - VkDeviceSize fAlignment; - VkDeviceSize fFreeSize; - VkDeviceSize fLargestBlockSize; - VkDeviceSize fLargestBlockOffset; - FreeList fFreeList; -}; - -class GrVkSubHeap : public GrVkFreeListAlloc { -public: - GrVkSubHeap(const GrVkGpu* gpu, uint32_t memoryTypeIndex, uint32_t heapIndex, - VkDeviceSize size, VkDeviceSize alignment); - ~GrVkSubHeap(); - - uint32_t memoryTypeIndex() const { return fMemoryTypeIndex; } - VkDeviceMemory memory() { return fAlloc; } - - bool alloc(VkDeviceSize requestedSize, GrVkAlloc* alloc); - void free(const GrVkAlloc& alloc); -private: - const GrVkGpu* fGpu; -#ifdef SK_DEBUG - uint32_t fHeapIndex; -#endif - uint32_t fMemoryTypeIndex; - VkDeviceMemory fAlloc; - - typedef GrVkFreeListAlloc INHERITED; -}; - -class GrVkHeap { -public: - enum Strategy { - kSubAlloc_Strategy, // alloc large subheaps and suballoc within them - kSingleAlloc_Strategy // alloc/recycle an individual subheap per object - }; - - GrVkHeap(const GrVkGpu* gpu, Strategy strategy, VkDeviceSize subHeapSize) - : fGpu(gpu) - , fSubHeapSize(subHeapSize) - , fAllocSize(0) - , fUsedSize(0) { - if (strategy == kSubAlloc_Strategy) { - fAllocFunc = &GrVkHeap::subAlloc; - } else { - fAllocFunc = &GrVkHeap::singleAlloc; - } - } - - ~GrVkHeap() {} - - VkDeviceSize allocSize() const { return fAllocSize; } - VkDeviceSize usedSize() const { return fUsedSize; } - - bool alloc(VkDeviceSize size, VkDeviceSize alignment, uint32_t 
memoryTypeIndex, - uint32_t heapIndex, GrVkAlloc* alloc) { - SkASSERT(size > 0); - alloc->fUsesSystemHeap = false; - return (*this.*fAllocFunc)(size, alignment, memoryTypeIndex, heapIndex, alloc); - } - bool free(const GrVkAlloc& alloc); - -private: - typedef bool (GrVkHeap::*AllocFunc)(VkDeviceSize size, VkDeviceSize alignment, - uint32_t memoryTypeIndex, uint32_t heapIndex, - GrVkAlloc* alloc); - - bool subAlloc(VkDeviceSize size, VkDeviceSize alignment, - uint32_t memoryTypeIndex, uint32_t heapIndex, - GrVkAlloc* alloc); - bool singleAlloc(VkDeviceSize size, VkDeviceSize alignment, - uint32_t memoryTypeIndex, uint32_t heapIndex, - GrVkAlloc* alloc); + // Helper for aligning and setting VkMappedMemoryRange for flushing/invalidating noncoherent + // memory. + void GetNonCoherentMappedMemoryRange(const GrVkAlloc&, VkDeviceSize offset, VkDeviceSize size, + VkDeviceSize alignment, VkMappedMemoryRange*); +} - const GrVkGpu* fGpu; - VkDeviceSize fSubHeapSize; - VkDeviceSize fAllocSize; - VkDeviceSize fUsedSize; - AllocFunc fAllocFunc; - SkTArray<std::unique_ptr<GrVkSubHeap>> fSubHeaps; -}; #endif |