author     Greg Daniel <egdaniel@google.com>               2018-05-31 13:13:33 -0400
committer  Skia Commit-Bot <skia-commit-bot@chromium.org>  2018-05-31 18:21:25 +0000
commit     81df0414c8226ed40ccf70b6f67890f136440b81 (patch)
tree       3d4d3a3256b7dfc0d0604ef4fa15db34f82b1e47 /src/gpu/vk
parent     ec0732433f28368b94973496fbdb990f967ba1f6 (diff)
Reland "Use GrVkMemoryAllocator for vulkan memory allocations in ganesh."
This is a reland of 331c266ed716526478a10885aff66181cec64486
Original change's description:
> Use GrVkMemoryAllocator for vulkan memory allocations in ganesh.
>
> Besides using the new allocator, the big logical change is that map
> and unmap calls from GrVkMemory are specc'd to map the entire GrVkAlloc
> instead of a specific offset and size as they did before. As a
> consequence of this, we move the handling of non-coherent alignment
> for flush/invalidate calls to GrVkMemory instead of the callers.
>
> Bug: skia:
> Change-Id: I794d713106602f27aa7e808c306bbb69fd2b67be
> Reviewed-on: https://skia-review.googlesource.com/130021
> Commit-Queue: Greg Daniel <egdaniel@google.com>
> Reviewed-by: Jim Van Verth <jvanverth@google.com>
Bug: skia:
Change-Id: Ia9a4192d344449fb444d2adaa1d62ff1ede4b21d
Reviewed-on: https://skia-review.googlesource.com/131083
Reviewed-by: Jim Van Verth <jvanverth@google.com>
Commit-Queue: Greg Daniel <egdaniel@google.com>
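The practical effect of the semantic change described above is easiest to see in the GrVkBuffer::internalMap/internalUnmap hunks below. The following is a minimal sketch of the new caller pattern, not code from this CL; GrVkMemory::MapAlloc, FlushMappedAlloc, and UnmapAlloc are taken from the diff, while gpu, alloc, srcData, and size are hypothetical stand-ins for the members a GrVkBuffer already holds:

    // Map the whole GrVkAlloc; callers no longer pass a per-call offset/size.
    void* mapPtr = GrVkMemory::MapAlloc(gpu, alloc);
    if (mapPtr) {
        memcpy(mapPtr, srcData, size);
        // The offset is now relative to the GrVkAlloc (so typically 0); any
        // nonCoherentAtomSize alignment is handled inside GrVkMemory.
        GrVkMemory::FlushMappedAlloc(gpu, alloc, /*offset=*/0, size);
        GrVkMemory::UnmapAlloc(gpu, alloc);
    }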
Diffstat (limited to 'src/gpu/vk')
-rw-r--r--  src/gpu/vk/GrVkAMDMemoryAllocator.cpp |  72
-rw-r--r--  src/gpu/vk/GrVkBackendContext.cpp     |   2
-rw-r--r--  src/gpu/vk/GrVkBuffer.cpp             |  37
-rw-r--r--  src/gpu/vk/GrVkBuffer.h               |   5
-rw-r--r--  src/gpu/vk/GrVkGpu.cpp                |  83
-rw-r--r--  src/gpu/vk/GrVkGpu.h                  |  28
-rw-r--r--  src/gpu/vk/GrVkMemory.cpp             | 661
-rw-r--r--  src/gpu/vk/GrVkMemory.h               | 138
8 files changed, 207 insertions, 819 deletions
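A recurring detail in the hunks below is how flush/invalidate ranges for non-coherent memory get aligned; the new GrVkMemory::GetNonCoherentMappedMemoryRange helper centralizes that logic. A worked example of what it produces, using hypothetical numbers (a GrVkAlloc named alloc with fOffset 4096, and an example nonCoherentAtomSize of 64):

    // Caller-facing offset/size are relative to the GrVkAlloc.
    VkDeviceSize offset = 0;     // start of the suballocation
    VkDeviceSize size   = 1000;  // bytes actually written
    VkMappedMemoryRange range;
    GrVkMemory::GetNonCoherentMappedMemoryRange(alloc, offset, size,
                                                /*alignment=*/64, &range);
    // range.memory == alloc.fMemory
    // range.offset == 4096  (alloc.fOffset + offset, rounded down to 64)
    // range.size   == 1024  (size rounded up to 64)
    // The end of the range stays inside the allocation because getAllocInfo()
    // already padded alloc.fSize up to the same alignment (see the hack below).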
diff --git a/src/gpu/vk/GrVkAMDMemoryAllocator.cpp b/src/gpu/vk/GrVkAMDMemoryAllocator.cpp index 0b838ece3a..53703a2149 100644 --- a/src/gpu/vk/GrVkAMDMemoryAllocator.cpp +++ b/src/gpu/vk/GrVkAMDMemoryAllocator.cpp @@ -8,6 +8,7 @@ #include "GrVkAMDMemoryAllocator.h" #include "vk/GrVkInterface.h" +#include "GrVkMemory.h" #include "GrVkUtil.h" GrVkAMDMemoryAllocator::GrVkAMDMemoryAllocator(VkPhysicalDevice physicalDevice, @@ -42,7 +43,10 @@ GrVkAMDMemoryAllocator::GrVkAMDMemoryAllocator(VkPhysicalDevice physicalDevice, info.flags = 0; info.physicalDevice = physicalDevice; info.device = device; - info.preferredLargeHeapBlockSize = 0; + // Manually testing runs of dm using 64 here instead of the default 256 shows less memory usage + // on average. Also dm seems to run faster using 64 so it doesn't seem to be trading off speed + // for memory. + info.preferredLargeHeapBlockSize = 64*1024*1024; info.pAllocationCallbacks = nullptr; info.pDeviceMemoryCallbacks = nullptr; info.frameInUseCount = 0; @@ -106,10 +110,10 @@ bool GrVkAMDMemoryAllocator::allocateMemoryForBuffer(VkBuffer buffer, BufferUsag info.preferredFlags = VK_MEMORY_PROPERTY_HOST_CACHED_BIT; break; case BufferUsage::kCpuWritesGpuReads: - // First attempt to try memory is also device local + // First attempt to try memory is also cached info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - info.preferredFlags = VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + info.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; break; case BufferUsage::kGpuWritesCpuReads: info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; @@ -134,7 +138,7 @@ bool GrVkAMDMemoryAllocator::allocateMemoryForBuffer(VkBuffer buffer, BufferUsag VkResult result = vmaAllocateMemoryForBuffer(fAllocator, buffer, &info, &allocation, nullptr); if (VK_SUCCESS != result) { if (usage == BufferUsage::kCpuWritesGpuReads) { - // We try again but this time drop the requirement for device local + // We try again but this time drop the requirement for cached info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; result = vmaAllocateMemoryForBuffer(fAllocator, buffer, &info, &allocation, nullptr); } @@ -142,6 +146,7 @@ bool GrVkAMDMemoryAllocator::allocateMemoryForBuffer(VkBuffer buffer, BufferUsag if (VK_SUCCESS != result) { return false; } + *backendMemory = (GrVkBackendMemory)allocation; return true; } @@ -173,6 +178,25 @@ void GrVkAMDMemoryAllocator::getAllocInfo(const GrVkBackendMemory& memoryHandle, alloc->fSize = vmaInfo.size; alloc->fFlags = flags; alloc->fBackendMemory = memoryHandle; + + // TODO: Remove this hack once the AMD allocator is able to handle the alignment of noncoherent + // memory itself. + if (!SkToBool(VK_MEMORY_PROPERTY_HOST_COHERENT_BIT & memFlags)) { + // This is a hack to say that the allocation size is actually larger than it is. This is to + // make sure when we are flushing and invalidating noncoherent memory we have a size that is + // aligned to the nonCoherentAtomSize. This is safe for three reasons. First the total size + // of the VkDeviceMemory we allocate will always be a multple of the max possible alignment + // (currently 256). Second all sub allocations are alignmed with an offset of 256. And + // finally the allocator we are using always maps the entire VkDeviceMemory so the range + // we'll be flushing/invalidating will be mapped. 
So our new fake allocation size will + // always fit into the VkDeviceMemory, will never push it into another suballocation, and + // will always be mapped when map is called. + const VkPhysicalDeviceProperties* devProps; + vmaGetPhysicalDeviceProperties(fAllocator, &devProps); + VkDeviceSize alignment = devProps->limits.nonCoherentAtomSize; + + alloc->fSize = (alloc->fSize + alignment - 1) & ~(alignment -1); + } } void* GrVkAMDMemoryAllocator::mapMemory(const GrVkBackendMemory& memoryHandle) { @@ -198,24 +222,9 @@ void GrVkAMDMemoryAllocator::flushMappedMemory(const GrVkBackendMemory& memoryHa vmaGetPhysicalDeviceProperties(fAllocator, &physDevProps); VkDeviceSize alignment = physDevProps->limits.nonCoherentAtomSize; - offset = offset + info.fOffset; - VkDeviceSize offsetDiff = offset & (alignment -1); - offset = offset - offsetDiff; - size = (size + alignment - 1) & ~(alignment - 1); -#ifdef SK_DEBUG - SkASSERT(offset >= info.fOffset); - SkASSERT(offset + size <= info.fOffset + info.fSize); - SkASSERT(0 == (offset & (alignment-1))); - SkASSERT(size > 0); - SkASSERT(0 == (size & (alignment-1))); -#endif - VkMappedMemoryRange mappedMemoryRange; - memset(&mappedMemoryRange, 0, sizeof(VkMappedMemoryRange)); - mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; - mappedMemoryRange.memory = info.fMemory; - mappedMemoryRange.offset = offset; - mappedMemoryRange.size = size; + GrVkMemory::GetNonCoherentMappedMemoryRange(info, offset, size, alignment, + &mappedMemoryRange); GR_VK_CALL(fInterface, FlushMappedMemoryRanges(fDevice, 1, &mappedMemoryRange)); } } @@ -231,24 +240,9 @@ void GrVkAMDMemoryAllocator::invalidateMappedMemory(const GrVkBackendMemory& mem vmaGetPhysicalDeviceProperties(fAllocator, &physDevProps); VkDeviceSize alignment = physDevProps->limits.nonCoherentAtomSize; - offset = offset + info.fOffset; - VkDeviceSize offsetDiff = offset & (alignment -1); - offset = offset - offsetDiff; - size = (size + alignment - 1) & ~(alignment - 1); -#ifdef SK_DEBUG - SkASSERT(offset >= info.fOffset); - SkASSERT(offset + size <= info.fOffset + info.fSize); - SkASSERT(0 == (offset & (alignment-1))); - SkASSERT(size > 0); - SkASSERT(0 == (size & (alignment-1))); -#endif - VkMappedMemoryRange mappedMemoryRange; - memset(&mappedMemoryRange, 0, sizeof(VkMappedMemoryRange)); - mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; - mappedMemoryRange.memory = info.fMemory; - mappedMemoryRange.offset = offset; - mappedMemoryRange.size = size; + GrVkMemory::GetNonCoherentMappedMemoryRange(info, offset, size, alignment, + &mappedMemoryRange); GR_VK_CALL(fInterface, InvalidateMappedMemoryRanges(fDevice, 1, &mappedMemoryRange)); } } diff --git a/src/gpu/vk/GrVkBackendContext.cpp b/src/gpu/vk/GrVkBackendContext.cpp index 269a8911e4..d54582f3a5 100644 --- a/src/gpu/vk/GrVkBackendContext.cpp +++ b/src/gpu/vk/GrVkBackendContext.cpp @@ -8,7 +8,6 @@ #include "SkAutoMalloc.h" #include "vk/GrVkBackendContext.h" #include "vk/GrVkExtensions.h" -#include "vk/GrVkInterface.h" #include "vk/GrVkUtil.h" //////////////////////////////////////////////////////////////////////////////// @@ -323,6 +322,7 @@ const GrVkBackendContext* GrVkBackendContext::Create(uint32_t* presentQueueIndex } GrVkBackendContext::~GrVkBackendContext() { + fMemoryAllocator.reset(); if (fInterface == nullptr || !fOwnsInstanceAndDevice) { return; } diff --git a/src/gpu/vk/GrVkBuffer.cpp b/src/gpu/vk/GrVkBuffer.cpp index f65b15ded0..b3c1d825aa 100644 --- a/src/gpu/vk/GrVkBuffer.cpp +++ b/src/gpu/vk/GrVkBuffer.cpp @@ -170,28 
+170,10 @@ void GrVkBuffer::internalMap(GrVkGpu* gpu, size_t size, bool* createdNewBuffer) if (fDesc.fDynamic) { const GrVkAlloc& alloc = this->alloc(); SkASSERT(alloc.fSize > 0); + SkASSERT(alloc.fSize >= size); + SkASSERT(0 == fOffset); - // For Noncoherent buffers we want to make sure the range that we map, both offset and size, - // are aligned to the nonCoherentAtomSize limit. The offset should have been correctly - // aligned by our memory allocator. For size we pad out to make the range also aligned. - if (SkToBool(alloc.fFlags & GrVkAlloc::kNoncoherent_Flag)) { - // Currently we always have the internal offset as 0. - SkASSERT(0 == fOffset); - VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize; - SkASSERT(0 == (alloc.fOffset & (alignment - 1))); - - // Make size of the map aligned to nonCoherentAtomSize - size = (size + alignment - 1) & ~(alignment - 1); - fMappedSize = size; - } - SkASSERT(size + fOffset <= alloc.fSize); - VkResult err = VK_CALL(gpu, MapMemory(gpu->device(), alloc.fMemory, - alloc.fOffset + fOffset, - size, 0, &fMapPtr)); - if (err) { - fMapPtr = nullptr; - fMappedSize = 0; - } + fMapPtr = GrVkMemory::MapAlloc(gpu, alloc); } else { if (!fMapPtr) { fMapPtr = new unsigned char[this->size()]; @@ -206,16 +188,15 @@ void GrVkBuffer::internalUnmap(GrVkGpu* gpu, size_t size) { SkASSERT(this->vkIsMapped()); if (fDesc.fDynamic) { + const GrVkAlloc& alloc = this->alloc(); + SkASSERT(alloc.fSize > 0); + SkASSERT(alloc.fSize >= size); // We currently don't use fOffset SkASSERT(0 == fOffset); - VkDeviceSize flushOffset = this->alloc().fOffset + fOffset; - VkDeviceSize flushSize = gpu->vkCaps().canUseWholeSizeOnFlushMappedMemory() ? VK_WHOLE_SIZE - : fMappedSize; - GrVkMemory::FlushMappedAlloc(gpu, this->alloc(), flushOffset, flushSize); - VK_CALL(gpu, UnmapMemory(gpu->device(), this->alloc().fMemory)); + GrVkMemory::FlushMappedAlloc(gpu, alloc, 0, size); + GrVkMemory::UnmapAlloc(gpu, alloc); fMapPtr = nullptr; - fMappedSize = 0; } else { // vkCmdUpdateBuffer requires size < 64k and 4-byte alignment. // https://bugs.chromium.org/p/skia/issues/detail?id=7488 @@ -224,7 +205,7 @@ void GrVkBuffer::internalUnmap(GrVkGpu* gpu, size_t size) { } else { GrVkTransferBuffer* transferBuffer = GrVkTransferBuffer::Create(gpu, size, GrVkBuffer::kCopyRead_Type); - if(!transferBuffer) { + if (!transferBuffer) { return; } diff --git a/src/gpu/vk/GrVkBuffer.h b/src/gpu/vk/GrVkBuffer.h index 8d116a40f8..6d0c1fda9a 100644 --- a/src/gpu/vk/GrVkBuffer.h +++ b/src/gpu/vk/GrVkBuffer.h @@ -82,7 +82,7 @@ protected: const Desc& descriptor); GrVkBuffer(const Desc& desc, const GrVkBuffer::Resource* resource) - : fDesc(desc), fResource(resource), fOffset(0), fMapPtr(nullptr), fMappedSize(0) { + : fDesc(desc), fResource(resource), fOffset(0), fMapPtr(nullptr) { } void* vkMap(GrVkGpu* gpu) { @@ -115,9 +115,6 @@ private: const Resource* fResource; VkDeviceSize fOffset; void* fMapPtr; - // On certain Intel devices/drivers there is a bug if we try to flush non-coherent memory and - // pass in VK_WHOLE_SIZE. 
Thus we track our mapped size and explicitly set it when calling flush - VkDeviceSize fMappedSize; typedef SkNoncopyable INHERITED; }; diff --git a/src/gpu/vk/GrVkGpu.cpp b/src/gpu/vk/GrVkGpu.cpp index 56d0b95bd0..2525c5c16c 100644 --- a/src/gpu/vk/GrVkGpu.cpp +++ b/src/gpu/vk/GrVkGpu.cpp @@ -17,6 +17,7 @@ #include "GrRenderTargetPriv.h" #include "GrTexturePriv.h" +#include "GrVkAMDMemoryAllocator.h" #include "GrVkCommandBuffer.h" #include "GrVkGpuCommandBuffer.h" #include "GrVkImage.h" @@ -92,6 +93,7 @@ GrVkGpu::GrVkGpu(GrContext* context, const GrContextOptions& options, sk_sp<const GrVkBackendContext> backendCtx) : INHERITED(context) , fBackendContext(std::move(backendCtx)) + , fMemoryAllocator(fBackendContext->fMemoryAllocator) , fDevice(fBackendContext->fDevice) , fQueue(fBackendContext->fQueue) , fResourceProvider(this) @@ -118,6 +120,12 @@ GrVkGpu::GrVkGpu(GrContext* context, const GrContextOptions& options, } #endif + if (!fMemoryAllocator) { + // We were not given a memory allocator at creation + fMemoryAllocator.reset(new GrVkAMDMemoryAllocator(fBackendContext->fPhysicalDevice, + fDevice, fBackendContext->fInterface)); + } + fCompiler = new SkSL::Compiler(); fVkCaps.reset(new GrVkCaps(options, this->vkInterface(), fBackendContext->fPhysicalDevice, @@ -142,17 +150,6 @@ GrVkGpu::GrVkGpu(GrContext* context, const GrContextOptions& options, fCurrentCmdBuffer = fResourceProvider.findOrCreatePrimaryCommandBuffer(); SkASSERT(fCurrentCmdBuffer); fCurrentCmdBuffer->begin(this); - - // set up our heaps - fHeaps[kLinearImage_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 16*1024*1024)); - fHeaps[kOptimalImage_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 64*1024*1024)); - fHeaps[kSmallOptimalImage_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 2*1024*1024)); - fHeaps[kVertexBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSingleAlloc_Strategy, 0)); - fHeaps[kIndexBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSingleAlloc_Strategy, 0)); - fHeaps[kUniformBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 256*1024)); - fHeaps[kTexelBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSingleAlloc_Strategy, 0)); - fHeaps[kCopyReadBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSingleAlloc_Strategy, 0)); - fHeaps[kCopyWriteBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 16*1024*1024)); } void GrVkGpu::destroyResources() { @@ -562,7 +559,6 @@ bool GrVkGpu::uploadTexDataLinear(GrVkTexture* tex, GrSurfaceOrigin texOrigin, i 0, // arraySlice }; VkSubresourceLayout layout; - VkResult err; const GrVkInterface* interface = this->vkInterface(); @@ -573,28 +569,14 @@ bool GrVkGpu::uploadTexDataLinear(GrVkTexture* tex, GrSurfaceOrigin texOrigin, i int texTop = kBottomLeft_GrSurfaceOrigin == texOrigin ? tex->height() - top - height : top; const GrVkAlloc& alloc = tex->alloc(); - VkDeviceSize offset = alloc.fOffset + texTop*layout.rowPitch + left*bpp; - VkDeviceSize offsetDiff = 0; + VkDeviceSize offset = texTop*layout.rowPitch + left*bpp; VkDeviceSize size = height*layout.rowPitch; - // For Noncoherent buffers we want to make sure the range that we map, both offset and size, - // are aligned to the nonCoherentAtomSize limit. We may have to move the initial offset back to - // meet the alignment requirements. So we track how far we move back and then adjust the mapped - // ptr back up so that this is opaque to the caller. 
- if (SkToBool(alloc.fFlags & GrVkAlloc::kNoncoherent_Flag)) { - VkDeviceSize alignment = this->physicalDeviceProperties().limits.nonCoherentAtomSize; - offsetDiff = offset & (alignment - 1); - offset = offset - offsetDiff; - // Make size of the map aligned to nonCoherentAtomSize - size = (size + alignment - 1) & ~(alignment - 1); - } - SkASSERT(offset >= alloc.fOffset); - SkASSERT(size <= alloc.fOffset + alloc.fSize); - void* mapPtr; - err = GR_VK_CALL(interface, MapMemory(fDevice, alloc.fMemory, offset, size, 0, &mapPtr)); - if (err) { + SkASSERT(size + offset <= alloc.fSize); + void* mapPtr = GrVkMemory::MapAlloc(this, alloc); + if (!mapPtr) { return false; } - mapPtr = reinterpret_cast<char*>(mapPtr) + offsetDiff; + mapPtr = reinterpret_cast<char*>(mapPtr) + offset; if (kBottomLeft_GrSurfaceOrigin == texOrigin) { // copy into buffer by rows @@ -611,7 +593,7 @@ bool GrVkGpu::uploadTexDataLinear(GrVkTexture* tex, GrSurfaceOrigin texOrigin, i } GrVkMemory::FlushMappedAlloc(this, alloc, offset, size); - GR_VK_CALL(interface, UnmapMemory(fDevice, alloc.fMemory)); + GrVkMemory::UnmapAlloc(this, alloc); return true; } @@ -1147,33 +1129,14 @@ GrStencilAttachment* GrVkGpu::createStencilAttachmentForRenderTarget(const GrRen bool copy_testing_data(GrVkGpu* gpu, const void* srcData, const GrVkAlloc& alloc, size_t bufferOffset, size_t srcRowBytes, size_t dstRowBytes, int h) { - // For Noncoherent buffers we want to make sure the range that we map, both offset and size, - // are aligned to the nonCoherentAtomSize limit. We may have to move the initial offset back to - // meet the alignment requirements. So we track how far we move back and then adjust the mapped - // ptr back up so that this is opaque to the caller. - VkDeviceSize mapSize = dstRowBytes * h; - VkDeviceSize mapOffset = alloc.fOffset + bufferOffset; - VkDeviceSize offsetDiff = 0; - if (SkToBool(alloc.fFlags & GrVkAlloc::kNoncoherent_Flag)) { - VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize; - offsetDiff = mapOffset & (alignment - 1); - mapOffset = mapOffset - offsetDiff; - // Make size of the map aligned to nonCoherentAtomSize - mapSize = (mapSize + alignment - 1) & ~(alignment - 1); - } - SkASSERT(mapOffset >= alloc.fOffset); - SkASSERT(mapSize + mapOffset <= alloc.fOffset + alloc.fSize); - void* mapPtr; - VkResult err = GR_VK_CALL(gpu->vkInterface(), MapMemory(gpu->device(), - alloc.fMemory, - mapOffset, - mapSize, - 0, - &mapPtr)); - mapPtr = reinterpret_cast<char*>(mapPtr) + offsetDiff; - if (err) { + VkDeviceSize size = dstRowBytes * h; + VkDeviceSize offset = bufferOffset; + SkASSERT(size + offset <= alloc.fSize); + void* mapPtr = GrVkMemory::MapAlloc(gpu, alloc); + if (!mapPtr) { return false; } + mapPtr = reinterpret_cast<char*>(mapPtr) + offset; if (srcData) { // If there is no padding on dst we can do a single memcopy. 
@@ -1192,8 +1155,8 @@ bool copy_testing_data(GrVkGpu* gpu, const void* srcData, const GrVkAlloc& alloc } } } - GrVkMemory::FlushMappedAlloc(gpu, alloc, mapOffset, mapSize); - GR_VK_CALL(gpu->vkInterface(), UnmapMemory(gpu->device(), alloc.fMemory)); + GrVkMemory::FlushMappedAlloc(gpu, alloc, offset, size); + GrVkMemory::UnmapAlloc(gpu, alloc); return true; } @@ -2017,7 +1980,7 @@ bool GrVkGpu::onReadPixels(GrSurface* surface, GrSurfaceOrigin origin, int left, this->submitCommandBuffer(kForce_SyncQueue); void* mappedMemory = transferBuffer->map(); const GrVkAlloc& transAlloc = transferBuffer->alloc(); - GrVkMemory::InvalidateMappedAlloc(this, transAlloc, transAlloc.fOffset, VK_WHOLE_SIZE); + GrVkMemory::InvalidateMappedAlloc(this, transAlloc, 0, transAlloc.fSize); if (copyFromOrigin) { uint32_t skipRows = region.imageExtent.height - height; diff --git a/src/gpu/vk/GrVkGpu.h b/src/gpu/vk/GrVkGpu.h index 7bdfbeaab3..a44ea7230f 100644 --- a/src/gpu/vk/GrVkGpu.h +++ b/src/gpu/vk/GrVkGpu.h @@ -23,6 +23,7 @@ class GrPipeline; class GrVkBufferImpl; +class GrVkMemoryAllocator; class GrVkPipeline; class GrVkPipelineState; class GrVkPrimaryCommandBuffer; @@ -46,6 +47,8 @@ public: const GrVkInterface* vkInterface() const { return fBackendContext->fInterface.get(); } const GrVkCaps& vkCaps() const { return *fVkCaps; } + GrVkMemoryAllocator* memoryAllocator() const { return fMemoryAllocator.get(); } + VkDevice device() const { return fDevice; } VkQueue queue() const { return fQueue; } VkCommandPool cmdPool() const { return fCmdPool; } @@ -140,28 +143,6 @@ public: VkDeviceSize dstOffset, VkDeviceSize size); bool updateBuffer(GrVkBuffer* buffer, const void* src, VkDeviceSize offset, VkDeviceSize size); - // Heaps - enum Heap { - kLinearImage_Heap = 0, - // We separate out small (i.e., <= 16K) images to reduce fragmentation - // in the main heap. - kOptimalImage_Heap, - kSmallOptimalImage_Heap, - // We have separate vertex and image heaps, because it's possible that - // a given Vulkan driver may allocate them separately. - kVertexBuffer_Heap, - kIndexBuffer_Heap, - kUniformBuffer_Heap, - kTexelBuffer_Heap, - kCopyReadBuffer_Heap, - kCopyWriteBuffer_Heap, - - kLastHeap = kCopyWriteBuffer_Heap - }; - static const int kHeapCount = kLastHeap + 1; - - GrVkHeap* getHeap(Heap heap) const { return fHeaps[heap].get(); } - private: GrVkGpu(GrContext*, const GrContextOptions&, sk_sp<const GrVkBackendContext> backendContext); @@ -251,6 +232,7 @@ private: #endif sk_sp<const GrVkBackendContext> fBackendContext; + sk_sp<GrVkMemoryAllocator> fMemoryAllocator; sk_sp<GrVkCaps> fVkCaps; // These Vulkan objects are provided by the client, and also stored in fBackendContext. 
@@ -270,8 +252,6 @@ private: VkPhysicalDeviceProperties fPhysDevProps; VkPhysicalDeviceMemoryProperties fPhysDevMemProps; - std::unique_ptr<GrVkHeap> fHeaps[kHeapCount]; - GrVkCopyManager fCopyManager; #ifdef SK_ENABLE_VK_LAYERS diff --git a/src/gpu/vk/GrVkMemory.cpp b/src/gpu/vk/GrVkMemory.cpp index 4f619a3ef3..f999c26546 100644 --- a/src/gpu/vk/GrVkMemory.cpp +++ b/src/gpu/vk/GrVkMemory.cpp @@ -9,49 +9,26 @@ #include "GrVkGpu.h" #include "GrVkUtil.h" +#include "vk/GrVkMemoryAllocator.h" -#ifdef SK_DEBUG -// for simple tracking of how much we're using in each heap -// last counter is for non-subheap allocations -VkDeviceSize gHeapUsage[VK_MAX_MEMORY_HEAPS+1] = { 0 }; -#endif +using AllocationPropertyFlags = GrVkMemoryAllocator::AllocationPropertyFlags; +using BufferUsage = GrVkMemoryAllocator::BufferUsage; -static bool get_valid_memory_type_index(const VkPhysicalDeviceMemoryProperties& physDevMemProps, - uint32_t typeBits, - VkMemoryPropertyFlags requestedMemFlags, - uint32_t* typeIndex, - uint32_t* heapIndex) { - for (uint32_t i = 0; i < physDevMemProps.memoryTypeCount; ++i) { - if (typeBits & (1 << i)) { - uint32_t supportedFlags = physDevMemProps.memoryTypes[i].propertyFlags & - requestedMemFlags; - if (supportedFlags == requestedMemFlags) { - *typeIndex = i; - *heapIndex = physDevMemProps.memoryTypes[i].heapIndex; - return true; - } - } +static BufferUsage get_buffer_usage(GrVkBuffer::Type type, bool dynamic) { + switch (type) { + case GrVkBuffer::kVertex_Type: // fall through + case GrVkBuffer::kIndex_Type: // fall through + case GrVkBuffer::kTexel_Type: + return dynamic ? BufferUsage::kCpuWritesGpuReads : BufferUsage::kGpuOnly; + case GrVkBuffer::kUniform_Type: + SkASSERT(dynamic); + return BufferUsage::kCpuWritesGpuReads; + case GrVkBuffer::kCopyRead_Type: // fall through + case GrVkBuffer::kCopyWrite_Type: + return BufferUsage::kCpuOnly; } - return false; -} - -static GrVkGpu::Heap buffer_type_to_heap(GrVkBuffer::Type type) { - const GrVkGpu::Heap kBufferToHeap[]{ - GrVkGpu::kVertexBuffer_Heap, - GrVkGpu::kIndexBuffer_Heap, - GrVkGpu::kUniformBuffer_Heap, - GrVkGpu::kTexelBuffer_Heap, - GrVkGpu::kCopyReadBuffer_Heap, - GrVkGpu::kCopyWriteBuffer_Heap, - }; - GR_STATIC_ASSERT(0 == GrVkBuffer::kVertex_Type); - GR_STATIC_ASSERT(1 == GrVkBuffer::kIndex_Type); - GR_STATIC_ASSERT(2 == GrVkBuffer::kUniform_Type); - GR_STATIC_ASSERT(3 == GrVkBuffer::kTexel_Type); - GR_STATIC_ASSERT(4 == GrVkBuffer::kCopyRead_Type); - GR_STATIC_ASSERT(5 == GrVkBuffer::kCopyWrite_Type); - - return kBufferToHeap[type]; + SK_ABORT("Invalid GrVkBuffer::Type"); + return BufferUsage::kCpuOnly; // Just returning an arbitrary value. 
} bool GrVkMemory::AllocAndBindBufferMemory(const GrVkGpu* gpu, @@ -59,68 +36,23 @@ bool GrVkMemory::AllocAndBindBufferMemory(const GrVkGpu* gpu, GrVkBuffer::Type type, bool dynamic, GrVkAlloc* alloc) { - const GrVkInterface* iface = gpu->vkInterface(); - VkDevice device = gpu->device(); - - VkMemoryRequirements memReqs; - GR_VK_CALL(iface, GetBufferMemoryRequirements(device, buffer, &memReqs)); + GrVkMemoryAllocator* allocator = gpu->memoryAllocator(); + GrVkBackendMemory memory = 0; - uint32_t typeIndex = 0; - uint32_t heapIndex = 0; - const VkPhysicalDeviceMemoryProperties& phDevMemProps = gpu->physicalDeviceMemoryProperties(); - const VkPhysicalDeviceProperties& phDevProps = gpu->physicalDeviceProperties(); - if (dynamic) { - // try to get cached and ideally non-coherent memory first - if (!get_valid_memory_type_index(phDevMemProps, - memReqs.memoryTypeBits, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_CACHED_BIT, - &typeIndex, - &heapIndex)) { - // some sort of host-visible memory type should always be available for dynamic buffers - SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps, - memReqs.memoryTypeBits, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, - &typeIndex, - &heapIndex)); - } + GrVkMemoryAllocator::BufferUsage usage = get_buffer_usage(type, dynamic); - VkMemoryPropertyFlags mpf = phDevMemProps.memoryTypes[typeIndex].propertyFlags; - alloc->fFlags = mpf & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT ? 0x0 - : GrVkAlloc::kNoncoherent_Flag; - if (SkToBool(alloc->fFlags & GrVkAlloc::kNoncoherent_Flag)) { - SkASSERT(SkIsPow2(memReqs.alignment)); - SkASSERT(SkIsPow2(phDevProps.limits.nonCoherentAtomSize)); - memReqs.alignment = SkTMax(memReqs.alignment, phDevProps.limits.nonCoherentAtomSize); - } - } else { - // device-local memory should always be available for static buffers - SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps, - memReqs.memoryTypeBits, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - &typeIndex, - &heapIndex)); - alloc->fFlags = 0x0; - } - - GrVkHeap* heap = gpu->getHeap(buffer_type_to_heap(type)); - - if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) { - // if static, try to allocate from non-host-visible non-device-local memory instead - if (dynamic || - !get_valid_memory_type_index(phDevMemProps, memReqs.memoryTypeBits, - 0, &typeIndex, &heapIndex) || - !heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) { - SkDebugf("Failed to alloc buffer\n"); - return false; - } + if (!allocator->allocateMemoryForBuffer(buffer, usage, AllocationPropertyFlags::kNone, + &memory)) { + return false; } + allocator->getAllocInfo(memory, alloc); // Bind buffer - VkResult err = GR_VK_CALL(iface, BindBufferMemory(device, buffer, - alloc->fMemory, alloc->fOffset)); + VkResult err = GR_VK_CALL(gpu->vkInterface(), BindBufferMemory(gpu->device(), buffer, + alloc->fMemory, + alloc->fOffset)); if (err) { - SkASSERT_RELEASE(heap->free(*alloc)); + FreeBufferMemory(gpu, type, *alloc); return false; } @@ -129,503 +61,152 @@ bool GrVkMemory::AllocAndBindBufferMemory(const GrVkGpu* gpu, void GrVkMemory::FreeBufferMemory(const GrVkGpu* gpu, GrVkBuffer::Type type, const GrVkAlloc& alloc) { - - GrVkHeap* heap = gpu->getHeap(buffer_type_to_heap(type)); - SkASSERT_RELEASE(heap->free(alloc)); + if (alloc.fBackendMemory) { + GrVkMemoryAllocator* allocator = gpu->memoryAllocator(); + allocator->freeMemory(alloc.fBackendMemory); + } else { + GR_VK_CALL(gpu->vkInterface(), FreeMemory(gpu->device(), alloc.fMemory, nullptr)); + } } 
-// for debugging -static uint64_t gTotalImageMemory = 0; -static uint64_t gTotalImageMemoryFullPage = 0; - const VkDeviceSize kMaxSmallImageSize = 16 * 1024; -const VkDeviceSize kMinVulkanPageSize = 16 * 1024; - -static VkDeviceSize align_size(VkDeviceSize size, VkDeviceSize alignment) { - return (size + alignment - 1) & ~(alignment - 1); -} bool GrVkMemory::AllocAndBindImageMemory(const GrVkGpu* gpu, VkImage image, bool linearTiling, GrVkAlloc* alloc) { - const GrVkInterface* iface = gpu->vkInterface(); - VkDevice device = gpu->device(); + SkASSERT(!linearTiling); + GrVkMemoryAllocator* allocator = gpu->memoryAllocator(); + GrVkBackendMemory memory = 0; VkMemoryRequirements memReqs; - GR_VK_CALL(iface, GetImageMemoryRequirements(device, image, &memReqs)); + GR_VK_CALL(gpu->vkInterface(), GetImageMemoryRequirements(gpu->device(), image, &memReqs)); - uint32_t typeIndex = 0; - uint32_t heapIndex = 0; - GrVkHeap* heap; - const VkPhysicalDeviceMemoryProperties& phDevMemProps = gpu->physicalDeviceMemoryProperties(); - const VkPhysicalDeviceProperties& phDevProps = gpu->physicalDeviceProperties(); - if (linearTiling) { - VkMemoryPropertyFlags desiredMemProps = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_CACHED_BIT; - if (!get_valid_memory_type_index(phDevMemProps, - memReqs.memoryTypeBits, - desiredMemProps, - &typeIndex, - &heapIndex)) { - // some sort of host-visible memory type should always be available - SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps, - memReqs.memoryTypeBits, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, - &typeIndex, - &heapIndex)); - } - heap = gpu->getHeap(GrVkGpu::kLinearImage_Heap); - VkMemoryPropertyFlags mpf = phDevMemProps.memoryTypes[typeIndex].propertyFlags; - alloc->fFlags = mpf & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT ? 
0x0 - : GrVkAlloc::kNoncoherent_Flag; - if (SkToBool(alloc->fFlags & GrVkAlloc::kNoncoherent_Flag)) { - SkASSERT(SkIsPow2(memReqs.alignment)); - SkASSERT(SkIsPow2(phDevProps.limits.nonCoherentAtomSize)); - memReqs.alignment = SkTMax(memReqs.alignment, phDevProps.limits.nonCoherentAtomSize); - } + AllocationPropertyFlags propFlags; + if (memReqs.size <= kMaxSmallImageSize) { + propFlags = AllocationPropertyFlags::kNone; } else { - // this memory type should always be available - SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps, - memReqs.memoryTypeBits, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - &typeIndex, - &heapIndex)); - if (memReqs.size <= kMaxSmallImageSize) { - heap = gpu->getHeap(GrVkGpu::kSmallOptimalImage_Heap); - } else { - heap = gpu->getHeap(GrVkGpu::kOptimalImage_Heap); - } - alloc->fFlags = 0x0; + propFlags = AllocationPropertyFlags::kDedicatedAllocation; } - if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) { - // if optimal, try to allocate from non-host-visible non-device-local memory instead - if (linearTiling || - !get_valid_memory_type_index(phDevMemProps, memReqs.memoryTypeBits, - 0, &typeIndex, &heapIndex) || - !heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) { - SkDebugf("Failed to alloc image\n"); - return false; - } + if (!allocator->allocateMemoryForImage(image, AllocationPropertyFlags::kDedicatedAllocation, + &memory)) { + return false; } + allocator->getAllocInfo(memory, alloc); - // Bind image - VkResult err = GR_VK_CALL(iface, BindImageMemory(device, image, - alloc->fMemory, alloc->fOffset)); + // Bind buffer + VkResult err = GR_VK_CALL(gpu->vkInterface(), BindImageMemory(gpu->device(), image, + alloc->fMemory, alloc->fOffset)); if (err) { - SkASSERT_RELEASE(heap->free(*alloc)); + FreeImageMemory(gpu, linearTiling, *alloc); return false; } - gTotalImageMemory += alloc->fSize; - - VkDeviceSize pageAlignedSize = align_size(alloc->fSize, kMinVulkanPageSize); - gTotalImageMemoryFullPage += pageAlignedSize; - return true; } void GrVkMemory::FreeImageMemory(const GrVkGpu* gpu, bool linearTiling, const GrVkAlloc& alloc) { - GrVkHeap* heap; - if (linearTiling) { - heap = gpu->getHeap(GrVkGpu::kLinearImage_Heap); - } else if (alloc.fSize <= kMaxSmallImageSize) { - heap = gpu->getHeap(GrVkGpu::kSmallOptimalImage_Heap); + if (alloc.fBackendMemory) { + GrVkMemoryAllocator* allocator = gpu->memoryAllocator(); + allocator->freeMemory(alloc.fBackendMemory); } else { - heap = gpu->getHeap(GrVkGpu::kOptimalImage_Heap); - } - if (!heap->free(alloc)) { - // must be an adopted allocation GR_VK_CALL(gpu->vkInterface(), FreeMemory(gpu->device(), alloc.fMemory, nullptr)); - } else { - gTotalImageMemory -= alloc.fSize; - VkDeviceSize pageAlignedSize = align_size(alloc.fSize, kMinVulkanPageSize); - gTotalImageMemoryFullPage -= pageAlignedSize; } } -void GrVkMemory::FlushMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc, VkDeviceSize offset, - VkDeviceSize size) { - if (alloc.fFlags & GrVkAlloc::kNoncoherent_Flag) { +void* GrVkMemory::MapAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc) { + SkASSERT(GrVkAlloc::kMappable_Flag & alloc.fFlags); #ifdef SK_DEBUG - SkASSERT(offset >= alloc.fOffset); - VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize; - SkASSERT(0 == (offset & (alignment-1))); - if (size != VK_WHOLE_SIZE) { - SkASSERT(size > 0); - SkASSERT(0 == (size & (alignment-1)) || - (offset + size) == (alloc.fOffset + alloc.fSize)); - SkASSERT(offset + size <= alloc.fOffset + 
alloc.fSize); - } -#endif - - VkMappedMemoryRange mappedMemoryRange; - memset(&mappedMemoryRange, 0, sizeof(VkMappedMemoryRange)); - mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; - mappedMemoryRange.memory = alloc.fMemory; - mappedMemoryRange.offset = offset; - mappedMemoryRange.size = size; - GR_VK_CALL(gpu->vkInterface(), FlushMappedMemoryRanges(gpu->device(), - 1, &mappedMemoryRange)); - } -} - -void GrVkMemory::InvalidateMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc, - VkDeviceSize offset, VkDeviceSize size) { if (alloc.fFlags & GrVkAlloc::kNoncoherent_Flag) { -#ifdef SK_DEBUG - SkASSERT(offset >= alloc.fOffset); VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize; - SkASSERT(0 == (offset & (alignment-1))); - if (size != VK_WHOLE_SIZE) { - SkASSERT(size > 0); - SkASSERT(0 == (size & (alignment-1)) || - (offset + size) == (alloc.fOffset + alloc.fSize)); - SkASSERT(offset + size <= alloc.fOffset + alloc.fSize); - } -#endif - - VkMappedMemoryRange mappedMemoryRange; - memset(&mappedMemoryRange, 0, sizeof(VkMappedMemoryRange)); - mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; - mappedMemoryRange.memory = alloc.fMemory; - mappedMemoryRange.offset = offset; - mappedMemoryRange.size = size; - GR_VK_CALL(gpu->vkInterface(), InvalidateMappedMemoryRanges(gpu->device(), - 1, &mappedMemoryRange)); + SkASSERT(0 == (alloc.fOffset & (alignment-1))); + SkASSERT(0 == (alloc.fSize & (alignment-1))); } -} - -bool GrVkFreeListAlloc::alloc(VkDeviceSize requestedSize, - VkDeviceSize* allocOffset, VkDeviceSize* allocSize) { - VkDeviceSize alignedSize = align_size(requestedSize, fAlignment); - - // find the smallest block big enough for our allocation - FreeList::Iter iter = fFreeList.headIter(); - FreeList::Iter bestFitIter; - VkDeviceSize bestFitSize = fSize + 1; - VkDeviceSize secondLargestSize = 0; - VkDeviceSize secondLargestOffset = 0; - while (iter.get()) { - Block* block = iter.get(); - // need to adjust size to match desired alignment - SkASSERT(align_size(block->fOffset, fAlignment) - block->fOffset == 0); - if (block->fSize >= alignedSize && block->fSize < bestFitSize) { - bestFitIter = iter; - bestFitSize = block->fSize; - } - if (secondLargestSize < block->fSize && block->fOffset != fLargestBlockOffset) { - secondLargestSize = block->fSize; - secondLargestOffset = block->fOffset; - } - iter.next(); - } - SkASSERT(secondLargestSize <= fLargestBlockSize); - - Block* bestFit = bestFitIter.get(); - if (bestFit) { - SkASSERT(align_size(bestFit->fOffset, fAlignment) == bestFit->fOffset); - *allocOffset = bestFit->fOffset; - *allocSize = alignedSize; - // adjust or remove current block - VkDeviceSize originalBestFitOffset = bestFit->fOffset; - if (bestFit->fSize > alignedSize) { - bestFit->fOffset += alignedSize; - bestFit->fSize -= alignedSize; - if (fLargestBlockOffset == originalBestFitOffset) { - if (bestFit->fSize >= secondLargestSize) { - fLargestBlockSize = bestFit->fSize; - fLargestBlockOffset = bestFit->fOffset; - } else { - fLargestBlockSize = secondLargestSize; - fLargestBlockOffset = secondLargestOffset; - } - } -#ifdef SK_DEBUG - VkDeviceSize largestSize = 0; - iter = fFreeList.headIter(); - while (iter.get()) { - Block* block = iter.get(); - if (largestSize < block->fSize) { - largestSize = block->fSize; - } - iter.next(); - } - SkASSERT(largestSize == fLargestBlockSize); #endif - } else { - SkASSERT(bestFit->fSize == alignedSize); - if (fLargestBlockOffset == originalBestFitOffset) { - fLargestBlockSize = 
secondLargestSize; - fLargestBlockOffset = secondLargestOffset; - } - fFreeList.remove(bestFit); -#ifdef SK_DEBUG - VkDeviceSize largestSize = 0; - iter = fFreeList.headIter(); - while (iter.get()) { - Block* block = iter.get(); - if (largestSize < block->fSize) { - largestSize = block->fSize; - } - iter.next(); - } - SkASSERT(largestSize == fLargestBlockSize); -#endif - } - fFreeSize -= alignedSize; - SkASSERT(*allocSize > 0); - - return true; + if (alloc.fBackendMemory) { + GrVkMemoryAllocator* allocator = gpu->memoryAllocator(); + return allocator->mapMemory(alloc.fBackendMemory); } - SkDebugf("Can't allocate %d bytes, %d bytes available, largest free block %d\n", alignedSize, fFreeSize, fLargestBlockSize); - - return false; -} - -void GrVkFreeListAlloc::free(VkDeviceSize allocOffset, VkDeviceSize allocSize) { - // find the block right after this allocation - FreeList::Iter iter = fFreeList.headIter(); - FreeList::Iter prev; - while (iter.get() && iter.get()->fOffset < allocOffset) { - prev = iter; - iter.next(); - } - // we have four cases: - // we exactly follow the previous one - Block* block; - if (prev.get() && prev.get()->fOffset + prev.get()->fSize == allocOffset) { - block = prev.get(); - block->fSize += allocSize; - if (block->fOffset == fLargestBlockOffset) { - fLargestBlockSize = block->fSize; - } - // and additionally we may exactly precede the next one - if (iter.get() && iter.get()->fOffset == allocOffset + allocSize) { - block->fSize += iter.get()->fSize; - if (iter.get()->fOffset == fLargestBlockOffset) { - fLargestBlockOffset = block->fOffset; - fLargestBlockSize = block->fSize; - } - fFreeList.remove(iter.get()); - } - // or we only exactly proceed the next one - } else if (iter.get() && iter.get()->fOffset == allocOffset + allocSize) { - block = iter.get(); - block->fSize += allocSize; - if (block->fOffset == fLargestBlockOffset) { - fLargestBlockOffset = allocOffset; - fLargestBlockSize = block->fSize; - } - block->fOffset = allocOffset; - // or we fall somewhere in between, with gaps - } else { - block = fFreeList.addBefore(iter); - block->fOffset = allocOffset; - block->fSize = allocSize; - } - fFreeSize += allocSize; - if (block->fSize > fLargestBlockSize) { - fLargestBlockSize = block->fSize; - fLargestBlockOffset = block->fOffset; - } - -#ifdef SK_DEBUG - VkDeviceSize largestSize = 0; - iter = fFreeList.headIter(); - while (iter.get()) { - Block* block = iter.get(); - if (largestSize < block->fSize) { - largestSize = block->fSize; - } - iter.next(); + void* mapPtr; + VkResult err = GR_VK_CALL(gpu->vkInterface(), MapMemory(gpu->device(), alloc.fMemory, + alloc.fOffset, + alloc.fSize, 0, &mapPtr)); + if (err) { + mapPtr = nullptr; } - SkASSERT(fLargestBlockSize == largestSize); -#endif + return mapPtr; } -GrVkSubHeap::GrVkSubHeap(const GrVkGpu* gpu, uint32_t memoryTypeIndex, uint32_t heapIndex, - VkDeviceSize size, VkDeviceSize alignment) - : INHERITED(size, alignment) - , fGpu(gpu) -#ifdef SK_DEBUG - , fHeapIndex(heapIndex) -#endif - , fMemoryTypeIndex(memoryTypeIndex) { - - VkMemoryAllocateInfo allocInfo = { - VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // sType - nullptr, // pNext - size, // allocationSize - memoryTypeIndex, // memoryTypeIndex - }; - - VkResult err = GR_VK_CALL(gpu->vkInterface(), AllocateMemory(gpu->device(), - &allocInfo, - nullptr, - &fAlloc)); - if (VK_SUCCESS != err) { - this->reset(); - } -#ifdef SK_DEBUG - else { - gHeapUsage[heapIndex] += size; +void GrVkMemory::UnmapAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc) { + if 
(alloc.fBackendMemory) { + GrVkMemoryAllocator* allocator = gpu->memoryAllocator(); + allocator->unmapMemory(alloc.fBackendMemory); + } else { + GR_VK_CALL(gpu->vkInterface(), UnmapMemory(gpu->device(), alloc.fMemory)); } -#endif } -GrVkSubHeap::~GrVkSubHeap() { - const GrVkInterface* iface = fGpu->vkInterface(); - GR_VK_CALL(iface, FreeMemory(fGpu->device(), fAlloc, nullptr)); +void GrVkMemory::GetNonCoherentMappedMemoryRange(const GrVkAlloc& alloc, VkDeviceSize offset, + VkDeviceSize size, VkDeviceSize alignment, + VkMappedMemoryRange* range) { + SkASSERT(alloc.fFlags & GrVkAlloc::kNoncoherent_Flag); + offset = offset + alloc.fOffset; + VkDeviceSize offsetDiff = offset & (alignment -1); + offset = offset - offsetDiff; + size = (size + alignment - 1) & ~(alignment - 1); #ifdef SK_DEBUG - gHeapUsage[fHeapIndex] -= fSize; + SkASSERT(offset >= alloc.fOffset); + SkASSERT(offset + size <= alloc.fOffset + alloc.fSize); + SkASSERT(0 == (offset & (alignment-1))); + SkASSERT(size > 0); + SkASSERT(0 == (size & (alignment-1))); #endif -} -bool GrVkSubHeap::alloc(VkDeviceSize size, GrVkAlloc* alloc) { - alloc->fMemory = fAlloc; - return INHERITED::alloc(size, &alloc->fOffset, &alloc->fSize); + memset(range, 0, sizeof(VkMappedMemoryRange)); + range->sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + range->memory = alloc.fMemory; + range->offset = offset; + range->size = size; } -void GrVkSubHeap::free(const GrVkAlloc& alloc) { - SkASSERT(alloc.fMemory == fAlloc); - - INHERITED::free(alloc.fOffset, alloc.fSize); -} - -bool GrVkHeap::subAlloc(VkDeviceSize size, VkDeviceSize alignment, - uint32_t memoryTypeIndex, uint32_t heapIndex, GrVkAlloc* alloc) { - VkDeviceSize alignedSize = align_size(size, alignment); - - // if requested is larger than our subheap allocation, just alloc directly - if (alignedSize > fSubHeapSize) { - VkMemoryAllocateInfo allocInfo = { - VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // sType - nullptr, // pNext - alignedSize, // allocationSize - memoryTypeIndex, // memoryTypeIndex - }; - - VkResult err = GR_VK_CALL(fGpu->vkInterface(), AllocateMemory(fGpu->device(), - &allocInfo, - nullptr, - &alloc->fMemory)); - if (VK_SUCCESS != err) { - return false; - } - alloc->fOffset = 0; - alloc->fSize = alignedSize; - alloc->fUsesSystemHeap = true; -#ifdef SK_DEBUG - gHeapUsage[VK_MAX_MEMORY_HEAPS] += alignedSize; -#endif - - return true; - } - - // first try to find a subheap that fits our allocation request - int bestFitIndex = -1; - VkDeviceSize bestFitSize = 0x7FFFFFFF; - for (auto i = 0; i < fSubHeaps.count(); ++i) { - if (fSubHeaps[i]->memoryTypeIndex() == memoryTypeIndex && - fSubHeaps[i]->alignment() == alignment) { - VkDeviceSize heapSize = fSubHeaps[i]->largestBlockSize(); - if (heapSize >= alignedSize && heapSize < bestFitSize) { - bestFitIndex = i; - bestFitSize = heapSize; - } - } - } - - if (bestFitIndex >= 0) { - SkASSERT(fSubHeaps[bestFitIndex]->alignment() == alignment); - if (fSubHeaps[bestFitIndex]->alloc(size, alloc)) { - fUsedSize += alloc->fSize; - return true; - } - return false; - } - - // need to allocate a new subheap - std::unique_ptr<GrVkSubHeap>& subHeap = fSubHeaps.push_back(); - subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, heapIndex, fSubHeapSize, alignment)); - // try to recover from failed allocation by only allocating what we need - if (subHeap->size() == 0) { - VkDeviceSize alignedSize = align_size(size, alignment); - subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, heapIndex, alignedSize, alignment)); - if (subHeap->size() == 0) { - return false; - 
} - } - fAllocSize += fSubHeapSize; - if (subHeap->alloc(size, alloc)) { - fUsedSize += alloc->fSize; - return true; - } - - return false; -} - -bool GrVkHeap::singleAlloc(VkDeviceSize size, VkDeviceSize alignment, - uint32_t memoryTypeIndex, uint32_t heapIndex, GrVkAlloc* alloc) { - VkDeviceSize alignedSize = align_size(size, alignment); - - // first try to find an unallocated subheap that fits our allocation request - int bestFitIndex = -1; - VkDeviceSize bestFitSize = 0x7FFFFFFF; - for (auto i = 0; i < fSubHeaps.count(); ++i) { - if (fSubHeaps[i]->memoryTypeIndex() == memoryTypeIndex && - fSubHeaps[i]->alignment() == alignment && - fSubHeaps[i]->unallocated()) { - VkDeviceSize heapSize = fSubHeaps[i]->size(); - if (heapSize >= alignedSize && heapSize < bestFitSize) { - bestFitIndex = i; - bestFitSize = heapSize; - } - } - } - - if (bestFitIndex >= 0) { - SkASSERT(fSubHeaps[bestFitIndex]->alignment() == alignment); - if (fSubHeaps[bestFitIndex]->alloc(size, alloc)) { - fUsedSize += alloc->fSize; - return true; +void GrVkMemory::FlushMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc, VkDeviceSize offset, + VkDeviceSize size) { + if (alloc.fFlags & GrVkAlloc::kNoncoherent_Flag) { + SkASSERT(offset == 0); + SkASSERT(size <= alloc.fSize); + if (alloc.fBackendMemory) { + GrVkMemoryAllocator* allocator = gpu->memoryAllocator(); + allocator->flushMappedMemory(alloc.fBackendMemory, offset, size); + } else { + VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize; + VkMappedMemoryRange mappedMemoryRange; + GrVkMemory::GetNonCoherentMappedMemoryRange(alloc, offset, size, alignment, + &mappedMemoryRange); + GR_VK_CALL(gpu->vkInterface(), FlushMappedMemoryRanges(gpu->device(), 1, + &mappedMemoryRange)); } - return false; - } - - // need to allocate a new subheap - std::unique_ptr<GrVkSubHeap>& subHeap = fSubHeaps.push_back(); - subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, heapIndex, alignedSize, alignment)); - fAllocSize += alignedSize; - if (subHeap->alloc(size, alloc)) { - fUsedSize += alloc->fSize; - return true; } - - return false; } -bool GrVkHeap::free(const GrVkAlloc& alloc) { - // a size of 0 means we're using the system heap - if (alloc.fUsesSystemHeap) { - const GrVkInterface* iface = fGpu->vkInterface(); - GR_VK_CALL(iface, FreeMemory(fGpu->device(), alloc.fMemory, nullptr)); - return true; - } - - for (auto i = 0; i < fSubHeaps.count(); ++i) { - if (fSubHeaps[i]->memory() == alloc.fMemory) { - fSubHeaps[i]->free(alloc); - fUsedSize -= alloc.fSize; - return true; +void GrVkMemory::InvalidateMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc, + VkDeviceSize offset, VkDeviceSize size) { + if (alloc.fFlags & GrVkAlloc::kNoncoherent_Flag) { + SkASSERT(offset == 0); + SkASSERT(size <= alloc.fSize); + if (alloc.fBackendMemory) { + GrVkMemoryAllocator* allocator = gpu->memoryAllocator(); + allocator->invalidateMappedMemory(alloc.fBackendMemory, offset, size); + } else { + VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize; + VkMappedMemoryRange mappedMemoryRange; + GrVkMemory::GetNonCoherentMappedMemoryRange(alloc, offset, size, alignment, + &mappedMemoryRange); + GR_VK_CALL(gpu->vkInterface(), InvalidateMappedMemoryRanges(gpu->device(), 1, + &mappedMemoryRange)); } } - - return false; } - diff --git a/src/gpu/vk/GrVkMemory.h b/src/gpu/vk/GrVkMemory.h index bb6681435f..741bdaa8a0 100644 --- a/src/gpu/vk/GrVkMemory.h +++ b/src/gpu/vk/GrVkMemory.h @@ -34,133 +34,25 @@ namespace GrVkMemory { GrVkAlloc* alloc); void 
FreeImageMemory(const GrVkGpu* gpu, bool linearTiling, const GrVkAlloc& alloc); + // Maps the entire GrVkAlloc and returns a pointer to the start of the allocation. Underneath + // the hood, we may map more than the range of the GrVkAlloc (e.g. the entire VkDeviceMemory), + // but the pointer returned will always be to the start of the GrVkAlloc. The caller should also + // never assume more than the GrVkAlloc block has been mapped. + void* MapAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc); + void UnmapAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc); + + // For the Flush and Invalidate calls, the offset should be relative to the GrVkAlloc. Thus this + // will often be 0. The client does not need to make sure the offset and size are aligned to the + // nonCoherentAtomSize, the internal calls will handle that. void FlushMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc, VkDeviceSize offset, VkDeviceSize size); void InvalidateMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc, VkDeviceSize offset, VkDeviceSize size); -} - -class GrVkFreeListAlloc { -public: - GrVkFreeListAlloc(VkDeviceSize size, VkDeviceSize alignment) - : fSize(size) - , fAlignment(alignment) - , fFreeSize(size) - , fLargestBlockSize(size) - , fLargestBlockOffset(0) { - Block* block = fFreeList.addToTail(); - block->fOffset = 0; - block->fSize = fSize; - } - ~GrVkFreeListAlloc() { - this->reset(); - } - - VkDeviceSize size() const { return fSize; } - VkDeviceSize alignment() const { return fAlignment; } - VkDeviceSize freeSize() const { return fFreeSize; } - VkDeviceSize largestBlockSize() const { return fLargestBlockSize; } - - bool unallocated() const { return fSize == fFreeSize; } - -protected: - bool alloc(VkDeviceSize requestedSize, VkDeviceSize* allocOffset, VkDeviceSize* allocSize); - void free(VkDeviceSize allocOffset, VkDeviceSize allocSize); - - void reset() { - fSize = 0; - fAlignment = 0; - fFreeSize = 0; - fLargestBlockSize = 0; - fFreeList.reset(); - } - - struct Block { - VkDeviceSize fOffset; - VkDeviceSize fSize; - }; - typedef SkTLList<Block, 16> FreeList; - - VkDeviceSize fSize; - VkDeviceSize fAlignment; - VkDeviceSize fFreeSize; - VkDeviceSize fLargestBlockSize; - VkDeviceSize fLargestBlockOffset; - FreeList fFreeList; -}; - -class GrVkSubHeap : public GrVkFreeListAlloc { -public: - GrVkSubHeap(const GrVkGpu* gpu, uint32_t memoryTypeIndex, uint32_t heapIndex, - VkDeviceSize size, VkDeviceSize alignment); - ~GrVkSubHeap(); - - uint32_t memoryTypeIndex() const { return fMemoryTypeIndex; } - VkDeviceMemory memory() { return fAlloc; } - - bool alloc(VkDeviceSize requestedSize, GrVkAlloc* alloc); - void free(const GrVkAlloc& alloc); -private: - const GrVkGpu* fGpu; -#ifdef SK_DEBUG - uint32_t fHeapIndex; -#endif - uint32_t fMemoryTypeIndex; - VkDeviceMemory fAlloc; - - typedef GrVkFreeListAlloc INHERITED; -}; - -class GrVkHeap { -public: - enum Strategy { - kSubAlloc_Strategy, // alloc large subheaps and suballoc within them - kSingleAlloc_Strategy // alloc/recycle an individual subheap per object - }; - - GrVkHeap(const GrVkGpu* gpu, Strategy strategy, VkDeviceSize subHeapSize) - : fGpu(gpu) - , fSubHeapSize(subHeapSize) - , fAllocSize(0) - , fUsedSize(0) { - if (strategy == kSubAlloc_Strategy) { - fAllocFunc = &GrVkHeap::subAlloc; - } else { - fAllocFunc = &GrVkHeap::singleAlloc; - } - } - - ~GrVkHeap() {} - - VkDeviceSize allocSize() const { return fAllocSize; } - VkDeviceSize usedSize() const { return fUsedSize; } - - bool alloc(VkDeviceSize size, VkDeviceSize alignment, uint32_t 
memoryTypeIndex, - uint32_t heapIndex, GrVkAlloc* alloc) { - SkASSERT(size > 0); - alloc->fUsesSystemHeap = false; - return (*this.*fAllocFunc)(size, alignment, memoryTypeIndex, heapIndex, alloc); - } - bool free(const GrVkAlloc& alloc); - -private: - typedef bool (GrVkHeap::*AllocFunc)(VkDeviceSize size, VkDeviceSize alignment, - uint32_t memoryTypeIndex, uint32_t heapIndex, - GrVkAlloc* alloc); - - bool subAlloc(VkDeviceSize size, VkDeviceSize alignment, - uint32_t memoryTypeIndex, uint32_t heapIndex, - GrVkAlloc* alloc); - bool singleAlloc(VkDeviceSize size, VkDeviceSize alignment, - uint32_t memoryTypeIndex, uint32_t heapIndex, - GrVkAlloc* alloc); + // Helper for aligning and setting VkMappedMemoryRange for flushing/invalidating noncoherent + // memory. + void GetNonCoherentMappedMemoryRange(const GrVkAlloc&, VkDeviceSize offset, VkDeviceSize size, + VkDeviceSize alignment, VkMappedMemoryRange*); +} - const GrVkGpu* fGpu; - VkDeviceSize fSubHeapSize; - VkDeviceSize fAllocSize; - VkDeviceSize fUsedSize; - AllocFunc fAllocFunc; - SkTArray<std::unique_ptr<GrVkSubHeap>> fSubHeaps; -}; #endif |
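For reference, the "fake allocation size" hack in GrVkAMDMemoryAllocator::getAllocInfo() boils down to a single round-up. This is an illustrative helper, not code from the CL, and the function name is made up:

    // Round a non-coherent suballocation's reported size up to nonCoherentAtomSize
    // so that flush/invalidate ranges later derived from it are already legally
    // aligned. The surrounding code asserts nonCoherentAtomSize is a power of two,
    // so a mask suffices.
    static VkDeviceSize pad_noncoherent_size(VkDeviceSize size, VkDeviceSize atomSize) {
        return (size + atomSize - 1) & ~(atomSize - 1);
    }

    // e.g. pad_noncoherent_size(1000, 64) == 1024. Per the comment in the diff this
    // cannot spill into a neighboring suballocation: suballocation offsets and the
    // total VkDeviceMemory size are multiples of the maximum alignment (currently
    // 256), and the allocator always maps the entire VkDeviceMemory.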