-rw-r--r--  gn/tests.gni                           |   1
-rw-r--r--  include/gpu/vk/GrVkBackendContext.h    |   4
-rw-r--r--  include/gpu/vk/GrVkTypes.h             |   2
-rw-r--r--  src/gpu/vk/GrVkAMDMemoryAllocator.cpp  |  44
-rw-r--r--  src/gpu/vk/GrVkBackendContext.cpp      |   3
-rw-r--r--  src/gpu/vk/GrVkBuffer.cpp              |  37
-rw-r--r--  src/gpu/vk/GrVkBuffer.h                |   5
-rw-r--r--  src/gpu/vk/GrVkGpu.cpp                 |  83
-rw-r--r--  src/gpu/vk/GrVkGpu.h                   |  28
-rw-r--r--  src/gpu/vk/GrVkMemory.cpp              | 661
-rw-r--r--  src/gpu/vk/GrVkMemory.h                | 138
-rw-r--r--  tests/VkHeapTests.cpp                  | 239
12 files changed, 1056 insertions, 189 deletions
diff --git a/gn/tests.gni b/gn/tests.gni index 5b4594099b..a359ca3e3c 100644 --- a/gn/tests.gni +++ b/gn/tests.gni @@ -275,6 +275,7 @@ tests_sources = [ "$_tests/UtilsTest.cpp", "$_tests/VerticesTest.cpp", "$_tests/VkBackendSurfaceTest.cpp", + "$_tests/VkHeapTests.cpp", "$_tests/VkMakeCopyPipelineTest.cpp", "$_tests/VkUploadPixelsTests.cpp", "$_tests/VkWrapTests.cpp", diff --git a/include/gpu/vk/GrVkBackendContext.h b/include/gpu/vk/GrVkBackendContext.h index 212362873a..fdc71d373f 100644 --- a/include/gpu/vk/GrVkBackendContext.h +++ b/include/gpu/vk/GrVkBackendContext.h @@ -13,8 +13,6 @@ #include "vk/GrVkDefines.h" #include "vk/GrVkInterface.h" -class GrVkMemoryAllocator; - enum GrVkExtensionFlags { kEXT_debug_report_GrVkExtensionFlag = 0x0001, kNV_glsl_shader_GrVkExtensionFlag = 0x0002, @@ -47,8 +45,6 @@ struct SK_API GrVkBackendContext : public SkRefCnt { uint32_t fExtensions; uint32_t fFeatures; sk_sp<const GrVkInterface> fInterface; - sk_sp<GrVkMemoryAllocator> fMemoryAllocator; - /** * Controls whether this object destroys the instance and device upon destruction. The default * is temporarily 'true' to avoid breaking existing clients but will be changed to 'false'. diff --git a/include/gpu/vk/GrVkTypes.h b/include/gpu/vk/GrVkTypes.h index 9225e92778..2e31250324 100644 --- a/include/gpu/vk/GrVkTypes.h +++ b/include/gpu/vk/GrVkTypes.h @@ -10,7 +10,7 @@ #define GrVkTypes_DEFINED #include "GrTypes.h" -#include "GrVkDefines.h" +#include "vk/GrVkDefines.h" /** * KHR_debug diff --git a/src/gpu/vk/GrVkAMDMemoryAllocator.cpp b/src/gpu/vk/GrVkAMDMemoryAllocator.cpp index 93e2fff494..0b838ece3a 100644 --- a/src/gpu/vk/GrVkAMDMemoryAllocator.cpp +++ b/src/gpu/vk/GrVkAMDMemoryAllocator.cpp @@ -8,7 +8,6 @@ #include "GrVkAMDMemoryAllocator.h" #include "vk/GrVkInterface.h" -#include "GrVkMemory.h" #include "GrVkUtil.h" GrVkAMDMemoryAllocator::GrVkAMDMemoryAllocator(VkPhysicalDevice physicalDevice, @@ -43,10 +42,7 @@ GrVkAMDMemoryAllocator::GrVkAMDMemoryAllocator(VkPhysicalDevice physicalDevice, info.flags = 0; info.physicalDevice = physicalDevice; info.device = device; - // Manually testing runs of dm using 64 here instead of the default 256 shows less memory usage - // on average. Also dm seems to run faster using 64 so it doesn't seem to be trading off speed - // for memory. 
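For context, preferredLargeHeapBlockSize controls how large each VkDeviceMemory block the AMD Vulkan Memory Allocator suballocates from will be; 0 selects the library default (256 MB, per the removed comment, versus the 64 MB override being reverted). A minimal sketch of this allocator setup, with illustrative values rather than Skia's:

    #include "vk_mem_alloc.h"  // AMD Vulkan Memory Allocator

    VmaAllocatorCreateInfo info = {};
    info.physicalDevice = physicalDevice;  // assumed valid Vulkan handles
    info.device = device;
    // 0 = use VMA's built-in default block size (256 MiB)
    info.preferredLargeHeapBlockSize = 0;

    VmaAllocator allocator;
    VkResult err = vmaCreateAllocator(&info, &allocator);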
- info.preferredLargeHeapBlockSize = 64*1024*1024; + info.preferredLargeHeapBlockSize = 0; info.pAllocationCallbacks = nullptr; info.pDeviceMemoryCallbacks = nullptr; info.frameInUseCount = 0; @@ -202,9 +198,24 @@ void GrVkAMDMemoryAllocator::flushMappedMemory(const GrVkBackendMemory& memoryHa vmaGetPhysicalDeviceProperties(fAllocator, &physDevProps); VkDeviceSize alignment = physDevProps->limits.nonCoherentAtomSize; + offset = offset + info.fOffset; + VkDeviceSize offsetDiff = offset & (alignment -1); + offset = offset - offsetDiff; + size = (size + alignment - 1) & ~(alignment - 1); +#ifdef SK_DEBUG + SkASSERT(offset >= info.fOffset); + SkASSERT(offset + size <= info.fOffset + info.fSize); + SkASSERT(0 == (offset & (alignment-1))); + SkASSERT(size > 0); + SkASSERT(0 == (size & (alignment-1))); +#endif + VkMappedMemoryRange mappedMemoryRange; - GrVkMemory::GetNonCoherentMappedMemoryRange(info, offset, size, alignment, - &mappedMemoryRange); + memset(&mappedMemoryRange, 0, sizeof(VkMappedMemoryRange)); + mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + mappedMemoryRange.memory = info.fMemory; + mappedMemoryRange.offset = offset; + mappedMemoryRange.size = size; GR_VK_CALL(fInterface, FlushMappedMemoryRanges(fDevice, 1, &mappedMemoryRange)); } } @@ -220,9 +231,24 @@ void GrVkAMDMemoryAllocator::invalidateMappedMemory(const GrVkBackendMemory& mem vmaGetPhysicalDeviceProperties(fAllocator, &physDevProps); VkDeviceSize alignment = physDevProps->limits.nonCoherentAtomSize; + offset = offset + info.fOffset; + VkDeviceSize offsetDiff = offset & (alignment -1); + offset = offset - offsetDiff; + size = (size + alignment - 1) & ~(alignment - 1); +#ifdef SK_DEBUG + SkASSERT(offset >= info.fOffset); + SkASSERT(offset + size <= info.fOffset + info.fSize); + SkASSERT(0 == (offset & (alignment-1))); + SkASSERT(size > 0); + SkASSERT(0 == (size & (alignment-1))); +#endif + VkMappedMemoryRange mappedMemoryRange; - GrVkMemory::GetNonCoherentMappedMemoryRange(info, offset, size, alignment, - &mappedMemoryRange); + memset(&mappedMemoryRange, 0, sizeof(VkMappedMemoryRange)); + mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + mappedMemoryRange.memory = info.fMemory; + mappedMemoryRange.offset = offset; + mappedMemoryRange.size = size; GR_VK_CALL(fInterface, InvalidateMappedMemoryRanges(fDevice, 1, &mappedMemoryRange)); } } diff --git a/src/gpu/vk/GrVkBackendContext.cpp b/src/gpu/vk/GrVkBackendContext.cpp index 196b141493..269a8911e4 100644 --- a/src/gpu/vk/GrVkBackendContext.cpp +++ b/src/gpu/vk/GrVkBackendContext.cpp @@ -8,7 +8,7 @@ #include "SkAutoMalloc.h" #include "vk/GrVkBackendContext.h" #include "vk/GrVkExtensions.h" -#include "vk/GrVkMemoryAllocator.h" +#include "vk/GrVkInterface.h" #include "vk/GrVkUtil.h" //////////////////////////////////////////////////////////////////////////////// @@ -323,7 +323,6 @@ const GrVkBackendContext* GrVkBackendContext::Create(uint32_t* presentQueueIndex } GrVkBackendContext::~GrVkBackendContext() { - fMemoryAllocator.reset(); if (fInterface == nullptr || !fOwnsInstanceAndDevice) { return; } diff --git a/src/gpu/vk/GrVkBuffer.cpp b/src/gpu/vk/GrVkBuffer.cpp index b3c1d825aa..f65b15ded0 100644 --- a/src/gpu/vk/GrVkBuffer.cpp +++ b/src/gpu/vk/GrVkBuffer.cpp @@ -170,10 +170,28 @@ void GrVkBuffer::internalMap(GrVkGpu* gpu, size_t size, bool* createdNewBuffer) if (fDesc.fDynamic) { const GrVkAlloc& alloc = this->alloc(); SkASSERT(alloc.fSize > 0); - SkASSERT(alloc.fSize >= size); - SkASSERT(0 == fOffset); - fMapPtr = GrVkMemory::MapAlloc(gpu, 
alloc); + // For Noncoherent buffers we want to make sure the range that we map, both offset and size, + // are aligned to the nonCoherentAtomSize limit. The offset should have been correctly + // aligned by our memory allocator. For size we pad out to make the range also aligned. + if (SkToBool(alloc.fFlags & GrVkAlloc::kNoncoherent_Flag)) { + // Currently we always have the internal offset as 0. + SkASSERT(0 == fOffset); + VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize; + SkASSERT(0 == (alloc.fOffset & (alignment - 1))); + + // Make size of the map aligned to nonCoherentAtomSize + size = (size + alignment - 1) & ~(alignment - 1); + fMappedSize = size; + } + SkASSERT(size + fOffset <= alloc.fSize); + VkResult err = VK_CALL(gpu, MapMemory(gpu->device(), alloc.fMemory, + alloc.fOffset + fOffset, + size, 0, &fMapPtr)); + if (err) { + fMapPtr = nullptr; + fMappedSize = 0; + } } else { if (!fMapPtr) { fMapPtr = new unsigned char[this->size()]; @@ -188,15 +206,16 @@ void GrVkBuffer::internalUnmap(GrVkGpu* gpu, size_t size) { SkASSERT(this->vkIsMapped()); if (fDesc.fDynamic) { - const GrVkAlloc& alloc = this->alloc(); - SkASSERT(alloc.fSize > 0); - SkASSERT(alloc.fSize >= size); // We currently don't use fOffset SkASSERT(0 == fOffset); + VkDeviceSize flushOffset = this->alloc().fOffset + fOffset; + VkDeviceSize flushSize = gpu->vkCaps().canUseWholeSizeOnFlushMappedMemory() ? VK_WHOLE_SIZE + : fMappedSize; - GrVkMemory::FlushMappedAlloc(gpu, alloc, 0, size); - GrVkMemory::UnmapAlloc(gpu, alloc); + GrVkMemory::FlushMappedAlloc(gpu, this->alloc(), flushOffset, flushSize); + VK_CALL(gpu, UnmapMemory(gpu->device(), this->alloc().fMemory)); fMapPtr = nullptr; + fMappedSize = 0; } else { // vkCmdUpdateBuffer requires size < 64k and 4-byte alignment. // https://bugs.chromium.org/p/skia/issues/detail?id=7488 @@ -205,7 +224,7 @@ void GrVkBuffer::internalUnmap(GrVkGpu* gpu, size_t size) { } else { GrVkTransferBuffer* transferBuffer = GrVkTransferBuffer::Create(gpu, size, GrVkBuffer::kCopyRead_Type); - if (!transferBuffer) { + if(!transferBuffer) { return; } diff --git a/src/gpu/vk/GrVkBuffer.h b/src/gpu/vk/GrVkBuffer.h index 6d0c1fda9a..8d116a40f8 100644 --- a/src/gpu/vk/GrVkBuffer.h +++ b/src/gpu/vk/GrVkBuffer.h @@ -82,7 +82,7 @@ protected: const Desc& descriptor); GrVkBuffer(const Desc& desc, const GrVkBuffer::Resource* resource) - : fDesc(desc), fResource(resource), fOffset(0), fMapPtr(nullptr) { + : fDesc(desc), fResource(resource), fOffset(0), fMapPtr(nullptr), fMappedSize(0) { } void* vkMap(GrVkGpu* gpu) { @@ -115,6 +115,9 @@ private: const Resource* fResource; VkDeviceSize fOffset; void* fMapPtr; + // On certain Intel devices/drivers there is a bug if we try to flush non-coherent memory and + // pass in VK_WHOLE_SIZE. 
Thus we track our mapped size and explicitly set it when calling flush + VkDeviceSize fMappedSize; typedef SkNoncopyable INHERITED; }; diff --git a/src/gpu/vk/GrVkGpu.cpp b/src/gpu/vk/GrVkGpu.cpp index 2525c5c16c..56d0b95bd0 100644 --- a/src/gpu/vk/GrVkGpu.cpp +++ b/src/gpu/vk/GrVkGpu.cpp @@ -17,7 +17,6 @@ #include "GrRenderTargetPriv.h" #include "GrTexturePriv.h" -#include "GrVkAMDMemoryAllocator.h" #include "GrVkCommandBuffer.h" #include "GrVkGpuCommandBuffer.h" #include "GrVkImage.h" @@ -93,7 +92,6 @@ GrVkGpu::GrVkGpu(GrContext* context, const GrContextOptions& options, sk_sp<const GrVkBackendContext> backendCtx) : INHERITED(context) , fBackendContext(std::move(backendCtx)) - , fMemoryAllocator(fBackendContext->fMemoryAllocator) , fDevice(fBackendContext->fDevice) , fQueue(fBackendContext->fQueue) , fResourceProvider(this) @@ -120,12 +118,6 @@ GrVkGpu::GrVkGpu(GrContext* context, const GrContextOptions& options, } #endif - if (!fMemoryAllocator) { - // We were not given a memory allocator at creation - fMemoryAllocator.reset(new GrVkAMDMemoryAllocator(fBackendContext->fPhysicalDevice, - fDevice, fBackendContext->fInterface)); - } - fCompiler = new SkSL::Compiler(); fVkCaps.reset(new GrVkCaps(options, this->vkInterface(), fBackendContext->fPhysicalDevice, @@ -150,6 +142,17 @@ GrVkGpu::GrVkGpu(GrContext* context, const GrContextOptions& options, fCurrentCmdBuffer = fResourceProvider.findOrCreatePrimaryCommandBuffer(); SkASSERT(fCurrentCmdBuffer); fCurrentCmdBuffer->begin(this); + + // set up our heaps + fHeaps[kLinearImage_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 16*1024*1024)); + fHeaps[kOptimalImage_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 64*1024*1024)); + fHeaps[kSmallOptimalImage_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 2*1024*1024)); + fHeaps[kVertexBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSingleAlloc_Strategy, 0)); + fHeaps[kIndexBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSingleAlloc_Strategy, 0)); + fHeaps[kUniformBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 256*1024)); + fHeaps[kTexelBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSingleAlloc_Strategy, 0)); + fHeaps[kCopyReadBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSingleAlloc_Strategy, 0)); + fHeaps[kCopyWriteBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 16*1024*1024)); } void GrVkGpu::destroyResources() { @@ -559,6 +562,7 @@ bool GrVkGpu::uploadTexDataLinear(GrVkTexture* tex, GrSurfaceOrigin texOrigin, i 0, // arraySlice }; VkSubresourceLayout layout; + VkResult err; const GrVkInterface* interface = this->vkInterface(); @@ -569,14 +573,28 @@ bool GrVkGpu::uploadTexDataLinear(GrVkTexture* tex, GrSurfaceOrigin texOrigin, i int texTop = kBottomLeft_GrSurfaceOrigin == texOrigin ? tex->height() - top - height : top; const GrVkAlloc& alloc = tex->alloc(); - VkDeviceSize offset = texTop*layout.rowPitch + left*bpp; + VkDeviceSize offset = alloc.fOffset + texTop*layout.rowPitch + left*bpp; + VkDeviceSize offsetDiff = 0; VkDeviceSize size = height*layout.rowPitch; - SkASSERT(size + offset <= alloc.fSize); - void* mapPtr = GrVkMemory::MapAlloc(this, alloc); - if (!mapPtr) { + // For Noncoherent buffers we want to make sure the range that we map, both offset and size, + // are aligned to the nonCoherentAtomSize limit. We may have to move the initial offset back to + // meet the alignment requirements. So we track how far we move back and then adjust the mapped + // ptr back up so that this is opaque to the caller. 
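Every non-coherent alignment fix-up in this change follows the same pattern: round the start of the mapped range down to the previous nonCoherentAtomSize boundary, remember how far it moved so the returned pointer can be advanced by the same amount, and round the length up to the next boundary. A standalone sketch (the helper name is hypothetical; the bit masks rely on the power-of-two property that the SkASSERT(SkIsPow2(...)) checks elsewhere in this patch enforce):

    // Returns how far 'offset' was moved back; the caller adds this to the
    // pointer vkMapMemory returns so callers see the address they asked for.
    static VkDeviceSize align_mapped_range(VkDeviceSize alignment,
                                           VkDeviceSize* offset,
                                           VkDeviceSize* size) {
        VkDeviceSize offsetDiff = *offset & (alignment - 1); // distance past boundary
        *offset -= offsetDiff;                               // round start down
        *size = (*size + alignment - 1) & ~(alignment - 1);  // round length up
        return offsetDiff;
    }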
+ if (SkToBool(alloc.fFlags & GrVkAlloc::kNoncoherent_Flag)) { + VkDeviceSize alignment = this->physicalDeviceProperties().limits.nonCoherentAtomSize; + offsetDiff = offset & (alignment - 1); + offset = offset - offsetDiff; + // Make size of the map aligned to nonCoherentAtomSize + size = (size + alignment - 1) & ~(alignment - 1); + } + SkASSERT(offset >= alloc.fOffset); + SkASSERT(size <= alloc.fOffset + alloc.fSize); + void* mapPtr; + err = GR_VK_CALL(interface, MapMemory(fDevice, alloc.fMemory, offset, size, 0, &mapPtr)); + if (err) { return false; } - mapPtr = reinterpret_cast<char*>(mapPtr) + offset; + mapPtr = reinterpret_cast<char*>(mapPtr) + offsetDiff; if (kBottomLeft_GrSurfaceOrigin == texOrigin) { // copy into buffer by rows @@ -593,7 +611,7 @@ bool GrVkGpu::uploadTexDataLinear(GrVkTexture* tex, GrSurfaceOrigin texOrigin, i } GrVkMemory::FlushMappedAlloc(this, alloc, offset, size); - GrVkMemory::UnmapAlloc(this, alloc); + GR_VK_CALL(interface, UnmapMemory(fDevice, alloc.fMemory)); return true; } @@ -1129,14 +1147,33 @@ GrStencilAttachment* GrVkGpu::createStencilAttachmentForRenderTarget(const GrRen bool copy_testing_data(GrVkGpu* gpu, const void* srcData, const GrVkAlloc& alloc, size_t bufferOffset, size_t srcRowBytes, size_t dstRowBytes, int h) { - VkDeviceSize size = dstRowBytes * h; - VkDeviceSize offset = bufferOffset; - SkASSERT(size + offset <= alloc.fSize); - void* mapPtr = GrVkMemory::MapAlloc(gpu, alloc); - if (!mapPtr) { + // For Noncoherent buffers we want to make sure the range that we map, both offset and size, + // are aligned to the nonCoherentAtomSize limit. We may have to move the initial offset back to + // meet the alignment requirements. So we track how far we move back and then adjust the mapped + // ptr back up so that this is opaque to the caller. + VkDeviceSize mapSize = dstRowBytes * h; + VkDeviceSize mapOffset = alloc.fOffset + bufferOffset; + VkDeviceSize offsetDiff = 0; + if (SkToBool(alloc.fFlags & GrVkAlloc::kNoncoherent_Flag)) { + VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize; + offsetDiff = mapOffset & (alignment - 1); + mapOffset = mapOffset - offsetDiff; + // Make size of the map aligned to nonCoherentAtomSize + mapSize = (mapSize + alignment - 1) & ~(alignment - 1); + } + SkASSERT(mapOffset >= alloc.fOffset); + SkASSERT(mapSize + mapOffset <= alloc.fOffset + alloc.fSize); + void* mapPtr; + VkResult err = GR_VK_CALL(gpu->vkInterface(), MapMemory(gpu->device(), + alloc.fMemory, + mapOffset, + mapSize, + 0, + &mapPtr)); + mapPtr = reinterpret_cast<char*>(mapPtr) + offsetDiff; + if (err) { return false; } - mapPtr = reinterpret_cast<char*>(mapPtr) + offset; if (srcData) { // If there is no padding on dst we can do a single memcopy. 
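The copy body elided by the hunk boundary here is the usual pitched copy: when source and destination row pitches match, the rows are contiguous and one memcpy of dstRowBytes * h covers everything; otherwise each row is copied at its own pitch. A minimal sketch (names hypothetical):

    #include <cstring>

    static void copy_pitched_rows(void* dst, const void* src,
                                  size_t dstRowBytes, size_t srcRowBytes, int h) {
        if (dstRowBytes == srcRowBytes) {
            memcpy(dst, src, dstRowBytes * static_cast<size_t>(h));  // no padding: single copy
            return;
        }
        char* d = static_cast<char*>(dst);
        const char* s = static_cast<const char*>(src);
        for (int y = 0; y < h; ++y) {  // copy row by row, honoring each pitch
            memcpy(d, s, srcRowBytes);
            d += dstRowBytes;
            s += srcRowBytes;
        }
    }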
@@ -1155,8 +1192,8 @@ bool copy_testing_data(GrVkGpu* gpu, const void* srcData, const GrVkAlloc& alloc } } } - GrVkMemory::FlushMappedAlloc(gpu, alloc, offset, size); - GrVkMemory::UnmapAlloc(gpu, alloc); + GrVkMemory::FlushMappedAlloc(gpu, alloc, mapOffset, mapSize); + GR_VK_CALL(gpu->vkInterface(), UnmapMemory(gpu->device(), alloc.fMemory)); return true; } @@ -1980,7 +2017,7 @@ bool GrVkGpu::onReadPixels(GrSurface* surface, GrSurfaceOrigin origin, int left, this->submitCommandBuffer(kForce_SyncQueue); void* mappedMemory = transferBuffer->map(); const GrVkAlloc& transAlloc = transferBuffer->alloc(); - GrVkMemory::InvalidateMappedAlloc(this, transAlloc, 0, transAlloc.fSize); + GrVkMemory::InvalidateMappedAlloc(this, transAlloc, transAlloc.fOffset, VK_WHOLE_SIZE); if (copyFromOrigin) { uint32_t skipRows = region.imageExtent.height - height; diff --git a/src/gpu/vk/GrVkGpu.h b/src/gpu/vk/GrVkGpu.h index a44ea7230f..7bdfbeaab3 100644 --- a/src/gpu/vk/GrVkGpu.h +++ b/src/gpu/vk/GrVkGpu.h @@ -23,7 +23,6 @@ class GrPipeline; class GrVkBufferImpl; -class GrVkMemoryAllocator; class GrVkPipeline; class GrVkPipelineState; class GrVkPrimaryCommandBuffer; @@ -47,8 +46,6 @@ public: const GrVkInterface* vkInterface() const { return fBackendContext->fInterface.get(); } const GrVkCaps& vkCaps() const { return *fVkCaps; } - GrVkMemoryAllocator* memoryAllocator() const { return fMemoryAllocator.get(); } - VkDevice device() const { return fDevice; } VkQueue queue() const { return fQueue; } VkCommandPool cmdPool() const { return fCmdPool; } @@ -143,6 +140,28 @@ public: VkDeviceSize dstOffset, VkDeviceSize size); bool updateBuffer(GrVkBuffer* buffer, const void* src, VkDeviceSize offset, VkDeviceSize size); + // Heaps + enum Heap { + kLinearImage_Heap = 0, + // We separate out small (i.e., <= 16K) images to reduce fragmentation + // in the main heap. + kOptimalImage_Heap, + kSmallOptimalImage_Heap, + // We have separate vertex and image heaps, because it's possible that + // a given Vulkan driver may allocate them separately. + kVertexBuffer_Heap, + kIndexBuffer_Heap, + kUniformBuffer_Heap, + kTexelBuffer_Heap, + kCopyReadBuffer_Heap, + kCopyWriteBuffer_Heap, + + kLastHeap = kCopyWriteBuffer_Heap + }; + static const int kHeapCount = kLastHeap + 1; + + GrVkHeap* getHeap(Heap heap) const { return fHeaps[heap].get(); } + private: GrVkGpu(GrContext*, const GrContextOptions&, sk_sp<const GrVkBackendContext> backendContext); @@ -232,7 +251,6 @@ private: #endif sk_sp<const GrVkBackendContext> fBackendContext; - sk_sp<GrVkMemoryAllocator> fMemoryAllocator; sk_sp<GrVkCaps> fVkCaps; // These Vulkan objects are provided by the client, and also stored in fBackendContext. 
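Tying the heap enum above to the allocation paths later in GrVkMemory.cpp: linear-tiled (CPU-accessible) images get their own heap, and optimal-tiled images split on the 16K kMaxSmallImageSize cutoff so small images don't fragment the main image heap. A sketch of that selection (the helper is hypothetical; the branches mirror FreeImageMemory):

    GrVkGpu::Heap choose_image_heap(bool linearTiling, VkDeviceSize size) {
        if (linearTiling) {
            return GrVkGpu::kLinearImage_Heap;  // host-visible images
        }
        return size <= 16 * 1024 ? GrVkGpu::kSmallOptimalImage_Heap
                                 : GrVkGpu::kOptimalImage_Heap;
    }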
@@ -252,6 +270,8 @@ private: VkPhysicalDeviceProperties fPhysDevProps; VkPhysicalDeviceMemoryProperties fPhysDevMemProps; + std::unique_ptr<GrVkHeap> fHeaps[kHeapCount]; + GrVkCopyManager fCopyManager; #ifdef SK_ENABLE_VK_LAYERS diff --git a/src/gpu/vk/GrVkMemory.cpp b/src/gpu/vk/GrVkMemory.cpp index f999c26546..4f619a3ef3 100644 --- a/src/gpu/vk/GrVkMemory.cpp +++ b/src/gpu/vk/GrVkMemory.cpp @@ -9,26 +9,49 @@ #include "GrVkGpu.h" #include "GrVkUtil.h" -#include "vk/GrVkMemoryAllocator.h" -using AllocationPropertyFlags = GrVkMemoryAllocator::AllocationPropertyFlags; -using BufferUsage = GrVkMemoryAllocator::BufferUsage; +#ifdef SK_DEBUG +// for simple tracking of how much we're using in each heap +// last counter is for non-subheap allocations +VkDeviceSize gHeapUsage[VK_MAX_MEMORY_HEAPS+1] = { 0 }; +#endif -static BufferUsage get_buffer_usage(GrVkBuffer::Type type, bool dynamic) { - switch (type) { - case GrVkBuffer::kVertex_Type: // fall through - case GrVkBuffer::kIndex_Type: // fall through - case GrVkBuffer::kTexel_Type: - return dynamic ? BufferUsage::kCpuWritesGpuReads : BufferUsage::kGpuOnly; - case GrVkBuffer::kUniform_Type: - SkASSERT(dynamic); - return BufferUsage::kCpuWritesGpuReads; - case GrVkBuffer::kCopyRead_Type: // fall through - case GrVkBuffer::kCopyWrite_Type: - return BufferUsage::kCpuOnly; +static bool get_valid_memory_type_index(const VkPhysicalDeviceMemoryProperties& physDevMemProps, + uint32_t typeBits, + VkMemoryPropertyFlags requestedMemFlags, + uint32_t* typeIndex, + uint32_t* heapIndex) { + for (uint32_t i = 0; i < physDevMemProps.memoryTypeCount; ++i) { + if (typeBits & (1 << i)) { + uint32_t supportedFlags = physDevMemProps.memoryTypes[i].propertyFlags & + requestedMemFlags; + if (supportedFlags == requestedMemFlags) { + *typeIndex = i; + *heapIndex = physDevMemProps.memoryTypes[i].heapIndex; + return true; + } + } } - SK_ABORT("Invalid GrVkBuffer::Type"); - return BufferUsage::kCpuOnly; // Just returning an arbitrary value. 
+ return false; +} + +static GrVkGpu::Heap buffer_type_to_heap(GrVkBuffer::Type type) { + const GrVkGpu::Heap kBufferToHeap[]{ + GrVkGpu::kVertexBuffer_Heap, + GrVkGpu::kIndexBuffer_Heap, + GrVkGpu::kUniformBuffer_Heap, + GrVkGpu::kTexelBuffer_Heap, + GrVkGpu::kCopyReadBuffer_Heap, + GrVkGpu::kCopyWriteBuffer_Heap, + }; + GR_STATIC_ASSERT(0 == GrVkBuffer::kVertex_Type); + GR_STATIC_ASSERT(1 == GrVkBuffer::kIndex_Type); + GR_STATIC_ASSERT(2 == GrVkBuffer::kUniform_Type); + GR_STATIC_ASSERT(3 == GrVkBuffer::kTexel_Type); + GR_STATIC_ASSERT(4 == GrVkBuffer::kCopyRead_Type); + GR_STATIC_ASSERT(5 == GrVkBuffer::kCopyWrite_Type); + + return kBufferToHeap[type]; } bool GrVkMemory::AllocAndBindBufferMemory(const GrVkGpu* gpu, @@ -36,23 +59,68 @@ bool GrVkMemory::AllocAndBindBufferMemory(const GrVkGpu* gpu, GrVkBuffer::Type type, bool dynamic, GrVkAlloc* alloc) { - GrVkMemoryAllocator* allocator = gpu->memoryAllocator(); - GrVkBackendMemory memory = 0; + const GrVkInterface* iface = gpu->vkInterface(); + VkDevice device = gpu->device(); - GrVkMemoryAllocator::BufferUsage usage = get_buffer_usage(type, dynamic); + VkMemoryRequirements memReqs; + GR_VK_CALL(iface, GetBufferMemoryRequirements(device, buffer, &memReqs)); - if (!allocator->allocateMemoryForBuffer(buffer, usage, AllocationPropertyFlags::kNone, - &memory)) { - return false; + uint32_t typeIndex = 0; + uint32_t heapIndex = 0; + const VkPhysicalDeviceMemoryProperties& phDevMemProps = gpu->physicalDeviceMemoryProperties(); + const VkPhysicalDeviceProperties& phDevProps = gpu->physicalDeviceProperties(); + if (dynamic) { + // try to get cached and ideally non-coherent memory first + if (!get_valid_memory_type_index(phDevMemProps, + memReqs.memoryTypeBits, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT, + &typeIndex, + &heapIndex)) { + // some sort of host-visible memory type should always be available for dynamic buffers + SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps, + memReqs.memoryTypeBits, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, + &typeIndex, + &heapIndex)); + } + + VkMemoryPropertyFlags mpf = phDevMemProps.memoryTypes[typeIndex].propertyFlags; + alloc->fFlags = mpf & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT ? 
0x0 + : GrVkAlloc::kNoncoherent_Flag; + if (SkToBool(alloc->fFlags & GrVkAlloc::kNoncoherent_Flag)) { + SkASSERT(SkIsPow2(memReqs.alignment)); + SkASSERT(SkIsPow2(phDevProps.limits.nonCoherentAtomSize)); + memReqs.alignment = SkTMax(memReqs.alignment, phDevProps.limits.nonCoherentAtomSize); + } + } else { + // device-local memory should always be available for static buffers + SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps, + memReqs.memoryTypeBits, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + &typeIndex, + &heapIndex)); + alloc->fFlags = 0x0; + } + + GrVkHeap* heap = gpu->getHeap(buffer_type_to_heap(type)); + + if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) { + // if static, try to allocate from non-host-visible non-device-local memory instead + if (dynamic || + !get_valid_memory_type_index(phDevMemProps, memReqs.memoryTypeBits, + 0, &typeIndex, &heapIndex) || + !heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) { + SkDebugf("Failed to alloc buffer\n"); + return false; + } } - allocator->getAllocInfo(memory, alloc); // Bind buffer - VkResult err = GR_VK_CALL(gpu->vkInterface(), BindBufferMemory(gpu->device(), buffer, - alloc->fMemory, - alloc->fOffset)); + VkResult err = GR_VK_CALL(iface, BindBufferMemory(device, buffer, + alloc->fMemory, alloc->fOffset)); if (err) { - FreeBufferMemory(gpu, type, *alloc); + SkASSERT_RELEASE(heap->free(*alloc)); return false; } @@ -61,152 +129,503 @@ bool GrVkMemory::AllocAndBindBufferMemory(const GrVkGpu* gpu, void GrVkMemory::FreeBufferMemory(const GrVkGpu* gpu, GrVkBuffer::Type type, const GrVkAlloc& alloc) { - if (alloc.fBackendMemory) { - GrVkMemoryAllocator* allocator = gpu->memoryAllocator(); - allocator->freeMemory(alloc.fBackendMemory); - } else { - GR_VK_CALL(gpu->vkInterface(), FreeMemory(gpu->device(), alloc.fMemory, nullptr)); - } + + GrVkHeap* heap = gpu->getHeap(buffer_type_to_heap(type)); + SkASSERT_RELEASE(heap->free(alloc)); } +// for debugging +static uint64_t gTotalImageMemory = 0; +static uint64_t gTotalImageMemoryFullPage = 0; + const VkDeviceSize kMaxSmallImageSize = 16 * 1024; +const VkDeviceSize kMinVulkanPageSize = 16 * 1024; + +static VkDeviceSize align_size(VkDeviceSize size, VkDeviceSize alignment) { + return (size + alignment - 1) & ~(alignment - 1); +} bool GrVkMemory::AllocAndBindImageMemory(const GrVkGpu* gpu, VkImage image, bool linearTiling, GrVkAlloc* alloc) { - SkASSERT(!linearTiling); - GrVkMemoryAllocator* allocator = gpu->memoryAllocator(); - GrVkBackendMemory memory = 0; + const GrVkInterface* iface = gpu->vkInterface(); + VkDevice device = gpu->device(); VkMemoryRequirements memReqs; - GR_VK_CALL(gpu->vkInterface(), GetImageMemoryRequirements(gpu->device(), image, &memReqs)); + GR_VK_CALL(iface, GetImageMemoryRequirements(device, image, &memReqs)); - AllocationPropertyFlags propFlags; - if (memReqs.size <= kMaxSmallImageSize) { - propFlags = AllocationPropertyFlags::kNone; + uint32_t typeIndex = 0; + uint32_t heapIndex = 0; + GrVkHeap* heap; + const VkPhysicalDeviceMemoryProperties& phDevMemProps = gpu->physicalDeviceMemoryProperties(); + const VkPhysicalDeviceProperties& phDevProps = gpu->physicalDeviceProperties(); + if (linearTiling) { + VkMemoryPropertyFlags desiredMemProps = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + if (!get_valid_memory_type_index(phDevMemProps, + memReqs.memoryTypeBits, + desiredMemProps, + &typeIndex, + &heapIndex)) { + // some sort of host-visible memory type should always be available + 
SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps, + memReqs.memoryTypeBits, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, + &typeIndex, + &heapIndex)); + } + heap = gpu->getHeap(GrVkGpu::kLinearImage_Heap); + VkMemoryPropertyFlags mpf = phDevMemProps.memoryTypes[typeIndex].propertyFlags; + alloc->fFlags = mpf & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT ? 0x0 + : GrVkAlloc::kNoncoherent_Flag; + if (SkToBool(alloc->fFlags & GrVkAlloc::kNoncoherent_Flag)) { + SkASSERT(SkIsPow2(memReqs.alignment)); + SkASSERT(SkIsPow2(phDevProps.limits.nonCoherentAtomSize)); + memReqs.alignment = SkTMax(memReqs.alignment, phDevProps.limits.nonCoherentAtomSize); + } } else { - propFlags = AllocationPropertyFlags::kDedicatedAllocation; + // this memory type should always be available + SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps, + memReqs.memoryTypeBits, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + &typeIndex, + &heapIndex)); + if (memReqs.size <= kMaxSmallImageSize) { + heap = gpu->getHeap(GrVkGpu::kSmallOptimalImage_Heap); + } else { + heap = gpu->getHeap(GrVkGpu::kOptimalImage_Heap); + } + alloc->fFlags = 0x0; } - if (!allocator->allocateMemoryForImage(image, AllocationPropertyFlags::kDedicatedAllocation, - &memory)) { - return false; + if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) { + // if optimal, try to allocate from non-host-visible non-device-local memory instead + if (linearTiling || + !get_valid_memory_type_index(phDevMemProps, memReqs.memoryTypeBits, + 0, &typeIndex, &heapIndex) || + !heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) { + SkDebugf("Failed to alloc image\n"); + return false; + } } - allocator->getAllocInfo(memory, alloc); - // Bind buffer - VkResult err = GR_VK_CALL(gpu->vkInterface(), BindImageMemory(gpu->device(), image, - alloc->fMemory, alloc->fOffset)); + // Bind image + VkResult err = GR_VK_CALL(iface, BindImageMemory(device, image, + alloc->fMemory, alloc->fOffset)); if (err) { - FreeImageMemory(gpu, linearTiling, *alloc); + SkASSERT_RELEASE(heap->free(*alloc)); return false; } + gTotalImageMemory += alloc->fSize; + + VkDeviceSize pageAlignedSize = align_size(alloc->fSize, kMinVulkanPageSize); + gTotalImageMemoryFullPage += pageAlignedSize; + return true; } void GrVkMemory::FreeImageMemory(const GrVkGpu* gpu, bool linearTiling, const GrVkAlloc& alloc) { - if (alloc.fBackendMemory) { - GrVkMemoryAllocator* allocator = gpu->memoryAllocator(); - allocator->freeMemory(alloc.fBackendMemory); + GrVkHeap* heap; + if (linearTiling) { + heap = gpu->getHeap(GrVkGpu::kLinearImage_Heap); + } else if (alloc.fSize <= kMaxSmallImageSize) { + heap = gpu->getHeap(GrVkGpu::kSmallOptimalImage_Heap); } else { + heap = gpu->getHeap(GrVkGpu::kOptimalImage_Heap); + } + if (!heap->free(alloc)) { + // must be an adopted allocation GR_VK_CALL(gpu->vkInterface(), FreeMemory(gpu->device(), alloc.fMemory, nullptr)); + } else { + gTotalImageMemory -= alloc.fSize; + VkDeviceSize pageAlignedSize = align_size(alloc.fSize, kMinVulkanPageSize); + gTotalImageMemoryFullPage -= pageAlignedSize; } } -void* GrVkMemory::MapAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc) { - SkASSERT(GrVkAlloc::kMappable_Flag & alloc.fFlags); -#ifdef SK_DEBUG +void GrVkMemory::FlushMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc, VkDeviceSize offset, + VkDeviceSize size) { if (alloc.fFlags & GrVkAlloc::kNoncoherent_Flag) { +#ifdef SK_DEBUG + SkASSERT(offset >= alloc.fOffset); VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize; 
- SkASSERT(0 == (alloc.fOffset & (alignment-1))); - SkASSERT(0 == (alloc.fSize & (alignment-1))); + SkASSERT(0 == (offset & (alignment-1))); + if (size != VK_WHOLE_SIZE) { + SkASSERT(size > 0); + SkASSERT(0 == (size & (alignment-1)) || + (offset + size) == (alloc.fOffset + alloc.fSize)); + SkASSERT(offset + size <= alloc.fOffset + alloc.fSize); + } +#endif + + VkMappedMemoryRange mappedMemoryRange; + memset(&mappedMemoryRange, 0, sizeof(VkMappedMemoryRange)); + mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + mappedMemoryRange.memory = alloc.fMemory; + mappedMemoryRange.offset = offset; + mappedMemoryRange.size = size; + GR_VK_CALL(gpu->vkInterface(), FlushMappedMemoryRanges(gpu->device(), + 1, &mappedMemoryRange)); } +} + +void GrVkMemory::InvalidateMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc, + VkDeviceSize offset, VkDeviceSize size) { + if (alloc.fFlags & GrVkAlloc::kNoncoherent_Flag) { +#ifdef SK_DEBUG + SkASSERT(offset >= alloc.fOffset); + VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize; + SkASSERT(0 == (offset & (alignment-1))); + if (size != VK_WHOLE_SIZE) { + SkASSERT(size > 0); + SkASSERT(0 == (size & (alignment-1)) || + (offset + size) == (alloc.fOffset + alloc.fSize)); + SkASSERT(offset + size <= alloc.fOffset + alloc.fSize); + } #endif - if (alloc.fBackendMemory) { - GrVkMemoryAllocator* allocator = gpu->memoryAllocator(); - return allocator->mapMemory(alloc.fBackendMemory); + + VkMappedMemoryRange mappedMemoryRange; + memset(&mappedMemoryRange, 0, sizeof(VkMappedMemoryRange)); + mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + mappedMemoryRange.memory = alloc.fMemory; + mappedMemoryRange.offset = offset; + mappedMemoryRange.size = size; + GR_VK_CALL(gpu->vkInterface(), InvalidateMappedMemoryRanges(gpu->device(), + 1, &mappedMemoryRange)); } +} - void* mapPtr; - VkResult err = GR_VK_CALL(gpu->vkInterface(), MapMemory(gpu->device(), alloc.fMemory, - alloc.fOffset, - alloc.fSize, 0, &mapPtr)); - if (err) { - mapPtr = nullptr; +bool GrVkFreeListAlloc::alloc(VkDeviceSize requestedSize, + VkDeviceSize* allocOffset, VkDeviceSize* allocSize) { + VkDeviceSize alignedSize = align_size(requestedSize, fAlignment); + + // find the smallest block big enough for our allocation + FreeList::Iter iter = fFreeList.headIter(); + FreeList::Iter bestFitIter; + VkDeviceSize bestFitSize = fSize + 1; + VkDeviceSize secondLargestSize = 0; + VkDeviceSize secondLargestOffset = 0; + while (iter.get()) { + Block* block = iter.get(); + // need to adjust size to match desired alignment + SkASSERT(align_size(block->fOffset, fAlignment) - block->fOffset == 0); + if (block->fSize >= alignedSize && block->fSize < bestFitSize) { + bestFitIter = iter; + bestFitSize = block->fSize; + } + if (secondLargestSize < block->fSize && block->fOffset != fLargestBlockOffset) { + secondLargestSize = block->fSize; + secondLargestOffset = block->fOffset; + } + iter.next(); + } + SkASSERT(secondLargestSize <= fLargestBlockSize); + + Block* bestFit = bestFitIter.get(); + if (bestFit) { + SkASSERT(align_size(bestFit->fOffset, fAlignment) == bestFit->fOffset); + *allocOffset = bestFit->fOffset; + *allocSize = alignedSize; + // adjust or remove current block + VkDeviceSize originalBestFitOffset = bestFit->fOffset; + if (bestFit->fSize > alignedSize) { + bestFit->fOffset += alignedSize; + bestFit->fSize -= alignedSize; + if (fLargestBlockOffset == originalBestFitOffset) { + if (bestFit->fSize >= secondLargestSize) { + fLargestBlockSize = 
bestFit->fSize; + fLargestBlockOffset = bestFit->fOffset; + } else { + fLargestBlockSize = secondLargestSize; + fLargestBlockOffset = secondLargestOffset; + } + } +#ifdef SK_DEBUG + VkDeviceSize largestSize = 0; + iter = fFreeList.headIter(); + while (iter.get()) { + Block* block = iter.get(); + if (largestSize < block->fSize) { + largestSize = block->fSize; + } + iter.next(); + } + SkASSERT(largestSize == fLargestBlockSize); +#endif + } else { + SkASSERT(bestFit->fSize == alignedSize); + if (fLargestBlockOffset == originalBestFitOffset) { + fLargestBlockSize = secondLargestSize; + fLargestBlockOffset = secondLargestOffset; + } + fFreeList.remove(bestFit); +#ifdef SK_DEBUG + VkDeviceSize largestSize = 0; + iter = fFreeList.headIter(); + while (iter.get()) { + Block* block = iter.get(); + if (largestSize < block->fSize) { + largestSize = block->fSize; + } + iter.next(); + } + SkASSERT(largestSize == fLargestBlockSize); +#endif + } + fFreeSize -= alignedSize; + SkASSERT(*allocSize > 0); + + return true; } - return mapPtr; + + SkDebugf("Can't allocate %d bytes, %d bytes available, largest free block %d\n", alignedSize, fFreeSize, fLargestBlockSize); + + return false; } -void GrVkMemory::UnmapAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc) { - if (alloc.fBackendMemory) { - GrVkMemoryAllocator* allocator = gpu->memoryAllocator(); - allocator->unmapMemory(alloc.fBackendMemory); +void GrVkFreeListAlloc::free(VkDeviceSize allocOffset, VkDeviceSize allocSize) { + // find the block right after this allocation + FreeList::Iter iter = fFreeList.headIter(); + FreeList::Iter prev; + while (iter.get() && iter.get()->fOffset < allocOffset) { + prev = iter; + iter.next(); + } + // we have four cases: + // we exactly follow the previous one + Block* block; + if (prev.get() && prev.get()->fOffset + prev.get()->fSize == allocOffset) { + block = prev.get(); + block->fSize += allocSize; + if (block->fOffset == fLargestBlockOffset) { + fLargestBlockSize = block->fSize; + } + // and additionally we may exactly precede the next one + if (iter.get() && iter.get()->fOffset == allocOffset + allocSize) { + block->fSize += iter.get()->fSize; + if (iter.get()->fOffset == fLargestBlockOffset) { + fLargestBlockOffset = block->fOffset; + fLargestBlockSize = block->fSize; + } + fFreeList.remove(iter.get()); + } + // or we only exactly proceed the next one + } else if (iter.get() && iter.get()->fOffset == allocOffset + allocSize) { + block = iter.get(); + block->fSize += allocSize; + if (block->fOffset == fLargestBlockOffset) { + fLargestBlockOffset = allocOffset; + fLargestBlockSize = block->fSize; + } + block->fOffset = allocOffset; + // or we fall somewhere in between, with gaps } else { - GR_VK_CALL(gpu->vkInterface(), UnmapMemory(gpu->device(), alloc.fMemory)); + block = fFreeList.addBefore(iter); + block->fOffset = allocOffset; + block->fSize = allocSize; + } + fFreeSize += allocSize; + if (block->fSize > fLargestBlockSize) { + fLargestBlockSize = block->fSize; + fLargestBlockOffset = block->fOffset; + } + +#ifdef SK_DEBUG + VkDeviceSize largestSize = 0; + iter = fFreeList.headIter(); + while (iter.get()) { + Block* block = iter.get(); + if (largestSize < block->fSize) { + largestSize = block->fSize; + } + iter.next(); } + SkASSERT(fLargestBlockSize == largestSize); +#endif } -void GrVkMemory::GetNonCoherentMappedMemoryRange(const GrVkAlloc& alloc, VkDeviceSize offset, - VkDeviceSize size, VkDeviceSize alignment, - VkMappedMemoryRange* range) { - SkASSERT(alloc.fFlags & GrVkAlloc::kNoncoherent_Flag); - offset = 
offset + alloc.fOffset; - VkDeviceSize offsetDiff = offset & (alignment -1); - offset = offset - offsetDiff; - size = (size + alignment - 1) & ~(alignment - 1); +GrVkSubHeap::GrVkSubHeap(const GrVkGpu* gpu, uint32_t memoryTypeIndex, uint32_t heapIndex, + VkDeviceSize size, VkDeviceSize alignment) + : INHERITED(size, alignment) + , fGpu(gpu) #ifdef SK_DEBUG - SkASSERT(offset >= alloc.fOffset); - SkASSERT(offset + size <= alloc.fOffset + alloc.fSize); - SkASSERT(0 == (offset & (alignment-1))); - SkASSERT(size > 0); - SkASSERT(0 == (size & (alignment-1))); + , fHeapIndex(heapIndex) #endif + , fMemoryTypeIndex(memoryTypeIndex) { + + VkMemoryAllocateInfo allocInfo = { + VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // sType + nullptr, // pNext + size, // allocationSize + memoryTypeIndex, // memoryTypeIndex + }; - memset(range, 0, sizeof(VkMappedMemoryRange)); - range->sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; - range->memory = alloc.fMemory; - range->offset = offset; - range->size = size; + VkResult err = GR_VK_CALL(gpu->vkInterface(), AllocateMemory(gpu->device(), + &allocInfo, + nullptr, + &fAlloc)); + if (VK_SUCCESS != err) { + this->reset(); + } +#ifdef SK_DEBUG + else { + gHeapUsage[heapIndex] += size; + } +#endif } -void GrVkMemory::FlushMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc, VkDeviceSize offset, - VkDeviceSize size) { - if (alloc.fFlags & GrVkAlloc::kNoncoherent_Flag) { - SkASSERT(offset == 0); - SkASSERT(size <= alloc.fSize); - if (alloc.fBackendMemory) { - GrVkMemoryAllocator* allocator = gpu->memoryAllocator(); - allocator->flushMappedMemory(alloc.fBackendMemory, offset, size); - } else { - VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize; - VkMappedMemoryRange mappedMemoryRange; - GrVkMemory::GetNonCoherentMappedMemoryRange(alloc, offset, size, alignment, - &mappedMemoryRange); - GR_VK_CALL(gpu->vkInterface(), FlushMappedMemoryRanges(gpu->device(), 1, - &mappedMemoryRange)); +GrVkSubHeap::~GrVkSubHeap() { + const GrVkInterface* iface = fGpu->vkInterface(); + GR_VK_CALL(iface, FreeMemory(fGpu->device(), fAlloc, nullptr)); +#ifdef SK_DEBUG + gHeapUsage[fHeapIndex] -= fSize; +#endif +} + +bool GrVkSubHeap::alloc(VkDeviceSize size, GrVkAlloc* alloc) { + alloc->fMemory = fAlloc; + return INHERITED::alloc(size, &alloc->fOffset, &alloc->fSize); +} + +void GrVkSubHeap::free(const GrVkAlloc& alloc) { + SkASSERT(alloc.fMemory == fAlloc); + + INHERITED::free(alloc.fOffset, alloc.fSize); +} + +bool GrVkHeap::subAlloc(VkDeviceSize size, VkDeviceSize alignment, + uint32_t memoryTypeIndex, uint32_t heapIndex, GrVkAlloc* alloc) { + VkDeviceSize alignedSize = align_size(size, alignment); + + // if requested is larger than our subheap allocation, just alloc directly + if (alignedSize > fSubHeapSize) { + VkMemoryAllocateInfo allocInfo = { + VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // sType + nullptr, // pNext + alignedSize, // allocationSize + memoryTypeIndex, // memoryTypeIndex + }; + + VkResult err = GR_VK_CALL(fGpu->vkInterface(), AllocateMemory(fGpu->device(), + &allocInfo, + nullptr, + &alloc->fMemory)); + if (VK_SUCCESS != err) { + return false; } + alloc->fOffset = 0; + alloc->fSize = alignedSize; + alloc->fUsesSystemHeap = true; +#ifdef SK_DEBUG + gHeapUsage[VK_MAX_MEMORY_HEAPS] += alignedSize; +#endif + + return true; } + + // first try to find a subheap that fits our allocation request + int bestFitIndex = -1; + VkDeviceSize bestFitSize = 0x7FFFFFFF; + for (auto i = 0; i < fSubHeaps.count(); ++i) { + if (fSubHeaps[i]->memoryTypeIndex() == 
memoryTypeIndex && + fSubHeaps[i]->alignment() == alignment) { + VkDeviceSize heapSize = fSubHeaps[i]->largestBlockSize(); + if (heapSize >= alignedSize && heapSize < bestFitSize) { + bestFitIndex = i; + bestFitSize = heapSize; + } + } + } + + if (bestFitIndex >= 0) { + SkASSERT(fSubHeaps[bestFitIndex]->alignment() == alignment); + if (fSubHeaps[bestFitIndex]->alloc(size, alloc)) { + fUsedSize += alloc->fSize; + return true; + } + return false; + } + + // need to allocate a new subheap + std::unique_ptr<GrVkSubHeap>& subHeap = fSubHeaps.push_back(); + subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, heapIndex, fSubHeapSize, alignment)); + // try to recover from failed allocation by only allocating what we need + if (subHeap->size() == 0) { + VkDeviceSize alignedSize = align_size(size, alignment); + subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, heapIndex, alignedSize, alignment)); + if (subHeap->size() == 0) { + return false; + } + } + fAllocSize += fSubHeapSize; + if (subHeap->alloc(size, alloc)) { + fUsedSize += alloc->fSize; + return true; + } + + return false; } -void GrVkMemory::InvalidateMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc, - VkDeviceSize offset, VkDeviceSize size) { - if (alloc.fFlags & GrVkAlloc::kNoncoherent_Flag) { - SkASSERT(offset == 0); - SkASSERT(size <= alloc.fSize); - if (alloc.fBackendMemory) { - GrVkMemoryAllocator* allocator = gpu->memoryAllocator(); - allocator->invalidateMappedMemory(alloc.fBackendMemory, offset, size); - } else { - VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize; - VkMappedMemoryRange mappedMemoryRange; - GrVkMemory::GetNonCoherentMappedMemoryRange(alloc, offset, size, alignment, - &mappedMemoryRange); - GR_VK_CALL(gpu->vkInterface(), InvalidateMappedMemoryRanges(gpu->device(), 1, - &mappedMemoryRange)); +bool GrVkHeap::singleAlloc(VkDeviceSize size, VkDeviceSize alignment, + uint32_t memoryTypeIndex, uint32_t heapIndex, GrVkAlloc* alloc) { + VkDeviceSize alignedSize = align_size(size, alignment); + + // first try to find an unallocated subheap that fits our allocation request + int bestFitIndex = -1; + VkDeviceSize bestFitSize = 0x7FFFFFFF; + for (auto i = 0; i < fSubHeaps.count(); ++i) { + if (fSubHeaps[i]->memoryTypeIndex() == memoryTypeIndex && + fSubHeaps[i]->alignment() == alignment && + fSubHeaps[i]->unallocated()) { + VkDeviceSize heapSize = fSubHeaps[i]->size(); + if (heapSize >= alignedSize && heapSize < bestFitSize) { + bestFitIndex = i; + bestFitSize = heapSize; + } + } + } + + if (bestFitIndex >= 0) { + SkASSERT(fSubHeaps[bestFitIndex]->alignment() == alignment); + if (fSubHeaps[bestFitIndex]->alloc(size, alloc)) { + fUsedSize += alloc->fSize; + return true; + } + return false; + } + + // need to allocate a new subheap + std::unique_ptr<GrVkSubHeap>& subHeap = fSubHeaps.push_back(); + subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, heapIndex, alignedSize, alignment)); + fAllocSize += alignedSize; + if (subHeap->alloc(size, alloc)) { + fUsedSize += alloc->fSize; + return true; + } + + return false; +} + +bool GrVkHeap::free(const GrVkAlloc& alloc) { + // a size of 0 means we're using the system heap + if (alloc.fUsesSystemHeap) { + const GrVkInterface* iface = fGpu->vkInterface(); + GR_VK_CALL(iface, FreeMemory(fGpu->device(), alloc.fMemory, nullptr)); + return true; + } + + for (auto i = 0; i < fSubHeaps.count(); ++i) { + if (fSubHeaps[i]->memory() == alloc.fMemory) { + fSubHeaps[i]->free(alloc); + fUsedSize -= alloc.fSize; + return true; } } + + return false; } + diff 
--git a/src/gpu/vk/GrVkMemory.h b/src/gpu/vk/GrVkMemory.h index 741bdaa8a0..bb6681435f 100644 --- a/src/gpu/vk/GrVkMemory.h +++ b/src/gpu/vk/GrVkMemory.h @@ -34,25 +34,133 @@ namespace GrVkMemory { GrVkAlloc* alloc); void FreeImageMemory(const GrVkGpu* gpu, bool linearTiling, const GrVkAlloc& alloc); - // Maps the entire GrVkAlloc and returns a pointer to the start of the allocation. Underneath - // the hood, we may map more than the range of the GrVkAlloc (e.g. the entire VkDeviceMemory), - // but the pointer returned will always be to the start of the GrVkAlloc. The caller should also - // never assume more than the GrVkAlloc block has been mapped. - void* MapAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc); - void UnmapAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc); - - // For the Flush and Invalidate calls, the offset should be relative to the GrVkAlloc. Thus this - // will often be 0. The client does not need to make sure the offset and size are aligned to the - // nonCoherentAtomSize, the internal calls will handle that. void FlushMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc, VkDeviceSize offset, VkDeviceSize size); void InvalidateMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc, VkDeviceSize offset, VkDeviceSize size); - - // Helper for aligning and setting VkMappedMemoryRange for flushing/invalidating noncoherent - // memory. - void GetNonCoherentMappedMemoryRange(const GrVkAlloc&, VkDeviceSize offset, VkDeviceSize size, - VkDeviceSize alignment, VkMappedMemoryRange*); } +class GrVkFreeListAlloc { +public: + GrVkFreeListAlloc(VkDeviceSize size, VkDeviceSize alignment) + : fSize(size) + , fAlignment(alignment) + , fFreeSize(size) + , fLargestBlockSize(size) + , fLargestBlockOffset(0) { + Block* block = fFreeList.addToTail(); + block->fOffset = 0; + block->fSize = fSize; + } + ~GrVkFreeListAlloc() { + this->reset(); + } + + VkDeviceSize size() const { return fSize; } + VkDeviceSize alignment() const { return fAlignment; } + VkDeviceSize freeSize() const { return fFreeSize; } + VkDeviceSize largestBlockSize() const { return fLargestBlockSize; } + + bool unallocated() const { return fSize == fFreeSize; } + +protected: + bool alloc(VkDeviceSize requestedSize, VkDeviceSize* allocOffset, VkDeviceSize* allocSize); + void free(VkDeviceSize allocOffset, VkDeviceSize allocSize); + + void reset() { + fSize = 0; + fAlignment = 0; + fFreeSize = 0; + fLargestBlockSize = 0; + fFreeList.reset(); + } + + struct Block { + VkDeviceSize fOffset; + VkDeviceSize fSize; + }; + typedef SkTLList<Block, 16> FreeList; + + VkDeviceSize fSize; + VkDeviceSize fAlignment; + VkDeviceSize fFreeSize; + VkDeviceSize fLargestBlockSize; + VkDeviceSize fLargestBlockOffset; + FreeList fFreeList; +}; + +class GrVkSubHeap : public GrVkFreeListAlloc { +public: + GrVkSubHeap(const GrVkGpu* gpu, uint32_t memoryTypeIndex, uint32_t heapIndex, + VkDeviceSize size, VkDeviceSize alignment); + ~GrVkSubHeap(); + + uint32_t memoryTypeIndex() const { return fMemoryTypeIndex; } + VkDeviceMemory memory() { return fAlloc; } + + bool alloc(VkDeviceSize requestedSize, GrVkAlloc* alloc); + void free(const GrVkAlloc& alloc); + +private: + const GrVkGpu* fGpu; +#ifdef SK_DEBUG + uint32_t fHeapIndex; +#endif + uint32_t fMemoryTypeIndex; + VkDeviceMemory fAlloc; + + typedef GrVkFreeListAlloc INHERITED; +}; + +class GrVkHeap { +public: + enum Strategy { + kSubAlloc_Strategy, // alloc large subheaps and suballoc within them + kSingleAlloc_Strategy // alloc/recycle an individual subheap per object + }; + + GrVkHeap(const 
GrVkGpu* gpu, Strategy strategy, VkDeviceSize subHeapSize) + : fGpu(gpu) + , fSubHeapSize(subHeapSize) + , fAllocSize(0) + , fUsedSize(0) { + if (strategy == kSubAlloc_Strategy) { + fAllocFunc = &GrVkHeap::subAlloc; + } else { + fAllocFunc = &GrVkHeap::singleAlloc; + } + } + + ~GrVkHeap() {} + + VkDeviceSize allocSize() const { return fAllocSize; } + VkDeviceSize usedSize() const { return fUsedSize; } + + bool alloc(VkDeviceSize size, VkDeviceSize alignment, uint32_t memoryTypeIndex, + uint32_t heapIndex, GrVkAlloc* alloc) { + SkASSERT(size > 0); + alloc->fUsesSystemHeap = false; + return (*this.*fAllocFunc)(size, alignment, memoryTypeIndex, heapIndex, alloc); + } + bool free(const GrVkAlloc& alloc); + +private: + typedef bool (GrVkHeap::*AllocFunc)(VkDeviceSize size, VkDeviceSize alignment, + uint32_t memoryTypeIndex, uint32_t heapIndex, + GrVkAlloc* alloc); + + bool subAlloc(VkDeviceSize size, VkDeviceSize alignment, + uint32_t memoryTypeIndex, uint32_t heapIndex, + GrVkAlloc* alloc); + bool singleAlloc(VkDeviceSize size, VkDeviceSize alignment, + uint32_t memoryTypeIndex, uint32_t heapIndex, + GrVkAlloc* alloc); + + const GrVkGpu* fGpu; + VkDeviceSize fSubHeapSize; + VkDeviceSize fAllocSize; + VkDeviceSize fUsedSize; + AllocFunc fAllocFunc; + SkTArray<std::unique_ptr<GrVkSubHeap>> fSubHeaps; +}; #endif diff --git a/tests/VkHeapTests.cpp b/tests/VkHeapTests.cpp new file mode 100644 index 0000000000..67eb045d98 --- /dev/null +++ b/tests/VkHeapTests.cpp @@ -0,0 +1,239 @@ +/* + * Copyright 2015 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +// This is a GPU-backend specific test. It relies on static intializers to work + +#include "SkTypes.h" + +#if SK_SUPPORT_GPU && defined(SK_VULKAN) + +#include "GrContextPriv.h" +#include "GrContextFactory.h" +#include "GrTest.h" +#include "Test.h" +#include "vk/GrVkGpu.h" + +using sk_gpu_test::GrContextFactory; + +void subheap_test(skiatest::Reporter* reporter, GrContext* context) { + GrVkGpu* gpu = static_cast<GrVkGpu*>(context->contextPriv().getGpu()); + + // memtype doesn't matter, we're just testing the suballocation algorithm so we'll use 0 + GrVkSubHeap heap(gpu, 0, 0, 64 * 1024, 32); + GrVkAlloc alloc0, alloc1, alloc2, alloc3; + // test full allocation and free + REPORTER_ASSERT(reporter, heap.alloc(64 * 1024, &alloc0)); + REPORTER_ASSERT(reporter, alloc0.fOffset == 0); + REPORTER_ASSERT(reporter, alloc0.fSize == 64 * 1024); + REPORTER_ASSERT(reporter, heap.freeSize() == 0 && heap.largestBlockSize() == 0); + heap.free(alloc0); + REPORTER_ASSERT(reporter, heap.freeSize() == 64*1024 && heap.largestBlockSize() == 64 * 1024); + + // now let's suballoc some memory + REPORTER_ASSERT(reporter, heap.alloc(16 * 1024, &alloc0)); + REPORTER_ASSERT(reporter, heap.alloc(23 * 1024, &alloc1)); + REPORTER_ASSERT(reporter, heap.alloc(18 * 1024, &alloc2)); + REPORTER_ASSERT(reporter, heap.freeSize() == 7 * 1024 && heap.largestBlockSize() == 7 * 1024); + // free lone block + heap.free(alloc1); + REPORTER_ASSERT(reporter, heap.freeSize() == 30 * 1024 && heap.largestBlockSize() == 23 * 1024); + // allocate into smallest free block + REPORTER_ASSERT(reporter, heap.alloc(6 * 1024, &alloc3)); + REPORTER_ASSERT(reporter, heap.freeSize() == 24 * 1024 && heap.largestBlockSize() == 23 * 1024); + // allocate into exact size free block + REPORTER_ASSERT(reporter, heap.alloc(23 * 1024, &alloc1)); + REPORTER_ASSERT(reporter, heap.freeSize() == 1 * 1024 && heap.largestBlockSize() == 1 * 1024); + 
// free lone block + heap.free(alloc2); + REPORTER_ASSERT(reporter, heap.freeSize() == 19 * 1024 && heap.largestBlockSize() == 18 * 1024); + // free and merge with preceding block and following + heap.free(alloc3); + REPORTER_ASSERT(reporter, heap.freeSize() == 25 * 1024 && heap.largestBlockSize() == 25 * 1024); + // free and merge with following block + heap.free(alloc1); + REPORTER_ASSERT(reporter, heap.freeSize() == 48 * 1024 && heap.largestBlockSize() == 48 * 1024); + // free starting block and merge with following + heap.free(alloc0); + REPORTER_ASSERT(reporter, heap.freeSize() == 64 * 1024 && heap.largestBlockSize() == 64 * 1024); + + // realloc + REPORTER_ASSERT(reporter, heap.alloc(4 * 1024, &alloc0)); + REPORTER_ASSERT(reporter, heap.alloc(35 * 1024, &alloc1)); + REPORTER_ASSERT(reporter, heap.alloc(10 * 1024, &alloc2)); + REPORTER_ASSERT(reporter, heap.freeSize() == 15 * 1024 && heap.largestBlockSize() == 15 * 1024); + // free starting block and merge with following + heap.free(alloc0); + REPORTER_ASSERT(reporter, heap.freeSize() == 19 * 1024 && heap.largestBlockSize() == 15 * 1024); + // free block and merge with preceding + heap.free(alloc1); + REPORTER_ASSERT(reporter, heap.freeSize() == 54 * 1024 && heap.largestBlockSize() == 39 * 1024); + // free block and merge with preceding and following + heap.free(alloc2); + REPORTER_ASSERT(reporter, heap.freeSize() == 64 * 1024 && heap.largestBlockSize() == 64 * 1024); + + // fragment + REPORTER_ASSERT(reporter, heap.alloc(19 * 1024, &alloc0)); + REPORTER_ASSERT(reporter, heap.alloc(5 * 1024, &alloc1)); + REPORTER_ASSERT(reporter, heap.alloc(15 * 1024, &alloc2)); + REPORTER_ASSERT(reporter, heap.alloc(3 * 1024, &alloc3)); + REPORTER_ASSERT(reporter, heap.freeSize() == 22 * 1024 && heap.largestBlockSize() == 22 * 1024); + heap.free(alloc0); + REPORTER_ASSERT(reporter, heap.freeSize() == 41 * 1024 && heap.largestBlockSize() == 22 * 1024); + heap.free(alloc2); + REPORTER_ASSERT(reporter, heap.freeSize() == 56 * 1024 && heap.largestBlockSize() == 22 * 1024); + REPORTER_ASSERT(reporter, !heap.alloc(40 * 1024, &alloc0)); + heap.free(alloc3); + REPORTER_ASSERT(reporter, heap.freeSize() == 59 * 1024 && heap.largestBlockSize() == 40 * 1024); + REPORTER_ASSERT(reporter, heap.alloc(40 * 1024, &alloc0)); + REPORTER_ASSERT(reporter, heap.freeSize() == 19 * 1024 && heap.largestBlockSize() == 19 * 1024); + heap.free(alloc1); + REPORTER_ASSERT(reporter, heap.freeSize() == 24 * 1024 && heap.largestBlockSize() == 24 * 1024); + heap.free(alloc0); + REPORTER_ASSERT(reporter, heap.freeSize() == 64 * 1024 && heap.largestBlockSize() == 64 * 1024); + + // unaligned sizes + REPORTER_ASSERT(reporter, heap.alloc(19 * 1024 - 31, &alloc0)); + REPORTER_ASSERT(reporter, heap.alloc(5 * 1024 - 5, &alloc1)); + REPORTER_ASSERT(reporter, heap.alloc(15 * 1024 - 19, &alloc2)); + REPORTER_ASSERT(reporter, heap.alloc(3 * 1024 - 3, &alloc3)); + REPORTER_ASSERT(reporter, heap.freeSize() == 22 * 1024 && heap.largestBlockSize() == 22 * 1024); + heap.free(alloc0); + REPORTER_ASSERT(reporter, heap.freeSize() == 41 * 1024 && heap.largestBlockSize() == 22 * 1024); + heap.free(alloc2); + REPORTER_ASSERT(reporter, heap.freeSize() == 56 * 1024 && heap.largestBlockSize() == 22 * 1024); + REPORTER_ASSERT(reporter, !heap.alloc(40 * 1024, &alloc0)); + heap.free(alloc3); + REPORTER_ASSERT(reporter, heap.freeSize() == 59 * 1024 && heap.largestBlockSize() == 40 * 1024); + REPORTER_ASSERT(reporter, heap.alloc(40 * 1024, &alloc0)); + REPORTER_ASSERT(reporter, heap.freeSize() == 19 * 1024 && 
heap.largestBlockSize() == 19 * 1024); + heap.free(alloc1); + REPORTER_ASSERT(reporter, heap.freeSize() == 24 * 1024 && heap.largestBlockSize() == 24 * 1024); + heap.free(alloc0); + REPORTER_ASSERT(reporter, heap.freeSize() == 64 * 1024 && heap.largestBlockSize() == 64 * 1024); +} + +void suballoc_test(skiatest::Reporter* reporter, GrContext* context) { + GrVkGpu* gpu = static_cast<GrVkGpu*>(context->contextPriv().getGpu()); + + // memtype/heap index don't matter, we're just testing the allocation algorithm so we'll use 0 + GrVkHeap heap(gpu, GrVkHeap::kSubAlloc_Strategy, 64 * 1024); + GrVkAlloc alloc0, alloc1, alloc2, alloc3; + const VkDeviceSize kAlignment = 16; + const uint32_t kMemType = 0; + const uint32_t kHeapIndex = 0; + + REPORTER_ASSERT(reporter, heap.allocSize() == 0 && heap.usedSize() == 0); + + // fragment allocations so we need to grow heap + REPORTER_ASSERT(reporter, heap.alloc(19 * 1024 - 3, kAlignment, kMemType, kHeapIndex, &alloc0)); + REPORTER_ASSERT(reporter, heap.alloc(5 * 1024 - 9, kAlignment, kMemType, kHeapIndex, &alloc1)); + REPORTER_ASSERT(reporter, heap.alloc(15 * 1024 - 15, kAlignment, kMemType, kHeapIndex, &alloc2)); + REPORTER_ASSERT(reporter, heap.alloc(3 * 1024 - 6, kAlignment, kMemType, kHeapIndex, &alloc3)); + REPORTER_ASSERT(reporter, heap.allocSize() == 64 * 1024 && heap.usedSize() == 42 * 1024); + heap.free(alloc0); + REPORTER_ASSERT(reporter, heap.allocSize() == 64 * 1024 && heap.usedSize() == 23 * 1024); + heap.free(alloc2); + REPORTER_ASSERT(reporter, heap.allocSize() == 64 * 1024 && heap.usedSize() == 8 * 1024); + // we expect the heap to grow here + REPORTER_ASSERT(reporter, heap.alloc(40 * 1024, kAlignment, kMemType, kHeapIndex, &alloc0)); + REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 48 * 1024); + heap.free(alloc3); + REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 45 * 1024); + // heap should not grow here (first subheap has exactly enough room) + REPORTER_ASSERT(reporter, heap.alloc(40 * 1024, kAlignment, kMemType, kHeapIndex, &alloc3)); + REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 85 * 1024); + // heap should not grow here (second subheap has room) + REPORTER_ASSERT(reporter, heap.alloc(22 * 1024, kAlignment, kMemType, kHeapIndex, &alloc2)); + REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 107 * 1024); + heap.free(alloc1); + REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 102 * 1024); + heap.free(alloc0); + REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 62 * 1024); + heap.free(alloc2); + REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 40 * 1024); + heap.free(alloc3); + REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 0 * 1024); + // heap should not grow here (allocating more than subheap size) + REPORTER_ASSERT(reporter, heap.alloc(128 * 1024, kAlignment, kMemType, kHeapIndex, &alloc0)); + REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 0 * 1024); + heap.free(alloc0); + REPORTER_ASSERT(reporter, heap.alloc(24 * 1024, kAlignment, kMemType, kHeapIndex, &alloc0)); + REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 24 * 1024); + // heap should alloc a new subheap because the memory type is different + REPORTER_ASSERT(reporter, heap.alloc(24 * 1024, kAlignment, kMemType+1, kHeapIndex, &alloc1)); + REPORTER_ASSERT(reporter, 
heap.allocSize() == 192 * 1024 && heap.usedSize() == 48 * 1024); + // heap should alloc a new subheap because the alignment is different + REPORTER_ASSERT(reporter, heap.alloc(24 * 1024, 128, kMemType, kHeapIndex, &alloc2)); + REPORTER_ASSERT(reporter, heap.allocSize() == 256 * 1024 && heap.usedSize() == 72 * 1024); + heap.free(alloc2); + heap.free(alloc0); + heap.free(alloc1); + REPORTER_ASSERT(reporter, heap.allocSize() == 256 * 1024 && heap.usedSize() == 0 * 1024); +} + +void singlealloc_test(skiatest::Reporter* reporter, GrContext* context) { + GrVkGpu* gpu = static_cast<GrVkGpu*>(context->contextPriv().getGpu()); + + // memtype/heap index don't matter, we're just testing the allocation algorithm so we'll use 0 + GrVkHeap heap(gpu, GrVkHeap::kSingleAlloc_Strategy, 64 * 1024); + GrVkAlloc alloc0, alloc1, alloc2, alloc3; + const VkDeviceSize kAlignment = 64; + const uint32_t kMemType = 0; + const uint32_t kHeapIndex = 0; + + REPORTER_ASSERT(reporter, heap.allocSize() == 0 && heap.usedSize() == 0); + + // make a few allocations + REPORTER_ASSERT(reporter, heap.alloc(49 * 1024 - 3, kAlignment, kMemType, kHeapIndex, &alloc0)); + REPORTER_ASSERT(reporter, heap.alloc(5 * 1024 - 37, kAlignment, kMemType, kHeapIndex, &alloc1)); + REPORTER_ASSERT(reporter, heap.alloc(15 * 1024 - 11, kAlignment, kMemType, kHeapIndex, &alloc2)); + REPORTER_ASSERT(reporter, heap.alloc(3 * 1024 - 29, kAlignment, kMemType, kHeapIndex, &alloc3)); + REPORTER_ASSERT(reporter, heap.allocSize() == 72 * 1024 && heap.usedSize() == 72 * 1024); + heap.free(alloc0); + REPORTER_ASSERT(reporter, heap.allocSize() == 72 * 1024 && heap.usedSize() == 23 * 1024); + heap.free(alloc2); + REPORTER_ASSERT(reporter, heap.allocSize() == 72 * 1024 && heap.usedSize() == 8 * 1024); + // heap should not grow here (first subheap has room) + REPORTER_ASSERT(reporter, heap.alloc(40 * 1024, kAlignment, kMemType, kHeapIndex, &alloc0)); + REPORTER_ASSERT(reporter, heap.allocSize() == 72 * 1024 && heap.usedSize() == 48 * 1024); + heap.free(alloc3); + REPORTER_ASSERT(reporter, heap.allocSize() == 72 * 1024 && heap.usedSize() == 45 * 1024); + // check for exact fit -- heap should not grow here (third subheap has room) + REPORTER_ASSERT(reporter, heap.alloc(15 * 1024 - 63, kAlignment, kMemType, kHeapIndex, &alloc2)); + REPORTER_ASSERT(reporter, heap.allocSize() == 72 * 1024 && heap.usedSize() == 60 * 1024); + heap.free(alloc2); + REPORTER_ASSERT(reporter, heap.allocSize() == 72 * 1024 && heap.usedSize() == 45 * 1024); + // heap should grow here (no subheap has room) + REPORTER_ASSERT(reporter, heap.alloc(40 * 1024, kAlignment, kMemType, kHeapIndex, &alloc3)); + REPORTER_ASSERT(reporter, heap.allocSize() == 112 * 1024 && heap.usedSize() == 85 * 1024); + heap.free(alloc1); + REPORTER_ASSERT(reporter, heap.allocSize() == 112 * 1024 && heap.usedSize() == 80 * 1024); + heap.free(alloc0); + REPORTER_ASSERT(reporter, heap.allocSize() == 112 * 1024 && heap.usedSize() == 40 * 1024); + heap.free(alloc3); + REPORTER_ASSERT(reporter, heap.allocSize() == 112 * 1024 && heap.usedSize() == 0 * 1024); + REPORTER_ASSERT(reporter, heap.alloc(24 * 1024, kAlignment, kMemType, kHeapIndex, &alloc0)); + REPORTER_ASSERT(reporter, heap.allocSize() == 112 * 1024 && heap.usedSize() == 24 * 1024); + // heap should alloc a new subheap because the memory type is different + REPORTER_ASSERT(reporter, heap.alloc(24 * 1024, kAlignment, kMemType + 1, kHeapIndex, &alloc1)); + REPORTER_ASSERT(reporter, heap.allocSize() == 136 * 1024 && heap.usedSize() == 48 * 1024); + // heap should alloc 
a new subheap because the alignment is different + REPORTER_ASSERT(reporter, heap.alloc(24 * 1024, 128, kMemType, kHeapIndex, &alloc2)); + REPORTER_ASSERT(reporter, heap.allocSize() == 160 * 1024 && heap.usedSize() == 72 * 1024); + heap.free(alloc1); + heap.free(alloc2); + heap.free(alloc0); + REPORTER_ASSERT(reporter, heap.allocSize() == 160 * 1024 && heap.usedSize() == 0 * 1024); +} + +DEF_GPUTEST_FOR_VULKAN_CONTEXT(VkHeapTests, reporter, ctxInfo) { + subheap_test(reporter, ctxInfo.grContext()); + suballoc_test(reporter, ctxInfo.grContext()); + singlealloc_test(reporter, ctxInfo.grContext()); +} + +#endif
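A note on the "unaligned sizes" block in subheap_test above: GrVkFreeListAlloc::alloc rounds every request up to the subheap's alignment (32 bytes there) before carving a block, so the four odd-sized requests occupy exactly the same aligned footprints as the earlier run and all the free-size assertions carry over. A quick self-contained check of that arithmetic:

    #include <cstdint>

    constexpr uint64_t align_size(uint64_t size, uint64_t alignment) {
        return (size + alignment - 1) & ~(alignment - 1);
    }

    // each odd request rounds up to the size used in the aligned run
    static_assert(align_size(19 * 1024 - 31, 32) == 19 * 1024, "");
    static_assert(align_size( 5 * 1024 -  5, 32) ==  5 * 1024, "");
    static_assert(align_size(15 * 1024 - 19, 32) == 15 * 1024, "");
    static_assert(align_size( 3 * 1024 -  3, 32) ==  3 * 1024, "");
    // hence 64K - (19K + 5K + 15K + 3K) == 22K free, as asserted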