author | 2016-06-13 14:28:07 -0700
---|---
committer | 2016-06-13 14:28:07 -0700
commit | 6b6ffc4ab6ecee9f46425a5467ef0fbebbb8c9cd (patch)
tree | 286b9794262bc05a56b24fed800d0ce63badcd3e /src/gpu/vk
parent | d06693d9088f741c48a0b3b8449b3695447a86bc (diff)
Create free list heap for suballocation
BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2029763002
Review-Url: https://codereview.chromium.org/2029763002
Diffstat (limited to 'src/gpu/vk')
File | Lines changed
---|---
src/gpu/vk/GrVkBuffer.cpp | 24
src/gpu/vk/GrVkBuffer.h | 9
src/gpu/vk/GrVkGpu.cpp | 26
src/gpu/vk/GrVkGpu.h | 24
src/gpu/vk/GrVkImage.cpp | 18
src/gpu/vk/GrVkImage.h | 16
src/gpu/vk/GrVkMemory.cpp | 438
src/gpu/vk/GrVkMemory.h | 92
src/gpu/vk/GrVkTexture.cpp | 2
9 files changed, 552 insertions, 97 deletions
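
The diff below replaces the old per-object vkAllocateMemory path with sub-allocation out of larger heap blocks managed by new GrVkHeap and GrVkSubHeap classes, with every sub-allocation rounded up to the sub-heap's alignment. The rounding helper added in GrVkMemory.cpp (align_size) is the standard power-of-two trick; here is a minimal standalone sketch of it with hypothetical sizes, not Skia code:

```cpp
#include <cassert>
#include <cstdint>

// Round size up to the next multiple of alignment; matches the align_size()
// helper added in GrVkMemory.cpp. Only valid for power-of-two alignments.
static uint64_t align_size(uint64_t size, uint64_t alignment) {
    return (size + alignment - 1) & ~(alignment - 1);
}

int main() {
    // Hypothetical request sizes: a 100-byte allocation with 256-byte
    // alignment still consumes a full 256-byte block in the free list.
    assert(align_size(100, 256) == 256);
    assert(align_size(256, 256) == 256);
    assert(align_size(257, 256) == 512);
    return 0;
}
```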
diff --git a/src/gpu/vk/GrVkBuffer.cpp b/src/gpu/vk/GrVkBuffer.cpp index 12925db008..1c7c4d0e25 100644 --- a/src/gpu/vk/GrVkBuffer.cpp +++ b/src/gpu/vk/GrVkBuffer.cpp @@ -56,37 +56,23 @@ const GrVkBuffer::Resource* GrVkBuffer::Create(const GrVkGpu* gpu, const Desc& d return nullptr; } - VkMemoryPropertyFlags requiredMemProps = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | - VK_MEMORY_PROPERTY_HOST_CACHED_BIT; - if (!GrVkMemory::AllocAndBindBufferMemory(gpu, buffer, - requiredMemProps, + desc.fType, &alloc)) { - // Try again without requiring host cached memory - requiredMemProps = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; - if (!GrVkMemory::AllocAndBindBufferMemory(gpu, - buffer, - requiredMemProps, - &alloc)) { - VK_CALL(gpu, DestroyBuffer(gpu->device(), buffer, nullptr)); - return nullptr; - } + return nullptr; } - const GrVkBuffer::Resource* resource = new GrVkBuffer::Resource(buffer, alloc); + const GrVkBuffer::Resource* resource = new GrVkBuffer::Resource(buffer, alloc, desc.fType); if (!resource) { VK_CALL(gpu, DestroyBuffer(gpu->device(), buffer, nullptr)); - GrVkMemory::FreeBufferMemory(gpu, alloc); + GrVkMemory::FreeBufferMemory(gpu, desc.fType, alloc); return nullptr; } return resource; } - void GrVkBuffer::addMemoryBarrier(const GrVkGpu* gpu, VkAccessFlags srcAccessMask, VkAccessFlags dstAccesMask, @@ -113,7 +99,7 @@ void GrVkBuffer::Resource::freeGPUData(const GrVkGpu* gpu) const { SkASSERT(fBuffer); SkASSERT(fAlloc.fMemory); VK_CALL(gpu, DestroyBuffer(gpu->device(), fBuffer, nullptr)); - GrVkMemory::FreeBufferMemory(gpu, fAlloc); + GrVkMemory::FreeBufferMemory(gpu, fType, fAlloc); } void GrVkBuffer::vkRelease(const GrVkGpu* gpu) { diff --git a/src/gpu/vk/GrVkBuffer.h b/src/gpu/vk/GrVkBuffer.h index f7d43c796d..0bfbecace1 100644 --- a/src/gpu/vk/GrVkBuffer.h +++ b/src/gpu/vk/GrVkBuffer.h @@ -54,11 +54,12 @@ protected: class Resource : public GrVkResource { public: - Resource(VkBuffer buf, const GrVkAlloc& alloc) - : INHERITED(), fBuffer(buf), fAlloc(alloc) {} + Resource(VkBuffer buf, const GrVkAlloc& alloc, Type type) + : INHERITED(), fBuffer(buf), fAlloc(alloc), fType(type) {} - VkBuffer fBuffer; - GrVkAlloc fAlloc; + VkBuffer fBuffer; + GrVkAlloc fAlloc; + Type fType; private: void freeGPUData(const GrVkGpu* gpu) const; diff --git a/src/gpu/vk/GrVkGpu.cpp b/src/gpu/vk/GrVkGpu.cpp index 6f953653e4..b80835441a 100644 --- a/src/gpu/vk/GrVkGpu.cpp +++ b/src/gpu/vk/GrVkGpu.cpp @@ -132,6 +132,16 @@ GrVkGpu::GrVkGpu(GrContext* context, const GrContextOptions& options, fCurrentCmdBuffer = fResourceProvider.createPrimaryCommandBuffer(); SkASSERT(fCurrentCmdBuffer); fCurrentCmdBuffer->begin(this); + + // set up our heaps + fHeaps[kLinearImage_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 16*1024*1024)); + fHeaps[kOptimalImage_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 64*1024*1024)); + fHeaps[kSmallOptimalImage_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 2*1024*1024)); + fHeaps[kVertexBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSingleAlloc_Strategy, 0)); + fHeaps[kIndexBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSingleAlloc_Strategy, 0)); + fHeaps[kUniformBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 64*1024)); + fHeaps[kCopyReadBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSingleAlloc_Strategy, 0)); + fHeaps[kCopyWriteBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 16*1024*1024)); } GrVkGpu::~GrVkGpu() { @@ 
-880,11 +890,8 @@ GrBackendObject GrVkGpu::createTestingOnlyBackendTexture(void* srcData, int w, i usageFlags |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT; usageFlags |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; - VkFlags memProps = (srcData && linearTiling) ? VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT : - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - VkImage image = VK_NULL_HANDLE; - GrVkAlloc alloc = { VK_NULL_HANDLE, 0 }; + GrVkAlloc alloc = { VK_NULL_HANDLE, 0, 0 }; VkImageTiling imageTiling = linearTiling ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; VkImageLayout initialLayout = (VK_IMAGE_TILING_LINEAR == imageTiling) @@ -917,7 +924,7 @@ GrBackendObject GrVkGpu::createTestingOnlyBackendTexture(void* srcData, int w, i GR_VK_CALL_ERRCHECK(this->vkInterface(), CreateImage(this->device(), &imageCreateInfo, nullptr, &image)); - if (!GrVkMemory::AllocAndBindImageMemory(this, image, memProps, &alloc)) { + if (!GrVkMemory::AllocAndBindImageMemory(this, image, linearTiling, &alloc)) { VK_CALL(DestroyImage(this->device(), image, nullptr)); return 0; } @@ -938,7 +945,7 @@ GrBackendObject GrVkGpu::createTestingOnlyBackendTexture(void* srcData, int w, i err = VK_CALL(MapMemory(fDevice, alloc.fMemory, alloc.fOffset, layout.rowPitch * h, 0, &mapPtr)); if (err) { - GrVkMemory::FreeImageMemory(this, alloc); + GrVkMemory::FreeImageMemory(this, linearTiling, alloc); VK_CALL(DestroyImage(this->device(), image, nullptr)); return 0; } @@ -989,15 +996,12 @@ bool GrVkGpu::isTestingOnlyBackendTexture(GrBackendObject id) const { } void GrVkGpu::deleteTestingOnlyBackendTexture(GrBackendObject id, bool abandon) { - const GrVkImageInfo* backend = reinterpret_cast<const GrVkImageInfo*>(id); - + GrVkImageInfo* backend = reinterpret_cast<GrVkImageInfo*>(id); if (backend) { if (!abandon) { // something in the command buffer may still be using this, so force submit this->submitCommandBuffer(kForce_SyncQueue); - - GrVkMemory::FreeImageMemory(this, backend->fAlloc); - VK_CALL(DestroyImage(this->device(), backend->fImage, nullptr)); + GrVkImage::DestroyImageInfo(this, backend); } delete backend; } diff --git a/src/gpu/vk/GrVkGpu.h b/src/gpu/vk/GrVkGpu.h index cd72c69bd4..b076de29ff 100644 --- a/src/gpu/vk/GrVkGpu.h +++ b/src/gpu/vk/GrVkGpu.h @@ -13,6 +13,7 @@ #include "vk/GrVkBackendContext.h" #include "GrVkCaps.h" #include "GrVkIndexBuffer.h" +#include "GrVkMemory.h" #include "GrVkResourceProvider.h" #include "GrVkVertexBuffer.h" #include "GrVkUtil.h" @@ -122,6 +123,27 @@ public: void generateMipmap(GrVkTexture* tex) const; + // Heaps + enum Heap { + kLinearImage_Heap = 0, + // We separate out small (i.e., <= 16K) images to reduce fragmentation + // in the main heap. + kOptimalImage_Heap, + kSmallOptimalImage_Heap, + // We have separate vertex and image heaps, because it's possible that + // a given Vulkan driver may allocate them separately. 
+ kVertexBuffer_Heap, + kIndexBuffer_Heap, + kUniformBuffer_Heap, + kCopyReadBuffer_Heap, + kCopyWriteBuffer_Heap, + + kLastHeap = kCopyWriteBuffer_Heap + }; + static const int kHeapCount = kLastHeap + 1; + + GrVkHeap* getHeap(Heap heap) const { return fHeaps[heap]; } + private: GrVkGpu(GrContext* context, const GrContextOptions& options, const GrVkBackendContext* backendContext); @@ -226,6 +248,8 @@ private: GrVkPrimaryCommandBuffer* fCurrentCmdBuffer; VkPhysicalDeviceMemoryProperties fPhysDevMemProps; + SkAutoTDelete<GrVkHeap> fHeaps[kHeapCount]; + #ifdef ENABLE_VK_LAYERS // For reporting validation layer errors VkDebugReportCallbackEXT fCallback; diff --git a/src/gpu/vk/GrVkImage.cpp b/src/gpu/vk/GrVkImage.cpp index bbef1b40e8..70dd4486ae 100644 --- a/src/gpu/vk/GrVkImage.cpp +++ b/src/gpu/vk/GrVkImage.cpp @@ -64,9 +64,9 @@ bool GrVkImage::InitImageInfo(const GrVkGpu* gpu, const ImageDesc& imageDesc, Gr VkImage image = 0; GrVkAlloc alloc; - VkImageLayout initialLayout = (VK_IMAGE_TILING_LINEAR == imageDesc.fImageTiling) - ? VK_IMAGE_LAYOUT_PREINITIALIZED - : VK_IMAGE_LAYOUT_UNDEFINED; + bool isLinear = VK_IMAGE_TILING_LINEAR == imageDesc.fImageTiling; + VkImageLayout initialLayout = isLinear ? VK_IMAGE_LAYOUT_PREINITIALIZED + : VK_IMAGE_LAYOUT_UNDEFINED; // Create Image VkSampleCountFlagBits vkSamples; @@ -102,7 +102,7 @@ bool GrVkImage::InitImageInfo(const GrVkGpu* gpu, const ImageDesc& imageDesc, Gr GR_VK_CALL_ERRCHECK(gpu->vkInterface(), CreateImage(gpu->device(), &imageCreateInfo, nullptr, &image)); - if (!GrVkMemory::AllocAndBindImageMemory(gpu, image, imageDesc.fMemProps, &alloc)) { + if (!GrVkMemory::AllocAndBindImageMemory(gpu, image, isLinear, &alloc)) { VK_CALL(gpu, DestroyImage(gpu->device(), image, nullptr)); return false; } @@ -118,11 +118,12 @@ bool GrVkImage::InitImageInfo(const GrVkGpu* gpu, const ImageDesc& imageDesc, Gr void GrVkImage::DestroyImageInfo(const GrVkGpu* gpu, GrVkImageInfo* info) { VK_CALL(gpu, DestroyImage(gpu->device(), info->fImage, nullptr)); - GrVkMemory::FreeImageMemory(gpu, info->fAlloc); + bool isLinear = VK_IMAGE_TILING_LINEAR == info->fImageTiling; + GrVkMemory::FreeImageMemory(gpu, isLinear, info->fAlloc); } -void GrVkImage::setNewResource(VkImage image, const GrVkAlloc& alloc) { - fResource = new Resource(image, alloc); +void GrVkImage::setNewResource(VkImage image, const GrVkAlloc& alloc, VkImageTiling tiling) { + fResource = new Resource(image, alloc, tiling); } GrVkImage::~GrVkImage() { @@ -146,7 +147,8 @@ void GrVkImage::abandonImage() { void GrVkImage::Resource::freeGPUData(const GrVkGpu* gpu) const { VK_CALL(gpu, DestroyImage(gpu->device(), fImage, nullptr)); - GrVkMemory::FreeImageMemory(gpu, fAlloc); + bool isLinear = (VK_IMAGE_TILING_LINEAR == fImageTiling); + GrVkMemory::FreeImageMemory(gpu, isLinear, fAlloc); } void GrVkImage::BorrowedResource::freeGPUData(const GrVkGpu* gpu) const { diff --git a/src/gpu/vk/GrVkImage.h b/src/gpu/vk/GrVkImage.h index 85ee620754..fe180698e8 100644 --- a/src/gpu/vk/GrVkImage.h +++ b/src/gpu/vk/GrVkImage.h @@ -33,9 +33,9 @@ public: : fInfo(info) , fIsBorrowed(kBorrowed_Wrapped == wrapped) { if (kBorrowed_Wrapped == wrapped) { - fResource = new BorrowedResource(info.fImage, info.fAlloc); + fResource = new BorrowedResource(info.fImage, info.fAlloc, info.fImageTiling); } else { - fResource = new Resource(info.fImage, info.fAlloc); + fResource = new Resource(info.fImage, info.fAlloc, info.fImageTiling); } } virtual ~GrVkImage(); @@ -87,13 +87,12 @@ protected: void releaseImage(const GrVkGpu* gpu); void 
abandonImage(); - void setNewResource(VkImage image, const GrVkAlloc& alloc); + void setNewResource(VkImage image, const GrVkAlloc& alloc, VkImageTiling tiling); GrVkImageInfo fInfo; bool fIsBorrowed; private: - // unlike GrVkBuffer, this needs to be public so GrVkStencilAttachment can use it class Resource : public GrVkResource { public: Resource() @@ -103,8 +102,8 @@ private: fAlloc.fOffset = 0; } - Resource(VkImage image, const GrVkAlloc& alloc) - : fImage(image), fAlloc(alloc) {} + Resource(VkImage image, const GrVkAlloc& alloc, VkImageTiling tiling) + : fImage(image), fAlloc(alloc), fImageTiling(tiling) {} ~Resource() override {} @@ -113,6 +112,7 @@ private: VkImage fImage; GrVkAlloc fAlloc; + VkImageTiling fImageTiling; typedef GrVkResource INHERITED; }; @@ -120,8 +120,8 @@ private: // for wrapped textures class BorrowedResource : public Resource { public: - BorrowedResource(VkImage image, const GrVkAlloc& alloc) - : Resource(image, alloc) { + BorrowedResource(VkImage image, const GrVkAlloc& alloc, VkImageTiling tiling) + : Resource(image, alloc, tiling) { } private: void freeGPUData(const GrVkGpu* gpu) const override; diff --git a/src/gpu/vk/GrVkMemory.cpp b/src/gpu/vk/GrVkMemory.cpp index e0ab3a6c8c..fa0bcb5cd7 100644 --- a/src/gpu/vk/GrVkMemory.cpp +++ b/src/gpu/vk/GrVkMemory.cpp @@ -29,38 +29,26 @@ static bool get_valid_memory_type_index(VkPhysicalDeviceMemoryProperties physDev return false; } -static bool alloc_device_memory(const GrVkGpu* gpu, - VkMemoryRequirements* memReqs, - const VkMemoryPropertyFlags flags, - VkDeviceMemory* memory) { - uint32_t typeIndex; - if (!get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(), - memReqs->memoryTypeBits, - flags, - &typeIndex)) { - return false; - } - - VkMemoryAllocateInfo allocInfo = { - VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // sType - NULL, // pNext - memReqs->size, // allocationSize - typeIndex, // memoryTypeIndex +static GrVkGpu::Heap buffer_type_to_heap(GrVkBuffer::Type type) { + const GrVkGpu::Heap kBufferToHeap[]{ + GrVkGpu::kVertexBuffer_Heap, + GrVkGpu::kIndexBuffer_Heap, + GrVkGpu::kUniformBuffer_Heap, + GrVkGpu::kCopyReadBuffer_Heap, + GrVkGpu::kCopyWriteBuffer_Heap, }; + GR_STATIC_ASSERT(0 == GrVkBuffer::kVertex_Type); + GR_STATIC_ASSERT(1 == GrVkBuffer::kIndex_Type); + GR_STATIC_ASSERT(2 == GrVkBuffer::kUniform_Type); + GR_STATIC_ASSERT(3 == GrVkBuffer::kCopyRead_Type); + GR_STATIC_ASSERT(4 == GrVkBuffer::kCopyWrite_Type); - VkResult err = GR_VK_CALL(gpu->vkInterface(), AllocateMemory(gpu->device(), - &allocInfo, - nullptr, - memory)); - if (err) { - return false; - } - return true; + return kBufferToHeap[type]; } bool GrVkMemory::AllocAndBindBufferMemory(const GrVkGpu* gpu, VkBuffer buffer, - const VkMemoryPropertyFlags flags, + GrVkBuffer::Type type, GrVkAlloc* alloc) { const GrVkInterface* iface = gpu->vkInterface(); VkDevice device = gpu->device(); @@ -68,30 +56,61 @@ bool GrVkMemory::AllocAndBindBufferMemory(const GrVkGpu* gpu, VkMemoryRequirements memReqs; GR_VK_CALL(iface, GetBufferMemoryRequirements(device, buffer, &memReqs)); - if (!alloc_device_memory(gpu, &memReqs, flags, &alloc->fMemory)) { + VkMemoryPropertyFlags desiredMemProps = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + uint32_t typeIndex; + if (!get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(), + memReqs.memoryTypeBits, + desiredMemProps, + &typeIndex)) { + // this memory type should always be available + 
SkASSERT_RELEASE(get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(), + memReqs.memoryTypeBits, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + &typeIndex)); + } + + GrVkHeap* heap = gpu->getHeap(buffer_type_to_heap(type)); + + if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, alloc)) { + SkDebugf("Failed to alloc buffer\n"); return false; } - // for now, offset is always 0 - alloc->fOffset = 0; // Bind Memory to device VkResult err = GR_VK_CALL(iface, BindBufferMemory(device, buffer, alloc->fMemory, alloc->fOffset)); if (err) { - GR_VK_CALL(iface, FreeMemory(device, alloc->fMemory, nullptr)); + SkASSERT_RELEASE(heap->free(*alloc)); return false; } + return true; } -void GrVkMemory::FreeBufferMemory(const GrVkGpu* gpu, const GrVkAlloc& alloc) { - const GrVkInterface* iface = gpu->vkInterface(); - GR_VK_CALL(iface, FreeMemory(gpu->device(), alloc.fMemory, nullptr)); +void GrVkMemory::FreeBufferMemory(const GrVkGpu* gpu, GrVkBuffer::Type type, + const GrVkAlloc& alloc) { + + GrVkHeap* heap = gpu->getHeap(buffer_type_to_heap(type)); + SkASSERT_RELEASE(heap->free(alloc)); +} + +// for debugging +static uint64_t gTotalImageMemory = 0; +static uint64_t gTotalImageMemoryFullPage = 0; + +const VkDeviceSize kMaxSmallImageSize = 16 * 1024; +const VkDeviceSize kMinVulkanPageSize = 16 * 1024; + +static VkDeviceSize align_size(VkDeviceSize size, VkDeviceSize alignment) { + return (size + alignment - 1) & ~(alignment - 1); } bool GrVkMemory::AllocAndBindImageMemory(const GrVkGpu* gpu, VkImage image, - const VkMemoryPropertyFlags flags, + bool linearTiling, GrVkAlloc* alloc) { const GrVkInterface* iface = gpu->vkInterface(); VkDevice device = gpu->device(); @@ -99,25 +118,76 @@ bool GrVkMemory::AllocAndBindImageMemory(const GrVkGpu* gpu, VkMemoryRequirements memReqs; GR_VK_CALL(iface, GetImageMemoryRequirements(device, image, &memReqs)); - if (!alloc_device_memory(gpu, &memReqs, flags, &alloc->fMemory)) { + uint32_t typeIndex; + GrVkHeap* heap; + if (linearTiling) { + VkMemoryPropertyFlags desiredMemProps = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + if (!get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(), + memReqs.memoryTypeBits, + desiredMemProps, + &typeIndex)) { + // this memory type should always be available + SkASSERT_RELEASE(get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(), + memReqs.memoryTypeBits, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + &typeIndex)); + } + heap = gpu->getHeap(GrVkGpu::kLinearImage_Heap); + } else { + // this memory type should always be available + SkASSERT_RELEASE(get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(), + memReqs.memoryTypeBits, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + &typeIndex)); + if (memReqs.size <= kMaxSmallImageSize) { + heap = gpu->getHeap(GrVkGpu::kSmallOptimalImage_Heap); + } else { + heap = gpu->getHeap(GrVkGpu::kOptimalImage_Heap); + } + } + + if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, alloc)) { + SkDebugf("Failed to alloc image\n"); return false; } - // for now, offset is always 0 - alloc->fOffset = 0; // Bind Memory to device VkResult err = GR_VK_CALL(iface, BindImageMemory(device, image, alloc->fMemory, alloc->fOffset)); if (err) { - GR_VK_CALL(iface, FreeMemory(device, alloc->fMemory, nullptr)); + SkASSERT_RELEASE(heap->free(*alloc)); return false; } + + gTotalImageMemory += alloc->fSize; + + VkDeviceSize 
pageAlignedSize = align_size(alloc->fSize, kMinVulkanPageSize); + gTotalImageMemoryFullPage += pageAlignedSize; + return true; } -void GrVkMemory::FreeImageMemory(const GrVkGpu* gpu, const GrVkAlloc& alloc) { - const GrVkInterface* iface = gpu->vkInterface(); - GR_VK_CALL(iface, FreeMemory(gpu->device(), alloc.fMemory, nullptr)); +void GrVkMemory::FreeImageMemory(const GrVkGpu* gpu, bool linearTiling, + const GrVkAlloc& alloc) { + GrVkHeap* heap; + if (linearTiling) { + heap = gpu->getHeap(GrVkGpu::kLinearImage_Heap); + } else if (alloc.fSize <= kMaxSmallImageSize) { + heap = gpu->getHeap(GrVkGpu::kSmallOptimalImage_Heap); + } else { + heap = gpu->getHeap(GrVkGpu::kOptimalImage_Heap); + } + if (!heap->free(alloc)) { + // must be an adopted allocation + GR_VK_CALL(gpu->vkInterface(), FreeMemory(gpu->device(), alloc.fMemory, nullptr)); + } else { + gTotalImageMemory -= alloc.fSize; + VkDeviceSize pageAlignedSize = align_size(alloc.fSize, kMinVulkanPageSize); + gTotalImageMemoryFullPage -= pageAlignedSize; + } } VkPipelineStageFlags GrVkMemory::LayoutToPipelineStageFlags(const VkImageLayout layout) { @@ -169,3 +239,289 @@ VkAccessFlags GrVkMemory::LayoutToSrcAccessMask(const VkImageLayout layout) { } return flags; } + +GrVkSubHeap::GrVkSubHeap(const GrVkGpu* gpu, uint32_t memoryTypeIndex, + VkDeviceSize size, VkDeviceSize alignment) + : fGpu(gpu) + , fMemoryTypeIndex(memoryTypeIndex) { + + VkMemoryAllocateInfo allocInfo = { + VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // sType + NULL, // pNext + size, // allocationSize + memoryTypeIndex, // memoryTypeIndex + }; + + VkResult err = GR_VK_CALL(gpu->vkInterface(), AllocateMemory(gpu->device(), + &allocInfo, + nullptr, + &fAlloc)); + + if (VK_SUCCESS == err) { + fSize = size; + fAlignment = alignment; + fFreeSize = size; + fLargestBlockSize = size; + fLargestBlockOffset = 0; + + Block* block = fFreeList.addToTail(); + block->fOffset = 0; + block->fSize = fSize; + } else { + fSize = 0; + fAlignment = 0; + fFreeSize = 0; + fLargestBlockSize = 0; + } +} + +GrVkSubHeap::~GrVkSubHeap() { + const GrVkInterface* iface = fGpu->vkInterface(); + GR_VK_CALL(iface, FreeMemory(fGpu->device(), fAlloc, nullptr)); + + fFreeList.reset(); +} + +bool GrVkSubHeap::alloc(VkDeviceSize size, GrVkAlloc* alloc) { + VkDeviceSize alignedSize = align_size(size, fAlignment); + + // find the smallest block big enough for our allocation + FreeList::Iter iter = fFreeList.headIter(); + FreeList::Iter bestFitIter; + VkDeviceSize bestFitSize = fSize + 1; + VkDeviceSize secondLargestSize = 0; + VkDeviceSize secondLargestOffset = 0; + while (iter.get()) { + Block* block = iter.get(); + // need to adjust size to match desired alignment + SkASSERT(align_size(block->fOffset, fAlignment) - block->fOffset == 0); + if (block->fSize >= alignedSize && block->fSize < bestFitSize) { + bestFitIter = iter; + bestFitSize = block->fSize; + } + if (secondLargestSize < block->fSize && block->fOffset != fLargestBlockOffset) { + secondLargestSize = block->fSize; + secondLargestOffset = block->fOffset; + } + iter.next(); + } + SkASSERT(secondLargestSize <= fLargestBlockSize); + + Block* bestFit = bestFitIter.get(); + if (bestFit) { + alloc->fMemory = fAlloc; + SkASSERT(align_size(bestFit->fOffset, fAlignment) == bestFit->fOffset); + alloc->fOffset = bestFit->fOffset; + alloc->fSize = alignedSize; + // adjust or remove current block + VkDeviceSize originalBestFitOffset = bestFit->fOffset; + if (bestFit->fSize > alignedSize) { + bestFit->fOffset += alignedSize; + bestFit->fSize -= alignedSize; + if 
(fLargestBlockOffset == originalBestFitOffset) { + if (bestFit->fSize >= secondLargestSize) { + fLargestBlockSize = bestFit->fSize; + fLargestBlockOffset = bestFit->fOffset; + } else { + fLargestBlockSize = secondLargestSize; + fLargestBlockOffset = secondLargestOffset; + } + } +#ifdef SK_DEBUG + VkDeviceSize largestSize = 0; + iter = fFreeList.headIter(); + while (iter.get()) { + Block* block = iter.get(); + if (largestSize < block->fSize) { + largestSize = block->fSize; + } + iter.next(); + } + SkASSERT(largestSize == fLargestBlockSize) +#endif + } else { + SkASSERT(bestFit->fSize == alignedSize); + if (fLargestBlockOffset == originalBestFitOffset) { + fLargestBlockSize = secondLargestSize; + fLargestBlockOffset = secondLargestOffset; + } + fFreeList.remove(bestFit); +#ifdef SK_DEBUG + VkDeviceSize largestSize = 0; + iter = fFreeList.headIter(); + while (iter.get()) { + Block* block = iter.get(); + if (largestSize < block->fSize) { + largestSize = block->fSize; + } + iter.next(); + } + SkASSERT(largestSize == fLargestBlockSize); +#endif + } + fFreeSize -= alignedSize; + + return true; + } + + SkDebugf("Can't allocate %d bytes, %d bytes available, largest free block %d\n", alignedSize, fFreeSize, fLargestBlockSize); + + return false; +} + + +void GrVkSubHeap::free(const GrVkAlloc& alloc) { + SkASSERT(alloc.fMemory == fAlloc); + + // find the block right after this allocation + FreeList::Iter iter = fFreeList.headIter(); + while (iter.get() && iter.get()->fOffset < alloc.fOffset) { + iter.next(); + } + FreeList::Iter prev = iter; + prev.prev(); + // we have four cases: + // we exactly follow the previous one + Block* block; + if (prev.get() && prev.get()->fOffset + prev.get()->fSize == alloc.fOffset) { + block = prev.get(); + block->fSize += alloc.fSize; + if (block->fOffset == fLargestBlockOffset) { + fLargestBlockSize = block->fSize; + } + // and additionally we may exactly precede the next one + if (iter.get() && iter.get()->fOffset == alloc.fOffset + alloc.fSize) { + block->fSize += iter.get()->fSize; + if (iter.get()->fOffset == fLargestBlockOffset) { + fLargestBlockOffset = block->fOffset; + fLargestBlockSize = block->fSize; + } + fFreeList.remove(iter.get()); + } + // or we only exactly proceed the next one + } else if (iter.get() && iter.get()->fOffset == alloc.fOffset + alloc.fSize) { + block = iter.get(); + block->fSize += alloc.fSize; + if (block->fOffset == fLargestBlockOffset) { + fLargestBlockOffset = alloc.fOffset; + fLargestBlockSize = block->fSize; + } + block->fOffset = alloc.fOffset; + // or we fall somewhere in between, with gaps + } else { + block = fFreeList.addBefore(iter); + block->fOffset = alloc.fOffset; + block->fSize = alloc.fSize; + } + fFreeSize += alloc.fSize; + if (block->fSize > fLargestBlockSize) { + fLargestBlockSize = block->fSize; + fLargestBlockOffset = block->fOffset; + } + +#ifdef SK_DEBUG + VkDeviceSize largestSize = 0; + iter = fFreeList.headIter(); + while (iter.get()) { + Block* block = iter.get(); + if (largestSize < block->fSize) { + largestSize = block->fSize; + } + iter.next(); + } + SkASSERT(fLargestBlockSize == largestSize); +#endif +} + +GrVkHeap::~GrVkHeap() { +} + +bool GrVkHeap::subAlloc(VkDeviceSize size, VkDeviceSize alignment, + uint32_t memoryTypeIndex, GrVkAlloc* alloc) { + VkDeviceSize alignedSize = align_size(size, alignment); + + // first try to find a subheap that fits our allocation request + int bestFitIndex = -1; + VkDeviceSize bestFitSize = 0x7FFFFFFF; + for (auto i = 0; i < fSubHeaps.count(); ++i) { + if 
(fSubHeaps[i]->memoryTypeIndex() == memoryTypeIndex) { + VkDeviceSize heapSize = fSubHeaps[i]->largestBlockSize(); + if (heapSize > alignedSize && heapSize < bestFitSize) { + bestFitIndex = i; + bestFitSize = heapSize; + } + } + } + + if (bestFitIndex >= 0) { + SkASSERT(fSubHeaps[bestFitIndex]->alignment() == alignment); + if (fSubHeaps[bestFitIndex]->alloc(size, alloc)) { + fUsedSize += alloc->fSize; + return true; + } + return false; + } + + // need to allocate a new subheap + SkAutoTDelete<GrVkSubHeap>& subHeap = fSubHeaps.push_back(); + subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, fSubHeapSize, alignment)); + fAllocSize += fSubHeapSize; + if (subHeap->alloc(size, alloc)) { + fUsedSize += alloc->fSize; + return true; + } + + return false; +} + +bool GrVkHeap::singleAlloc(VkDeviceSize size, VkDeviceSize alignment, + uint32_t memoryTypeIndex, GrVkAlloc* alloc) { + VkDeviceSize alignedSize = align_size(size, alignment); + + // first try to find an unallocated subheap that fits our allocation request + int bestFitIndex = -1; + VkDeviceSize bestFitSize = 0x7FFFFFFF; + for (auto i = 0; i < fSubHeaps.count(); ++i) { + if (fSubHeaps[i]->memoryTypeIndex() == memoryTypeIndex && fSubHeaps[i]->unallocated()) { + VkDeviceSize heapSize = fSubHeaps[i]->size(); + if (heapSize > alignedSize && heapSize < bestFitSize) { + bestFitIndex = i; + bestFitSize = heapSize; + } + } + } + + if (bestFitIndex >= 0) { + SkASSERT(fSubHeaps[bestFitIndex]->alignment() == alignment); + if (fSubHeaps[bestFitIndex]->alloc(size, alloc)) { + fUsedSize += alloc->fSize; + return true; + } + return false; + } + + // need to allocate a new subheap + SkAutoTDelete<GrVkSubHeap>& subHeap = fSubHeaps.push_back(); + subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, alignedSize, alignment)); + fAllocSize += alignedSize; + if (subHeap->alloc(size, alloc)) { + fUsedSize += alloc->fSize; + return true; + } + + return false; +} + +bool GrVkHeap::free(const GrVkAlloc& alloc) { + for (auto i = 0; i < fSubHeaps.count(); ++i) { + if (fSubHeaps[i]->memory() == alloc.fMemory) { + fSubHeaps[i]->free(alloc); + fUsedSize -= alloc.fSize; + return true; + } + } + + return false; +} + + diff --git a/src/gpu/vk/GrVkMemory.h b/src/gpu/vk/GrVkMemory.h index 279dd58dd5..197bbe8719 100644 --- a/src/gpu/vk/GrVkMemory.h +++ b/src/gpu/vk/GrVkMemory.h @@ -8,6 +8,9 @@ #ifndef GrVkMemory_DEFINED #define GrVkMemory_DEFINED +#include "GrVkBuffer.h" +#include "SkTArray.h" +#include "SkTLList.h" #include "vk/GrVkDefines.h" #include "vk/GrVkTypes.h" @@ -16,23 +19,102 @@ class GrVkGpu; namespace GrVkMemory { /** * Allocates vulkan device memory and binds it to the gpu's device for the given object. - * Returns true of allocation succeeded. + * Returns true if allocation succeeded. 
*/ bool AllocAndBindBufferMemory(const GrVkGpu* gpu, VkBuffer buffer, - const VkMemoryPropertyFlags flags, + GrVkBuffer::Type type, GrVkAlloc* alloc); - void FreeBufferMemory(const GrVkGpu* gpu, const GrVkAlloc& alloc); + void FreeBufferMemory(const GrVkGpu* gpu, GrVkBuffer::Type type, const GrVkAlloc& alloc); bool AllocAndBindImageMemory(const GrVkGpu* gpu, VkImage image, - const VkMemoryPropertyFlags flags, + bool linearTiling, GrVkAlloc* alloc); - void FreeImageMemory(const GrVkGpu* gpu, const GrVkAlloc& alloc); + void FreeImageMemory(const GrVkGpu* gpu, bool linearTiling, const GrVkAlloc& alloc); VkPipelineStageFlags LayoutToPipelineStageFlags(const VkImageLayout layout); VkAccessFlags LayoutToSrcAccessMask(const VkImageLayout layout); } +class GrVkSubHeap { +public: + GrVkSubHeap(const GrVkGpu* gpu, uint32_t memoryTypeIndex, + VkDeviceSize size, VkDeviceSize alignment); + ~GrVkSubHeap(); + + uint32_t memoryTypeIndex() const { return fMemoryTypeIndex; } + VkDeviceSize size() const { return fSize; } + VkDeviceSize alignment() const { return fAlignment; } + VkDeviceSize freeSize() const { return fFreeSize; } + VkDeviceSize largestBlockSize() const { return fLargestBlockSize; } + VkDeviceMemory memory() { return fAlloc; } + + bool unallocated() const { return fSize == fFreeSize; } + + bool alloc(VkDeviceSize size, GrVkAlloc* alloc); + void free(const GrVkAlloc& alloc); + +private: + struct Block { + VkDeviceSize fOffset; + VkDeviceSize fSize; + }; + typedef SkTLList<Block, 16> FreeList; + + const GrVkGpu* fGpu; + uint32_t fMemoryTypeIndex; + VkDeviceSize fSize; + VkDeviceSize fAlignment; + VkDeviceSize fFreeSize; + VkDeviceSize fLargestBlockSize; + VkDeviceSize fLargestBlockOffset; + VkDeviceMemory fAlloc; + FreeList fFreeList; +}; + +class GrVkHeap { +public: + enum Strategy { + kSubAlloc_Strategy, // alloc large subheaps and suballoc within them + kSingleAlloc_Strategy // alloc/recycle an individual subheap per object + }; + + GrVkHeap(const GrVkGpu* gpu, Strategy strategy, VkDeviceSize subHeapSize) + : fGpu(gpu) + , fSubHeapSize(subHeapSize) + , fAllocSize(0) + , fUsedSize(0) { + if (strategy == kSubAlloc_Strategy) { + fAllocFunc = &GrVkHeap::subAlloc; + } else { + fAllocFunc = &GrVkHeap::singleAlloc; + } + } + + ~GrVkHeap(); + + bool alloc(VkDeviceSize size, VkDeviceSize alignment, uint32_t memoryTypeIndex, + GrVkAlloc* alloc) { + return (*this.*fAllocFunc)(size, alignment, memoryTypeIndex, alloc); + } + bool free(const GrVkAlloc& alloc); + +private: + typedef bool (GrVkHeap::*AllocFunc)(VkDeviceSize size, VkDeviceSize alignment, + uint32_t memoryTypeIndex, GrVkAlloc* alloc); + + bool subAlloc(VkDeviceSize size, VkDeviceSize alignment, + uint32_t memoryTypeIndex, GrVkAlloc* alloc); + bool singleAlloc(VkDeviceSize size, VkDeviceSize alignment, + uint32_t memoryTypeIndex, GrVkAlloc* alloc); + + const GrVkGpu* fGpu; + VkDeviceSize fSubHeapSize; + VkDeviceSize fAllocSize; + VkDeviceSize fUsedSize; + AllocFunc fAllocFunc; + SkTArray<SkAutoTDelete<GrVkSubHeap>> fSubHeaps; +}; #endif diff --git a/src/gpu/vk/GrVkTexture.cpp b/src/gpu/vk/GrVkTexture.cpp index 0adf87b413..5d15311cbf 100644 --- a/src/gpu/vk/GrVkTexture.cpp +++ b/src/gpu/vk/GrVkTexture.cpp @@ -216,7 +216,7 @@ bool GrVkTexture::reallocForMipmap(const GrVkGpu* gpu, uint32_t mipLevels) { fLinearTextureView = nullptr; } - this->setNewResource(info.fImage, info.fAlloc); + this->setNewResource(info.fImage, info.fAlloc, info.fImageTiling); fTextureView = textureView; fInfo = info; this->texturePriv().setMaxMipMapLevel(mipLevels); |
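
A few notes on the new allocation path, illustrated with standalone sketches rather than the Skia sources. First, the buffer path and the linear-tiling image path now request HOST_VISIBLE | HOST_COHERENT | HOST_CACHED memory and fall back to plain HOST_VISIBLE | HOST_COHERENT when no cached type exists; the fallback is asserted to succeed because Vulkan guarantees a host-visible, host-coherent memory type. The helper below follows the same pattern as get_valid_memory_type_index in the diff; it assumes the Vulkan headers are available and that the caller has queried VkPhysicalDeviceMemoryProperties, and the function names are illustrative.

```cpp
#include <vulkan/vulkan.h>
#include <cstdint>

// Find a memory type that is allowed by typeBits and has all requested flags,
// the same search the diff performs in get_valid_memory_type_index().
bool find_memory_type(const VkPhysicalDeviceMemoryProperties& props,
                      uint32_t typeBits,
                      VkMemoryPropertyFlags wanted,
                      uint32_t* typeIndex) {
    for (uint32_t i = 0; i < props.memoryTypeCount; ++i) {
        if ((typeBits & (1u << i)) &&
            (props.memoryTypes[i].propertyFlags & wanted) == wanted) {
            *typeIndex = i;
            return true;
        }
    }
    return false;
}

// Prefer cached host-visible memory, then fall back to host-visible/coherent,
// mirroring the buffer and linear-image paths in the diff.
bool pick_host_memory_type(const VkPhysicalDeviceMemoryProperties& props,
                           uint32_t typeBits, uint32_t* typeIndex) {
    const VkMemoryPropertyFlags cached =
        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
        VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
        VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
    const VkMemoryPropertyFlags fallback =
        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
        VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
    return find_memory_type(props, typeBits, cached, typeIndex) ||
           find_memory_type(props, typeBits, fallback, typeIndex);
}
```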
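Each GrVkSubHeap owns a single VkDeviceMemory allocation and parcels it out from a free list kept sorted by offset: alloc() does a best-fit search and splits the chosen block, and free() merges the returned range with adjacent free blocks. The sketch below reproduces that bookkeeping over plain integers with std::list; it omits the Vulkan allocation, Skia's SkTLList container, and the cached largest-block tracking, so it is an illustration of the algorithm, not the GrVkSubHeap implementation.

```cpp
#include <cstdint>
#include <iterator>
#include <list>

// Offset/size bookkeeping for one sub-heap; offsets are relative to a single
// device memory allocation made elsewhere.
class FreeListSuballocator {
public:
    FreeListSuballocator(uint64_t size, uint64_t alignment)
        : fAlignment(alignment) {
        fFreeList.push_back({0, size});          // one block spanning the heap
    }

    // Best-fit allocation: pick the smallest free block that can hold the
    // aligned size, then split it.
    bool alloc(uint64_t size, uint64_t* offset) {
        uint64_t aligned = (size + fAlignment - 1) & ~(fAlignment - 1);
        auto best = fFreeList.end();
        for (auto it = fFreeList.begin(); it != fFreeList.end(); ++it) {
            if (it->fSize >= aligned &&
                (best == fFreeList.end() || it->fSize < best->fSize)) {
                best = it;
            }
        }
        if (best == fFreeList.end()) {
            return false;                        // fragmentation or exhaustion
        }
        *offset = best->fOffset;
        best->fOffset += aligned;
        best->fSize -= aligned;
        if (best->fSize == 0) {
            fFreeList.erase(best);
        }
        return true;
    }

    // Coalescing free: merge with the previous and/or next block when the
    // returned range is exactly adjacent, otherwise insert a new block.
    void free(uint64_t offset, uint64_t size) {
        uint64_t aligned = (size + fAlignment - 1) & ~(fAlignment - 1);
        auto next = fFreeList.begin();
        while (next != fFreeList.end() && next->fOffset < offset) {
            ++next;
        }
        if (next != fFreeList.begin()) {
            auto prev = std::prev(next);
            if (prev->fOffset + prev->fSize == offset) {     // merge backward
                prev->fSize += aligned;
                if (next != fFreeList.end() &&
                    prev->fOffset + prev->fSize == next->fOffset) {
                    prev->fSize += next->fSize;              // and forward
                    fFreeList.erase(next);
                }
                return;
            }
        }
        if (next != fFreeList.end() && offset + aligned == next->fOffset) {
            next->fOffset = offset;                          // merge forward
            next->fSize += aligned;
            return;
        }
        fFreeList.insert(next, {offset, aligned});           // isolated block
    }

private:
    struct Block { uint64_t fOffset; uint64_t fSize; };
    uint64_t fAlignment;
    std::list<Block> fFreeList;                  // kept sorted by offset
};
```

Best-fit keeps large blocks intact for large requests, while coalescing on free limits the fragmentation that would otherwise accumulate in long-lived sub-heaps.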
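GrVkHeap itself chooses one of two behaviors at construction time and stores the choice as a pointer-to-member-function (fAllocFunc): kSubAlloc_Strategy carves objects out of large shared sub-heaps (used for images, uniform buffers, and copy-write buffers), while kSingleAlloc_Strategy gives each object its own right-sized sub-heap and recycles fully unallocated ones (used for vertex, index, and copy-read buffers). A stripped-down sketch of that dispatch pattern, with stub strategies standing in for the real sub-heap search:

```cpp
#include <cstdint>
#include <cstdio>

// Demonstrates the dispatch GrVkHeap uses: the allocation strategy is fixed
// at construction by storing a pointer-to-member-function. The two strategies
// below are stubs; the real ones search or create sub-heaps.
class Heap {
public:
    enum Strategy {
        kSubAlloc_Strategy,     // large shared sub-heaps, suballocate within
        kSingleAlloc_Strategy   // one dedicated sub-heap per object
    };

    explicit Heap(Strategy strategy)
        : fAllocFunc(strategy == kSubAlloc_Strategy ? &Heap::subAlloc
                                                    : &Heap::singleAlloc) {}

    bool alloc(uint64_t size, uint64_t alignment) {
        return (this->*fAllocFunc)(size, alignment);
    }

private:
    typedef bool (Heap::*AllocFunc)(uint64_t, uint64_t);

    bool subAlloc(uint64_t size, uint64_t /*alignment*/) {
        std::printf("suballocating %llu bytes\n", (unsigned long long)size);
        return true;
    }
    bool singleAlloc(uint64_t size, uint64_t /*alignment*/) {
        std::printf("dedicated allocation of %llu bytes\n",
                    (unsigned long long)size);
        return true;
    }

    AllocFunc fAllocFunc;
};

int main() {
    Heap uniformHeap(Heap::kSubAlloc_Strategy);    // e.g. small uniform buffers
    Heap vertexHeap(Heap::kSingleAlloc_Strategy);  // e.g. large vertex buffers
    uniformHeap.alloc(256, 256);
    vertexHeap.alloc(1 << 20, 16);
    return 0;
}
```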
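Finally, buffer allocations are routed to per-type heaps through a small lookup table whose order is pinned by static asserts (buffer_type_to_heap in GrVkMemory.cpp), and the GrVkGpu constructor sizes each heap up front (for example, 64 KB sub-heaps for uniform buffers versus single allocations for vertex and index buffers). A sketch of the table-plus-assert idiom; BufferType and HeapId are illustrative stand-ins for GrVkBuffer::Type and GrVkGpu::Heap, not the real enums:

```cpp
// Route buffer types to heaps with a table whose order is pinned by
// static_asserts, in the spirit of buffer_type_to_heap() in the diff.
enum BufferType { kVertex, kIndex, kUniform, kCopyRead, kCopyWrite };
enum HeapId {
    kVertexBufferHeap, kIndexBufferHeap, kUniformBufferHeap,
    kCopyReadBufferHeap, kCopyWriteBufferHeap
};

HeapId buffer_type_to_heap(BufferType type) {
    static const HeapId kBufferToHeap[] = {
        kVertexBufferHeap,
        kIndexBufferHeap,
        kUniformBufferHeap,
        kCopyReadBufferHeap,
        kCopyWriteBufferHeap,
    };
    // If the enum is ever reordered, these fire at compile time instead of
    // silently routing allocations to the wrong heap.
    static_assert(kVertex == 0, "table order");
    static_assert(kIndex == 1, "table order");
    static_assert(kUniform == 2, "table order");
    static_assert(kCopyRead == 3, "table order");
    static_assert(kCopyWrite == 4, "table order");
    return kBufferToHeap[type];
}
```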