author    Greg Daniel <egdaniel@google.com>  2018-05-30 22:59:03 +0000
committer Skia Commit-Bot <skia-commit-bot@chromium.org>  2018-05-30 22:59:08 +0000
commit  a9d3dae67ade82d6e01fd911dc7c63ec41c64107 (patch)
tree    05542f085a298a8b88f005a25ceebc673719dde2
parent  de81143630f379af560467c3ae1335e73a0b9f03 (diff)
Revert "Use GrVkMemoryAllocator for vulkan memory allocations in ganesh."
This reverts commit 331c266ed716526478a10885aff66181cec64486.

Reason for revert: breaking an intel vulkan bot

Original change's description:
> Use GrVkMemoryAllocator for vulkan memory allocations in ganesh.
>
> Besides using the new allocator, the big logical change is that map
> and unmap calls from GrVkMemory are specc'd to map the entire GrVkAlloc
> instead of a specific offset and size as they did before. As a
> consequence of this, we move the handling of non-coherent alignment
> for flush/invalidate calls to GrVkMemory instead of the callers.
>
> Bug: skia:
> Change-Id: I794d713106602f27aa7e808c306bbb69fd2b67be
> Reviewed-on: https://skia-review.googlesource.com/130021
> Commit-Queue: Greg Daniel <egdaniel@google.com>
> Reviewed-by: Jim Van Verth <jvanverth@google.com>

TBR=egdaniel@google.com,jvanverth@google.com,bsalomon@google.com

Change-Id: I5237c00625dc95d3d9b36c1e5591762988d85562
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: skia:
Reviewed-on: https://skia-review.googlesource.com/131081
Reviewed-by: Greg Daniel <egdaniel@google.com>
Commit-Queue: Greg Daniel <egdaniel@google.com>
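For reference, a minimal sketch of the non-coherent alignment handling that the reverted change had centralized in GrVkMemory: flush/invalidate ranges for non-coherent memory must be aligned to VkPhysicalDeviceLimits::nonCoherentAtomSize, so the alloc-relative offset is made absolute, rounded down to an atom boundary, and the size is rounded up. This standalone helper is illustrative only -- it mirrors the GrVkMemory::GetNonCoherentMappedMemoryRange helper removed in GrVkMemory.cpp below, and it assumes the Skia Vulkan headers (GrVkTypes.h) for GrVkAlloc; it is not part of either version of the tree.

// Illustrative sketch only; mirrors the removed GetNonCoherentMappedMemoryRange.
static void get_noncoherent_range_sketch(const GrVkAlloc& alloc, VkDeviceSize offset,
                                         VkDeviceSize size, VkDeviceSize atomSize,
                                         VkMappedMemoryRange* range) {
    offset += alloc.fOffset;                         // caller passes an alloc-relative offset
    VkDeviceSize offsetDiff = offset & (atomSize - 1);
    offset -= offsetDiff;                            // round down to an atom boundary
    size = (size + atomSize - 1) & ~(atomSize - 1);  // round size up to an atom multiple
    memset(range, 0, sizeof(VkMappedMemoryRange));   // needs <cstring>
    range->sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
    range->memory = alloc.fMemory;
    range->offset = offset;
    range->size   = size;
}

With this revert, the same rounding moves back into the callers (GrVkBuffer, GrVkGpu) and into GrVkAMDMemoryAllocator's flush/invalidate paths, as seen in the hunks below.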
-rw-r--r--  gn/tests.gni                           |   1
-rw-r--r--  include/gpu/vk/GrVkBackendContext.h    |   4
-rw-r--r--  include/gpu/vk/GrVkTypes.h             |   2
-rw-r--r--  src/gpu/vk/GrVkAMDMemoryAllocator.cpp  |  44
-rw-r--r--  src/gpu/vk/GrVkBackendContext.cpp      |   3
-rw-r--r--  src/gpu/vk/GrVkBuffer.cpp              |  37
-rw-r--r--  src/gpu/vk/GrVkBuffer.h                |   5
-rw-r--r--  src/gpu/vk/GrVkGpu.cpp                 |  83
-rw-r--r--  src/gpu/vk/GrVkGpu.h                   |  28
-rw-r--r--  src/gpu/vk/GrVkMemory.cpp              | 661
-rw-r--r--  src/gpu/vk/GrVkMemory.h                | 138
-rw-r--r--  tests/VkHeapTests.cpp                  | 239
12 files changed, 1056 insertions(+), 189 deletions(-)
diff --git a/gn/tests.gni b/gn/tests.gni
index 5b4594099b..a359ca3e3c 100644
--- a/gn/tests.gni
+++ b/gn/tests.gni
@@ -275,6 +275,7 @@ tests_sources = [
"$_tests/UtilsTest.cpp",
"$_tests/VerticesTest.cpp",
"$_tests/VkBackendSurfaceTest.cpp",
+ "$_tests/VkHeapTests.cpp",
"$_tests/VkMakeCopyPipelineTest.cpp",
"$_tests/VkUploadPixelsTests.cpp",
"$_tests/VkWrapTests.cpp",
diff --git a/include/gpu/vk/GrVkBackendContext.h b/include/gpu/vk/GrVkBackendContext.h
index 212362873a..fdc71d373f 100644
--- a/include/gpu/vk/GrVkBackendContext.h
+++ b/include/gpu/vk/GrVkBackendContext.h
@@ -13,8 +13,6 @@
#include "vk/GrVkDefines.h"
#include "vk/GrVkInterface.h"
-class GrVkMemoryAllocator;
-
enum GrVkExtensionFlags {
kEXT_debug_report_GrVkExtensionFlag = 0x0001,
kNV_glsl_shader_GrVkExtensionFlag = 0x0002,
@@ -47,8 +45,6 @@ struct SK_API GrVkBackendContext : public SkRefCnt {
uint32_t fExtensions;
uint32_t fFeatures;
sk_sp<const GrVkInterface> fInterface;
- sk_sp<GrVkMemoryAllocator> fMemoryAllocator;
-
/**
* Controls whether this object destroys the instance and device upon destruction. The default
* is temporarily 'true' to avoid breaking existing clients but will be changed to 'false'.
diff --git a/include/gpu/vk/GrVkTypes.h b/include/gpu/vk/GrVkTypes.h
index 9225e92778..2e31250324 100644
--- a/include/gpu/vk/GrVkTypes.h
+++ b/include/gpu/vk/GrVkTypes.h
@@ -10,7 +10,7 @@
#define GrVkTypes_DEFINED
#include "GrTypes.h"
-#include "GrVkDefines.h"
+#include "vk/GrVkDefines.h"
/**
* KHR_debug
diff --git a/src/gpu/vk/GrVkAMDMemoryAllocator.cpp b/src/gpu/vk/GrVkAMDMemoryAllocator.cpp
index 93e2fff494..0b838ece3a 100644
--- a/src/gpu/vk/GrVkAMDMemoryAllocator.cpp
+++ b/src/gpu/vk/GrVkAMDMemoryAllocator.cpp
@@ -8,7 +8,6 @@
#include "GrVkAMDMemoryAllocator.h"
#include "vk/GrVkInterface.h"
-#include "GrVkMemory.h"
#include "GrVkUtil.h"
GrVkAMDMemoryAllocator::GrVkAMDMemoryAllocator(VkPhysicalDevice physicalDevice,
@@ -43,10 +42,7 @@ GrVkAMDMemoryAllocator::GrVkAMDMemoryAllocator(VkPhysicalDevice physicalDevice,
info.flags = 0;
info.physicalDevice = physicalDevice;
info.device = device;
- // Manually testing runs of dm using 64 here instead of the default 256 shows less memory usage
- // on average. Also dm seems to run faster using 64 so it doesn't seem to be trading off speed
- // for memory.
- info.preferredLargeHeapBlockSize = 64*1024*1024;
+ info.preferredLargeHeapBlockSize = 0;
info.pAllocationCallbacks = nullptr;
info.pDeviceMemoryCallbacks = nullptr;
info.frameInUseCount = 0;
@@ -202,9 +198,24 @@ void GrVkAMDMemoryAllocator::flushMappedMemory(const GrVkBackendMemory& memoryHa
vmaGetPhysicalDeviceProperties(fAllocator, &physDevProps);
VkDeviceSize alignment = physDevProps->limits.nonCoherentAtomSize;
+ offset = offset + info.fOffset;
+ VkDeviceSize offsetDiff = offset & (alignment -1);
+ offset = offset - offsetDiff;
+ size = (size + alignment - 1) & ~(alignment - 1);
+#ifdef SK_DEBUG
+ SkASSERT(offset >= info.fOffset);
+ SkASSERT(offset + size <= info.fOffset + info.fSize);
+ SkASSERT(0 == (offset & (alignment-1)));
+ SkASSERT(size > 0);
+ SkASSERT(0 == (size & (alignment-1)));
+#endif
+
VkMappedMemoryRange mappedMemoryRange;
- GrVkMemory::GetNonCoherentMappedMemoryRange(info, offset, size, alignment,
- &mappedMemoryRange);
+ memset(&mappedMemoryRange, 0, sizeof(VkMappedMemoryRange));
+ mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
+ mappedMemoryRange.memory = info.fMemory;
+ mappedMemoryRange.offset = offset;
+ mappedMemoryRange.size = size;
GR_VK_CALL(fInterface, FlushMappedMemoryRanges(fDevice, 1, &mappedMemoryRange));
}
}
@@ -220,9 +231,24 @@ void GrVkAMDMemoryAllocator::invalidateMappedMemory(const GrVkBackendMemory& mem
vmaGetPhysicalDeviceProperties(fAllocator, &physDevProps);
VkDeviceSize alignment = physDevProps->limits.nonCoherentAtomSize;
+ offset = offset + info.fOffset;
+ VkDeviceSize offsetDiff = offset & (alignment -1);
+ offset = offset - offsetDiff;
+ size = (size + alignment - 1) & ~(alignment - 1);
+#ifdef SK_DEBUG
+ SkASSERT(offset >= info.fOffset);
+ SkASSERT(offset + size <= info.fOffset + info.fSize);
+ SkASSERT(0 == (offset & (alignment-1)));
+ SkASSERT(size > 0);
+ SkASSERT(0 == (size & (alignment-1)));
+#endif
+
VkMappedMemoryRange mappedMemoryRange;
- GrVkMemory::GetNonCoherentMappedMemoryRange(info, offset, size, alignment,
- &mappedMemoryRange);
+ memset(&mappedMemoryRange, 0, sizeof(VkMappedMemoryRange));
+ mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
+ mappedMemoryRange.memory = info.fMemory;
+ mappedMemoryRange.offset = offset;
+ mappedMemoryRange.size = size;
GR_VK_CALL(fInterface, InvalidateMappedMemoryRanges(fDevice, 1, &mappedMemoryRange));
}
}
diff --git a/src/gpu/vk/GrVkBackendContext.cpp b/src/gpu/vk/GrVkBackendContext.cpp
index 196b141493..269a8911e4 100644
--- a/src/gpu/vk/GrVkBackendContext.cpp
+++ b/src/gpu/vk/GrVkBackendContext.cpp
@@ -8,7 +8,7 @@
#include "SkAutoMalloc.h"
#include "vk/GrVkBackendContext.h"
#include "vk/GrVkExtensions.h"
-#include "vk/GrVkMemoryAllocator.h"
+#include "vk/GrVkInterface.h"
#include "vk/GrVkUtil.h"
////////////////////////////////////////////////////////////////////////////////
@@ -323,7 +323,6 @@ const GrVkBackendContext* GrVkBackendContext::Create(uint32_t* presentQueueIndex
}
GrVkBackendContext::~GrVkBackendContext() {
- fMemoryAllocator.reset();
if (fInterface == nullptr || !fOwnsInstanceAndDevice) {
return;
}
diff --git a/src/gpu/vk/GrVkBuffer.cpp b/src/gpu/vk/GrVkBuffer.cpp
index b3c1d825aa..f65b15ded0 100644
--- a/src/gpu/vk/GrVkBuffer.cpp
+++ b/src/gpu/vk/GrVkBuffer.cpp
@@ -170,10 +170,28 @@ void GrVkBuffer::internalMap(GrVkGpu* gpu, size_t size, bool* createdNewBuffer)
if (fDesc.fDynamic) {
const GrVkAlloc& alloc = this->alloc();
SkASSERT(alloc.fSize > 0);
- SkASSERT(alloc.fSize >= size);
- SkASSERT(0 == fOffset);
- fMapPtr = GrVkMemory::MapAlloc(gpu, alloc);
+ // For Noncoherent buffers we want to make sure the range that we map, both offset and size,
+ // are aligned to the nonCoherentAtomSize limit. The offset should have been correctly
+ // aligned by our memory allocator. For size we pad out to make the range also aligned.
+ if (SkToBool(alloc.fFlags & GrVkAlloc::kNoncoherent_Flag)) {
+ // Currently we always have the internal offset as 0.
+ SkASSERT(0 == fOffset);
+ VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize;
+ SkASSERT(0 == (alloc.fOffset & (alignment - 1)));
+
+ // Make size of the map aligned to nonCoherentAtomSize
+ size = (size + alignment - 1) & ~(alignment - 1);
+ fMappedSize = size;
+ }
+ SkASSERT(size + fOffset <= alloc.fSize);
+ VkResult err = VK_CALL(gpu, MapMemory(gpu->device(), alloc.fMemory,
+ alloc.fOffset + fOffset,
+ size, 0, &fMapPtr));
+ if (err) {
+ fMapPtr = nullptr;
+ fMappedSize = 0;
+ }
} else {
if (!fMapPtr) {
fMapPtr = new unsigned char[this->size()];
@@ -188,15 +206,16 @@ void GrVkBuffer::internalUnmap(GrVkGpu* gpu, size_t size) {
SkASSERT(this->vkIsMapped());
if (fDesc.fDynamic) {
- const GrVkAlloc& alloc = this->alloc();
- SkASSERT(alloc.fSize > 0);
- SkASSERT(alloc.fSize >= size);
// We currently don't use fOffset
SkASSERT(0 == fOffset);
+ VkDeviceSize flushOffset = this->alloc().fOffset + fOffset;
+ VkDeviceSize flushSize = gpu->vkCaps().canUseWholeSizeOnFlushMappedMemory() ? VK_WHOLE_SIZE
+ : fMappedSize;
- GrVkMemory::FlushMappedAlloc(gpu, alloc, 0, size);
- GrVkMemory::UnmapAlloc(gpu, alloc);
+ GrVkMemory::FlushMappedAlloc(gpu, this->alloc(), flushOffset, flushSize);
+ VK_CALL(gpu, UnmapMemory(gpu->device(), this->alloc().fMemory));
fMapPtr = nullptr;
+ fMappedSize = 0;
} else {
// vkCmdUpdateBuffer requires size < 64k and 4-byte alignment.
// https://bugs.chromium.org/p/skia/issues/detail?id=7488
@@ -205,7 +224,7 @@ void GrVkBuffer::internalUnmap(GrVkGpu* gpu, size_t size) {
} else {
GrVkTransferBuffer* transferBuffer =
GrVkTransferBuffer::Create(gpu, size, GrVkBuffer::kCopyRead_Type);
- if (!transferBuffer) {
+ if(!transferBuffer) {
return;
}
diff --git a/src/gpu/vk/GrVkBuffer.h b/src/gpu/vk/GrVkBuffer.h
index 6d0c1fda9a..8d116a40f8 100644
--- a/src/gpu/vk/GrVkBuffer.h
+++ b/src/gpu/vk/GrVkBuffer.h
@@ -82,7 +82,7 @@ protected:
const Desc& descriptor);
GrVkBuffer(const Desc& desc, const GrVkBuffer::Resource* resource)
- : fDesc(desc), fResource(resource), fOffset(0), fMapPtr(nullptr) {
+ : fDesc(desc), fResource(resource), fOffset(0), fMapPtr(nullptr), fMappedSize(0) {
}
void* vkMap(GrVkGpu* gpu) {
@@ -115,6 +115,9 @@ private:
const Resource* fResource;
VkDeviceSize fOffset;
void* fMapPtr;
+ // On certain Intel devices/drivers there is a bug if we try to flush non-coherent memory and
+ // pass in VK_WHOLE_SIZE. Thus we track our mapped size and explicitly set it when calling flush
+ VkDeviceSize fMappedSize;
typedef SkNoncopyable INHERITED;
};
diff --git a/src/gpu/vk/GrVkGpu.cpp b/src/gpu/vk/GrVkGpu.cpp
index 2525c5c16c..56d0b95bd0 100644
--- a/src/gpu/vk/GrVkGpu.cpp
+++ b/src/gpu/vk/GrVkGpu.cpp
@@ -17,7 +17,6 @@
#include "GrRenderTargetPriv.h"
#include "GrTexturePriv.h"
-#include "GrVkAMDMemoryAllocator.h"
#include "GrVkCommandBuffer.h"
#include "GrVkGpuCommandBuffer.h"
#include "GrVkImage.h"
@@ -93,7 +92,6 @@ GrVkGpu::GrVkGpu(GrContext* context, const GrContextOptions& options,
sk_sp<const GrVkBackendContext> backendCtx)
: INHERITED(context)
, fBackendContext(std::move(backendCtx))
- , fMemoryAllocator(fBackendContext->fMemoryAllocator)
, fDevice(fBackendContext->fDevice)
, fQueue(fBackendContext->fQueue)
, fResourceProvider(this)
@@ -120,12 +118,6 @@ GrVkGpu::GrVkGpu(GrContext* context, const GrContextOptions& options,
}
#endif
- if (!fMemoryAllocator) {
- // We were not given a memory allocator at creation
- fMemoryAllocator.reset(new GrVkAMDMemoryAllocator(fBackendContext->fPhysicalDevice,
- fDevice, fBackendContext->fInterface));
- }
-
fCompiler = new SkSL::Compiler();
fVkCaps.reset(new GrVkCaps(options, this->vkInterface(), fBackendContext->fPhysicalDevice,
@@ -150,6 +142,17 @@ GrVkGpu::GrVkGpu(GrContext* context, const GrContextOptions& options,
fCurrentCmdBuffer = fResourceProvider.findOrCreatePrimaryCommandBuffer();
SkASSERT(fCurrentCmdBuffer);
fCurrentCmdBuffer->begin(this);
+
+ // set up our heaps
+ fHeaps[kLinearImage_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 16*1024*1024));
+ fHeaps[kOptimalImage_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 64*1024*1024));
+ fHeaps[kSmallOptimalImage_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 2*1024*1024));
+ fHeaps[kVertexBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSingleAlloc_Strategy, 0));
+ fHeaps[kIndexBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSingleAlloc_Strategy, 0));
+ fHeaps[kUniformBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 256*1024));
+ fHeaps[kTexelBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSingleAlloc_Strategy, 0));
+ fHeaps[kCopyReadBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSingleAlloc_Strategy, 0));
+ fHeaps[kCopyWriteBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 16*1024*1024));
}
void GrVkGpu::destroyResources() {
@@ -559,6 +562,7 @@ bool GrVkGpu::uploadTexDataLinear(GrVkTexture* tex, GrSurfaceOrigin texOrigin, i
0, // arraySlice
};
VkSubresourceLayout layout;
+ VkResult err;
const GrVkInterface* interface = this->vkInterface();
@@ -569,14 +573,28 @@ bool GrVkGpu::uploadTexDataLinear(GrVkTexture* tex, GrSurfaceOrigin texOrigin, i
int texTop = kBottomLeft_GrSurfaceOrigin == texOrigin ? tex->height() - top - height : top;
const GrVkAlloc& alloc = tex->alloc();
- VkDeviceSize offset = texTop*layout.rowPitch + left*bpp;
+ VkDeviceSize offset = alloc.fOffset + texTop*layout.rowPitch + left*bpp;
+ VkDeviceSize offsetDiff = 0;
VkDeviceSize size = height*layout.rowPitch;
- SkASSERT(size + offset <= alloc.fSize);
- void* mapPtr = GrVkMemory::MapAlloc(this, alloc);
- if (!mapPtr) {
+ // For Noncoherent buffers we want to make sure the range that we map, both offset and size,
+ // are aligned to the nonCoherentAtomSize limit. We may have to move the initial offset back to
+ // meet the alignment requirements. So we track how far we move back and then adjust the mapped
+ // ptr back up so that this is opaque to the caller.
+ if (SkToBool(alloc.fFlags & GrVkAlloc::kNoncoherent_Flag)) {
+ VkDeviceSize alignment = this->physicalDeviceProperties().limits.nonCoherentAtomSize;
+ offsetDiff = offset & (alignment - 1);
+ offset = offset - offsetDiff;
+ // Make size of the map aligned to nonCoherentAtomSize
+ size = (size + alignment - 1) & ~(alignment - 1);
+ }
+ SkASSERT(offset >= alloc.fOffset);
+ SkASSERT(size <= alloc.fOffset + alloc.fSize);
+ void* mapPtr;
+ err = GR_VK_CALL(interface, MapMemory(fDevice, alloc.fMemory, offset, size, 0, &mapPtr));
+ if (err) {
return false;
}
- mapPtr = reinterpret_cast<char*>(mapPtr) + offset;
+ mapPtr = reinterpret_cast<char*>(mapPtr) + offsetDiff;
if (kBottomLeft_GrSurfaceOrigin == texOrigin) {
// copy into buffer by rows
@@ -593,7 +611,7 @@ bool GrVkGpu::uploadTexDataLinear(GrVkTexture* tex, GrSurfaceOrigin texOrigin, i
}
GrVkMemory::FlushMappedAlloc(this, alloc, offset, size);
- GrVkMemory::UnmapAlloc(this, alloc);
+ GR_VK_CALL(interface, UnmapMemory(fDevice, alloc.fMemory));
return true;
}
@@ -1129,14 +1147,33 @@ GrStencilAttachment* GrVkGpu::createStencilAttachmentForRenderTarget(const GrRen
bool copy_testing_data(GrVkGpu* gpu, const void* srcData, const GrVkAlloc& alloc,
size_t bufferOffset, size_t srcRowBytes, size_t dstRowBytes, int h) {
- VkDeviceSize size = dstRowBytes * h;
- VkDeviceSize offset = bufferOffset;
- SkASSERT(size + offset <= alloc.fSize);
- void* mapPtr = GrVkMemory::MapAlloc(gpu, alloc);
- if (!mapPtr) {
+ // For Noncoherent buffers we want to make sure the range that we map, both offset and size,
+ // are aligned to the nonCoherentAtomSize limit. We may have to move the initial offset back to
+ // meet the alignment requirements. So we track how far we move back and then adjust the mapped
+ // ptr back up so that this is opaque to the caller.
+ VkDeviceSize mapSize = dstRowBytes * h;
+ VkDeviceSize mapOffset = alloc.fOffset + bufferOffset;
+ VkDeviceSize offsetDiff = 0;
+ if (SkToBool(alloc.fFlags & GrVkAlloc::kNoncoherent_Flag)) {
+ VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize;
+ offsetDiff = mapOffset & (alignment - 1);
+ mapOffset = mapOffset - offsetDiff;
+ // Make size of the map aligned to nonCoherentAtomSize
+ mapSize = (mapSize + alignment - 1) & ~(alignment - 1);
+ }
+ SkASSERT(mapOffset >= alloc.fOffset);
+ SkASSERT(mapSize + mapOffset <= alloc.fOffset + alloc.fSize);
+ void* mapPtr;
+ VkResult err = GR_VK_CALL(gpu->vkInterface(), MapMemory(gpu->device(),
+ alloc.fMemory,
+ mapOffset,
+ mapSize,
+ 0,
+ &mapPtr));
+ mapPtr = reinterpret_cast<char*>(mapPtr) + offsetDiff;
+ if (err) {
return false;
}
- mapPtr = reinterpret_cast<char*>(mapPtr) + offset;
if (srcData) {
// If there is no padding on dst we can do a single memcopy.
@@ -1155,8 +1192,8 @@ bool copy_testing_data(GrVkGpu* gpu, const void* srcData, const GrVkAlloc& alloc
}
}
}
- GrVkMemory::FlushMappedAlloc(gpu, alloc, offset, size);
- GrVkMemory::UnmapAlloc(gpu, alloc);
+ GrVkMemory::FlushMappedAlloc(gpu, alloc, mapOffset, mapSize);
+ GR_VK_CALL(gpu->vkInterface(), UnmapMemory(gpu->device(), alloc.fMemory));
return true;
}
@@ -1980,7 +2017,7 @@ bool GrVkGpu::onReadPixels(GrSurface* surface, GrSurfaceOrigin origin, int left,
this->submitCommandBuffer(kForce_SyncQueue);
void* mappedMemory = transferBuffer->map();
const GrVkAlloc& transAlloc = transferBuffer->alloc();
- GrVkMemory::InvalidateMappedAlloc(this, transAlloc, 0, transAlloc.fSize);
+ GrVkMemory::InvalidateMappedAlloc(this, transAlloc, transAlloc.fOffset, VK_WHOLE_SIZE);
if (copyFromOrigin) {
uint32_t skipRows = region.imageExtent.height - height;
diff --git a/src/gpu/vk/GrVkGpu.h b/src/gpu/vk/GrVkGpu.h
index a44ea7230f..7bdfbeaab3 100644
--- a/src/gpu/vk/GrVkGpu.h
+++ b/src/gpu/vk/GrVkGpu.h
@@ -23,7 +23,6 @@
class GrPipeline;
class GrVkBufferImpl;
-class GrVkMemoryAllocator;
class GrVkPipeline;
class GrVkPipelineState;
class GrVkPrimaryCommandBuffer;
@@ -47,8 +46,6 @@ public:
const GrVkInterface* vkInterface() const { return fBackendContext->fInterface.get(); }
const GrVkCaps& vkCaps() const { return *fVkCaps; }
- GrVkMemoryAllocator* memoryAllocator() const { return fMemoryAllocator.get(); }
-
VkDevice device() const { return fDevice; }
VkQueue queue() const { return fQueue; }
VkCommandPool cmdPool() const { return fCmdPool; }
@@ -143,6 +140,28 @@ public:
VkDeviceSize dstOffset, VkDeviceSize size);
bool updateBuffer(GrVkBuffer* buffer, const void* src, VkDeviceSize offset, VkDeviceSize size);
+ // Heaps
+ enum Heap {
+ kLinearImage_Heap = 0,
+ // We separate out small (i.e., <= 16K) images to reduce fragmentation
+ // in the main heap.
+ kOptimalImage_Heap,
+ kSmallOptimalImage_Heap,
+ // We have separate vertex and image heaps, because it's possible that
+ // a given Vulkan driver may allocate them separately.
+ kVertexBuffer_Heap,
+ kIndexBuffer_Heap,
+ kUniformBuffer_Heap,
+ kTexelBuffer_Heap,
+ kCopyReadBuffer_Heap,
+ kCopyWriteBuffer_Heap,
+
+ kLastHeap = kCopyWriteBuffer_Heap
+ };
+ static const int kHeapCount = kLastHeap + 1;
+
+ GrVkHeap* getHeap(Heap heap) const { return fHeaps[heap].get(); }
+
private:
GrVkGpu(GrContext*, const GrContextOptions&, sk_sp<const GrVkBackendContext> backendContext);
@@ -232,7 +251,6 @@ private:
#endif
sk_sp<const GrVkBackendContext> fBackendContext;
- sk_sp<GrVkMemoryAllocator> fMemoryAllocator;
sk_sp<GrVkCaps> fVkCaps;
// These Vulkan objects are provided by the client, and also stored in fBackendContext.
@@ -252,6 +270,8 @@ private:
VkPhysicalDeviceProperties fPhysDevProps;
VkPhysicalDeviceMemoryProperties fPhysDevMemProps;
+ std::unique_ptr<GrVkHeap> fHeaps[kHeapCount];
+
GrVkCopyManager fCopyManager;
#ifdef SK_ENABLE_VK_LAYERS
diff --git a/src/gpu/vk/GrVkMemory.cpp b/src/gpu/vk/GrVkMemory.cpp
index f999c26546..4f619a3ef3 100644
--- a/src/gpu/vk/GrVkMemory.cpp
+++ b/src/gpu/vk/GrVkMemory.cpp
@@ -9,26 +9,49 @@
#include "GrVkGpu.h"
#include "GrVkUtil.h"
-#include "vk/GrVkMemoryAllocator.h"
-using AllocationPropertyFlags = GrVkMemoryAllocator::AllocationPropertyFlags;
-using BufferUsage = GrVkMemoryAllocator::BufferUsage;
+#ifdef SK_DEBUG
+// for simple tracking of how much we're using in each heap
+// last counter is for non-subheap allocations
+VkDeviceSize gHeapUsage[VK_MAX_MEMORY_HEAPS+1] = { 0 };
+#endif
-static BufferUsage get_buffer_usage(GrVkBuffer::Type type, bool dynamic) {
- switch (type) {
- case GrVkBuffer::kVertex_Type: // fall through
- case GrVkBuffer::kIndex_Type: // fall through
- case GrVkBuffer::kTexel_Type:
- return dynamic ? BufferUsage::kCpuWritesGpuReads : BufferUsage::kGpuOnly;
- case GrVkBuffer::kUniform_Type:
- SkASSERT(dynamic);
- return BufferUsage::kCpuWritesGpuReads;
- case GrVkBuffer::kCopyRead_Type: // fall through
- case GrVkBuffer::kCopyWrite_Type:
- return BufferUsage::kCpuOnly;
+static bool get_valid_memory_type_index(const VkPhysicalDeviceMemoryProperties& physDevMemProps,
+ uint32_t typeBits,
+ VkMemoryPropertyFlags requestedMemFlags,
+ uint32_t* typeIndex,
+ uint32_t* heapIndex) {
+ for (uint32_t i = 0; i < physDevMemProps.memoryTypeCount; ++i) {
+ if (typeBits & (1 << i)) {
+ uint32_t supportedFlags = physDevMemProps.memoryTypes[i].propertyFlags &
+ requestedMemFlags;
+ if (supportedFlags == requestedMemFlags) {
+ *typeIndex = i;
+ *heapIndex = physDevMemProps.memoryTypes[i].heapIndex;
+ return true;
+ }
+ }
}
- SK_ABORT("Invalid GrVkBuffer::Type");
- return BufferUsage::kCpuOnly; // Just returning an arbitrary value.
+ return false;
+}
+
+static GrVkGpu::Heap buffer_type_to_heap(GrVkBuffer::Type type) {
+ const GrVkGpu::Heap kBufferToHeap[]{
+ GrVkGpu::kVertexBuffer_Heap,
+ GrVkGpu::kIndexBuffer_Heap,
+ GrVkGpu::kUniformBuffer_Heap,
+ GrVkGpu::kTexelBuffer_Heap,
+ GrVkGpu::kCopyReadBuffer_Heap,
+ GrVkGpu::kCopyWriteBuffer_Heap,
+ };
+ GR_STATIC_ASSERT(0 == GrVkBuffer::kVertex_Type);
+ GR_STATIC_ASSERT(1 == GrVkBuffer::kIndex_Type);
+ GR_STATIC_ASSERT(2 == GrVkBuffer::kUniform_Type);
+ GR_STATIC_ASSERT(3 == GrVkBuffer::kTexel_Type);
+ GR_STATIC_ASSERT(4 == GrVkBuffer::kCopyRead_Type);
+ GR_STATIC_ASSERT(5 == GrVkBuffer::kCopyWrite_Type);
+
+ return kBufferToHeap[type];
}
bool GrVkMemory::AllocAndBindBufferMemory(const GrVkGpu* gpu,
@@ -36,23 +59,68 @@ bool GrVkMemory::AllocAndBindBufferMemory(const GrVkGpu* gpu,
GrVkBuffer::Type type,
bool dynamic,
GrVkAlloc* alloc) {
- GrVkMemoryAllocator* allocator = gpu->memoryAllocator();
- GrVkBackendMemory memory = 0;
+ const GrVkInterface* iface = gpu->vkInterface();
+ VkDevice device = gpu->device();
- GrVkMemoryAllocator::BufferUsage usage = get_buffer_usage(type, dynamic);
+ VkMemoryRequirements memReqs;
+ GR_VK_CALL(iface, GetBufferMemoryRequirements(device, buffer, &memReqs));
- if (!allocator->allocateMemoryForBuffer(buffer, usage, AllocationPropertyFlags::kNone,
- &memory)) {
- return false;
+ uint32_t typeIndex = 0;
+ uint32_t heapIndex = 0;
+ const VkPhysicalDeviceMemoryProperties& phDevMemProps = gpu->physicalDeviceMemoryProperties();
+ const VkPhysicalDeviceProperties& phDevProps = gpu->physicalDeviceProperties();
+ if (dynamic) {
+ // try to get cached and ideally non-coherent memory first
+ if (!get_valid_memory_type_index(phDevMemProps,
+ memReqs.memoryTypeBits,
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+ VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
+ &typeIndex,
+ &heapIndex)) {
+ // some sort of host-visible memory type should always be available for dynamic buffers
+ SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps,
+ memReqs.memoryTypeBits,
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
+ &typeIndex,
+ &heapIndex));
+ }
+
+ VkMemoryPropertyFlags mpf = phDevMemProps.memoryTypes[typeIndex].propertyFlags;
+ alloc->fFlags = mpf & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT ? 0x0
+ : GrVkAlloc::kNoncoherent_Flag;
+ if (SkToBool(alloc->fFlags & GrVkAlloc::kNoncoherent_Flag)) {
+ SkASSERT(SkIsPow2(memReqs.alignment));
+ SkASSERT(SkIsPow2(phDevProps.limits.nonCoherentAtomSize));
+ memReqs.alignment = SkTMax(memReqs.alignment, phDevProps.limits.nonCoherentAtomSize);
+ }
+ } else {
+ // device-local memory should always be available for static buffers
+ SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps,
+ memReqs.memoryTypeBits,
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+ &typeIndex,
+ &heapIndex));
+ alloc->fFlags = 0x0;
+ }
+
+ GrVkHeap* heap = gpu->getHeap(buffer_type_to_heap(type));
+
+ if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) {
+ // if static, try to allocate from non-host-visible non-device-local memory instead
+ if (dynamic ||
+ !get_valid_memory_type_index(phDevMemProps, memReqs.memoryTypeBits,
+ 0, &typeIndex, &heapIndex) ||
+ !heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) {
+ SkDebugf("Failed to alloc buffer\n");
+ return false;
+ }
}
- allocator->getAllocInfo(memory, alloc);
// Bind buffer
- VkResult err = GR_VK_CALL(gpu->vkInterface(), BindBufferMemory(gpu->device(), buffer,
- alloc->fMemory,
- alloc->fOffset));
+ VkResult err = GR_VK_CALL(iface, BindBufferMemory(device, buffer,
+ alloc->fMemory, alloc->fOffset));
if (err) {
- FreeBufferMemory(gpu, type, *alloc);
+ SkASSERT_RELEASE(heap->free(*alloc));
return false;
}
@@ -61,152 +129,503 @@ bool GrVkMemory::AllocAndBindBufferMemory(const GrVkGpu* gpu,
void GrVkMemory::FreeBufferMemory(const GrVkGpu* gpu, GrVkBuffer::Type type,
const GrVkAlloc& alloc) {
- if (alloc.fBackendMemory) {
- GrVkMemoryAllocator* allocator = gpu->memoryAllocator();
- allocator->freeMemory(alloc.fBackendMemory);
- } else {
- GR_VK_CALL(gpu->vkInterface(), FreeMemory(gpu->device(), alloc.fMemory, nullptr));
- }
+
+ GrVkHeap* heap = gpu->getHeap(buffer_type_to_heap(type));
+ SkASSERT_RELEASE(heap->free(alloc));
}
+// for debugging
+static uint64_t gTotalImageMemory = 0;
+static uint64_t gTotalImageMemoryFullPage = 0;
+
const VkDeviceSize kMaxSmallImageSize = 16 * 1024;
+const VkDeviceSize kMinVulkanPageSize = 16 * 1024;
+
+static VkDeviceSize align_size(VkDeviceSize size, VkDeviceSize alignment) {
+ return (size + alignment - 1) & ~(alignment - 1);
+}
bool GrVkMemory::AllocAndBindImageMemory(const GrVkGpu* gpu,
VkImage image,
bool linearTiling,
GrVkAlloc* alloc) {
- SkASSERT(!linearTiling);
- GrVkMemoryAllocator* allocator = gpu->memoryAllocator();
- GrVkBackendMemory memory = 0;
+ const GrVkInterface* iface = gpu->vkInterface();
+ VkDevice device = gpu->device();
VkMemoryRequirements memReqs;
- GR_VK_CALL(gpu->vkInterface(), GetImageMemoryRequirements(gpu->device(), image, &memReqs));
+ GR_VK_CALL(iface, GetImageMemoryRequirements(device, image, &memReqs));
- AllocationPropertyFlags propFlags;
- if (memReqs.size <= kMaxSmallImageSize) {
- propFlags = AllocationPropertyFlags::kNone;
+ uint32_t typeIndex = 0;
+ uint32_t heapIndex = 0;
+ GrVkHeap* heap;
+ const VkPhysicalDeviceMemoryProperties& phDevMemProps = gpu->physicalDeviceMemoryProperties();
+ const VkPhysicalDeviceProperties& phDevProps = gpu->physicalDeviceProperties();
+ if (linearTiling) {
+ VkMemoryPropertyFlags desiredMemProps = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+ VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+ if (!get_valid_memory_type_index(phDevMemProps,
+ memReqs.memoryTypeBits,
+ desiredMemProps,
+ &typeIndex,
+ &heapIndex)) {
+ // some sort of host-visible memory type should always be available
+ SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps,
+ memReqs.memoryTypeBits,
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
+ &typeIndex,
+ &heapIndex));
+ }
+ heap = gpu->getHeap(GrVkGpu::kLinearImage_Heap);
+ VkMemoryPropertyFlags mpf = phDevMemProps.memoryTypes[typeIndex].propertyFlags;
+ alloc->fFlags = mpf & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT ? 0x0
+ : GrVkAlloc::kNoncoherent_Flag;
+ if (SkToBool(alloc->fFlags & GrVkAlloc::kNoncoherent_Flag)) {
+ SkASSERT(SkIsPow2(memReqs.alignment));
+ SkASSERT(SkIsPow2(phDevProps.limits.nonCoherentAtomSize));
+ memReqs.alignment = SkTMax(memReqs.alignment, phDevProps.limits.nonCoherentAtomSize);
+ }
} else {
- propFlags = AllocationPropertyFlags::kDedicatedAllocation;
+ // this memory type should always be available
+ SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps,
+ memReqs.memoryTypeBits,
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+ &typeIndex,
+ &heapIndex));
+ if (memReqs.size <= kMaxSmallImageSize) {
+ heap = gpu->getHeap(GrVkGpu::kSmallOptimalImage_Heap);
+ } else {
+ heap = gpu->getHeap(GrVkGpu::kOptimalImage_Heap);
+ }
+ alloc->fFlags = 0x0;
}
- if (!allocator->allocateMemoryForImage(image, AllocationPropertyFlags::kDedicatedAllocation,
- &memory)) {
- return false;
+ if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) {
+ // if optimal, try to allocate from non-host-visible non-device-local memory instead
+ if (linearTiling ||
+ !get_valid_memory_type_index(phDevMemProps, memReqs.memoryTypeBits,
+ 0, &typeIndex, &heapIndex) ||
+ !heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) {
+ SkDebugf("Failed to alloc image\n");
+ return false;
+ }
}
- allocator->getAllocInfo(memory, alloc);
- // Bind buffer
- VkResult err = GR_VK_CALL(gpu->vkInterface(), BindImageMemory(gpu->device(), image,
- alloc->fMemory, alloc->fOffset));
+ // Bind image
+ VkResult err = GR_VK_CALL(iface, BindImageMemory(device, image,
+ alloc->fMemory, alloc->fOffset));
if (err) {
- FreeImageMemory(gpu, linearTiling, *alloc);
+ SkASSERT_RELEASE(heap->free(*alloc));
return false;
}
+ gTotalImageMemory += alloc->fSize;
+
+ VkDeviceSize pageAlignedSize = align_size(alloc->fSize, kMinVulkanPageSize);
+ gTotalImageMemoryFullPage += pageAlignedSize;
+
return true;
}
void GrVkMemory::FreeImageMemory(const GrVkGpu* gpu, bool linearTiling,
const GrVkAlloc& alloc) {
- if (alloc.fBackendMemory) {
- GrVkMemoryAllocator* allocator = gpu->memoryAllocator();
- allocator->freeMemory(alloc.fBackendMemory);
+ GrVkHeap* heap;
+ if (linearTiling) {
+ heap = gpu->getHeap(GrVkGpu::kLinearImage_Heap);
+ } else if (alloc.fSize <= kMaxSmallImageSize) {
+ heap = gpu->getHeap(GrVkGpu::kSmallOptimalImage_Heap);
} else {
+ heap = gpu->getHeap(GrVkGpu::kOptimalImage_Heap);
+ }
+ if (!heap->free(alloc)) {
+ // must be an adopted allocation
GR_VK_CALL(gpu->vkInterface(), FreeMemory(gpu->device(), alloc.fMemory, nullptr));
+ } else {
+ gTotalImageMemory -= alloc.fSize;
+ VkDeviceSize pageAlignedSize = align_size(alloc.fSize, kMinVulkanPageSize);
+ gTotalImageMemoryFullPage -= pageAlignedSize;
}
}
-void* GrVkMemory::MapAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc) {
- SkASSERT(GrVkAlloc::kMappable_Flag & alloc.fFlags);
-#ifdef SK_DEBUG
+void GrVkMemory::FlushMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc, VkDeviceSize offset,
+ VkDeviceSize size) {
if (alloc.fFlags & GrVkAlloc::kNoncoherent_Flag) {
+#ifdef SK_DEBUG
+ SkASSERT(offset >= alloc.fOffset);
VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize;
- SkASSERT(0 == (alloc.fOffset & (alignment-1)));
- SkASSERT(0 == (alloc.fSize & (alignment-1)));
+ SkASSERT(0 == (offset & (alignment-1)));
+ if (size != VK_WHOLE_SIZE) {
+ SkASSERT(size > 0);
+ SkASSERT(0 == (size & (alignment-1)) ||
+ (offset + size) == (alloc.fOffset + alloc.fSize));
+ SkASSERT(offset + size <= alloc.fOffset + alloc.fSize);
+ }
+#endif
+
+ VkMappedMemoryRange mappedMemoryRange;
+ memset(&mappedMemoryRange, 0, sizeof(VkMappedMemoryRange));
+ mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
+ mappedMemoryRange.memory = alloc.fMemory;
+ mappedMemoryRange.offset = offset;
+ mappedMemoryRange.size = size;
+ GR_VK_CALL(gpu->vkInterface(), FlushMappedMemoryRanges(gpu->device(),
+ 1, &mappedMemoryRange));
}
+}
+
+void GrVkMemory::InvalidateMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc,
+ VkDeviceSize offset, VkDeviceSize size) {
+ if (alloc.fFlags & GrVkAlloc::kNoncoherent_Flag) {
+#ifdef SK_DEBUG
+ SkASSERT(offset >= alloc.fOffset);
+ VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize;
+ SkASSERT(0 == (offset & (alignment-1)));
+ if (size != VK_WHOLE_SIZE) {
+ SkASSERT(size > 0);
+ SkASSERT(0 == (size & (alignment-1)) ||
+ (offset + size) == (alloc.fOffset + alloc.fSize));
+ SkASSERT(offset + size <= alloc.fOffset + alloc.fSize);
+ }
#endif
- if (alloc.fBackendMemory) {
- GrVkMemoryAllocator* allocator = gpu->memoryAllocator();
- return allocator->mapMemory(alloc.fBackendMemory);
+
+ VkMappedMemoryRange mappedMemoryRange;
+ memset(&mappedMemoryRange, 0, sizeof(VkMappedMemoryRange));
+ mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
+ mappedMemoryRange.memory = alloc.fMemory;
+ mappedMemoryRange.offset = offset;
+ mappedMemoryRange.size = size;
+ GR_VK_CALL(gpu->vkInterface(), InvalidateMappedMemoryRanges(gpu->device(),
+ 1, &mappedMemoryRange));
}
+}
- void* mapPtr;
- VkResult err = GR_VK_CALL(gpu->vkInterface(), MapMemory(gpu->device(), alloc.fMemory,
- alloc.fOffset,
- alloc.fSize, 0, &mapPtr));
- if (err) {
- mapPtr = nullptr;
+bool GrVkFreeListAlloc::alloc(VkDeviceSize requestedSize,
+ VkDeviceSize* allocOffset, VkDeviceSize* allocSize) {
+ VkDeviceSize alignedSize = align_size(requestedSize, fAlignment);
+
+ // find the smallest block big enough for our allocation
+ FreeList::Iter iter = fFreeList.headIter();
+ FreeList::Iter bestFitIter;
+ VkDeviceSize bestFitSize = fSize + 1;
+ VkDeviceSize secondLargestSize = 0;
+ VkDeviceSize secondLargestOffset = 0;
+ while (iter.get()) {
+ Block* block = iter.get();
+ // need to adjust size to match desired alignment
+ SkASSERT(align_size(block->fOffset, fAlignment) - block->fOffset == 0);
+ if (block->fSize >= alignedSize && block->fSize < bestFitSize) {
+ bestFitIter = iter;
+ bestFitSize = block->fSize;
+ }
+ if (secondLargestSize < block->fSize && block->fOffset != fLargestBlockOffset) {
+ secondLargestSize = block->fSize;
+ secondLargestOffset = block->fOffset;
+ }
+ iter.next();
+ }
+ SkASSERT(secondLargestSize <= fLargestBlockSize);
+
+ Block* bestFit = bestFitIter.get();
+ if (bestFit) {
+ SkASSERT(align_size(bestFit->fOffset, fAlignment) == bestFit->fOffset);
+ *allocOffset = bestFit->fOffset;
+ *allocSize = alignedSize;
+ // adjust or remove current block
+ VkDeviceSize originalBestFitOffset = bestFit->fOffset;
+ if (bestFit->fSize > alignedSize) {
+ bestFit->fOffset += alignedSize;
+ bestFit->fSize -= alignedSize;
+ if (fLargestBlockOffset == originalBestFitOffset) {
+ if (bestFit->fSize >= secondLargestSize) {
+ fLargestBlockSize = bestFit->fSize;
+ fLargestBlockOffset = bestFit->fOffset;
+ } else {
+ fLargestBlockSize = secondLargestSize;
+ fLargestBlockOffset = secondLargestOffset;
+ }
+ }
+#ifdef SK_DEBUG
+ VkDeviceSize largestSize = 0;
+ iter = fFreeList.headIter();
+ while (iter.get()) {
+ Block* block = iter.get();
+ if (largestSize < block->fSize) {
+ largestSize = block->fSize;
+ }
+ iter.next();
+ }
+ SkASSERT(largestSize == fLargestBlockSize);
+#endif
+ } else {
+ SkASSERT(bestFit->fSize == alignedSize);
+ if (fLargestBlockOffset == originalBestFitOffset) {
+ fLargestBlockSize = secondLargestSize;
+ fLargestBlockOffset = secondLargestOffset;
+ }
+ fFreeList.remove(bestFit);
+#ifdef SK_DEBUG
+ VkDeviceSize largestSize = 0;
+ iter = fFreeList.headIter();
+ while (iter.get()) {
+ Block* block = iter.get();
+ if (largestSize < block->fSize) {
+ largestSize = block->fSize;
+ }
+ iter.next();
+ }
+ SkASSERT(largestSize == fLargestBlockSize);
+#endif
+ }
+ fFreeSize -= alignedSize;
+ SkASSERT(*allocSize > 0);
+
+ return true;
}
- return mapPtr;
+
+ SkDebugf("Can't allocate %d bytes, %d bytes available, largest free block %d\n", alignedSize, fFreeSize, fLargestBlockSize);
+
+ return false;
}
-void GrVkMemory::UnmapAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc) {
- if (alloc.fBackendMemory) {
- GrVkMemoryAllocator* allocator = gpu->memoryAllocator();
- allocator->unmapMemory(alloc.fBackendMemory);
+void GrVkFreeListAlloc::free(VkDeviceSize allocOffset, VkDeviceSize allocSize) {
+ // find the block right after this allocation
+ FreeList::Iter iter = fFreeList.headIter();
+ FreeList::Iter prev;
+ while (iter.get() && iter.get()->fOffset < allocOffset) {
+ prev = iter;
+ iter.next();
+ }
+ // we have four cases:
+ // we exactly follow the previous one
+ Block* block;
+ if (prev.get() && prev.get()->fOffset + prev.get()->fSize == allocOffset) {
+ block = prev.get();
+ block->fSize += allocSize;
+ if (block->fOffset == fLargestBlockOffset) {
+ fLargestBlockSize = block->fSize;
+ }
+ // and additionally we may exactly precede the next one
+ if (iter.get() && iter.get()->fOffset == allocOffset + allocSize) {
+ block->fSize += iter.get()->fSize;
+ if (iter.get()->fOffset == fLargestBlockOffset) {
+ fLargestBlockOffset = block->fOffset;
+ fLargestBlockSize = block->fSize;
+ }
+ fFreeList.remove(iter.get());
+ }
+ // or we only exactly proceed the next one
+ } else if (iter.get() && iter.get()->fOffset == allocOffset + allocSize) {
+ block = iter.get();
+ block->fSize += allocSize;
+ if (block->fOffset == fLargestBlockOffset) {
+ fLargestBlockOffset = allocOffset;
+ fLargestBlockSize = block->fSize;
+ }
+ block->fOffset = allocOffset;
+ // or we fall somewhere in between, with gaps
} else {
- GR_VK_CALL(gpu->vkInterface(), UnmapMemory(gpu->device(), alloc.fMemory));
+ block = fFreeList.addBefore(iter);
+ block->fOffset = allocOffset;
+ block->fSize = allocSize;
+ }
+ fFreeSize += allocSize;
+ if (block->fSize > fLargestBlockSize) {
+ fLargestBlockSize = block->fSize;
+ fLargestBlockOffset = block->fOffset;
+ }
+
+#ifdef SK_DEBUG
+ VkDeviceSize largestSize = 0;
+ iter = fFreeList.headIter();
+ while (iter.get()) {
+ Block* block = iter.get();
+ if (largestSize < block->fSize) {
+ largestSize = block->fSize;
+ }
+ iter.next();
}
+ SkASSERT(fLargestBlockSize == largestSize);
+#endif
}
-void GrVkMemory::GetNonCoherentMappedMemoryRange(const GrVkAlloc& alloc, VkDeviceSize offset,
- VkDeviceSize size, VkDeviceSize alignment,
- VkMappedMemoryRange* range) {
- SkASSERT(alloc.fFlags & GrVkAlloc::kNoncoherent_Flag);
- offset = offset + alloc.fOffset;
- VkDeviceSize offsetDiff = offset & (alignment -1);
- offset = offset - offsetDiff;
- size = (size + alignment - 1) & ~(alignment - 1);
+GrVkSubHeap::GrVkSubHeap(const GrVkGpu* gpu, uint32_t memoryTypeIndex, uint32_t heapIndex,
+ VkDeviceSize size, VkDeviceSize alignment)
+ : INHERITED(size, alignment)
+ , fGpu(gpu)
#ifdef SK_DEBUG
- SkASSERT(offset >= alloc.fOffset);
- SkASSERT(offset + size <= alloc.fOffset + alloc.fSize);
- SkASSERT(0 == (offset & (alignment-1)));
- SkASSERT(size > 0);
- SkASSERT(0 == (size & (alignment-1)));
+ , fHeapIndex(heapIndex)
#endif
+ , fMemoryTypeIndex(memoryTypeIndex) {
+
+ VkMemoryAllocateInfo allocInfo = {
+ VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // sType
+ nullptr, // pNext
+ size, // allocationSize
+ memoryTypeIndex, // memoryTypeIndex
+ };
- memset(range, 0, sizeof(VkMappedMemoryRange));
- range->sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
- range->memory = alloc.fMemory;
- range->offset = offset;
- range->size = size;
+ VkResult err = GR_VK_CALL(gpu->vkInterface(), AllocateMemory(gpu->device(),
+ &allocInfo,
+ nullptr,
+ &fAlloc));
+ if (VK_SUCCESS != err) {
+ this->reset();
+ }
+#ifdef SK_DEBUG
+ else {
+ gHeapUsage[heapIndex] += size;
+ }
+#endif
}
-void GrVkMemory::FlushMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc, VkDeviceSize offset,
- VkDeviceSize size) {
- if (alloc.fFlags & GrVkAlloc::kNoncoherent_Flag) {
- SkASSERT(offset == 0);
- SkASSERT(size <= alloc.fSize);
- if (alloc.fBackendMemory) {
- GrVkMemoryAllocator* allocator = gpu->memoryAllocator();
- allocator->flushMappedMemory(alloc.fBackendMemory, offset, size);
- } else {
- VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize;
- VkMappedMemoryRange mappedMemoryRange;
- GrVkMemory::GetNonCoherentMappedMemoryRange(alloc, offset, size, alignment,
- &mappedMemoryRange);
- GR_VK_CALL(gpu->vkInterface(), FlushMappedMemoryRanges(gpu->device(), 1,
- &mappedMemoryRange));
+GrVkSubHeap::~GrVkSubHeap() {
+ const GrVkInterface* iface = fGpu->vkInterface();
+ GR_VK_CALL(iface, FreeMemory(fGpu->device(), fAlloc, nullptr));
+#ifdef SK_DEBUG
+ gHeapUsage[fHeapIndex] -= fSize;
+#endif
+}
+
+bool GrVkSubHeap::alloc(VkDeviceSize size, GrVkAlloc* alloc) {
+ alloc->fMemory = fAlloc;
+ return INHERITED::alloc(size, &alloc->fOffset, &alloc->fSize);
+}
+
+void GrVkSubHeap::free(const GrVkAlloc& alloc) {
+ SkASSERT(alloc.fMemory == fAlloc);
+
+ INHERITED::free(alloc.fOffset, alloc.fSize);
+}
+
+bool GrVkHeap::subAlloc(VkDeviceSize size, VkDeviceSize alignment,
+ uint32_t memoryTypeIndex, uint32_t heapIndex, GrVkAlloc* alloc) {
+ VkDeviceSize alignedSize = align_size(size, alignment);
+
+ // if requested is larger than our subheap allocation, just alloc directly
+ if (alignedSize > fSubHeapSize) {
+ VkMemoryAllocateInfo allocInfo = {
+ VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // sType
+ nullptr, // pNext
+ alignedSize, // allocationSize
+ memoryTypeIndex, // memoryTypeIndex
+ };
+
+ VkResult err = GR_VK_CALL(fGpu->vkInterface(), AllocateMemory(fGpu->device(),
+ &allocInfo,
+ nullptr,
+ &alloc->fMemory));
+ if (VK_SUCCESS != err) {
+ return false;
}
+ alloc->fOffset = 0;
+ alloc->fSize = alignedSize;
+ alloc->fUsesSystemHeap = true;
+#ifdef SK_DEBUG
+ gHeapUsage[VK_MAX_MEMORY_HEAPS] += alignedSize;
+#endif
+
+ return true;
}
+
+ // first try to find a subheap that fits our allocation request
+ int bestFitIndex = -1;
+ VkDeviceSize bestFitSize = 0x7FFFFFFF;
+ for (auto i = 0; i < fSubHeaps.count(); ++i) {
+ if (fSubHeaps[i]->memoryTypeIndex() == memoryTypeIndex &&
+ fSubHeaps[i]->alignment() == alignment) {
+ VkDeviceSize heapSize = fSubHeaps[i]->largestBlockSize();
+ if (heapSize >= alignedSize && heapSize < bestFitSize) {
+ bestFitIndex = i;
+ bestFitSize = heapSize;
+ }
+ }
+ }
+
+ if (bestFitIndex >= 0) {
+ SkASSERT(fSubHeaps[bestFitIndex]->alignment() == alignment);
+ if (fSubHeaps[bestFitIndex]->alloc(size, alloc)) {
+ fUsedSize += alloc->fSize;
+ return true;
+ }
+ return false;
+ }
+
+ // need to allocate a new subheap
+ std::unique_ptr<GrVkSubHeap>& subHeap = fSubHeaps.push_back();
+ subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, heapIndex, fSubHeapSize, alignment));
+ // try to recover from failed allocation by only allocating what we need
+ if (subHeap->size() == 0) {
+ VkDeviceSize alignedSize = align_size(size, alignment);
+ subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, heapIndex, alignedSize, alignment));
+ if (subHeap->size() == 0) {
+ return false;
+ }
+ }
+ fAllocSize += fSubHeapSize;
+ if (subHeap->alloc(size, alloc)) {
+ fUsedSize += alloc->fSize;
+ return true;
+ }
+
+ return false;
}
-void GrVkMemory::InvalidateMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc,
- VkDeviceSize offset, VkDeviceSize size) {
- if (alloc.fFlags & GrVkAlloc::kNoncoherent_Flag) {
- SkASSERT(offset == 0);
- SkASSERT(size <= alloc.fSize);
- if (alloc.fBackendMemory) {
- GrVkMemoryAllocator* allocator = gpu->memoryAllocator();
- allocator->invalidateMappedMemory(alloc.fBackendMemory, offset, size);
- } else {
- VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize;
- VkMappedMemoryRange mappedMemoryRange;
- GrVkMemory::GetNonCoherentMappedMemoryRange(alloc, offset, size, alignment,
- &mappedMemoryRange);
- GR_VK_CALL(gpu->vkInterface(), InvalidateMappedMemoryRanges(gpu->device(), 1,
- &mappedMemoryRange));
+bool GrVkHeap::singleAlloc(VkDeviceSize size, VkDeviceSize alignment,
+ uint32_t memoryTypeIndex, uint32_t heapIndex, GrVkAlloc* alloc) {
+ VkDeviceSize alignedSize = align_size(size, alignment);
+
+ // first try to find an unallocated subheap that fits our allocation request
+ int bestFitIndex = -1;
+ VkDeviceSize bestFitSize = 0x7FFFFFFF;
+ for (auto i = 0; i < fSubHeaps.count(); ++i) {
+ if (fSubHeaps[i]->memoryTypeIndex() == memoryTypeIndex &&
+ fSubHeaps[i]->alignment() == alignment &&
+ fSubHeaps[i]->unallocated()) {
+ VkDeviceSize heapSize = fSubHeaps[i]->size();
+ if (heapSize >= alignedSize && heapSize < bestFitSize) {
+ bestFitIndex = i;
+ bestFitSize = heapSize;
+ }
+ }
+ }
+
+ if (bestFitIndex >= 0) {
+ SkASSERT(fSubHeaps[bestFitIndex]->alignment() == alignment);
+ if (fSubHeaps[bestFitIndex]->alloc(size, alloc)) {
+ fUsedSize += alloc->fSize;
+ return true;
+ }
+ return false;
+ }
+
+ // need to allocate a new subheap
+ std::unique_ptr<GrVkSubHeap>& subHeap = fSubHeaps.push_back();
+ subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, heapIndex, alignedSize, alignment));
+ fAllocSize += alignedSize;
+ if (subHeap->alloc(size, alloc)) {
+ fUsedSize += alloc->fSize;
+ return true;
+ }
+
+ return false;
+}
+
+bool GrVkHeap::free(const GrVkAlloc& alloc) {
+ // a size of 0 means we're using the system heap
+ if (alloc.fUsesSystemHeap) {
+ const GrVkInterface* iface = fGpu->vkInterface();
+ GR_VK_CALL(iface, FreeMemory(fGpu->device(), alloc.fMemory, nullptr));
+ return true;
+ }
+
+ for (auto i = 0; i < fSubHeaps.count(); ++i) {
+ if (fSubHeaps[i]->memory() == alloc.fMemory) {
+ fSubHeaps[i]->free(alloc);
+ fUsedSize -= alloc.fSize;
+ return true;
}
}
+
+ return false;
}
+
diff --git a/src/gpu/vk/GrVkMemory.h b/src/gpu/vk/GrVkMemory.h
index 741bdaa8a0..bb6681435f 100644
--- a/src/gpu/vk/GrVkMemory.h
+++ b/src/gpu/vk/GrVkMemory.h
@@ -34,25 +34,133 @@ namespace GrVkMemory {
GrVkAlloc* alloc);
void FreeImageMemory(const GrVkGpu* gpu, bool linearTiling, const GrVkAlloc& alloc);
- // Maps the entire GrVkAlloc and returns a pointer to the start of the allocation. Underneath
- // the hood, we may map more than the range of the GrVkAlloc (e.g. the entire VkDeviceMemory),
- // but the pointer returned will always be to the start of the GrVkAlloc. The caller should also
- // never assume more than the GrVkAlloc block has been mapped.
- void* MapAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc);
- void UnmapAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc);
-
- // For the Flush and Invalidate calls, the offset should be relative to the GrVkAlloc. Thus this
- // will often be 0. The client does not need to make sure the offset and size are aligned to the
- // nonCoherentAtomSize, the internal calls will handle that.
void FlushMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc, VkDeviceSize offset,
VkDeviceSize size);
void InvalidateMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc, VkDeviceSize offset,
VkDeviceSize size);
-
- // Helper for aligning and setting VkMappedMemoryRange for flushing/invalidating noncoherent
- // memory.
- void GetNonCoherentMappedMemoryRange(const GrVkAlloc&, VkDeviceSize offset, VkDeviceSize size,
- VkDeviceSize alignment, VkMappedMemoryRange*);
}
+class GrVkFreeListAlloc {
+public:
+ GrVkFreeListAlloc(VkDeviceSize size, VkDeviceSize alignment)
+ : fSize(size)
+ , fAlignment(alignment)
+ , fFreeSize(size)
+ , fLargestBlockSize(size)
+ , fLargestBlockOffset(0) {
+ Block* block = fFreeList.addToTail();
+ block->fOffset = 0;
+ block->fSize = fSize;
+ }
+ ~GrVkFreeListAlloc() {
+ this->reset();
+ }
+
+ VkDeviceSize size() const { return fSize; }
+ VkDeviceSize alignment() const { return fAlignment; }
+ VkDeviceSize freeSize() const { return fFreeSize; }
+ VkDeviceSize largestBlockSize() const { return fLargestBlockSize; }
+
+ bool unallocated() const { return fSize == fFreeSize; }
+
+protected:
+ bool alloc(VkDeviceSize requestedSize, VkDeviceSize* allocOffset, VkDeviceSize* allocSize);
+ void free(VkDeviceSize allocOffset, VkDeviceSize allocSize);
+
+ void reset() {
+ fSize = 0;
+ fAlignment = 0;
+ fFreeSize = 0;
+ fLargestBlockSize = 0;
+ fFreeList.reset();
+ }
+
+ struct Block {
+ VkDeviceSize fOffset;
+ VkDeviceSize fSize;
+ };
+ typedef SkTLList<Block, 16> FreeList;
+
+ VkDeviceSize fSize;
+ VkDeviceSize fAlignment;
+ VkDeviceSize fFreeSize;
+ VkDeviceSize fLargestBlockSize;
+ VkDeviceSize fLargestBlockOffset;
+ FreeList fFreeList;
+};
+
+class GrVkSubHeap : public GrVkFreeListAlloc {
+public:
+ GrVkSubHeap(const GrVkGpu* gpu, uint32_t memoryTypeIndex, uint32_t heapIndex,
+ VkDeviceSize size, VkDeviceSize alignment);
+ ~GrVkSubHeap();
+
+ uint32_t memoryTypeIndex() const { return fMemoryTypeIndex; }
+ VkDeviceMemory memory() { return fAlloc; }
+
+ bool alloc(VkDeviceSize requestedSize, GrVkAlloc* alloc);
+ void free(const GrVkAlloc& alloc);
+
+private:
+ const GrVkGpu* fGpu;
+#ifdef SK_DEBUG
+ uint32_t fHeapIndex;
+#endif
+ uint32_t fMemoryTypeIndex;
+ VkDeviceMemory fAlloc;
+
+ typedef GrVkFreeListAlloc INHERITED;
+};
+
+class GrVkHeap {
+public:
+ enum Strategy {
+ kSubAlloc_Strategy, // alloc large subheaps and suballoc within them
+ kSingleAlloc_Strategy // alloc/recycle an individual subheap per object
+ };
+
+ GrVkHeap(const GrVkGpu* gpu, Strategy strategy, VkDeviceSize subHeapSize)
+ : fGpu(gpu)
+ , fSubHeapSize(subHeapSize)
+ , fAllocSize(0)
+ , fUsedSize(0) {
+ if (strategy == kSubAlloc_Strategy) {
+ fAllocFunc = &GrVkHeap::subAlloc;
+ } else {
+ fAllocFunc = &GrVkHeap::singleAlloc;
+ }
+ }
+
+ ~GrVkHeap() {}
+
+ VkDeviceSize allocSize() const { return fAllocSize; }
+ VkDeviceSize usedSize() const { return fUsedSize; }
+
+ bool alloc(VkDeviceSize size, VkDeviceSize alignment, uint32_t memoryTypeIndex,
+ uint32_t heapIndex, GrVkAlloc* alloc) {
+ SkASSERT(size > 0);
+ alloc->fUsesSystemHeap = false;
+ return (*this.*fAllocFunc)(size, alignment, memoryTypeIndex, heapIndex, alloc);
+ }
+ bool free(const GrVkAlloc& alloc);
+
+private:
+ typedef bool (GrVkHeap::*AllocFunc)(VkDeviceSize size, VkDeviceSize alignment,
+ uint32_t memoryTypeIndex, uint32_t heapIndex,
+ GrVkAlloc* alloc);
+
+ bool subAlloc(VkDeviceSize size, VkDeviceSize alignment,
+ uint32_t memoryTypeIndex, uint32_t heapIndex,
+ GrVkAlloc* alloc);
+ bool singleAlloc(VkDeviceSize size, VkDeviceSize alignment,
+ uint32_t memoryTypeIndex, uint32_t heapIndex,
+ GrVkAlloc* alloc);
+
+ const GrVkGpu* fGpu;
+ VkDeviceSize fSubHeapSize;
+ VkDeviceSize fAllocSize;
+ VkDeviceSize fUsedSize;
+ AllocFunc fAllocFunc;
+ SkTArray<std::unique_ptr<GrVkSubHeap>> fSubHeaps;
+};
#endif
diff --git a/tests/VkHeapTests.cpp b/tests/VkHeapTests.cpp
new file mode 100644
index 0000000000..67eb045d98
--- /dev/null
+++ b/tests/VkHeapTests.cpp
@@ -0,0 +1,239 @@
+/*
+ * Copyright 2015 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+// This is a GPU-backend specific test. It relies on static intializers to work
+
+#include "SkTypes.h"
+
+#if SK_SUPPORT_GPU && defined(SK_VULKAN)
+
+#include "GrContextPriv.h"
+#include "GrContextFactory.h"
+#include "GrTest.h"
+#include "Test.h"
+#include "vk/GrVkGpu.h"
+
+using sk_gpu_test::GrContextFactory;
+
+void subheap_test(skiatest::Reporter* reporter, GrContext* context) {
+ GrVkGpu* gpu = static_cast<GrVkGpu*>(context->contextPriv().getGpu());
+
+ // memtype doesn't matter, we're just testing the suballocation algorithm so we'll use 0
+ GrVkSubHeap heap(gpu, 0, 0, 64 * 1024, 32);
+ GrVkAlloc alloc0, alloc1, alloc2, alloc3;
+ // test full allocation and free
+ REPORTER_ASSERT(reporter, heap.alloc(64 * 1024, &alloc0));
+ REPORTER_ASSERT(reporter, alloc0.fOffset == 0);
+ REPORTER_ASSERT(reporter, alloc0.fSize == 64 * 1024);
+ REPORTER_ASSERT(reporter, heap.freeSize() == 0 && heap.largestBlockSize() == 0);
+ heap.free(alloc0);
+ REPORTER_ASSERT(reporter, heap.freeSize() == 64*1024 && heap.largestBlockSize() == 64 * 1024);
+
+ // now let's suballoc some memory
+ REPORTER_ASSERT(reporter, heap.alloc(16 * 1024, &alloc0));
+ REPORTER_ASSERT(reporter, heap.alloc(23 * 1024, &alloc1));
+ REPORTER_ASSERT(reporter, heap.alloc(18 * 1024, &alloc2));
+ REPORTER_ASSERT(reporter, heap.freeSize() == 7 * 1024 && heap.largestBlockSize() == 7 * 1024);
+ // free lone block
+ heap.free(alloc1);
+ REPORTER_ASSERT(reporter, heap.freeSize() == 30 * 1024 && heap.largestBlockSize() == 23 * 1024);
+ // allocate into smallest free block
+ REPORTER_ASSERT(reporter, heap.alloc(6 * 1024, &alloc3));
+ REPORTER_ASSERT(reporter, heap.freeSize() == 24 * 1024 && heap.largestBlockSize() == 23 * 1024);
+ // allocate into exact size free block
+ REPORTER_ASSERT(reporter, heap.alloc(23 * 1024, &alloc1));
+ REPORTER_ASSERT(reporter, heap.freeSize() == 1 * 1024 && heap.largestBlockSize() == 1 * 1024);
+ // free lone block
+ heap.free(alloc2);
+ REPORTER_ASSERT(reporter, heap.freeSize() == 19 * 1024 && heap.largestBlockSize() == 18 * 1024);
+ // free and merge with preceding block and following
+ heap.free(alloc3);
+ REPORTER_ASSERT(reporter, heap.freeSize() == 25 * 1024 && heap.largestBlockSize() == 25 * 1024);
+ // free and merge with following block
+ heap.free(alloc1);
+ REPORTER_ASSERT(reporter, heap.freeSize() == 48 * 1024 && heap.largestBlockSize() == 48 * 1024);
+ // free starting block and merge with following
+ heap.free(alloc0);
+ REPORTER_ASSERT(reporter, heap.freeSize() == 64 * 1024 && heap.largestBlockSize() == 64 * 1024);
+
+ // realloc
+ REPORTER_ASSERT(reporter, heap.alloc(4 * 1024, &alloc0));
+ REPORTER_ASSERT(reporter, heap.alloc(35 * 1024, &alloc1));
+ REPORTER_ASSERT(reporter, heap.alloc(10 * 1024, &alloc2));
+ REPORTER_ASSERT(reporter, heap.freeSize() == 15 * 1024 && heap.largestBlockSize() == 15 * 1024);
+ // free starting block and merge with following
+ heap.free(alloc0);
+ REPORTER_ASSERT(reporter, heap.freeSize() == 19 * 1024 && heap.largestBlockSize() == 15 * 1024);
+ // free block and merge with preceding
+ heap.free(alloc1);
+ REPORTER_ASSERT(reporter, heap.freeSize() == 54 * 1024 && heap.largestBlockSize() == 39 * 1024);
+ // free block and merge with preceding and following
+ heap.free(alloc2);
+ REPORTER_ASSERT(reporter, heap.freeSize() == 64 * 1024 && heap.largestBlockSize() == 64 * 1024);
+
+ // fragment
+ REPORTER_ASSERT(reporter, heap.alloc(19 * 1024, &alloc0));
+ REPORTER_ASSERT(reporter, heap.alloc(5 * 1024, &alloc1));
+ REPORTER_ASSERT(reporter, heap.alloc(15 * 1024, &alloc2));
+ REPORTER_ASSERT(reporter, heap.alloc(3 * 1024, &alloc3));
+ REPORTER_ASSERT(reporter, heap.freeSize() == 22 * 1024 && heap.largestBlockSize() == 22 * 1024);
+ heap.free(alloc0);
+ REPORTER_ASSERT(reporter, heap.freeSize() == 41 * 1024 && heap.largestBlockSize() == 22 * 1024);
+ heap.free(alloc2);
+ REPORTER_ASSERT(reporter, heap.freeSize() == 56 * 1024 && heap.largestBlockSize() == 22 * 1024);
+ REPORTER_ASSERT(reporter, !heap.alloc(40 * 1024, &alloc0));
+ heap.free(alloc3);
+ REPORTER_ASSERT(reporter, heap.freeSize() == 59 * 1024 && heap.largestBlockSize() == 40 * 1024);
+ REPORTER_ASSERT(reporter, heap.alloc(40 * 1024, &alloc0));
+ REPORTER_ASSERT(reporter, heap.freeSize() == 19 * 1024 && heap.largestBlockSize() == 19 * 1024);
+ heap.free(alloc1);
+ REPORTER_ASSERT(reporter, heap.freeSize() == 24 * 1024 && heap.largestBlockSize() == 24 * 1024);
+ heap.free(alloc0);
+ REPORTER_ASSERT(reporter, heap.freeSize() == 64 * 1024 && heap.largestBlockSize() == 64 * 1024);
+
+ // unaligned sizes
+ REPORTER_ASSERT(reporter, heap.alloc(19 * 1024 - 31, &alloc0));
+ REPORTER_ASSERT(reporter, heap.alloc(5 * 1024 - 5, &alloc1));
+ REPORTER_ASSERT(reporter, heap.alloc(15 * 1024 - 19, &alloc2));
+ REPORTER_ASSERT(reporter, heap.alloc(3 * 1024 - 3, &alloc3));
+ REPORTER_ASSERT(reporter, heap.freeSize() == 22 * 1024 && heap.largestBlockSize() == 22 * 1024);
+ heap.free(alloc0);
+ REPORTER_ASSERT(reporter, heap.freeSize() == 41 * 1024 && heap.largestBlockSize() == 22 * 1024);
+ heap.free(alloc2);
+ REPORTER_ASSERT(reporter, heap.freeSize() == 56 * 1024 && heap.largestBlockSize() == 22 * 1024);
+ REPORTER_ASSERT(reporter, !heap.alloc(40 * 1024, &alloc0));
+ heap.free(alloc3);
+ REPORTER_ASSERT(reporter, heap.freeSize() == 59 * 1024 && heap.largestBlockSize() == 40 * 1024);
+ REPORTER_ASSERT(reporter, heap.alloc(40 * 1024, &alloc0));
+ REPORTER_ASSERT(reporter, heap.freeSize() == 19 * 1024 && heap.largestBlockSize() == 19 * 1024);
+ heap.free(alloc1);
+ REPORTER_ASSERT(reporter, heap.freeSize() == 24 * 1024 && heap.largestBlockSize() == 24 * 1024);
+ heap.free(alloc0);
+ REPORTER_ASSERT(reporter, heap.freeSize() == 64 * 1024 && heap.largestBlockSize() == 64 * 1024);
+}
+
+void suballoc_test(skiatest::Reporter* reporter, GrContext* context) {
+ GrVkGpu* gpu = static_cast<GrVkGpu*>(context->contextPriv().getGpu());
+
+ // memtype/heap index don't matter, we're just testing the allocation algorithm so we'll use 0
+ GrVkHeap heap(gpu, GrVkHeap::kSubAlloc_Strategy, 64 * 1024);
+ GrVkAlloc alloc0, alloc1, alloc2, alloc3;
+ const VkDeviceSize kAlignment = 16;
+ const uint32_t kMemType = 0;
+ const uint32_t kHeapIndex = 0;
+
+ REPORTER_ASSERT(reporter, heap.allocSize() == 0 && heap.usedSize() == 0);
+
+ // fragment allocations so we need to grow heap
+ REPORTER_ASSERT(reporter, heap.alloc(19 * 1024 - 3, kAlignment, kMemType, kHeapIndex, &alloc0));
+ REPORTER_ASSERT(reporter, heap.alloc(5 * 1024 - 9, kAlignment, kMemType, kHeapIndex, &alloc1));
+ REPORTER_ASSERT(reporter, heap.alloc(15 * 1024 - 15, kAlignment, kMemType, kHeapIndex, &alloc2));
+ REPORTER_ASSERT(reporter, heap.alloc(3 * 1024 - 6, kAlignment, kMemType, kHeapIndex, &alloc3));
+ REPORTER_ASSERT(reporter, heap.allocSize() == 64 * 1024 && heap.usedSize() == 42 * 1024);
+ heap.free(alloc0);
+ REPORTER_ASSERT(reporter, heap.allocSize() == 64 * 1024 && heap.usedSize() == 23 * 1024);
+ heap.free(alloc2);
+ REPORTER_ASSERT(reporter, heap.allocSize() == 64 * 1024 && heap.usedSize() == 8 * 1024);
+ // we expect the heap to grow here
+ REPORTER_ASSERT(reporter, heap.alloc(40 * 1024, kAlignment, kMemType, kHeapIndex, &alloc0));
+ REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 48 * 1024);
+ heap.free(alloc3);
+ REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 45 * 1024);
+ // heap should not grow here (first subheap has exactly enough room)
+ REPORTER_ASSERT(reporter, heap.alloc(40 * 1024, kAlignment, kMemType, kHeapIndex, &alloc3));
+ REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 85 * 1024);
+ // heap should not grow here (second subheap has room)
+ REPORTER_ASSERT(reporter, heap.alloc(22 * 1024, kAlignment, kMemType, kHeapIndex, &alloc2));
+ REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 107 * 1024);
+ heap.free(alloc1);
+ REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 102 * 1024);
+ heap.free(alloc0);
+ REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 62 * 1024);
+ heap.free(alloc2);
+ REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 40 * 1024);
+ heap.free(alloc3);
+ REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 0 * 1024);
+ // heap should not grow here (allocating more than subheap size)
+ REPORTER_ASSERT(reporter, heap.alloc(128 * 1024, kAlignment, kMemType, kHeapIndex, &alloc0));
+ REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 0 * 1024);
+ heap.free(alloc0);
+ REPORTER_ASSERT(reporter, heap.alloc(24 * 1024, kAlignment, kMemType, kHeapIndex, &alloc0));
+ REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 24 * 1024);
+ // heap should alloc a new subheap because the memory type is different
+ REPORTER_ASSERT(reporter, heap.alloc(24 * 1024, kAlignment, kMemType+1, kHeapIndex, &alloc1));
+ REPORTER_ASSERT(reporter, heap.allocSize() == 192 * 1024 && heap.usedSize() == 48 * 1024);
+ // heap should alloc a new subheap because the alignment is different
+ REPORTER_ASSERT(reporter, heap.alloc(24 * 1024, 128, kMemType, kHeapIndex, &alloc2));
+ REPORTER_ASSERT(reporter, heap.allocSize() == 256 * 1024 && heap.usedSize() == 72 * 1024);
+ heap.free(alloc2);
+ heap.free(alloc0);
+ heap.free(alloc1);
+ REPORTER_ASSERT(reporter, heap.allocSize() == 256 * 1024 && heap.usedSize() == 0 * 1024);
+}
+
+void singlealloc_test(skiatest::Reporter* reporter, GrContext* context) {
+ GrVkGpu* gpu = static_cast<GrVkGpu*>(context->contextPriv().getGpu());
+
+ // memtype/heap index don't matter, we're just testing the allocation algorithm so we'll use 0
+ GrVkHeap heap(gpu, GrVkHeap::kSingleAlloc_Strategy, 64 * 1024);
+ GrVkAlloc alloc0, alloc1, alloc2, alloc3;
+ const VkDeviceSize kAlignment = 64;
+ const uint32_t kMemType = 0;
+ const uint32_t kHeapIndex = 0;
+
+ REPORTER_ASSERT(reporter, heap.allocSize() == 0 && heap.usedSize() == 0);
+
+ // make a few allocations
+ REPORTER_ASSERT(reporter, heap.alloc(49 * 1024 - 3, kAlignment, kMemType, kHeapIndex, &alloc0));
+ REPORTER_ASSERT(reporter, heap.alloc(5 * 1024 - 37, kAlignment, kMemType, kHeapIndex, &alloc1));
+ REPORTER_ASSERT(reporter, heap.alloc(15 * 1024 - 11, kAlignment, kMemType, kHeapIndex, &alloc2));
+ REPORTER_ASSERT(reporter, heap.alloc(3 * 1024 - 29, kAlignment, kMemType, kHeapIndex, &alloc3));
+ REPORTER_ASSERT(reporter, heap.allocSize() == 72 * 1024 && heap.usedSize() == 72 * 1024);
+ heap.free(alloc0);
+ REPORTER_ASSERT(reporter, heap.allocSize() == 72 * 1024 && heap.usedSize() == 23 * 1024);
+ heap.free(alloc2);
+ REPORTER_ASSERT(reporter, heap.allocSize() == 72 * 1024 && heap.usedSize() == 8 * 1024);
+ // heap should not grow here (first subheap has room)
+ REPORTER_ASSERT(reporter, heap.alloc(40 * 1024, kAlignment, kMemType, kHeapIndex, &alloc0));
+ REPORTER_ASSERT(reporter, heap.allocSize() == 72 * 1024 && heap.usedSize() == 48 * 1024);
+ heap.free(alloc3);
+ REPORTER_ASSERT(reporter, heap.allocSize() == 72 * 1024 && heap.usedSize() == 45 * 1024);
+ // check for exact fit -- heap should not grow here (third subheap has room)
+ REPORTER_ASSERT(reporter, heap.alloc(15 * 1024 - 63, kAlignment, kMemType, kHeapIndex, &alloc2));
+ REPORTER_ASSERT(reporter, heap.allocSize() == 72 * 1024 && heap.usedSize() == 60 * 1024);
+ heap.free(alloc2);
+ REPORTER_ASSERT(reporter, heap.allocSize() == 72 * 1024 && heap.usedSize() == 45 * 1024);
+ // heap should grow here (no subheap has room)
+ REPORTER_ASSERT(reporter, heap.alloc(40 * 1024, kAlignment, kMemType, kHeapIndex, &alloc3));
+ REPORTER_ASSERT(reporter, heap.allocSize() == 112 * 1024 && heap.usedSize() == 85 * 1024);
+ heap.free(alloc1);
+ REPORTER_ASSERT(reporter, heap.allocSize() == 112 * 1024 && heap.usedSize() == 80 * 1024);
+ heap.free(alloc0);
+ REPORTER_ASSERT(reporter, heap.allocSize() == 112 * 1024 && heap.usedSize() == 40 * 1024);
+ heap.free(alloc3);
+ REPORTER_ASSERT(reporter, heap.allocSize() == 112 * 1024 && heap.usedSize() == 0 * 1024);
+ REPORTER_ASSERT(reporter, heap.alloc(24 * 1024, kAlignment, kMemType, kHeapIndex, &alloc0));
+ REPORTER_ASSERT(reporter, heap.allocSize() == 112 * 1024 && heap.usedSize() == 24 * 1024);
+ // heap should alloc a new subheap because the memory type is different
+ REPORTER_ASSERT(reporter, heap.alloc(24 * 1024, kAlignment, kMemType + 1, kHeapIndex, &alloc1));
+ REPORTER_ASSERT(reporter, heap.allocSize() == 136 * 1024 && heap.usedSize() == 48 * 1024);
+ // heap should alloc a new subheap because the alignment is different
+ REPORTER_ASSERT(reporter, heap.alloc(24 * 1024, 128, kMemType, kHeapIndex, &alloc2));
+ REPORTER_ASSERT(reporter, heap.allocSize() == 160 * 1024 && heap.usedSize() == 72 * 1024);
+ heap.free(alloc1);
+ heap.free(alloc2);
+ heap.free(alloc0);
+ REPORTER_ASSERT(reporter, heap.allocSize() == 160 * 1024 && heap.usedSize() == 0 * 1024);
+}
+
+DEF_GPUTEST_FOR_VULKAN_CONTEXT(VkHeapTests, reporter, ctxInfo) {
+ subheap_test(reporter, ctxInfo.grContext());
+ suballoc_test(reporter, ctxInfo.grContext());
+ singlealloc_test(reporter, ctxInfo.grContext());
+}
+
+#endif