author     Greg Daniel <egdaniel@google.com>  2018-05-31 13:13:33 -0400
committer  Skia Commit-Bot <skia-commit-bot@chromium.org>  2018-05-31 18:21:25 +0000
commit     81df0414c8226ed40ccf70b6f67890f136440b81 (patch)
tree       3d4d3a3256b7dfc0d0604ef4fa15db34f82b1e47 /src/gpu
parent     ec0732433f28368b94973496fbdb990f967ba1f6 (diff)
Reland "Use GrVkMemoryAllocator for vulkan memory allocations in ganesh."
This is a reland of 331c266ed716526478a10885aff66181cec64486

Original change's description:
> Use GrVkMemoryAllocator for vulkan memory allocations in ganesh.
>
> Besides using the new allocator, the big logical change is that map
> and unmap calls from GrVkMemory are spec'd to map the entire GrVkAlloc
> instead of a specific offset and size as they did before. As a
> consequence of this, we move the handling of non-coherent alignment
> for flush/invalidate calls to GrVkMemory instead of the callers.
>
> Bug: skia:
> Change-Id: I794d713106602f27aa7e808c306bbb69fd2b67be
> Reviewed-on: https://skia-review.googlesource.com/130021
> Commit-Queue: Greg Daniel <egdaniel@google.com>
> Reviewed-by: Jim Van Verth <jvanverth@google.com>

Bug: skia:
Change-Id: Ia9a4192d344449fb444d2adaa1d62ff1ede4b21d
Reviewed-on: https://skia-review.googlesource.com/131083
Reviewed-by: Jim Van Verth <jvanverth@google.com>
Commit-Queue: Greg Daniel <egdaniel@google.com>
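At a glance, the new allocation path looks like the following sketch, assembled from the calls visible in the diff below (a hedged illustration, not Skia source; allocAndBind is an illustrative name, and error handling is elided):

    // Illustration only: allocate and bind buffer memory via GrVkMemoryAllocator,
    // mirroring GrVkMemory::AllocAndBindBufferMemory in this change.
    bool allocAndBind(GrVkMemoryAllocator* allocator, VkDevice device,
                      VkBuffer buffer, GrVkAlloc* alloc) {
        GrVkBackendMemory memory = 0;
        if (!allocator->allocateMemoryForBuffer(
                    buffer, GrVkMemoryAllocator::BufferUsage::kCpuWritesGpuReads,
                    GrVkMemoryAllocator::AllocationPropertyFlags::kNone, &memory)) {
            return false;
        }
        allocator->getAllocInfo(memory, alloc);  // fills fMemory/fOffset/fSize/fFlags
        if (vkBindBufferMemory(device, buffer, alloc->fMemory, alloc->fOffset) != VK_SUCCESS) {
            allocator->freeMemory(alloc->fBackendMemory);
            return false;
        }
        return true;
    }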
Diffstat (limited to 'src/gpu')
-rw-r--r--  src/gpu/vk/GrVkAMDMemoryAllocator.cpp   72
-rw-r--r--  src/gpu/vk/GrVkBackendContext.cpp        2
-rw-r--r--  src/gpu/vk/GrVkBuffer.cpp               37
-rw-r--r--  src/gpu/vk/GrVkBuffer.h                  5
-rw-r--r--  src/gpu/vk/GrVkGpu.cpp                  83
-rw-r--r--  src/gpu/vk/GrVkGpu.h                    28
-rw-r--r--  src/gpu/vk/GrVkMemory.cpp              661
-rw-r--r--  src/gpu/vk/GrVkMemory.h                138
8 files changed, 207 insertions, 819 deletions
diff --git a/src/gpu/vk/GrVkAMDMemoryAllocator.cpp b/src/gpu/vk/GrVkAMDMemoryAllocator.cpp
index 0b838ece3a..53703a2149 100644
--- a/src/gpu/vk/GrVkAMDMemoryAllocator.cpp
+++ b/src/gpu/vk/GrVkAMDMemoryAllocator.cpp
@@ -8,6 +8,7 @@
#include "GrVkAMDMemoryAllocator.h"
#include "vk/GrVkInterface.h"
+#include "GrVkMemory.h"
#include "GrVkUtil.h"
GrVkAMDMemoryAllocator::GrVkAMDMemoryAllocator(VkPhysicalDevice physicalDevice,
@@ -42,7 +43,10 @@ GrVkAMDMemoryAllocator::GrVkAMDMemoryAllocator(VkPhysicalDevice physicalDevice,
info.flags = 0;
info.physicalDevice = physicalDevice;
info.device = device;
- info.preferredLargeHeapBlockSize = 0;
+ // Manual testing of dm runs shows that using 64MB here instead of the default 256MB results in
+ // lower memory usage on average. dm also seems to run faster with 64MB, so we don't appear to
+ // be trading speed for memory.
+ info.preferredLargeHeapBlockSize = 64*1024*1024;
info.pAllocationCallbacks = nullptr;
info.pDeviceMemoryCallbacks = nullptr;
info.frameInUseCount = 0;
@@ -106,10 +110,10 @@ bool GrVkAMDMemoryAllocator::allocateMemoryForBuffer(VkBuffer buffer, BufferUsag
info.preferredFlags = VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
break;
case BufferUsage::kCpuWritesGpuReads:
- // First attempt to try memory is also device local
+ // First attempt requires memory that is also cached
info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
- VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
- info.preferredFlags = VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+ VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+ info.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
break;
case BufferUsage::kGpuWritesCpuReads:
info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
@@ -134,7 +138,7 @@ bool GrVkAMDMemoryAllocator::allocateMemoryForBuffer(VkBuffer buffer, BufferUsag
VkResult result = vmaAllocateMemoryForBuffer(fAllocator, buffer, &info, &allocation, nullptr);
if (VK_SUCCESS != result) {
if (usage == BufferUsage::kCpuWritesGpuReads) {
- // We try again but this time drop the requirement for device local
+ // We try again but this time drop the requirement for cached
info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
result = vmaAllocateMemoryForBuffer(fAllocator, buffer, &info, &allocation, nullptr);
}
@@ -142,6 +146,7 @@ bool GrVkAMDMemoryAllocator::allocateMemoryForBuffer(VkBuffer buffer, BufferUsag
if (VK_SUCCESS != result) {
return false;
}
+
*backendMemory = (GrVkBackendMemory)allocation;
return true;
}
@@ -173,6 +178,25 @@ void GrVkAMDMemoryAllocator::getAllocInfo(const GrVkBackendMemory& memoryHandle,
alloc->fSize = vmaInfo.size;
alloc->fFlags = flags;
alloc->fBackendMemory = memoryHandle;
+
+ // TODO: Remove this hack once the AMD allocator is able to handle the alignment of noncoherent
+ // memory itself.
+ if (!SkToBool(VK_MEMORY_PROPERTY_HOST_COHERENT_BIT & memFlags)) {
+ // This is a hack to say that the allocation size is actually larger than it is. This is to
+ // make sure when we are flushing and invalidating noncoherent memory we have a size that is
+ // aligned to the nonCoherentAtomSize. This is safe for three reasons. First, the total size
+ // of the VkDeviceMemory we allocate will always be a multiple of the max possible alignment
+ // (currently 256). Second, all suballocations have offsets aligned to 256. And
+ // finally the allocator we are using always maps the entire VkDeviceMemory so the range
+ // we'll be flushing/invalidating will be mapped. So our new fake allocation size will
+ // always fit into the VkDeviceMemory, will never push it into another suballocation, and
+ // will always be mapped when map is called.
+ const VkPhysicalDeviceProperties* devProps;
+ vmaGetPhysicalDeviceProperties(fAllocator, &devProps);
+ VkDeviceSize alignment = devProps->limits.nonCoherentAtomSize;
+
+ alloc->fSize = (alloc->fSize + alignment - 1) & ~(alignment - 1);
+ }
}
void* GrVkAMDMemoryAllocator::mapMemory(const GrVkBackendMemory& memoryHandle) {
@@ -198,24 +222,9 @@ void GrVkAMDMemoryAllocator::flushMappedMemory(const GrVkBackendMemory& memoryHa
vmaGetPhysicalDeviceProperties(fAllocator, &physDevProps);
VkDeviceSize alignment = physDevProps->limits.nonCoherentAtomSize;
- offset = offset + info.fOffset;
- VkDeviceSize offsetDiff = offset & (alignment -1);
- offset = offset - offsetDiff;
- size = (size + alignment - 1) & ~(alignment - 1);
-#ifdef SK_DEBUG
- SkASSERT(offset >= info.fOffset);
- SkASSERT(offset + size <= info.fOffset + info.fSize);
- SkASSERT(0 == (offset & (alignment-1)));
- SkASSERT(size > 0);
- SkASSERT(0 == (size & (alignment-1)));
-#endif
-
VkMappedMemoryRange mappedMemoryRange;
- memset(&mappedMemoryRange, 0, sizeof(VkMappedMemoryRange));
- mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
- mappedMemoryRange.memory = info.fMemory;
- mappedMemoryRange.offset = offset;
- mappedMemoryRange.size = size;
+ GrVkMemory::GetNonCoherentMappedMemoryRange(info, offset, size, alignment,
+ &mappedMemoryRange);
GR_VK_CALL(fInterface, FlushMappedMemoryRanges(fDevice, 1, &mappedMemoryRange));
}
}
@@ -231,24 +240,9 @@ void GrVkAMDMemoryAllocator::invalidateMappedMemory(const GrVkBackendMemory& mem
vmaGetPhysicalDeviceProperties(fAllocator, &physDevProps);
VkDeviceSize alignment = physDevProps->limits.nonCoherentAtomSize;
- offset = offset + info.fOffset;
- VkDeviceSize offsetDiff = offset & (alignment -1);
- offset = offset - offsetDiff;
- size = (size + alignment - 1) & ~(alignment - 1);
-#ifdef SK_DEBUG
- SkASSERT(offset >= info.fOffset);
- SkASSERT(offset + size <= info.fOffset + info.fSize);
- SkASSERT(0 == (offset & (alignment-1)));
- SkASSERT(size > 0);
- SkASSERT(0 == (size & (alignment-1)));
-#endif
-
VkMappedMemoryRange mappedMemoryRange;
- memset(&mappedMemoryRange, 0, sizeof(VkMappedMemoryRange));
- mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
- mappedMemoryRange.memory = info.fMemory;
- mappedMemoryRange.offset = offset;
- mappedMemoryRange.size = size;
+ GrVkMemory::GetNonCoherentMappedMemoryRange(info, offset, size, alignment,
+ &mappedMemoryRange);
GR_VK_CALL(fInterface, InvalidateMappedMemoryRanges(fDevice, 1, &mappedMemoryRange));
}
}
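Both the fake-size hack above and the flush/invalidate paths rely on the standard power-of-two rounding trick (Skia asserts elsewhere in this diff that nonCoherentAtomSize is a power of two). A minimal self-contained sketch with worked values:

    #include <vulkan/vulkan.h>

    // Round a size up to the next multiple of a power-of-two alignment.
    // e.g. size = 1000, alignment = 256: (1000 + 255) & ~255 == 1024.
    static VkDeviceSize align_up(VkDeviceSize size, VkDeviceSize alignment) {
        return (size + alignment - 1) & ~(alignment - 1);
    }

    // Round an offset down to the previous multiple of the alignment.
    // e.g. offset = 356, alignment = 256: 356 & ~255 == 256.
    static VkDeviceSize align_down(VkDeviceSize offset, VkDeviceSize alignment) {
        return offset & ~(alignment - 1);
    }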
diff --git a/src/gpu/vk/GrVkBackendContext.cpp b/src/gpu/vk/GrVkBackendContext.cpp
index 269a8911e4..d54582f3a5 100644
--- a/src/gpu/vk/GrVkBackendContext.cpp
+++ b/src/gpu/vk/GrVkBackendContext.cpp
@@ -8,7 +8,6 @@
#include "SkAutoMalloc.h"
#include "vk/GrVkBackendContext.h"
#include "vk/GrVkExtensions.h"
-#include "vk/GrVkInterface.h"
#include "vk/GrVkUtil.h"
////////////////////////////////////////////////////////////////////////////////
@@ -323,6 +322,7 @@ const GrVkBackendContext* GrVkBackendContext::Create(uint32_t* presentQueueIndex
}
GrVkBackendContext::~GrVkBackendContext() {
+ fMemoryAllocator.reset();
if (fInterface == nullptr || !fOwnsInstanceAndDevice) {
return;
}
diff --git a/src/gpu/vk/GrVkBuffer.cpp b/src/gpu/vk/GrVkBuffer.cpp
index f65b15ded0..b3c1d825aa 100644
--- a/src/gpu/vk/GrVkBuffer.cpp
+++ b/src/gpu/vk/GrVkBuffer.cpp
@@ -170,28 +170,10 @@ void GrVkBuffer::internalMap(GrVkGpu* gpu, size_t size, bool* createdNewBuffer)
if (fDesc.fDynamic) {
const GrVkAlloc& alloc = this->alloc();
SkASSERT(alloc.fSize > 0);
+ SkASSERT(alloc.fSize >= size);
+ SkASSERT(0 == fOffset);
- // For Noncoherent buffers we want to make sure the range that we map, both offset and size,
- // are aligned to the nonCoherentAtomSize limit. The offset should have been correctly
- // aligned by our memory allocator. For size we pad out to make the range also aligned.
- if (SkToBool(alloc.fFlags & GrVkAlloc::kNoncoherent_Flag)) {
- // Currently we always have the internal offset as 0.
- SkASSERT(0 == fOffset);
- VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize;
- SkASSERT(0 == (alloc.fOffset & (alignment - 1)));
-
- // Make size of the map aligned to nonCoherentAtomSize
- size = (size + alignment - 1) & ~(alignment - 1);
- fMappedSize = size;
- }
- SkASSERT(size + fOffset <= alloc.fSize);
- VkResult err = VK_CALL(gpu, MapMemory(gpu->device(), alloc.fMemory,
- alloc.fOffset + fOffset,
- size, 0, &fMapPtr));
- if (err) {
- fMapPtr = nullptr;
- fMappedSize = 0;
- }
+ fMapPtr = GrVkMemory::MapAlloc(gpu, alloc);
} else {
if (!fMapPtr) {
fMapPtr = new unsigned char[this->size()];
@@ -206,16 +188,15 @@ void GrVkBuffer::internalUnmap(GrVkGpu* gpu, size_t size) {
SkASSERT(this->vkIsMapped());
if (fDesc.fDynamic) {
+ const GrVkAlloc& alloc = this->alloc();
+ SkASSERT(alloc.fSize > 0);
+ SkASSERT(alloc.fSize >= size);
// We currently don't use fOffset
SkASSERT(0 == fOffset);
- VkDeviceSize flushOffset = this->alloc().fOffset + fOffset;
- VkDeviceSize flushSize = gpu->vkCaps().canUseWholeSizeOnFlushMappedMemory() ? VK_WHOLE_SIZE
- : fMappedSize;
- GrVkMemory::FlushMappedAlloc(gpu, this->alloc(), flushOffset, flushSize);
- VK_CALL(gpu, UnmapMemory(gpu->device(), this->alloc().fMemory));
+ GrVkMemory::FlushMappedAlloc(gpu, alloc, 0, size);
+ GrVkMemory::UnmapAlloc(gpu, alloc);
fMapPtr = nullptr;
- fMappedSize = 0;
} else {
// vkCmdUpdateBuffer requires size < 64k and 4-byte alignment.
// https://bugs.chromium.org/p/skia/issues/detail?id=7488
@@ -224,7 +205,7 @@ void GrVkBuffer::internalUnmap(GrVkGpu* gpu, size_t size) {
} else {
GrVkTransferBuffer* transferBuffer =
GrVkTransferBuffer::Create(gpu, size, GrVkBuffer::kCopyRead_Type);
- if(!transferBuffer) {
+ if (!transferBuffer) {
return;
}
diff --git a/src/gpu/vk/GrVkBuffer.h b/src/gpu/vk/GrVkBuffer.h
index 8d116a40f8..6d0c1fda9a 100644
--- a/src/gpu/vk/GrVkBuffer.h
+++ b/src/gpu/vk/GrVkBuffer.h
@@ -82,7 +82,7 @@ protected:
const Desc& descriptor);
GrVkBuffer(const Desc& desc, const GrVkBuffer::Resource* resource)
- : fDesc(desc), fResource(resource), fOffset(0), fMapPtr(nullptr), fMappedSize(0) {
+ : fDesc(desc), fResource(resource), fOffset(0), fMapPtr(nullptr) {
}
void* vkMap(GrVkGpu* gpu) {
@@ -115,9 +115,6 @@ private:
const Resource* fResource;
VkDeviceSize fOffset;
void* fMapPtr;
- // On certain Intel devices/drivers there is a bug if we try to flush non-coherent memory and
- // pass in VK_WHOLE_SIZE. Thus we track our mapped size and explicitly set it when calling flush
- VkDeviceSize fMappedSize;
typedef SkNoncopyable INHERITED;
};
diff --git a/src/gpu/vk/GrVkGpu.cpp b/src/gpu/vk/GrVkGpu.cpp
index 56d0b95bd0..2525c5c16c 100644
--- a/src/gpu/vk/GrVkGpu.cpp
+++ b/src/gpu/vk/GrVkGpu.cpp
@@ -17,6 +17,7 @@
#include "GrRenderTargetPriv.h"
#include "GrTexturePriv.h"
+#include "GrVkAMDMemoryAllocator.h"
#include "GrVkCommandBuffer.h"
#include "GrVkGpuCommandBuffer.h"
#include "GrVkImage.h"
@@ -92,6 +93,7 @@ GrVkGpu::GrVkGpu(GrContext* context, const GrContextOptions& options,
sk_sp<const GrVkBackendContext> backendCtx)
: INHERITED(context)
, fBackendContext(std::move(backendCtx))
+ , fMemoryAllocator(fBackendContext->fMemoryAllocator)
, fDevice(fBackendContext->fDevice)
, fQueue(fBackendContext->fQueue)
, fResourceProvider(this)
@@ -118,6 +120,12 @@ GrVkGpu::GrVkGpu(GrContext* context, const GrContextOptions& options,
}
#endif
+ if (!fMemoryAllocator) {
+ // We were not given a memory allocator at creation
+ fMemoryAllocator.reset(new GrVkAMDMemoryAllocator(fBackendContext->fPhysicalDevice,
+ fDevice, fBackendContext->fInterface));
+ }
+
fCompiler = new SkSL::Compiler();
fVkCaps.reset(new GrVkCaps(options, this->vkInterface(), fBackendContext->fPhysicalDevice,
@@ -142,17 +150,6 @@ GrVkGpu::GrVkGpu(GrContext* context, const GrContextOptions& options,
fCurrentCmdBuffer = fResourceProvider.findOrCreatePrimaryCommandBuffer();
SkASSERT(fCurrentCmdBuffer);
fCurrentCmdBuffer->begin(this);
-
- // set up our heaps
- fHeaps[kLinearImage_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 16*1024*1024));
- fHeaps[kOptimalImage_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 64*1024*1024));
- fHeaps[kSmallOptimalImage_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 2*1024*1024));
- fHeaps[kVertexBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSingleAlloc_Strategy, 0));
- fHeaps[kIndexBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSingleAlloc_Strategy, 0));
- fHeaps[kUniformBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 256*1024));
- fHeaps[kTexelBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSingleAlloc_Strategy, 0));
- fHeaps[kCopyReadBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSingleAlloc_Strategy, 0));
- fHeaps[kCopyWriteBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 16*1024*1024));
}
void GrVkGpu::destroyResources() {
@@ -562,7 +559,6 @@ bool GrVkGpu::uploadTexDataLinear(GrVkTexture* tex, GrSurfaceOrigin texOrigin, i
0, // arraySlice
};
VkSubresourceLayout layout;
- VkResult err;
const GrVkInterface* interface = this->vkInterface();
@@ -573,28 +569,14 @@ bool GrVkGpu::uploadTexDataLinear(GrVkTexture* tex, GrSurfaceOrigin texOrigin, i
int texTop = kBottomLeft_GrSurfaceOrigin == texOrigin ? tex->height() - top - height : top;
const GrVkAlloc& alloc = tex->alloc();
- VkDeviceSize offset = alloc.fOffset + texTop*layout.rowPitch + left*bpp;
- VkDeviceSize offsetDiff = 0;
+ VkDeviceSize offset = texTop*layout.rowPitch + left*bpp;
VkDeviceSize size = height*layout.rowPitch;
- // For Noncoherent buffers we want to make sure the range that we map, both offset and size,
- // are aligned to the nonCoherentAtomSize limit. We may have to move the initial offset back to
- // meet the alignment requirements. So we track how far we move back and then adjust the mapped
- // ptr back up so that this is opaque to the caller.
- if (SkToBool(alloc.fFlags & GrVkAlloc::kNoncoherent_Flag)) {
- VkDeviceSize alignment = this->physicalDeviceProperties().limits.nonCoherentAtomSize;
- offsetDiff = offset & (alignment - 1);
- offset = offset - offsetDiff;
- // Make size of the map aligned to nonCoherentAtomSize
- size = (size + alignment - 1) & ~(alignment - 1);
- }
- SkASSERT(offset >= alloc.fOffset);
- SkASSERT(size <= alloc.fOffset + alloc.fSize);
- void* mapPtr;
- err = GR_VK_CALL(interface, MapMemory(fDevice, alloc.fMemory, offset, size, 0, &mapPtr));
- if (err) {
+ SkASSERT(size + offset <= alloc.fSize);
+ void* mapPtr = GrVkMemory::MapAlloc(this, alloc);
+ if (!mapPtr) {
return false;
}
- mapPtr = reinterpret_cast<char*>(mapPtr) + offsetDiff;
+ mapPtr = reinterpret_cast<char*>(mapPtr) + offset;
if (kBottomLeft_GrSurfaceOrigin == texOrigin) {
// copy into buffer by rows
@@ -611,7 +593,7 @@ bool GrVkGpu::uploadTexDataLinear(GrVkTexture* tex, GrSurfaceOrigin texOrigin, i
}
GrVkMemory::FlushMappedAlloc(this, alloc, offset, size);
- GR_VK_CALL(interface, UnmapMemory(fDevice, alloc.fMemory));
+ GrVkMemory::UnmapAlloc(this, alloc);
return true;
}
@@ -1147,33 +1129,14 @@ GrStencilAttachment* GrVkGpu::createStencilAttachmentForRenderTarget(const GrRen
bool copy_testing_data(GrVkGpu* gpu, const void* srcData, const GrVkAlloc& alloc,
size_t bufferOffset, size_t srcRowBytes, size_t dstRowBytes, int h) {
- // For Noncoherent buffers we want to make sure the range that we map, both offset and size,
- // are aligned to the nonCoherentAtomSize limit. We may have to move the initial offset back to
- // meet the alignment requirements. So we track how far we move back and then adjust the mapped
- // ptr back up so that this is opaque to the caller.
- VkDeviceSize mapSize = dstRowBytes * h;
- VkDeviceSize mapOffset = alloc.fOffset + bufferOffset;
- VkDeviceSize offsetDiff = 0;
- if (SkToBool(alloc.fFlags & GrVkAlloc::kNoncoherent_Flag)) {
- VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize;
- offsetDiff = mapOffset & (alignment - 1);
- mapOffset = mapOffset - offsetDiff;
- // Make size of the map aligned to nonCoherentAtomSize
- mapSize = (mapSize + alignment - 1) & ~(alignment - 1);
- }
- SkASSERT(mapOffset >= alloc.fOffset);
- SkASSERT(mapSize + mapOffset <= alloc.fOffset + alloc.fSize);
- void* mapPtr;
- VkResult err = GR_VK_CALL(gpu->vkInterface(), MapMemory(gpu->device(),
- alloc.fMemory,
- mapOffset,
- mapSize,
- 0,
- &mapPtr));
- mapPtr = reinterpret_cast<char*>(mapPtr) + offsetDiff;
- if (err) {
+ VkDeviceSize size = dstRowBytes * h;
+ VkDeviceSize offset = bufferOffset;
+ SkASSERT(size + offset <= alloc.fSize);
+ void* mapPtr = GrVkMemory::MapAlloc(gpu, alloc);
+ if (!mapPtr) {
return false;
}
+ mapPtr = reinterpret_cast<char*>(mapPtr) + offset;
if (srcData) {
// If there is no padding on dst we can do a single memcopy.
@@ -1192,8 +1155,8 @@ bool copy_testing_data(GrVkGpu* gpu, const void* srcData, const GrVkAlloc& alloc
}
}
}
- GrVkMemory::FlushMappedAlloc(gpu, alloc, mapOffset, mapSize);
- GR_VK_CALL(gpu->vkInterface(), UnmapMemory(gpu->device(), alloc.fMemory));
+ GrVkMemory::FlushMappedAlloc(gpu, alloc, offset, size);
+ GrVkMemory::UnmapAlloc(gpu, alloc);
return true;
}
@@ -2017,7 +1980,7 @@ bool GrVkGpu::onReadPixels(GrSurface* surface, GrSurfaceOrigin origin, int left,
this->submitCommandBuffer(kForce_SyncQueue);
void* mappedMemory = transferBuffer->map();
const GrVkAlloc& transAlloc = transferBuffer->alloc();
- GrVkMemory::InvalidateMappedAlloc(this, transAlloc, transAlloc.fOffset, VK_WHOLE_SIZE);
+ GrVkMemory::InvalidateMappedAlloc(this, transAlloc, 0, transAlloc.fSize);
if (copyFromOrigin) {
uint32_t skipRows = region.imageExtent.height - height;
diff --git a/src/gpu/vk/GrVkGpu.h b/src/gpu/vk/GrVkGpu.h
index 7bdfbeaab3..a44ea7230f 100644
--- a/src/gpu/vk/GrVkGpu.h
+++ b/src/gpu/vk/GrVkGpu.h
@@ -23,6 +23,7 @@
class GrPipeline;
class GrVkBufferImpl;
+class GrVkMemoryAllocator;
class GrVkPipeline;
class GrVkPipelineState;
class GrVkPrimaryCommandBuffer;
@@ -46,6 +47,8 @@ public:
const GrVkInterface* vkInterface() const { return fBackendContext->fInterface.get(); }
const GrVkCaps& vkCaps() const { return *fVkCaps; }
+ GrVkMemoryAllocator* memoryAllocator() const { return fMemoryAllocator.get(); }
+
VkDevice device() const { return fDevice; }
VkQueue queue() const { return fQueue; }
VkCommandPool cmdPool() const { return fCmdPool; }
@@ -140,28 +143,6 @@ public:
VkDeviceSize dstOffset, VkDeviceSize size);
bool updateBuffer(GrVkBuffer* buffer, const void* src, VkDeviceSize offset, VkDeviceSize size);
- // Heaps
- enum Heap {
- kLinearImage_Heap = 0,
- // We separate out small (i.e., <= 16K) images to reduce fragmentation
- // in the main heap.
- kOptimalImage_Heap,
- kSmallOptimalImage_Heap,
- // We have separate vertex and image heaps, because it's possible that
- // a given Vulkan driver may allocate them separately.
- kVertexBuffer_Heap,
- kIndexBuffer_Heap,
- kUniformBuffer_Heap,
- kTexelBuffer_Heap,
- kCopyReadBuffer_Heap,
- kCopyWriteBuffer_Heap,
-
- kLastHeap = kCopyWriteBuffer_Heap
- };
- static const int kHeapCount = kLastHeap + 1;
-
- GrVkHeap* getHeap(Heap heap) const { return fHeaps[heap].get(); }
-
private:
GrVkGpu(GrContext*, const GrContextOptions&, sk_sp<const GrVkBackendContext> backendContext);
@@ -251,6 +232,7 @@ private:
#endif
sk_sp<const GrVkBackendContext> fBackendContext;
+ sk_sp<GrVkMemoryAllocator> fMemoryAllocator;
sk_sp<GrVkCaps> fVkCaps;
// These Vulkan objects are provided by the client, and also stored in fBackendContext.
@@ -270,8 +252,6 @@ private:
VkPhysicalDeviceProperties fPhysDevProps;
VkPhysicalDeviceMemoryProperties fPhysDevMemProps;
- std::unique_ptr<GrVkHeap> fHeaps[kHeapCount];
-
GrVkCopyManager fCopyManager;
#ifdef SK_ENABLE_VK_LAYERS
diff --git a/src/gpu/vk/GrVkMemory.cpp b/src/gpu/vk/GrVkMemory.cpp
index 4f619a3ef3..f999c26546 100644
--- a/src/gpu/vk/GrVkMemory.cpp
+++ b/src/gpu/vk/GrVkMemory.cpp
@@ -9,49 +9,26 @@
#include "GrVkGpu.h"
#include "GrVkUtil.h"
+#include "vk/GrVkMemoryAllocator.h"
-#ifdef SK_DEBUG
-// for simple tracking of how much we're using in each heap
-// last counter is for non-subheap allocations
-VkDeviceSize gHeapUsage[VK_MAX_MEMORY_HEAPS+1] = { 0 };
-#endif
+using AllocationPropertyFlags = GrVkMemoryAllocator::AllocationPropertyFlags;
+using BufferUsage = GrVkMemoryAllocator::BufferUsage;
-static bool get_valid_memory_type_index(const VkPhysicalDeviceMemoryProperties& physDevMemProps,
- uint32_t typeBits,
- VkMemoryPropertyFlags requestedMemFlags,
- uint32_t* typeIndex,
- uint32_t* heapIndex) {
- for (uint32_t i = 0; i < physDevMemProps.memoryTypeCount; ++i) {
- if (typeBits & (1 << i)) {
- uint32_t supportedFlags = physDevMemProps.memoryTypes[i].propertyFlags &
- requestedMemFlags;
- if (supportedFlags == requestedMemFlags) {
- *typeIndex = i;
- *heapIndex = physDevMemProps.memoryTypes[i].heapIndex;
- return true;
- }
- }
+static BufferUsage get_buffer_usage(GrVkBuffer::Type type, bool dynamic) {
+ switch (type) {
+ case GrVkBuffer::kVertex_Type: // fall through
+ case GrVkBuffer::kIndex_Type: // fall through
+ case GrVkBuffer::kTexel_Type:
+ return dynamic ? BufferUsage::kCpuWritesGpuReads : BufferUsage::kGpuOnly;
+ case GrVkBuffer::kUniform_Type:
+ SkASSERT(dynamic);
+ return BufferUsage::kCpuWritesGpuReads;
+ case GrVkBuffer::kCopyRead_Type: // fall through
+ case GrVkBuffer::kCopyWrite_Type:
+ return BufferUsage::kCpuOnly;
}
- return false;
-}
-
-static GrVkGpu::Heap buffer_type_to_heap(GrVkBuffer::Type type) {
- const GrVkGpu::Heap kBufferToHeap[]{
- GrVkGpu::kVertexBuffer_Heap,
- GrVkGpu::kIndexBuffer_Heap,
- GrVkGpu::kUniformBuffer_Heap,
- GrVkGpu::kTexelBuffer_Heap,
- GrVkGpu::kCopyReadBuffer_Heap,
- GrVkGpu::kCopyWriteBuffer_Heap,
- };
- GR_STATIC_ASSERT(0 == GrVkBuffer::kVertex_Type);
- GR_STATIC_ASSERT(1 == GrVkBuffer::kIndex_Type);
- GR_STATIC_ASSERT(2 == GrVkBuffer::kUniform_Type);
- GR_STATIC_ASSERT(3 == GrVkBuffer::kTexel_Type);
- GR_STATIC_ASSERT(4 == GrVkBuffer::kCopyRead_Type);
- GR_STATIC_ASSERT(5 == GrVkBuffer::kCopyWrite_Type);
-
- return kBufferToHeap[type];
+ SK_ABORT("Invalid GrVkBuffer::Type");
+ return BufferUsage::kCpuOnly; // Just returning an arbitrary value.
}
bool GrVkMemory::AllocAndBindBufferMemory(const GrVkGpu* gpu,
@@ -59,68 +36,23 @@ bool GrVkMemory::AllocAndBindBufferMemory(const GrVkGpu* gpu,
GrVkBuffer::Type type,
bool dynamic,
GrVkAlloc* alloc) {
- const GrVkInterface* iface = gpu->vkInterface();
- VkDevice device = gpu->device();
-
- VkMemoryRequirements memReqs;
- GR_VK_CALL(iface, GetBufferMemoryRequirements(device, buffer, &memReqs));
+ GrVkMemoryAllocator* allocator = gpu->memoryAllocator();
+ GrVkBackendMemory memory = 0;
- uint32_t typeIndex = 0;
- uint32_t heapIndex = 0;
- const VkPhysicalDeviceMemoryProperties& phDevMemProps = gpu->physicalDeviceMemoryProperties();
- const VkPhysicalDeviceProperties& phDevProps = gpu->physicalDeviceProperties();
- if (dynamic) {
- // try to get cached and ideally non-coherent memory first
- if (!get_valid_memory_type_index(phDevMemProps,
- memReqs.memoryTypeBits,
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
- VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
- &typeIndex,
- &heapIndex)) {
- // some sort of host-visible memory type should always be available for dynamic buffers
- SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps,
- memReqs.memoryTypeBits,
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
- &typeIndex,
- &heapIndex));
- }
+ GrVkMemoryAllocator::BufferUsage usage = get_buffer_usage(type, dynamic);
- VkMemoryPropertyFlags mpf = phDevMemProps.memoryTypes[typeIndex].propertyFlags;
- alloc->fFlags = mpf & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT ? 0x0
- : GrVkAlloc::kNoncoherent_Flag;
- if (SkToBool(alloc->fFlags & GrVkAlloc::kNoncoherent_Flag)) {
- SkASSERT(SkIsPow2(memReqs.alignment));
- SkASSERT(SkIsPow2(phDevProps.limits.nonCoherentAtomSize));
- memReqs.alignment = SkTMax(memReqs.alignment, phDevProps.limits.nonCoherentAtomSize);
- }
- } else {
- // device-local memory should always be available for static buffers
- SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps,
- memReqs.memoryTypeBits,
- VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
- &typeIndex,
- &heapIndex));
- alloc->fFlags = 0x0;
- }
-
- GrVkHeap* heap = gpu->getHeap(buffer_type_to_heap(type));
-
- if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) {
- // if static, try to allocate from non-host-visible non-device-local memory instead
- if (dynamic ||
- !get_valid_memory_type_index(phDevMemProps, memReqs.memoryTypeBits,
- 0, &typeIndex, &heapIndex) ||
- !heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) {
- SkDebugf("Failed to alloc buffer\n");
- return false;
- }
+ if (!allocator->allocateMemoryForBuffer(buffer, usage, AllocationPropertyFlags::kNone,
+ &memory)) {
+ return false;
}
+ allocator->getAllocInfo(memory, alloc);
// Bind buffer
- VkResult err = GR_VK_CALL(iface, BindBufferMemory(device, buffer,
- alloc->fMemory, alloc->fOffset));
+ VkResult err = GR_VK_CALL(gpu->vkInterface(), BindBufferMemory(gpu->device(), buffer,
+ alloc->fMemory,
+ alloc->fOffset));
if (err) {
- SkASSERT_RELEASE(heap->free(*alloc));
+ FreeBufferMemory(gpu, type, *alloc);
return false;
}
@@ -129,503 +61,152 @@ bool GrVkMemory::AllocAndBindBufferMemory(const GrVkGpu* gpu,
void GrVkMemory::FreeBufferMemory(const GrVkGpu* gpu, GrVkBuffer::Type type,
const GrVkAlloc& alloc) {
-
- GrVkHeap* heap = gpu->getHeap(buffer_type_to_heap(type));
- SkASSERT_RELEASE(heap->free(alloc));
+ if (alloc.fBackendMemory) {
+ GrVkMemoryAllocator* allocator = gpu->memoryAllocator();
+ allocator->freeMemory(alloc.fBackendMemory);
+ } else {
+ GR_VK_CALL(gpu->vkInterface(), FreeMemory(gpu->device(), alloc.fMemory, nullptr));
+ }
}
-// for debugging
-static uint64_t gTotalImageMemory = 0;
-static uint64_t gTotalImageMemoryFullPage = 0;
-
const VkDeviceSize kMaxSmallImageSize = 16 * 1024;
-const VkDeviceSize kMinVulkanPageSize = 16 * 1024;
-
-static VkDeviceSize align_size(VkDeviceSize size, VkDeviceSize alignment) {
- return (size + alignment - 1) & ~(alignment - 1);
-}
bool GrVkMemory::AllocAndBindImageMemory(const GrVkGpu* gpu,
VkImage image,
bool linearTiling,
GrVkAlloc* alloc) {
- const GrVkInterface* iface = gpu->vkInterface();
- VkDevice device = gpu->device();
+ SkASSERT(!linearTiling);
+ GrVkMemoryAllocator* allocator = gpu->memoryAllocator();
+ GrVkBackendMemory memory = 0;
VkMemoryRequirements memReqs;
- GR_VK_CALL(iface, GetImageMemoryRequirements(device, image, &memReqs));
+ GR_VK_CALL(gpu->vkInterface(), GetImageMemoryRequirements(gpu->device(), image, &memReqs));
- uint32_t typeIndex = 0;
- uint32_t heapIndex = 0;
- GrVkHeap* heap;
- const VkPhysicalDeviceMemoryProperties& phDevMemProps = gpu->physicalDeviceMemoryProperties();
- const VkPhysicalDeviceProperties& phDevProps = gpu->physicalDeviceProperties();
- if (linearTiling) {
- VkMemoryPropertyFlags desiredMemProps = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
- VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
- if (!get_valid_memory_type_index(phDevMemProps,
- memReqs.memoryTypeBits,
- desiredMemProps,
- &typeIndex,
- &heapIndex)) {
- // some sort of host-visible memory type should always be available
- SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps,
- memReqs.memoryTypeBits,
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
- &typeIndex,
- &heapIndex));
- }
- heap = gpu->getHeap(GrVkGpu::kLinearImage_Heap);
- VkMemoryPropertyFlags mpf = phDevMemProps.memoryTypes[typeIndex].propertyFlags;
- alloc->fFlags = mpf & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT ? 0x0
- : GrVkAlloc::kNoncoherent_Flag;
- if (SkToBool(alloc->fFlags & GrVkAlloc::kNoncoherent_Flag)) {
- SkASSERT(SkIsPow2(memReqs.alignment));
- SkASSERT(SkIsPow2(phDevProps.limits.nonCoherentAtomSize));
- memReqs.alignment = SkTMax(memReqs.alignment, phDevProps.limits.nonCoherentAtomSize);
- }
+ AllocationPropertyFlags propFlags;
+ if (memReqs.size <= kMaxSmallImageSize) {
+ propFlags = AllocationPropertyFlags::kNone;
} else {
- // this memory type should always be available
- SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps,
- memReqs.memoryTypeBits,
- VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
- &typeIndex,
- &heapIndex));
- if (memReqs.size <= kMaxSmallImageSize) {
- heap = gpu->getHeap(GrVkGpu::kSmallOptimalImage_Heap);
- } else {
- heap = gpu->getHeap(GrVkGpu::kOptimalImage_Heap);
- }
- alloc->fFlags = 0x0;
+ propFlags = AllocationPropertyFlags::kDedicatedAllocation;
}
- if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) {
- // if optimal, try to allocate from non-host-visible non-device-local memory instead
- if (linearTiling ||
- !get_valid_memory_type_index(phDevMemProps, memReqs.memoryTypeBits,
- 0, &typeIndex, &heapIndex) ||
- !heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) {
- SkDebugf("Failed to alloc image\n");
- return false;
- }
+ if (!allocator->allocateMemoryForImage(image, propFlags, &memory)) {
+ return false;
}
+ allocator->getAllocInfo(memory, alloc);
- // Bind image
- VkResult err = GR_VK_CALL(iface, BindImageMemory(device, image,
- alloc->fMemory, alloc->fOffset));
+ // Bind image
+ VkResult err = GR_VK_CALL(gpu->vkInterface(), BindImageMemory(gpu->device(), image,
+ alloc->fMemory, alloc->fOffset));
if (err) {
- SkASSERT_RELEASE(heap->free(*alloc));
+ FreeImageMemory(gpu, linearTiling, *alloc);
return false;
}
- gTotalImageMemory += alloc->fSize;
-
- VkDeviceSize pageAlignedSize = align_size(alloc->fSize, kMinVulkanPageSize);
- gTotalImageMemoryFullPage += pageAlignedSize;
-
return true;
}
void GrVkMemory::FreeImageMemory(const GrVkGpu* gpu, bool linearTiling,
const GrVkAlloc& alloc) {
- GrVkHeap* heap;
- if (linearTiling) {
- heap = gpu->getHeap(GrVkGpu::kLinearImage_Heap);
- } else if (alloc.fSize <= kMaxSmallImageSize) {
- heap = gpu->getHeap(GrVkGpu::kSmallOptimalImage_Heap);
+ if (alloc.fBackendMemory) {
+ GrVkMemoryAllocator* allocator = gpu->memoryAllocator();
+ allocator->freeMemory(alloc.fBackendMemory);
} else {
- heap = gpu->getHeap(GrVkGpu::kOptimalImage_Heap);
- }
- if (!heap->free(alloc)) {
- // must be an adopted allocation
GR_VK_CALL(gpu->vkInterface(), FreeMemory(gpu->device(), alloc.fMemory, nullptr));
- } else {
- gTotalImageMemory -= alloc.fSize;
- VkDeviceSize pageAlignedSize = align_size(alloc.fSize, kMinVulkanPageSize);
- gTotalImageMemoryFullPage -= pageAlignedSize;
}
}
-void GrVkMemory::FlushMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc, VkDeviceSize offset,
- VkDeviceSize size) {
- if (alloc.fFlags & GrVkAlloc::kNoncoherent_Flag) {
+void* GrVkMemory::MapAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc) {
+ SkASSERT(GrVkAlloc::kMappable_Flag & alloc.fFlags);
#ifdef SK_DEBUG
- SkASSERT(offset >= alloc.fOffset);
- VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize;
- SkASSERT(0 == (offset & (alignment-1)));
- if (size != VK_WHOLE_SIZE) {
- SkASSERT(size > 0);
- SkASSERT(0 == (size & (alignment-1)) ||
- (offset + size) == (alloc.fOffset + alloc.fSize));
- SkASSERT(offset + size <= alloc.fOffset + alloc.fSize);
- }
-#endif
-
- VkMappedMemoryRange mappedMemoryRange;
- memset(&mappedMemoryRange, 0, sizeof(VkMappedMemoryRange));
- mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
- mappedMemoryRange.memory = alloc.fMemory;
- mappedMemoryRange.offset = offset;
- mappedMemoryRange.size = size;
- GR_VK_CALL(gpu->vkInterface(), FlushMappedMemoryRanges(gpu->device(),
- 1, &mappedMemoryRange));
- }
-}
-
-void GrVkMemory::InvalidateMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc,
- VkDeviceSize offset, VkDeviceSize size) {
if (alloc.fFlags & GrVkAlloc::kNoncoherent_Flag) {
-#ifdef SK_DEBUG
- SkASSERT(offset >= alloc.fOffset);
VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize;
- SkASSERT(0 == (offset & (alignment-1)));
- if (size != VK_WHOLE_SIZE) {
- SkASSERT(size > 0);
- SkASSERT(0 == (size & (alignment-1)) ||
- (offset + size) == (alloc.fOffset + alloc.fSize));
- SkASSERT(offset + size <= alloc.fOffset + alloc.fSize);
- }
-#endif
-
- VkMappedMemoryRange mappedMemoryRange;
- memset(&mappedMemoryRange, 0, sizeof(VkMappedMemoryRange));
- mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
- mappedMemoryRange.memory = alloc.fMemory;
- mappedMemoryRange.offset = offset;
- mappedMemoryRange.size = size;
- GR_VK_CALL(gpu->vkInterface(), InvalidateMappedMemoryRanges(gpu->device(),
- 1, &mappedMemoryRange));
+ SkASSERT(0 == (alloc.fOffset & (alignment-1)));
+ SkASSERT(0 == (alloc.fSize & (alignment-1)));
}
-}
-
-bool GrVkFreeListAlloc::alloc(VkDeviceSize requestedSize,
- VkDeviceSize* allocOffset, VkDeviceSize* allocSize) {
- VkDeviceSize alignedSize = align_size(requestedSize, fAlignment);
-
- // find the smallest block big enough for our allocation
- FreeList::Iter iter = fFreeList.headIter();
- FreeList::Iter bestFitIter;
- VkDeviceSize bestFitSize = fSize + 1;
- VkDeviceSize secondLargestSize = 0;
- VkDeviceSize secondLargestOffset = 0;
- while (iter.get()) {
- Block* block = iter.get();
- // need to adjust size to match desired alignment
- SkASSERT(align_size(block->fOffset, fAlignment) - block->fOffset == 0);
- if (block->fSize >= alignedSize && block->fSize < bestFitSize) {
- bestFitIter = iter;
- bestFitSize = block->fSize;
- }
- if (secondLargestSize < block->fSize && block->fOffset != fLargestBlockOffset) {
- secondLargestSize = block->fSize;
- secondLargestOffset = block->fOffset;
- }
- iter.next();
- }
- SkASSERT(secondLargestSize <= fLargestBlockSize);
-
- Block* bestFit = bestFitIter.get();
- if (bestFit) {
- SkASSERT(align_size(bestFit->fOffset, fAlignment) == bestFit->fOffset);
- *allocOffset = bestFit->fOffset;
- *allocSize = alignedSize;
- // adjust or remove current block
- VkDeviceSize originalBestFitOffset = bestFit->fOffset;
- if (bestFit->fSize > alignedSize) {
- bestFit->fOffset += alignedSize;
- bestFit->fSize -= alignedSize;
- if (fLargestBlockOffset == originalBestFitOffset) {
- if (bestFit->fSize >= secondLargestSize) {
- fLargestBlockSize = bestFit->fSize;
- fLargestBlockOffset = bestFit->fOffset;
- } else {
- fLargestBlockSize = secondLargestSize;
- fLargestBlockOffset = secondLargestOffset;
- }
- }
-#ifdef SK_DEBUG
- VkDeviceSize largestSize = 0;
- iter = fFreeList.headIter();
- while (iter.get()) {
- Block* block = iter.get();
- if (largestSize < block->fSize) {
- largestSize = block->fSize;
- }
- iter.next();
- }
- SkASSERT(largestSize == fLargestBlockSize);
#endif
- } else {
- SkASSERT(bestFit->fSize == alignedSize);
- if (fLargestBlockOffset == originalBestFitOffset) {
- fLargestBlockSize = secondLargestSize;
- fLargestBlockOffset = secondLargestOffset;
- }
- fFreeList.remove(bestFit);
-#ifdef SK_DEBUG
- VkDeviceSize largestSize = 0;
- iter = fFreeList.headIter();
- while (iter.get()) {
- Block* block = iter.get();
- if (largestSize < block->fSize) {
- largestSize = block->fSize;
- }
- iter.next();
- }
- SkASSERT(largestSize == fLargestBlockSize);
-#endif
- }
- fFreeSize -= alignedSize;
- SkASSERT(*allocSize > 0);
-
- return true;
+ if (alloc.fBackendMemory) {
+ GrVkMemoryAllocator* allocator = gpu->memoryAllocator();
+ return allocator->mapMemory(alloc.fBackendMemory);
}
- SkDebugf("Can't allocate %d bytes, %d bytes available, largest free block %d\n", alignedSize, fFreeSize, fLargestBlockSize);
-
- return false;
-}
-
-void GrVkFreeListAlloc::free(VkDeviceSize allocOffset, VkDeviceSize allocSize) {
- // find the block right after this allocation
- FreeList::Iter iter = fFreeList.headIter();
- FreeList::Iter prev;
- while (iter.get() && iter.get()->fOffset < allocOffset) {
- prev = iter;
- iter.next();
- }
- // we have four cases:
- // we exactly follow the previous one
- Block* block;
- if (prev.get() && prev.get()->fOffset + prev.get()->fSize == allocOffset) {
- block = prev.get();
- block->fSize += allocSize;
- if (block->fOffset == fLargestBlockOffset) {
- fLargestBlockSize = block->fSize;
- }
- // and additionally we may exactly precede the next one
- if (iter.get() && iter.get()->fOffset == allocOffset + allocSize) {
- block->fSize += iter.get()->fSize;
- if (iter.get()->fOffset == fLargestBlockOffset) {
- fLargestBlockOffset = block->fOffset;
- fLargestBlockSize = block->fSize;
- }
- fFreeList.remove(iter.get());
- }
- // or we only exactly proceed the next one
- } else if (iter.get() && iter.get()->fOffset == allocOffset + allocSize) {
- block = iter.get();
- block->fSize += allocSize;
- if (block->fOffset == fLargestBlockOffset) {
- fLargestBlockOffset = allocOffset;
- fLargestBlockSize = block->fSize;
- }
- block->fOffset = allocOffset;
- // or we fall somewhere in between, with gaps
- } else {
- block = fFreeList.addBefore(iter);
- block->fOffset = allocOffset;
- block->fSize = allocSize;
- }
- fFreeSize += allocSize;
- if (block->fSize > fLargestBlockSize) {
- fLargestBlockSize = block->fSize;
- fLargestBlockOffset = block->fOffset;
- }
-
-#ifdef SK_DEBUG
- VkDeviceSize largestSize = 0;
- iter = fFreeList.headIter();
- while (iter.get()) {
- Block* block = iter.get();
- if (largestSize < block->fSize) {
- largestSize = block->fSize;
- }
- iter.next();
+ void* mapPtr;
+ VkResult err = GR_VK_CALL(gpu->vkInterface(), MapMemory(gpu->device(), alloc.fMemory,
+ alloc.fOffset,
+ alloc.fSize, 0, &mapPtr));
+ if (err) {
+ mapPtr = nullptr;
}
- SkASSERT(fLargestBlockSize == largestSize);
-#endif
+ return mapPtr;
}
-GrVkSubHeap::GrVkSubHeap(const GrVkGpu* gpu, uint32_t memoryTypeIndex, uint32_t heapIndex,
- VkDeviceSize size, VkDeviceSize alignment)
- : INHERITED(size, alignment)
- , fGpu(gpu)
-#ifdef SK_DEBUG
- , fHeapIndex(heapIndex)
-#endif
- , fMemoryTypeIndex(memoryTypeIndex) {
-
- VkMemoryAllocateInfo allocInfo = {
- VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // sType
- nullptr, // pNext
- size, // allocationSize
- memoryTypeIndex, // memoryTypeIndex
- };
-
- VkResult err = GR_VK_CALL(gpu->vkInterface(), AllocateMemory(gpu->device(),
- &allocInfo,
- nullptr,
- &fAlloc));
- if (VK_SUCCESS != err) {
- this->reset();
- }
-#ifdef SK_DEBUG
- else {
- gHeapUsage[heapIndex] += size;
+void GrVkMemory::UnmapAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc) {
+ if (alloc.fBackendMemory) {
+ GrVkMemoryAllocator* allocator = gpu->memoryAllocator();
+ allocator->unmapMemory(alloc.fBackendMemory);
+ } else {
+ GR_VK_CALL(gpu->vkInterface(), UnmapMemory(gpu->device(), alloc.fMemory));
}
-#endif
}
-GrVkSubHeap::~GrVkSubHeap() {
- const GrVkInterface* iface = fGpu->vkInterface();
- GR_VK_CALL(iface, FreeMemory(fGpu->device(), fAlloc, nullptr));
+void GrVkMemory::GetNonCoherentMappedMemoryRange(const GrVkAlloc& alloc, VkDeviceSize offset,
+ VkDeviceSize size, VkDeviceSize alignment,
+ VkMappedMemoryRange* range) {
+ SkASSERT(alloc.fFlags & GrVkAlloc::kNoncoherent_Flag);
+ offset = offset + alloc.fOffset;
+ VkDeviceSize offsetDiff = offset & (alignment - 1);
+ offset = offset - offsetDiff;
+ size = (size + alignment - 1) & ~(alignment - 1);
#ifdef SK_DEBUG
- gHeapUsage[fHeapIndex] -= fSize;
+ SkASSERT(offset >= alloc.fOffset);
+ SkASSERT(offset + size <= alloc.fOffset + alloc.fSize);
+ SkASSERT(0 == (offset & (alignment-1)));
+ SkASSERT(size > 0);
+ SkASSERT(0 == (size & (alignment-1)));
#endif
-}
-bool GrVkSubHeap::alloc(VkDeviceSize size, GrVkAlloc* alloc) {
- alloc->fMemory = fAlloc;
- return INHERITED::alloc(size, &alloc->fOffset, &alloc->fSize);
+ memset(range, 0, sizeof(VkMappedMemoryRange));
+ range->sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
+ range->memory = alloc.fMemory;
+ range->offset = offset;
+ range->size = size;
}
-void GrVkSubHeap::free(const GrVkAlloc& alloc) {
- SkASSERT(alloc.fMemory == fAlloc);
-
- INHERITED::free(alloc.fOffset, alloc.fSize);
-}
-
-bool GrVkHeap::subAlloc(VkDeviceSize size, VkDeviceSize alignment,
- uint32_t memoryTypeIndex, uint32_t heapIndex, GrVkAlloc* alloc) {
- VkDeviceSize alignedSize = align_size(size, alignment);
-
- // if requested is larger than our subheap allocation, just alloc directly
- if (alignedSize > fSubHeapSize) {
- VkMemoryAllocateInfo allocInfo = {
- VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // sType
- nullptr, // pNext
- alignedSize, // allocationSize
- memoryTypeIndex, // memoryTypeIndex
- };
-
- VkResult err = GR_VK_CALL(fGpu->vkInterface(), AllocateMemory(fGpu->device(),
- &allocInfo,
- nullptr,
- &alloc->fMemory));
- if (VK_SUCCESS != err) {
- return false;
- }
- alloc->fOffset = 0;
- alloc->fSize = alignedSize;
- alloc->fUsesSystemHeap = true;
-#ifdef SK_DEBUG
- gHeapUsage[VK_MAX_MEMORY_HEAPS] += alignedSize;
-#endif
-
- return true;
- }
-
- // first try to find a subheap that fits our allocation request
- int bestFitIndex = -1;
- VkDeviceSize bestFitSize = 0x7FFFFFFF;
- for (auto i = 0; i < fSubHeaps.count(); ++i) {
- if (fSubHeaps[i]->memoryTypeIndex() == memoryTypeIndex &&
- fSubHeaps[i]->alignment() == alignment) {
- VkDeviceSize heapSize = fSubHeaps[i]->largestBlockSize();
- if (heapSize >= alignedSize && heapSize < bestFitSize) {
- bestFitIndex = i;
- bestFitSize = heapSize;
- }
- }
- }
-
- if (bestFitIndex >= 0) {
- SkASSERT(fSubHeaps[bestFitIndex]->alignment() == alignment);
- if (fSubHeaps[bestFitIndex]->alloc(size, alloc)) {
- fUsedSize += alloc->fSize;
- return true;
- }
- return false;
- }
-
- // need to allocate a new subheap
- std::unique_ptr<GrVkSubHeap>& subHeap = fSubHeaps.push_back();
- subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, heapIndex, fSubHeapSize, alignment));
- // try to recover from failed allocation by only allocating what we need
- if (subHeap->size() == 0) {
- VkDeviceSize alignedSize = align_size(size, alignment);
- subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, heapIndex, alignedSize, alignment));
- if (subHeap->size() == 0) {
- return false;
- }
- }
- fAllocSize += fSubHeapSize;
- if (subHeap->alloc(size, alloc)) {
- fUsedSize += alloc->fSize;
- return true;
- }
-
- return false;
-}
-
-bool GrVkHeap::singleAlloc(VkDeviceSize size, VkDeviceSize alignment,
- uint32_t memoryTypeIndex, uint32_t heapIndex, GrVkAlloc* alloc) {
- VkDeviceSize alignedSize = align_size(size, alignment);
-
- // first try to find an unallocated subheap that fits our allocation request
- int bestFitIndex = -1;
- VkDeviceSize bestFitSize = 0x7FFFFFFF;
- for (auto i = 0; i < fSubHeaps.count(); ++i) {
- if (fSubHeaps[i]->memoryTypeIndex() == memoryTypeIndex &&
- fSubHeaps[i]->alignment() == alignment &&
- fSubHeaps[i]->unallocated()) {
- VkDeviceSize heapSize = fSubHeaps[i]->size();
- if (heapSize >= alignedSize && heapSize < bestFitSize) {
- bestFitIndex = i;
- bestFitSize = heapSize;
- }
- }
- }
-
- if (bestFitIndex >= 0) {
- SkASSERT(fSubHeaps[bestFitIndex]->alignment() == alignment);
- if (fSubHeaps[bestFitIndex]->alloc(size, alloc)) {
- fUsedSize += alloc->fSize;
- return true;
+void GrVkMemory::FlushMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc, VkDeviceSize offset,
+ VkDeviceSize size) {
+ if (alloc.fFlags & GrVkAlloc::kNoncoherent_Flag) {
+ SkASSERT(offset == 0);
+ SkASSERT(size <= alloc.fSize);
+ if (alloc.fBackendMemory) {
+ GrVkMemoryAllocator* allocator = gpu->memoryAllocator();
+ allocator->flushMappedMemory(alloc.fBackendMemory, offset, size);
+ } else {
+ VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize;
+ VkMappedMemoryRange mappedMemoryRange;
+ GrVkMemory::GetNonCoherentMappedMemoryRange(alloc, offset, size, alignment,
+ &mappedMemoryRange);
+ GR_VK_CALL(gpu->vkInterface(), FlushMappedMemoryRanges(gpu->device(), 1,
+ &mappedMemoryRange));
}
- return false;
- }
-
- // need to allocate a new subheap
- std::unique_ptr<GrVkSubHeap>& subHeap = fSubHeaps.push_back();
- subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, heapIndex, alignedSize, alignment));
- fAllocSize += alignedSize;
- if (subHeap->alloc(size, alloc)) {
- fUsedSize += alloc->fSize;
- return true;
}
-
- return false;
}
-bool GrVkHeap::free(const GrVkAlloc& alloc) {
- // a size of 0 means we're using the system heap
- if (alloc.fUsesSystemHeap) {
- const GrVkInterface* iface = fGpu->vkInterface();
- GR_VK_CALL(iface, FreeMemory(fGpu->device(), alloc.fMemory, nullptr));
- return true;
- }
-
- for (auto i = 0; i < fSubHeaps.count(); ++i) {
- if (fSubHeaps[i]->memory() == alloc.fMemory) {
- fSubHeaps[i]->free(alloc);
- fUsedSize -= alloc.fSize;
- return true;
+void GrVkMemory::InvalidateMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc,
+ VkDeviceSize offset, VkDeviceSize size) {
+ if (alloc.fFlags & GrVkAlloc::kNoncoherent_Flag) {
+ SkASSERT(offset == 0);
+ SkASSERT(size <= alloc.fSize);
+ if (alloc.fBackendMemory) {
+ GrVkMemoryAllocator* allocator = gpu->memoryAllocator();
+ allocator->invalidateMappedMemory(alloc.fBackendMemory, offset, size);
+ } else {
+ VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize;
+ VkMappedMemoryRange mappedMemoryRange;
+ GrVkMemory::GetNonCoherentMappedMemoryRange(alloc, offset, size, alignment,
+ &mappedMemoryRange);
+ GR_VK_CALL(gpu->vkInterface(), InvalidateMappedMemoryRanges(gpu->device(), 1,
+ &mappedMemoryRange));
}
}
-
- return false;
}
-
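A worked trace of GetNonCoherentMappedMemoryRange above, with assumed values:

    // Assumed inputs: alloc.fOffset = 256, offset = 0, size = 1000, alignment = 256.
    //   absolute offset: 0 + 256 = 256            (already a multiple of 256, offsetDiff = 0)
    //   rounded size:    (1000 + 255) & ~255 = 1024
    // The range handed to vkFlush/vkInvalidate is then [256, 1280) within
    // alloc.fMemory. The fake-size hack in GrVkAMDMemoryAllocator::getAllocInfo
    // rounds alloc.fSize up to the same alignment, so this padded range still
    // falls inside the reported allocation.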
diff --git a/src/gpu/vk/GrVkMemory.h b/src/gpu/vk/GrVkMemory.h
index bb6681435f..741bdaa8a0 100644
--- a/src/gpu/vk/GrVkMemory.h
+++ b/src/gpu/vk/GrVkMemory.h
@@ -34,133 +34,25 @@ namespace GrVkMemory {
GrVkAlloc* alloc);
void FreeImageMemory(const GrVkGpu* gpu, bool linearTiling, const GrVkAlloc& alloc);
+ // Maps the entire GrVkAlloc and returns a pointer to the start of the allocation. Under the
+ // hood, we may map more than the range of the GrVkAlloc (e.g. the entire VkDeviceMemory),
+ // but the pointer returned will always be to the start of the GrVkAlloc. The caller should also
+ // never assume more than the GrVkAlloc block has been mapped.
+ void* MapAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc);
+ void UnmapAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc);
+
+ // For the Flush and Invalidate calls, the offset should be relative to the GrVkAlloc. Thus this
+ // will often be 0. The client does not need to make sure the offset and size are aligned to the
+ // nonCoherentAtomSize; the internal calls will handle that.
void FlushMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc, VkDeviceSize offset,
VkDeviceSize size);
void InvalidateMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc, VkDeviceSize offset,
VkDeviceSize size);
-}
-
-class GrVkFreeListAlloc {
-public:
- GrVkFreeListAlloc(VkDeviceSize size, VkDeviceSize alignment)
- : fSize(size)
- , fAlignment(alignment)
- , fFreeSize(size)
- , fLargestBlockSize(size)
- , fLargestBlockOffset(0) {
- Block* block = fFreeList.addToTail();
- block->fOffset = 0;
- block->fSize = fSize;
- }
- ~GrVkFreeListAlloc() {
- this->reset();
- }
-
- VkDeviceSize size() const { return fSize; }
- VkDeviceSize alignment() const { return fAlignment; }
- VkDeviceSize freeSize() const { return fFreeSize; }
- VkDeviceSize largestBlockSize() const { return fLargestBlockSize; }
-
- bool unallocated() const { return fSize == fFreeSize; }
-
-protected:
- bool alloc(VkDeviceSize requestedSize, VkDeviceSize* allocOffset, VkDeviceSize* allocSize);
- void free(VkDeviceSize allocOffset, VkDeviceSize allocSize);
-
- void reset() {
- fSize = 0;
- fAlignment = 0;
- fFreeSize = 0;
- fLargestBlockSize = 0;
- fFreeList.reset();
- }
-
- struct Block {
- VkDeviceSize fOffset;
- VkDeviceSize fSize;
- };
- typedef SkTLList<Block, 16> FreeList;
-
- VkDeviceSize fSize;
- VkDeviceSize fAlignment;
- VkDeviceSize fFreeSize;
- VkDeviceSize fLargestBlockSize;
- VkDeviceSize fLargestBlockOffset;
- FreeList fFreeList;
-};
-
-class GrVkSubHeap : public GrVkFreeListAlloc {
-public:
- GrVkSubHeap(const GrVkGpu* gpu, uint32_t memoryTypeIndex, uint32_t heapIndex,
- VkDeviceSize size, VkDeviceSize alignment);
- ~GrVkSubHeap();
-
- uint32_t memoryTypeIndex() const { return fMemoryTypeIndex; }
- VkDeviceMemory memory() { return fAlloc; }
-
- bool alloc(VkDeviceSize requestedSize, GrVkAlloc* alloc);
- void free(const GrVkAlloc& alloc);
-private:
- const GrVkGpu* fGpu;
-#ifdef SK_DEBUG
- uint32_t fHeapIndex;
-#endif
- uint32_t fMemoryTypeIndex;
- VkDeviceMemory fAlloc;
-
- typedef GrVkFreeListAlloc INHERITED;
-};
-
-class GrVkHeap {
-public:
- enum Strategy {
- kSubAlloc_Strategy, // alloc large subheaps and suballoc within them
- kSingleAlloc_Strategy // alloc/recycle an individual subheap per object
- };
-
- GrVkHeap(const GrVkGpu* gpu, Strategy strategy, VkDeviceSize subHeapSize)
- : fGpu(gpu)
- , fSubHeapSize(subHeapSize)
- , fAllocSize(0)
- , fUsedSize(0) {
- if (strategy == kSubAlloc_Strategy) {
- fAllocFunc = &GrVkHeap::subAlloc;
- } else {
- fAllocFunc = &GrVkHeap::singleAlloc;
- }
- }
-
- ~GrVkHeap() {}
-
- VkDeviceSize allocSize() const { return fAllocSize; }
- VkDeviceSize usedSize() const { return fUsedSize; }
-
- bool alloc(VkDeviceSize size, VkDeviceSize alignment, uint32_t memoryTypeIndex,
- uint32_t heapIndex, GrVkAlloc* alloc) {
- SkASSERT(size > 0);
- alloc->fUsesSystemHeap = false;
- return (*this.*fAllocFunc)(size, alignment, memoryTypeIndex, heapIndex, alloc);
- }
- bool free(const GrVkAlloc& alloc);
-
-private:
- typedef bool (GrVkHeap::*AllocFunc)(VkDeviceSize size, VkDeviceSize alignment,
- uint32_t memoryTypeIndex, uint32_t heapIndex,
- GrVkAlloc* alloc);
-
- bool subAlloc(VkDeviceSize size, VkDeviceSize alignment,
- uint32_t memoryTypeIndex, uint32_t heapIndex,
- GrVkAlloc* alloc);
- bool singleAlloc(VkDeviceSize size, VkDeviceSize alignment,
- uint32_t memoryTypeIndex, uint32_t heapIndex,
- GrVkAlloc* alloc);
+ // Helper for aligning and setting VkMappedMemoryRange for flushing/invalidating noncoherent
+ // memory.
+ void GetNonCoherentMappedMemoryRange(const GrVkAlloc&, VkDeviceSize offset, VkDeviceSize size,
+ VkDeviceSize alignment, VkMappedMemoryRange*);
+}
- const GrVkGpu* fGpu;
- VkDeviceSize fSubHeapSize;
- VkDeviceSize fAllocSize;
- VkDeviceSize fUsedSize;
- AllocFunc fAllocFunc;
- SkTArray<std::unique_ptr<GrVkSubHeap>> fSubHeaps;
-};
#endif
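Finally, a hedged sketch of how callers use the reworked surface declared above (assumes a host-visible, mappable alloc; upload, srcData, and srcSize are illustrative names):

    #include <cstring>

    // Sketch: upload srcSize bytes (srcSize <= alloc.fSize) through the new API.
    // MapAlloc maps the whole GrVkAlloc, and the flush offset is relative to the
    // alloc, so a write starting at the front of the allocation flushes from 0.
    static bool upload(GrVkGpu* gpu, const GrVkAlloc& alloc,
                       const void* srcData, size_t srcSize) {
        void* mapPtr = GrVkMemory::MapAlloc(gpu, alloc);
        if (!mapPtr) {
            return false;
        }
        memcpy(mapPtr, srcData, srcSize);
        GrVkMemory::FlushMappedAlloc(gpu, alloc, 0, srcSize);  // alignment handled internally
        GrVkMemory::UnmapAlloc(gpu, alloc);
        return true;
    }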