aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r-- include/gpu/vk/GrVkTypes.h 11
-rw-r--r-- src/gpu/vk/GrVkBuffer.cpp 15
-rw-r--r-- src/gpu/vk/GrVkGpu.cpp 43
-rw-r--r-- src/gpu/vk/GrVkGpu.h 4
-rw-r--r-- src/gpu/vk/GrVkMemory.cpp 17
-rw-r--r-- src/gpu/vk/GrVkMemory.h 1
-rw-r--r-- tests/VkHeapTests.cpp 1
-rw-r--r-- tests/VkWrapTests.cpp 6
-rw-r--r-- tools/sk_app/VulkanWindowContext.cpp 2
9 files changed, 81 insertions, 19 deletions
diff --git a/include/gpu/vk/GrVkTypes.h b/include/gpu/vk/GrVkTypes.h
index aa1334adca..5e93733531 100644
--- a/include/gpu/vk/GrVkTypes.h
+++ b/include/gpu/vk/GrVkTypes.h
@@ -31,14 +31,17 @@
* Vulkan textures are really const GrVkImageInfo*
*/
struct GrVkAlloc {
- VkDeviceMemory fMemory; // can be VK_NULL_HANDLE iff Tex is an RT and uses borrow semantics
- VkDeviceSize fOffset;
- VkDeviceSize fSize; // this can be indeterminate iff Tex uses borrow semantics
- uint32_t fFlags;
+ VkDeviceMemory fMemory = VK_NULL_HANDLE; // can be VK_NULL_HANDLE iff Tex is an RT and is borrowed
+ VkDeviceSize fOffset = 0; // offset of this allocation within fMemory
+ VkDeviceSize fSize = 0; // this can be indeterminate iff Tex uses borrow semantics
+ uint32_t fFlags= 0; // bitwise OR of Flag values
enum Flag {
kNoncoherent_Flag = 0x1, // memory must be flushed to device after mapping
};
+private:
+ friend class GrVkHeap; // For access to fUsesSystemHeap
+ bool fUsesSystemHeap = false; // set by GrVkHeap::subAlloc; GrVkHeap::free releases such allocs with FreeMemory
};
struct GrVkImageInfo {
diff --git a/src/gpu/vk/GrVkBuffer.cpp b/src/gpu/vk/GrVkBuffer.cpp
index 5aa3fb0c15..64f26309b7 100644
--- a/src/gpu/vk/GrVkBuffer.cpp
+++ b/src/gpu/vk/GrVkBuffer.cpp
@@ -169,6 +169,21 @@ void GrVkBuffer::internalMap(GrVkGpu* gpu, size_t size, bool* createdNewBuffer)
if (fDesc.fDynamic) {
const GrVkAlloc& alloc = this->alloc();
+ SkASSERT(alloc.fSize > 0);
+
+ // For Noncoherent buffers we want to make sure the range that we map, both offset and size,
+ // are aligned to the nonCoherentAtomSize limit. The offset should have been correctly
+ // aligned by our memory allocator. For size we pad out to make the range also aligned.
+ if (SkToBool(alloc.fFlags & GrVkAlloc::kNoncoherent_Flag)) {
+ // Currently we always have the internal offset as 0.
+ SkASSERT(0 == fOffset);
+ VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize;
+ SkASSERT(0 == (alloc.fOffset & (alignment - 1)));
+
+ // Make size of the map aligned to nonCoherentAtomSize
+ size = (size + alignment - 1) & ~(alignment - 1);
+ }
+ SkASSERT(size + fOffset <= alloc.fSize);
VkResult err = VK_CALL(gpu, MapMemory(gpu->device(), alloc.fMemory,
alloc.fOffset + fOffset,
size, 0, &fMapPtr));
diff --git a/src/gpu/vk/GrVkGpu.cpp b/src/gpu/vk/GrVkGpu.cpp
index 2ef765dc0b..c5a03f99bb 100644
--- a/src/gpu/vk/GrVkGpu.cpp
+++ b/src/gpu/vk/GrVkGpu.cpp
@@ -130,6 +130,7 @@ GrVkGpu::GrVkGpu(GrContext* context, const GrContextOptions& options,
fBackendContext->fFeatures, fBackendContext->fExtensions));
fCaps.reset(SkRef(fVkCaps.get()));
+ VK_CALL(GetPhysicalDeviceProperties(fBackendContext->fPhysicalDevice, &fPhysDevProps));
VK_CALL(GetPhysicalDeviceMemoryProperties(fBackendContext->fPhysicalDevice, &fPhysDevMemProps));
const VkCommandPoolCreateInfo cmdPoolInfo = {
@@ -578,12 +579,27 @@ bool GrVkGpu::uploadTexDataLinear(GrVkTexture* tex, GrSurfaceOrigin texOrigin, i
int texTop = kBottomLeft_GrSurfaceOrigin == texOrigin ? tex->height() - top - height : top;
const GrVkAlloc& alloc = tex->alloc();
VkDeviceSize offset = alloc.fOffset + texTop*layout.rowPitch + left*bpp;
+ VkDeviceSize offsetDiff = 0;
VkDeviceSize size = height*layout.rowPitch;
+ // For Noncoherent buffers we want to make sure the range that we map, both offset and size,
+ // are aligned to the nonCoherentAtomSize limit. We may have to move the initial offset back to
+ // meet the alignment requirements. So we track how far we move back and then adjust the mapped
+ // ptr back up so that this is opaque to the caller.
+ if (SkToBool(alloc.fFlags & GrVkAlloc::kNoncoherent_Flag)) {
+ VkDeviceSize alignment = this->physicalDeviceProperties().limits.nonCoherentAtomSize;
+ offsetDiff = offset & (alignment - 1);
+ offset = offset - offsetDiff;
+ // Cover the bytes we moved back over, then align the map size to nonCoherentAtomSize
+ size = (size + offsetDiff + alignment - 1) & ~(alignment - 1);
+ }
+ SkASSERT(offset >= alloc.fOffset);
+ SkASSERT(offset + size <= alloc.fOffset + alloc.fSize);
void* mapPtr;
err = GR_VK_CALL(interface, MapMemory(fDevice, alloc.fMemory, offset, size, 0, &mapPtr));
if (err) {
return false;
}
+ mapPtr = reinterpret_cast<char*>(mapPtr) + offsetDiff;
if (kBottomLeft_GrSurfaceOrigin == texOrigin) {
// copy into buffer by rows
@@ -1108,13 +1124,30 @@ GrStencilAttachment* GrVkGpu::createStencilAttachmentForRenderTarget(const GrRen
bool copy_testing_data(GrVkGpu* gpu, void* srcData, const GrVkAlloc& alloc, size_t bufferOffset,
size_t srcRowBytes, size_t dstRowBytes, int h) {
+ // For Noncoherent buffers we want to make sure the range that we map, both offset and size,
+ // are aligned to the nonCoherentAtomSize limit. We may have to move the initial offset back to
+ // meet the alignment requirements. So we track how far we move back and then adjust the mapped
+ // ptr back up so that this is opaque to the caller.
+ VkDeviceSize mapSize = dstRowBytes * h;
+ VkDeviceSize mapOffset = alloc.fOffset + bufferOffset;
+ VkDeviceSize offsetDiff = 0;
+ if (SkToBool(alloc.fFlags & GrVkAlloc::kNoncoherent_Flag)) {
+ VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize;
+ offsetDiff = mapOffset & (alignment - 1);
+ mapOffset = mapOffset - offsetDiff;
+ // Cover the bytes we moved back over, then align the map size to nonCoherentAtomSize
+ mapSize = (mapSize + offsetDiff + alignment - 1) & ~(alignment - 1);
+ }
+ SkASSERT(mapOffset >= alloc.fOffset);
+ SkASSERT(mapSize + mapOffset <= alloc.fOffset + alloc.fSize);
void* mapPtr;
VkResult err = GR_VK_CALL(gpu->vkInterface(), MapMemory(gpu->device(),
alloc.fMemory,
- alloc.fOffset + bufferOffset,
- dstRowBytes * h,
+ mapOffset,
+ mapSize,
0,
&mapPtr));
if (err) {
return false;
}
+ mapPtr = reinterpret_cast<char*>(mapPtr) + offsetDiff; // only valid once MapMemory has succeeded
@@ -1179,7 +1212,7 @@ GrBackendTexture GrVkGpu::createTestingOnlyBackendTexture(void* srcData, int w,
}
VkImage image = VK_NULL_HANDLE;
- GrVkAlloc alloc = { VK_NULL_HANDLE, 0, 0, 0 };
+ GrVkAlloc alloc;
VkImageTiling imageTiling = linearTiling ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
VkImageLayout initialLayout = (VK_IMAGE_TILING_LINEAR == imageTiling)
@@ -1224,7 +1257,7 @@ GrBackendTexture GrVkGpu::createTestingOnlyBackendTexture(void* srcData, int w,
}
// We need to declare these early so that we can delete them at the end outside of the if block.
- GrVkAlloc bufferAlloc = { VK_NULL_HANDLE, 0, 0, 0 };
+ GrVkAlloc bufferAlloc;
VkBuffer buffer = VK_NULL_HANDLE;
VkResult err;
@@ -1978,8 +2011,8 @@ bool GrVkGpu::onReadPixels(GrSurface* surface, GrSurfaceOrigin origin, int left,
// We need to submit the current command buffer to the Queue and make sure it finishes before
// we can copy the data out of the buffer.
this->submitCommandBuffer(kForce_SyncQueue);
- GrVkMemory::InvalidateMappedAlloc(this, transferBuffer->alloc());
void* mappedMemory = transferBuffer->map();
+ GrVkMemory::InvalidateMappedAlloc(this, transferBuffer->alloc());
if (copyFromOrigin) {
uint32_t skipRows = region.imageExtent.height - height;
diff --git a/src/gpu/vk/GrVkGpu.h b/src/gpu/vk/GrVkGpu.h
index 3833c5f604..0b52147f5c 100644
--- a/src/gpu/vk/GrVkGpu.h
+++ b/src/gpu/vk/GrVkGpu.h
@@ -51,6 +51,9 @@ public:
VkDevice device() const { return fDevice; }
VkQueue queue() const { return fQueue; }
VkCommandPool cmdPool() const { return fCmdPool; }
+ VkPhysicalDeviceProperties physicalDeviceProperties() const { // by-value copy of the properties queried in the constructor
+ return fPhysDevProps;
+ }
VkPhysicalDeviceMemoryProperties physicalDeviceMemoryProperties() const {
return fPhysDevMemProps;
}
@@ -253,6 +256,7 @@ private:
SkSTArray<1, GrVkSemaphore::Resource*> fSemaphoresToWaitOn;
SkSTArray<1, GrVkSemaphore::Resource*> fSemaphoresToSignal;
+ VkPhysicalDeviceProperties fPhysDevProps;
VkPhysicalDeviceMemoryProperties fPhysDevMemProps;
std::unique_ptr<GrVkHeap> fHeaps[kHeapCount];
diff --git a/src/gpu/vk/GrVkMemory.cpp b/src/gpu/vk/GrVkMemory.cpp
index a90533e17b..e27e260dbd 100644
--- a/src/gpu/vk/GrVkMemory.cpp
+++ b/src/gpu/vk/GrVkMemory.cpp
@@ -68,6 +68,7 @@ bool GrVkMemory::AllocAndBindBufferMemory(const GrVkGpu* gpu,
uint32_t typeIndex = 0;
uint32_t heapIndex = 0;
const VkPhysicalDeviceMemoryProperties& phDevMemProps = gpu->physicalDeviceMemoryProperties();
+ const VkPhysicalDeviceProperties& phDevProps = gpu->physicalDeviceProperties();
if (dynamic) {
// try to get cached and ideally non-coherent memory first
if (!get_valid_memory_type_index(phDevMemProps,
@@ -87,6 +88,11 @@ bool GrVkMemory::AllocAndBindBufferMemory(const GrVkGpu* gpu,
VkMemoryPropertyFlags mpf = phDevMemProps.memoryTypes[typeIndex].propertyFlags;
alloc->fFlags = mpf & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT ? 0x0
: GrVkAlloc::kNoncoherent_Flag;
+ if (SkToBool(alloc->fFlags & GrVkAlloc::kNoncoherent_Flag)) {
+ SkASSERT(SkIsPow2(memReqs.alignment));
+ SkASSERT(SkIsPow2(phDevProps.limits.nonCoherentAtomSize));
+ memReqs.alignment = SkTMax(memReqs.alignment, phDevProps.limits.nonCoherentAtomSize);
+ }
} else {
// device-local memory should always be available for static buffers
SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps,
@@ -293,7 +299,7 @@ void GrVkMemory::FlushMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc) {
mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
mappedMemoryRange.memory = alloc.fMemory;
mappedMemoryRange.offset = alloc.fOffset;
- mappedMemoryRange.size = alloc.fSize;
+ mappedMemoryRange.size = VK_WHOLE_SIZE; // Size of what we mapped
GR_VK_CALL(gpu->vkInterface(), FlushMappedMemoryRanges(gpu->device(),
1, &mappedMemoryRange));
}
@@ -306,7 +312,7 @@ void GrVkMemory::InvalidateMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& allo
mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
mappedMemoryRange.memory = alloc.fMemory;
mappedMemoryRange.offset = alloc.fOffset;
- mappedMemoryRange.size = alloc.fSize;
+ mappedMemoryRange.size = VK_WHOLE_SIZE; // Size of what we mapped
GR_VK_CALL(gpu->vkInterface(), InvalidateMappedMemoryRanges(gpu->device(),
1, &mappedMemoryRange));
}
@@ -519,7 +525,7 @@ bool GrVkHeap::subAlloc(VkDeviceSize size, VkDeviceSize alignment,
VkMemoryAllocateInfo allocInfo = {
VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // sType
nullptr, // pNext
- size, // allocationSize
+ alignedSize, // allocationSize
memoryTypeIndex, // memoryTypeIndex
};
@@ -531,7 +537,8 @@ bool GrVkHeap::subAlloc(VkDeviceSize size, VkDeviceSize alignment,
return false;
}
alloc->fOffset = 0;
- alloc->fSize = 0; // hint that this is not a subheap allocation
+ alloc->fSize = alignedSize;
+ alloc->fUsesSystemHeap = true;
#ifdef SK_DEBUG
gHeapUsage[VK_MAX_MEMORY_HEAPS] += alignedSize;
#endif
@@ -624,7 +631,7 @@ bool GrVkHeap::singleAlloc(VkDeviceSize size, VkDeviceSize alignment,
bool GrVkHeap::free(const GrVkAlloc& alloc) {
- // a size of 0 means we're using the system heap
+ // fUsesSystemHeap is set for allocations made directly from the system heap
- if (0 == alloc.fSize) {
+ if (alloc.fUsesSystemHeap) {
const GrVkInterface* iface = fGpu->vkInterface();
GR_VK_CALL(iface, FreeMemory(fGpu->device(), alloc.fMemory, nullptr));
return true;
diff --git a/src/gpu/vk/GrVkMemory.h b/src/gpu/vk/GrVkMemory.h
index a8f3771388..88cc47b29f 100644
--- a/src/gpu/vk/GrVkMemory.h
+++ b/src/gpu/vk/GrVkMemory.h
@@ -141,6 +141,7 @@ public:
bool alloc(VkDeviceSize size, VkDeviceSize alignment, uint32_t memoryTypeIndex,
uint32_t heapIndex, GrVkAlloc* alloc) {
SkASSERT(size > 0);
+ alloc->fUsesSystemHeap = false; // reset here; subAlloc sets it when it allocates device memory directly
return (*this.*fAllocFunc)(size, alignment, memoryTypeIndex, heapIndex, alloc);
}
bool free(const GrVkAlloc& alloc);
diff --git a/tests/VkHeapTests.cpp b/tests/VkHeapTests.cpp
index 29e4e7906b..67eb045d98 100644
--- a/tests/VkHeapTests.cpp
+++ b/tests/VkHeapTests.cpp
@@ -159,7 +159,6 @@ void suballoc_test(skiatest::Reporter* reporter, GrContext* context) {
REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 0 * 1024);
// heap should not grow here (allocating more than subheap size)
REPORTER_ASSERT(reporter, heap.alloc(128 * 1024, kAlignment, kMemType, kHeapIndex, &alloc0));
- REPORTER_ASSERT(reporter, 0 == alloc0.fSize);
REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 0 * 1024);
heap.free(alloc0);
REPORTER_ASSERT(reporter, heap.alloc(24 * 1024, kAlignment, kMemType, kHeapIndex, &alloc0));
diff --git a/tests/VkWrapTests.cpp b/tests/VkWrapTests.cpp
index 9723d5763b..bcf59d222e 100644
--- a/tests/VkWrapTests.cpp
+++ b/tests/VkWrapTests.cpp
@@ -55,7 +55,7 @@ void wrap_tex_test(skiatest::Reporter* reporter, GrContext* context) {
// alloc is null
{
GrVkImageInfo backendCopy = *imageInfo;
- backendCopy.fAlloc = { VK_NULL_HANDLE, 0, 0, 0 };
+ backendCopy.fAlloc = GrVkAlloc();
GrBackendTexture backendTex = GrBackendTexture(kW, kH, backendCopy);
tex = gpu->wrapBackendTexture(backendTex, kBorrow_GrWrapOwnership);
REPORTER_ASSERT(reporter, !tex);
@@ -100,7 +100,7 @@ void wrap_rt_test(skiatest::Reporter* reporter, GrContext* context) {
// alloc is null
{
GrVkImageInfo backendCopy = *imageInfo;
- backendCopy.fAlloc = { VK_NULL_HANDLE, 0, 0, 0 };
+ backendCopy.fAlloc = GrVkAlloc();
// can wrap null alloc
GrBackendRenderTarget backendRT(kW, kH, 1, 0, backendCopy);
rt = gpu->wrapBackendRenderTarget(backendRT);
@@ -138,7 +138,7 @@ void wrap_trt_test(skiatest::Reporter* reporter, GrContext* context) {
// alloc is null
{
GrVkImageInfo backendCopy = *imageInfo;
- backendCopy.fAlloc = { VK_NULL_HANDLE, 0, 0, 0 };
+ backendCopy.fAlloc = GrVkAlloc();
GrBackendTexture backendTex = GrBackendTexture(kW, kH, backendCopy);
tex = gpu->wrapRenderableBackendTexture(backendTex, 1, kBorrow_GrWrapOwnership);
REPORTER_ASSERT(reporter, !tex);
diff --git a/tools/sk_app/VulkanWindowContext.cpp b/tools/sk_app/VulkanWindowContext.cpp
index 6237ee09ae..d7839bc189 100644
--- a/tools/sk_app/VulkanWindowContext.cpp
+++ b/tools/sk_app/VulkanWindowContext.cpp
@@ -287,7 +287,7 @@ void VulkanWindowContext::createBuffers(VkFormat format, SkColorType colorType)
GrVkImageInfo info;
info.fImage = fImages[i];
- info.fAlloc = { VK_NULL_HANDLE, 0, 0, 0 };
+ info.fAlloc = GrVkAlloc();
info.fImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
info.fImageTiling = VK_IMAGE_TILING_OPTIMAL;
info.fFormat = format;