diff options
-rw-r--r-- | include/gpu/vk/GrVkTypes.h | 11 | ||||
-rw-r--r-- | src/gpu/vk/GrVkBuffer.cpp | 15 | ||||
-rw-r--r-- | src/gpu/vk/GrVkGpu.cpp | 43 | ||||
-rw-r--r-- | src/gpu/vk/GrVkGpu.h | 4 | ||||
-rw-r--r-- | src/gpu/vk/GrVkMemory.cpp | 17 | ||||
-rw-r--r-- | src/gpu/vk/GrVkMemory.h | 1 | ||||
-rw-r--r-- | tests/VkHeapTests.cpp | 1 | ||||
-rw-r--r-- | tests/VkWrapTests.cpp | 6 | ||||
-rw-r--r-- | tools/sk_app/VulkanWindowContext.cpp | 2 |
9 files changed, 81 insertions, 19 deletions
diff --git a/include/gpu/vk/GrVkTypes.h b/include/gpu/vk/GrVkTypes.h index aa1334adca..5e93733531 100644 --- a/include/gpu/vk/GrVkTypes.h +++ b/include/gpu/vk/GrVkTypes.h @@ -31,14 +31,17 @@ * Vulkan textures are really const GrVkImageInfo* */ struct GrVkAlloc { - VkDeviceMemory fMemory; // can be VK_NULL_HANDLE iff Tex is an RT and uses borrow semantics - VkDeviceSize fOffset; - VkDeviceSize fSize; // this can be indeterminate iff Tex uses borrow semantics - uint32_t fFlags; + VkDeviceMemory fMemory = VK_NULL_HANDLE; // can be VK_NULL_HANDLE iff is an RT and is borrowed + VkDeviceSize fOffset = 0; + VkDeviceSize fSize = 0; // this can be indeterminate iff Tex uses borrow semantics + uint32_t fFlags= 0; enum Flag { kNoncoherent_Flag = 0x1, // memory must be flushed to device after mapping }; +private: + friend class GrVkHeap; // For access to usesSystemHeap + bool fUsesSystemHeap = false; }; struct GrVkImageInfo { diff --git a/src/gpu/vk/GrVkBuffer.cpp b/src/gpu/vk/GrVkBuffer.cpp index 5aa3fb0c15..64f26309b7 100644 --- a/src/gpu/vk/GrVkBuffer.cpp +++ b/src/gpu/vk/GrVkBuffer.cpp @@ -169,6 +169,21 @@ void GrVkBuffer::internalMap(GrVkGpu* gpu, size_t size, bool* createdNewBuffer) if (fDesc.fDynamic) { const GrVkAlloc& alloc = this->alloc(); + SkASSERT(alloc.fSize > 0); + + // For Noncoherent buffers we want to make sure the range that we map, both offset and size, + // are aligned to the nonCoherentAtomSize limit. The offset should have been correctly + // aligned by our memory allocator. For size we pad out to make the range also aligned. + if (SkToBool(alloc.fFlags & GrVkAlloc::kNoncoherent_Flag)) { + // Currently we always have the internal offset as 0. + SkASSERT(0 == fOffset); + VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize; + SkASSERT(0 == (alloc.fOffset & (alignment - 1))); + + // Make size of the map aligned to nonCoherentAtomSize + size = (size + alignment - 1) & ~(alignment - 1); + } + SkASSERT(size + fOffset <= alloc.fSize); VkResult err = VK_CALL(gpu, MapMemory(gpu->device(), alloc.fMemory, alloc.fOffset + fOffset, size, 0, &fMapPtr)); diff --git a/src/gpu/vk/GrVkGpu.cpp b/src/gpu/vk/GrVkGpu.cpp index 2ef765dc0b..c5a03f99bb 100644 --- a/src/gpu/vk/GrVkGpu.cpp +++ b/src/gpu/vk/GrVkGpu.cpp @@ -130,6 +130,7 @@ GrVkGpu::GrVkGpu(GrContext* context, const GrContextOptions& options, fBackendContext->fFeatures, fBackendContext->fExtensions)); fCaps.reset(SkRef(fVkCaps.get())); + VK_CALL(GetPhysicalDeviceProperties(fBackendContext->fPhysicalDevice, &fPhysDevProps)); VK_CALL(GetPhysicalDeviceMemoryProperties(fBackendContext->fPhysicalDevice, &fPhysDevMemProps)); const VkCommandPoolCreateInfo cmdPoolInfo = { @@ -578,12 +579,27 @@ bool GrVkGpu::uploadTexDataLinear(GrVkTexture* tex, GrSurfaceOrigin texOrigin, i int texTop = kBottomLeft_GrSurfaceOrigin == texOrigin ? tex->height() - top - height : top; const GrVkAlloc& alloc = tex->alloc(); VkDeviceSize offset = alloc.fOffset + texTop*layout.rowPitch + left*bpp; + VkDeviceSize offsetDiff = 0; VkDeviceSize size = height*layout.rowPitch; + // For Noncoherent buffers we want to make sure the range that we map, both offset and size, + // are aligned to the nonCoherentAtomSize limit. We may have to move the initial offset back to + // meet the alignment requirements. So we track how far we move back and then adjust the mapped + // ptr back up so that this is opaque to the caller. + if (SkToBool(alloc.fFlags & GrVkAlloc::kNoncoherent_Flag)) { + VkDeviceSize alignment = this->physicalDeviceProperties().limits.nonCoherentAtomSize; + offsetDiff = offset & (alignment - 1); + offset = offset - offsetDiff; + // Make size of the map aligned to nonCoherentAtomSize + size = (size + alignment - 1) & ~(alignment - 1); + } + SkASSERT(offset >= alloc.fOffset); + SkASSERT(size <= alloc.fOffset + alloc.fSize); void* mapPtr; err = GR_VK_CALL(interface, MapMemory(fDevice, alloc.fMemory, offset, size, 0, &mapPtr)); if (err) { return false; } + mapPtr = reinterpret_cast<char*>(mapPtr) + offsetDiff; if (kBottomLeft_GrSurfaceOrigin == texOrigin) { // copy into buffer by rows @@ -1108,13 +1124,30 @@ GrStencilAttachment* GrVkGpu::createStencilAttachmentForRenderTarget(const GrRen bool copy_testing_data(GrVkGpu* gpu, void* srcData, const GrVkAlloc& alloc, size_t bufferOffset, size_t srcRowBytes, size_t dstRowBytes, int h) { + // For Noncoherent buffers we want to make sure the range that we map, both offset and size, + // are aligned to the nonCoherentAtomSize limit. We may have to move the initial offset back to + // meet the alignment requirements. So we track how far we move back and then adjust the mapped + // ptr back up so that this is opaque to the caller. + VkDeviceSize mapSize = dstRowBytes * h; + VkDeviceSize mapOffset = alloc.fOffset + bufferOffset; + VkDeviceSize offsetDiff = 0; + if (SkToBool(alloc.fFlags & GrVkAlloc::kNoncoherent_Flag)) { + VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize; + offsetDiff = mapOffset & (alignment - 1); + mapOffset = mapOffset - offsetDiff; + // Make size of the map aligned to nonCoherentAtomSize + mapSize = (mapSize + alignment - 1) & ~(alignment - 1); + } + SkASSERT(mapOffset >= alloc.fOffset); + SkASSERT(mapSize + mapOffset <= alloc.fOffset + alloc.fSize); void* mapPtr; VkResult err = GR_VK_CALL(gpu->vkInterface(), MapMemory(gpu->device(), alloc.fMemory, - alloc.fOffset + bufferOffset, - dstRowBytes * h, + mapOffset, + mapSize, 0, &mapPtr)); + mapPtr = reinterpret_cast<char*>(mapPtr) + offsetDiff; if (err) { return false; } @@ -1179,7 +1212,7 @@ GrBackendTexture GrVkGpu::createTestingOnlyBackendTexture(void* srcData, int w, } VkImage image = VK_NULL_HANDLE; - GrVkAlloc alloc = { VK_NULL_HANDLE, 0, 0, 0 }; + GrVkAlloc alloc; VkImageTiling imageTiling = linearTiling ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; VkImageLayout initialLayout = (VK_IMAGE_TILING_LINEAR == imageTiling) @@ -1224,7 +1257,7 @@ GrBackendTexture GrVkGpu::createTestingOnlyBackendTexture(void* srcData, int w, } // We need to declare these early so that we can delete them at the end outside of the if block. - GrVkAlloc bufferAlloc = { VK_NULL_HANDLE, 0, 0, 0 }; + GrVkAlloc bufferAlloc; VkBuffer buffer = VK_NULL_HANDLE; VkResult err; @@ -1978,8 +2011,8 @@ bool GrVkGpu::onReadPixels(GrSurface* surface, GrSurfaceOrigin origin, int left, // We need to submit the current command buffer to the Queue and make sure it finishes before // we can copy the data out of the buffer. this->submitCommandBuffer(kForce_SyncQueue); - GrVkMemory::InvalidateMappedAlloc(this, transferBuffer->alloc()); void* mappedMemory = transferBuffer->map(); + GrVkMemory::InvalidateMappedAlloc(this, transferBuffer->alloc()); if (copyFromOrigin) { uint32_t skipRows = region.imageExtent.height - height; diff --git a/src/gpu/vk/GrVkGpu.h b/src/gpu/vk/GrVkGpu.h index 3833c5f604..0b52147f5c 100644 --- a/src/gpu/vk/GrVkGpu.h +++ b/src/gpu/vk/GrVkGpu.h @@ -51,6 +51,9 @@ public: VkDevice device() const { return fDevice; } VkQueue queue() const { return fQueue; } VkCommandPool cmdPool() const { return fCmdPool; } + VkPhysicalDeviceProperties physicalDeviceProperties() const { + return fPhysDevProps; + } VkPhysicalDeviceMemoryProperties physicalDeviceMemoryProperties() const { return fPhysDevMemProps; } @@ -253,6 +256,7 @@ private: SkSTArray<1, GrVkSemaphore::Resource*> fSemaphoresToWaitOn; SkSTArray<1, GrVkSemaphore::Resource*> fSemaphoresToSignal; + VkPhysicalDeviceProperties fPhysDevProps; VkPhysicalDeviceMemoryProperties fPhysDevMemProps; std::unique_ptr<GrVkHeap> fHeaps[kHeapCount]; diff --git a/src/gpu/vk/GrVkMemory.cpp b/src/gpu/vk/GrVkMemory.cpp index a90533e17b..e27e260dbd 100644 --- a/src/gpu/vk/GrVkMemory.cpp +++ b/src/gpu/vk/GrVkMemory.cpp @@ -68,6 +68,7 @@ bool GrVkMemory::AllocAndBindBufferMemory(const GrVkGpu* gpu, uint32_t typeIndex = 0; uint32_t heapIndex = 0; const VkPhysicalDeviceMemoryProperties& phDevMemProps = gpu->physicalDeviceMemoryProperties(); + const VkPhysicalDeviceProperties& phDevProps = gpu->physicalDeviceProperties(); if (dynamic) { // try to get cached and ideally non-coherent memory first if (!get_valid_memory_type_index(phDevMemProps, @@ -87,6 +88,11 @@ bool GrVkMemory::AllocAndBindBufferMemory(const GrVkGpu* gpu, VkMemoryPropertyFlags mpf = phDevMemProps.memoryTypes[typeIndex].propertyFlags; alloc->fFlags = mpf & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT ? 0x0 : GrVkAlloc::kNoncoherent_Flag; + if (SkToBool(alloc->fFlags & GrVkAlloc::kNoncoherent_Flag)) { + SkASSERT(SkIsPow2(memReqs.alignment)); + SkASSERT(SkIsPow2(phDevProps.limits.nonCoherentAtomSize)); + memReqs.alignment = SkTMax(memReqs.alignment, phDevProps.limits.nonCoherentAtomSize); + } } else { // device-local memory should always be available for static buffers SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps, @@ -293,7 +299,7 @@ void GrVkMemory::FlushMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc) { mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; mappedMemoryRange.memory = alloc.fMemory; mappedMemoryRange.offset = alloc.fOffset; - mappedMemoryRange.size = alloc.fSize; + mappedMemoryRange.size = VK_WHOLE_SIZE; // Size of what we mapped GR_VK_CALL(gpu->vkInterface(), FlushMappedMemoryRanges(gpu->device(), 1, &mappedMemoryRange)); } @@ -306,7 +312,7 @@ void GrVkMemory::InvalidateMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& allo mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; mappedMemoryRange.memory = alloc.fMemory; mappedMemoryRange.offset = alloc.fOffset; - mappedMemoryRange.size = alloc.fSize; + mappedMemoryRange.size = VK_WHOLE_SIZE; // Size of what we mapped GR_VK_CALL(gpu->vkInterface(), InvalidateMappedMemoryRanges(gpu->device(), 1, &mappedMemoryRange)); } @@ -519,7 +525,7 @@ bool GrVkHeap::subAlloc(VkDeviceSize size, VkDeviceSize alignment, VkMemoryAllocateInfo allocInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // sType nullptr, // pNext - size, // allocationSize + alignedSize, // allocationSize memoryTypeIndex, // memoryTypeIndex }; @@ -531,7 +537,8 @@ bool GrVkHeap::subAlloc(VkDeviceSize size, VkDeviceSize alignment, return false; } alloc->fOffset = 0; - alloc->fSize = 0; // hint that this is not a subheap allocation + alloc->fSize = alignedSize; + alloc->fUsesSystemHeap = true; #ifdef SK_DEBUG gHeapUsage[VK_MAX_MEMORY_HEAPS] += alignedSize; #endif @@ -624,7 +631,7 @@ bool GrVkHeap::singleAlloc(VkDeviceSize size, VkDeviceSize alignment, bool GrVkHeap::free(const GrVkAlloc& alloc) { // a size of 0 means we're using the system heap - if (0 == alloc.fSize) { + if (alloc.fUsesSystemHeap) { const GrVkInterface* iface = fGpu->vkInterface(); GR_VK_CALL(iface, FreeMemory(fGpu->device(), alloc.fMemory, nullptr)); return true; diff --git a/src/gpu/vk/GrVkMemory.h b/src/gpu/vk/GrVkMemory.h index a8f3771388..88cc47b29f 100644 --- a/src/gpu/vk/GrVkMemory.h +++ b/src/gpu/vk/GrVkMemory.h @@ -141,6 +141,7 @@ public: bool alloc(VkDeviceSize size, VkDeviceSize alignment, uint32_t memoryTypeIndex, uint32_t heapIndex, GrVkAlloc* alloc) { SkASSERT(size > 0); + alloc->fUsesSystemHeap = false; return (*this.*fAllocFunc)(size, alignment, memoryTypeIndex, heapIndex, alloc); } bool free(const GrVkAlloc& alloc); diff --git a/tests/VkHeapTests.cpp b/tests/VkHeapTests.cpp index 29e4e7906b..67eb045d98 100644 --- a/tests/VkHeapTests.cpp +++ b/tests/VkHeapTests.cpp @@ -159,7 +159,6 @@ void suballoc_test(skiatest::Reporter* reporter, GrContext* context) { REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 0 * 1024); // heap should not grow here (allocating more than subheap size) REPORTER_ASSERT(reporter, heap.alloc(128 * 1024, kAlignment, kMemType, kHeapIndex, &alloc0)); - REPORTER_ASSERT(reporter, 0 == alloc0.fSize); REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 0 * 1024); heap.free(alloc0); REPORTER_ASSERT(reporter, heap.alloc(24 * 1024, kAlignment, kMemType, kHeapIndex, &alloc0)); diff --git a/tests/VkWrapTests.cpp b/tests/VkWrapTests.cpp index 9723d5763b..bcf59d222e 100644 --- a/tests/VkWrapTests.cpp +++ b/tests/VkWrapTests.cpp @@ -55,7 +55,7 @@ void wrap_tex_test(skiatest::Reporter* reporter, GrContext* context) { // alloc is null { GrVkImageInfo backendCopy = *imageInfo; - backendCopy.fAlloc = { VK_NULL_HANDLE, 0, 0, 0 }; + backendCopy.fAlloc = GrVkAlloc(); GrBackendTexture backendTex = GrBackendTexture(kW, kH, backendCopy); tex = gpu->wrapBackendTexture(backendTex, kBorrow_GrWrapOwnership); REPORTER_ASSERT(reporter, !tex); @@ -100,7 +100,7 @@ void wrap_rt_test(skiatest::Reporter* reporter, GrContext* context) { // alloc is null { GrVkImageInfo backendCopy = *imageInfo; - backendCopy.fAlloc = { VK_NULL_HANDLE, 0, 0, 0 }; + backendCopy.fAlloc = GrVkAlloc(); // can wrap null alloc GrBackendRenderTarget backendRT(kW, kH, 1, 0, backendCopy); rt = gpu->wrapBackendRenderTarget(backendRT); @@ -138,7 +138,7 @@ void wrap_trt_test(skiatest::Reporter* reporter, GrContext* context) { // alloc is null { GrVkImageInfo backendCopy = *imageInfo; - backendCopy.fAlloc = { VK_NULL_HANDLE, 0, 0, 0 }; + backendCopy.fAlloc = GrVkAlloc(); GrBackendTexture backendTex = GrBackendTexture(kW, kH, backendCopy); tex = gpu->wrapRenderableBackendTexture(backendTex, 1, kBorrow_GrWrapOwnership); REPORTER_ASSERT(reporter, !tex); diff --git a/tools/sk_app/VulkanWindowContext.cpp b/tools/sk_app/VulkanWindowContext.cpp index 6237ee09ae..d7839bc189 100644 --- a/tools/sk_app/VulkanWindowContext.cpp +++ b/tools/sk_app/VulkanWindowContext.cpp @@ -287,7 +287,7 @@ void VulkanWindowContext::createBuffers(VkFormat format, SkColorType colorType) GrVkImageInfo info; info.fImage = fImages[i]; - info.fAlloc = { VK_NULL_HANDLE, 0, 0, 0 }; + info.fAlloc = GrVkAlloc(); info.fImageLayout = VK_IMAGE_LAYOUT_UNDEFINED; info.fImageTiling = VK_IMAGE_TILING_OPTIMAL; info.fFormat = format; |