diff options
author | Philip Langdale <philipl@overt.org> | 2018-09-29 17:56:07 -0700 |
---|---|---|
committer | sfan5 <sfan5@live.de> | 2018-10-22 21:35:48 +0200 |
commit | 93f800a00f3f8ef416082e0a3f9d34d979a1e9a6 (patch) | |
tree | b5108b3c2f4f4357adf0f9921c6b59a6c8e162e6 /video | |
parent | 6fbd933108a74bbd3a375be1456692320a97380e (diff) |
vo_gpu: vulkan: Add support for exporting buffer memory
The CUDA/Vulkan interop works on the basis of memory being exported
from Vulkan and then imported by CUDA. To enable this, we add a way
to declare a buffer as being intended for export, and then add a
function to do the export.
For now, we support fd-based export on Linux and HANDLE-based export
on Windows. Other handle types exist, and we can add support for them
when a need arises.
Also note that this is just for exporting buffers, rather than
textures (VkImages). Image import on the CUDA side is supposed to
work, but it is currently buggy, pending a new driver release.
Finally, at least with my nvidia hardware and drivers, everything
seems to work even if we don't initialise the buffer with the right
exportability options. Nevertheless, I'm enforcing it so that we
follow the spec.
Diffstat (limited to 'video')
-rw-r--r-- | video/out/gpu/ra.h | 1 | ||||
-rw-r--r-- | video/out/vulkan/common.h | 4 | ||||
-rw-r--r-- | video/out/vulkan/malloc.c | 45 | ||||
-rw-r--r-- | video/out/vulkan/malloc.h | 4 | ||||
-rw-r--r-- | video/out/vulkan/ra_vk.c | 70 | ||||
-rw-r--r-- | video/out/vulkan/ra_vk.h | 14 | ||||
-rw-r--r-- | video/out/vulkan/utils.c | 41 | ||||
-rw-r--r-- | video/out/vulkan/utils.h | 6 |
8 files changed, 179 insertions, 6 deletions
diff --git a/video/out/gpu/ra.h b/video/out/gpu/ra.h index 79caacc919..748b485c95 100644 --- a/video/out/gpu/ra.h +++ b/video/out/gpu/ra.h @@ -188,6 +188,7 @@ enum ra_buf_type { RA_BUF_TYPE_SHADER_STORAGE, // shader buffer (SSBO), for RA_VARTYPE_BUF_RW RA_BUF_TYPE_UNIFORM, // uniform buffer (UBO), for RA_VARTYPE_BUF_RO RA_BUF_TYPE_VERTEX, // not publicly usable (RA-internal usage) + RA_BUF_TYPE_SHARED_MEMORY, // device memory for sharing with external API }; struct ra_buf_params { diff --git a/video/out/vulkan/common.h b/video/out/vulkan/common.h index 1a4c3b842f..a4284f9055 100644 --- a/video/out/vulkan/common.h +++ b/video/out/vulkan/common.h @@ -73,4 +73,8 @@ struct mpvk_ctx { // Cached capabilities VkPhysicalDeviceLimits limits; VkPhysicalDeviceFeatures features; + + // Extension availability + bool has_ext_external_memory; + bool has_ext_external_memory_export; }; diff --git a/video/out/vulkan/malloc.c b/video/out/vulkan/malloc.c index 32c2c6b4d0..a68a9342e5 100644 --- a/video/out/vulkan/malloc.c +++ b/video/out/vulkan/malloc.c @@ -2,6 +2,10 @@ #include "utils.h" #include "osdep/timer.h" +#if HAVE_WIN32_DESKTOP +#include <versionhelpers.h> +#endif + // Controls the multiplication factor for new slab allocations. The new slab // will always be allocated such that the size of the slab is this factor times // the previous slab. Higher values make it grow faster. 
@@ -57,6 +61,7 @@ struct vk_heap { VkBufferUsageFlags usage; // the buffer usage type (or 0) VkMemoryPropertyFlags flags; // the memory type flags (or 0) uint32_t typeBits; // the memory type index requirements (or 0) + bool exportable; // whether memory is exportable to other APIs struct vk_slab **slabs; // array of slabs sorted by size int num_slabs; }; @@ -126,8 +131,20 @@ static struct vk_slab *slab_alloc(struct mpvk_ctx *vk, struct vk_heap *heap, .end = slab->size, }); + VkExportMemoryAllocateInfoKHR eminfo = { + .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR, +#if HAVE_WIN32_DESKTOP + .handleTypes = IsWindows8OrGreater() + ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR + : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT, +#else + .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, +#endif + }; + VkMemoryAllocateInfo minfo = { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = heap->exportable ? &eminfo : NULL, .allocationSize = slab->size, }; @@ -141,8 +158,14 @@ static struct vk_slab *slab_alloc(struct mpvk_ctx *vk, struct vk_heap *heap, for (int i = 0; i < vk->num_pools; i++) qfs[i] = vk->pools[i]->qf; + VkExternalMemoryBufferCreateInfo ebinfo = { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, + .handleTypes = eminfo.handleTypes, + }; + VkBufferCreateInfo binfo = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = heap->exportable ? &ebinfo : NULL, .size = slab->size, .usage = heap->usage, .sharingMode = vk->num_pools > 1 ? VK_SHARING_MODE_CONCURRENT @@ -292,7 +315,8 @@ void vk_free_memslice(struct mpvk_ctx *vk, struct vk_memslice slice) // reqs: can be NULL static struct vk_heap *find_heap(struct mpvk_ctx *vk, VkBufferUsageFlags usage, VkMemoryPropertyFlags flags, - VkMemoryRequirements *reqs) + VkMemoryRequirements *reqs, + bool exportable) { struct vk_malloc *ma = vk->alloc; int typeBits = reqs ? 
reqs->memoryTypeBits : 0; @@ -304,6 +328,8 @@ static struct vk_heap *find_heap(struct mpvk_ctx *vk, VkBufferUsageFlags usage, continue; if (ma->heaps[i].typeBits != typeBits) continue; + if (ma->heaps[i].exportable != exportable) + continue; return &ma->heaps[i]; } @@ -314,6 +340,7 @@ static struct vk_heap *find_heap(struct mpvk_ctx *vk, VkBufferUsageFlags usage, .usage = usage, .flags = flags, .typeBits = typeBits, + .exportable = exportable, }; return heap; } @@ -396,6 +423,7 @@ static bool slice_heap(struct mpvk_ctx *vk, struct vk_heap *heap, size_t size, .vkmem = slab->mem, .offset = MP_ALIGN_UP(reg.start, alignment), .size = size, + .slab_size = slab->size, .priv = slab, }; @@ -413,15 +441,24 @@ static bool slice_heap(struct mpvk_ctx *vk, struct vk_heap *heap, size_t size, bool vk_malloc_generic(struct mpvk_ctx *vk, VkMemoryRequirements reqs, VkMemoryPropertyFlags flags, struct vk_memslice *out) { - struct vk_heap *heap = find_heap(vk, 0, flags, &reqs); + struct vk_heap *heap = find_heap(vk, 0, flags, &reqs, false); return slice_heap(vk, heap, reqs.size, reqs.alignment, out); } bool vk_malloc_buffer(struct mpvk_ctx *vk, VkBufferUsageFlags bufFlags, VkMemoryPropertyFlags memFlags, VkDeviceSize size, - VkDeviceSize alignment, struct vk_bufslice *out) + VkDeviceSize alignment, bool exportable, + struct vk_bufslice *out) { - struct vk_heap *heap = find_heap(vk, bufFlags, memFlags, NULL); + if (exportable) { + if (!vk->has_ext_external_memory_export) { + MP_ERR(vk, "Exportable memory requires the %s extension\n", + MP_VK_EXTERNAL_MEMORY_EXPORT_EXTENSION_NAME); + return false; + } + } + + struct vk_heap *heap = find_heap(vk, bufFlags, memFlags, NULL, exportable); if (!slice_heap(vk, heap, size, alignment, &out->mem)) return false; diff --git a/video/out/vulkan/malloc.h b/video/out/vulkan/malloc.h index 466c8d81bd..9b311ce311 100644 --- a/video/out/vulkan/malloc.h +++ b/video/out/vulkan/malloc.h @@ -11,6 +11,7 @@ struct vk_memslice { VkDeviceMemory vkmem; size_t 
offset; size_t size; + size_t slab_size; void *priv; }; @@ -32,4 +33,5 @@ struct vk_bufslice { // creating/destroying lots of (little) VkBuffers. bool vk_malloc_buffer(struct mpvk_ctx *vk, VkBufferUsageFlags bufFlags, VkMemoryPropertyFlags memFlags, VkDeviceSize size, - VkDeviceSize alignment, struct vk_bufslice *out); + VkDeviceSize alignment, bool exportable, + struct vk_bufslice *out); diff --git a/video/out/vulkan/ra_vk.c b/video/out/vulkan/ra_vk.c index 236287d7d3..1548b8c785 100644 --- a/video/out/vulkan/ra_vk.c +++ b/video/out/vulkan/ra_vk.c @@ -4,6 +4,10 @@ #include "ra_vk.h" #include "malloc.h" +#if HAVE_WIN32_DESKTOP +#include <versionhelpers.h> +#endif + static struct ra_fns ra_fns_vk; enum queue_type { @@ -787,6 +791,7 @@ static struct ra_buf *vk_buf_create(struct ra *ra, VkBufferUsageFlags bufFlags = 0; VkMemoryPropertyFlags memFlags = 0; VkDeviceSize align = 4; // alignment 4 is needed for buf_update + bool exportable = false; switch (params->type) { case RA_BUF_TYPE_TEX_UPLOAD: @@ -811,6 +816,11 @@ static struct ra_buf *vk_buf_create(struct ra *ra, bufFlags |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; break; + case RA_BUF_TYPE_SHARED_MEMORY: + bufFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + exportable = true; + break; default: abort(); } @@ -826,7 +836,7 @@ static struct ra_buf *vk_buf_create(struct ra *ra, } if (!vk_malloc_buffer(vk, bufFlags, memFlags, params->size, align, - &buf_vk->slice)) + exportable, &buf_vk->slice)) { goto error; } @@ -916,6 +926,64 @@ error: return false; } +static bool ra_vk_mem_get_external_info(struct ra *ra, struct vk_memslice *mem, struct vk_external_mem *ret) +{ + struct mpvk_ctx *vk = ra_vk_get(ra); + +#if HAVE_WIN32_DESKTOP + HANDLE mem_handle; + + VkMemoryGetWin32HandleInfoKHR info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR, + .pNext = NULL, + .memory = mem->vkmem, + .handleType = IsWindows8OrGreater() 
+ ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR + : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT, + }; + + VK_LOAD_PFN(vkGetMemoryWin32HandleKHR); + VK(pfn_vkGetMemoryWin32HandleKHR(vk->dev, &info, &mem_handle)); + + ret->mem_handle = mem_handle; +#else + int mem_fd; + + VkMemoryGetFdInfoKHR info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR, + .pNext = NULL, + .memory = mem->vkmem, + .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR, + }; + + VK_LOAD_PFN(vkGetMemoryFdKHR); + VK(pfn_vkGetMemoryFdKHR(vk->dev, &info, &mem_fd)); + + ret->mem_fd = mem_fd; +#endif + ret->size = mem->size; + ret->offset = mem->offset; + ret->mem_size = mem->slab_size; + + return true; + +error: + return false; +} + +bool ra_vk_buf_get_external_info(struct ra *ra, struct ra_buf *buf, struct vk_external_mem *ret) +{ + if (buf->params.type != RA_BUF_TYPE_SHARED_MEMORY) { + MP_ERR(ra, "Buffer must be of TYPE_SHARED_MEMORY to be able to export it..."); + return false; + } + + struct ra_buf_vk *buf_vk = buf->priv; + struct vk_memslice *mem = &buf_vk->slice.mem; + + return ra_vk_mem_get_external_info(ra, mem, ret); +} + #define MPVK_NUM_DS MPVK_MAX_STREAMING_DEPTH // For ra_renderpass.priv diff --git a/video/out/vulkan/ra_vk.h b/video/out/vulkan/ra_vk.h index da613c7f5b..89eb310146 100644 --- a/video/out/vulkan/ra_vk.h +++ b/video/out/vulkan/ra_vk.h @@ -29,3 +29,17 @@ struct vk_cmd *ra_vk_submit(struct ra *ra, struct ra_tex *tex); // May be called on a struct ra of any type. Returns NULL if the ra is not // a vulkan ra. struct mpvk_ctx *ra_vk_get(struct ra *ra); + +struct vk_external_mem { +#if HAVE_WIN32_DESKTOP + HANDLE mem_handle; +#else + int mem_fd; +#endif + size_t mem_size; + size_t size; + size_t offset; +}; + +// Export an ra_buf for importing by another api. 
+bool ra_vk_buf_get_external_info(struct ra *ra, struct ra_buf *buf, struct vk_external_mem *ret); diff --git a/video/out/vulkan/utils.c b/video/out/vulkan/utils.c index cfe97377c0..4413fe70b1 100644 --- a/video/out/vulkan/utils.c +++ b/video/out/vulkan/utils.c @@ -438,6 +438,38 @@ static void add_qinfo(void *tactx, VkDeviceQueueCreateInfo **qinfos, MP_TARRAY_APPEND(tactx, *qinfos, *num_qinfos, qinfo); } +static bool detect_device_extensions(struct mpvk_ctx *vk) +{ + bool ret = false; + VkExtensionProperties *props = NULL; + + uint32_t num_exts; + VK(vkEnumerateDeviceExtensionProperties(vk->physd, NULL, + &num_exts, NULL)); + + props = talloc_array(NULL, VkExtensionProperties, num_exts); + VK(vkEnumerateDeviceExtensionProperties(vk->physd, + NULL, &num_exts, props)); + + for (uint32_t i = 0; i < num_exts; i++) { + if (!strcmp(VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME, + props[i].extensionName)) { + vk->has_ext_external_memory = true; + continue; + } + if (!strcmp(MP_VK_EXTERNAL_MEMORY_EXPORT_EXTENSION_NAME, + props[i].extensionName)) { + vk->has_ext_external_memory_export = true; + continue; + } + } + + ret = true; +error: + talloc_free(props); + return ret; +} + bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts) { assert(vk->physd); @@ -493,9 +525,18 @@ bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts) add_qinfo(tmp, &qinfos, &num_qinfos, qfs, idx_comp, opts.queue_count); add_qinfo(tmp, &qinfos, &num_qinfos, qfs, idx_tf, opts.queue_count); + if (!detect_device_extensions(vk)) { + MP_WARN(vk, "Failed to enumerate device extensions. 
" + "Some features may be disabled.\n"); + } + const char **exts = NULL; int num_exts = 0; MP_TARRAY_APPEND(tmp, exts, num_exts, VK_KHR_SWAPCHAIN_EXTENSION_NAME); + if (vk->has_ext_external_memory) + MP_TARRAY_APPEND(tmp, exts, num_exts, VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME); + if (vk->has_ext_external_memory_export) + MP_TARRAY_APPEND(tmp, exts, num_exts, MP_VK_EXTERNAL_MEMORY_EXPORT_EXTENSION_NAME); if (vk->spirv->required_ext) MP_TARRAY_APPEND(tmp, exts, num_exts, vk->spirv->required_ext); diff --git a/video/out/vulkan/utils.h b/video/out/vulkan/utils.h index 2962313257..97d1c24489 100644 --- a/video/out/vulkan/utils.h +++ b/video/out/vulkan/utils.h @@ -10,6 +10,12 @@ #define VK_LOAD_PFN(name) PFN_##name pfn_##name = (PFN_##name) \ vkGetInstanceProcAddr(vk->inst, #name); +#if HAVE_WIN32_DESKTOP + #define MP_VK_EXTERNAL_MEMORY_EXPORT_EXTENSION_NAME VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME +#else + #define MP_VK_EXTERNAL_MEMORY_EXPORT_EXTENSION_NAME VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME +#endif + // Return a human-readable name for various struct mpvk_ctx enums const char* vk_err(VkResult res); |