-rw-r--r--  DOCS/man/options.rst             |   36
-rw-r--r--  options/options.c                |    5
-rw-r--r--  options/options.h                |    1
-rw-r--r--  video/out/gpu/context.c          |    8
-rw-r--r--  video/out/gpu/ra.h               |    9
-rw-r--r--  video/out/vo_gpu.c               |   12
-rw-r--r--  video/out/vulkan/common.h        |   51
-rw-r--r--  video/out/vulkan/context.c       |  501
-rw-r--r--  video/out/vulkan/context.h       |   10
-rw-r--r--  video/out/vulkan/context_xlib.c  |  116
-rw-r--r--  video/out/vulkan/formats.c       |   55
-rw-r--r--  video/out/vulkan/formats.h       |   16
-rw-r--r--  video/out/vulkan/malloc.c        |  424
-rw-r--r--  video/out/vulkan/malloc.h        |   35
-rw-r--r--  video/out/vulkan/ra_vk.c         | 1590
-rw-r--r--  video/out/vulkan/ra_vk.h         |   31
-rw-r--r--  video/out/vulkan/utils.c         |  726
-rw-r--r--  video/out/vulkan/utils.h         |  153
-rw-r--r--  wscript                          |    4
-rw-r--r--  wscript_build.py                 |    6
20 files changed, 3773 insertions(+), 16 deletions(-)
diff --git a/DOCS/man/options.rst b/DOCS/man/options.rst
index b08150c6bb..80e7350292 100644
--- a/DOCS/man/options.rst
+++ b/DOCS/man/options.rst
@@ -4103,10 +4103,6 @@ The following video options are currently all specific to ``--vo=gpu`` and
the video along the temporal axis. The filter used can be controlled using
the ``--tscale`` setting.
- Note that this relies on vsync to work, see ``--opengl-swapinterval`` for
- more information. It should also only be used with an ``--fbo-format``
- that has at least 16 bit precision.
-
``--interpolation-threshold=<0..1,-1>``
Threshold below which frame ratio interpolation gets disabled (default:
``0.0001``). This is calculated as ``abs(disphz/vfps - 1) < threshold``,
@@ -4184,6 +4180,31 @@ The following video options are currently all specific to ``--vo=gpu`` and
results, as can missing or incorrect display FPS information (see
``--display-fps``).
+``--vulkan-swap-mode=<mode>``
+ Controls the presentation mode of the vulkan swapchain. This is similar
+ to the ``--opengl-swapinterval`` option.
+
+ auto
+ Use the preferred swapchain mode for the vulkan context. (Default)
+ fifo
+ Non-tearing, vsync blocked. Similar to "VSync on".
+ fifo-relaxed
+ Tearing, vsync blocked. Late frames will tear instead of stuttering.
+ mailbox
+ Non-tearing, not vsync blocked. Similar to "triple buffering".
+ immediate
+ Tearing, not vsync blocked. Similar to "VSync off".
+
+``--vulkan-queue-count=<1..8>``
+ Controls the number of VkQueues used for rendering (limited by how many
+ your device supports). In theory, using more queues could enable some
+ parallelism between frames (when using a ``--swapchain-depth`` higher than
+ 1). (Default: 1)
+
+    NOTE: Setting this to a value higher than 1 may cause graphical corruption,
+    as mpv's vulkan implementation currently does not attempt to protect textures
+    against concurrent access.
+
``--glsl-shaders=<file-list>``
Custom GLSL hooks. These are a flexible way to add custom fragment shaders,
which can be injected at almost arbitrary points in the rendering pipeline,
@@ -4590,7 +4611,7 @@ The following video options are currently all specific to ``--vo=gpu`` and
on Nvidia and AMD. Newer Intel chips with the latest drivers may also
work.
x11
- X11/GLX
+ X11/GLX, VK_KHR_xlib_surface
x11probe
For internal autoprobing, equivalent to ``x11`` otherwise. Don't use
directly, it could be removed without warning as autoprobing is changed.
@@ -5020,7 +5041,10 @@ Miscellaneous
Media files must use constant framerate. Section-wise VFR might work as well
with some container formats (but not e.g. mkv). If the sync code detects
severe A/V desync, or the framerate cannot be detected, the player
- automatically reverts to ``audio`` mode for some time or permanently.
+ automatically reverts to ``audio`` mode for some time or permanently. These
+ modes also require a vsync blocked presentation mode. For OpenGL, this
+ translates to ``--opengl-swapinterval=1``. For Vulkan, it translates to
+ ``--vulkan-swap-mode=fifo`` (or ``fifo-relaxed``).
The modes with ``desync`` in their names do not attempt to keep audio/video
in sync. They will slowly (or quickly) desync, until e.g. the next seek
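
The swap modes documented above map one-to-one onto Vulkan's VkPresentModeKHR values. A minimal sketch of that mapping, mirroring the present_modes[] table this patch adds in video/out/vulkan/context.c (the SWAP_* names come from that file; "auto" / SWAP_AUTO instead falls back to the mode preferred by the platform context):

    static const VkPresentModeKHR present_modes[] = {
        [SWAP_FIFO]         = VK_PRESENT_MODE_FIFO_KHR,         // vsync on
        [SWAP_FIFO_RELAXED] = VK_PRESENT_MODE_FIFO_RELAXED_KHR, // tear if late
        [SWAP_MAILBOX]      = VK_PRESENT_MODE_MAILBOX_KHR,      // triple buffering
        [SWAP_IMMEDIATE]    = VK_PRESENT_MODE_IMMEDIATE_KHR,    // vsync off
    };
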
diff --git a/options/options.c b/options/options.c
index 1168cc196b..6467468691 100644
--- a/options/options.c
+++ b/options/options.c
@@ -89,6 +89,7 @@ extern const struct m_obj_list vo_obj_list;
extern const struct m_obj_list ao_obj_list;
extern const struct m_sub_options opengl_conf;
+extern const struct m_sub_options vulkan_conf;
extern const struct m_sub_options angle_conf;
extern const struct m_sub_options cocoa_conf;
@@ -690,6 +691,10 @@ const m_option_t mp_opts[] = {
OPT_SUBSTRUCT("", opengl_opts, opengl_conf, 0),
#endif
+#if HAVE_VULKAN
+ OPT_SUBSTRUCT("", vulkan_opts, vulkan_conf, 0),
+#endif
+
#if HAVE_EGL_ANGLE_WIN32
OPT_SUBSTRUCT("", angle_opts, angle_conf, 0),
#endif
diff --git a/options/options.h b/options/options.h
index c02b7a34ca..63dee03612 100644
--- a/options/options.h
+++ b/options/options.h
@@ -329,6 +329,7 @@ typedef struct MPOpts {
struct gl_video_opts *gl_video_opts;
struct angle_opts *angle_opts;
struct opengl_opts *opengl_opts;
+ struct vulkan_opts *vulkan_opts;
struct cocoa_opts *cocoa_opts;
struct dvd_opts *dvd_opts;
diff --git a/video/out/gpu/context.c b/video/out/gpu/context.c
index c5721c73b4..25e2a754bf 100644
--- a/video/out/gpu/context.c
+++ b/video/out/gpu/context.c
@@ -44,6 +44,7 @@ extern const struct ra_ctx_fns ra_ctx_dxgl;
extern const struct ra_ctx_fns ra_ctx_rpi;
extern const struct ra_ctx_fns ra_ctx_mali;
extern const struct ra_ctx_fns ra_ctx_vdpauglx;
+extern const struct ra_ctx_fns ra_ctx_vulkan_xlib;
static const struct ra_ctx_fns *contexts[] = {
// OpenGL contexts:
@@ -83,6 +84,13 @@ static const struct ra_ctx_fns *contexts[] = {
#if HAVE_VDPAU_GL_X11
&ra_ctx_vdpauglx,
#endif
+
+// Vulkan contexts:
+#if HAVE_VULKAN
+#if HAVE_X11
+ &ra_ctx_vulkan_xlib,
+#endif
+#endif
};
static bool get_help(struct mp_log *log, struct bstr param)
diff --git a/video/out/gpu/ra.h b/video/out/gpu/ra.h
index 10245b250e..7a2fa0e11c 100644
--- a/video/out/gpu/ra.h
+++ b/video/out/gpu/ra.h
@@ -146,6 +146,7 @@ enum ra_buf_type {
RA_BUF_TYPE_TEX_UPLOAD, // texture upload buffer (pixel buffer object)
RA_BUF_TYPE_SHADER_STORAGE, // shader buffer (SSBO), for RA_VARTYPE_BUF_RW
RA_BUF_TYPE_UNIFORM, // uniform buffer (UBO), for RA_VARTYPE_BUF_RO
+ RA_BUF_TYPE_VERTEX, // not publicly usable (RA-internal usage)
};
struct ra_buf_params {
@@ -369,10 +370,10 @@ struct ra_fns {
void (*buf_destroy)(struct ra *ra, struct ra_buf *buf);
- // Update the contents of a buffer, starting at a given offset and up to a
- // given size, with the contents of *data. This is an extremely common
- // operation. Calling this while the buffer is considered "in use" is an
- // error. (See: buf_poll)
+ // Update the contents of a buffer, starting at a given offset (*must* be a
+ // multiple of 4) and up to a given size, with the contents of *data. This
+ // is an extremely common operation. Calling this while the buffer is
+ // considered "in use" is an error. (See: buf_poll)
void (*buf_update)(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset,
const void *data, size_t size);
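
To illustrate the tightened contract above, here is a minimal hypothetical call site (ra and buf stand in for an existing rendering abstraction and buffer; not part of the patch):

    // Offsets passed to buf_update must be a multiple of 4.
    float data[4] = {0.0f, 0.25f, 0.5f, 1.0f};
    ptrdiff_t offset = 16;              // multiple of 4, so this is allowed
    assert(offset % 4 == 0);
    ra->fns->buf_update(ra, buf, offset, data, sizeof(data));
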
diff --git a/video/out/vo_gpu.c b/video/out/vo_gpu.c
index bd245de05b..a26912e0d8 100644
--- a/video/out/vo_gpu.c
+++ b/video/out/vo_gpu.c
@@ -60,6 +60,7 @@ struct gpu_priv {
static void resize(struct gpu_priv *p)
{
struct vo *vo = p->vo;
+ struct ra_swapchain *sw = p->ctx->swapchain;
MP_VERBOSE(vo, "Resize: %dx%d\n", vo->dwidth, vo->dheight);
@@ -69,6 +70,11 @@ static void resize(struct gpu_priv *p)
gl_video_resize(p->renderer, &src, &dst, &osd);
+ int fb_depth = sw->fns->color_depth ? sw->fns->color_depth(sw) : 0;
+ if (fb_depth)
+ MP_VERBOSE(p, "Reported display depth: %d\n", fb_depth);
+ gl_video_set_fb_depth(p->renderer, fb_depth);
+
vo->want_redraw = true;
}
@@ -289,7 +295,6 @@ static int preinit(struct vo *vo)
goto err_out;
assert(p->ctx->ra);
assert(p->ctx->swapchain);
- struct ra_swapchain *sw = p->ctx->swapchain;
p->renderer = gl_video_init(p->ctx->ra, vo->log, vo->global);
gl_video_set_osd_source(p->renderer, vo->osd);
@@ -305,11 +310,6 @@ static int preinit(struct vo *vo)
vo->hwdec_devs, vo->opts->gl_hwdec_interop);
gl_video_set_hwdec(p->renderer, p->hwdec);
- int fb_depth = sw->fns->color_depth ? sw->fns->color_depth(sw) : 0;
- if (fb_depth)
- MP_VERBOSE(p, "Reported display depth: %d\n", fb_depth);
- gl_video_set_fb_depth(p->renderer, fb_depth);
-
return 0;
err_out:
diff --git a/video/out/vulkan/common.h b/video/out/vulkan/common.h
new file mode 100644
index 0000000000..4c0e783f0e
--- /dev/null
+++ b/video/out/vulkan/common.h
@@ -0,0 +1,51 @@
+#pragma once
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <assert.h>
+
+#include "config.h"
+
+#include "common/common.h"
+#include "common/msg.h"
+
+// We need to define all platforms we want to support. Since we have
+// our own mechanism for checking this, we re-define the right symbols
+#if HAVE_X11
+#define VK_USE_PLATFORM_XLIB_KHR
+#endif
+
+#include <vulkan/vulkan.h>
+
+// Vulkan allows the optional use of a custom allocator. We don't need one but
+// mark this parameter with a better name in case we ever decide to change this
+// in the future. (And to make the code more readable)
+#define MPVK_ALLOCATOR NULL
+
+// A lot of things depend on streaming resources across frames. Depending on
+// how many frames we render ahead of time, we need to pick enough to avoid
+// any conflicts, so make all of these tunable relative to this constant in
+// order to centralize them.
+#define MPVK_MAX_STREAMING_DEPTH 8
+
+// Shared struct used to hold vulkan context information
+struct mpvk_ctx {
+ struct mp_log *log;
+ VkInstance inst;
+ VkPhysicalDevice physd;
+ VkDebugReportCallbackEXT dbg;
+ VkDevice dev;
+
+ // Surface, must be initialized after the context itself
+ VkSurfaceKHR surf;
+ VkSurfaceFormatKHR surf_format; // picked at surface initialization time
+
+ struct vk_malloc *alloc; // memory allocator for this device
+ struct vk_cmdpool *pool; // primary command pool for this device
+ struct vk_cmd *last_cmd; // most recently submitted command
+
+ // Cached capabilities
+ VkPhysicalDeviceLimits limits;
+};
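
As a rough illustration of how MPVK_MAX_STREAMING_DEPTH is meant to be used (hypothetical helper, not part of the patch): per-frame resources are kept in small rings sized by this constant, so a slot is only reused once earlier in-flight frames can no longer reference it.

    // Hypothetical sketch: a ring of per-frame slots sized by the streaming depth.
    struct stream_ring {
        VkFence fence[MPVK_MAX_STREAMING_DEPTH]; // one slot per in-flight frame
        int idx;                                 // next slot to (re)use
    };

    static int stream_ring_next(struct stream_ring *ring)
    {
        int cur = ring->idx;
        ring->idx = (ring->idx + 1) % MPVK_MAX_STREAMING_DEPTH;
        return cur;
    }
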
diff --git a/video/out/vulkan/context.c b/video/out/vulkan/context.c
new file mode 100644
index 0000000000..bd456d214c
--- /dev/null
+++ b/video/out/vulkan/context.c
@@ -0,0 +1,501 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "options/m_config.h"
+#include "context.h"
+#include "ra_vk.h"
+#include "utils.h"
+
+enum {
+ SWAP_AUTO = 0,
+ SWAP_FIFO,
+ SWAP_FIFO_RELAXED,
+ SWAP_MAILBOX,
+ SWAP_IMMEDIATE,
+ SWAP_COUNT,
+};
+
+struct vulkan_opts {
+ struct mpvk_device_opts dev_opts; // logical device options
+ char *device; // force a specific GPU
+ int swap_mode;
+};
+
+static int vk_validate_dev(struct mp_log *log, const struct m_option *opt,
+ struct bstr name, struct bstr param)
+{
+ int ret = M_OPT_INVALID;
+ VkResult res;
+
+ // Create a dummy instance to validate/list the devices
+ VkInstanceCreateInfo info = {
+ .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
+ };
+
+ VkInstance inst;
+ VkPhysicalDevice *devices = NULL;
+ uint32_t num = 0;
+
+ res = vkCreateInstance(&info, MPVK_ALLOCATOR, &inst);
+ if (res != VK_SUCCESS)
+ goto error;
+
+ res = vkEnumeratePhysicalDevices(inst, &num, NULL);
+ if (res != VK_SUCCESS)
+ goto error;
+
+ devices = talloc_array(NULL, VkPhysicalDevice, num);
+ res = vkEnumeratePhysicalDevices(inst, &num, devices);
+ if (res != VK_SUCCESS)
+ goto error;
+
+ bool help = bstr_equals0(param, "help");
+ if (help) {
+ mp_info(log, "Available vulkan devices:\n");
+ ret = M_OPT_EXIT;
+ }
+
+ for (int i = 0; i < num; i++) {
+ VkPhysicalDeviceProperties prop;
+ vkGetPhysicalDeviceProperties(devices[i], &prop);
+
+ if (help) {
+ mp_info(log, " '%s' (GPU %d, ID %x:%x)\n", prop.deviceName, i,
+ (unsigned)prop.vendorID, (unsigned)prop.deviceID);
+ } else if (bstr_equals0(param, prop.deviceName)) {
+ ret = 0;
+ break;
+ }
+ }
+
+ if (!help && ret != 0)
+ mp_err(log, "No device with name '%.*s'!\n", BSTR_P(param));
+
+error:
+ talloc_free(devices);
+ return ret;
+}
+
+#define OPT_BASE_STRUCT struct vulkan_opts
+const struct m_sub_options vulkan_conf = {
+ .opts = (const struct m_option[]) {
+ OPT_STRING_VALIDATE("vulkan-device", device, 0, vk_validate_dev),
+ OPT_CHOICE("vulkan-swap-mode", swap_mode, 0,
+ ({"auto", SWAP_AUTO},
+ {"fifo", SWAP_FIFO},
+ {"fifo-relaxed", SWAP_FIFO_RELAXED},
+ {"mailbox", SWAP_MAILBOX},
+ {"immediate", SWAP_IMMEDIATE})),
+ OPT_INTRANGE("vulkan-queue-count", dev_opts.queue_count, 0, 1,
+ MPVK_MAX_QUEUES, OPTDEF_INT(1)),
+ {0}
+ },
+ .size = sizeof(struct vulkan_opts)
+};
+
+struct priv {
+ struct mpvk_ctx *vk;
+ struct vulkan_opts *opts;
+ // Swapchain metadata:
+ int w, h; // current size
+ VkSwapchainCreateInfoKHR protoInfo; // partially filled-in prototype
+ VkSwapchainKHR swapchain;
+ VkSwapchainKHR old_swapchain;
+ int frames_in_flight;
+ // state of the images:
+ struct ra_tex **images; // ra_tex wrappers for the vkimages
+ int num_images; // size of images
+ VkSemaphore *acquired; // pool of semaphores used to synchronize images
+ int num_acquired; // size of this pool
+ int idx_acquired; // index of next free semaphore within this pool
+ int last_imgidx; // the image index last acquired (for submit)
+};
+
+static bool update_swapchain_info(struct priv *p,
+ VkSwapchainCreateInfoKHR *info)
+{
+ struct mpvk_ctx *vk = p->vk;
+
+ // Query the supported capabilities and update this struct as needed
+ VkSurfaceCapabilitiesKHR caps;
+ VK(vkGetPhysicalDeviceSurfaceCapabilitiesKHR(vk->physd, vk->surf, &caps));
+
+ // Sorted by preference
+ static const VkCompositeAlphaFlagBitsKHR alphaModes[] = {
+ VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR,
+ VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR,
+ };
+
+ for (int i = 0; i < MP_ARRAY_SIZE(alphaModes); i++) {
+ if (caps.supportedCompositeAlpha & alphaModes[i]) {
+ info->compositeAlpha = alphaModes[i];
+ break;
+ }
+ }
+
+ if (!info->compositeAlpha) {
+ MP_ERR(vk, "Failed picking alpha compositing mode (caps: 0x%x)\n",
+ caps.supportedCompositeAlpha);
+ goto error;
+ }
+
+ static const VkSurfaceTransformFlagBitsKHR rotModes[] = {
+ VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR,
+ VK_SURFACE_TRANSFORM_INHERIT_BIT_KHR,
+ };
+
+ for (int i = 0; i < MP_ARRAY_SIZE(rotModes); i++) {
+ if (caps.supportedTransforms & rotModes[i]) {
+ info->preTransform = rotModes[i];
+ break;
+ }
+ }
+
+ if (!info->preTransform) {
+ MP_ERR(vk, "Failed picking surface transform mode (caps: 0x%x)\n",
+ caps.supportedTransforms);
+ goto error;
+ }
+
+ // Image count as required
+ MP_VERBOSE(vk, "Requested image count: %d (min %d max %d)\n",
+ (int)info->minImageCount, (int)caps.minImageCount,
+ (int)caps.maxImageCount);
+
+ info->minImageCount = MPMAX(info->minImageCount, caps.minImageCount);
+ if (caps.maxImageCount)
+ info->minImageCount = MPMIN(info->minImageCount, caps.maxImageCount);
+
+ // Check the extent against the allowed parameters
+ if (caps.currentExtent.width != info->imageExtent.width &&
+ caps.currentExtent.width != 0xFFFFFFFF)
+ {
+ MP_WARN(vk, "Requested width %d does not match current width %d\n",
+ (int)info->imageExtent.width, (int)caps.currentExtent.width);
+ info->imageExtent.width = caps.currentExtent.width;
+ }
+
+ if (caps.currentExtent.height != info->imageExtent.height &&
+ caps.currentExtent.height != 0xFFFFFFFF)
+ {
+ MP_WARN(vk, "Requested height %d does not match current height %d\n",
+ (int)info->imageExtent.height, (int)caps.currentExtent.height);
+ info->imageExtent.height = caps.currentExtent.height;
+ }
+
+ if (caps.minImageExtent.width > info->imageExtent.width ||
+ caps.minImageExtent.height > info->imageExtent.height)
+ {
+ MP_ERR(vk, "Requested size %dx%d smaller than device minimum %d%d\n",
+ (int)info->imageExtent.width, (int)info->imageExtent.height,
+ (int)caps.minImageExtent.width, (int)caps.minImageExtent.height);
+ goto error;
+ }
+
+ if (caps.maxImageExtent.width < info->imageExtent.width ||
+ caps.maxImageExtent.height < info->imageExtent.height)
+ {
+ MP_ERR(vk, "Requested size %dx%d larger than device maximum %d%d\n",
+ (int)info->imageExtent.width, (int)info->imageExtent.height,
+ (int)caps.maxImageExtent.width, (int)caps.maxImageExtent.height);
+ goto error;
+ }
+
+ // We just request whatever usage we can, and let the ra_vk decide what
+ // ra_tex_params that translates to. This makes the images as flexible
+ // as possible.
+ info->imageUsage = caps.supportedUsageFlags;
+ return true;
+
+error:
+ return false;
+}
+
+void ra_vk_ctx_uninit(struct ra_ctx *ctx)
+{
+ if (ctx->ra) {
+ struct priv *p = ctx->swapchain->priv;
+ struct mpvk_ctx *vk = p->vk;
+
+ mpvk_pool_wait_idle(vk, vk->pool);
+
+ for (int i = 0; i < p->num_images; i++)
+ ra_tex_free(ctx->ra, &p->images[i]);
+ for (int i = 0; i < p->num_acquired; i++)
+ vkDestroySemaphore(vk->dev, p->acquired[i], MPVK_ALLOCATOR);
+
+ vkDestroySwapchainKHR(vk->dev, p->swapchain, MPVK_ALLOCATOR);
+
+ talloc_free(p->images);
+ talloc_free(p->acquired);
+ ctx->ra->fns->destroy(ctx->ra);
+ ctx->ra = NULL;
+ }
+
+ talloc_free(ctx->swapchain);
+ ctx->swapchain = NULL;
+}
+
+static const struct ra_swapchain_fns vulkan_swapchain;
+
+bool ra_vk_ctx_init(struct ra_ctx *ctx, struct mpvk_ctx *vk,
+ VkPresentModeKHR preferred_mode)
+{
+ struct ra_swapchain *sw = ctx->swapchain = talloc_zero(NULL, struct ra_swapchain);
+ sw->ctx = ctx;
+ sw->fns = &vulkan_swapchain;
+
+ struct priv *p = sw->priv = talloc_zero(sw, struct priv);
+ p->vk = vk;
+ p->opts = mp_get_config_group(p, ctx->global, &vulkan_conf);
+
+ if (!mpvk_find_phys_device(vk, p->opts->device, ctx->opts.allow_sw))
+ goto error;
+ if (!mpvk_pick_surface_format(vk))
+ goto error;
+ if (!mpvk_device_init(vk, p->opts->dev_opts))
+ goto error;
+
+ ctx->ra = ra_create_vk(vk, ctx->log);
+ if (!ctx->ra)
+ goto error;
+
+ static const VkPresentModeKHR present_modes[SWAP_COUNT] = {
+ [SWAP_FIFO] = VK_PRESENT_MODE_FIFO_KHR,
+ [SWAP_FIFO_RELAXED] = VK_PRESENT_MODE_FIFO_RELAXED_KHR,
+ [SWAP_MAILBOX] = VK_PRESENT_MODE_MAILBOX_KHR,
+ [SWAP_IMMEDIATE] = VK_PRESENT_MODE_IMMEDIATE_KHR,
+ };
+
+ p->protoInfo = (VkSwapchainCreateInfoKHR) {
+ .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
+ .surface = vk->surf,
+ .imageFormat = vk->surf_format.format,
+ .imageColorSpace = vk->surf_format.colorSpace,
+ .imageArrayLayers = 1, // non-stereoscopic
+ .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .minImageCount = ctx->opts.swapchain_depth + 1, // +1 for FB
+ .presentMode = p->opts->swap_mode ? present_modes[p->opts->swap_mode]
+ : preferred_mode,
+ .clipped = true,
+ };
+
+ // Make sure the swapchain present mode is supported
+ int num_modes;
+ VK(vkGetPhysicalDeviceSurfacePresentModesKHR(vk->physd, vk->surf,
+ &num_modes, NULL));
+ VkPresentModeKHR *modes = talloc_array(NULL, VkPresentModeKHR, num_modes);
+ VK(vkGetPhysicalDeviceSurfacePresentModesKHR(vk->physd, vk->surf,
+ &num_modes, modes));
+ bool supported = false;
+ for (int i = 0; i < num_modes; i++)
+ supported |= (modes[i] == p->protoInfo.presentMode);
+ talloc_free(modes);
+
+ if (!supported) {
+ MP_ERR(ctx, "Requested swap mode unsupported by this device!\n");
+ goto error;
+ }
+
+ return true;
+
+error:
+ ra_vk_ctx_uninit(ctx);
+ return false;
+}
+
+static void destroy_swapchain(struct mpvk_ctx *vk, struct priv *p)
+{
+ assert(p->old_swapchain);
+ vkDestroySwapchainKHR(vk->dev, p->old_swapchain, MPVK_ALLOCATOR);
+ p->old_swapchain = NULL;
+}
+
+bool ra_vk_ctx_resize(struct ra_swapchain *sw, int w, int h)
+{
+ struct priv *p = sw->priv;
+ if (w == p->w && h == p->h)
+ return true;
+
+ struct ra *ra = sw->ctx->ra;
+ struct mpvk_ctx *vk = p->vk;
+ VkImage *vkimages = NULL;
+
+ // It's invalid to trigger another swapchain recreation while there's
+ // more than one swapchain already active, so we need to flush any pending
+ // asynchronous swapchain release operations that may be ongoing.
+ while (p->old_swapchain)
+ mpvk_dev_poll_cmds(vk, 100000); // 100μs
+
+ VkSwapchainCreateInfoKHR sinfo = p->protoInfo;
+ sinfo.imageExtent = (VkExtent2D){ w, h };
+ sinfo.oldSwapchain = p->swapchain;
+
+ if (!update_swapchain_info(p, &sinfo))
+ goto error;
+
+ VK(vkCreateSwapchainKHR(vk->dev, &sinfo, MPVK_ALLOCATOR, &p->swapchain));
+ p->w = w;
+ p->h = h;
+
+ // Freeing the old swapchain while it's still in use is an error, so do
+ // it asynchronously once the device is idle.
+ if (sinfo.oldSwapchain) {
+ p->old_swapchain = sinfo.oldSwapchain;
+ vk_dev_callback(vk, (vk_cb) destroy_swapchain, vk, p);
+ }
+
+ // Get the new swapchain images
+ int num;
+ VK(vkGetSwapchainImagesKHR(vk->dev, p->swapchain, &num, NULL));
+ vkimages = talloc_array(NULL, VkImage, num);
+ VK(vkGetSwapchainImagesKHR(vk->dev, p->swapchain, &num, vkimages));
+
+ // If needed, allocate some more semaphores
+ while (num > p->num_acquired) {
+ VkSemaphore sem;
+ static const VkSemaphoreCreateInfo seminfo = {
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+ };
+ VK(vkCreateSemaphore(vk->dev, &seminfo, MPVK_ALLOCATOR, &sem));
+ MP_TARRAY_APPEND(NULL, p->acquired, p->num_acquired, sem);
+ }
+
+ // Recreate the ra_tex wrappers
+ for (int i = 0; i < p->num_images; i++)
+ ra_tex_free(ra, &p->images[i]);
+
+ p->num_images = num;
+ MP_TARRAY_GROW(NULL, p->images, p->num_images);
+ for (int i = 0; i < num; i++) {
+ p->images[i] = ra_vk_wrap_swapchain_img(ra, vkimages[i], sinfo);
+ if (!p->images[i])
+ goto error;
+ }
+
+ talloc_free(vkimages);
+ return true;
+
+error:
+ talloc_free(vkimages);
+ vkDestroySwapchainKHR(vk->dev, p->swapchain, MPVK_ALLOCATOR);
+ p->swapchain = NULL;
+ return false;
+}
+
+static int color_depth(struct ra_swapchain *sw)
+{
+ struct priv *p = sw->priv;
+ int bits = 0;
+
+ if (!p->num_images)
+ return bits;
+
+ // The channel with the most bits is probably the most authoritative about
+ // the actual color information (consider e.g. a2bgr10). Slight downside
+ // in that it results in rounding r/b for e.g. rgb565, but we don't pick
+ // surfaces with fewer than 8 bits anyway.
+ const struct ra_format *fmt = p->images[0]->params.format;
+ for (int i = 0; i < fmt->num_components; i++) {
+ int depth = fmt->component_depth[i];
+ bits = MPMAX(bits, depth ? depth : fmt->component_size[i]);
+ }
+
+ return bits;
+}
+
+static bool start_frame(struct ra_swapchain *sw, struct ra_fbo *out_fbo)
+{
+ struct priv *p = sw->priv;
+ struct mpvk_ctx *vk = p->vk;
+ if (!p->swapchain)
+ goto error;
+
+ uint32_t imgidx = 0;
+ MP_TRACE(vk, "vkAcquireNextImageKHR\n");
+ VkResult res = vkAcquireNextImageKHR(vk->dev, p->swapchain, UINT64_MAX,
+ p->acquired[p->idx_acquired], NULL,
+ &imgidx);
+ if (res == VK_ERROR_OUT_OF_DATE_KHR)
+ goto error; // just return in this case
+ VK_ASSERT(res, "Failed acquiring swapchain image");
+
+ p->last_imgidx = imgidx;
+ *out_fbo = (struct ra_fbo) {
+ .tex = p->images[imgidx],
+ .flip = false,
+ };
+ return true;
+
+error:
+ return false;
+}
+
+static bool submit_frame(struct ra_swapchain *sw, const struct vo_frame *frame)
+{
+ struct priv *p = sw->priv;
+ struct ra *ra = sw->ctx->ra;
+ struct mpvk_ctx *vk = p->vk;
+ if (!p->swapchain)
+ goto error;
+
+ VkSemaphore acquired = p->acquired[p->idx_acquired++];
+ p->idx_acquired %= p->num_acquired;
+
+ VkSemaphore done;
+ if (!ra_vk_submit(ra, p->images[p->last_imgidx], acquired, &done,
+ &p->frames_in_flight))
+ goto error;
+
+ // For some reason, nvidia absolutely shits itself when presenting from a
+ // full queue - so advance all of the cmdpool indices first and then do the
+ // present on an "empty" queue
+ vk_cmd_cycle_queues(vk);
+ struct vk_cmdpool *pool = vk->pool;
+ VkQueue queue = pool->queues[pool->qindex];
+
+ VkPresentInfoKHR pinfo = {
+ .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
+ .waitSemaphoreCount = 1,
+ .pWaitSemaphores = &done,
+ .swapchainCount = 1,
+ .pSwapchains = &p->swapchain,
+ .pImageIndices = &p->last_imgidx,
+ };
+
+ VK(vkQueuePresentKHR(queue, &pinfo));
+ return true;
+
+error:
+ return false;
+}
+
+static void swap_buffers(struct ra_swapchain *sw)
+{
+ struct priv *p = sw->priv;
+
+ while (p->frames_in_flight >= sw->ctx->opts.swapchain_depth)
+ mpvk_dev_poll_cmds(p->vk, 100000); // 100μs
+}
+
+static const struct ra_swapchain_fns vulkan_swapchain = {
+ // .screenshot is not currently supported
+ .color_depth = color_depth,
+ .start_frame = start_frame,
+ .submit_frame = submit_frame,
+ .swap_buffers = swap_buffers,
+};
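
For orientation, the callbacks in vulkan_swapchain above are driven once per frame by the generic vo_gpu code; a simplified sketch of that calling sequence (not the actual vo_gpu implementation):

    static bool render_one_frame(struct ra_swapchain *sw)
    {
        struct ra_fbo fbo;
        if (!sw->fns->start_frame(sw, &fbo))  // acquires the next swapchain image
            return false;
        // ... draw the video into fbo.tex here ...
        if (!sw->fns->submit_frame(sw, NULL)) // submits rendering and presents
            return false;
        sw->fns->swap_buffers(sw);            // throttle to --swapchain-depth
        return true;
    }
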
diff --git a/video/out/vulkan/context.h b/video/out/vulkan/context.h
new file mode 100644
index 0000000000..3f630bc10e
--- /dev/null
+++ b/video/out/vulkan/context.h
@@ -0,0 +1,10 @@
+#pragma once
+
+#include "video/out/gpu/context.h"
+#include "common.h"
+
+// Helpers for ra_ctx based on ra_vk. These initialize ctx->ra and ctx->swapchain.
+void ra_vk_ctx_uninit(struct ra_ctx *ctx);
+bool ra_vk_ctx_init(struct ra_ctx *ctx, struct mpvk_ctx *vk,
+ VkPresentModeKHR preferred_mode);
+bool ra_vk_ctx_resize(struct ra_swapchain *sw, int w, int h);
diff --git a/video/out/vulkan/context_xlib.c b/video/out/vulkan/context_xlib.c
new file mode 100644
index 0000000000..2611fbb706
--- /dev/null
+++ b/video/out/vulkan/context_xlib.c
@@ -0,0 +1,116 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "video/out/gpu/context.h"
+#include "video/out/x11_common.h"
+
+#include "common.h"
+#include "context.h"
+#include "utils.h"
+
+struct priv {
+ struct mpvk_ctx vk;
+};
+
+static void xlib_uninit(struct ra_ctx *ctx)
+{
+ struct priv *p = ctx->priv;
+
+ ra_vk_ctx_uninit(ctx);
+ mpvk_uninit(&p->vk);
+ vo_x11_uninit(ctx->vo);
+}
+
+static bool xlib_init(struct ra_ctx *ctx)
+{
+ struct priv *p = ctx->priv = talloc_zero(ctx, struct priv);
+ struct mpvk_ctx *vk = &p->vk;
+ int msgl = ctx->opts.probing ? MSGL_V : MSGL_ERR;
+
+ if (!vo_x11_init(ctx->vo))
+ goto error;
+
+ if (!vo_x11_create_vo_window(ctx->vo, NULL, "mpvk"))
+ goto error;
+
+ if (!mpvk_instance_init(vk, ctx->log, ctx->opts.debug))
+ goto error;
+
+ VkXlibSurfaceCreateInfoKHR xinfo = {
+ .sType = VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR,
+ .dpy = ctx->vo->x11->display,
+ .window = ctx->vo->x11->window,
+ };
+
+ VkResult res = vkCreateXlibSurfaceKHR(vk->inst, &xinfo, MPVK_ALLOCATOR,
+ &vk->surf);
+ if (res != VK_SUCCESS) {
+ MP_MSG(ctx, msgl, "Failed creating Xlib surface: %s\n", vk_err(res));
+ goto error;
+ }
+
+ if (!ra_vk_ctx_init(ctx, vk, VK_PRESENT_MODE_FIFO_KHR))
+ goto error;
+
+ return true;
+
+error:
+ xlib_uninit(ctx);
+ return false;
+}
+
+static bool resize(struct ra_ctx *ctx)
+{
+ return ra_vk_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight);
+}
+
+static bool xlib_reconfig(struct ra_ctx *ctx)
+{
+ vo_x11_config_vo_window(ctx->vo);
+ return resize(ctx);
+}
+
+static int xlib_control(struct ra_ctx *ctx, int *events, int request, void *arg)
+{
+ int ret = vo_x11_control(ctx->vo, events, request, arg);
+ if (*events & VO_EVENT_RESIZE) {
+ if (!resize(ctx))
+ return VO_ERROR;
+ }
+ return ret;
+}
+
+static void xlib_wakeup(struct ra_ctx *ctx)
+{
+ vo_x11_wakeup(ctx->vo);
+}
+
+static void xlib_wait_events(struct ra_ctx *ctx, int64_t until_time_us)
+{
+ vo_x11_wait_events(ctx->vo, until_time_us);
+}
+
+const struct ra_ctx_fns ra_ctx_vulkan_xlib = {
+ .type = "vulkan",
+ .name = "x11",
+ .reconfig = xlib_reconfig,
+ .control = xlib_control,
+ .wakeup = xlib_wakeup,
+ .wait_events = xlib_wait_events,
+ .init = xlib_init,
+ .uninit = xlib_uninit,
+};
diff --git a/video/out/vulkan/formats.c b/video/out/vulkan/formats.c
new file mode 100644
index 0000000000..b44bead99c
--- /dev/null
+++ b/video/out/vulkan/formats.c
@@ -0,0 +1,55 @@
+#include "formats.h"
+
+const struct vk_format vk_formats[] = {
+ // Regular, byte-aligned integer formats
+ {"r8", VK_FORMAT_R8_UNORM, 1, 1, {8 }, RA_CTYPE_UNORM },
+ {"rg8", VK_FORMAT_R8G8_UNORM, 2, 2, {8, 8 }, RA_CTYPE_UNORM },
+ {"rgb8", VK_FORMAT_R8G8B8_UNORM, 3, 3, {8, 8, 8 }, RA_CTYPE_UNORM },
+ {"rgba8", VK_FORMAT_R8G8B8A8_UNORM, 4, 4, {8, 8, 8, 8 }, RA_CTYPE_UNORM },
+ {"r16", VK_FORMAT_R16_UNORM, 1, 2, {16 }, RA_CTYPE_UNORM },
+ {"rg16", VK_FORMAT_R16G16_UNORM, 2, 4, {16, 16 }, RA_CTYPE_UNORM },
+ {"rgb16", VK_FORMAT_R16G16B16_UNORM, 3, 6, {16, 16, 16 }, RA_CTYPE_UNORM },
+ {"rgba16", VK_FORMAT_R16G16B16A16_UNORM, 4, 8, {16, 16, 16, 16}, RA_CTYPE_UNORM },
+
+ // Special, integer-only formats
+ {"r32ui", VK_FORMAT_R32_UINT, 1, 4, {32 }, RA_CTYPE_UINT },
+ {"rg32ui", VK_FORMAT_R32G32_UINT, 2, 8, {32, 32 }, RA_CTYPE_UINT },
+ {"rgb32ui", VK_FORMAT_R32G32B32_UINT, 3, 12, {32, 32, 32 }, RA_CTYPE_UINT },
+ {"rgba32ui", VK_FORMAT_R32G32B32A32_UINT, 4, 16, {32, 32, 32, 32}, RA_CTYPE_UINT },
+ {"r64ui", VK_FORMAT_R64_UINT, 1, 8, {64 }, RA_CTYPE_UINT },
+ {"rg64ui", VK_FORMAT_R64G64_UINT, 2, 16, {64, 64 }, RA_CTYPE_UINT },
+ {"rgb64ui", VK_FORMAT_R64G64B64_UINT, 3, 24, {64, 64, 64 }, RA_CTYPE_UINT },
+ {"rgba64ui", VK_FORMAT_R64G64B64A64_UINT, 4, 32, {64, 64, 64, 64}, RA_CTYPE_UINT },
+
+ // Packed integer formats
+ {"rg4", VK_FORMAT_R4G4_UNORM_PACK8, 2, 1, {4, 4 }, RA_CTYPE_UNORM },
+ {"rgba4", VK_FORMAT_R4G4B4A4_UNORM_PACK16, 4, 2, {4, 4, 4, 4 }, RA_CTYPE_UNORM },
+ {"rgb565", VK_FORMAT_R5G6B5_UNORM_PACK16, 3, 2, {5, 6, 5 }, RA_CTYPE_UNORM },
+ {"rgb565a1", VK_FORMAT_R5G5B5A1_UNORM_PACK16, 4, 2, {5, 5, 5, 1 }, RA_CTYPE_UNORM },
+
+ // Float formats (native formats, hf = half float, df = double float)
+ {"r16hf", VK_FORMAT_R16_SFLOAT, 1, 2, {16 }, RA_CTYPE_FLOAT },
+ {"rg16hf", VK_FORMAT_R16G16_SFLOAT, 2, 4, {16, 16 }, RA_CTYPE_FLOAT },
+ {"rgb16hf", VK_FORMAT_R16G16B16_SFLOAT, 3, 6, {16, 16, 16 }, RA_CTYPE_FLOAT },
+ {"rgba16hf", VK_FORMAT_R16G16B16A16_SFLOAT, 4, 8, {16, 16, 16, 16}, RA_CTYPE_FLOAT },
+ {"r32f", VK_FORMAT_R32_SFLOAT, 1, 4, {32 }, RA_CTYPE_FLOAT },
+ {"rg32f", VK_FORMAT_R32G32_SFLOAT, 2, 8, {32, 32 }, RA_CTYPE_FLOAT },
+ {"rgb32f", VK_FORMAT_R32G32B32_SFLOAT, 3, 12, {32, 32, 32 }, RA_CTYPE_FLOAT },
+ {"rgba32f", VK_FORMAT_R32G32B32A32_SFLOAT, 4, 16, {32, 32, 32, 32}, RA_CTYPE_FLOAT },
+ {"r64df", VK_FORMAT_R64_SFLOAT, 1, 8, {64 }, RA_CTYPE_FLOAT },
+ {"rg64df", VK_FORMAT_R64G64_SFLOAT, 2, 16, {64, 64 }, RA_CTYPE_FLOAT },
+ {"rgb64df", VK_FORMAT_R64G64B64_SFLOAT, 3, 24, {64, 64, 64 }, RA_CTYPE_FLOAT },
+ {"rgba64df", VK_FORMAT_R64G64B64A64_SFLOAT, 4, 32, {64, 64, 64, 64}, RA_CTYPE_FLOAT },
+
+ // "Swapped" component order images
+ {"bgr8", VK_FORMAT_B8G8R8_UNORM, 3, 3, {8, 8, 8 }, RA_CTYPE_UNORM, true },
+ {"bgra8", VK_FORMAT_B8G8R8A8_UNORM, 4, 4, {8, 8, 8, 8 }, RA_CTYPE_UNORM, true },
+ {"bgra4", VK_FORMAT_B4G4R4A4_UNORM_PACK16, 4, 2, {4, 4, 4, 4 }, RA_CTYPE_UNORM, true },
+ {"bgr565", VK_FORMAT_B5G6R5_UNORM_PACK16, 3, 2, {5, 6, 5 }, RA_CTYPE_UNORM, true },
+ {"bgr565a1", VK_FORMAT_B5G5R5A1_UNORM_PACK16, 4, 2, {5, 5, 5, 1 }, RA_CTYPE_UNORM, true },
+ {"a1rgb5", VK_FORMAT_A1R5G5B5_UNORM_PACK16, 4, 2, {1, 5, 5, 5 }, RA_CTYPE_UNORM, true },
+ {"a2rgb10", VK_FORMAT_A2R10G10B10_UNORM_PACK32, 4, 4, {2, 10, 10, 10}, RA_CTYPE_UNORM, true },
+ {"a2bgr10", VK_FORMAT_A2B10G10R10_UNORM_PACK32, 4, 4, {2, 10, 10, 10}, RA_CTYPE_UNORM, true },
+ {"abgr8", VK_FORMAT_A8B8G8R8_UNORM_PACK32, 4, 4, {8, 8, 8, 8 }, RA_CTYPE_UNORM, true },
+ {0}
+};
diff --git a/video/out/vulkan/formats.h b/video/out/vulkan/formats.h
new file mode 100644
index 0000000000..22782a6958
--- /dev/null
+++ b/video/out/vulkan/formats.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include "video/out/gpu/ra.h"
+#include "common.h"
+
+struct vk_format {
+ const char *name;
+ VkFormat iformat; // vulkan format enum
+ int components; // how many components are there
+ int bytes; // how many bytes is a texel
+ int bits[4]; // how many bits per component
+ enum ra_ctype ctype; // format representation type
+ bool fucked_order; // used for formats which are not simply rgba
+};
+
+extern const struct vk_format vk_formats[];
diff --git a/video/out/vulkan/malloc.c b/video/out/vulkan/malloc.c
new file mode 100644
index 0000000000..31fcd36ddb
--- /dev/null
+++ b/video/out/vulkan/malloc.c
@@ -0,0 +1,424 @@
+#include "malloc.h"
+#include "utils.h"
+#include "osdep/timer.h"
+
+// Controls the multiplication factor for new slab allocations. The new slab
+// will always be allocated such that the size of the slab is this factor times
+// the previous slab. Higher values make it grow faster.
+#define MPVK_HEAP_SLAB_GROWTH_RATE 4
+
+// Controls the minimum slab size, to reduce the frequency at which very small
+// slabs would need to get allocated when allocating the first few buffers.
+// (Default: 1 MB)
+#define MPVK_HEAP_MINIMUM_SLAB_SIZE (1 << 20)
+
+// Controls the maximum slab size, to reduce the effect of unbounded slab
+// growth exhausting memory. If the application needs a single allocation
+// that's bigger than this value, it will be allocated directly from the
+// device. (Default: 512 MB)
+#define MPVK_HEAP_MAXIMUM_SLAB_SIZE (1 << 29)
+
+// Controls the minimum free region size, to reduce thrashing the free space
+// map with lots of small buffers during uninit. (Default: 1 KB)
+#define MPVK_HEAP_MINIMUM_REGION_SIZE (1 << 10)
+
+// Represents a region of available memory
+struct vk_region {
+ size_t start; // first offset in region
+ size_t end; // first offset *not* in region
+};
+
+static inline size_t region_len(struct vk_region r)
+{
+ return r.end - r.start;
+}
+
+// A single slab represents a contiguous region of allocated memory. Actual
+// allocations are served as slices of this. Slabs are organized into linked
+// lists, which represent individual heaps.
+struct vk_slab {
+ VkDeviceMemory mem; // underlying device allocation
+ size_t size; // total size of `slab`
+ size_t used; // number of bytes actually in use (for GC accounting)
+ bool dedicated; // slab is allocated specifically for one object
+ // free space map: a sorted list of memory regions that are available
+ struct vk_region *regions;
+ int num_regions;
+ // optional, depends on the memory type:
+ VkBuffer buffer; // buffer spanning the entire slab
+ void *data; // mapped memory corresponding to `mem`
+};
+
+// Represents a single memory heap. We keep track of a vk_heap for each
+// combination of buffer type and memory selection parameters. This shouldn't
+// actually be that many in practice, because some combinations simply never
+// occur, and others will generally be the same for the same objects.
+struct vk_heap {
+ VkBufferUsageFlagBits usage; // the buffer usage type (or 0)
+ VkMemoryPropertyFlagBits flags; // the memory type flags (or 0)
+ uint32_t typeBits; // the memory type index requirements (or 0)
+ struct vk_slab **slabs; // array of slabs sorted by size
+ int num_slabs;
+};
+
+// The overall state of the allocator, which keeps track of a vk_heap for each
+// memory type.
+struct vk_malloc {
+ VkPhysicalDeviceMemoryProperties props;
+ struct vk_heap *heaps;
+ int num_heaps;
+};
+
+static void slab_free(struct mpvk_ctx *vk, struct vk_slab *slab)
+{
+ if (!slab)
+ return;
+
+ assert(slab->used == 0);
+
+ int64_t start = mp_time_us();
+ vkDestroyBuffer(vk->dev, slab->buffer, MPVK_ALLOCATOR);
+ // also implicitly unmaps the memory if needed
+ vkFreeMemory(vk->dev, slab->mem, MPVK_ALLOCATOR);
+ int64_t stop = mp_time_us();
+
+ MP_VERBOSE(vk, "Freeing slab of size %zu took %lld μs.\n",
+ slab->size, (long long)(stop - start));
+
+ talloc_free(slab);
+}
+
+static bool find_best_memtype(struct mpvk_ctx *vk, uint32_t typeBits,
+ VkMemoryPropertyFlagBits flags,
+ VkMemoryType *out_type, int *out_index)
+{
+ struct vk_malloc *ma = vk->alloc;
+
+ // The vulkan spec requires memory types to be sorted in the "optimal"
+ // order, so the first matching type we find will be the best/fastest one.
+ for (int i = 0; i < ma->props.memoryTypeCount; i++) {
+ // The memory type flags must include our properties
+ if ((ma->props.memoryTypes[i].propertyFlags & flags) != flags)
+ continue;
+ // The memory type must be supported by the requirements (bitfield)
+ if (typeBits && !(typeBits & (1 << i)))
+ continue;
+ *out_type = ma->props.memoryTypes[i];
+ *out_index = i;
+ return true;
+ }
+
+ MP_ERR(vk, "Found no memory type matching property flags 0x%x and type "
+ "bits 0x%x!\n", flags, (unsigned)typeBits);
+ return false;
+}
+
+static struct vk_slab *slab_alloc(struct mpvk_ctx *vk, struct vk_heap *heap,
+ size_t size)
+{
+ struct vk_slab *slab = talloc_ptrtype(NULL, slab);
+ *slab = (struct vk_slab) {
+ .size = size,
+ };
+
+ MP_TARRAY_APPEND(slab, slab->regions, slab->num_regions, (struct vk_region) {
+ .start = 0,
+ .end = slab->size,
+ });
+
+ VkMemoryAllocateInfo minfo = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+ .allocationSize = slab->size,
+ };
+
+ uint32_t typeBits = heap->typeBits ? heap->typeBits : UINT32_MAX;
+ if (heap->usage) {
+ VkBufferCreateInfo binfo = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .size = slab->size,
+ .usage = heap->usage,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ };
+
+ VK(vkCreateBuffer(vk->dev, &binfo, MPVK_ALLOCATOR, &slab->buffer));
+
+ VkMemoryRequirements reqs;
+ vkGetBufferMemoryRequirements(vk->dev, slab->buffer, &reqs);
+ minfo.allocationSize = reqs.size; // this can be larger than slab->size
+ typeBits &= reqs.memoryTypeBits; // this can restrict the types
+ }
+
+ VkMemoryType type;
+ int index;
+ if (!find_best_memtype(vk, typeBits, heap->flags, &type, &index))
+ goto error;
+
+ MP_VERBOSE(vk, "Allocating %zu memory of type 0x%x (id %d) in heap %d.\n",
+ slab->size, type.propertyFlags, index, (int)type.heapIndex);
+
+ minfo.memoryTypeIndex = index;
+ VK(vkAllocateMemory(vk->dev, &minfo, MPVK_ALLOCATOR, &slab->mem));
+
+ if (heap->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
+ VK(vkMapMemory(vk->dev, slab->mem, 0, VK_WHOLE_SIZE, 0, &slab->data));
+
+ if (slab->buffer)
+ VK(vkBindBufferMemory(vk->dev, slab->buffer, slab->mem, 0));
+
+ return slab;
+
+error:
+ slab_free(vk, slab);
+ return NULL;
+}
+
+static void insert_region(struct vk_slab *slab, struct vk_region region)
+{
+ if (region.start == region.end)
+ return;
+
+ bool big_enough = region_len(region) >= MPVK_HEAP_MINIMUM_REGION_SIZE;
+
+ // Find the index of the first region that comes after this
+ for (int i = 0; i < slab->num_regions; i++) {
+ struct vk_region *r = &slab->regions[i];
+
+ // Check for a few special cases which can be coalesced
+ if (r->end == region.start) {
+ // The new region is at the tail of this region. In addition to
+ // modifying this region, we also need to coalesce all the following
+ // regions for as long as possible
+ r->end = region.end;
+
+ struct vk_region *next = &slab->regions[i+1];
+ while (i+1 < slab->num_regions && r->end == next->start) {
+ r->end = next->end;
+ MP_TARRAY_REMOVE_AT(slab->regions, slab->num_regions, i+1);
+ }
+ return;
+ }
+
+ if (r->start == region.end) {
+ // The new region is at the head of this region. We don't need to
+ // do anything special here - because if this could be further
+ // coalesced backwards, the previous loop iteration would already
+ // have caught it.
+ r->start = region.start;
+ return;
+ }
+
+ if (r->start > region.start) {
+ // The new region comes somewhere before this region, so insert
+ // it into this index in the array.
+ if (big_enough) {
+ MP_TARRAY_INSERT_AT(slab, slab->regions, slab->num_regions,
+ i, region);
+ }
+ return;
+ }
+ }
+
+ // If we've reached the end of this loop, then all of the regions
+ // come before the new region, and are disconnected - so append it
+ if (big_enough)
+ MP_TARRAY_APPEND(slab, slab->regions, slab->num_regions, region);
+}
+
+static void heap_uninit(struct mpvk_ctx *vk, struct vk_heap *heap)
+{
+ for (int i = 0; i < heap->num_slabs; i++)
+ slab_free(vk, heap->slabs[i]);
+
+ talloc_free(heap->slabs);
+ *heap = (struct vk_heap){0};
+}
+
+void vk_malloc_init(struct mpvk_ctx *vk)
+{
+ assert(vk->physd);
+ vk->alloc = talloc_zero(NULL, struct vk_malloc);
+ vkGetPhysicalDeviceMemoryProperties(vk->physd, &vk->alloc->props);
+}
+
+void vk_malloc_uninit(struct mpvk_ctx *vk)
+{
+ struct vk_malloc *ma = vk->alloc;
+ if (!ma)
+ return;
+
+ for (int i = 0; i < ma->num_heaps; i++)
+ heap_uninit(vk, &ma->heaps[i]);
+
+ talloc_free(ma);
+ vk->alloc = NULL;
+}
+
+void vk_free_memslice(struct mpvk_ctx *vk, struct vk_memslice slice)
+{
+ struct vk_slab *slab = slice.priv;
+ if (!slab)
+ return;
+
+ assert(slab->used >= slice.size);
+ slab->used -= slice.size;
+
+ MP_DBG(vk, "Freeing slice %zu + %zu from slab with size %zu\n",
+ slice.offset, slice.size, slab->size);
+
+ if (slab->dedicated) {
+ // If the slab was purpose-allocated for this memslice, we can just
+ // free it here
+ slab_free(vk, slab);
+ } else {
+ // Return the allocation to the free space map
+ insert_region(slab, (struct vk_region) {
+ .start = slice.offset,
+ .end = slice.offset + slice.size,
+ });
+ }
+}
+
+// reqs: can be NULL
+static struct vk_heap *find_heap(struct mpvk_ctx *vk,
+ VkBufferUsageFlagBits usage,
+ VkMemoryPropertyFlagBits flags,
+ VkMemoryRequirements *reqs)
+{
+ struct vk_malloc *ma = vk->alloc;
+ int typeBits = reqs ? reqs->memoryTypeBits : 0;
+
+ for (int i = 0; i < ma->num_heaps; i++) {
+ if (ma->heaps[i].usage != usage)
+ continue;
+ if (ma->heaps[i].flags != flags)
+ continue;
+ if (ma->heaps[i].typeBits != typeBits)
+ continue;
+ return &ma->heaps[i];
+ }
+
+ // Not found => add it
+ MP_TARRAY_GROW(ma, ma->heaps, ma->num_heaps + 1);
+ struct vk_heap *heap = &ma->heaps[ma->num_heaps++];
+ *heap = (struct vk_heap) {
+ .usage = usage,
+ .flags = flags,
+ .typeBits = typeBits,
+ };
+ return heap;
+}
+
+static inline bool region_fits(struct vk_region r, size_t size, size_t align)
+{
+ return MP_ALIGN_UP(r.start, align) + size <= r.end;
+}
+
+// Finds the best-fitting region in a heap. If the heap is too small or too
+// fragmented, a new slab will be allocated under the hood.
+static bool heap_get_region(struct mpvk_ctx *vk, struct vk_heap *heap,
+ size_t size, size_t align,
+ struct vk_slab **out_slab, int *out_index)
+{
+ struct vk_slab *slab = NULL;
+
+ // If the allocation is very big, serve it directly instead of bothering
+ // with the heap
+ if (size > MPVK_HEAP_MAXIMUM_SLAB_SIZE) {
+ slab = slab_alloc(vk, heap, size);
+ *out_slab = slab;
+ *out_index = 0;
+ return !!slab;
+ }
+
+ for (int i = 0; i < heap->num_slabs; i++) {
+ slab = heap->slabs[i];
+ if (slab->size < size)
+ continue;
+
+ // Attempt a best fit search
+ int best = -1;
+ for (int n = 0; n < slab->num_regions; n++) {
+ struct vk_region r = slab->regions[n];
+ if (!region_fits(r, size, align))
+ continue;
+ if (best >= 0 && region_len(r) > region_len(slab->regions[best]))
+ continue;
+ best = n;
+ }
+
+ if (best >= 0) {
+ *out_slab = slab;
+ *out_index = best;
+ return true;
+ }
+ }
+
+ // Otherwise, allocate a new vk_slab and append it to the list.
+ size_t cur_size = MPMAX(size, slab ? slab->size : 0);
+ size_t slab_size = MPVK_HEAP_SLAB_GROWTH_RATE * cur_size;
+ slab_size = MPMAX(MPVK_HEAP_MINIMUM_SLAB_SIZE, slab_size);
+ slab_size = MPMIN(MPVK_HEAP_MAXIMUM_SLAB_SIZE, slab_size);
+ assert(slab_size >= size);
+ slab = slab_alloc(vk, heap, slab_size);
+ if (!slab)
+ return false;
+ MP_TARRAY_APPEND(NULL, heap->slabs, heap->num_slabs, slab);
+
+ // Return the only region there is in a newly allocated slab
+ assert(slab->num_regions == 1);
+ *out_slab = slab;
+ *out_index = 0;
+ return true;
+}
+
+static bool slice_heap(struct mpvk_ctx *vk, struct vk_heap *heap, size_t size,
+ size_t alignment, struct vk_memslice *out)
+{
+ struct vk_slab *slab;
+ int index;
+ alignment = MP_ALIGN_UP(alignment, vk->limits.bufferImageGranularity);
+ if (!heap_get_region(vk, heap, size, alignment, &slab, &index))
+ return false;
+
+ struct vk_region reg = slab->regions[index];
+ MP_TARRAY_REMOVE_AT(slab->regions, slab->num_regions, index);
+ *out = (struct vk_memslice) {
+ .vkmem = slab->mem,
+ .offset = MP_ALIGN_UP(reg.start, alignment),
+ .size = size,
+ .priv = slab,
+ };
+
+ MP_DBG(vk, "Sub-allocating slice %zu + %zu from slab with size %zu\n",
+ out->offset, out->size, slab->size);
+
+ size_t out_end = out->offset + out->size;
+ insert_region(slab, (struct vk_region) { reg.start, out->offset });
+ insert_region(slab, (struct vk_region) { out_end, reg.end });
+
+ slab->used += size;
+ return true;
+}
+
+bool vk_malloc_generic(struct mpvk_ctx *vk, VkMemoryRequirements reqs,
+ VkMemoryPropertyFlagBits flags, struct vk_memslice *out)
+{
+ struct vk_heap *heap = find_heap(vk, 0, flags, &reqs);
+ return slice_heap(vk, heap, reqs.size, reqs.alignment, out);
+}
+
+bool vk_malloc_buffer(struct mpvk_ctx *vk, VkBufferUsageFlagBits bufFlags,
+ VkMemoryPropertyFlagBits memFlags, VkDeviceSize size,
+ VkDeviceSize alignment, struct vk_bufslice *out)
+{
+ struct vk_heap *heap = find_heap(vk, bufFlags, memFlags, NULL);
+ if (!slice_heap(vk, heap, size, alignment, &out->mem))
+ return false;
+
+ struct vk_slab *slab = out->mem.priv;
+ out->buf = slab->buffer;
+ if (slab->data)
+ out->data = (void *)((uintptr_t)slab->data + (ptrdiff_t)out->mem.offset);
+
+ return true;
+}
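
The slab sizing policy in heap_get_region() can be summarized by a small standalone helper (an illustration of the tunables at the top of this file, not code from the patch):

    // Grow geometrically from the largest existing slab (or the request),
    // clamped to the minimum/maximum slab sizes.
    static size_t pick_slab_size(size_t request, size_t largest_slab)
    {
        size_t size = MPVK_HEAP_SLAB_GROWTH_RATE * MPMAX(request, largest_slab);
        size = MPMAX(MPVK_HEAP_MINIMUM_SLAB_SIZE, size);
        size = MPMIN(MPVK_HEAP_MAXIMUM_SLAB_SIZE, size);
        return size;
    }
    // e.g. a first 100 KiB request yields a 1 MiB slab; a later 3 MiB request
    // next to that slab yields a 12 MiB slab; requests above 512 MiB skip the
    // heap entirely and get a dedicated slab of exactly the requested size.
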
diff --git a/video/out/vulkan/malloc.h b/video/out/vulkan/malloc.h
new file mode 100644
index 0000000000..65c1036929
--- /dev/null
+++ b/video/out/vulkan/malloc.h
@@ -0,0 +1,35 @@
+#pragma once
+
+#include "common.h"
+
+void vk_malloc_init(struct mpvk_ctx *vk);
+void vk_malloc_uninit(struct mpvk_ctx *vk);
+
+// Represents a single "slice" of generic (non-buffer) memory, plus some
+// metadata for accounting. This struct is essentially read-only.
+struct vk_memslice {
+ VkDeviceMemory vkmem;
+ size_t offset;
+ size_t size;
+ void *priv;
+};
+
+void vk_free_memslice(struct mpvk_ctx *vk, struct vk_memslice slice);
+bool vk_malloc_generic(struct mpvk_ctx *vk, VkMemoryRequirements reqs,
+ VkMemoryPropertyFlagBits flags, struct vk_memslice *out);
+
+// Represents a single "slice" of a larger buffer
+struct vk_bufslice {
+ struct vk_memslice mem; // must be freed by the user when done
+ VkBuffer buf; // the buffer this memory was sliced from
+ // For persistently mapped buffers, this points to the first usable byte of
+ // this slice.
+ void *data;
+};
+
+// Allocate a buffer slice. This is more efficient than vk_malloc_generic when
+// the user needs lots of buffers, since it doesn't require creating/destroying
+// lots of (little) VkBuffers.
+bool vk_malloc_buffer(struct mpvk_ctx *vk, VkBufferUsageFlagBits bufFlags,
+ VkMemoryPropertyFlagBits memFlags, VkDeviceSize size,
+ VkDeviceSize alignment, struct vk_bufslice *out);
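
A minimal usage sketch for the allocator API above (assumes an mpvk_ctx that already had vk_malloc_init() called on it; the usage flags, size and src_pixels buffer are illustrative, not from the patch):

    // Sub-allocate a host-visible staging slice, fill it, then return it
    // to the allocator once the GPU no longer uses it.
    struct vk_bufslice slice;
    if (vk_malloc_buffer(vk, VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
                         4096, 4, &slice))
    {
        memcpy(slice.data, src_pixels, 4096);  // slice.data is persistently mapped
        // ... record a copy from slice.buf at offset slice.mem.offset ...
        vk_free_memslice(vk, slice.mem);       // free once no longer in use
    }
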
diff --git a/video/out/vulkan/ra_vk.c b/video/out/vulkan/ra_vk.c
new file mode 100644
index 0000000000..ce0cbc66e9
--- /dev/null
+++ b/video/out/vulkan/ra_vk.c
@@ -0,0 +1,1590 @@
+#include "ra_vk.h"
+#include "malloc.h"
+#include "video/out/opengl/utils.h"
+
+static struct ra_fns ra_fns_vk;
+
+// For ra.priv
+struct ra_vk {
+ struct mpvk_ctx *vk;
+ struct ra_tex *clear_tex; // stupid hack for clear()
+ struct vk_cmd *cmd; // currently recording cmd
+};
+
+struct mpvk_ctx *ra_vk_get(struct ra *ra)
+{
+ if (ra->fns != &ra_fns_vk)
+ return NULL;
+
+ struct ra_vk *p = ra->priv;
+ return p->vk;
+}
+
+// Returns a command buffer, or NULL on error
+static struct vk_cmd *vk_require_cmd(struct ra *ra)
+{
+ struct ra_vk *p = ra->priv;
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+
+ if (!p->cmd)
+ p->cmd = vk_cmd_begin(vk, vk->pool);
+
+ return p->cmd;
+}
+
+// Note: This technically follows the flush() API, but we don't need
+// to expose that (and in fact, it's a bad idea) since we control flushing
+// behavior with ra_vk_present_frame already.
+static bool vk_flush(struct ra *ra, VkSemaphore *done)
+{
+ struct ra_vk *p = ra->priv;
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+
+ if (p->cmd) {
+ if (!vk_cmd_submit(vk, p->cmd, done))
+ return false;
+ p->cmd = NULL;
+ }
+
+ return true;
+}
+
+// The callback's *priv will always be set to `ra`
+static void vk_callback(struct ra *ra, vk_cb callback, void *arg)
+{
+ struct ra_vk *p = ra->priv;
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+
+ if (p->cmd) {
+ vk_cmd_callback(p->cmd, callback, ra, arg);
+ } else {
+ vk_dev_callback(vk, callback, ra, arg);
+ }
+}
+
+#define MAKE_LAZY_DESTRUCTOR(fun, argtype) \
+ static void fun##_lazy(struct ra *ra, argtype *arg) { \
+ vk_callback(ra, (vk_cb) fun, arg); \
+ }
+
+static void vk_destroy_ra(struct ra *ra)
+{
+ struct ra_vk *p = ra->priv;
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+
+ vk_flush(ra, NULL);
+ mpvk_dev_wait_idle(vk);
+ ra_tex_free(ra, &p->clear_tex);
+
+ talloc_free(ra);
+}
+
+static bool vk_setup_formats(struct ra *ra)
+{
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+
+ for (const struct vk_format *vk_fmt = vk_formats; vk_fmt->name; vk_fmt++) {
+ VkFormatProperties prop;
+ vkGetPhysicalDeviceFormatProperties(vk->physd, vk_fmt->iformat, &prop);
+
+ // As a bare minimum, we need to sample from an allocated image
+ VkFormatFeatureFlags flags = prop.optimalTilingFeatures;
+ if (!(flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT))
+ continue;
+
+ VkFormatFeatureFlags linear_bits, render_bits;
+ linear_bits = VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
+ render_bits = VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
+ VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
+
+ struct ra_format *fmt = talloc_zero(ra, struct ra_format);
+ *fmt = (struct ra_format) {
+ .name = vk_fmt->name,
+ .priv = (void *)vk_fmt,
+ .ctype = vk_fmt->ctype,
+ .ordered = !vk_fmt->fucked_order,
+ .num_components = vk_fmt->components,
+ .pixel_size = vk_fmt->bytes,
+ .linear_filter = !!(flags & linear_bits),
+ .renderable = !!(flags & render_bits),
+ };
+
+ for (int i = 0; i < 4; i++)
+ fmt->component_size[i] = fmt->component_depth[i] = vk_fmt->bits[i];
+
+ MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, fmt);
+ }
+
+ // Populate some other capabilities related to formats while we're at it
+ VkImageType imgType[3] = {
+ VK_IMAGE_TYPE_1D,
+ VK_IMAGE_TYPE_2D,
+ VK_IMAGE_TYPE_3D
+ };
+
+ // R8_UNORM is supported on literally every single vulkan implementation
+ const VkFormat testfmt = VK_FORMAT_R8_UNORM;
+
+ for (int d = 0; d < 3; d++) {
+ VkImageFormatProperties iprop;
+ VkResult res = vkGetPhysicalDeviceImageFormatProperties(vk->physd,
+ testfmt, imgType[d], VK_IMAGE_TILING_OPTIMAL,
+ VK_IMAGE_USAGE_SAMPLED_BIT, 0, &iprop);
+
+ switch (imgType[d]) {
+ case VK_IMAGE_TYPE_1D:
+ if (res == VK_SUCCESS)
+ ra->caps |= RA_CAP_TEX_1D;
+ break;
+ case VK_IMAGE_TYPE_2D:
+ // 2D formats must be supported by RA, so ensure this is the case
+ VK_ASSERT(res, "Querying 2D format limits");
+ ra->max_texture_wh = MPMIN(iprop.maxExtent.width, iprop.maxExtent.height);
+ break;
+ case VK_IMAGE_TYPE_3D:
+ if (res == VK_SUCCESS)
+ ra->caps |= RA_CAP_TEX_3D;
+ break;
+ }
+ }
+
+ // RA_CAP_BLIT implies both blitting between images as well as blitting
+ // directly to the swapchain image, so check for all three operations
+ bool blittable = true;
+ VkFormatProperties prop;
+ vkGetPhysicalDeviceFormatProperties(vk->physd, testfmt, &prop);
+ if (!(prop.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_SRC_BIT))
+ blittable = false;
+ if (!(prop.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT))
+ blittable = false;
+
+ vkGetPhysicalDeviceFormatProperties(vk->physd, vk->surf_format.format, &prop);
+ if (!(prop.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT))
+ blittable = false;
+
+ if (blittable)
+ ra->caps |= RA_CAP_BLIT;
+
+ return true;
+
+error:
+ return false;
+}
+
+static struct ra_fns ra_fns_vk;
+
+struct ra *ra_create_vk(struct mpvk_ctx *vk, struct mp_log *log)
+{
+ assert(vk->dev);
+ assert(vk->alloc);
+
+ struct ra *ra = talloc_zero(NULL, struct ra);
+ ra->log = log;
+ ra->fns = &ra_fns_vk;
+
+ struct ra_vk *p = ra->priv = talloc_zero(ra, struct ra_vk);
+ p->vk = vk;
+
+ // There's no way to query the supported GLSL version from VK_NV_glsl_shader
+ // (thanks nvidia), so just pick the GL version that modern nvidia devices
+ // support.
+ ra->glsl_version = 450;
+ ra->glsl_vulkan = true;
+ ra->max_shmem = vk->limits.maxComputeSharedMemorySize;
+ ra->caps = RA_CAP_NESTED_ARRAY;
+
+ if (vk->pool->props.queueFlags & VK_QUEUE_COMPUTE_BIT)
+ ra->caps |= RA_CAP_COMPUTE;
+
+ if (!vk_setup_formats(ra))
+ goto error;
+
+ // UBO support is required
+ ra->caps |= RA_CAP_BUF_RO;
+
+ // Try creating a shader storage buffer
+ struct ra_buf_params ssbo_params = {
+ .type = RA_BUF_TYPE_SHADER_STORAGE,
+ .size = 16,
+ };
+
+ struct ra_buf *ssbo = ra_buf_create(ra, &ssbo_params);
+ if (ssbo) {
+ ra->caps |= RA_CAP_BUF_RW;
+ ra_buf_free(ra, &ssbo);
+ }
+
+ // To support clear() by region, we need to allocate a dummy 1x1 image that
+ // will be used as the source of blit operations
+ struct ra_tex_params clear_params = {
+ .dimensions = 1, // no point in using a 2D image if height = 1
+ .w = 1,
+ .h = 1,
+ .d = 1,
+ .format = ra_find_float16_format(ra, 4),
+ .blit_src = 1,
+ .host_mutable = 1,
+ };
+
+ p->clear_tex = ra_tex_create(ra, &clear_params);
+ if (!p->clear_tex) {
+ MP_ERR(ra, "Failed creating 1x1 dummy texture for clear()!\n");
+ goto error;
+ }
+
+ return ra;
+
+error:
+ vk_destroy_ra(ra);
+ return NULL;
+}
+
+// Boilerplate wrapper around vkCreateRenderPass to ensure passes remain
+// compatible
+static VkResult vk_create_render_pass(VkDevice dev, const struct ra_format *fmt,
+ bool load_fbo, VkRenderPass *out)
+{
+ struct vk_format *vk_fmt = fmt->priv;
+ assert(fmt->renderable);
+
+ VkRenderPassCreateInfo rinfo = {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = &(VkAttachmentDescription) {
+ .format = vk_fmt->iformat,
+ .samples = VK_SAMPLE_COUNT_1_BIT,
+ .loadOp = load_fbo ? VK_ATTACHMENT_LOAD_OP_LOAD
+ : VK_ATTACHMENT_LOAD_OP_DONT_CARE,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+ .finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+ },
+ .subpassCount = 1,
+ .pSubpasses = &(VkSubpassDescription) {
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .colorAttachmentCount = 1,
+ .pColorAttachments = &(VkAttachmentReference) {
+ .attachment = 0,
+ .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+ },
+ },
+ };
+
+ return vkCreateRenderPass(dev, &rinfo, MPVK_ALLOCATOR, out);
+}
+
+// For ra_tex.priv
+struct ra_tex_vk {
+ bool external_img;
+ VkImageType type;
+ VkImage img;
+ struct vk_memslice mem;
+ // for sampling
+ VkImageView view;
+ VkSampler sampler;
+ // for rendering
+ VkFramebuffer framebuffer;
+ VkRenderPass dummyPass;
+ // for uploading
+ struct ra_buf_pool pbo;
+ // "current" metadata, can change during the course of execution
+ VkImageLayout current_layout;
+ VkPipelineStageFlagBits current_stage;
+ VkAccessFlagBits current_access;
+};
+
+// Small helper to ease image barrier creation. if `discard` is set, the contents
+// of the image will be undefined after the barrier
+static void tex_barrier(struct vk_cmd *cmd, struct ra_tex_vk *tex_vk,
+ VkPipelineStageFlagBits newStage,
+ VkAccessFlagBits newAccess, VkImageLayout newLayout,
+ bool discard)
+{
+ VkImageMemoryBarrier imgBarrier = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .oldLayout = tex_vk->current_layout,
+ .newLayout = newLayout,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .srcAccessMask = tex_vk->current_access,
+ .dstAccessMask = newAccess,
+ .image = tex_vk->img,
+ .subresourceRange = vk_range,
+ };
+
+ if (discard) {
+ imgBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+ imgBarrier.srcAccessMask = 0;
+ }
+
+ if (imgBarrier.oldLayout != imgBarrier.newLayout ||
+ imgBarrier.srcAccessMask != imgBarrier.dstAccessMask)
+ {
+ vkCmdPipelineBarrier(cmd->buf, tex_vk->current_stage, newStage, 0,
+ 0, NULL, 0, NULL, 1, &imgBarrier);
+ }
+
+ tex_vk->current_stage = newStage;
+ tex_vk->current_layout = newLayout;
+ tex_vk->current_access = newAccess;
+}
+
+static void vk_tex_destroy(struct ra *ra, struct ra_tex *tex)
+{
+ if (!tex)
+ return;
+
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+ struct ra_tex_vk *tex_vk = tex->priv;
+
+ ra_buf_pool_uninit(ra, &tex_vk->pbo);
+ vkDestroyFramebuffer(vk->dev, tex_vk->framebuffer, MPVK_ALLOCATOR);
+ vkDestroyRenderPass(vk->dev, tex_vk->dummyPass, MPVK_ALLOCATOR);
+ vkDestroySampler(vk->dev, tex_vk->sampler, MPVK_ALLOCATOR);
+ vkDestroyImageView(vk->dev, tex_vk->view, MPVK_ALLOCATOR);
+ if (!tex_vk->external_img) {
+ vkDestroyImage(vk->dev, tex_vk->img, MPVK_ALLOCATOR);
+ vk_free_memslice(vk, tex_vk->mem);
+ }
+
+ talloc_free(tex);
+}
+
+MAKE_LAZY_DESTRUCTOR(vk_tex_destroy, struct ra_tex);
+
+// Initializes non-VkImage values like the image view, samplers, etc.
+static bool vk_init_image(struct ra *ra, struct ra_tex *tex)
+{
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+
+ struct ra_tex_params *params = &tex->params;
+ struct ra_tex_vk *tex_vk = tex->priv;
+ assert(tex_vk->img);
+
+ tex_vk->current_layout = VK_IMAGE_LAYOUT_UNDEFINED;
+ tex_vk->current_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+ tex_vk->current_access = 0;
+
+ if (params->render_src || params->render_dst) {
+ static const VkImageViewType viewType[] = {
+ [VK_IMAGE_TYPE_1D] = VK_IMAGE_VIEW_TYPE_1D,
+ [VK_IMAGE_TYPE_2D] = VK_IMAGE_VIEW_TYPE_2D,
+ [VK_IMAGE_TYPE_3D] = VK_IMAGE_VIEW_TYPE_3D,
+ };
+
+ const struct vk_format *fmt = params->format->priv;
+ VkImageViewCreateInfo vinfo = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = tex_vk->img,
+ .viewType = viewType[tex_vk->type],
+ .format = fmt->iformat,
+ .subresourceRange = vk_range,
+ };
+
+ VK(vkCreateImageView(vk->dev, &vinfo, MPVK_ALLOCATOR, &tex_vk->view));
+ }
+
+ if (params->render_src) {
+ assert(params->format->linear_filter || !params->src_linear);
+ VkFilter filter = params->src_linear
+ ? VK_FILTER_LINEAR
+ : VK_FILTER_NEAREST;
+ VkSamplerAddressMode wrap = params->src_repeat
+ ? VK_SAMPLER_ADDRESS_MODE_REPEAT
+ : VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
+ VkSamplerCreateInfo sinfo = {
+ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+ .magFilter = filter,
+ .minFilter = filter,
+ .addressModeU = wrap,
+ .addressModeV = wrap,
+ .addressModeW = wrap,
+ .maxAnisotropy = 1.0,
+ };
+
+ VK(vkCreateSampler(vk->dev, &sinfo, MPVK_ALLOCATOR, &tex_vk->sampler));
+ }
+
+ if (params->render_dst) {
+ // Framebuffers need to be created against a specific render pass
+ // layout, so we need to temporarily create a skeleton/dummy render
+ // pass for vulkan to figure out the compatibility
+ VK(vk_create_render_pass(vk->dev, params->format, false, &tex_vk->dummyPass));
+
+ VkFramebufferCreateInfo finfo = {
+ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .renderPass = tex_vk->dummyPass,
+ .attachmentCount = 1,
+ .pAttachments = &tex_vk->view,
+ .width = tex->params.w,
+ .height = tex->params.h,
+ .layers = 1,
+ };
+
+ VK(vkCreateFramebuffer(vk->dev, &finfo, MPVK_ALLOCATOR,
+ &tex_vk->framebuffer));
+
+ // NOTE: Normally we would free the dummyPass again here, but a bug
+ // in the nvidia vulkan driver causes a segfault if you do.
+ }
+
+ return true;
+
+error:
+ return false;
+}
+
+static struct ra_tex *vk_tex_create(struct ra *ra,
+ const struct ra_tex_params *params)
+{
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+
+ struct ra_tex *tex = talloc_zero(NULL, struct ra_tex);
+ tex->params = *params;
+ tex->params.initial_data = NULL;
+
+ struct ra_tex_vk *tex_vk = tex->priv = talloc_zero(tex, struct ra_tex_vk);
+
+ const struct vk_format *fmt = params->format->priv;
+ switch (params->dimensions) {
+ case 1: tex_vk->type = VK_IMAGE_TYPE_1D; break;
+ case 2: tex_vk->type = VK_IMAGE_TYPE_2D; break;
+ case 3: tex_vk->type = VK_IMAGE_TYPE_3D; break;
+ default: abort();
+ }
+
+ VkImageUsageFlags usage = 0;
+ if (params->render_src)
+ usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
+ if (params->render_dst)
+ usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+ if (params->storage_dst)
+ usage |= VK_IMAGE_USAGE_STORAGE_BIT;
+ if (params->blit_src)
+ usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
+ if (params->host_mutable || params->blit_dst || params->initial_data)
+ usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
+
+ // Double-check image usage support and fail immediately if invalid
+ VkImageFormatProperties iprop;
+ VkResult res = vkGetPhysicalDeviceImageFormatProperties(vk->physd,
+ fmt->iformat, tex_vk->type, VK_IMAGE_TILING_OPTIMAL, usage, 0,
+ &iprop);
+ if (res == VK_ERROR_FORMAT_NOT_SUPPORTED) {
+ return NULL;
+ } else {
+ VK_ASSERT(res, "Querying image format properties");
+ }
+
+ VkFormatProperties prop;
+ vkGetPhysicalDeviceFormatProperties(vk->physd, fmt->iformat, &prop);
+ VkFormatFeatureFlags flags = prop.optimalTilingFeatures;
+
+ bool has_blit_src = flags & VK_FORMAT_FEATURE_BLIT_SRC_BIT,
+ has_src_linear = flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
+
+ if (params->w > iprop.maxExtent.width ||
+ params->h > iprop.maxExtent.height ||
+ params->d > iprop.maxExtent.depth ||
+ (params->blit_src && !has_blit_src) ||
+ (params->src_linear && !has_src_linear))
+ {
+ return NULL;
+ }
+
+ VkImageCreateInfo iinfo = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+ .imageType = tex_vk->type,
+ .format = fmt->iformat,
+ .extent = (VkExtent3D) { params->w, params->h, params->d },
+ .mipLevels = 1,
+ .arrayLayers = 1,
+ .samples = VK_SAMPLE_COUNT_1_BIT,
+ .tiling = VK_IMAGE_TILING_OPTIMAL,
+ .usage = usage,
+ .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 1,
+ .pQueueFamilyIndices = &vk->pool->qf,
+ };
+
+ VK(vkCreateImage(vk->dev, &iinfo, MPVK_ALLOCATOR, &tex_vk->img));
+
+ VkMemoryPropertyFlagBits memFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ VkMemoryRequirements reqs;
+ vkGetImageMemoryRequirements(vk->dev, tex_vk->img, &reqs);
+
+ struct vk_memslice *mem = &tex_vk->mem;
+ if (!vk_malloc_generic(vk, reqs, memFlags, mem))
+ goto error;
+
+ VK(vkBindImageMemory(vk->dev, tex_vk->img, mem->vkmem, mem->offset));
+
+ if (!vk_init_image(ra, tex))
+ goto error;
+
+ if (params->initial_data) {
+ struct ra_tex_upload_params ul_params = {
+ .tex = tex,
+ .invalidate = true,
+ .src = params->initial_data,
+ .stride = params->w * fmt->bytes,
+ };
+ if (!ra->fns->tex_upload(ra, &ul_params))
+ goto error;
+ }
+
+ return tex;
+
+error:
+ vk_tex_destroy(ra, tex);
+ return NULL;
+}
+
+struct ra_tex *ra_vk_wrap_swapchain_img(struct ra *ra, VkImage vkimg,
+ VkSwapchainCreateInfoKHR info)
+{
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+ struct ra_tex *tex = NULL;
+
+ const struct ra_format *format = NULL;
+ for (int i = 0; i < ra->num_formats; i++) {
+ const struct vk_format *fmt = ra->formats[i]->priv;
+ if (fmt->iformat == vk->surf_format.format) {
+ format = ra->formats[i];
+ break;
+ }
+ }
+
+ if (!format) {
+ MP_ERR(ra, "Could not find ra_format suitable for wrapped swchain image "
+ "with surface format 0x%x\n", vk->surf_format.format);
+ goto error;
+ }
+
+ tex = talloc_zero(NULL, struct ra_tex);
+ tex->params = (struct ra_tex_params) {
+ .format = format,
+ .dimensions = 2,
+ .w = info.imageExtent.width,
+ .h = info.imageExtent.height,
+ .d = 1,
+ .blit_src = !!(info.imageUsage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT),
+ .blit_dst = !!(info.imageUsage & VK_IMAGE_USAGE_TRANSFER_DST_BIT),
+ .render_src = !!(info.imageUsage & VK_IMAGE_USAGE_SAMPLED_BIT),
+ .render_dst = !!(info.imageUsage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT),
+ .storage_dst = !!(info.imageUsage & VK_IMAGE_USAGE_STORAGE_BIT),
+ };
+
+ struct ra_tex_vk *tex_vk = tex->priv = talloc_zero(tex, struct ra_tex_vk);
+ tex_vk->type = VK_IMAGE_TYPE_2D;
+ tex_vk->external_img = true;
+ tex_vk->img = vkimg;
+
+ if (!vk_init_image(ra, tex))
+ goto error;
+
+ return tex;
+
+error:
+ vk_tex_destroy(ra, tex);
+ return NULL;
+}
+
+// For ra_buf.priv
+struct ra_buf_vk {
+ struct vk_bufslice slice;
+ int refcount; // 1 = object allocated but not in use, > 1 = in use
+ bool needsflush;
+ // "current" metadata, can change during course of execution
+ VkPipelineStageFlagBits current_stage;
+ VkAccessFlagBits current_access;
+};
+
+static void vk_buf_deref(struct ra *ra, struct ra_buf *buf)
+{
+ if (!buf)
+ return;
+
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+ struct ra_buf_vk *buf_vk = buf->priv;
+
+ if (--buf_vk->refcount == 0) {
+ vk_free_memslice(vk, buf_vk->slice.mem);
+ talloc_free(buf);
+ }
+}
+
+static void buf_barrier(struct ra *ra, struct vk_cmd *cmd, struct ra_buf *buf,
+ VkPipelineStageFlagBits newStage,
+ VkAccessFlagBits newAccess, int offset, size_t size)
+{
+ struct ra_buf_vk *buf_vk = buf->priv;
+
+ VkBufferMemoryBarrier buffBarrier = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+ .srcAccessMask = buf_vk->current_access,
+ .dstAccessMask = newAccess,
+ .buffer = buf_vk->slice.buf,
+ .offset = offset,
+ .size = size,
+ };
+
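+    // If the host wrote to a mapped slice since the last use, those writes
+    // first have to be made visible to the device, so synchronize against
+    // the host stage. host_mapped buffers can be written at any time, so
+    // always assume the worst case for them.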
+ if (buf_vk->needsflush || buf->params.host_mapped) {
+ buffBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
+ buf_vk->current_stage = VK_PIPELINE_STAGE_HOST_BIT;
+ buf_vk->needsflush = false;
+ }
+
+ if (buffBarrier.srcAccessMask != buffBarrier.dstAccessMask) {
+ vkCmdPipelineBarrier(cmd->buf, buf_vk->current_stage, newStage, 0,
+ 0, NULL, 1, &buffBarrier, 0, NULL);
+ }
+
+ buf_vk->current_stage = newStage;
+ buf_vk->current_access = newAccess;
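+    // Hold a reference for the duration of this command; it is released by
+    // the vk_buf_deref callback once the command's fence signals, which is
+    // also what vk_buf_poll uses to tell whether the buffer is still busy.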
+ buf_vk->refcount++;
+ vk_cmd_callback(cmd, (vk_cb) vk_buf_deref, ra, buf);
+}
+
+#define vk_buf_destroy vk_buf_deref
+MAKE_LAZY_DESTRUCTOR(vk_buf_destroy, struct ra_buf);
+
+static void vk_buf_update(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset,
+ const void *data, size_t size)
+{
+ assert(buf->params.host_mutable || buf->params.initial_data);
+ struct ra_buf_vk *buf_vk = buf->priv;
+
+ // For host-mapped buffers, we can just directly memcpy the buffer contents.
+ // Otherwise, we can update the buffer from the GPU using a command buffer.
+ if (buf_vk->slice.data) {
+ assert(offset + size <= buf->params.size);
+ uintptr_t addr = (uintptr_t)buf_vk->slice.data + offset;
+ memcpy((void *)addr, data, size);
+ buf_vk->needsflush = true;
+ } else {
+ struct vk_cmd *cmd = vk_require_cmd(ra);
+ if (!cmd) {
+ MP_ERR(ra, "Failed updating buffer!\n");
+ return;
+ }
+
+ buf_barrier(ra, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ VK_ACCESS_TRANSFER_WRITE_BIT, offset, size);
+
+ VkDeviceSize bufOffset = buf_vk->slice.mem.offset + offset;
+ assert(bufOffset == MP_ALIGN_UP(bufOffset, 4));
+ vkCmdUpdateBuffer(cmd->buf, buf_vk->slice.buf, bufOffset, size, data);
+ }
+}
+
+static struct ra_buf *vk_buf_create(struct ra *ra,
+ const struct ra_buf_params *params)
+{
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+
+ struct ra_buf *buf = talloc_zero(NULL, struct ra_buf);
+ buf->params = *params;
+
+ struct ra_buf_vk *buf_vk = buf->priv = talloc_zero(buf, struct ra_buf_vk);
+ buf_vk->current_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+ buf_vk->current_access = 0;
+ buf_vk->refcount = 1;
+
+ VkBufferUsageFlagBits bufFlags = 0;
+ VkMemoryPropertyFlagBits memFlags = 0;
+ VkDeviceSize align = 4; // alignment 4 is needed for buf_update
+
+ switch (params->type) {
+ case RA_BUF_TYPE_TEX_UPLOAD:
+ bufFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+ memFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+ break;
+ case RA_BUF_TYPE_UNIFORM:
+ bufFlags |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
+ memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ align = MP_ALIGN_UP(align, vk->limits.minUniformBufferOffsetAlignment);
+ break;
+ case RA_BUF_TYPE_SHADER_STORAGE:
+ bufFlags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
+ memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ align = MP_ALIGN_UP(align, vk->limits.minStorageBufferOffsetAlignment);
+ break;
+ case RA_BUF_TYPE_VERTEX:
+ bufFlags |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
+ memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ break;
+ default: abort();
+ }
+
+ if (params->host_mutable || params->initial_data) {
+ bufFlags |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+ align = MP_ALIGN_UP(align, vk->limits.optimalBufferCopyOffsetAlignment);
+ }
+
+ if (params->host_mapped) {
+ memFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+ VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
+ VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+ }
+
+ if (!vk_malloc_buffer(vk, bufFlags, memFlags, params->size, align,
+ &buf_vk->slice))
+ {
+ goto error;
+ }
+
+ if (params->host_mapped)
+ buf->data = buf_vk->slice.data;
+
+ if (params->initial_data)
+ vk_buf_update(ra, buf, 0, params->initial_data, params->size);
+
+ buf->params.initial_data = NULL; // do this after vk_buf_update
+ return buf;
+
+error:
+ vk_buf_destroy(ra, buf);
+ return NULL;
+}
+
+static bool vk_buf_poll(struct ra *ra, struct ra_buf *buf)
+{
+ struct ra_buf_vk *buf_vk = buf->priv;
+ return buf_vk->refcount == 1;
+}
+
+static bool vk_tex_upload(struct ra *ra,
+ const struct ra_tex_upload_params *params)
+{
+ struct ra_tex *tex = params->tex;
+ struct ra_tex_vk *tex_vk = tex->priv;
+
+ if (!params->buf)
+ return ra_tex_upload_pbo(ra, &tex_vk->pbo, params);
+
+ assert(!params->src);
+ assert(params->buf);
+ struct ra_buf *buf = params->buf;
+ struct ra_buf_vk *buf_vk = buf->priv;
+
+ VkBufferImageCopy region = {
+ .bufferOffset = buf_vk->slice.mem.offset + params->buf_offset,
+ .bufferRowLength = tex->params.w,
+ .bufferImageHeight = tex->params.h,
+ .imageSubresource = vk_layers,
+ .imageExtent = (VkExtent3D){tex->params.w, tex->params.h, tex->params.d},
+ };
+
+ if (tex->params.dimensions == 2) {
+ int pix_size = tex->params.format->pixel_size;
+ region.bufferRowLength = params->stride / pix_size;
+ if (region.bufferRowLength * pix_size != params->stride) {
+ MP_ERR(ra, "Texture upload strides must be a multiple of the texel "
+ "size!\n");
+ goto error;
+ }
+
+ if (params->rc) {
+ struct mp_rect *rc = params->rc;
+ region.imageOffset = (VkOffset3D){rc->x0, rc->y0, 0};
+ region.imageExtent = (VkExtent3D){mp_rect_w(*rc), mp_rect_h(*rc), 1};
+ }
+ }
+
+    // Size of the affected buffer region, in bytes
+    uint64_t size = region.bufferRowLength * region.bufferImageHeight *
+                    region.imageExtent.depth * tex->params.format->pixel_size;
+
+ struct vk_cmd *cmd = vk_require_cmd(ra);
+ if (!cmd)
+ goto error;
+
+ buf_barrier(ra, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ VK_ACCESS_TRANSFER_READ_BIT, region.bufferOffset, size);
+
+ tex_barrier(cmd, tex_vk, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ VK_ACCESS_TRANSFER_WRITE_BIT,
+ VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ params->invalidate);
+
+ vkCmdCopyBufferToImage(cmd->buf, buf_vk->slice.buf, tex_vk->img,
+ tex_vk->current_layout, 1, &region);
+
+ return true;
+
+error:
+ return false;
+}
+
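+// Use one descriptor set per command that can be in flight at once, so a set
+// can be rewritten for a new command without clobbering one that is still
+// referenced by a previously submitted command.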
+#define MPVK_NUM_DS MPVK_MAX_STREAMING_DEPTH
+
+// For ra_renderpass.priv
+struct ra_renderpass_vk {
+ // Compiled shaders
+ VkShaderModule vert;
+ VkShaderModule frag;
+ VkShaderModule comp;
+ // Pipeline / render pass
+ VkPipeline pipe;
+ VkPipelineLayout pipeLayout;
+ VkPipelineCache pipeCache;
+ VkRenderPass renderPass;
+ // Descriptor set (bindings)
+ VkDescriptorSetLayout dsLayout;
+ VkDescriptorPool dsPool;
+ VkDescriptorSet dss[MPVK_NUM_DS];
+ int dindex;
+ // Vertex buffers (vertices)
+ struct ra_buf_pool vbo;
+
+ // For updating
+ VkWriteDescriptorSet *dswrite;
+ VkDescriptorImageInfo *dsiinfo;
+ VkDescriptorBufferInfo *dsbinfo;
+};
+
+static void vk_renderpass_destroy(struct ra *ra, struct ra_renderpass *pass)
+{
+ if (!pass)
+ return;
+
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+ struct ra_renderpass_vk *pass_vk = pass->priv;
+
+ ra_buf_pool_uninit(ra, &pass_vk->vbo);
+ vkDestroyPipeline(vk->dev, pass_vk->pipe, MPVK_ALLOCATOR);
+ vkDestroyPipelineCache(vk->dev, pass_vk->pipeCache, MPVK_ALLOCATOR);
+ vkDestroyRenderPass(vk->dev, pass_vk->renderPass, MPVK_ALLOCATOR);
+ vkDestroyPipelineLayout(vk->dev, pass_vk->pipeLayout, MPVK_ALLOCATOR);
+ vkDestroyDescriptorPool(vk->dev, pass_vk->dsPool, MPVK_ALLOCATOR);
+ vkDestroyDescriptorSetLayout(vk->dev, pass_vk->dsLayout, MPVK_ALLOCATOR);
+ vkDestroyShaderModule(vk->dev, pass_vk->vert, MPVK_ALLOCATOR);
+ vkDestroyShaderModule(vk->dev, pass_vk->frag, MPVK_ALLOCATOR);
+ vkDestroyShaderModule(vk->dev, pass_vk->comp, MPVK_ALLOCATOR);
+
+ talloc_free(pass);
+}
+
+MAKE_LAZY_DESTRUCTOR(vk_renderpass_destroy, struct ra_renderpass);
+
+static const VkDescriptorType dsType[] = {
+ [RA_VARTYPE_TEX] = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ [RA_VARTYPE_IMG_W] = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ [RA_VARTYPE_BUF_RO] = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ [RA_VARTYPE_BUF_RW] = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+};
+
+static bool vk_get_input_format(struct ra *ra, struct ra_renderpass_input *inp,
+ VkFormat *out_fmt)
+{
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+
+ enum ra_ctype ctype;
+ switch (inp->type) {
+ case RA_VARTYPE_FLOAT: ctype = RA_CTYPE_FLOAT; break;
+ case RA_VARTYPE_BYTE_UNORM: ctype = RA_CTYPE_UNORM; break;
+ default: abort();
+ }
+
+ assert(inp->dim_m == 1);
+ for (const struct vk_format *fmt = vk_formats; fmt->name; fmt++) {
+ if (fmt->ctype != ctype)
+ continue;
+ if (fmt->components != inp->dim_v)
+ continue;
+ if (fmt->bytes != ra_renderpass_input_layout(inp).size)
+ continue;
+
+ // Ensure this format is valid for vertex attributes
+ VkFormatProperties prop;
+ vkGetPhysicalDeviceFormatProperties(vk->physd, fmt->iformat, &prop);
+ if (!(prop.bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT))
+ continue;
+
+ *out_fmt = fmt->iformat;
+ return true;
+ }
+
+ return false;
+}
+
+static const VkPipelineStageFlagBits stageFlags[] = {
+ [RA_RENDERPASS_TYPE_RASTER] = VK_SHADER_STAGE_FRAGMENT_BIT,
+ [RA_RENDERPASS_TYPE_COMPUTE] = VK_SHADER_STAGE_COMPUTE_BIT,
+};
+
+static struct ra_renderpass *vk_renderpass_create(struct ra *ra,
+ const struct ra_renderpass_params *params)
+{
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+
+ struct ra_renderpass *pass = talloc_zero(NULL, struct ra_renderpass);
+ pass->params = *ra_renderpass_params_copy(pass, params);
+ pass->params.cached_program = (bstr){0};
+ struct ra_renderpass_vk *pass_vk = pass->priv =
+ talloc_zero(pass, struct ra_renderpass_vk);
+
+    int dsCount[RA_VARTYPE_COUNT] = {0};
+ VkDescriptorSetLayoutBinding *bindings = NULL;
+ int num_bindings = 0;
+
+ for (int i = 0; i < params->num_inputs; i++) {
+ struct ra_renderpass_input *inp = &params->inputs[i];
+ switch (inp->type) {
+ case RA_VARTYPE_TEX:
+ case RA_VARTYPE_IMG_W:
+ case RA_VARTYPE_BUF_RO:
+ case RA_VARTYPE_BUF_RW: {
+ VkDescriptorSetLayoutBinding desc = {
+ .binding = inp->binding,
+ .descriptorType = dsType[inp->type],
+ .descriptorCount = 1,
+ .stageFlags = stageFlags[params->type],
+ };
+
+ MP_TARRAY_APPEND(pass, bindings, num_bindings, desc);
+ dsCount[inp->type]++;
+ break;
+ }
+ default: abort();
+ }
+ }
+
+ VkDescriptorPoolSize *dsPoolSizes = NULL;
+ int poolSizeCount = 0;
+ for (enum ra_vartype t = 0; t < RA_VARTYPE_COUNT; t++) {
+ if (dsCount[t] > 0) {
+ VkDescriptorPoolSize dssize = {
+ .type = dsType[t],
+ .descriptorCount = dsCount[t] * MPVK_NUM_DS,
+ };
+
+ MP_TARRAY_APPEND(pass, dsPoolSizes, poolSizeCount, dssize);
+ }
+ }
+
+ VkDescriptorPoolCreateInfo pinfo = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+ .maxSets = MPVK_NUM_DS,
+ .pPoolSizes = dsPoolSizes,
+ .poolSizeCount = poolSizeCount,
+ };
+
+ VK(vkCreateDescriptorPool(vk->dev, &pinfo, MPVK_ALLOCATOR, &pass_vk->dsPool));
+ talloc_free(dsPoolSizes);
+
+ pass_vk->dswrite = talloc_array(pass, VkWriteDescriptorSet, num_bindings);
+ pass_vk->dsiinfo = talloc_array(pass, VkDescriptorImageInfo, num_bindings);
+ pass_vk->dsbinfo = talloc_array(pass, VkDescriptorBufferInfo, num_bindings);
+
+ VkDescriptorSetLayoutCreateInfo dinfo = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .pBindings = bindings,
+ .bindingCount = num_bindings,
+ };
+
+ VK(vkCreateDescriptorSetLayout(vk->dev, &dinfo, MPVK_ALLOCATOR,
+ &pass_vk->dsLayout));
+
+ VkDescriptorSetLayout layouts[MPVK_NUM_DS];
+ for (int i = 0; i < MPVK_NUM_DS; i++)
+ layouts[i] = pass_vk->dsLayout;
+
+ VkDescriptorSetAllocateInfo ainfo = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+ .descriptorPool = pass_vk->dsPool,
+ .descriptorSetCount = MPVK_NUM_DS,
+ .pSetLayouts = layouts,
+ };
+
+ VK(vkAllocateDescriptorSets(vk->dev, &ainfo, pass_vk->dss));
+
+ VkPipelineLayoutCreateInfo linfo = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &pass_vk->dsLayout,
+ };
+
+ VK(vkCreatePipelineLayout(vk->dev, &linfo, MPVK_ALLOCATOR,
+ &pass_vk->pipeLayout));
+
+ VkPipelineCacheCreateInfo pcinfo = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,
+ .pInitialData = params->cached_program.start,
+ .initialDataSize = params->cached_program.len,
+ };
+
+ VK(vkCreatePipelineCache(vk->dev, &pcinfo, MPVK_ALLOCATOR, &pass_vk->pipeCache));
+
+ VkShaderModuleCreateInfo sinfo = {
+ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
+ };
+
+ switch (params->type) {
+ case RA_RENDERPASS_TYPE_RASTER: {
+ sinfo.pCode = (uint32_t *)params->vertex_shader;
+ sinfo.codeSize = strlen(params->vertex_shader);
+ VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &pass_vk->vert));
+
+ sinfo.pCode = (uint32_t *)params->frag_shader;
+ sinfo.codeSize = strlen(params->frag_shader);
+ VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &pass_vk->frag));
+
+ VK(vk_create_render_pass(vk->dev, params->target_format,
+ params->enable_blend, &pass_vk->renderPass));
+
+ VkPipelineShaderStageCreateInfo stages[] = {
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = pass_vk->vert,
+ .pName = "main",
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = pass_vk->frag,
+ .pName = "main",
+ }
+ };
+
+ VkVertexInputAttributeDescription *attrs = talloc_array(pass,
+ VkVertexInputAttributeDescription, params->num_vertex_attribs);
+
+ for (int i = 0; i < params->num_vertex_attribs; i++) {
+ struct ra_renderpass_input *inp = &params->vertex_attribs[i];
+ attrs[i] = (VkVertexInputAttributeDescription) {
+ .location = i,
+ .binding = 0,
+ .offset = inp->offset,
+ };
+
+ if (!vk_get_input_format(ra, inp, &attrs[i].format)) {
+ MP_ERR(ra, "No suitable VkFormat for vertex attrib '%s'!\n",
+ inp->name);
+ goto error;
+ }
+ }
+
+ static const VkBlendFactor blendFactors[] = {
+ [RA_BLEND_ZERO] = VK_BLEND_FACTOR_ZERO,
+ [RA_BLEND_ONE] = VK_BLEND_FACTOR_ONE,
+ [RA_BLEND_SRC_ALPHA] = VK_BLEND_FACTOR_SRC_ALPHA,
+ [RA_BLEND_ONE_MINUS_SRC_ALPHA] = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
+ };
+
+ VkPipelineColorBlendAttachmentState binfo = {
+ .blendEnable = params->enable_blend,
+ .colorBlendOp = VK_BLEND_OP_ADD,
+ .srcColorBlendFactor = blendFactors[params->blend_src_rgb],
+ .dstColorBlendFactor = blendFactors[params->blend_dst_rgb],
+ .alphaBlendOp = VK_BLEND_OP_ADD,
+ .srcAlphaBlendFactor = blendFactors[params->blend_src_alpha],
+ .dstAlphaBlendFactor = blendFactors[params->blend_dst_alpha],
+ .colorWriteMask = VK_COLOR_COMPONENT_R_BIT |
+ VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT |
+ VK_COLOR_COMPONENT_A_BIT,
+ };
+
+ VkGraphicsPipelineCreateInfo cinfo = {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = MP_ARRAY_SIZE(stages),
+ .pStages = &stages[0],
+ .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 1,
+ .pVertexBindingDescriptions = &(VkVertexInputBindingDescription) {
+ .binding = 0,
+ .stride = params->vertex_stride,
+ .inputRate = VK_VERTEX_INPUT_RATE_VERTEX,
+ },
+ .vertexAttributeDescriptionCount = params->num_vertex_attribs,
+ .pVertexAttributeDescriptions = attrs,
+ },
+ .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
+ },
+ .pViewportState = &(VkPipelineViewportStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .lineWidth = 1.0f,
+ },
+ .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
+ },
+ .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = &binfo,
+ },
+ .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 2,
+ .pDynamicStates = (VkDynamicState[]){
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ },
+ },
+ .layout = pass_vk->pipeLayout,
+ .renderPass = pass_vk->renderPass,
+ };
+
+ VK(vkCreateGraphicsPipelines(vk->dev, pass_vk->pipeCache, 1, &cinfo,
+ MPVK_ALLOCATOR, &pass_vk->pipe));
+ break;
+ }
+ case RA_RENDERPASS_TYPE_COMPUTE: {
+ sinfo.pCode = (uint32_t *)params->compute_shader;
+ sinfo.codeSize = strlen(params->compute_shader);
+ VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &pass_vk->comp));
+
+ VkComputePipelineCreateInfo cinfo = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = pass_vk->comp,
+ .pName = "main",
+ },
+ .layout = pass_vk->pipeLayout,
+ };
+
+ VK(vkCreateComputePipelines(vk->dev, pass_vk->pipeCache, 1, &cinfo,
+ MPVK_ALLOCATOR, &pass_vk->pipe));
+ break;
+ }
+ }
+
+ // Update cached program
+ bstr *prog = &pass->params.cached_program;
+ VK(vkGetPipelineCacheData(vk->dev, pass_vk->pipeCache, &prog->len, NULL));
+ prog->start = talloc_size(pass, prog->len);
+ VK(vkGetPipelineCacheData(vk->dev, pass_vk->pipeCache, &prog->len, prog->start));
+
+ return pass;
+
+error:
+ vk_renderpass_destroy(ra, pass);
+ return NULL;
+}
+
+static void vk_update_descriptor(struct ra *ra, struct vk_cmd *cmd,
+ struct ra_renderpass *pass,
+ struct ra_renderpass_input_val val,
+ VkDescriptorSet ds, int idx)
+{
+ struct ra_renderpass_vk *pass_vk = pass->priv;
+ struct ra_renderpass_input *inp = &pass->params.inputs[val.index];
+
+ VkWriteDescriptorSet *wds = &pass_vk->dswrite[idx];
+ *wds = (VkWriteDescriptorSet) {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstSet = ds,
+ .dstBinding = inp->binding,
+ .descriptorCount = 1,
+ .descriptorType = dsType[inp->type],
+ };
+
+ static const VkPipelineStageFlags passStages[] = {
+ [RA_RENDERPASS_TYPE_RASTER] = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
+ [RA_RENDERPASS_TYPE_COMPUTE] = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+ };
+
+ switch (inp->type) {
+ case RA_VARTYPE_TEX: {
+ struct ra_tex *tex = *(struct ra_tex **)val.data;
+ struct ra_tex_vk *tex_vk = tex->priv;
+
+ assert(tex->params.render_src);
+ tex_barrier(cmd, tex_vk, passStages[pass->params.type],
+ VK_ACCESS_SHADER_READ_BIT,
+ VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, false);
+
+ VkDescriptorImageInfo *iinfo = &pass_vk->dsiinfo[idx];
+ *iinfo = (VkDescriptorImageInfo) {
+ .sampler = tex_vk->sampler,
+ .imageView = tex_vk->view,
+ .imageLayout = tex_vk->current_layout,
+ };
+
+ wds->pImageInfo = iinfo;
+ break;
+ }
+ case RA_VARTYPE_IMG_W: {
+ struct ra_tex *tex = *(struct ra_tex **)val.data;
+ struct ra_tex_vk *tex_vk = tex->priv;
+
+ assert(tex->params.storage_dst);
+ tex_barrier(cmd, tex_vk, passStages[pass->params.type],
+ VK_ACCESS_SHADER_WRITE_BIT,
+ VK_IMAGE_LAYOUT_GENERAL, false);
+
+ VkDescriptorImageInfo *iinfo = &pass_vk->dsiinfo[idx];
+ *iinfo = (VkDescriptorImageInfo) {
+ .imageView = tex_vk->view,
+ .imageLayout = tex_vk->current_layout,
+ };
+
+ wds->pImageInfo = iinfo;
+ break;
+ }
+ case RA_VARTYPE_BUF_RO:
+ case RA_VARTYPE_BUF_RW: {
+ struct ra_buf *buf = *(struct ra_buf **)val.data;
+ struct ra_buf_vk *buf_vk = buf->priv;
+
+        VkAccessFlags access = VK_ACCESS_SHADER_READ_BIT;
+ if (inp->type == RA_VARTYPE_BUF_RW)
+ access |= VK_ACCESS_SHADER_WRITE_BIT;
+
+ buf_barrier(ra, cmd, buf, passStages[pass->params.type],
+ access, buf_vk->slice.mem.offset, buf->params.size);
+
+ VkDescriptorBufferInfo *binfo = &pass_vk->dsbinfo[idx];
+ *binfo = (VkDescriptorBufferInfo) {
+ .buffer = buf_vk->slice.buf,
+ .offset = buf_vk->slice.mem.offset,
+ .range = buf->params.size,
+ };
+
+ wds->pBufferInfo = binfo;
+ break;
+ }
+ }
+}
+
+static void vk_renderpass_run(struct ra *ra,
+ const struct ra_renderpass_run_params *params)
+{
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+ struct ra_renderpass *pass = params->pass;
+ struct ra_renderpass_vk *pass_vk = pass->priv;
+
+ struct vk_cmd *cmd = vk_require_cmd(ra);
+ if (!cmd)
+ goto error;
+
+ static const VkPipelineBindPoint bindPoint[] = {
+ [RA_RENDERPASS_TYPE_RASTER] = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ [RA_RENDERPASS_TYPE_COMPUTE] = VK_PIPELINE_BIND_POINT_COMPUTE,
+ };
+
+ vkCmdBindPipeline(cmd->buf, bindPoint[pass->params.type], pass_vk->pipe);
+
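+    // Rotate through the ring of descriptor sets, so updating this set can
+    // never race against an older, still in-flight command using another one.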
+ VkDescriptorSet ds = pass_vk->dss[pass_vk->dindex++];
+ pass_vk->dindex %= MPVK_NUM_DS;
+
+ for (int i = 0; i < params->num_values; i++)
+ vk_update_descriptor(ra, cmd, pass, params->values[i], ds, i);
+
+ if (params->num_values > 0) {
+ vkUpdateDescriptorSets(vk->dev, params->num_values, pass_vk->dswrite,
+ 0, NULL);
+ }
+
+ vkCmdBindDescriptorSets(cmd->buf, bindPoint[pass->params.type],
+ pass_vk->pipeLayout, 0, 1, &ds, 0, NULL);
+
+ switch (pass->params.type) {
+ case RA_RENDERPASS_TYPE_COMPUTE:
+ vkCmdDispatch(cmd->buf, params->compute_groups[0],
+ params->compute_groups[1],
+ params->compute_groups[2]);
+ break;
+ case RA_RENDERPASS_TYPE_RASTER: {
+ struct ra_tex *tex = params->target;
+ struct ra_tex_vk *tex_vk = tex->priv;
+ assert(tex->params.render_dst);
+
+ struct ra_buf_params buf_params = {
+ .type = RA_BUF_TYPE_VERTEX,
+ .size = params->vertex_count * pass->params.vertex_stride,
+ .host_mutable = true,
+ };
+
+ struct ra_buf *buf = ra_buf_pool_get(ra, &pass_vk->vbo, &buf_params);
+ if (!buf) {
+ MP_ERR(ra, "Failed allocating vertex buffer!\n");
+ goto error;
+ }
+ struct ra_buf_vk *buf_vk = buf->priv;
+
+ vk_buf_update(ra, buf, 0, params->vertex_data, buf_params.size);
+
+ buf_barrier(ra, cmd, buf, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
+ VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
+ buf_vk->slice.mem.offset, buf_params.size);
+
+ vkCmdBindVertexBuffers(cmd->buf, 0, 1, &buf_vk->slice.buf,
+ &buf_vk->slice.mem.offset);
+
+ tex_barrier(cmd, tex_vk, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+ VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
+ VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, false);
+
+ VkViewport viewport = {
+ .x = params->viewport.x0,
+ .y = params->viewport.y0,
+ .width = mp_rect_w(params->viewport),
+ .height = mp_rect_h(params->viewport),
+ };
+
+ VkRect2D scissor = {
+ .offset = {params->scissors.x0, params->scissors.y0},
+ .extent = {mp_rect_w(params->scissors), mp_rect_h(params->scissors)},
+ };
+
+ vkCmdSetViewport(cmd->buf, 0, 1, &viewport);
+ vkCmdSetScissor(cmd->buf, 0, 1, &scissor);
+
+ VkRenderPassBeginInfo binfo = {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = pass_vk->renderPass,
+ .framebuffer = tex_vk->framebuffer,
+ .renderArea = (VkRect2D){{0, 0}, {tex->params.w, tex->params.h}},
+ };
+
+ vkCmdBeginRenderPass(cmd->buf, &binfo, VK_SUBPASS_CONTENTS_INLINE);
+ vkCmdDraw(cmd->buf, params->vertex_count, 1, 0, 0);
+ vkCmdEndRenderPass(cmd->buf);
+ break;
+ }
+ default: abort();
+    }
+
+error:
+ return;
+}
+
+static void vk_blit(struct ra *ra, struct ra_tex *dst, struct ra_tex *src,
+ struct mp_rect *dst_rc, struct mp_rect *src_rc)
+{
+ assert(src->params.blit_src);
+ assert(dst->params.blit_dst);
+
+ struct ra_tex_vk *src_vk = src->priv;
+ struct ra_tex_vk *dst_vk = dst->priv;
+
+ struct vk_cmd *cmd = vk_require_cmd(ra);
+ if (!cmd)
+ return;
+
+ tex_barrier(cmd, src_vk, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ VK_ACCESS_TRANSFER_READ_BIT,
+ VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+ false);
+
+ bool discard = dst_rc->x0 == 0 &&
+ dst_rc->y0 == 0 &&
+ dst_rc->x1 == dst->params.w &&
+ dst_rc->y1 == dst->params.h;
+
+ tex_barrier(cmd, dst_vk, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ VK_ACCESS_TRANSFER_WRITE_BIT,
+ VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ discard);
+
+ VkImageBlit region = {
+ .srcSubresource = vk_layers,
+ .srcOffsets = {{src_rc->x0, src_rc->y0, 0}, {src_rc->x1, src_rc->y1, 1}},
+ .dstSubresource = vk_layers,
+ .dstOffsets = {{dst_rc->x0, dst_rc->y0, 0}, {dst_rc->x1, dst_rc->y1, 1}},
+ };
+
+ vkCmdBlitImage(cmd->buf, src_vk->img, src_vk->current_layout, dst_vk->img,
+ dst_vk->current_layout, 1, &region, VK_FILTER_NEAREST);
+}
+
+static void vk_clear(struct ra *ra, struct ra_tex *tex, float color[4],
+ struct mp_rect *rc)
+{
+ struct ra_vk *p = ra->priv;
+ struct ra_tex_vk *tex_vk = tex->priv;
+ assert(tex->params.blit_dst);
+
+ struct vk_cmd *cmd = vk_require_cmd(ra);
+ if (!cmd)
+ return;
+
+ struct mp_rect full = {0, 0, tex->params.w, tex->params.h};
+ if (!rc || mp_rect_equals(rc, &full)) {
+ // To clear the entire image, we can use the efficient clear command
+ tex_barrier(cmd, tex_vk, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ VK_ACCESS_TRANSFER_WRITE_BIT,
+ VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, true);
+
+ VkClearColorValue clearColor = {0};
+ for (int c = 0; c < 4; c++)
+ clearColor.float32[c] = color[c];
+
+ vkCmdClearColorImage(cmd->buf, tex_vk->img, tex_vk->current_layout,
+ &clearColor, 1, &vk_range);
+ } else {
+ // To simulate per-region clearing, we blit from a 1x1 texture instead
+ struct ra_tex_upload_params ul_params = {
+ .tex = p->clear_tex,
+ .invalidate = true,
+ .src = &color[0],
+ };
+ vk_tex_upload(ra, &ul_params);
+ vk_blit(ra, tex, p->clear_tex, rc, &(struct mp_rect){0, 0, 1, 1});
+ }
+}
+
+#define VK_QUERY_POOL_SIZE (MPVK_MAX_STREAMING_DEPTH * 4)
+
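+// Timer queries are stored in a small ring of start/stop timestamp pairs.
+// A pair is only read back the next time vk_timer_start reuses its slot, so
+// the value reported by vk_timer_stop always refers to an earlier, already
+// completed run (or 0 if the result was not available yet).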
+struct vk_timer {
+ VkQueryPool pool;
+ int index;
+ uint64_t result;
+};
+
+static void vk_timer_destroy(struct ra *ra, ra_timer *ratimer)
+{
+ if (!ratimer)
+ return;
+
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+ struct vk_timer *timer = ratimer;
+
+ vkDestroyQueryPool(vk->dev, timer->pool, MPVK_ALLOCATOR);
+
+ talloc_free(timer);
+}
+
+MAKE_LAZY_DESTRUCTOR(vk_timer_destroy, ra_timer);
+
+static ra_timer *vk_timer_create(struct ra *ra)
+{
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+
+ struct vk_timer *timer = talloc_zero(NULL, struct vk_timer);
+
+ struct VkQueryPoolCreateInfo qinfo = {
+ .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
+ .queryType = VK_QUERY_TYPE_TIMESTAMP,
+ .queryCount = VK_QUERY_POOL_SIZE,
+ };
+
+ VK(vkCreateQueryPool(vk->dev, &qinfo, MPVK_ALLOCATOR, &timer->pool));
+
+ return (ra_timer *)timer;
+
+error:
+ vk_timer_destroy(ra, timer);
+ return NULL;
+}
+
+static void vk_timer_record(struct ra *ra, VkQueryPool pool, int index,
+ VkPipelineStageFlags stage)
+{
+ struct vk_cmd *cmd = vk_require_cmd(ra);
+ if (!cmd)
+ return;
+
+ vkCmdWriteTimestamp(cmd->buf, stage, pool, index);
+}
+
+static void vk_timer_start(struct ra *ra, ra_timer *ratimer)
+{
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+ struct vk_timer *timer = ratimer;
+
+ timer->index = (timer->index + 2) % VK_QUERY_POOL_SIZE;
+
+ uint64_t out[2];
+ VkResult res = vkGetQueryPoolResults(vk->dev, timer->pool, timer->index, 2,
+ sizeof(out), &out[0], sizeof(uint64_t),
+ VK_QUERY_RESULT_64_BIT);
+ switch (res) {
+ case VK_SUCCESS:
+ timer->result = (out[1] - out[0]) * vk->limits.timestampPeriod;
+ break;
+ case VK_NOT_READY:
+ timer->result = 0;
+ break;
+ default:
+ MP_WARN(vk, "Failed reading timer query result: %s\n", vk_err(res));
+ return;
+    }
+
+ vk_timer_record(ra, timer->pool, timer->index,
+ VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
+}
+
+static uint64_t vk_timer_stop(struct ra *ra, ra_timer *ratimer)
+{
+ struct vk_timer *timer = ratimer;
+ vk_timer_record(ra, timer->pool, timer->index + 1,
+ VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
+
+ return timer->result;
+}
+
+static struct ra_fns ra_fns_vk = {
+ .destroy = vk_destroy_ra,
+ .tex_create = vk_tex_create,
+ .tex_destroy = vk_tex_destroy_lazy,
+ .tex_upload = vk_tex_upload,
+ .buf_create = vk_buf_create,
+ .buf_destroy = vk_buf_destroy_lazy,
+ .buf_update = vk_buf_update,
+ .buf_poll = vk_buf_poll,
+ .clear = vk_clear,
+ .blit = vk_blit,
+ .uniform_layout = std140_layout,
+ .renderpass_create = vk_renderpass_create,
+ .renderpass_destroy = vk_renderpass_destroy_lazy,
+ .renderpass_run = vk_renderpass_run,
+ .timer_create = vk_timer_create,
+ .timer_destroy = vk_timer_destroy_lazy,
+ .timer_start = vk_timer_start,
+ .timer_stop = vk_timer_stop,
+};
+
+static void present_cb(void *priv, int *inflight)
+{
+ *inflight -= 1;
+}
+
+bool ra_vk_submit(struct ra *ra, struct ra_tex *tex, VkSemaphore acquired,
+ VkSemaphore *done, int *inflight)
+{
+ struct vk_cmd *cmd = vk_require_cmd(ra);
+ if (!cmd)
+ goto error;
+
+ if (inflight) {
+ *inflight += 1;
+ vk_cmd_callback(cmd, (vk_cb)present_cb, NULL, inflight);
+ }
+
+ struct ra_tex_vk *tex_vk = tex->priv;
+ assert(tex_vk->external_img);
+ tex_barrier(cmd, tex_vk, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0,
+ VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, false);
+
+ // These are the only two stages that we use/support for actually
+    // outputting to swapchain images, so just add a dependency
+ // on both of them. In theory, we could maybe come up with some more
+ // advanced mechanism of tracking dynamic dependencies, but that seems
+ // like overkill.
+ vk_cmd_dep(cmd, acquired,
+ VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+ VK_PIPELINE_STAGE_TRANSFER_BIT);
+
+ return vk_flush(ra, done);
+
+error:
+ return false;
+}
diff --git a/video/out/vulkan/ra_vk.h b/video/out/vulkan/ra_vk.h
new file mode 100644
index 0000000000..893421bc59
--- /dev/null
+++ b/video/out/vulkan/ra_vk.h
@@ -0,0 +1,31 @@
+#pragma once
+
+#include "video/out/gpu/ra.h"
+
+#include "common.h"
+#include "utils.h"
+
+struct ra *ra_create_vk(struct mpvk_ctx *vk, struct mp_log *log);
+
+// Access to the VkDevice is needed for swapchain creation
+VkDevice ra_vk_get_dev(struct ra *ra);
+
+// Allocates a ra_tex that wraps a swapchain image. The contents of the image
+// will be invalidated, and access to it will only be internally synchronized,
+// so the calling code should not do anything else with the VkImage.
+struct ra_tex *ra_vk_wrap_swapchain_img(struct ra *ra, VkImage vkimg,
+ VkSwapchainCreateInfoKHR info);
+
+// This function flushes the command buffers, transitions `tex` (which must be
+// a wrapped swapchain image) into a format suitable for presentation, and
+// submits the current rendering commands. The indicated semaphore must fire
+// before the submitted command can run. If `done` is non-NULL, it will be
+// set to a semaphore that fires once the command completes. If `inflight`
+// is non-NULL, it will be incremented when the command is submitted and
+// decremented once it completes.
+bool ra_vk_submit(struct ra *ra, struct ra_tex *tex, VkSemaphore acquired,
+ VkSemaphore *done, int *inflight);
+
+// May be called on a struct ra of any type. Returns NULL if the ra is not
+// a vulkan ra.
+struct mpvk_ctx *ra_vk_get(struct ra *ra);
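+
+// Rough sketch of how the context code is expected to drive this API (the
+// helper names and variables here are purely illustrative):
+//
+//   VkImage vkimg = ...;        // from vkGetSwapchainImagesKHR()
+//   struct ra_tex *tex = ra_vk_wrap_swapchain_img(ra, vkimg, sw_info);
+//   // ... render into `tex` through the regular ra interface ...
+//   VkSemaphore acquired = ...; // from vkAcquireNextImageKHR()
+//   VkSemaphore done;
+//   if (ra_vk_submit(ra, tex, acquired, &done, &frames_in_flight))
+//       ...;                    // present the image, waiting on `done`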
diff --git a/video/out/vulkan/utils.c b/video/out/vulkan/utils.c
new file mode 100644
index 0000000000..43e446bc36
--- /dev/null
+++ b/video/out/vulkan/utils.c
@@ -0,0 +1,726 @@
+#include <libavutil/macros.h>
+
+#include "utils.h"
+#include "malloc.h"
+
+const char* vk_err(VkResult res)
+{
+ switch (res) {
+ // These are technically success codes, but include them nonetheless
+ case VK_SUCCESS: return "VK_SUCCESS";
+ case VK_NOT_READY: return "VK_NOT_READY";
+ case VK_TIMEOUT: return "VK_TIMEOUT";
+ case VK_EVENT_SET: return "VK_EVENT_SET";
+ case VK_EVENT_RESET: return "VK_EVENT_RESET";
+ case VK_INCOMPLETE: return "VK_INCOMPLETE";
+ case VK_SUBOPTIMAL_KHR: return "VK_SUBOPTIMAL_KHR";
+
+ // Actual error codes
+ case VK_ERROR_OUT_OF_HOST_MEMORY: return "VK_ERROR_OUT_OF_HOST_MEMORY";
+ case VK_ERROR_OUT_OF_DEVICE_MEMORY: return "VK_ERROR_OUT_OF_DEVICE_MEMORY";
+ case VK_ERROR_INITIALIZATION_FAILED: return "VK_ERROR_INITIALIZATION_FAILED";
+ case VK_ERROR_DEVICE_LOST: return "VK_ERROR_DEVICE_LOST";
+ case VK_ERROR_MEMORY_MAP_FAILED: return "VK_ERROR_MEMORY_MAP_FAILED";
+ case VK_ERROR_LAYER_NOT_PRESENT: return "VK_ERROR_LAYER_NOT_PRESENT";
+ case VK_ERROR_EXTENSION_NOT_PRESENT: return "VK_ERROR_EXTENSION_NOT_PRESENT";
+ case VK_ERROR_FEATURE_NOT_PRESENT: return "VK_ERROR_FEATURE_NOT_PRESENT";
+ case VK_ERROR_INCOMPATIBLE_DRIVER: return "VK_ERROR_INCOMPATIBLE_DRIVER";
+ case VK_ERROR_TOO_MANY_OBJECTS: return "VK_ERROR_TOO_MANY_OBJECTS";
+ case VK_ERROR_FORMAT_NOT_SUPPORTED: return "VK_ERROR_FORMAT_NOT_SUPPORTED";
+ case VK_ERROR_FRAGMENTED_POOL: return "VK_ERROR_FRAGMENTED_POOL";
+ case VK_ERROR_INVALID_SHADER_NV: return "VK_ERROR_INVALID_SHADER_NV";
+ case VK_ERROR_OUT_OF_DATE_KHR: return "VK_ERROR_OUT_OF_DATE_KHR";
+ case VK_ERROR_SURFACE_LOST_KHR: return "VK_ERROR_SURFACE_LOST_KHR";
+ }
+
+ return "Unknown error!";
+}
+
+static const char* vk_dbg_type(VkDebugReportObjectTypeEXT type)
+{
+ switch (type) {
+ case VK_DEBUG_REPORT_OBJECT_TYPE_INSTANCE_EXT:
+ return "VkInstance";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_PHYSICAL_DEVICE_EXT:
+ return "VkPhysicalDevice";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT:
+ return "VkDevice";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_QUEUE_EXT:
+ return "VkQueue";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_SEMAPHORE_EXT:
+ return "VkSemaphore";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_BUFFER_EXT:
+ return "VkCommandBuffer";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_FENCE_EXT:
+ return "VkFence";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_MEMORY_EXT:
+ return "VkDeviceMemory";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_EXT:
+ return "VkBuffer";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT:
+ return "VkImage";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_EVENT_EXT:
+ return "VkEvent";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_QUERY_POOL_EXT:
+ return "VkQueryPool";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_VIEW_EXT:
+ return "VkBufferView";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_VIEW_EXT:
+ return "VkImageView";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT:
+ return "VkShaderModule";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_CACHE_EXT:
+ return "VkPipelineCache";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_LAYOUT_EXT:
+ return "VkPipelineLayout";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_RENDER_PASS_EXT:
+ return "VkRenderPass";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_EXT:
+ return "VkPipeline";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT_EXT:
+ return "VkDescriptorSetLayout";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_EXT:
+ return "VkSampler";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_POOL_EXT:
+ return "VkDescriptorPool";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_EXT:
+ return "VkDescriptorSet";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_FRAMEBUFFER_EXT:
+ return "VkFramebuffer";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_POOL_EXT:
+ return "VkCommandPool";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_SURFACE_KHR_EXT:
+ return "VkSurfaceKHR";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_SWAPCHAIN_KHR_EXT:
+ return "VkSwapchainKHR";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_EXT:
+ return "VkDebugReportCallbackEXT";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT:
+ default:
+ return "unknown object";
+ }
+}
+
+static VkBool32 vk_dbg_callback(VkDebugReportFlagsEXT flags,
+ VkDebugReportObjectTypeEXT objType,
+ uint64_t obj, size_t loc, int32_t msgCode,
+ const char *layer, const char *msg, void *priv)
+{
+ struct mpvk_ctx *vk = priv;
+ int lev = MSGL_V;
+
+ switch (flags) {
+ case VK_DEBUG_REPORT_ERROR_BIT_EXT: lev = MSGL_ERR; break;
+ case VK_DEBUG_REPORT_WARNING_BIT_EXT: lev = MSGL_WARN; break;
+ case VK_DEBUG_REPORT_INFORMATION_BIT_EXT: lev = MSGL_TRACE; break;
+ case VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT: lev = MSGL_WARN; break;
+ case VK_DEBUG_REPORT_DEBUG_BIT_EXT: lev = MSGL_DEBUG; break;
+    }
+
+ MP_MSG(vk, lev, "vk [%s] %d: %s (obj 0x%llx (%s), loc 0x%zx)\n",
+ layer, (int)msgCode, msg, (unsigned long long)obj,
+ vk_dbg_type(objType), loc);
+
+ // The return value of this function determines whether the call will
+ // be explicitly aborted (to prevent GPU errors) or not. In this case,
+ // we generally want this to be on for the errors.
+ return (flags & VK_DEBUG_REPORT_ERROR_BIT_EXT);
+}
+
+static void vk_cmdpool_uninit(struct mpvk_ctx *vk, struct vk_cmdpool *pool)
+{
+ if (!pool)
+ return;
+
+ // also frees associated command buffers
+ vkDestroyCommandPool(vk->dev, pool->pool, MPVK_ALLOCATOR);
+ for (int n = 0; n < MPVK_MAX_CMDS; n++) {
+ vkDestroyFence(vk->dev, pool->cmds[n].fence, MPVK_ALLOCATOR);
+ vkDestroySemaphore(vk->dev, pool->cmds[n].done, MPVK_ALLOCATOR);
+ talloc_free(pool->cmds[n].callbacks);
+ }
+ talloc_free(pool);
+}
+
+void mpvk_uninit(struct mpvk_ctx *vk)
+{
+ if (!vk->inst)
+ return;
+
+ if (vk->dev) {
+ vk_cmdpool_uninit(vk, vk->pool);
+ vk_malloc_uninit(vk);
+ vkDestroyDevice(vk->dev, MPVK_ALLOCATOR);
+ }
+
+ if (vk->dbg) {
+        // As with creating the debug callback, this function is not part of
+        // the core spec and needs to be loaded at runtime first.
+ VK_LOAD_PFN(vkDestroyDebugReportCallbackEXT)
+ pfn_vkDestroyDebugReportCallbackEXT(vk->inst, vk->dbg, MPVK_ALLOCATOR);
+ }
+
+ vkDestroySurfaceKHR(vk->inst, vk->surf, MPVK_ALLOCATOR);
+ vkDestroyInstance(vk->inst, MPVK_ALLOCATOR);
+
+ *vk = (struct mpvk_ctx){0};
+}
+
+bool mpvk_instance_init(struct mpvk_ctx *vk, struct mp_log *log, bool debug)
+{
+ *vk = (struct mpvk_ctx) {
+ .log = log,
+ };
+
+ VkInstanceCreateInfo info = {
+ .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
+ };
+
+ if (debug) {
+ // Enables the LunarG standard validation layer, which
+ // is a meta-layer that loads lots of other validators
+ static const char* layers[] = {
+ "VK_LAYER_LUNARG_standard_validation",
+ };
+
+ info.ppEnabledLayerNames = layers;
+ info.enabledLayerCount = MP_ARRAY_SIZE(layers);
+ }
+
+ // Enable whatever extensions were compiled in.
+ static const char *extensions[] = {
+ VK_KHR_SURFACE_EXTENSION_NAME,
+#if HAVE_X11
+ VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
+#endif
+
+ // Extra extensions only used for debugging. These are toggled by
+ // decreasing the enabledExtensionCount, so the number needs to be
+ // synchronized with the code below.
+ VK_EXT_DEBUG_REPORT_EXTENSION_NAME,
+ };
+
+ const int debugExtensionCount = 1;
+
+ info.ppEnabledExtensionNames = extensions;
+ info.enabledExtensionCount = MP_ARRAY_SIZE(extensions);
+
+ if (!debug)
+ info.enabledExtensionCount -= debugExtensionCount;
+
+ MP_VERBOSE(vk, "Creating instance with extensions:\n");
+ for (int i = 0; i < info.enabledExtensionCount; i++)
+ MP_VERBOSE(vk, " %s\n", info.ppEnabledExtensionNames[i]);
+
+ VkResult res = vkCreateInstance(&info, MPVK_ALLOCATOR, &vk->inst);
+ if (res != VK_SUCCESS) {
+ MP_VERBOSE(vk, "Failed creating instance: %s\n", vk_err(res));
+ return false;
+ }
+
+ if (debug) {
+ // Set up a debug callback to catch validation messages
+ VkDebugReportCallbackCreateInfoEXT dinfo = {
+ .sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT,
+ .flags = VK_DEBUG_REPORT_INFORMATION_BIT_EXT |
+ VK_DEBUG_REPORT_WARNING_BIT_EXT |
+ VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT |
+ VK_DEBUG_REPORT_ERROR_BIT_EXT |
+ VK_DEBUG_REPORT_DEBUG_BIT_EXT,
+ .pfnCallback = vk_dbg_callback,
+ .pUserData = vk,
+ };
+
+ // Since this is not part of the core spec, we need to load it. This
+ // can't fail because we've already successfully created an instance
+ // with this extension enabled.
+ VK_LOAD_PFN(vkCreateDebugReportCallbackEXT)
+ pfn_vkCreateDebugReportCallbackEXT(vk->inst, &dinfo, MPVK_ALLOCATOR,
+ &vk->dbg);
+ }
+
+ return true;
+}
+
+#define MPVK_MAX_DEVICES 16
+
+static bool physd_supports_surface(struct mpvk_ctx *vk, VkPhysicalDevice physd)
+{
+ uint32_t qfnum;
+ vkGetPhysicalDeviceQueueFamilyProperties(physd, &qfnum, NULL);
+
+ for (int i = 0; i < qfnum; i++) {
+ VkBool32 sup;
+ VK(vkGetPhysicalDeviceSurfaceSupportKHR(physd, i, vk->surf, &sup));
+ if (sup)
+ return true;
+ }
+
+error:
+ return false;
+}
+
+bool mpvk_find_phys_device(struct mpvk_ctx *vk, const char *name, bool sw)
+{
+ assert(vk->surf);
+
+ MP_VERBOSE(vk, "Probing for vulkan devices:\n");
+
+ VkPhysicalDevice *devices = NULL;
+ uint32_t num = 0;
+ VK(vkEnumeratePhysicalDevices(vk->inst, &num, NULL));
+ devices = talloc_array(NULL, VkPhysicalDevice, num);
+ VK(vkEnumeratePhysicalDevices(vk->inst, &num, devices));
+
+ // Sorted by "priority". Reuses some m_opt code for convenience
+ static const struct m_opt_choice_alternatives types[] = {
+ {"discrete", VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU},
+ {"integrated", VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU},
+ {"virtual", VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU},
+ {"software", VK_PHYSICAL_DEVICE_TYPE_CPU},
+ {"unknown", VK_PHYSICAL_DEVICE_TYPE_OTHER},
+ {0}
+ };
+
+ VkPhysicalDeviceProperties props[MPVK_MAX_DEVICES];
+ for (int i = 0; i < num; i++) {
+ vkGetPhysicalDeviceProperties(devices[i], &props[i]);
+ MP_VERBOSE(vk, " GPU %d: %s (%s)\n", i, props[i].deviceName,
+ m_opt_choice_str(types, props[i].deviceType));
+ }
+
+ // Iterate through each type in order of decreasing preference
+ for (int t = 0; types[t].name; t++) {
+ // Disallow SW rendering unless explicitly enabled
+ if (types[t].value == VK_PHYSICAL_DEVICE_TYPE_CPU && !sw)
+ continue;
+
+ for (int i = 0; i < num; i++) {
+ VkPhysicalDeviceProperties prop = props[i];
+ if (prop.deviceType != types[t].value)
+ continue;
+ if (name && strcmp(name, prop.deviceName) != 0)
+ continue;
+ if (!physd_supports_surface(vk, devices[i]))
+ continue;
+
+ MP_VERBOSE(vk, "Chose device:\n");
+ MP_VERBOSE(vk, " Device Name: %s\n", prop.deviceName);
+ MP_VERBOSE(vk, " Device ID: %x:%x\n",
+ (unsigned)prop.vendorID, (unsigned)prop.deviceID);
+ MP_VERBOSE(vk, " Driver version: %d\n", (int)prop.driverVersion);
+ MP_VERBOSE(vk, " API version: %d.%d.%d\n",
+ (int)VK_VERSION_MAJOR(prop.apiVersion),
+ (int)VK_VERSION_MINOR(prop.apiVersion),
+ (int)VK_VERSION_PATCH(prop.apiVersion));
+ vk->physd = devices[i];
+ vk->limits = prop.limits;
+ talloc_free(devices);
+ return true;
+ }
+ }
+
+error:
+ MP_VERBOSE(vk, "Found no suitable device, giving up.\n");
+ talloc_free(devices);
+ return false;
+}
+
+bool mpvk_pick_surface_format(struct mpvk_ctx *vk)
+{
+ assert(vk->physd);
+
+ VkSurfaceFormatKHR *formats = NULL;
+    uint32_t num;
+
+ // Enumerate through the surface formats and find one that we can map to
+ // a ra_format
+ VK(vkGetPhysicalDeviceSurfaceFormatsKHR(vk->physd, vk->surf, &num, NULL));
+ formats = talloc_array(NULL, VkSurfaceFormatKHR, num);
+ VK(vkGetPhysicalDeviceSurfaceFormatsKHR(vk->physd, vk->surf, &num, formats));
+
+ for (int i = 0; i < num; i++) {
+ // A value of VK_FORMAT_UNDEFINED means we can pick anything we want
+ if (formats[i].format == VK_FORMAT_UNDEFINED) {
+ vk->surf_format = (VkSurfaceFormatKHR) {
+ .colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR,
+ .format = VK_FORMAT_R16G16B16A16_UNORM,
+ };
+ break;
+ }
+
+ if (formats[i].colorSpace != VK_COLOR_SPACE_SRGB_NONLINEAR_KHR)
+ continue;
+
+ // Format whitelist, since we want only >= 8 bit _UNORM formats
+ switch (formats[i].format) {
+ case VK_FORMAT_R8G8B8_UNORM:
+ case VK_FORMAT_B8G8R8_UNORM:
+ case VK_FORMAT_R8G8B8A8_UNORM:
+ case VK_FORMAT_B8G8R8A8_UNORM:
+ case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
+ case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
+ case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
+ case VK_FORMAT_R16G16B16_UNORM:
+ case VK_FORMAT_R16G16B16A16_UNORM:
+ break; // accept
+ default: continue;
+ }
+
+ vk->surf_format = formats[i];
+ break;
+ }
+
+ talloc_free(formats);
+
+ if (!vk->surf_format.format)
+ goto error;
+
+ return true;
+
+error:
+ MP_ERR(vk, "Failed picking surface format!\n");
+ talloc_free(formats);
+ return false;
+}
+
+static bool vk_cmdpool_init(struct mpvk_ctx *vk, VkDeviceQueueCreateInfo qinfo,
+ VkQueueFamilyProperties props,
+ struct vk_cmdpool **out)
+{
+ struct vk_cmdpool *pool = *out = talloc_ptrtype(NULL, pool);
+ *pool = (struct vk_cmdpool) {
+ .qf = qinfo.queueFamilyIndex,
+ .props = props,
+ .qcount = qinfo.queueCount,
+ };
+
+ for (int n = 0; n < pool->qcount; n++)
+ vkGetDeviceQueue(vk->dev, pool->qf, n, &pool->queues[n]);
+
+ VkCommandPoolCreateInfo cinfo = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+ .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT |
+ VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
+ .queueFamilyIndex = pool->qf,
+ };
+
+ VK(vkCreateCommandPool(vk->dev, &cinfo, MPVK_ALLOCATOR, &pool->pool));
+
+ VkCommandBufferAllocateInfo ainfo = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+ .commandPool = pool->pool,
+ .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+ .commandBufferCount = MPVK_MAX_CMDS,
+ };
+
+ VkCommandBuffer cmdbufs[MPVK_MAX_CMDS];
+ VK(vkAllocateCommandBuffers(vk->dev, &ainfo, cmdbufs));
+
+ for (int n = 0; n < MPVK_MAX_CMDS; n++) {
+ struct vk_cmd *cmd = &pool->cmds[n];
+ cmd->pool = pool;
+ cmd->buf = cmdbufs[n];
+
+ VkFenceCreateInfo finfo = {
+ .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
+ .flags = VK_FENCE_CREATE_SIGNALED_BIT,
+ };
+
+ VK(vkCreateFence(vk->dev, &finfo, MPVK_ALLOCATOR, &cmd->fence));
+
+ VkSemaphoreCreateInfo sinfo = {
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+ };
+
+ VK(vkCreateSemaphore(vk->dev, &sinfo, MPVK_ALLOCATOR, &cmd->done));
+ }
+
+ return true;
+
+error:
+ return false;
+}
+
+bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts)
+{
+ assert(vk->physd);
+
+ VkQueueFamilyProperties *qfs = NULL;
+    uint32_t qfnum;
+
+ // Enumerate the queue families and find suitable families for each task
+ vkGetPhysicalDeviceQueueFamilyProperties(vk->physd, &qfnum, NULL);
+ qfs = talloc_array(NULL, VkQueueFamilyProperties, qfnum);
+ vkGetPhysicalDeviceQueueFamilyProperties(vk->physd, &qfnum, qfs);
+
+ MP_VERBOSE(vk, "Queue families supported by device:\n");
+
+ for (int i = 0; i < qfnum; i++) {
+ MP_VERBOSE(vk, "QF %d: flags 0x%x num %d\n", i,
+ (unsigned)qfs[i].queueFlags, (int)qfs[i].queueCount);
+ }
+
+ // For most of our rendering operations, we want to use one "primary" pool,
+ // so just pick the queue family with the most features.
+ int idx = -1;
+ for (int i = 0; i < qfnum; i++) {
+ if (!(qfs[i].queueFlags & VK_QUEUE_GRAPHICS_BIT))
+ continue;
+
+ // QF supports more features
+ if (idx < 0 || qfs[i].queueFlags > qfs[idx].queueFlags)
+ idx = i;
+
+ // QF supports more queues (at the same specialization level)
+ if (qfs[i].queueFlags == qfs[idx].queueFlags &&
+ qfs[i].queueCount > qfs[idx].queueCount)
+ {
+ idx = i;
+ }
+ }
+
+ // Vulkan requires at least one GRAPHICS queue, so if this fails something
+ // is horribly wrong.
+ assert(idx >= 0);
+
+ // Ensure we can actually present to the surface using this queue
+ VkBool32 sup;
+ VK(vkGetPhysicalDeviceSurfaceSupportKHR(vk->physd, idx, vk->surf, &sup));
+ if (!sup) {
+ MP_ERR(vk, "Queue family does not support surface presentation!\n");
+ goto error;
+ }
+
+ // Now that we know which queue families we want, we can create the logical
+ // device
+ assert(opts.queue_count <= MPVK_MAX_QUEUES);
+ static const float priorities[MPVK_MAX_QUEUES] = {0};
+ VkDeviceQueueCreateInfo qinfo = {
+ .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+ .queueFamilyIndex = idx,
+ .queueCount = MPMIN(qfs[idx].queueCount, opts.queue_count),
+ .pQueuePriorities = priorities,
+ };
+
+ static const char *exts[] = {
+ VK_KHR_SWAPCHAIN_EXTENSION_NAME,
+ VK_NV_GLSL_SHADER_EXTENSION_NAME,
+ };
+
+ VkDeviceCreateInfo dinfo = {
+ .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
+ .queueCreateInfoCount = 1,
+ .pQueueCreateInfos = &qinfo,
+ .ppEnabledExtensionNames = exts,
+ .enabledExtensionCount = MP_ARRAY_SIZE(exts),
+ };
+
+ MP_VERBOSE(vk, "Creating vulkan device...\n");
+ VK(vkCreateDevice(vk->physd, &dinfo, MPVK_ALLOCATOR, &vk->dev));
+
+ vk_malloc_init(vk);
+
+ // Create the vk_cmdpools and all required queues / synchronization objects
+ if (!vk_cmdpool_init(vk, qinfo, qfs[idx], &vk->pool))
+ goto error;
+
+ talloc_free(qfs);
+ return true;
+
+error:
+ MP_ERR(vk, "Failed creating logical device!\n");
+ talloc_free(qfs);
+ return false;
+}
+
+static void run_callbacks(struct mpvk_ctx *vk, struct vk_cmd *cmd)
+{
+ for (int i = 0; i < cmd->num_callbacks; i++) {
+ struct vk_callback *cb = &cmd->callbacks[i];
+ cb->run(cb->priv, cb->arg);
+ *cb = (struct vk_callback){0};
+ }
+
+ cmd->num_callbacks = 0;
+
+ // Also reset vk->last_cmd in case this was the last command to run
+ if (vk->last_cmd == cmd)
+ vk->last_cmd = NULL;
+}
+
+static void wait_for_cmds(struct mpvk_ctx *vk, struct vk_cmd cmds[], int num)
+{
+ if (!num)
+ return;
+
+ VkFence fences[MPVK_MAX_CMDS];
+ for (int i = 0; i < num; i++)
+ fences[i] = cmds[i].fence;
+
+ vkWaitForFences(vk->dev, num, fences, true, UINT64_MAX);
+
+ for (int i = 0; i < num; i++)
+ run_callbacks(vk, &cmds[i]);
+}
+
+void mpvk_pool_wait_idle(struct mpvk_ctx *vk, struct vk_cmdpool *pool)
+{
+ if (!pool)
+ return;
+
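+    // cindex points at the slot the next command will be recorded into, and
+    // cindex_pending at the oldest submitted command that has not been
+    // garbage collected yet; everything in between is still in flight.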
+ int idx = pool->cindex, pidx = pool->cindex_pending;
+ if (pidx < idx) { // range doesn't wrap
+ wait_for_cmds(vk, &pool->cmds[pidx], idx - pidx);
+ } else if (pidx > idx) { // range wraps
+ wait_for_cmds(vk, &pool->cmds[pidx], MPVK_MAX_CMDS - pidx);
+ wait_for_cmds(vk, &pool->cmds[0], idx);
+ }
+ pool->cindex_pending = pool->cindex;
+}
+
+void mpvk_dev_wait_idle(struct mpvk_ctx *vk)
+{
+ mpvk_pool_wait_idle(vk, vk->pool);
+}
+
+void mpvk_pool_poll_cmds(struct mpvk_ctx *vk, struct vk_cmdpool *pool,
+ uint64_t timeout)
+{
+ if (!pool)
+ return;
+
+ // If requested, hard block until at least one command completes
+ if (timeout > 0 && pool->cindex_pending != pool->cindex) {
+ vkWaitForFences(vk->dev, 1, &pool->cmds[pool->cindex_pending].fence,
+ true, timeout);
+ }
+
+ // Lazily garbage collect the commands based on their status
+ while (pool->cindex_pending != pool->cindex) {
+ struct vk_cmd *cmd = &pool->cmds[pool->cindex_pending];
+ VkResult res = vkGetFenceStatus(vk->dev, cmd->fence);
+ if (res != VK_SUCCESS)
+ break;
+ run_callbacks(vk, cmd);
+ pool->cindex_pending++;
+ pool->cindex_pending %= MPVK_MAX_CMDS;
+ }
+}
+
+void mpvk_dev_poll_cmds(struct mpvk_ctx *vk, uint64_t timeout)
+{
+ mpvk_pool_poll_cmds(vk, vk->pool, timeout);
+}
+
+void vk_dev_callback(struct mpvk_ctx *vk, vk_cb callback, void *p, void *arg)
+{
+ if (vk->last_cmd) {
+ vk_cmd_callback(vk->last_cmd, callback, p, arg);
+ } else {
+ // The device was already idle, so we can just immediately call it
+ callback(p, arg);
+ }
+}
+
+void vk_cmd_callback(struct vk_cmd *cmd, vk_cb callback, void *p, void *arg)
+{
+ MP_TARRAY_GROW(NULL, cmd->callbacks, cmd->num_callbacks);
+ cmd->callbacks[cmd->num_callbacks++] = (struct vk_callback) {
+ .run = callback,
+ .priv = p,
+ .arg = arg,
+ };
+}
+
+void vk_cmd_dep(struct vk_cmd *cmd, VkSemaphore dep,
+ VkPipelineStageFlagBits depstage)
+{
+ assert(cmd->num_deps < MPVK_MAX_CMD_DEPS);
+ cmd->deps[cmd->num_deps] = dep;
+ cmd->depstages[cmd->num_deps++] = depstage;
+}
+
+struct vk_cmd *vk_cmd_begin(struct mpvk_ctx *vk, struct vk_cmdpool *pool)
+{
+ // Garbage collect the cmdpool first
+ mpvk_pool_poll_cmds(vk, pool, 0);
+
+ int next = (pool->cindex + 1) % MPVK_MAX_CMDS;
+ if (next == pool->cindex_pending) {
+ MP_ERR(vk, "No free command buffers!\n");
+ goto error;
+ }
+
+ struct vk_cmd *cmd = &pool->cmds[pool->cindex];
+ pool->cindex = next;
+
+ VK(vkResetCommandBuffer(cmd->buf, 0));
+
+ VkCommandBufferBeginInfo binfo = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+ .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+ };
+
+ VK(vkBeginCommandBuffer(cmd->buf, &binfo));
+
+ return cmd;
+
+error:
+ return NULL;
+}
+
+bool vk_cmd_submit(struct mpvk_ctx *vk, struct vk_cmd *cmd, VkSemaphore *done)
+{
+ VK(vkEndCommandBuffer(cmd->buf));
+
+ struct vk_cmdpool *pool = cmd->pool;
+ VkQueue queue = pool->queues[pool->qindex];
+
+ VkSubmitInfo sinfo = {
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .commandBufferCount = 1,
+ .pCommandBuffers = &cmd->buf,
+ .waitSemaphoreCount = cmd->num_deps,
+ .pWaitSemaphores = cmd->deps,
+ .pWaitDstStageMask = cmd->depstages,
+ };
+
+ if (done) {
+ sinfo.signalSemaphoreCount = 1;
+ sinfo.pSignalSemaphores = &cmd->done;
+ *done = cmd->done;
+ }
+
+ VK(vkResetFences(vk->dev, 1, &cmd->fence));
+ VK(vkQueueSubmit(queue, 1, &sinfo, cmd->fence));
+ MP_TRACE(vk, "Submitted command on queue %p (QF %d)\n", (void *)queue,
+ pool->qf);
+
+ for (int i = 0; i < cmd->num_deps; i++)
+ cmd->deps[i] = NULL;
+ cmd->num_deps = 0;
+
+ vk->last_cmd = cmd;
+ return true;
+
+error:
+ return false;
+}
+
+void vk_cmd_cycle_queues(struct mpvk_ctx *vk)
+{
+ struct vk_cmdpool *pool = vk->pool;
+ pool->qindex = (pool->qindex + 1) % pool->qcount;
+}
+
+const VkImageSubresourceRange vk_range = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .levelCount = 1,
+ .layerCount = 1,
+};
+
+const VkImageSubresourceLayers vk_layers = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .layerCount = 1,
+};
diff --git a/video/out/vulkan/utils.h b/video/out/vulkan/utils.h
new file mode 100644
index 0000000000..5bde48089d
--- /dev/null
+++ b/video/out/vulkan/utils.h
@@ -0,0 +1,153 @@
+#pragma once
+
+#include "video/out/vo.h"
+#include "video/out/gpu/context.h"
+#include "video/mp_image.h"
+
+#include "common.h"
+#include "formats.h"
+
+#define VK_LOAD_PFN(name) PFN_##name pfn_##name = (PFN_##name) \
+ vkGetInstanceProcAddr(vk->inst, #name);
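+
+// Illustrative use (assumes a `struct mpvk_ctx *vk` in scope): declares and
+// loads a local function pointer named pfn_vkCreateDebugReportCallbackEXT.
+//
+//     VK_LOAD_PFN(vkCreateDebugReportCallbackEXT)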
+
+// Return a human-readable name for a VkResult error code
+const char* vk_err(VkResult res);
+
+// Convenience macros to simplify a lot of common boilerplate
+#define VK_ASSERT(res, str) \
+ do { \
+ if (res != VK_SUCCESS) { \
+ MP_ERR(vk, str ": %s\n", vk_err(res)); \
+ goto error; \
+ } \
+ } while (0)
+
+#define VK(cmd) \
+ do { \
+ MP_TRACE(vk, #cmd "\n"); \
+ VkResult res ## __LINE__ = (cmd); \
+ VK_ASSERT(res ## __LINE__, #cmd); \
+ } while (0)
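+
+// Illustrative use of VK() (assumes a `struct mpvk_ctx *vk`, a VkFence
+// `fence`, and an `error:` label in the enclosing function); on failure the
+// result is logged and control jumps to the label:
+//
+//     VK(vkResetFences(vk->dev, 1, &fence));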
+
+// Uninits everything in the correct order
+void mpvk_uninit(struct mpvk_ctx *vk);
+
+// Initialization functions: As a rule of thumb, these need to be called in
+// this order, followed by vk_malloc_init, followed by RA initialization, and
+// finally followed by swapchain initialization. (See the illustrative sketch
+// after mpvk_device_init below.)
+
+// Create a vulkan instance. Returns false on failure.
+bool mpvk_instance_init(struct mpvk_ctx *vk, struct mp_log *log, bool debug);
+
+// Generate a VkSurfaceKHR usable for video output. Returns false on failure.
+// Must be called after mpvk_instance_init.
+bool mpvk_surface_init(struct vo *vo, struct mpvk_ctx *vk);
+
+// Find a suitable physical device for use with rendering and which supports
+// the surface.
+// name: only match a device with this name
+// sw: also allow software/virtual devices
+bool mpvk_find_phys_device(struct mpvk_ctx *vk, const char *name, bool sw);
+
+// Pick a suitable surface format that's supported by this physical device.
+bool mpvk_pick_surface_format(struct mpvk_ctx *vk);
+
+struct mpvk_device_opts {
+ int queue_count; // number of queues to use
+};
+
+// Create a logical device and initialize the vk_cmdpools
+bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts);
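+
+// Illustrative initialization order (a sketch only; `vo` and `log` are
+// assumed to be in scope, and the real sequence also handles options):
+//
+//     struct mpvk_device_opts dopts = { .queue_count = 1 };
+//     if (!mpvk_instance_init(vk, log, false) ||
+//         !mpvk_surface_init(vo, vk) ||
+//         !mpvk_find_phys_device(vk, NULL, false) ||
+//         !mpvk_pick_surface_format(vk) ||
+//         !mpvk_device_init(vk, dopts))
+//         goto error;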
+
+// Wait until all commands submitted to all queues have completed
+void mpvk_pool_wait_idle(struct mpvk_ctx *vk, struct vk_cmdpool *pool);
+void mpvk_dev_wait_idle(struct mpvk_ctx *vk);
+
+// Wait until at least one command submitted to any queue has completed, and
+// process the callbacks. Good for event loops that need to delay until a
+// command completes. Will block at most `timeout` nanoseconds. If used with
+// 0, it only garbage collects completed commands without blocking.
+void mpvk_pool_poll_cmds(struct mpvk_ctx *vk, struct vk_cmdpool *pool,
+ uint64_t timeout);
+void mpvk_dev_poll_cmds(struct mpvk_ctx *vk, uint64_t timeout);
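+
+// Illustrative usage: a render loop might call `mpvk_dev_poll_cmds(vk, 0)`
+// once per iteration to garbage collect finished commands without blocking.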
+
+// Since lots of vulkan operations need to be done lazily once the affected
+// resources are no longer in use, provide an abstraction for tracking these.
+// In practice, these are only checked and run when submitting new commands, so
+// the actual execution may be delayed by a frame.
+typedef void (*vk_cb)(void *priv, void *arg);
+
+struct vk_callback {
+ vk_cb run;
+ void *priv;
+ void *arg; // as a convenience, you also get to pass an arg for "free"
+};
+
+// Associate a callback with the completion of all currently pending commands.
+// This will essentially run once the device is completely idle.
+void vk_dev_callback(struct mpvk_ctx *vk, vk_cb callback, void *p, void *arg);
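+
+// Illustrative use: defer freeing a resource until the device goes idle. The
+// callback and the `priv` pointer here are hypothetical, not part of this
+// patch:
+//
+//     static void free_priv(void *priv, void *arg) { talloc_free(priv); }
+//     ...
+//     vk_dev_callback(vk, free_priv, priv, NULL);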
+
+#define MPVK_MAX_CMD_DEPS 8
+
+// Helper wrapper around command buffers that also track dependencies,
+// callbacks and synchronization primitives
+struct vk_cmd {
+ struct vk_cmdpool *pool; // pool it was allocated from
+ VkCommandBuffer buf;
+ VkFence fence; // the fence guards cmd buffer reuse
+ VkSemaphore done; // the semaphore signals when execution is done
+ // The semaphores represent dependencies that need to complete before
+ // this command can be executed. These are *not* owned by the vk_cmd
+ VkSemaphore deps[MPVK_MAX_CMD_DEPS];
+ VkPipelineStageFlags depstages[MPVK_MAX_CMD_DEPS];
+ int num_deps;
+    // Since VkFences have no completion-callback mechanism of their own, we
+    // manually track "callbacks" to fire once the VkFence completes. These are
+    // used for multiple purposes, ranging from garbage collection (resource
+    // deallocation) to fencing.
+ struct vk_callback *callbacks;
+ int num_callbacks;
+};
+
+// Associate a callback with the completion of the current command. The
+// callback will be run once the command completes, or shortly thereafter.
+void vk_cmd_callback(struct vk_cmd *cmd, vk_cb callback, void *p, void *arg);
+
+// Associate a dependency with the current command. The command will wait on
+// this semaphore, at the given pipeline stage, before it may execute.
+void vk_cmd_dep(struct vk_cmd *cmd, VkSemaphore dep,
+ VkPipelineStageFlagBits depstage);
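+
+// Illustrative use (the `acquire_sem` semaphore is hypothetical, e.g. a
+// swapchain image acquisition semaphore): make the command wait for it
+// before writing any color attachment output.
+//
+//     vk_cmd_dep(cmd, acquire_sem,
+//                VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);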
+
+#define MPVK_MAX_QUEUES 8
+#define MPVK_MAX_CMDS 64
+
+// Command pool / queue family hybrid abstraction
+struct vk_cmdpool {
+ VkQueueFamilyProperties props;
+ uint32_t qf; // queue family index
+ VkCommandPool pool;
+ VkQueue queues[MPVK_MAX_QUEUES];
+ int qcount;
+ int qindex;
+ // Command buffers associated with this queue
+ struct vk_cmd cmds[MPVK_MAX_CMDS];
+ int cindex;
+ int cindex_pending;
+};
+
+// Fetch the next command buffer from a command pool and begin recording to it.
+// Returns NULL on failure.
+struct vk_cmd *vk_cmd_begin(struct mpvk_ctx *vk, struct vk_cmdpool *pool);
+
+// Finish the currently recording command buffer and submit it for execution.
+// If `done` is not NULL, it will be set to a semaphore that signals once the
+// command completes. (The signaled semaphore MUST be consumed by a matching
+// semaphore wait.)
+// Returns whether successful.
+bool vk_cmd_submit(struct mpvk_ctx *vk, struct vk_cmd *cmd, VkSemaphore *done);
+
+// Rotate the queues for each vk_cmdpool. Call this once per frame to ensure
+// good parallelism between frames when using multiple queues
+void vk_cmd_cycle_queues(struct mpvk_ctx *vk);
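+
+// Illustrative per-frame sketch (the recording step, `image` and `clear_col`
+// are assumptions for the sake of the example, not part of this patch):
+//
+//     struct vk_cmd *cmd = vk_cmd_begin(vk, vk->pool);
+//     if (cmd) {
+//         vkCmdClearColorImage(cmd->buf, image, VK_IMAGE_LAYOUT_GENERAL,
+//                              &clear_col, 1, &vk_range);
+//         vk_cmd_submit(vk, cmd, NULL);
+//     }
+//     vk_cmd_cycle_queues(vk);
+//     mpvk_dev_poll_cmds(vk, 0);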
+
+// Predefined structs for a simple non-layered, non-mipped image
+extern const VkImageSubresourceRange vk_range;
+extern const VkImageSubresourceLayers vk_layers;
diff --git a/wscript b/wscript
index dd47956392..964b7878c7 100644
--- a/wscript
+++ b/wscript
@@ -803,6 +803,10 @@ video_output_features = [
"Aborting. If you really mean to compile without OpenGL " +
"video outputs use --disable-gl.",
}, {
+ 'name': '--vulkan',
+ 'desc': 'Vulkan context support',
+ 'func': check_cc(header_name='vulkan/vulkan.h', lib='vulkan'),
+ }, {
'name': 'egl-helpers',
'desc': 'EGL helper functions',
'deps': 'egl-x11 || mali-fbdev || rpi || gl-wayland || egl-drm || ' +
diff --git a/wscript_build.py b/wscript_build.py
index 68cfafb94f..86b51daaa2 100644
--- a/wscript_build.py
+++ b/wscript_build.py
@@ -445,6 +445,12 @@ def build(ctx):
( "video/out/w32_common.c", "win32-desktop" ),
( "video/out/win32/displayconfig.c", "win32-desktop" ),
( "video/out/win32/droptarget.c", "win32-desktop" ),
+ ( "video/out/vulkan/utils.c", "vulkan" ),
+ ( "video/out/vulkan/malloc.c", "vulkan" ),
+ ( "video/out/vulkan/formats.c", "vulkan" ),
+ ( "video/out/vulkan/ra_vk.c", "vulkan" ),
+ ( "video/out/vulkan/context.c", "vulkan" ),
+ ( "video/out/vulkan/context_xlib.c", "vulkan && x11" ),
( "video/out/win32/exclusive_hack.c", "gl-win32" ),
( "video/out/wayland_common.c", "wayland" ),
( "video/out/wayland/buffer.c", "wayland" ),