-rw-r--r--  DOCS/man/options.rst             |   36
-rw-r--r--  options/options.c                |    5
-rw-r--r--  options/options.h                |    1
-rw-r--r--  video/out/gpu/context.c          |    8
-rw-r--r--  video/out/gpu/ra.h               |    9
-rw-r--r--  video/out/vo_gpu.c               |   12
-rw-r--r--  video/out/vulkan/common.h        |   51
-rw-r--r--  video/out/vulkan/context.c       |  501
-rw-r--r--  video/out/vulkan/context.h       |   10
-rw-r--r--  video/out/vulkan/context_xlib.c  |  116
-rw-r--r--  video/out/vulkan/formats.c       |   55
-rw-r--r--  video/out/vulkan/formats.h       |   16
-rw-r--r--  video/out/vulkan/malloc.c        |  424
-rw-r--r--  video/out/vulkan/malloc.h        |   35
-rw-r--r--  video/out/vulkan/ra_vk.c         | 1590
-rw-r--r--  video/out/vulkan/ra_vk.h         |   31
-rw-r--r--  video/out/vulkan/utils.c         |  726
-rw-r--r--  video/out/vulkan/utils.h         |  153
-rw-r--r--  wscript                          |    4
-rw-r--r--  wscript_build.py                 |    6
20 files changed, 3773 insertions(+), 16 deletions(-)
diff --git a/DOCS/man/options.rst b/DOCS/man/options.rst
index b08150c6bb..80e7350292 100644
--- a/DOCS/man/options.rst
+++ b/DOCS/man/options.rst
@@ -4103,10 +4103,6 @@ The following video options are currently all specific to ``--vo=gpu`` and
the video along the temporal axis. The filter used can be controlled using
the ``--tscale`` setting.
- Note that this relies on vsync to work, see ``--opengl-swapinterval`` for
- more information. It should also only be used with an ``--fbo-format``
- that has at least 16 bit precision.
-
``--interpolation-threshold=<0..1,-1>``
Threshold below which frame ratio interpolation gets disabled (default:
``0.0001``). This is calculated as ``abs(disphz/vfps - 1) < threshold``,
@@ -4184,6 +4180,31 @@ The following video options are currently all specific to ``--vo=gpu`` and
results, as can missing or incorrect display FPS information (see
``--display-fps``).
+``--vulkan-swap-mode=<mode>``
+ Controls the presentation mode of the vulkan swapchain. This is similar
+ to the ``--opengl-swapinterval`` option.
+
+ auto
+ Use the preferred swapchain mode for the vulkan context. (Default)
+ fifo
+ Non-tearing, vsync blocked. Similar to "VSync on".
+ fifo-relaxed
+ Tearing, vsync blocked. Late frames will tear instead of stuttering.
+ mailbox
+ Non-tearing, not vsync blocked. Similar to "triple buffering".
+ immediate
+ Tearing, not vsync blocked. Similar to "VSync off".
+
+``--vulkan-queue-count=<1..8>``
+ Controls the number of VkQueues used for rendering (limited by how many
+ your device supports). In theory, using more queues could enable some
+ parallelism between frames (when using a ``--swapchain-depth`` higher than
+ 1). (Default: 1)
+
+    NOTE: Setting this to a value higher than 1 may cause graphical corruption,
+    as mpv's vulkan implementation currently does not attempt to protect textures
+    against concurrent access.
+
``--glsl-shaders=<file-list>``
Custom GLSL hooks. These are a flexible way to add custom fragment shaders,
which can be injected at almost arbitrary points in the rendering pipeline,
@@ -4590,7 +4611,7 @@ The following video options are currently all specific to ``--vo=gpu`` and
on Nvidia and AMD. Newer Intel chips with the latest drivers may also
work.
x11
- X11/GLX
+ X11/GLX, VK_KHR_xlib_surface
x11probe
For internal autoprobing, equivalent to ``x11`` otherwise. Don't use
directly, it could be removed without warning as autoprobing is changed.
@@ -5020,7 +5041,10 @@ Miscellaneous
Media files must use constant framerate. Section-wise VFR might work as well
with some container formats (but not e.g. mkv). If the sync code detects
severe A/V desync, or the framerate cannot be detected, the player
- automatically reverts to ``audio`` mode for some time or permanently.
+ automatically reverts to ``audio`` mode for some time or permanently. These
+ modes also require a vsync blocked presentation mode. For OpenGL, this
+ translates to ``--opengl-swapinterval=1``. For Vulkan, it translates to
+ ``--vulkan-swap-mode=fifo`` (or ``fifo-relaxed``).
The modes with ``desync`` in their names do not attempt to keep audio/video
in sync. They will slowly (or quickly) desync, until e.g. the next seek
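
The swap modes documented above map one-to-one onto Vulkan's VkPresentModeKHR values. A minimal sketch of that mapping, mirroring the present_modes[] table this patch adds in video/out/vulkan/context.c (the SWAP_* names come from that file; "auto" / SWAP_AUTO instead falls back to the mode preferred by the platform context):

    static const VkPresentModeKHR present_modes[] = {
        [SWAP_FIFO]         = VK_PRESENT_MODE_FIFO_KHR,         // vsync on
        [SWAP_FIFO_RELAXED] = VK_PRESENT_MODE_FIFO_RELAXED_KHR, // tear if late
        [SWAP_MAILBOX]      = VK_PRESENT_MODE_MAILBOX_KHR,      // triple buffering
        [SWAP_IMMEDIATE]    = VK_PRESENT_MODE_IMMEDIATE_KHR,    // vsync off
    };
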
diff --git a/options/options.c b/options/options.c
index 1168cc196b..6467468691 100644
--- a/options/options.c
+++ b/options/options.c
@@ -89,6 +89,7 @@ extern const struct m_obj_list vo_obj_list;
extern const struct m_obj_list ao_obj_list;
extern const struct m_sub_options opengl_conf;
+extern const struct m_sub_options vulkan_conf;
extern const struct m_sub_options angle_conf;
extern const struct m_sub_options cocoa_conf;
@@ -690,6 +691,10 @@ const m_option_t mp_opts[] = {
OPT_SUBSTRUCT("", opengl_opts, opengl_conf, 0),
#endif
+#if HAVE_VULKAN
+ OPT_SUBSTRUCT("", vulkan_opts, vulkan_conf, 0),
+#endif
+
#if HAVE_EGL_ANGLE_WIN32
OPT_SUBSTRUCT("", angle_opts, angle_conf, 0),
#endif
diff --git a/options/options.h b/options/options.h
index c02b7a34ca..63dee03612 100644
--- a/options/options.h
+++ b/options/options.h
@@ -329,6 +329,7 @@ typedef struct MPOpts {
struct gl_video_opts *gl_video_opts;
struct angle_opts *angle_opts;
struct opengl_opts *opengl_opts;
+ struct vulkan_opts *vulkan_opts;
struct cocoa_opts *cocoa_opts;
struct dvd_opts *dvd_opts;
diff --git a/video/out/gpu/context.c b/video/out/gpu/context.c
index c5721c73b4..25e2a754bf 100644
--- a/video/out/gpu/context.c
+++ b/video/out/gpu/context.c
@@ -44,6 +44,7 @@ extern const struct ra_ctx_fns ra_ctx_dxgl;
extern const struct ra_ctx_fns ra_ctx_rpi;
extern const struct ra_ctx_fns ra_ctx_mali;
extern const struct ra_ctx_fns ra_ctx_vdpauglx;
+extern const struct ra_ctx_fns ra_ctx_vulkan_xlib;
static const struct ra_ctx_fns *contexts[] = {
// OpenGL contexts:
@@ -83,6 +84,13 @@ static const struct ra_ctx_fns *contexts[] = {
#if HAVE_VDPAU_GL_X11
&ra_ctx_vdpauglx,
#endif
+
+// Vulkan contexts:
+#if HAVE_VULKAN
+#if HAVE_X11
+ &ra_ctx_vulkan_xlib,
+#endif
+#endif
};
static bool get_help(struct mp_log *log, struct bstr param)
diff --git a/video/out/gpu/ra.h b/video/out/gpu/ra.h
index 10245b250e..7a2fa0e11c 100644
--- a/video/out/gpu/ra.h
+++ b/video/out/gpu/ra.h
@@ -146,6 +146,7 @@ enum ra_buf_type {
RA_BUF_TYPE_TEX_UPLOAD, // texture upload buffer (pixel buffer object)
RA_BUF_TYPE_SHADER_STORAGE, // shader buffer (SSBO), for RA_VARTYPE_BUF_RW
RA_BUF_TYPE_UNIFORM, // uniform buffer (UBO), for RA_VARTYPE_BUF_RO
+ RA_BUF_TYPE_VERTEX, // not publicly usable (RA-internal usage)
};
struct ra_buf_params {
@@ -369,10 +370,10 @@ struct ra_fns {
void (*buf_destroy)(struct ra *ra, struct ra_buf *buf);
- // Update the contents of a buffer, starting at a given offset and up to a
- // given size, with the contents of *data. This is an extremely common
- // operation. Calling this while the buffer is considered "in use" is an
- // error. (See: buf_poll)
+ // Update the contents of a buffer, starting at a given offset (*must* be a
+ // multiple of 4) and up to a given size, with the contents of *data. This
+ // is an extremely common operation. Calling this while the buffer is
+ // considered "in use" is an error. (See: buf_poll)
void (*buf_update)(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset,
const void *data, size_t size);
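
To illustrate the tightened contract above, here is a minimal hypothetical call site (ra and buf stand in for an existing rendering abstraction and buffer; not part of the patch):

    // Offsets passed to buf_update must be a multiple of 4.
    float data[4] = {0.0f, 0.25f, 0.5f, 1.0f};
    ptrdiff_t offset = 16;              // multiple of 4, so this is allowed
    assert(offset % 4 == 0);
    ra->fns->buf_update(ra, buf, offset, data, sizeof(data));
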
diff --git a/video/out/vo_gpu.c b/video/out/vo_gpu.c
index bd245de05b..a26912e0d8 100644
--- a/video/out/vo_gpu.c
+++ b/video/out/vo_gpu.c
@@ -60,6 +60,7 @@ struct gpu_priv {
static void resize(struct gpu_priv *p)
{
struct vo *vo = p->vo;
+ struct ra_swapchain *sw = p->ctx->swapchain;
MP_VERBOSE(vo, "Resize: %dx%d\n", vo->dwidth, vo->dheight);
@@ -69,6 +70,11 @@ static void resize(struct gpu_priv *p)
gl_video_resize(p->renderer, &src, &dst, &osd);
+ int fb_depth = sw->fns->color_depth ? sw->fns->color_depth(sw) : 0;
+ if (fb_depth)
+ MP_VERBOSE(p, "Reported display depth: %d\n", fb_depth);
+ gl_video_set_fb_depth(p->renderer, fb_depth);
+
vo->want_redraw = true;
}
@@ -289,7 +295,6 @@ static int preinit(struct vo *vo)
goto err_out;
assert(p->ctx->ra);
assert(p->ctx->swapchain);
- struct ra_swapchain *sw = p->ctx->swapchain;
p->renderer = gl_video_init(p->ctx->ra, vo->log, vo->global);
gl_video_set_osd_source(p->renderer, vo->osd);
@@ -305,11 +310,6 @@ static int preinit(struct vo *vo)
vo->hwdec_devs, vo->opts->gl_hwdec_interop);
gl_video_set_hwdec(p->renderer, p->hwdec);
- int fb_depth = sw->fns->color_depth ? sw->fns->color_depth(sw) : 0;
- if (fb_depth)
- MP_VERBOSE(p, "Reported display depth: %d\n", fb_depth);
- gl_video_set_fb_depth(p->renderer, fb_depth);
-
return 0;
err_out:
diff --git a/video/out/vulkan/common.h b/video/out/vulkan/common.h
new file mode 100644
index 0000000000..4c0e783f0e
--- /dev/null
+++ b/video/out/vulkan/common.h
@@ -0,0 +1,51 @@
+#pragma once
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <assert.h>
+
+#include "config.h"
+
+#include "common/common.h"
+#include "common/msg.h"
+
+// We need to define all platforms we want to support. Since we have
+// our own mechanism for checking this, we re-define the right symbols
+#if HAVE_X11
+#define VK_USE_PLATFORM_XLIB_KHR
+#endif
+
+#include <vulkan/vulkan.h>
+
+// Vulkan allows the optional use of a custom allocator. We don't need one but
+// mark this parameter with a better name in case we ever decide to change this
+// in the future. (And to make the code more readable)
+#define MPVK_ALLOCATOR NULL
+
+// A lot of things depend on streaming resources across frames. Depending on
+// how many frames we render ahead of time, we need to pick enough to avoid
+// any conflicts, so make all of these tunable relative to this constant in
+// order to centralize them.
+#define MPVK_MAX_STREAMING_DEPTH 8
+
+// Shared struct used to hold vulkan context information
+struct mpvk_ctx {
+ struct mp_log *log;
+ VkInstance inst;
+ VkPhysicalDevice physd;
+ VkDebugReportCallbackEXT dbg;
+ VkDevice dev;
+
+ // Surface, must be initialized after the context itself
+ VkSurfaceKHR surf;
+ VkSurfaceFormatKHR surf_format; // picked at surface initialization time
+
+ struct vk_malloc *alloc; // memory allocator for this device
+ struct vk_cmdpool *pool; // primary command pool for this device
+ struct vk_cmd *last_cmd; // most recently submitted command
+
+ // Cached capabilities
+ VkPhysicalDeviceLimits limits;
+};
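
As a rough illustration of how MPVK_MAX_STREAMING_DEPTH is meant to be used (hypothetical helper, not part of the patch): per-frame resources are kept in small rings sized by this constant, so a slot is only reused once earlier in-flight frames can no longer reference it.

    // Hypothetical sketch: a ring of per-frame slots sized by the streaming depth.
    struct stream_ring {
        VkFence fence[MPVK_MAX_STREAMING_DEPTH]; // one slot per in-flight frame
        int idx;                                 // next slot to (re)use
    };

    static int stream_ring_next(struct stream_ring *ring)
    {
        int cur = ring->idx;
        ring->idx = (ring->idx + 1) % MPVK_MAX_STREAMING_DEPTH;
        return cur;
    }
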
diff --git a/video/out/vulkan/context.c b/video/out/vulkan/context.c
new file mode 100644
index 0000000000..bd456d214c
--- /dev/null
+++ b/video/out/vulkan/context.c
@@ -0,0 +1,501 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "options/m_config.h"
+#include "context.h"
+#include "ra_vk.h"
+#include "utils.h"
+
+enum {
+ SWAP_AUTO = 0,
+ SWAP_FIFO,
+ SWAP_FIFO_RELAXED,
+ SWAP_MAILBOX,
+ SWAP_IMMEDIATE,
+ SWAP_COUNT,
+};
+
+struct vulkan_opts {
+ struct mpvk_device_opts dev_opts; // logical device options
+ char *device; // force a specific GPU
+ int swap_mode;
+};
+
+static int vk_validate_dev(struct mp_log *log, const struct m_option *opt,
+ struct bstr name, struct bstr param)
+{
+ int ret = M_OPT_INVALID;
+ VkResult res;
+
+ // Create a dummy instance to validate/list the devices
+ VkInstanceCreateInfo info = {
+ .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
+ };
+
+ VkInstance inst;
+ VkPhysicalDevice *devices = NULL;
+ uint32_t num = 0;
+
+ res = vkCreateInstance(&info, MPVK_ALLOCATOR, &inst);
+ if (res != VK_SUCCESS)
+ goto error;
+
+ res = vkEnumeratePhysicalDevices(inst, &num, NULL);
+ if (res != VK_SUCCESS)
+ goto error;
+
+ devices = talloc_array(NULL, VkPhysicalDevice, num);
+ res = vkEnumeratePhysicalDevices(inst, &num, devices);
+ if (res != VK_SUCCESS)
+ goto error;
+
+ bool help = bstr_equals0(param, "help");
+ if (help) {
+ mp_info(log, "Available vulkan devices:\n");
+ ret = M_OPT_EXIT;
+ }
+
+ for (int i = 0; i < num; i++) {
+ VkPhysicalDeviceProperties prop;
+ vkGetPhysicalDeviceProperties(devices[i], &prop);
+
+ if (help) {
+ mp_info(log, " '%s' (GPU %d, ID %x:%x)\n", prop.deviceName, i,
+ (unsigned)prop.vendorID, (unsigned)prop.deviceID);
+ } else if (bstr_equals0(param, prop.deviceName)) {
+ ret = 0;
+ break;
+ }
+ }
+
+ if (!help && ret != 0)
+ mp_err(log, "No device with name '%.*s'!\n", BSTR_P(param));
+
+error:
+ talloc_free(devices);
+ return ret;
+}
+
+#define OPT_BASE_STRUCT struct vulkan_opts
+const struct m_sub_options vulkan_conf = {
+ .opts = (const struct m_option[]) {
+ OPT_STRING_VALIDATE("vulkan-device", device, 0, vk_validate_dev),
+ OPT_CHOICE("vulkan-swap-mode", swap_mode, 0,
+ ({"auto", SWAP_AUTO},
+ {"fifo", SWAP_FIFO},
+ {"fifo-relaxed", SWAP_FIFO_RELAXED},
+ {"mailbox", SWAP_MAILBOX},
+ {"immediate", SWAP_IMMEDIATE})),
+ OPT_INTRANGE("vulkan-queue-count", dev_opts.queue_count, 0, 1,
+ MPVK_MAX_QUEUES, OPTDEF_INT(1)),
+ {0}
+ },
+ .size = sizeof(struct vulkan_opts)
+};
+
+struct priv {
+ struct mpvk_ctx *vk;
+ struct vulkan_opts *opts;
+ // Swapchain metadata:
+ int w, h; // current size
+ VkSwapchainCreateInfoKHR protoInfo; // partially filled-in prototype
+ VkSwapchainKHR swapchain;
+ VkSwapchainKHR old_swapchain;
+ int frames_in_flight;
+ // state of the images:
+ struct ra_tex **images; // ra_tex wrappers for the vkimages
+ int num_images; // size of images
+ VkSemaphore *acquired; // pool of semaphores used to synchronize images
+ int num_acquired; // size of this pool
+ int idx_acquired; // index of next free semaphore within this pool
+ int last_imgidx; // the image index last acquired (for submit)
+};
+
+static bool update_swapchain_info(struct priv *p,
+ VkSwapchainCreateInfoKHR *info)
+{
+ struct mpvk_ctx *vk = p->vk;
+
+ // Query the supported capabilities and update this struct as needed
+ VkSurfaceCapabilitiesKHR caps;
+ VK(vkGetPhysicalDeviceSurfaceCapabilitiesKHR(vk->physd, vk->surf, &caps));
+
+ // Sorted by preference
+ static const VkCompositeAlphaFlagBitsKHR alphaModes[] = {
+ VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR,
+ VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR,
+ };
+
+ for (int i = 0; i < MP_ARRAY_SIZE(alphaModes); i++) {
+ if (caps.supportedCompositeAlpha & alphaModes[i]) {
+ info->compositeAlpha = alphaModes[i];
+ break;
+ }
+ }
+
+ if (!info->compositeAlpha) {
+ MP_ERR(vk, "Failed picking alpha compositing mode (caps: 0x%x)\n",
+ caps.supportedCompositeAlpha);
+ goto error;
+ }
+
+ static const VkSurfaceTransformFlagBitsKHR rotModes[] = {
+ VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR,
+ VK_SURFACE_TRANSFORM_INHERIT_BIT_KHR,
+ };
+
+ for (int i = 0; i < MP_ARRAY_SIZE(rotModes); i++) {
+ if (caps.supportedTransforms & rotModes[i]) {
+ info->preTransform = rotModes[i];
+ break;
+ }
+ }
+
+ if (!info->preTransform) {
+ MP_ERR(vk, "Failed picking surface transform mode (caps: 0x%x)\n",
+ caps.supportedTransforms);
+ goto error;
+ }
+
+ // Image count as required
+ MP_VERBOSE(vk, "Requested image count: %d (min %d max %d)\n",
+ (int)info->minImageCount, (int)caps.minImageCount,
+ (int)caps.maxImageCount);
+
+ info->minImageCount = MPMAX(info->minImageCount, caps.minImageCount);
+ if (caps.maxImageCount)
+ info->minImageCount = MPMIN(info->minImageCount, caps.maxImageCount);
+
+ // Check the extent against the allowed parameters
+ if (caps.currentExtent.width != info->imageExtent.width &&
+ caps.currentExtent.width != 0xFFFFFFFF)
+ {
+ MP_WARN(vk, "Requested width %d does not match current width %d\n",
+ (int)info->imageExtent.width, (int)caps.currentExtent.width);
+ info->imageExtent.width = caps.currentExtent.width;
+ }
+
+ if (caps.currentExtent.height != info->imageExtent.height &&
+ caps.currentExtent.height != 0xFFFFFFFF)
+ {
+ MP_WARN(vk, "Requested height %d does not match current height %d\n",
+ (int)info->imageExtent.height, (int)caps.currentExtent.height);
+ info->imageExtent.height = caps.currentExtent.height;
+ }
+
+ if (caps.minImageExtent.width > info->imageExtent.width ||
+ caps.minImageExtent.height > info->imageExtent.height)
+ {
+ MP_ERR(vk, "Requested size %dx%d smaller than device minimum %d%d\n",
+ (int)info->imageExtent.width, (int)info->imageExtent.height,
+ (int)caps.minImageExtent.width, (int)caps.minImageExtent.height);
+ goto error;
+ }
+
+ if (caps.maxImageExtent.width < info->imageExtent.width ||
+ caps.maxImageExtent.height < info->imageExtent.height)
+ {
+ MP_ERR(vk, "Requested size %dx%d larger than device maximum %d%d\n",
+ (int)info->imageExtent.width, (int)info->imageExtent.height,
+ (int)caps.maxImageExtent.width, (int)caps.maxImageExtent.height);
+ goto error;
+ }
+
+ // We just request whatever usage we can, and let the ra_vk decide what
+ // ra_tex_params that translates to. This makes the images as flexible
+ // as possible.
+ info->imageUsage = caps.supportedUsageFlags;
+ return true;
+
+error:
+ return false;
+}
+
+void ra_vk_ctx_uninit(struct ra_ctx *ctx)
+{
+ if (ctx->ra) {
+ struct priv *p = ctx->swapchain->priv;
+ struct mpvk_ctx *vk = p->vk;
+
+ mpvk_pool_wait_idle(vk, vk->pool);
+
+ for (int i = 0; i < p->num_images; i++)
+ ra_tex_free(ctx->ra, &p->images[i]);
+ for (int i = 0; i < p->num_acquired; i++)
+ vkDestroySemaphore(vk->dev, p->acquired[i], MPVK_ALLOCATOR);
+
+ vkDestroySwapchainKHR(vk->dev, p->swapchain, MPVK_ALLOCATOR);
+
+ talloc_free(p->images);
+ talloc_free(p->acquired);
+ ctx->ra->fns->destroy(ctx->ra);
+ ctx->ra = NULL;
+ }
+
+ talloc_free(ctx->swapchain);
+ ctx->swapchain = NULL;
+}
+
+static const struct ra_swapchain_fns vulkan_swapchain;
+
+bool ra_vk_ctx_init(struct ra_ctx *ctx, struct mpvk_ctx *vk,
+ VkPresentModeKHR preferred_mode)
+{
+ struct ra_swapchain *sw = ctx->swapchain = talloc_zero(NULL, struct ra_swapchain);
+ sw->ctx = ctx;
+ sw->fns = &vulkan_swapchain;
+
+ struct priv *p = sw->priv = talloc_zero(sw, struct priv);
+ p->vk = vk;
+ p->opts = mp_get_config_group(p, ctx->global, &vulkan_conf);
+
+ if (!mpvk_find_phys_device(vk, p->opts->device, ctx->opts.allow_sw))
+ goto error;
+ if (!mpvk_pick_surface_format(vk))
+ goto error;
+ if (!mpvk_device_init(vk, p->opts->dev_opts))
+ goto error;
+
+ ctx->ra = ra_create_vk(vk, ctx->log);
+ if (!ctx->ra)
+ goto error;
+
+ static const VkPresentModeKHR present_modes[SWAP_COUNT] = {
+ [SWAP_FIFO] = VK_PRESENT_MODE_FIFO_KHR,
+ [SWAP_FIFO_RELAXED] = VK_PRESENT_MODE_FIFO_RELAXED_KHR,
+ [SWAP_MAILBOX] = VK_PRESENT_MODE_MAILBOX_KHR,
+ [SWAP_IMMEDIATE] = VK_PRESENT_MODE_IMMEDIATE_KHR,
+ };
+
+ p->protoInfo = (VkSwapchainCreateInfoKHR) {
+ .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
+ .surface = vk->surf,
+ .imageFormat = vk->surf_format.format,
+ .imageColorSpace = vk->surf_format.colorSpace,
+ .imageArrayLayers = 1, // non-stereoscopic
+ .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .minImageCount = ctx->opts.swapchain_depth + 1, // +1 for FB
+ .presentMode = p->opts->swap_mode ? present_modes[p->opts->swap_mode]
+ : preferred_mode,
+ .clipped = true,
+ };
+
+ // Make sure the swapchain present mode is supported
+ int num_modes;
+ VK(vkGetPhysicalDeviceSurfacePresentModesKHR(vk->physd, vk->surf,
+ &num_modes, NULL));
+ VkPresentModeKHR *modes = talloc_array(NULL, VkPresentModeKHR, num_modes);
+ VK(vkGetPhysicalDeviceSurfacePresentModesKHR(vk->physd, vk->surf,
+ &num_modes, modes));
+ bool supported = false;
+ for (int i = 0; i < num_modes; i++)
+ supported |= (modes[i] == p->protoInfo.presentMode);
+ talloc_free(modes);
+
+ if (!supported) {
+ MP_ERR(ctx, "Requested swap mode unsupported by this device!\n");
+ goto error;
+ }
+
+ return true;
+
+error:
+ ra_vk_ctx_uninit(ctx);
+ return false;
+}
+
+static void destroy_swapchain(struct mpvk_ctx *vk, struct priv *p)
+{
+ assert(p->old_swapchain);
+ vkDestroySwapchainKHR(vk->dev, p->old_swapchain, MPVK_ALLOCATOR);
+ p->old_swapchain = NULL;
+}
+
+bool ra_vk_ctx_resize(struct ra_swapchain *sw, int w, int h)
+{
+ struct priv *p = sw->priv;
+ if (w == p->w && h == p->h)
+ return true;
+
+ struct ra *ra = sw->ctx->ra;
+ struct mpvk_ctx *vk = p->vk;
+ VkImage *vkimages = NULL;
+
+ // It's invalid to trigger another swapchain recreation while there's
+ // more than one swapchain already active, so we need to flush any pending
+ // asynchronous swapchain release operations that may be ongoing.
+ while (p->old_swapchain)
+ mpvk_dev_poll_cmds(vk, 100000); // 100μs
+
+ VkSwapchainCreateInfoKHR sinfo = p->protoInfo;
+ sinfo.imageExtent = (VkExtent2D){ w, h };
+ sinfo.oldSwapchain = p->swapchain;
+
+ if (!update_swapchain_info(p, &sinfo))
+ goto error;
+
+ VK(vkCreateSwapchainKHR(vk->dev, &sinfo, MPVK_ALLOCATOR, &p->swapchain));
+ p->w = w;
+ p->h = h;
+
+ // Freeing the old swapchain while it's still in use is an error, so do
+ // it asynchronously once the device is idle.
+ if (sinfo.oldSwapchain) {
+ p->old_swapchain = sinfo.oldSwapchain;
+ vk_dev_callback(vk, (vk_cb) destroy_swapchain, vk, p);
+ }
+
+ // Get the new swapchain images
+ int num;
+ VK(vkGetSwapchainImagesKHR(vk->dev, p->swapchain, &num, NULL));
+ vkimages = talloc_array(NULL, VkImage, num);
+ VK(vkGetSwapchainImagesKHR(vk->dev, p->swapchain, &num, vkimages));
+
+ // If needed, allocate some more semaphores
+ while (num > p->num_acquired) {
+ VkSemaphore sem;
+ static const VkSemaphoreCreateInfo seminfo = {
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+ };
+ VK(vkCreateSemaphore(vk->dev, &seminfo, MPVK_ALLOCATOR, &sem));
+ MP_TARRAY_APPEND(NULL, p->acquired, p->num_acquired, sem);
+ }
+
+ // Recreate the ra_tex wrappers
+ for (int i = 0; i < p->num_images; i++)
+ ra_tex_free(ra, &p->images[i]);
+
+ p->num_images = num;
+ MP_TARRAY_GROW(NULL, p->images, p->num_images);
+ for (int i = 0; i < num; i++) {
+ p->images[i] = ra_vk_wrap_swapchain_img(ra, vkimages[i], sinfo);
+ if (!p->images[i])
+ goto error;
+ }
+
+ talloc_free(vkimages);
+ return true;
+
+error:
+ talloc_free(vkimages);
+ vkDestroySwapchainKHR(vk->dev, p->swapchain, MPVK_ALLOCATOR);
+ p->swapchain = NULL;
+ return false;
+}
+
+static int color_depth(struct ra_swapchain *sw)
+{
+ struct priv *p = sw->priv;
+ int bits = 0;
+
+ if (!p->num_images)
+ return bits;
+
+ // The channel with the most bits is probably the most authoritative about
+ // the actual color information (consider e.g. a2bgr10). Slight downside
+ // in that it results in rounding r/b for e.g. rgb565, but we don't pick
+ // surfaces with fewer than 8 bits anyway.
+ const struct ra_format *fmt = p->images[0]->params.format;
+ for (int i = 0; i < fmt->num_components; i++) {
+ int depth = fmt->component_depth[i];
+ bits = MPMAX(bits, depth ? depth : fmt->component_size[i]);
+ }
+
+ return bits;
+}
+
+static bool start_frame(struct ra_swapchain *sw, struct ra_fbo *out_fbo)
+{
+ struct priv *p = sw->priv;
+ struct mpvk_ctx *vk = p->vk;
+ if (!p->swapchain)
+ goto error;
+
+ uint32_t imgidx = 0;
+ MP_TRACE(vk, "vkAcquireNextImageKHR\n");
+ VkResult res = vkAcquireNextImageKHR(vk->dev, p->swapchain, UINT64_MAX,
+ p->acquired[p->idx_acquired], NULL,
+ &imgidx);
+ if (res == VK_ERROR_OUT_OF_DATE_KHR)
+ goto error; // just return in this case
+ VK_ASSERT(res, "Failed acquiring swapchain image");
+
+ p->last_imgidx = imgidx;
+ *out_fbo = (struct ra_fbo) {
+ .tex = p->images[imgidx],
+ .flip = false,
+ };
+ return true;
+
+error:
+ return false;
+}
+
+static bool submit_frame(struct ra_swapchain *sw, const struct vo_frame *frame)
+{
+ struct priv *p = sw->priv;
+ struct ra *ra = sw->ctx->ra;
+ struct mpvk_ctx *vk = p->vk;
+ if (!p->swapchain)
+ goto error;
+
+ VkSemaphore acquired = p->acquired[p->idx_acquired++];
+ p->idx_acquired %= p->num_acquired;
+
+ VkSemaphore done;
+ if (!ra_vk_submit(ra, p->images[p->last_imgidx], acquired, &done,
+ &p->frames_in_flight))
+ goto error;
+
+ // For some reason, nvidia absolutely shits itself when presenting from a
+ // full queue - so advance all of the cmdpool indices first and then do the
+ // present on an "empty" queue
+ vk_cmd_cycle_queues(vk);
+ struct vk_cmdpool *pool = vk->pool;
+ VkQueue queue = pool->queues[pool->qindex];
+
+ VkPresentInfoKHR pinfo = {
+ .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
+ .waitSemaphoreCount = 1,
+ .pWaitSemaphores = &done,
+ .swapchainCount = 1,
+ .pSwapchains = &p->swapchain,
+ .pImageIndices = &p->last_imgidx,
+ };
+
+ VK(vkQueuePresentKHR(queue, &pinfo));
+ return true;
+
+error:
+ return false;
+}
+
+static void swap_buffers(struct ra_swapchain *sw)
+{
+ struct priv *p = sw->priv;
+
+ while (p->frames_in_flight >= sw->ctx->opts.swapchain_depth)
+ mpvk_dev_poll_cmds(p->vk, 100000); // 100μs
+}
+
+static const struct ra_swapchain_fns vulkan_swapchain = {
+ // .screenshot is not currently supported
+ .color_depth = color_depth,
+ .start_frame = start_frame,
+ .submit_frame = submit_frame,
+ .swap_buffers = swap_buffers,
+};
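
For orientation, the callbacks in vulkan_swapchain above are driven once per frame by the generic vo_gpu code; a simplified sketch of that calling sequence (not the actual vo_gpu implementation):

    static bool render_one_frame(struct ra_swapchain *sw)
    {
        struct ra_fbo fbo;
        if (!sw->fns->start_frame(sw, &fbo))  // acquires the next swapchain image
            return false;
        // ... draw the video into fbo.tex here ...
        if (!sw->fns->submit_frame(sw, NULL)) // submits rendering and presents
            return false;
        sw->fns->swap_buffers(sw);            // throttle to --swapchain-depth
        return true;
    }
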
diff --git a/video/out/vulkan/context.h b/video/out/vulkan/context.h
new file mode 100644
index 0000000000..3f630bc10e
--- /dev/null
+++ b/video/out/vulkan/context.h
@@ -0,0 +1,10 @@
+#pragma once
+
+#include "video/out/gpu/context.h"
+#include "common.h"
+
+// Helpers for ra_ctx based on ra_vk. These initialize ctx->ra and ctx->swapchain.
+void ra_vk_ctx_uninit(struct ra_ctx *ctx);
+bool ra_vk_ctx_init(struct ra_ctx *ctx, struct mpvk_ctx *vk,
+ VkPresentModeKHR preferred_mode);
+bool ra_vk_ctx_resize(struct ra_swapchain *sw, int w, int h);
diff --git a/video/out/vulkan/context_xlib.c b/video/out/vulkan/context_xlib.c
new file mode 100644
index 0000000000..2611fbb706
--- /dev/null
+++ b/video/out/vulkan/context_xlib.c
@@ -0,0 +1,116 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "video/out/gpu/context.h"
+#include "video/out/x11_common.h"
+
+#include "common.h"
+#include "context.h"
+#include "utils.h"
+
+struct priv {
+ struct mpvk_ctx vk;
+};
+
+static void xlib_uninit(struct ra_ctx *ctx)
+{
+ struct priv *p = ctx->priv;
+
+ ra_vk_ctx_uninit(ctx);
+ mpvk_uninit(&p->vk);
+ vo_x11_uninit(ctx->vo);
+}
+
+static bool xlib_init(struct ra_ctx *ctx)
+{
+ struct priv *p = ctx->priv = talloc_zero(ctx, struct priv);
+ struct mpvk_ctx *vk = &p->vk;
+ int msgl = ctx->opts.probing ? MSGL_V : MSGL_ERR;
+
+ if (!vo_x11_init(ctx->vo))
+ goto error;
+
+ if (!vo_x11_create_vo_window(ctx->vo, NULL, "mpvk"))
+ goto error;
+
+ if (!mpvk_instance_init(vk, ctx->log, ctx->opts.debug))
+ goto error;
+
+ VkXlibSurfaceCreateInfoKHR xinfo = {
+ .sType = VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR,
+ .dpy = ctx->vo->x11->display,
+ .window = ctx->vo->x11->window,
+ };
+
+ VkResult res = vkCreateXlibSurfaceKHR(vk->inst, &xinfo, MPVK_ALLOCATOR,
+ &vk->surf);
+ if (res != VK_SUCCESS) {
+ MP_MSG(ctx, msgl, "Failed creating Xlib surface: %s\n", vk_err(res));
+ goto error;
+ }
+
+ if (!ra_vk_ctx_init(ctx, vk, VK_PRESENT_MODE_FIFO_KHR))
+ goto error;
+
+ return true;
+
+error:
+ xlib_uninit(ctx);
+ return false;
+}
+
+static bool resize(struct ra_ctx *ctx)
+{
+ return ra_vk_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight);
+}
+
+static bool xlib_reconfig(struct ra_ctx *ctx)
+{
+ vo_x11_config_vo_window(ctx->vo);
+ return resize(ctx);
+}
+
+static int xlib_control(struct ra_ctx *ctx, int *events, int request, void *arg)
+{
+ int ret = vo_x11_control(ctx->vo, events, request, arg);
+ if (*events & VO_EVENT_RESIZE) {
+ if (!resize(ctx))
+ return VO_ERROR;
+ }
+ return ret;
+}
+
+static void xlib_wakeup(struct ra_ctx *ctx)
+{
+ vo_x11_wakeup(ctx->vo);
+}
+
+static void xlib_wait_events(struct ra_ctx *ctx, int64_t until_time_us)
+{
+ vo_x11_wait_events(ctx->vo, until_time_us);
+}
+
+const struct ra_ctx_fns ra_ctx_vulkan_xlib = {
+ .type = "vulkan",
+ .name = "x11",
+ .reconfig = xlib_reconfig,
+ .control = xlib_control,
+ .wakeup = xlib_wakeup,
+ .wait_events = xlib_wait_events,
+ .init = xlib_init,
+ .uninit = xlib_uninit,
+};
diff --git a/video/out/vulkan/formats.c b/video/out/vulkan/formats.c
new file mode 100644
index 0000000000..b44bead99c
--- /dev/null
+++ b/video/out/vulkan/formats.c
@@ -0,0 +1,55 @@
+#include "formats.h"
+
+const struct vk_format vk_formats[] = {
+ // Regular, byte-aligned integer formats
+ {"r8", VK_FORMAT_R8_UNORM, 1, 1, {8 }, RA_CTYPE_UNORM },
+ {"rg8", VK_FORMAT_R8G8_UNORM, 2, 2, {8, 8 }, RA_CTYPE_UNORM },
+ {"rgb8", VK_FORMAT_R8G8B8_UNORM, 3, 3, {8, 8, 8 }, RA_CTYPE_UNORM },
+ {"rgba8", VK_FORMAT_R8G8B8A8_UNORM, 4, 4, {8, 8, 8, 8 }, RA_CTYPE_UNORM },
+ {"r16", VK_FORMAT_R16_UNORM, 1, 2, {16 }, RA_CTYPE_UNORM },
+ {"rg16", VK_FORMAT_R16G16_UNORM, 2, 4, {16, 16 }, RA_CTYPE_UNORM },
+ {"rgb16", VK_FORMAT_R16G16B16_UNORM, 3, 6, {16, 16, 16 }, RA_CTYPE_UNORM },
+ {"rgba16", VK_FORMAT_R16G16B16A16_UNORM, 4, 8, {16, 16, 16, 16}, RA_CTYPE_UNORM },
+
+ // Special, integer-only formats
+ {"r32ui", VK_FORMAT_R32_UINT, 1, 4, {32 }, RA_CTYPE_UINT },
+ {"rg32ui", VK_FORMAT_R32G32_UINT, 2, 8, {32, 32 }, RA_CTYPE_UINT },
+ {"rgb32ui", VK_FORMAT_R32G32B32_UINT, 3, 12, {32, 32, 32 }, RA_CTYPE_UINT },
+ {"rgba32ui", VK_FORMAT_R32G32B32A32_UINT, 4, 16, {32, 32, 32, 32}, RA_CTYPE_UINT },
+ {"r64ui", VK_FORMAT_R64_UINT, 1, 8, {64 }, RA_CTYPE_UINT },
+ {"rg64ui", VK_FORMAT_R64G64_UINT, 2, 16, {64, 64 }, RA_CTYPE_UINT },
+ {"rgb64ui", VK_FORMAT_R64G64B64_UINT, 3, 24, {64, 64, 64 }, RA_CTYPE_UINT },
+ {"rgba64ui", VK_FORMAT_R64G64B64A64_UINT, 4, 32, {64, 64, 64, 64}, RA_CTYPE_UINT },
+
+ // Packed integer formats
+ {"rg4", VK_FORMAT_R4G4_UNORM_PACK8, 2, 1, {4, 4 }, RA_CTYPE_UNORM },
+ {"rgba4", VK_FORMAT_R4G4B4A4_UNORM_PACK16, 4, 2, {4, 4, 4, 4 }, RA_CTYPE_UNORM },
+ {"rgb565", VK_FORMAT_R5G6B5_UNORM_PACK16, 3, 2, {5, 6, 5 }, RA_CTYPE_UNORM },
+ {"rgb565a1", VK_FORMAT_R5G5B5A1_UNORM_PACK16, 4, 2, {5, 5, 5, 1 }, RA_CTYPE_UNORM },
+
+ // Float formats (native formats, hf = half float, df = double float)
+ {"r16hf", VK_FORMAT_R16_SFLOAT, 1, 2, {16 }, RA_CTYPE_FLOAT },
+ {"rg16hf", VK_FORMAT_R16G16_SFLOAT, 2, 4, {16, 16 }, RA_CTYPE_FLOAT },
+ {"rgb16hf", VK_FORMAT_R16G16B16_SFLOAT, 3, 6, {16, 16, 16 }, RA_CTYPE_FLOAT },
+ {"rgba16hf", VK_FORMAT_R16G16B16A16_SFLOAT, 4, 8, {16, 16, 16, 16}, RA_CTYPE_FLOAT },
+ {"r32f", VK_FORMAT_R32_SFLOAT, 1, 4, {32 }, RA_CTYPE_FLOAT },
+ {"rg32f", VK_FORMAT_R32G32_SFLOAT, 2, 8, {32, 32 }, RA_CTYPE_FLOAT },
+ {"rgb32f", VK_FORMAT_R32G32B32_SFLOAT, 3, 12, {32, 32, 32 }, RA_CTYPE_FLOAT },
+ {"rgba32f", VK_FORMAT_R32G32B32A32_SFLOAT, 4, 16, {32, 32, 32, 32}, RA_CTYPE_FLOAT },
+ {"r64df", VK_FORMAT_R64_SFLOAT, 1, 8, {64 }, RA_CTYPE_FLOAT },
+ {"rg64df", VK_FORMAT_R64G64_SFLOAT, 2, 16, {64, 64 }, RA_CTYPE_FLOAT },
+ {"rgb64df", VK_FORMAT_R64G64B64_SFLOAT, 3, 24, {64, 64, 64 }, RA_CTYPE_FLOAT },
+ {"rgba64df", VK_FORMAT_R64G64B64A64_SFLOAT, 4, 32, {64, 64, 64, 64}, RA_CTYPE_FLOAT },
+
+ // "Swapped" component order images
+ {"bgr8", VK_FORMAT_B8G8R8_UNORM, 3, 3, {8, 8, 8 }, RA_CTYPE_UNORM, true },
+ {"bgra8", VK_FORMAT_B8G8R8A8_UNORM, 4, 4, {8, 8, 8, 8 }, RA_CTYPE_UNORM, true },
+ {"bgra4", VK_FORMAT_B4G4R4A4_UNORM_PACK16, 4, 2, {4, 4, 4, 4 }, RA_CTYPE_UNORM, true },
+ {"bgr565", VK_FORMAT_B5G6R5_UNORM_PACK16, 3, 2, {5, 6, 5 }, RA_CTYPE_UNORM, true },
+ {"bgr565a1", VK_FORMAT_B5G5R5A1_UNORM_PACK16, 4, 2, {5, 5, 5, 1 }, RA_CTYPE_UNORM, true },
+ {"a1rgb5", VK_FORMAT_A1R5G5B5_UNORM_PACK16, 4, 2, {1, 5, 5, 5 }, RA_CTYPE_UNORM, true },
+ {"a2rgb10", VK_FORMAT_A2R10G10B10_UNORM_PACK32, 4, 4, {2, 10, 10, 10}, RA_CTYPE_UNORM, true },
+ {"a2bgr10", VK_FORMAT_A2B10G10R10_UNORM_PACK32, 4, 4, {2, 10, 10, 10}, RA_CTYPE_UNORM, true },
+ {"abgr8", VK_FORMAT_A8B8G8R8_UNORM_PACK32, 4, 4, {8, 8, 8, 8 }, RA_CTYPE_UNORM, true },
+ {0}
+};
diff --git a/video/out/vulkan/formats.h b/video/out/vulkan/formats.h
new file mode 100644
index 0000000000..22782a6958
--- /dev/null
+++ b/video/out/vulkan/formats.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include "video/out/gpu/ra.h"
+#include "common.h"
+
+struct vk_format {
+ const char *name;
+ VkFormat iformat; // vulkan format enum
+ int components; // how many components are there
+ int bytes; // how many bytes is a texel
+ int bits[4]; // how many bits per component
+ enum ra_ctype ctype; // format representation type
+ bool fucked_order; // used for formats which are not simply rgba
+};
+
+extern const struct vk_format vk_formats[];
diff --git a/video/out/vulkan/malloc.c b/video/out/vulkan/malloc.c
new file mode 100644
index 0000000000..31fcd36ddb
--- /dev/null
+++ b/video/out/vulkan/malloc.c
@@ -0,0 +1,424 @@
+#include "malloc.h"
+#include "utils.h"
+#include "osdep/timer.h"
+
+// Controls the multiplication factor for new slab allocations. The new slab
+// will always be allocated such that the size of the slab is this factor times
+// the previous slab. Higher values make it grow faster.
+#define MPVK_HEAP_SLAB_GROWTH_RATE 4
+
+// Controls the minimum slab size, to reduce the frequency at which very small
+// slabs would need to get allocated when allocating the first few buffers.
+// (Default: 1 MB)
+#define MPVK_HEAP_MINIMUM_SLAB_SIZE (1 << 20)
+
+// Controls the maximum slab size, to reduce the effect of unbounded slab
+// growth exhausting memory. If the application needs a single allocation
+// that's bigger than this value, it will be allocated directly from the
+// device. (Default: 512 MB)
+#define MPVK_HEAP_MAXIMUM_SLAB_SIZE (1 << 29)
+
+// Controls the minimum free region size, to reduce thrashing the free space
+// map with lots of small buffers during uninit. (Default: 1 KB)
+#define MPVK_HEAP_MINIMUM_REGION_SIZE (1 << 10)
+
+// Represents a region of available memory
+struct vk_region {
+ size_t start; // first offset in region
+ size_t end; // first offset *not* in region
+};
+
+static inline size_t region_len(struct vk_region r)
+{
+ return r.end - r.start;
+}
+
+// A single slab represents a contiguous region of allocated memory. Actual
+// allocations are served as slices of this. Slabs are organized into linked
+// lists, which represent individual heaps.
+struct vk_slab {
+ VkDeviceMemory mem; // underlying device allocation
+ size_t size; // total size of `slab`
+ size_t used; // number of bytes actually in use (for GC accounting)
+ bool dedicated; // slab is allocated specifically for one object
+ // free space map: a sorted list of memory regions that are available
+ struct vk_region *regions;
+ int num_regions;
+ // optional, depends on the memory type:
+ VkBuffer buffer; // buffer spanning the entire slab
+ void *data; // mapped memory corresponding to `mem`
+};
+
+// Represents a single memory heap. We keep track of a vk_heap for each
+// combination of buffer type and memory selection parameters. This shouldn't
+// actually be that many in practice, because some combinations simply never
+// occur, and others will generally be the same for the same objects.
+struct vk_heap {
+ VkBufferUsageFlagBits usage; // the buffer usage type (or 0)
+ VkMemoryPropertyFlagBits flags; // the memory type flags (or 0)
+ uint32_t typeBits; // the memory type index requirements (or 0)
+ struct vk_slab **slabs; // array of slabs sorted by size
+ int num_slabs;
+};
+
+// The overall state of the allocator, which keeps track of a vk_heap for each
+// memory type.
+struct vk_malloc {
+ VkPhysicalDeviceMemoryProperties props;
+ struct vk_heap *heaps;
+ int num_heaps;
+};
+
+static void slab_free(struct mpvk_ctx *vk, struct vk_slab *slab)
+{
+ if (!slab)
+ return;
+
+ assert(slab->used == 0);
+
+ int64_t start = mp_time_us();
+ vkDestroyBuffer(vk->dev, slab->buffer, MPVK_ALLOCATOR);
+ // also implicitly unmaps the memory if needed
+ vkFreeMemory(vk->dev, slab->mem, MPVK_ALLOCATOR);
+ int64_t stop = mp_time_us();
+
+ MP_VERBOSE(vk, "Freeing slab of size %zu took %lld μs.\n",
+ slab->size, (long long)(stop - start));
+
+ talloc_free(slab);
+}
+
+static bool find_best_memtype(struct mpvk_ctx *vk, uint32_t typeBits,
+ VkMemoryPropertyFlagBits flags,
+ VkMemoryType *out_type, int *out_index)
+{
+ struct vk_malloc *ma = vk->alloc;
+
+ // The vulkan spec requires memory types to be sorted in the "optimal"
+ // order, so the first matching type we find will be the best/fastest one.
+ for (int i = 0; i < ma->props.memoryTypeCount; i++) {
+ // The memory type flags must include our properties
+ if ((ma->props.memoryTypes[i].propertyFlags & flags) != flags)
+ continue;
+ // The memory type must be supported by the requirements (bitfield)
+ if (typeBits && !(typeBits & (1 << i)))
+ continue;
+ *out_type = ma->props.memoryTypes[i];
+ *out_index = i;
+ return true;
+ }
+
+ MP_ERR(vk, "Found no memory type matching property flags 0x%x and type "
+ "bits 0x%x!\n", flags, (unsigned)typeBits);
+ return false;
+}
+
+static struct vk_slab *slab_alloc(struct mpvk_ctx *vk, struct vk_heap *heap,
+ size_t size)
+{
+ struct vk_slab *slab = talloc_ptrtype(NULL, slab);
+ *slab = (struct vk_slab) {
+ .size = size,
+ };
+
+ MP_TARRAY_APPEND(slab, slab->regions, slab->num_regions, (struct vk_region) {
+ .start = 0,
+ .end = slab->size,
+ });
+
+ VkMemoryAllocateInfo minfo = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+ .allocationSize = slab->size,
+ };
+
+ uint32_t typeBits = heap->typeBits ? heap->typeBits : UINT32_MAX;
+ if (heap->usage) {
+ VkBufferCreateInfo binfo = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .size = slab->size,
+ .usage = heap->usage,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ };
+
+ VK(vkCreateBuffer(vk->dev, &binfo, MPVK_ALLOCATOR, &slab->buffer));
+
+ VkMemoryRequirements reqs;
+ vkGetBufferMemoryRequirements(vk->dev, slab->buffer, &reqs);
+ minfo.allocationSize = reqs.size; // this can be larger than slab->size
+ typeBits &= reqs.memoryTypeBits; // this can restrict the types
+ }
+
+ VkMemoryType type;
+ int index;
+ if (!find_best_memtype(vk, typeBits, heap->flags, &type, &index))
+ goto error;
+
+ MP_VERBOSE(vk, "Allocating %zu memory of type 0x%x (id %d) in heap %d.\n",
+ slab->size, type.propertyFlags, index, (int)type.heapIndex);
+
+ minfo.memoryTypeIndex = index;
+ VK(vkAllocateMemory(vk->dev, &minfo, MPVK_ALLOCATOR, &slab->mem));
+
+ if (heap->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
+ VK(vkMapMemory(vk->dev, slab->mem, 0, VK_WHOLE_SIZE, 0, &slab->data));
+
+ if (slab->buffer)
+ VK(vkBindBufferMemory(vk->dev, slab->buffer, slab->mem, 0));
+
+ return slab;
+
+error:
+ slab_free(vk, slab);
+ return NULL;
+}
+
+static void insert_region(struct vk_slab *slab, struct vk_region region)
+{
+ if (region.start == region.end)
+ return;
+
+ bool big_enough = region_len(region) >= MPVK_HEAP_MINIMUM_REGION_SIZE;
+
+ // Find the index of the first region that comes after this
+ for (int i = 0; i < slab->num_regions; i++) {
+ struct vk_region *r = &slab->regions[i];
+
+ // Check for a few special cases which can be coalesced
+ if (r->end == region.start) {
+ // The new region is at the tail of this region. In addition to
+ // modifying this region, we also need to coalesce all the following
+ // regions for as long as possible
+ r->end = region.end;
+
+ struct vk_region *next = &slab->regions[i+1];
+ while (i+1 < slab->num_regions && r->end == next->start) {
+ r->end = next->end;
+ MP_TARRAY_REMOVE_AT(slab->regions, slab->num_regions, i+1);
+ }
+ return;
+ }
+
+ if (r->start == region.end) {
+ // The new region is at the head of this region. We don't need to
+ // do anything special here - because if this could be further
+ // coalesced backwards, the previous loop iteration would already
+ // have caught it.
+ r->start = region.start;
+ return;
+ }
+
+ if (r->start > region.start) {
+ // The new region comes somewhere before this region, so insert
+ // it into this index in the array.
+ if (big_enough) {
+ MP_TARRAY_INSERT_AT(slab, slab->regions, slab->num_regions,
+ i, region);
+ }
+ return;
+ }
+ }
+
+ // If we've reached the end of this loop, then all of the regions
+ // come before the new region, and are disconnected - so append it
+ if (big_enough)
+ MP_TARRAY_APPEND(slab, slab->regions, slab->num_regions, region);
+}
+
+static void heap_uninit(struct mpvk_ctx *vk, struct vk_heap *heap)
+{
+ for (int i = 0; i < heap->num_slabs; i++)
+ slab_free(vk, heap->slabs[i]);
+
+ talloc_free(heap->slabs);
+ *heap = (struct vk_heap){0};
+}
+
+void vk_malloc_init(struct mpvk_ctx *vk)
+{
+ assert(vk->physd);
+ vk->alloc = talloc_zero(NULL, struct vk_malloc);
+ vkGetPhysicalDeviceMemoryProperties(vk->physd, &vk->alloc->props);
+}
+
+void vk_malloc_uninit(struct mpvk_ctx *vk)
+{
+ struct vk_malloc *ma = vk->alloc;
+ if (!ma)
+ return;
+
+ for (int i = 0; i < ma->num_heaps; i++)
+ heap_uninit(vk, &ma->heaps[i]);
+
+ talloc_free(ma);
+ vk->alloc = NULL;
+}
+
+void vk_free_memslice(struct mpvk_ctx *vk, struct vk_memslice slice)
+{
+ struct vk_slab *slab = slice.priv;
+ if (!slab)
+ return;
+
+ assert(slab->used >= slice.size);
+ slab->used -= slice.size;
+
+ MP_DBG(vk, "Freeing slice %zu + %zu from slab with size %zu\n",
+ slice.offset, slice.size, slab->size);
+
+ if (slab->dedicated) {
+ // If the slab was purpose-allocated for this memslice, we can just
+ // free it here
+ slab_free(vk, slab);
+ } else {
+ // Return the allocation to the free space map
+ insert_region(slab, (struct vk_region) {
+ .start = slice.offset,
+ .end = slice.offset + slice.size,
+ });
+ }
+}
+
+// reqs: can be NULL
+static struct vk_heap *find_heap(struct mpvk_ctx *vk,
+ VkBufferUsageFlagBits usage,
+ VkMemoryPropertyFlagBits flags,
+ VkMemoryRequirements *reqs)
+{
+ struct vk_malloc *ma = vk->alloc;
+ int typeBits = reqs ? reqs->memoryTypeBits : 0;
+
+ for (int i = 0; i < ma->num_heaps; i++) {
+ if (ma->heaps[i].usage != usage)
+ continue;
+ if (ma->heaps[i].flags != flags)
+ continue;
+ if (ma->heaps[i].typeBits != typeBits)
+ continue;
+ return &ma->heaps[i];
+ }
+
+ // Not found => add it
+ MP_TARRAY_GROW(ma, ma->heaps, ma->num_heaps + 1);
+ struct vk_heap *heap = &ma->heaps[ma->num_heaps++];
+ *heap = (struct vk_heap) {
+ .usage = usage,
+ .flags = flags,
+ .typeBits = typeBits,
+ };
+ return heap;
+}
+
+static inline bool region_fits(struct vk_region r, size_t size, size_t align)
+{
+ return MP_ALIGN_UP(r.start, align) + size <= r.end;
+}
+
+// Finds the best-fitting region in a heap. If the heap is too small or too
+// fragmented, a new slab will be allocated under the hood.
+static bool heap_get_region(struct mpvk_ctx *vk, struct vk_heap *heap,
+ size_t size, size_t align,
+ struct vk_slab **out_slab, int *out_index)
+{
+ struct vk_slab *slab = NULL;
+
+ // If the allocation is very big, serve it directly instead of bothering
+ // with the heap
+ if (size > MPVK_HEAP_MAXIMUM_SLAB_SIZE) {
+ slab = slab_alloc(vk, heap, size);
+ *out_slab = slab;
+ *out_index = 0;
+ return !!slab;
+ }
+
+ for (int i = 0; i < heap->num_slabs; i++) {
+ slab = heap->slabs[i];
+ if (slab->size < size)
+ continue;
+
+ // Attempt a best fit search
+ int best = -1;
+ for (int n = 0; n < slab->num_regions; n++) {
+ struct vk_region r = slab->regions[n];
+ if (!region_fits(r, size, align))
+ continue;
+ if (best >= 0 && region_len(r) > region_len(slab->regions[best]))
+ continue;
+ best = n;
+ }
+
+ if (best >= 0) {
+ *out_slab = slab;
+ *out_index = best;
+ return true;
+ }
+ }
+
+ // Otherwise, allocate a new vk_slab and append it to the list.
+ size_t cur_size = MPMAX(size, slab ? slab->size : 0);
+ size_t slab_size = MPVK_HEAP_SLAB_GROWTH_RATE * cur_size;
+ slab_size = MPMAX(MPVK_HEAP_MINIMUM_SLAB_SIZE, slab_size);
+ slab_size = MPMIN(MPVK_HEAP_MAXIMUM_SLAB_SIZE, slab_size);
+ assert(slab_size >= size);
+ slab = slab_alloc(vk, heap, slab_size);
+ if (!slab)
+ return false;
+ MP_TARRAY_APPEND(NULL, heap->slabs, heap->num_slabs, slab);
+
+ // Return the only region there is in a newly allocated slab
+ assert(slab->num_regions == 1);
+ *out_slab = slab;
+ *out_index = 0;
+ return true;
+}
+
+static bool slice_heap(struct mpvk_ctx *vk, struct vk_heap *heap, size_t size,
+ size_t alignment, struct vk_memslice *out)
+{
+ struct vk_slab *slab;
+ int index;
+ alignment = MP_ALIGN_UP(alignment, vk->limits.bufferImageGranularity);
+ if (!heap_get_region(vk, heap, size, alignment, &slab, &index))
+ return false;
+
+ struct vk_region reg = slab->regions[index];
+ MP_TARRAY_REMOVE_AT(slab->regions, slab->num_regions, index);
+ *out = (struct vk_memslice) {
+ .vkmem = slab->mem,
+ .offset = MP_ALIGN_UP(reg.start, alignment),
+ .size = size,
+ .priv = slab,
+ };
+
+ MP_DBG(vk, "Sub-allocating slice %zu + %zu from slab with size %zu\n",
+ out->offset, out->size, slab->size);
+
+ size_t out_end = out->offset + out->size;
+ insert_region(slab, (struct vk_region) { reg.start, out->offset });
+ insert_region(slab, (struct vk_region) { out_end, reg.end });
+
+ slab->used += size;
+ return true;
+}
+
+bool vk_malloc_generic(struct mpvk_ctx *vk, VkMemoryRequirements reqs,
+ VkMemoryPropertyFlagBits flags, struct vk_memslice *out)
+{
+ struct vk_heap *heap = find_heap(vk, 0, flags, &reqs);
+ return slice_heap(vk, heap, reqs.size, reqs.alignment, out);
+}
+
+bool vk_malloc_buffer(struct mpvk_ctx *vk, VkBufferUsageFlagBits bufFlags,
+ VkMemoryPropertyFlagBits memFlags, VkDeviceSize size,
+ VkDeviceSize alignment, struct vk_bufslice *out)
+{
+ struct vk_heap *heap = find_heap(vk, bufFlags, memFlags, NULL);
+ if (!slice_heap(vk, heap, size, alignment, &out->mem))
+ return false;
+
+ struct vk_slab *slab = out->mem.priv;
+ out->buf = slab->buffer;
+ if (slab->data)
+ out->data = (void *)((uintptr_t)slab->data + (ptrdiff_t)out->mem.offset);
+
+ return true;
+}
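
The slab sizing policy in heap_get_region() can be summarized by a small standalone helper (an illustration of the tunables at the top of this file, not code from the patch):

    // Grow geometrically from the largest existing slab (or the request),
    // clamped to the minimum/maximum slab sizes.
    static size_t pick_slab_size(size_t request, size_t largest_slab)
    {
        size_t size = MPVK_HEAP_SLAB_GROWTH_RATE * MPMAX(request, largest_slab);
        size = MPMAX(MPVK_HEAP_MINIMUM_SLAB_SIZE, size);
        size = MPMIN(MPVK_HEAP_MAXIMUM_SLAB_SIZE, size);
        return size;
    }
    // e.g. a first 100 KiB request yields a 1 MiB slab; a later 3 MiB request
    // next to that slab yields a 12 MiB slab; requests above 512 MiB skip the
    // heap entirely and get a dedicated slab of exactly the requested size.
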
diff --git a/video/out/vulkan/malloc.h b/video/out/vulkan/malloc.h
new file mode 100644
index 0000000000..65c1036929
--- /dev/null
+++ b/video/out/vulkan/malloc.h
@@ -0,0 +1,35 @@
+#pragma once
+
+#include "common.h"
+
+void vk_malloc_init(struct mpvk_ctx *vk);
+void vk_malloc_uninit(struct mpvk_ctx *vk);
+
+// Represents a single "slice" of generic (non-buffer) memory, plus some
+// metadata for accounting. This struct is essentially read-only.
+struct vk_memslice {
+ VkDeviceMemory vkmem;
+ size_t offset;
+ size_t size;
+ void *priv;
+};
+
+void vk_free_memslice(struct mpvk_ctx *vk, struct vk_memslice slice);
+bool vk_malloc_generic(struct mpvk_ctx *vk, VkMemoryRequirements reqs,
+ VkMemoryPropertyFlagBits flags, struct vk_memslice *out);
+
+// Represents a single "slice" of a larger buffer
+struct vk_bufslice {
+ struct vk_memslice mem; // must be freed by the user when done
+ VkBuffer buf; // the buffer this memory was sliced from
+ // For persistently mapped buffers, this points to the first usable byte of
+ // this slice.
+ void *data;
+};
+
+// Allocate a buffer slice. This is more efficient than vk_malloc_generic when
+// the user needs lots of buffers, since it doesn't require creating/destroying
+// lots of (little) VkBuffers.
+bool vk_malloc_buffer(struct mpvk_ctx *vk, VkBufferUsageFlagBits bufFlags,
+ VkMemoryPropertyFlagBits memFlags, VkDeviceSize size,
+ VkDeviceSize alignment, struct vk_bufslice *out);
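
A minimal usage sketch for the allocator API above (assumes an mpvk_ctx that already had vk_malloc_init() called on it; the usage flags, size and src_pixels buffer are illustrative, not from the patch):

    // Sub-allocate a host-visible staging slice, fill it, then return it
    // to the allocator once the GPU no longer uses it.
    struct vk_bufslice slice;
    if (vk_malloc_buffer(vk, VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
                         4096, 4, &slice))
    {
        memcpy(slice.data, src_pixels, 4096);  // slice.data is persistently mapped
        // ... record a copy from slice.buf at offset slice.mem.offset ...
        vk_free_memslice(vk, slice.mem);       // free once no longer in use
    }
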
diff --git a/video/out/vulkan/ra_vk.c b/video/out/vulkan/ra_vk.c
new file mode 100644
index 0000000000..ce0cbc66e9
--- /dev/null
+++ b/video/out/vulkan/ra_vk.c
@@ -0,0 +1,1590 @@
+#include "ra_vk.h"
+#include "malloc.h"
+#include "video/out/opengl/utils.h"
+
+static struct ra_fns ra_fns_vk;
+
+// For ra.priv
+struct ra_vk {
+ struct mpvk_ctx *vk;
+ struct ra_tex *clear_tex; // stupid hack for clear()
+ struct vk_cmd *cmd; // currently recording cmd
+};
+
+struct mpvk_ctx *ra_vk_get(struct ra *ra)
+{
+ if (ra->fns != &ra_fns_vk)
+ return NULL;
+
+ struct ra_vk *p = ra->priv;
+ return p->vk;
+}
+
+// Returns a command buffer, or NULL on error
+static struct vk_cmd *vk_require_cmd(struct ra *ra)
+{
+ struct ra_vk *p = ra->priv;
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+
+ if (!p->cmd)
+ p->cmd = vk_cmd_begin(vk, vk->pool);
+
+ return p->cmd;
+}
+
+// Note: This technically follows the flush() API, but we don't need
+// to expose that (and in fact, it's a bad idea) since we control flushing
+// behavior with ra_vk_present_frame already.
+static bool vk_flush(struct ra *ra, VkSemaphore *done)
+{
+ struct ra_vk *p = ra->priv;
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+
+ if (p->cmd) {
+ if (!vk_cmd_submit(vk, p->cmd, done))
+ return false;
+ p->cmd = NULL;
+ }
+
+ return true;
+}
+
+// The callback's *priv will always be set to `ra`
+static void vk_callback(struct ra *ra, vk_cb callback, void *arg)
+{
+ struct ra_vk *p = ra->priv;
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+
+ if (p->cmd) {
+ vk_cmd_callback(p->cmd, callback, ra, arg);
+ } else {
+ vk_dev_callback(vk, callback, ra, arg);
+ }
+}
+
+#define MAKE_LAZY_DESTRUCTOR(fun, argtype) \
+ static void fun##_lazy(struct ra *ra, argtype *arg) { \
+ vk_callback(ra, (vk_cb) fun, arg); \
+ }
+
+static void vk_destroy_ra(struct ra *ra)
+{
+ struct ra_vk *p = ra->priv;
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+
+ vk_flush(ra, NULL);
+ mpvk_dev_wait_idle(vk);
+ ra_tex_free(ra, &p->clear_tex);
+
+ talloc_free(ra);
+}
+
+static bool vk_setup_formats(struct ra *ra)
+{
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+
+ for (const struct vk_format *vk_fmt = vk_formats; vk_fmt->name; vk_fmt++) {
+ VkFormatProperties prop;
+ vkGetPhysicalDeviceFormatProperties(vk->physd, vk_fmt->iformat, &prop);
+
+ // As a bare minimum, we need to sample from an allocated image
+ VkFormatFeatureFlags flags = prop.optimalTilingFeatures;
+ if (!(flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT))
+ continue;
+
+ VkFormatFeatureFlags linear_bits, render_bits;
+ linear_bits = VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
+ render_bits = VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
+ VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
+
+ struct ra_format *fmt = talloc_zero(ra, struct ra_format);
+ *fmt = (struct ra_format) {
+ .name = vk_fmt->name,
+ .priv = (void *)vk_fmt,
+ .ctype = vk_fmt->ctype,
+ .ordered = !vk_fmt->fucked_order,
+ .num_components = vk_fmt->components,
+ .pixel_size = vk_fmt->bytes,
+ .linear_filter = !!(flags & linear_bits),
+ .renderable = !!(flags & render_bits),
+ };
+
+ for (int i = 0; i < 4; i++)
+ fmt->component_size[i] = fmt->component_depth[i] = vk_fmt->bits[i];
+
+ MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, fmt);
+ }
+
+ // Populate some other capabilities related to formats while we're at it
+ VkImageType imgType[3] = {
+ VK_IMAGE_TYPE_1D,
+ VK_IMAGE_TYPE_2D,
+ VK_IMAGE_TYPE_3D
+ };
+
+ // R8_UNORM is supported on literally every single vulkan implementation
+ const VkFormat testfmt = VK_FORMAT_R8_UNORM;
+
+ for (int d = 0; d < 3; d++) {
+ VkImageFormatProperties iprop;
+ VkResult res = vkGetPhysicalDeviceImageFormatProperties(vk->physd,
+ testfmt, imgType[d], VK_IMAGE_TILING_OPTIMAL,
+ VK_IMAGE_USAGE_SAMPLED_BIT, 0, &iprop);
+
+ switch (imgType[d]) {
+ case VK_IMAGE_TYPE_1D:
+ if (res == VK_SUCCESS)
+ ra->caps |= RA_CAP_TEX_1D;
+ break;
+ case VK_IMAGE_TYPE_2D:
+ // 2D formats must be supported by RA, so ensure this is the case
+ VK_ASSERT(res, "Querying 2D format limits");
+ ra->max_texture_wh = MPMIN(iprop.maxExtent.width, iprop.maxExtent.height);
+ break;
+ case VK_IMAGE_TYPE_3D:
+ if (res == VK_SUCCESS)
+ ra->caps |= RA_CAP_TEX_3D;
+ break;
+ }
+ }
+
+ // RA_CAP_BLIT implies both blitting between images as well as blitting
+ // directly to the swapchain image, so check for all three operations
+ bool blittable = true;
+ VkFormatProperties prop;
+ vkGetPhysicalDeviceFormatProperties(vk->physd, testfmt, &prop);
+ if (!(prop.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_SRC_BIT))
+ blittable = false;
+ if (!(prop.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT))
+ blittable = false;
+
+ vkGetPhysicalDeviceFormatProperties(vk->physd, vk->surf_format.format, &prop);
+ if (!(prop.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT))
+ blittable = false;
+
+ if (blittable)
+ ra->caps |= RA_CAP_BLIT;
+
+ return true;
+
+error:
+ return false;
+}
+
+static struct ra_fns ra_fns_vk;
+
+struct ra *ra_create_vk(struct mpvk_ctx *vk, struct mp_log *log)
+{
+ assert(vk->dev);
+ assert(vk->alloc);
+
+ struct ra *ra = talloc_zero(NULL, struct ra);
+ ra->log = log;
+ ra->fns = &ra_fns_vk;
+
+ struct ra_vk *p = ra->priv = talloc_zero(ra, struct ra_vk);
+ p->vk = vk;
+
+ // There's no way to query the supported GLSL version from VK_NV_glsl_shader
+ // (thanks nvidia), so just pick the GL version that modern nvidia devices
+ // support.
+ ra->glsl_version = 450;
+ ra->glsl_vulkan = true;
+ ra->max_shmem = vk->limits.maxComputeSharedMemorySize;
+ ra->caps = RA_CAP_NESTED_ARRAY;
+
+ if (vk->pool->props.queueFlags & VK_QUEUE_COMPUTE_BIT)
+ ra->caps |= RA_CAP_COMPUTE;
+
+ if (!vk_setup_formats(ra))
+ goto error;
+
+ // UBO support is required
+ ra->caps |= RA_CAP_BUF_RO;
+
+ // Try creating a shader storage buffer
+ struct ra_buf_params ssbo_params = {
+ .type = RA_BUF_TYPE_SHADER_STORAGE,
+ .size = 16,
+ };
+
+ struct ra_buf *ssbo = ra_buf_create(ra, &ssbo_params);
+ if (ssbo) {
+ ra->caps |= RA_CAP_BUF_RW;
+ ra_buf_free(ra, &ssbo);
+ }
+
+ // To support clear() by region, we need to allocate a dummy 1x1 image that
+ // will be used as the source of blit operations
+ struct ra_tex_params clear_params = {
+ .dimensions = 1, // no point in using a 2D image if height = 1
+ .w = 1,
+ .h = 1,
+ .d = 1,
+ .format = ra_find_float16_format(ra, 4),
+ .blit_src = 1,
+ .host_mutable = 1,
+ };
+
+ p->clear_tex = ra_tex_create(ra, &clear_params);
+ if (!p->clear_tex) {
+ MP_ERR(ra, "Failed creating 1x1 dummy texture for clear()!\n");
+ goto error;
+ }
+
+ return ra;
+
+error:
+ vk_destroy_ra(ra);
+ return NULL;
+}
+
+// Boilerplate wrapper around vkCreateRenderPass to ensure passes remain
+// compatible
+static VkResult vk_create_render_pass(VkDevice dev, const struct ra_format *fmt,
+ bool load_fbo, VkRenderPass *out)
+{
+ struct vk_format *vk_fmt = fmt->priv;
+ assert(fmt->renderable);
+
+ VkRenderPassCreateInfo rinfo = {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = &(VkAttachmentDescription) {
+ .format = vk_fmt->iformat,
+ .samples = VK_SAMPLE_COUNT_1_BIT,
+ .loadOp = load_fbo ? VK_ATTACHMENT_LOAD_OP_LOAD
+ : VK_ATTACHMENT_LOAD_OP_DONT_CARE,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+ .finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+ },
+ .subpassCount = 1,
+ .pSubpasses = &(VkSubpassDescription) {
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .colorAttachmentCount = 1,
+ .pColorAttachments = &(VkAttachmentReference) {
+ .attachment = 0,
+ .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+ },
+ },
+ };
+
+ return vkCreateRenderPass(dev, &rinfo, MPVK_ALLOCATOR, out);
+}
+
+// For ra_tex.priv
+struct ra_tex_vk {
+ bool external_img;
+ VkImageType type;
+ VkImage img;
+ struct vk_memslice mem;
+ // for sampling
+ VkImageView view;
+ VkSampler sampler;
+ // for rendering
+ VkFramebuffer framebuffer;
+ VkRenderPass dummyPass;
+ // for uploading
+ struct ra_buf_pool pbo;
+ // "current" metadata, can change during the course of execution
+ VkImageLayout current_layout;
+ VkPipelineStageFlagBits current_stage;
+ VkAccessFlagBits current_access;
+};
+
+// Small helper to ease image barrier creation. if `discard` is set, the contents
+// of the image will be undefined after the barrier
+static void tex_barrier(struct vk_cmd *cmd, struct ra_tex_vk *tex_vk,
+ VkPipelineStageFlagBits newStage,
+ VkAccessFlagBits newAccess, VkImageLayout newLayout,
+ bool discard)
+{
+ VkImageMemoryBarrier imgBarrier = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .oldLayout = tex_vk->current_layout,
+ .newLayout = newLayout,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .srcAccessMask = tex_vk->current_access,
+ .dstAccessMask = newAccess,
+ .image = tex_vk->img,
+ .subresourceRange = vk_range,
+ };
+
+ if (discard) {
+ imgBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+ imgBarrier.srcAccessMask = 0;
+ }
+
+ if (imgBarrier.oldLayout != imgBarrier.newLayout ||
+ imgBarrier.srcAccessMask != imgBarrier.dstAccessMask)
+ {
+ vkCmdPipelineBarrier(cmd->buf, tex_vk->current_stage, newStage, 0,
+ 0, NULL, 0, NULL, 1, &imgBarrier);
+ }
+
+ tex_vk->current_stage = newStage;
+ tex_vk->current_layout = newLayout;
+ tex_vk->current_access = newAccess;
+}
+
+static void vk_tex_destroy(struct ra *ra, struct ra_tex *tex)
+{
+ if (!tex)
+ return;
+
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+ struct ra_tex_vk *tex_vk = tex->priv;
+
+ ra_buf_pool_uninit(ra, &tex_vk->pbo);
+ vkDestroyFramebuffer(vk->dev, tex_vk->framebuffer, MPVK_ALLOCATOR);
+ vkDestroyRenderPass(vk->dev, tex_vk->dummyPass, MPVK_ALLOCATOR);
+ vkDestroySampler(vk->dev, tex_vk->sampler, MPVK_ALLOCATOR);
+ vkDestroyImageView(vk->dev, tex_vk->view, MPVK_ALLOCATOR);
+ if (!tex_vk->external_img) {
+ vkDestroyImage(vk->dev, tex_vk->img, MPVK_ALLOCATOR);
+ vk_free_memslice(vk, tex_vk->mem);
+ }
+
+ talloc_free(tex);
+}
+
+MAKE_LAZY_DESTRUCTOR(vk_tex_destroy, struct ra_tex);
+
+// Initializes non-VkImage values like the image view, samplers, etc.
+static bool vk_init_image(struct ra *ra, struct ra_tex *tex)
+{
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+
+ struct ra_tex_params *params = &tex->params;
+ struct ra_tex_vk *tex_vk = tex->priv;
+ assert(tex_vk->img);
+
+ tex_vk->current_layout = VK_IMAGE_LAYOUT_UNDEFINED;
+ tex_vk->current_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+ tex_vk->current_access = 0;
+
+ if (params->render_src || params->render_dst) {
+ static const VkImageViewType viewType[] = {
+ [VK_IMAGE_TYPE_1D] = VK_IMAGE_VIEW_TYPE_1D,
+ [VK_IMAGE_TYPE_2D] = VK_IMAGE_VIEW_TYPE_2D,
+ [VK_IMAGE_TYPE_3D] = VK_IMAGE_VIEW_TYPE_3D,
+ };
+
+ const struct vk_format *fmt = params->format->priv;
+ VkImageViewCreateInfo vinfo = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = tex_vk->img,
+ .viewType = viewType[tex_vk->type],
+ .format = fmt->iformat,
+ .subresourceRange = vk_range,
+ };
+
+ VK(vkCreateImageView(vk->dev, &vinfo, MPVK_ALLOCATOR, &tex_vk->view));
+ }
+
+ if (params->render_src) {
+ assert(params->format->linear_filter || !params->src_linear);
+ VkFilter filter = params->src_linear
+ ? VK_FILTER_LINEAR
+ : VK_FILTER_NEAREST;
+ VkSamplerAddressMode wrap = params->src_repeat
+ ? VK_SAMPLER_ADDRESS_MODE_REPEAT
+ : VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
+ VkSamplerCreateInfo sinfo = {
+ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+ .magFilter = filter,
+ .minFilter = filter,
+ .addressModeU = wrap,
+ .addressModeV = wrap,
+ .addressModeW = wrap,
+ .maxAnisotropy = 1.0,
+ };
+
+ VK(vkCreateSampler(vk->dev, &sinfo, MPVK_ALLOCATOR, &tex_vk->sampler));
+ }
+
+ if (params->render_dst) {
+ // Framebuffers need to be created against a specific render pass
+ // layout, so we need to temporarily create a skeleton/dummy render
+ // pass for vulkan to figure out the compatibility
+ VK(vk_create_render_pass(vk->dev, params->format, false, &tex_vk->dummyPass));
+
+ VkFramebufferCreateInfo finfo = {
+ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .renderPass = tex_vk->dummyPass,
+ .attachmentCount = 1,
+ .pAttachments = &tex_vk->view,
+ .width = tex->params.w,
+ .height = tex->params.h,
+ .layers = 1,
+ };
+
+ VK(vkCreateFramebuffer(vk->dev, &finfo, MPVK_ALLOCATOR,
+ &tex_vk->framebuffer));
+
+ // NOTE: Normally we would free the dummyPass again here, but a bug
+ // in the nvidia vulkan driver causes a segfault if you do.
+ }
+
+ return true;
+
+error:
+ return false;
+}
+
+static struct ra_tex *vk_tex_create(struct ra *ra,
+ const struct ra_tex_params *params)
+{
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+
+ struct ra_tex *tex = talloc_zero(NULL, struct ra_tex);
+ tex->params = *params;
+ tex->params.initial_data = NULL;
+
+ struct ra_tex_vk *tex_vk = tex->priv = talloc_zero(tex, struct ra_tex_vk);
+
+ const struct vk_format *fmt = params->format->priv;
+ switch (params->dimensions) {
+ case 1: tex_vk->type = VK_IMAGE_TYPE_1D; break;
+ case 2: tex_vk->type = VK_IMAGE_TYPE_2D; break;
+ case 3: tex_vk->type = VK_IMAGE_TYPE_3D; break;
+ default: abort();
+ }
+
+ VkImageUsageFlags usage = 0;
+ if (params->render_src)
+ usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
+ if (params->render_dst)
+ usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+ if (params->storage_dst)
+ usage |= VK_IMAGE_USAGE_STORAGE_BIT;
+ if (params->blit_src)
+ usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
+ if (params->host_mutable || params->blit_dst || params->initial_data)
+ usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
+
+ // Double-check image usage support and fail immediately if invalid
+ VkImageFormatProperties iprop;
+ VkResult res = vkGetPhysicalDeviceImageFormatProperties(vk->physd,
+ fmt->iformat, tex_vk->type, VK_IMAGE_TILING_OPTIMAL, usage, 0,
+ &iprop);
+ if (res == VK_ERROR_FORMAT_NOT_SUPPORTED) {
+ return NULL;
+ } else {
+ VK_ASSERT(res, "Querying image format properties");
+ }
+
+ VkFormatProperties prop;
+ vkGetPhysicalDeviceFormatProperties(vk->physd, fmt->iformat, &prop);
+ VkFormatFeatureFlags flags = prop.optimalTilingFeatures;
+
+ bool has_blit_src = flags & VK_FORMAT_FEATURE_BLIT_SRC_BIT,
+ has_src_linear = flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
+
+ if (params->w > iprop.maxExtent.width ||
+ params->h > iprop.maxExtent.height ||
+ params->d > iprop.maxExtent.depth ||
+ (params->blit_src && !has_blit_src) ||
+ (params->src_linear && !has_src_linear))
+ {
+ return NULL;
+ }
+
+ VkImageCreateInfo iinfo = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+ .imageType = tex_vk->type,
+ .format = fmt->iformat,
+ .extent = (VkExtent3D) { params->w, params->h, params->d },
+ .mipLevels = 1,
+ .arrayLayers = 1,
+ .samples = VK_SAMPLE_COUNT_1_BIT,
+ .tiling = VK_IMAGE_TILING_OPTIMAL,
+ .usage = usage,
+ .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 1,
+ .pQueueFamilyIndices = &vk->pool->qf,
+ };
+
+ VK(vkCreateImage(vk->dev, &iinfo, MPVK_ALLOCATOR, &tex_vk->img));
+
+ VkMemoryPropertyFlagBits memFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ VkMemoryRequirements reqs;
+ vkGetImageMemoryRequirements(vk->dev, tex_vk->img, &reqs);
+
+ struct vk_memslice *mem = &tex_vk->mem;
+ if (!vk_malloc_generic(vk, reqs, memFlags, mem))
+ goto error;
+
+ VK(vkBindImageMemory(vk->dev, tex_vk->img, mem->vkmem, mem->offset));
+
+ if (!vk_init_image(ra, tex))
+ goto error;
+
+ if (params->initial_data) {
+ struct ra_tex_upload_params ul_params = {
+ .tex = tex,
+ .invalidate = true,
+ .src = params->initial_data,
+ .stride = params->w * fmt->bytes,
+ };
+ if (!ra->fns->tex_upload(ra, &ul_params))
+ goto error;
+ }
+
+ return tex;
+
+error:
+ vk_tex_destroy(ra, tex);
+ return NULL;
+}
+
+struct ra_tex *ra_vk_wrap_swapchain_img(struct ra *ra, VkImage vkimg,
+ VkSwapchainCreateInfoKHR info)
+{
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+ struct ra_tex *tex = NULL;
+
+ const struct ra_format *format = NULL;
+ for (int i = 0; i < ra->num_formats; i++) {
+ const struct vk_format *fmt = ra->formats[i]->priv;
+ if (fmt->iformat == vk->surf_format.format) {
+ format = ra->formats[i];
+ break;
+ }
+ }
+
+ if (!format) {
+ MP_ERR(ra, "Could not find ra_format suitable for wrapped swchain image "
+ "with surface format 0x%x\n", vk->surf_format.format);
+ goto error;
+ }
+
+ tex = talloc_zero(NULL, struct ra_tex);
+ tex->params = (struct ra_tex_params) {
+ .format = format,
+ .dimensions = 2,
+ .w = info.imageExtent.width,
+ .h = info.imageExtent.height,
+ .d = 1,
+ .blit_src = !!(info.imageUsage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT),
+ .blit_dst = !!(info.imageUsage & VK_IMAGE_USAGE_TRANSFER_DST_BIT),
+ .render_src = !!(info.imageUsage & VK_IMAGE_USAGE_SAMPLED_BIT),
+ .render_dst = !!(info.imageUsage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT),
+ .storage_dst = !!(info.imageUsage & VK_IMAGE_USAGE_STORAGE_BIT),
+ };
+
+ struct ra_tex_vk *tex_vk = tex->priv = talloc_zero(tex, struct ra_tex_vk);
+ tex_vk->type = VK_IMAGE_TYPE_2D;
+ tex_vk->external_img = true;
+ tex_vk->img = vkimg;
+
+ if (!vk_init_image(ra, tex))
+ goto error;
+
+ return tex;
+
+error:
+ vk_tex_destroy(ra, tex);
+ return NULL;
+}
+
+// For ra_buf.priv
+struct ra_buf_vk {
+ struct vk_bufslice slice;
+ int refcount; // 1 = object allocated but not in use, > 1 = in use
+ bool needsflush;
+ // "current" metadata, can change during course of execution
+ VkPipelineStageFlagBits current_stage;
+ VkAccessFlagBits current_access;
+};
+
+static void vk_buf_deref(struct ra *ra, struct ra_buf *buf)
+{
+ if (!buf)
+ return;
+
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+ struct ra_buf_vk *buf_vk = buf->priv;
+
+ if (--buf_vk->refcount == 0) {
+ vk_free_memslice(vk, buf_vk->slice.mem);
+ talloc_free(buf);
+ }
+}
+
+static void buf_barrier(struct ra *ra, struct vk_cmd *cmd, struct ra_buf *buf,
+ VkPipelineStageFlagBits newStage,
+ VkAccessFlagBits newAccess, int offset, size_t size)
+{
+ struct ra_buf_vk *buf_vk = buf->priv;
+
+ VkBufferMemoryBarrier buffBarrier = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+ .srcAccessMask = buf_vk->current_access,
+ .dstAccessMask = newAccess,
+ .buffer = buf_vk->slice.buf,
+ .offset = offset,
+ .size = size,
+ };
+
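+    // If the host wrote to a mapped slice since the last use, those writes
+    // first have to be made visible to the device, so synchronize against
+    // the host stage. host_mapped buffers can be written at any time, so
+    // always assume the worst case for them.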
+ if (buf_vk->needsflush || buf->params.host_mapped) {
+ buffBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
+ buf_vk->current_stage = VK_PIPELINE_STAGE_HOST_BIT;
+ buf_vk->needsflush = false;
+ }
+
+ if (buffBarrier.srcAccessMask != buffBarrier.dstAccessMask) {
+ vkCmdPipelineBarrier(cmd->buf, buf_vk->current_stage, newStage, 0,
+ 0, NULL, 1, &buffBarrier, 0, NULL);
+ }
+
+ buf_vk->current_stage = newStage;
+ buf_vk->current_access = newAccess;
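+    // Hold a reference for the duration of this command; it is released by
+    // the vk_buf_deref callback once the command's fence signals, which is
+    // also what vk_buf_poll uses to tell whether the buffer is still busy.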
+ buf_vk->refcount++;
+ vk_cmd_callback(cmd, (vk_cb) vk_buf_deref, ra, buf);
+}
+
+#define vk_buf_destroy vk_buf_deref
+MAKE_LAZY_DESTRUCTOR(vk_buf_destroy, struct ra_buf);
+
+static void vk_buf_update(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset,
+ const void *data, size_t size)
+{
+ assert(buf->params.host_mutable || buf->params.initial_data);
+ struct ra_buf_vk *buf_vk = buf->priv;
+
+ // For host-mapped buffers, we can just directly memcpy the buffer contents.
+ // Otherwise, we can update the buffer from the GPU using a command buffer.
+ if (buf_vk->slice.data) {
+ assert(offset + size <= buf->params.size);
+ uintptr_t addr = (uintptr_t)buf_vk->slice.data + offset;
+ memcpy((void *)addr, data, size);
+ buf_vk->needsflush = true;
+ } else {
+ struct vk_cmd *cmd = vk_require_cmd(ra);
+ if (!cmd) {
+ MP_ERR(ra, "Failed updating buffer!\n");
+ return;
+ }
+
+ buf_barrier(ra, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ VK_ACCESS_TRANSFER_WRITE_BIT, offset, size);
+
+ VkDeviceSize bufOffset = buf_vk->slice.mem.offset + offset;
+ assert(bufOffset == MP_ALIGN_UP(bufOffset, 4));
+ vkCmdUpdateBuffer(cmd->buf, buf_vk->slice.buf, bufOffset, size, data);
+ }
+}
+
+static struct ra_buf *vk_buf_create(struct ra *ra,
+ const struct ra_buf_params *params)
+{
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+
+ struct ra_buf *buf = talloc_zero(NULL, struct ra_buf);
+ buf->params = *params;
+
+ struct ra_buf_vk *buf_vk = buf->priv = talloc_zero(buf, struct ra_buf_vk);
+ buf_vk->current_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+ buf_vk->current_access = 0;
+ buf_vk->refcount = 1;
+
+ VkBufferUsageFlagBits bufFlags = 0;
+ VkMemoryPropertyFlagBits memFlags = 0;
+ VkDeviceSize align = 4; // alignment 4 is needed for buf_update
+
+ switch (params->type) {
+ case RA_BUF_TYPE_TEX_UPLOAD:
+ bufFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+ memFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+ break;
+ case RA_BUF_TYPE_UNIFORM:
+ bufFlags |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
+ memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ align = MP_ALIGN_UP(align, vk->limits.minUniformBufferOffsetAlignment);
+ break;
+ case RA_BUF_TYPE_SHADER_STORAGE:
+ bufFlags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
+ memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ align = MP_ALIGN_UP(align, vk->limits.minStorageBufferOffsetAlignment);
+ break;
+ case RA_BUF_TYPE_VERTEX:
+ bufFlags |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
+ memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ break;
+ default: abort();
+ }
+
+ if (params->host_mutable || params->initial_data) {
+ bufFlags |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+ align = MP_ALIGN_UP(align, vk->limits.optimalBufferCopyOffsetAlignment);
+ }
+
+ if (params->host_mapped) {
+ memFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+ VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
+ VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+ }
+
+ if (!vk_malloc_buffer(vk, bufFlags, memFlags, params->size, align,
+ &buf_vk->slice))
+ {
+ goto error;
+ }
+
+ if (params->host_mapped)
+ buf->data = buf_vk->slice.data;
+
+ if (params->initial_data)
+ vk_buf_update(ra, buf, 0, params->initial_data, params->size);
+
+ buf->params.initial_data = NULL; // do this after vk_buf_update
+ return buf;
+
+error:
+ vk_buf_destroy(ra, buf);
+ return NULL;
+}
+
+static bool vk_buf_poll(struct ra *ra, struct ra_buf *buf)
+{
+ struct ra_buf_vk *buf_vk = buf->priv;
+ return buf_vk->refcount == 1;
+}
+
+static bool vk_tex_upload(struct ra *ra,
+ const struct ra_tex_upload_params *params)
+{
+ struct ra_tex *tex = params->tex;
+ struct ra_tex_vk *tex_vk = tex->priv;
+
+ if (!params->buf)
+ return ra_tex_upload_pbo(ra, &tex_vk->pbo, params);
+
+ assert(!params->src);
+ assert(params->buf);
+ struct ra_buf *buf = params->buf;
+ struct ra_buf_vk *buf_vk = buf->priv;
+
+ VkBufferImageCopy region = {
+ .bufferOffset = buf_vk->slice.mem.offset + params->buf_offset,
+ .bufferRowLength = tex->params.w,
+ .bufferImageHeight = tex->params.h,
+ .imageSubresource = vk_layers,
+ .imageExtent = (VkExtent3D){tex->params.w, tex->params.h, tex->params.d},
+ };
+
+ if (tex->params.dimensions == 2) {
+ int pix_size = tex->params.format->pixel_size;
+ region.bufferRowLength = params->stride / pix_size;
+ if (region.bufferRowLength * pix_size != params->stride) {
+ MP_ERR(ra, "Texture upload strides must be a multiple of the texel "
+ "size!\n");
+ goto error;
+ }
+
+ if (params->rc) {
+ struct mp_rect *rc = params->rc;
+ region.imageOffset = (VkOffset3D){rc->x0, rc->y0, 0};
+ region.imageExtent = (VkExtent3D){mp_rect_w(*rc), mp_rect_h(*rc), 1};
+ }
+ }
+
+    // Size of the affected buffer region, in bytes
+    uint64_t size = region.bufferRowLength * region.bufferImageHeight *
+                    region.imageExtent.depth * tex->params.format->pixel_size;
+
+ struct vk_cmd *cmd = vk_require_cmd(ra);
+ if (!cmd)
+ goto error;
+
+ buf_barrier(ra, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ VK_ACCESS_TRANSFER_READ_BIT, region.bufferOffset, size);
+
+ tex_barrier(cmd, tex_vk, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ VK_ACCESS_TRANSFER_WRITE_BIT,
+ VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ params->invalidate);
+
+ vkCmdCopyBufferToImage(cmd->buf, buf_vk->slice.buf, tex_vk->img,
+ tex_vk->current_layout, 1, &region);
+
+ return true;
+
+error:
+ return false;
+}
+
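+// Use one descriptor set per command that can be in flight at once, so a set
+// can be rewritten for a new command without clobbering one that is still
+// referenced by a previously submitted command.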
+#define MPVK_NUM_DS MPVK_MAX_STREAMING_DEPTH
+
+// For ra_renderpass.priv
+struct ra_renderpass_vk {
+ // Compiled shaders
+ VkShaderModule vert;
+ VkShaderModule frag;
+ VkShaderModule comp;
+ // Pipeline / render pass
+ VkPipeline pipe;
+ VkPipelineLayout pipeLayout;
+ VkPipelineCache pipeCache;
+ VkRenderPass renderPass;
+ // Descriptor set (bindings)
+ VkDescriptorSetLayout dsLayout;
+ VkDescriptorPool dsPool;
+ VkDescriptorSet dss[MPVK_NUM_DS];
+ int dindex;
+ // Vertex buffers (vertices)
+ struct ra_buf_pool vbo;
+
+ // For updating
+ VkWriteDescriptorSet *dswrite;
+ VkDescriptorImageInfo *dsiinfo;
+ VkDescriptorBufferInfo *dsbinfo;
+};
+
+static void vk_renderpass_destroy(struct ra *ra, struct ra_renderpass *pass)
+{
+ if (!pass)
+ return;
+
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+ struct ra_renderpass_vk *pass_vk = pass->priv;
+
+ ra_buf_pool_uninit(ra, &pass_vk->vbo);
+ vkDestroyPipeline(vk->dev, pass_vk->pipe, MPVK_ALLOCATOR);
+ vkDestroyPipelineCache(vk->dev, pass_vk->pipeCache, MPVK_ALLOCATOR);
+ vkDestroyRenderPass(vk->dev, pass_vk->renderPass, MPVK_ALLOCATOR);
+ vkDestroyPipelineLayout(vk->dev, pass_vk->pipeLayout, MPVK_ALLOCATOR);
+ vkDestroyDescriptorPool(vk->dev, pass_vk->dsPool, MPVK_ALLOCATOR);
+ vkDestroyDescriptorSetLayout(vk->dev, pass_vk->dsLayout, MPVK_ALLOCATOR);
+ vkDestroyShaderModule(vk->dev, pass_vk->vert, MPVK_ALLOCATOR);
+ vkDestroyShaderModule(vk->dev, pass_vk->frag, MPVK_ALLOCATOR);
+ vkDestroyShaderModule(vk->dev, pass_vk->comp, MPVK_ALLOCATOR);
+
+ talloc_free(pass);
+}
+
+MAKE_LAZY_DESTRUCTOR(vk_renderpass_destroy, struct ra_renderpass);
+
+static const VkDescriptorType dsType[] = {
+ [RA_VARTYPE_TEX] = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ [RA_VARTYPE_IMG_W] = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ [RA_VARTYPE_BUF_RO] = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ [RA_VARTYPE_BUF_RW] = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+};
+
+static bool vk_get_input_format(struct ra *ra, struct ra_renderpass_input *inp,
+ VkFormat *out_fmt)
+{
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+
+ enum ra_ctype ctype;
+ switch (inp->type) {
+ case RA_VARTYPE_FLOAT: ctype = RA_CTYPE_FLOAT; break;
+ case RA_VARTYPE_BYTE_UNORM: ctype = RA_CTYPE_UNORM; break;
+ default: abort();
+ }
+
+ assert(inp->dim_m == 1);
+ for (const struct vk_format *fmt = vk_formats; fmt->name; fmt++) {
+ if (fmt->ctype != ctype)
+ continue;
+ if (fmt->components != inp->dim_v)
+ continue;
+ if (fmt->bytes != ra_renderpass_input_layout(inp).size)
+ continue;
+
+ // Ensure this format is valid for vertex attributes
+ VkFormatProperties prop;
+ vkGetPhysicalDeviceFormatProperties(vk->physd, fmt->iformat, &prop);
+ if (!(prop.bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT))
+ continue;
+
+ *out_fmt = fmt->iformat;
+ return true;
+ }
+
+ return false;
+}
+
+static const VkPipelineStageFlagBits stageFlags[] = {
+ [RA_RENDERPASS_TYPE_RASTER] = VK_SHADER_STAGE_FRAGMENT_BIT,
+ [RA_RENDERPASS_TYPE_COMPUTE] = VK_SHADER_STAGE_COMPUTE_BIT,
+};
+
+static struct ra_renderpass *vk_renderpass_create(struct ra *ra,
+ const struct ra_renderpass_params *params)
+{
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+
+ struct ra_renderpass *pass = talloc_zero(NULL, struct ra_renderpass);
+ pass->params = *ra_renderpass_params_copy(pass, params);
+ pass->params.cached_program = (bstr){0};
+ struct ra_renderpass_vk *pass_vk = pass->priv =
+ talloc_zero(pass, struct ra_renderpass_vk);
+
+    int dsCount[RA_VARTYPE_COUNT] = {0};
+ VkDescriptorSetLayoutBinding *bindings = NULL;
+ int num_bindings = 0;
+
+ for (int i = 0; i < params->num_inputs; i++) {
+ struct ra_renderpass_input *inp = &params->inputs[i];
+ switch (inp->type) {
+ case RA_VARTYPE_TEX:
+ case RA_VARTYPE_IMG_W:
+ case RA_VARTYPE_BUF_RO:
+ case RA_VARTYPE_BUF_RW: {
+ VkDescriptorSetLayoutBinding desc = {
+ .binding = inp->binding,
+ .descriptorType = dsType[inp->type],
+ .descriptorCount = 1,
+ .stageFlags = stageFlags[params->type],
+ };
+
+ MP_TARRAY_APPEND(pass, bindings, num_bindings, desc);
+ dsCount[inp->type]++;
+ break;
+ }
+ default: abort();
+ }
+ }
+
+ VkDescriptorPoolSize *dsPoolSizes = NULL;
+ int poolSizeCount = 0;
+ for (enum ra_vartype t = 0; t < RA_VARTYPE_COUNT; t++) {
+ if (dsCount[t] > 0) {
+ VkDescriptorPoolSize dssize = {
+ .type = dsType[t],
+ .descriptorCount = dsCount[t] * MPVK_NUM_DS,
+ };
+
+ MP_TARRAY_APPEND(pass, dsPoolSizes, poolSizeCount, dssize);
+ }
+ }
+
+ VkDescriptorPoolCreateInfo pinfo = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+ .maxSets = MPVK_NUM_DS,
+ .pPoolSizes = dsPoolSizes,
+ .poolSizeCount = poolSizeCount,
+ };
+
+ VK(vkCreateDescriptorPool(vk->dev, &pinfo, MPVK_ALLOCATOR, &pass_vk->dsPool));
+ talloc_free(dsPoolSizes);
+
+ pass_vk->dswrite = talloc_array(pass, VkWriteDescriptorSet, num_bindings);
+ pass_vk->dsiinfo = talloc_array(pass, VkDescriptorImageInfo, num_bindings);
+ pass_vk->dsbinfo = talloc_array(pass, VkDescriptorBufferInfo, num_bindings);
+
+ VkDescriptorSetLayoutCreateInfo dinfo = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .pBindings = bindings,
+ .bindingCount = num_bindings,
+ };
+
+ VK(vkCreateDescriptorSetLayout(vk->dev, &dinfo, MPVK_ALLOCATOR,
+ &pass_vk->dsLayout));
+
+ VkDescriptorSetLayout layouts[MPVK_NUM_DS];
+ for (int i = 0; i < MPVK_NUM_DS; i++)
+ layouts[i] = pass_vk->dsLayout;
+
+ VkDescriptorSetAllocateInfo ainfo = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+ .descriptorPool = pass_vk->dsPool,
+ .descriptorSetCount = MPVK_NUM_DS,
+ .pSetLayouts = layouts,
+ };
+
+ VK(vkAllocateDescriptorSets(vk->dev, &ainfo, pass_vk->dss));
+
+ VkPipelineLayoutCreateInfo linfo = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &pass_vk->dsLayout,
+ };
+
+ VK(vkCreatePipelineLayout(vk->dev, &linfo, MPVK_ALLOCATOR,
+ &pass_vk->pipeLayout));
+
+ VkPipelineCacheCreateInfo pcinfo = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,
+ .pInitialData = params->cached_program.start,
+ .initialDataSize = params->cached_program.len,
+ };
+
+ VK(vkCreatePipelineCache(vk->dev, &pcinfo, MPVK_ALLOCATOR, &pass_vk->pipeCache));
+
+ VkShaderModuleCreateInfo sinfo = {
+ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
+ };
+
+ switch (params->type) {
+ case RA_RENDERPASS_TYPE_RASTER: {
+ sinfo.pCode = (uint32_t *)params->vertex_shader;
+ sinfo.codeSize = strlen(params->vertex_shader);
+ VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &pass_vk->vert));
+
+ sinfo.pCode = (uint32_t *)params->frag_shader;
+ sinfo.codeSize = strlen(params->frag_shader);
+ VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &pass_vk->frag));
+
+ VK(vk_create_render_pass(vk->dev, params->target_format,
+ params->enable_blend, &pass_vk->renderPass));
+
+ VkPipelineShaderStageCreateInfo stages[] = {
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = pass_vk->vert,
+ .pName = "main",
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = pass_vk->frag,
+ .pName = "main",
+ }
+ };
+
+ VkVertexInputAttributeDescription *attrs = talloc_array(pass,
+ VkVertexInputAttributeDescription, params->num_vertex_attribs);
+
+ for (int i = 0; i < params->num_vertex_attribs; i++) {
+ struct ra_renderpass_input *inp = &params->vertex_attribs[i];
+ attrs[i] = (VkVertexInputAttributeDescription) {
+ .location = i,
+ .binding = 0,
+ .offset = inp->offset,
+ };
+
+ if (!vk_get_input_format(ra, inp, &attrs[i].format)) {
+ MP_ERR(ra, "No suitable VkFormat for vertex attrib '%s'!\n",
+ inp->name);
+ goto error;
+ }
+ }
+
+ static const VkBlendFactor blendFactors[] = {
+ [RA_BLEND_ZERO] = VK_BLEND_FACTOR_ZERO,
+ [RA_BLEND_ONE] = VK_BLEND_FACTOR_ONE,
+ [RA_BLEND_SRC_ALPHA] = VK_BLEND_FACTOR_SRC_ALPHA,
+ [RA_BLEND_ONE_MINUS_SRC_ALPHA] = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
+ };
+
+ VkPipelineColorBlendAttachmentState binfo = {
+ .blendEnable = params->enable_blend,
+ .colorBlendOp = VK_BLEND_OP_ADD,
+ .srcColorBlendFactor = blendFactors[params->blend_src_rgb],
+ .dstColorBlendFactor = blendFactors[params->blend_dst_rgb],
+ .alphaBlendOp = VK_BLEND_OP_ADD,
+ .srcAlphaBlendFactor = blendFactors[params->blend_src_alpha],
+ .dstAlphaBlendFactor = blendFactors[params->blend_dst_alpha],
+ .colorWriteMask = VK_COLOR_COMPONENT_R_BIT |
+ VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT |
+ VK_COLOR_COMPONENT_A_BIT,
+ };
+
+ VkGraphicsPipelineCreateInfo cinfo = {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = MP_ARRAY_SIZE(stages),
+ .pStages = &stages[0],
+ .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 1,
+ .pVertexBindingDescriptions = &(VkVertexInputBindingDescription) {
+ .binding = 0,
+ .stride = params->vertex_stride,
+ .inputRate = VK_VERTEX_INPUT_RATE_VERTEX,
+ },
+ .vertexAttributeDescriptionCount = params->num_vertex_attribs,
+ .pVertexAttributeDescriptions = attrs,
+ },
+ .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
+ },
+ .pViewportState = &(VkPipelineViewportStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .lineWidth = 1.0f,
+ },
+ .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
+ },
+ .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = &binfo,
+ },
+ .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 2,
+ .pDynamicStates = (VkDynamicState[]){
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ },
+ },
+ .layout = pass_vk->pipeLayout,
+ .renderPass = pass_vk->renderPass,
+ };
+
+ VK(vkCreateGraphicsPipelines(vk->dev, pass_vk->pipeCache, 1, &cinfo,
+ MPVK_ALLOCATOR, &pass_vk->pipe));
+ break;
+ }
+ case RA_RENDERPASS_TYPE_COMPUTE: {
+ sinfo.pCode = (uint32_t *)params->compute_shader;
+ sinfo.codeSize = strlen(params->compute_shader);
+ VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &pass_vk->comp));
+
+ VkComputePipelineCreateInfo cinfo = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = pass_vk->comp,
+ .pName = "main",
+ },
+ .layout = pass_vk->pipeLayout,
+ };
+
+ VK(vkCreateComputePipelines(vk->dev, pass_vk->pipeCache, 1, &cinfo,
+ MPVK_ALLOCATOR, &pass_vk->pipe));
+ break;
+ }
+ }
+
+ // Update cached program
+ bstr *prog = &pass->params.cached_program;
+ VK(vkGetPipelineCacheData(vk->dev, pass_vk->pipeCache, &prog->len, NULL));
+ prog->start = talloc_size(pass, prog->len);
+ VK(vkGetPipelineCacheData(vk->dev, pass_vk->pipeCache, &prog->len, prog->start));
+
+ return pass;
+
+error:
+ vk_renderpass_destroy(ra, pass);
+ return NULL;
+}
+
+static void vk_update_descriptor(struct ra *ra, struct vk_cmd *cmd,
+ struct ra_renderpass *pass,
+ struct ra_renderpass_input_val val,
+ VkDescriptorSet ds, int idx)
+{
+ struct ra_renderpass_vk *pass_vk = pass->priv;
+ struct ra_renderpass_input *inp = &pass->params.inputs[val.index];
+
+ VkWriteDescriptorSet *wds = &pass_vk->dswrite[idx];
+ *wds = (VkWriteDescriptorSet) {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstSet = ds,
+ .dstBinding = inp->binding,
+ .descriptorCount = 1,
+ .descriptorType = dsType[inp->type],
+ };
+
+ static const VkPipelineStageFlags passStages[] = {
+ [RA_RENDERPASS_TYPE_RASTER] = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
+ [RA_RENDERPASS_TYPE_COMPUTE] = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+ };
+
+ switch (inp->type) {
+ case RA_VARTYPE_TEX: {
+ struct ra_tex *tex = *(struct ra_tex **)val.data;
+ struct ra_tex_vk *tex_vk = tex->priv;
+
+ assert(tex->params.render_src);
+ tex_barrier(cmd, tex_vk, passStages[pass->params.type],
+ VK_ACCESS_SHADER_READ_BIT,
+ VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, false);
+
+ VkDescriptorImageInfo *iinfo = &pass_vk->dsiinfo[idx];
+ *iinfo = (VkDescriptorImageInfo) {
+ .sampler = tex_vk->sampler,
+ .imageView = tex_vk->view,
+ .imageLayout = tex_vk->current_layout,
+ };
+
+ wds->pImageInfo = iinfo;
+ break;
+ }
+ case RA_VARTYPE_IMG_W: {
+ struct ra_tex *tex = *(struct ra_tex **)val.data;
+ struct ra_tex_vk *tex_vk = tex->priv;
+
+ assert(tex->params.storage_dst);
+ tex_barrier(cmd, tex_vk, passStages[pass->params.type],
+ VK_ACCESS_SHADER_WRITE_BIT,
+ VK_IMAGE_LAYOUT_GENERAL, false);
+
+ VkDescriptorImageInfo *iinfo = &pass_vk->dsiinfo[idx];
+ *iinfo = (VkDescriptorImageInfo) {
+ .imageView = tex_vk->view,
+ .imageLayout = tex_vk->current_layout,
+ };
+
+ wds->pImageInfo = iinfo;
+ break;
+ }
+ case RA_VARTYPE_BUF_RO:
+ case RA_VARTYPE_BUF_RW: {
+ struct ra_buf *buf = *(struct ra_buf **)val.data;
+ struct ra_buf_vk *buf_vk = buf->priv;
+
+        VkAccessFlags access = VK_ACCESS_SHADER_READ_BIT;
+ if (inp->type == RA_VARTYPE_BUF_RW)
+ access |= VK_ACCESS_SHADER_WRITE_BIT;
+
+ buf_barrier(ra, cmd, buf, passStages[pass->params.type],
+ access, buf_vk->slice.mem.offset, buf->params.size);
+
+ VkDescriptorBufferInfo *binfo = &pass_vk->dsbinfo[idx];
+ *binfo = (VkDescriptorBufferInfo) {
+ .buffer = buf_vk->slice.buf,
+ .offset = buf_vk->slice.mem.offset,
+ .range = buf->params.size,
+ };
+
+ wds->pBufferInfo = binfo;
+ break;
+ }
+ }
+}
+
+static void vk_renderpass_run(struct ra *ra,
+ const struct ra_renderpass_run_params *params)
+{
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+ struct ra_renderpass *pass = params->pass;
+ struct ra_renderpass_vk *pass_vk = pass->priv;
+
+ struct vk_cmd *cmd = vk_require_cmd(ra);
+ if (!cmd)
+ goto error;
+
+ static const VkPipelineBindPoint bindPoint[] = {
+ [RA_RENDERPASS_TYPE_RASTER] = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ [RA_RENDERPASS_TYPE_COMPUTE] = VK_PIPELINE_BIND_POINT_COMPUTE,
+ };
+
+ vkCmdBindPipeline(cmd->buf, bindPoint[pass->params.type], pass_vk->pipe);
+
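+    // Rotate through the ring of descriptor sets, so updating this set can
+    // never race against an older, still in-flight command using another one.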
+ VkDescriptorSet ds = pass_vk->dss[pass_vk->dindex++];
+ pass_vk->dindex %= MPVK_NUM_DS;
+
+ for (int i = 0; i < params->num_values; i++)
+ vk_update_descriptor(ra, cmd, pass, params->values[i], ds, i);
+
+ if (params->num_values > 0) {
+ vkUpdateDescriptorSets(vk->dev, params->num_values, pass_vk->dswrite,
+ 0, NULL);
+ }
+
+ vkCmdBindDescriptorSets(cmd->buf, bindPoint[pass->params.type],
+ pass_vk->pipeLayout, 0, 1, &ds, 0, NULL);
+
+ switch (pass->params.type) {
+ case RA_RENDERPASS_TYPE_COMPUTE:
+ vkCmdDispatch(cmd->buf, params->compute_groups[0],
+ params->compute_groups[1],
+ params->compute_groups[2]);
+ break;
+ case RA_RENDERPASS_TYPE_RASTER: {
+ struct ra_tex *tex = params->target;
+ struct ra_tex_vk *tex_vk = tex->priv;
+ assert(tex->params.render_dst);
+
+ struct ra_buf_params buf_params = {
+ .type = RA_BUF_TYPE_VERTEX,
+ .size = params->vertex_count * pass->params.vertex_stride,
+ .host_mutable = true,
+ };
+
+ struct ra_buf *buf = ra_buf_pool_get(ra, &pass_vk->vbo, &buf_params);
+ if (!buf) {
+ MP_ERR(ra, "Failed allocating vertex buffer!\n");
+ goto error;
+ }
+ struct ra_buf_vk *buf_vk = buf->priv;
+
+ vk_buf_update(ra, buf, 0, params->vertex_data, buf_params.size);
+
+ buf_barrier(ra, cmd, buf, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
+ VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
+ buf_vk->slice.mem.offset, buf_params.size);
+
+ vkCmdBindVertexBuffers(cmd->buf, 0, 1, &buf_vk->slice.buf,
+ &buf_vk->slice.mem.offset);
+
+ tex_barrier(cmd, tex_vk, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+ VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
+ VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, false);
+
+ VkViewport viewport = {
+ .x = params->viewport.x0,
+ .y = params->viewport.y0,
+ .width = mp_rect_w(params->viewport),
+ .height = mp_rect_h(params->viewport),
+ };
+
+ VkRect2D scissor = {
+ .offset = {params->scissors.x0, params->scissors.y0},
+ .extent = {mp_rect_w(params->scissors), mp_rect_h(params->scissors)},
+ };
+
+ vkCmdSetViewport(cmd->buf, 0, 1, &viewport);
+ vkCmdSetScissor(cmd->buf, 0, 1, &scissor);
+
+ VkRenderPassBeginInfo binfo = {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = pass_vk->renderPass,
+ .framebuffer = tex_vk->framebuffer,
+ .renderArea = (VkRect2D){{0, 0}, {tex->params.w, tex->params.h}},
+ };
+
+ vkCmdBeginRenderPass(cmd->buf, &binfo, VK_SUBPASS_CONTENTS_INLINE);
+ vkCmdDraw(cmd->buf, params->vertex_count, 1, 0, 0);
+ vkCmdEndRenderPass(cmd->buf);
+ break;
+ }
+ default: abort();
+    }
+
+error:
+ return;
+}
+
+static void vk_blit(struct ra *ra, struct ra_tex *dst, struct ra_tex *src,
+ struct mp_rect *dst_rc, struct mp_rect *src_rc)
+{
+ assert(src->params.blit_src);
+ assert(dst->params.blit_dst);
+
+ struct ra_tex_vk *src_vk = src->priv;
+ struct ra_tex_vk *dst_vk = dst->priv;
+
+ struct vk_cmd *cmd = vk_require_cmd(ra);
+ if (!cmd)
+ return;
+
+ tex_barrier(cmd, src_vk, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ VK_ACCESS_TRANSFER_READ_BIT,
+ VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+ false);
+
+ bool discard = dst_rc->x0 == 0 &&
+ dst_rc->y0 == 0 &&
+ dst_rc->x1 == dst->params.w &&
+ dst_rc->y1 == dst->params.h;
+
+ tex_barrier(cmd, dst_vk, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ VK_ACCESS_TRANSFER_WRITE_BIT,
+ VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ discard);
+
+ VkImageBlit region = {
+ .srcSubresource = vk_layers,
+ .srcOffsets = {{src_rc->x0, src_rc->y0, 0}, {src_rc->x1, src_rc->y1, 1}},
+ .dstSubresource = vk_layers,
+ .dstOffsets = {{dst_rc->x0, dst_rc->y0, 0}, {dst_rc->x1, dst_rc->y1, 1}},
+ };
+
+ vkCmdBlitImage(cmd->buf, src_vk->img, src_vk->current_layout, dst_vk->img,
+ dst_vk->current_layout, 1, &region, VK_FILTER_NEAREST);
+}
+
+static void vk_clear(struct ra *ra, struct ra_tex *tex, float color[4],
+ struct mp_rect *rc)
+{
+ struct ra_vk *p = ra->priv;
+ struct ra_tex_vk *tex_vk = tex->priv;
+ assert(tex->params.blit_dst);
+
+ struct vk_cmd *cmd = vk_require_cmd(ra);
+ if (!cmd)
+ return;
+
+ struct mp_rect full = {0, 0, tex->params.w, tex->params.h};
+ if (!rc || mp_rect_equals(rc, &full)) {
+ // To clear the entire image, we can use the efficient clear command
+ tex_barrier(cmd, tex_vk, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ VK_ACCESS_TRANSFER_WRITE_BIT,
+ VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, true);
+
+ VkClearColorValue clearColor = {0};
+ for (int c = 0; c < 4; c++)
+ clearColor.float32[c] = color[c];
+
+ vkCmdClearColorImage(cmd->buf, tex_vk->img, tex_vk->current_layout,
+ &clearColor, 1, &vk_range);
+ } else {
+ // To simulate per-region clearing, we blit from a 1x1 texture instead
+ struct ra_tex_upload_params ul_params = {
+ .tex = p->clear_tex,
+ .invalidate = true,
+ .src = &color[0],
+ };
+ vk_tex_upload(ra, &ul_params);
+ vk_blit(ra, tex, p->clear_tex, rc, &(struct mp_rect){0, 0, 1, 1});
+ }
+}
+
+#define VK_QUERY_POOL_SIZE (MPVK_MAX_STREAMING_DEPTH * 4)
+
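+// Timer queries are stored in a small ring of start/stop timestamp pairs.
+// A pair is only read back the next time vk_timer_start reuses its slot, so
+// the value reported by vk_timer_stop always refers to an earlier, already
+// completed run (or 0 if the result was not available yet).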
+struct vk_timer {
+ VkQueryPool pool;
+ int index;
+ uint64_t result;
+};
+
+static void vk_timer_destroy(struct ra *ra, ra_timer *ratimer)
+{
+ if (!ratimer)
+ return;
+
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+ struct vk_timer *timer = ratimer;
+
+ vkDestroyQueryPool(vk->dev, timer->pool, MPVK_ALLOCATOR);
+
+ talloc_free(timer);
+}
+
+MAKE_LAZY_DESTRUCTOR(vk_timer_destroy, ra_timer);
+
+static ra_timer *vk_timer_create(struct ra *ra)
+{
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+
+ struct vk_timer *timer = talloc_zero(NULL, struct vk_timer);
+
+ struct VkQueryPoolCreateInfo qinfo = {
+ .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
+ .queryType = VK_QUERY_TYPE_TIMESTAMP,
+ .queryCount = VK_QUERY_POOL_SIZE,
+ };
+
+ VK(vkCreateQueryPool(vk->dev, &qinfo, MPVK_ALLOCATOR, &timer->pool));
+
+ return (ra_timer *)timer;
+
+error:
+ vk_timer_destroy(ra, timer);
+ return NULL;
+}
+
+static void vk_timer_record(struct ra *ra, VkQueryPool pool, int index,
+ VkPipelineStageFlags stage)
+{
+ struct vk_cmd *cmd = vk_require_cmd(ra);
+ if (!cmd)
+ return;
+
+ vkCmdWriteTimestamp(cmd->buf, stage, pool, index);
+}
+
+static void vk_timer_start(struct ra *ra, ra_timer *ratimer)
+{
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+ struct vk_timer *timer = ratimer;
+
+ timer->index = (timer->index + 2) % VK_QUERY_POOL_SIZE;
+
+ uint64_t out[2];
+ VkResult res = vkGetQueryPoolResults(vk->dev, timer->pool, timer->index, 2,
+ sizeof(out), &out[0], sizeof(uint64_t),
+ VK_QUERY_RESULT_64_BIT);
+ switch (res) {
+ case VK_SUCCESS:
+ timer->result = (out[1] - out[0]) * vk->limits.timestampPeriod;
+ break;
+ case VK_NOT_READY:
+ timer->result = 0;
+ break;
+ default:
+ MP_WARN(vk, "Failed reading timer query result: %s\n", vk_err(res));
+ return;
+    }
+
+ vk_timer_record(ra, timer->pool, timer->index,
+ VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
+}
+
+static uint64_t vk_timer_stop(struct ra *ra, ra_timer *ratimer)
+{
+ struct vk_timer *timer = ratimer;
+ vk_timer_record(ra, timer->pool, timer->index + 1,
+ VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
+
+ return timer->result;
+}
+
+static struct ra_fns ra_fns_vk = {
+ .destroy = vk_destroy_ra,
+ .tex_create = vk_tex_create,
+ .tex_destroy = vk_tex_destroy_lazy,
+ .tex_upload = vk_tex_upload,
+ .buf_create = vk_buf_create,
+ .buf_destroy = vk_buf_destroy_lazy,
+ .buf_update = vk_buf_update,
+ .buf_poll = vk_buf_poll,
+ .clear = vk_clear,
+ .blit = vk_blit,
+ .uniform_layout = std140_layout,
+ .renderpass_create = vk_renderpass_create,
+ .renderpass_destroy = vk_renderpass_destroy_lazy,
+ .renderpass_run = vk_renderpass_run,
+ .timer_create = vk_timer_create,
+ .timer_destroy = vk_timer_destroy_lazy,
+ .timer_start = vk_timer_start,
+ .timer_stop = vk_timer_stop,
+};
+
+static void present_cb(void *priv, int *inflight)
+{
+ *inflight -= 1;
+}
+
+bool ra_vk_submit(struct ra *ra, struct ra_tex *tex, VkSemaphore acquired,
+ VkSemaphore *done, int *inflight)
+{
+ struct vk_cmd *cmd = vk_require_cmd(ra);
+ if (!cmd)
+ goto error;
+
+ if (inflight) {
+ *inflight += 1;
+ vk_cmd_callback(cmd, (vk_cb)present_cb, NULL, inflight);
+ }
+
+ struct ra_tex_vk *tex_vk = tex->priv;
+ assert(tex_vk->external_img);
+ tex_barrier(cmd, tex_vk, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0,
+ VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, false);
+
+ // These are the only two stages that we use/support for actually
+    // outputting to swapchain images, so just add a dependency
+ // on both of them. In theory, we could maybe come up with some more
+ // advanced mechanism of tracking dynamic dependencies, but that seems
+ // like overkill.
+ vk_cmd_dep(cmd, acquired,
+ VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+ VK_PIPELINE_STAGE_TRANSFER_BIT);
+
+ return vk_flush(ra, done);
+
+error:
+ return false;
+}
diff --git a/video/out/vulkan/ra_vk.h b/video/out/vulkan/ra_vk.h
new file mode 100644
index 0000000000..893421bc59
--- /dev/null
+++ b/video/out/vulkan/ra_vk.h
@@ -0,0 +1,31 @@
+#pragma once
+
+#include "video/out/gpu/ra.h"
+
+#include "common.h"
+#include "utils.h"
+
+struct ra *ra_create_vk(struct mpvk_ctx *vk, struct mp_log *log);
+
+// Access to the VkDevice is needed for swapchain creation
+VkDevice ra_vk_get_dev(struct ra *ra);
+
+// Allocates a ra_tex that wraps a swapchain image. The contents of the image
+// will be invalidated, and access to it will only be internally synchronized,
+// so the calling code should not do anything else with the VkImage.
+struct ra_tex *ra_vk_wrap_swapchain_img(struct ra *ra, VkImage vkimg,
+ VkSwapchainCreateInfoKHR info);
+
+// This function flushes the command buffers, transitions `tex` (which must be
+// a wrapped swapchain image) into a format suitable for presentation, and
+// submits the current rendering commands. The indicated semaphore must fire
+// before the submitted command can run. If `done` is non-NULL, it will be
+// set to a semaphore that fires once the command completes. If `inflight`
+// is non-NULL, it will be incremented when the command is submitted and
+// decremented once it completes.
+bool ra_vk_submit(struct ra *ra, struct ra_tex *tex, VkSemaphore acquired,
+ VkSemaphore *done, int *inflight);
+
+// May be called on a struct ra of any type. Returns NULL if the ra is not
+// a vulkan ra.
+struct mpvk_ctx *ra_vk_get(struct ra *ra);
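+
+// Rough sketch of how the context code is expected to drive this API (the
+// helper names and variables here are purely illustrative):
+//
+//   VkImage vkimg = ...;        // from vkGetSwapchainImagesKHR()
+//   struct ra_tex *tex = ra_vk_wrap_swapchain_img(ra, vkimg, sw_info);
+//   // ... render into `tex` through the regular ra interface ...
+//   VkSemaphore acquired = ...; // from vkAcquireNextImageKHR()
+//   VkSemaphore done;
+//   if (ra_vk_submit(ra, tex, acquired, &done, &frames_in_flight))
+//       ...;                    // present the image, waiting on `done`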
diff --git a/video/out/vulkan/utils.c b/video/out/vulkan/utils.c
new file mode 100644
index 0000000000..43e446bc36
--- /dev/null
+++ b/video/out/vulkan/utils.c
@@ -0,0 +1,726 @@
+#include <libavutil/macros.h>
+
+#include "utils.h"
+#include "malloc.h"
+
+const char* vk_err(VkResult res)
+{
+ switch (res) {
+ // These are technically success codes, but include them nonetheless
+ case VK_SUCCESS: return "VK_SUCCESS";
+ case VK_NOT_READY: return "VK_NOT_READY";
+ case VK_TIMEOUT: return "VK_TIMEOUT";
+ case VK_EVENT_SET: return "VK_EVENT_SET";
+ case VK_EVENT_RESET: return "VK_EVENT_RESET";
+ case VK_INCOMPLETE: return "VK_INCOMPLETE";
+ case VK_SUBOPTIMAL_KHR: return "VK_SUBOPTIMAL_KHR";
+
+ // Actual error codes
+ case VK_ERROR_OUT_OF_HOST_MEMORY: return "VK_ERROR_OUT_OF_HOST_MEMORY";
+ case VK_ERROR_OUT_OF_DEVICE_MEMORY: return "VK_ERROR_OUT_OF_DEVICE_MEMORY";
+ case VK_ERROR_INITIALIZATION_FAILED: return "VK_ERROR_INITIALIZATION_FAILED";
+ case VK_ERROR_DEVICE_LOST: return "VK_ERROR_DEVICE_LOST";
+ case VK_ERROR_MEMORY_MAP_FAILED: return "VK_ERROR_MEMORY_MAP_FAILED";
+ case VK_ERROR_LAYER_NOT_PRESENT: return "VK_ERROR_LAYER_NOT_PRESENT";
+ case VK_ERROR_EXTENSION_NOT_PRESENT: return "VK_ERROR_EXTENSION_NOT_PRESENT";
+ case VK_ERROR_FEATURE_NOT_PRESENT: return "VK_ERROR_FEATURE_NOT_PRESENT";
+ case VK_ERROR_INCOMPATIBLE_DRIVER: return "VK_ERROR_INCOMPATIBLE_DRIVER";
+ case VK_ERROR_TOO_MANY_OBJECTS: return "VK_ERROR_TOO_MANY_OBJECTS";
+ case VK_ERROR_FORMAT_NOT_SUPPORTED: return "VK_ERROR_FORMAT_NOT_SUPPORTED";
+ case VK_ERROR_FRAGMENTED_POOL: return "VK_ERROR_FRAGMENTED_POOL";
+ case VK_ERROR_INVALID_SHADER_NV: return "VK_ERROR_INVALID_SHADER_NV";
+ case VK_ERROR_OUT_OF_DATE_KHR: return "VK_ERROR_OUT_OF_DATE_KHR";
+ case VK_ERROR_SURFACE_LOST_KHR: return "VK_ERROR_SURFACE_LOST_KHR";
+ }
+
+ return "Unknown error!";
+}
+
+static const char* vk_dbg_type(VkDebugReportObjectTypeEXT type)
+{
+ switch (type) {
+ case VK_DEBUG_REPORT_OBJECT_TYPE_INSTANCE_EXT:
+ return "VkInstance";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_PHYSICAL_DEVICE_EXT:
+ return "VkPhysicalDevice";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT:
+ return "VkDevice";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_QUEUE_EXT:
+ return "VkQueue";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_SEMAPHORE_EXT:
+ return "VkSemaphore";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_BUFFER_EXT:
+ return "VkCommandBuffer";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_FENCE_EXT:
+ return "VkFence";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_MEMORY_EXT:
+ return "VkDeviceMemory";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_EXT:
+ return "VkBuffer";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT:
+ return "VkImage";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_EVENT_EXT:
+ return "VkEvent";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_QUERY_POOL_EXT:
+ return "VkQueryPool";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_VIEW_EXT:
+ return "VkBufferView";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_VIEW_EXT:
+ return "VkImageView";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT:
+ return "VkShaderModule";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_CACHE_EXT:
+ return "VkPipelineCache";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_LAYOUT_EXT:
+ return "VkPipelineLayout";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_RENDER_PASS_EXT:
+ return "VkRenderPass";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_EXT:
+ return "VkPipeline";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT_EXT:
+ return "VkDescriptorSetLayout";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_EXT:
+ return "VkSampler";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_POOL_EXT:
+ return "VkDescriptorPool";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_EXT:
+ return "VkDescriptorSet";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_FRAMEBUFFER_EXT:
+ return "VkFramebuffer";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_POOL_EXT:
+ return "VkCommandPool";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_SURFACE_KHR_EXT:
+ return "VkSurfaceKHR";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_SWAPCHAIN_KHR_EXT:
+ return "VkSwapchainKHR";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_EXT:
+ return "VkDebugReportCallbackEXT";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT:
+ default:
+ return "unknown object";
+ }
+}
+
+static VkBool32 vk_dbg_callback(VkDebugReportFlagsEXT flags,
+ VkDebugReportObjectTypeEXT objType,
+ uint64_t obj, size_t loc, int32_t msgCode,
+ const char *layer, const char *msg, void *priv)
+{
+ struct mpvk_ctx *vk = priv;
+ int lev = MSGL_V;
+
+ switch (flags) {
+ case VK_DEBUG_REPORT_ERROR_BIT_EXT: lev = MSGL_ERR; break;
+ case VK_DEBUG_REPORT_WARNING_BIT_EXT: lev = MSGL_WARN; break;
+ case VK_DEBUG_REPORT_INFORMATION_BIT_EXT: lev = MSGL_TRACE; break;
+ case VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT: lev = MSGL_WARN; break;
+ case VK_DEBUG_REPORT_DEBUG_BIT_EXT: lev = MSGL_DEBUG; break;
+    }
+
+ MP_MSG(vk, lev, "vk [%s] %d: %s (obj 0x%llx (%s), loc 0x%zx)\n",
+ layer, (int)msgCode, msg, (unsigned long long)obj,
+ vk_dbg_type(objType), loc);
+
+ // The return value of this function determines whether the call will
+ // be explicitly aborted (to prevent GPU errors) or not. In this case,
+ // we generally want this to be on for the errors.
+ return (flags & VK_DEBUG_REPORT_ERROR_BIT_EXT);
+}
+
+static void vk_cmdpool_uninit(struct mpvk_ctx *vk, struct vk_cmdpool *pool)
+{
+ if (!pool)
+ return;
+
+ // also frees associated command buffers
+ vkDestroyCommandPool(vk->dev, pool->pool, MPVK_ALLOCATOR);
+ for (int n = 0; n < MPVK_MAX_CMDS; n++) {
+ vkDestroyFence(vk->dev, pool->cmds[n].fence, MPVK_ALLOCATOR);
+ vkDestroySemaphore(vk->dev, pool->cmds[n].done, MPVK_ALLOCATOR);
+ talloc_free(pool->cmds[n].callbacks);
+ }
+ talloc_free(pool);
+}
+
+void mpvk_uninit(struct mpvk_ctx *vk)
+{
+ if (!vk->inst)
+ return;
+
+ if (vk->dev) {
+ vk_cmdpool_uninit(vk, vk->pool);
+ vk_malloc_uninit(vk);
+ vkDestroyDevice(vk->dev, MPVK_ALLOCATOR);
+ }
+
+ if (vk->dbg) {
+        // As with creating the debug callback, this function is not part of
+        // the core spec and needs to be loaded at runtime first.
+ VK_LOAD_PFN(vkDestroyDebugReportCallbackEXT)
+ pfn_vkDestroyDebugReportCallbackEXT(vk->inst, vk->dbg, MPVK_ALLOCATOR);
+ }
+
+ vkDestroySurfaceKHR(vk->inst, vk->surf, MPVK_ALLOCATOR);
+ vkDestroyInstance(vk->inst, MPVK_ALLOCATOR);
+
+ *vk = (struct mpvk_ctx){0};
+}
+
+bool mpvk_instance_init(struct mpvk_ctx *vk, struct mp_log *log, bool debug)
+{
+ *vk = (struct mpvk_ctx) {
+ .log = log,
+ };
+
+ VkInstanceCreateInfo info = {
+ .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
+ };
+
+ if (debug) {
+ // Enables the LunarG standard validation layer, which
+ // is a meta-layer that loads lots of other validators
+ static const char* layers[] = {
+ "VK_LAYER_LUNARG_standard_validation",
+ };
+
+ info.ppEnabledLayerNames = layers;
+ info.enabledLayerCount = MP_ARRAY_SIZE(layers);
+ }
+
+ // Enable whatever extensions were compiled in.
+ static const char *extensions[] = {
+ VK_KHR_SURFACE_EXTENSION_NAME,
+#if HAVE_X11
+ VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
+#endif
+
+ // Extra extensions only used for debugging. These are toggled by
+ // decreasing the enabledExtensionCount, so the number needs to be
+ // synchronized with the code below.
+ VK_EXT_DEBUG_REPORT_EXTENSION_NAME,
+ };
+
+ const int debugExtensionCount = 1;
+
+ info.ppEnabledExtensionNames = extensions;
+ info.enabledExtensionCount = MP_ARRAY_SIZE(extensions);
+
+ if (!debug)
+ info.enabledExtensionCount -= debugExtensionCount;
+
+ MP_VERBOSE(vk, "Creating instance with extensions:\n");
+ for (int i = 0; i < info.enabledExtensionCount; i++)
+ MP_VERBOSE(vk, " %s\n", info.ppEnabledExtensionNames[i]);
+
+ VkResult res = vkCreateInstance(&info, MPVK_ALLOCATOR, &vk->inst);
+ if (res != VK_SUCCESS) {
+ MP_VERBOSE(vk, "Failed creating instance: %s\n", vk_err(res));
+ return false;
+ }
+
+ if (debug) {
+ // Set up a debug callback to catch validation messages
+ VkDebugReportCallbackCreateInfoEXT dinfo = {
+ .sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT,
+ .flags = VK_DEBUG_REPORT_INFORMATION_BIT_EXT |
+ VK_DEBUG_REPORT_WARNING_BIT_EXT |
+ VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT |
+ VK_DEBUG_REPORT_ERROR_BIT_EXT |
+ VK_DEBUG_REPORT_DEBUG_BIT_EXT,
+ .pfnCallback = vk_dbg_callback,
+ .pUserData = vk,
+ };
+
+ // Since this is not part of the core spec, we need to load it. This
+ // can't fail because we've already successfully created an instance
+ // with this extension enabled.
+ VK_LOAD_PFN(vkCreateDebugReportCallbackEXT)
+ pfn_vkCreateDebugReportCallbackEXT(vk->inst, &dinfo, MPVK_ALLOCATOR,
+ &vk->dbg);
+ }
+
+ return true;
+}
+
+#define MPVK_MAX_DEVICES 16
+
+static bool physd_supports_surface(struct mpvk_ctx *vk, VkPhysicalDevice physd)
+{
+ uint32_t qfnum;
+ vkGetPhysicalDeviceQueueFamilyProperties(physd, &qfnum, NULL);
+
+ for (int i = 0; i < qfnum; i++) {
+ VkBool32 sup;
+ VK(vkGetPhysicalDeviceSurfaceSupportKHR(physd, i, vk->surf, &sup));
+ if (sup)
+ return true;
+ }
+
+error:
+ return false;
+}
+
+bool mpvk_find_phys_device(struct mpvk_ctx *vk, const char *name, bool sw)
+{
+ assert(vk->surf);
+
+ MP_VERBOSE(vk, "Probing for vulkan devices:\n");
+
+ VkPhysicalDevice *devices = NULL;
+ uint32_t num = 0;
+ VK(vkEnumeratePhysicalDevices(vk->inst, &num, NULL));
+ devices = talloc_array(NULL, VkPhysicalDevice, num);
+ VK(vkEnumeratePhysicalDevices(vk->inst, &num, devices));
+
+ // Sorted by "priority". Reuses some m_opt code for convenience
+ static const struct m_opt_choice_alternatives types[] = {
+ {"discrete", VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU},
+ {"integrated", VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU},
+ {"virtual", VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU},
+ {"software", VK_PHYSICAL_DEVICE_TYPE_CPU},
+ {"unknown", VK_PHYSICAL_DEVICE_TYPE_OTHER},
+ {0}
+ };
+
+ VkPhysicalDeviceProperties props[MPVK_MAX_DEVICES];
+ for (int i = 0; i < num; i++) {
+ vkGetPhysicalDeviceProperties(devices[i], &props[i]);
+ MP_VERBOSE(vk, " GPU %d: %s (%s)\n", i, props[i].deviceName,
+ m_opt_choice_str(types, props[i].deviceType));
+ }
+
+ // Iterate through each type in order of decreasing preference
+ for (int t = 0; types[t].name; t++) {
+ // Disallow SW rendering unless explicitly enabled
+ if (types[t].value == VK_PHYSICAL_DEVICE_TYPE_CPU && !sw)
+ continue;
+
+ for (int i = 0; i < num; i++) {
+ VkPhysicalDeviceProperties prop = props[i];
+ if (prop.deviceType != types[t].value)
+ continue;
+ if (name && strcmp(name, prop.deviceName) != 0)
+ continue;
+ if (!physd_supports_surface(vk, devices[i]))
+ continue;
+
+ MP_VERBOSE(vk, "Chose device:\n");
+ MP_VERBOSE(vk, " Device Name: %s\n", prop.deviceName);
+ MP_VERBOSE(vk, " Device ID: %x:%x\n",
+ (unsigned)prop.vendorID, (unsigned)prop.deviceID);
+ MP_VERBOSE(vk, " Driver version: %d\n", (int)prop.driverVersion);
+ MP_VERBOSE(vk, " API version: %d.%d.%d\n",
+ (int)VK_VERSION_MAJOR(prop.apiVersion),
+ (int)VK_VERSION_MINOR(prop.apiVersion),
+ (int)VK_VERSION_PATCH(prop.apiVersion));
+ vk->physd = devices[i];
+ vk->limits = prop.limits;
+ talloc_free(devices);
+ return true;
+ }
+ }
+
+error:
+ MP_VERBOSE(vk, "Found no suitable device, giving up.\n");
+ talloc_free(devices);
+ return false;
+}
+
+bool mpvk_pick_surface_format(struct mpvk_ctx *vk)
+{
+ assert(vk->physd);
+
+ VkSurfaceFormatKHR *formats = NULL;
+    uint32_t num;
+
+ // Enumerate through the surface formats and find one that we can map to
+ // a ra_format
+ VK(vkGetPhysicalDeviceSurfaceFormatsKHR(vk->physd, vk->surf, &num, NULL));
+ formats = talloc_array(NULL, VkSurfaceFormatKHR, num);
+ VK(vkGetPhysicalDeviceSurfaceFormatsKHR(vk->physd, vk->surf, &num, formats));
+
+ for (int i = 0; i < num; i++) {
+ // A value of VK_FORMAT_UNDEFINED means we can pick anything we want
+ if (formats[i].format == VK_FORMAT_UNDEFINED) {
+ vk->surf_format = (VkSurfaceFormatKHR) {
+ .colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR,
+ .format = VK_FORMAT_R16G16B16A16_UNORM,
+ };
+ break;
+ }
+
+ if (formats[i].colorSpace != VK_COLOR_SPACE_SRGB_NONLINEAR_KHR)
+ continue;
+
+ // Format whitelist, since we want only >= 8 bit _UNORM formats
+ switch (formats[i].format) {
+ case VK_FORMAT_R8G8B8_UNORM:
+ case VK_FORMAT_B8G8R8_UNORM:
+ case VK_FORMAT_R8G8B8A8_UNORM:
+ case VK_FORMAT_B8G8R8A8_UNORM:
+ case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
+ case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
+ case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
+ case VK_FORMAT_R16G16B16_UNORM:
+ case VK_FORMAT_R16G16B16A16_UNORM:
+ break; // accept
+ default: continue;
+ }
+
+ vk->surf_format = formats[i];
+ break;
+ }
+
+ talloc_free(formats);
+
+ if (!vk->surf_format.format)
+ goto error;
+
+ return true;
+
+error:
+ MP_ERR(vk, "Failed picking surface format!\n");
+ talloc_free(formats);
+ return false;
+}
+
+static bool vk_cmdpool_init(struct mpvk_ctx *vk, VkDeviceQueueCreateInfo qinfo,
+ VkQueueFamilyProperties props,
+ struct vk_cmdpool **out)
+{
+ struct vk_cmdpool *pool = *out = talloc_ptrtype(NULL, pool);
+ *pool = (struct vk_cmdpool) {
+ .qf = qinfo.queueFamilyIndex,
+ .props = props,
+ .qcount = qinfo.queueCount,
+ };
+
+ for (int n = 0; n < pool->qcount; n++)
+ vkGetDeviceQueue(vk->dev, pool->qf, n, &pool->queues[n]);
+
+ VkCommandPoolCreateInfo cinfo = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+ .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT |
+ VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
+ .queueFamilyIndex = pool->qf,
+ };
+
+ VK(vkCreateCommandPool(vk->dev, &cinfo, MPVK_ALLOCATOR, &pool->pool));
+
+ VkCommandBufferAllocateInfo ainfo = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+ .commandPool = pool->pool,
+ .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+ .commandBufferCount = MPVK_MAX_CMDS,
+ };
+
+ VkCommandBuffer cmdbufs[MPVK_MAX_CMDS];
+ VK(vkAllocateCommandBuffers(vk->dev, &ainfo, cmdbufs));
+
+ for (int n = 0; n < MPVK_MAX_CMDS; n++) {
+ struct vk_cmd *cmd = &pool->cmds[n];
+ cmd->pool = pool;
+ cmd->buf = cmdbufs[n];
+
+ VkFenceCreateInfo finfo = {
+ .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
+ .flags = VK_FENCE_CREATE_SIGNALED_BIT,
+ };
+
+ VK(vkCreateFence(vk->dev, &finfo, MPVK_ALLOCATOR, &cmd->fence));
+
+ VkSemaphoreCreateInfo sinfo = {
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+ };
+
+ VK(vkCreateSemaphore(vk->dev, &sinfo, MPVK_ALLOCATOR, &cmd->done));
+ }
+
+ return true;
+
+error:
+ return false;
+}
+
+bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts)
+{
+ assert(vk->physd);
+
+ VkQueueFamilyProperties *qfs = NULL;
+    uint32_t qfnum;
+
+ // Enumerate the queue families and find suitable families for each task
+ vkGetPhysicalDeviceQueueFamilyProperties(vk->physd, &qfnum, NULL);
+ qfs = talloc_array(NULL, VkQueueFamilyProperties, qfnum);
+ vkGetPhysicalDeviceQueueFamilyProperties(vk->physd, &qfnum, qfs);
+
+ MP_VERBOSE(vk, "Queue families supported by device:\n");
+
+ for (int i = 0; i < qfnum; i++) {
+ MP_VERBOSE(vk, "QF %d: flags 0x%x num %d\n", i,
+ (unsigned)qfs[i].queueFlags, (int)qfs[i].queueCount);
+ }
+
+ // For most of our rendering operations, we want to use one "primary" pool,
+ // so just pick the queue family with the most features.
+ int idx = -1;
+ for (int i = 0; i < qfnum; i++) {
+ if (!(qfs[i].queueFlags & VK_QUEUE_GRAPHICS_BIT))
+ continue;
+
+ // QF supports more features
+ if (idx < 0 || qfs[i].queueFlags > qfs[idx].queueFlags)
+ idx = i;
+
+ // QF supports more queues (at the same specialization level)
+ if (qfs[i].queueFlags == qfs[idx].queueFlags &&
+ qfs[i].queueCount > qfs[idx].queueCount)
+ {
+ idx = i;
+ }
+ }
+
+ // Vulkan requires at least one GRAPHICS queue, so if this fails something
+ // is horribly wrong.
+ assert(idx >= 0);
+
+ // Ensure we can actually present to the surface using this queue
+ VkBool32 sup;
+ VK(vkGetPhysicalDeviceSurfaceSupportKHR(vk->physd, idx, vk->surf, &sup));
+ if (!sup) {
+ MP_ERR(vk, "Queue family does not support surface presentation!\n");
+ goto error;
+ }
+
+ // Now that we know which queue families we want, we can create the logical
+ // device
+ assert(opts.queue_count <= MPVK_MAX_QUEUES);
+ static const float priorities[MPVK_MAX_QUEUES] = {0};
+ VkDeviceQueueCreateInfo qinfo = {
+ .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+ .queueFamilyIndex = idx,
+ .queueCount = MPMIN(qfs[idx].queueCount, opts.queue_count),
+ .pQueuePriorities = priorities,
+ };
+
+ static const char *exts[] = {
+ VK_KHR_SWAPCHAIN_EXTENSION_NAME,
+ VK_NV_GLSL_SHADER_EXTENSION_NAME,
+ };
+
+ VkDeviceCreateInfo dinfo = {
+ .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
+ .queueCreateInfoCount = 1,
+ .pQueueCreateInfos = &qinfo,
+ .ppEnabledExtensionNames = exts,
+ .enabledExtensionCount = MP_ARRAY_SIZE(exts),
+ };
+
+ MP_VERBOSE(vk, "Creating vulkan device...\n");
+ VK(vkCreateDevice(vk->physd, &dinfo, MPVK_ALLOCATOR, &vk->dev));
+
+ vk_malloc_init(vk);
+
+ // Create the vk_cmdpools and all required queues / synchronization objects
+ if (!vk_cmdpool_init(vk, qinfo, qfs[idx], &vk->pool))
+ goto error;
+
+ talloc_free(qfs);
+ return true;
+
+error:
+ MP_ERR(vk, "Failed creating logical device!\n");
+ talloc_free(qfs);
+ return false;
+}
+
+static void run_callbacks(struct mpvk_ctx *vk, struct vk_cmd *cmd)
+{
+ for (int i = 0; i < cmd->num_callbacks; i++) {
+ struct vk_callback *cb = &cmd->callbacks[i];
+ cb->run(cb->priv, cb->arg);
+ *cb = (struct vk_callback){0};
+ }
+
+ cmd->num_callbacks = 0;
+
+ // Also reset vk->last_cmd in case this was the last command to run
+ if (vk->last_cmd == cmd)
+ vk->last_cmd = NULL;
+}
+
+static void wait_for_cmds(struct mpvk_ctx *vk, struct vk_cmd cmds[], int num)
+{
+ if (!num)
+ return;
+
+ VkFence fences[MPVK_MAX_CMDS];
+ for (int i = 0; i < num; i++)
+ fences[i] = cmds[i].fence;
+
+ vkWaitForFences(vk->dev, num, fences, true, UINT64_MAX);
+
+ for (int i = 0; i < num; i++)
+ run_callbacks(vk, &cmds[i]);
+}
+
+void mpvk_pool_wait_idle(struct mpvk_ctx *vk, struct vk_cmdpool *pool)
+{
+ if (!pool)
+ return;
+
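+    // cindex points at the slot the next command will be recorded into, and
+    // cindex_pending at the oldest submitted command that has not been
+    // garbage collected yet; everything in between is still in flight.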
+ int idx = pool->cindex, pidx = pool->cindex_pending;
+ if (pidx < idx) { // range doesn't wrap
+ wait_for_cmds(vk, &pool->cmds[pidx], idx - pidx);
+ } else if (pidx > idx) { // range wraps
+ wait_for_cmds(vk, &pool->cmds[pidx], MPVK_MAX_CMDS - pidx);
+ wait_for_cmds(vk, &pool->cmds[0], idx);
+ }
+ pool->cindex_pending = pool->cindex;
+}
+
+void mpvk_dev_wait_idle(struct mpvk_ctx *vk)
+{
+ mpvk_pool_wait_idle(vk, vk->pool);
+}
+
+void mpvk_pool_poll_cmds(struct mpvk_ctx *vk, struct vk_cmdpool *pool,
+ uint64_t timeout)
+{
+ if (!pool)
+ return;
+
+ // If requested, hard block until at least one command completes
+ if (timeout > 0 && pool->cindex_pending != pool->cindex) {
+ vkWaitForFences(vk->dev, 1, &pool->cmds[pool->cindex_pending].fence,
+ true, timeout);
+ }
+
+ // Lazily garbage collect the commands based on their status
+ while (pool->cindex_pending != pool->cindex) {
+ struct vk_cmd *cmd = &pool->cmds[pool->cindex_pending];
+ VkResult res = vkGetFenceStatus(vk->dev, cmd->fence);
+ if (res != VK_SUCCESS)
+ break;
+ run_callbacks(vk, cmd);
+ pool->cindex_pending++;
+ pool->cindex_pending %= MPVK_MAX_CMDS;
+ }
+}
+
+void mpvk_dev_poll_cmds(struct mpvk_ctx *vk, uint64_t timeout)
+{
+ mpvk_pool_poll_cmds(vk, vk->pool, timeout);
+}
+
+void vk_dev_callback(struct mpvk_ctx *vk, vk_cb callback, void *p, void *arg)
+{
+ if (vk->last_cmd) {
+ vk_cmd_callback(vk->last_cmd, callback, p, arg);
+ } else {
+ // The device was already idle, so we can just immediately call it
+ callback(p, arg);
+ }
+}
+
+void vk_cmd_callback(struct vk_cmd *cmd, vk_cb callback, void *p, void *arg)
+{
+ MP_TARRAY_GROW(NULL, cmd->callbacks, cmd->num_callbacks);
+ cmd->callbacks[cmd->num_callbacks++] = (struct vk_callback) {
+ .run = callback,
+ .priv = p,
+ .arg = arg,
+ };
+}
+
+void vk_cmd_dep(struct vk_cmd *cmd, VkSemaphore dep,
+ VkPipelineStageFlagBits depstage)
+{
+ assert(cmd->num_deps < MPVK_MAX_CMD_DEPS);
+ cmd->deps[cmd->num_deps] = dep;
+ cmd->depstages[cmd->num_deps++] = depstage;
+}
+
+struct vk_cmd *vk_cmd_begin(struct mpvk_ctx *vk, struct vk_cmdpool *pool)
+{
+ // Garbage collect the cmdpool first
+ mpvk_pool_poll_cmds(vk, pool, 0);
+
+ int next = (pool->cindex + 1) % MPVK_MAX_CMDS;
+ if (next == pool->cindex_pending) {
+ MP_ERR(vk, "No free command buffers!\n");
+ goto error;
+ }
+
+ struct vk_cmd *cmd = &pool->cmds[pool->cindex];
+ pool->cindex = next;
+
+ VK(vkResetCommandBuffer(cmd->buf, 0));
+
+ VkCommandBufferBeginInfo binfo = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+ .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+ };
+
+ VK(vkBeginCommandBuffer(cmd->buf, &binfo));
+
+ return cmd;
+
+error:
+ return NULL;
+}
+
+bool vk_cmd_submit(struct mpvk_ctx *vk, struct vk_cmd *cmd, VkSemaphore *done)
+{
+ VK(vkEndCommandBuffer(cmd->buf));
+
+ struct vk_cmdpool *pool = cmd->pool;
+ VkQueue queue = pool->queues[pool->qindex];
+
+ VkSubmitInfo sinfo = {
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .commandBufferCount = 1,
+ .pCommandBuffers = &cmd->buf,
+ .waitSemaphoreCount = cmd->num_deps,
+ .pWaitSemaphores = cmd->deps,
+ .pWaitDstStageMask = cmd->depstages,
+ };
+
+ if (done) {
+ sinfo.signalSemaphoreCount = 1;
+ sinfo.pSignalSemaphores = &cmd->done;
+ *done = cmd->done;
+ }
+
+ VK(vkResetFences(vk->dev, 1, &cmd->fence));
+ VK(vkQueueSubmit(queue, 1, &sinfo, cmd->fence));
+ MP_TRACE(vk, "Submitted command on queue %p (QF %d)\n", (void *)queue,
+ pool->qf);
+
+ for (int i = 0; i < cmd->num_deps; i++)
+ cmd->deps[i] = NULL;
+ cmd->num_deps = 0;
+
+ vk->last_cmd = cmd;
+ return true;
+
+error:
+ return false;
+}
+
+void vk_cmd_cycle_queues(struct mpvk_ctx *vk)
+{
+ struct vk_cmdpool *pool = vk->pool;
+ pool->qindex = (pool->qindex + 1) % pool->qcount;
+}
+
+const VkImageSubresourceRange vk_range = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .levelCount = 1,
+ .layerCount = 1,
+};
+
+const VkImageSubresourceLayers vk_layers = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .layerCount = 1,
+};
diff --git a/video/out/vulkan/utils.h b/video/out/vulkan/utils.h
new file mode 100644
index 0000000000..5bde48089d
--- /dev/null
+++ b/video/out/vulkan/utils.h
@@ -0,0 +1,153 @@
+#pragma once
+
+#include "video/out/vo.h"
+#include "video/out/gpu/context.h"
+#include "video/mp_image.h"
+
+#include "common.h"
+#include "formats.h"
+
+#define VK_LOAD_PFN(name) PFN_##name pfn_##name = (PFN_##name) \
+ vkGetInstanceProcAddr(vk->inst, #name);
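+
+// Illustrative use (assumes a `struct mpvk_ctx *vk` in scope): declares and
+// loads a local function pointer named pfn_vkCreateDebugReportCallbackEXT.
+//
+//     VK_LOAD_PFN(vkCreateDebugReportCallbackEXT)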
+
+// Return a human-readable name for a VkResult error code
+const char* vk_err(VkResult res);
+
+// Convenience macros to simplify a lot of common boilerplate
+#define VK_ASSERT(res, str) \
+ do { \
+ if (res != VK_SUCCESS) { \
+ MP_ERR(vk, str ": %s\n", vk_err(res)); \
+ goto error; \
+ } \
+ } while (0)
+
+#define VK(cmd) \
+ do { \
+ MP_TRACE(vk, #cmd "\n"); \
+ VkResult res ## __LINE__ = (cmd); \
+ VK_ASSERT(res ## __LINE__, #cmd); \
+ } while (0)
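+
+// Illustrative use of VK() (assumes a `struct mpvk_ctx *vk`, a VkFence
+// `fence`, and an `error:` label in the enclosing function); on failure the
+// result is logged and control jumps to the label:
+//
+//     VK(vkResetFences(vk->dev, 1, &fence));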
+
+// Uninits everything in the correct order
+void mpvk_uninit(struct mpvk_ctx *vk);
+
+// Initialization functions: As a rule of thumb, these need to be called in
+// this order, followed by vk_malloc_init, followed by RA initialization, and
+// finally followed by swapchain initialization. (See the illustrative sketch
+// after mpvk_device_init below.)
+
+// Create a vulkan instance. Returns false on failure.
+bool mpvk_instance_init(struct mpvk_ctx *vk, struct mp_log *log, bool debug);
+
+// Generate a VkSurfaceKHR usable for video output. Returns false on failure.
+// Must be called after mpvk_instance_init.
+bool mpvk_surface_init(struct vo *vo, struct mpvk_ctx *vk);
+
+// Find a suitable physical device for use with rendering and which supports
+// the surface.
+// name: only match a device with this name
+// sw: also allow software/virtual devices
+bool mpvk_find_phys_device(struct mpvk_ctx *vk, const char *name, bool sw);
+
+// Pick a suitable surface format that's supported by this physical device.
+bool mpvk_pick_surface_format(struct mpvk_ctx *vk);
+
+struct mpvk_device_opts {
+ int queue_count; // number of queues to use
+};
+
+// Create a logical device and initialize the vk_cmdpools
+bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts);
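+
+// Illustrative initialization order (a sketch only; `vo` and `log` are
+// assumed to be in scope, and the real sequence also handles options):
+//
+//     struct mpvk_device_opts dopts = { .queue_count = 1 };
+//     if (!mpvk_instance_init(vk, log, false) ||
+//         !mpvk_surface_init(vo, vk) ||
+//         !mpvk_find_phys_device(vk, NULL, false) ||
+//         !mpvk_pick_surface_format(vk) ||
+//         !mpvk_device_init(vk, dopts))
+//         goto error;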
+
+// Wait until all commands submitted to all queues have completed
+void mpvk_pool_wait_idle(struct mpvk_ctx *vk, struct vk_cmdpool *pool);
+void mpvk_dev_wait_idle(struct mpvk_ctx *vk);
+
+// Wait until at least one command submitted to any queue has completed, and
+// process the callbacks. Good for event loops that need to delay until a
+// command completes. Will block at most `timeout` nanoseconds. If used with
+// 0, it only garbage collects completed commands without blocking.
+void mpvk_pool_poll_cmds(struct mpvk_ctx *vk, struct vk_cmdpool *pool,
+ uint64_t timeout);
+void mpvk_dev_poll_cmds(struct mpvk_ctx *vk, uint64_t timeout);
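+
+// Illustrative usage: a render loop might call `mpvk_dev_poll_cmds(vk, 0)`
+// once per iteration to garbage collect finished commands without blocking.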
+
+// Since lots of vulkan operations need to be done lazily once the affected
+// resources are no longer in use, provide an abstraction for tracking these.
+// In practice, these are only checked and run when submitting new commands, so
+// the actual execution may be delayed by a frame.
+typedef void (*vk_cb)(void *priv, void *arg);
+
+struct vk_callback {
+ vk_cb run;
+ void *priv;
+ void *arg; // as a convenience, you also get to pass an arg for "free"
+};
+
+// Associate a callback with the completion of all currently pending commands.
+// This will essentially run once the device is completely idle.
+void vk_dev_callback(struct mpvk_ctx *vk, vk_cb callback, void *p, void *arg);
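+
+// Illustrative use: defer freeing a resource until the device goes idle. The
+// callback and the `priv` pointer here are hypothetical, not part of this
+// patch:
+//
+//     static void free_priv(void *priv, void *arg) { talloc_free(priv); }
+//     ...
+//     vk_dev_callback(vk, free_priv, priv, NULL);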
+
+#define MPVK_MAX_CMD_DEPS 8
+
+// Helper wrapper around command buffers that also track dependencies,
+// callbacks and synchronization primitives
+struct vk_cmd {
+ struct vk_cmdpool *pool; // pool it was allocated from
+ VkCommandBuffer buf;
+ VkFence fence; // the fence guards cmd buffer reuse
+ VkSemaphore done; // the semaphore signals when execution is done
+ // The semaphores represent dependencies that need to complete before
+ // this command can be executed. These are *not* owned by the vk_cmd
+ VkSemaphore deps[MPVK_MAX_CMD_DEPS];
+ VkPipelineStageFlags depstages[MPVK_MAX_CMD_DEPS];
+ int num_deps;
+    // Since VkFences have no completion-callback mechanism of their own, we
+    // manually track "callbacks" to fire once the VkFence completes. These are
+    // used for multiple purposes, ranging from garbage collection (resource
+    // deallocation) to fencing.
+ struct vk_callback *callbacks;
+ int num_callbacks;
+};
+
+// Associate a callback with the completion of the current command. The
+// callback will be run once the command completes, or shortly thereafter.
+void vk_cmd_callback(struct vk_cmd *cmd, vk_cb callback, void *p, void *arg);
+
+// Associate a dependency with the current command. The command will wait on
+// this semaphore, at the given pipeline stage, before it may execute.
+void vk_cmd_dep(struct vk_cmd *cmd, VkSemaphore dep,
+ VkPipelineStageFlagBits depstage);
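+
+// Illustrative use (the `acquire_sem` semaphore is hypothetical, e.g. a
+// swapchain image acquisition semaphore): make the command wait for it
+// before writing any color attachment output.
+//
+//     vk_cmd_dep(cmd, acquire_sem,
+//                VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);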
+
+#define MPVK_MAX_QUEUES 8
+#define MPVK_MAX_CMDS 64
+
+// Command pool / queue family hybrid abstraction
+struct vk_cmdpool {
+ VkQueueFamilyProperties props;
+ uint32_t qf; // queue family index
+ VkCommandPool pool;
+ VkQueue queues[MPVK_MAX_QUEUES];
+ int qcount;
+ int qindex;
+ // Command buffers associated with this queue
+ struct vk_cmd cmds[MPVK_MAX_CMDS];
+ int cindex;
+ int cindex_pending;
+};
+
+// Fetch the next command buffer from a command pool and begin recording to it.
+// Returns NULL on failure.
+struct vk_cmd *vk_cmd_begin(struct mpvk_ctx *vk, struct vk_cmdpool *pool);
+
+// Finish the currently recording command buffer and submit it for execution.
+// If `done` is not NULL, it will be set to a semaphore that signals once the
+// command completes. (The signaled semaphore MUST be consumed by a matching
+// semaphore wait.)
+// Returns whether successful.
+bool vk_cmd_submit(struct mpvk_ctx *vk, struct vk_cmd *cmd, VkSemaphore *done);
+
+// Rotate the queues for each vk_cmdpool. Call this once per frame to ensure
+// good parallelism between frames when using multiple queues
+void vk_cmd_cycle_queues(struct mpvk_ctx *vk);
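+
+// Illustrative per-frame sketch (the recording step, `image` and `clear_col`
+// are assumptions for the sake of the example, not part of this patch):
+//
+//     struct vk_cmd *cmd = vk_cmd_begin(vk, vk->pool);
+//     if (cmd) {
+//         vkCmdClearColorImage(cmd->buf, image, VK_IMAGE_LAYOUT_GENERAL,
+//                              &clear_col, 1, &vk_range);
+//         vk_cmd_submit(vk, cmd, NULL);
+//     }
+//     vk_cmd_cycle_queues(vk);
+//     mpvk_dev_poll_cmds(vk, 0);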
+
+// Predefined structs for a simple non-layered, non-mipped image
+extern const VkImageSubresourceRange vk_range;
+extern const VkImageSubresourceLayers vk_layers;
diff --git a/wscript b/wscript
index dd47956392..964b7878c7 100644
--- a/wscript
+++ b/wscript
@@ -803,6 +803,10 @@ video_output_features = [
"Aborting. If you really mean to compile without OpenGL " +
"video outputs use --disable-gl.",
}, {
+ 'name': '--vulkan',
+ 'desc': 'Vulkan context support',
+ 'func': check_cc(header_name='vulkan/vulkan.h', lib='vulkan'),
+ }, {
'name': 'egl-helpers',
'desc': 'EGL helper functions',
'deps': 'egl-x11 || mali-fbdev || rpi || gl-wayland || egl-drm || ' +
diff --git a/wscript_build.py b/wscript_build.py
index 68cfafb94f..86b51daaa2 100644
--- a/wscript_build.py
+++ b/wscript_build.py
@@ -445,6 +445,12 @@ def build(ctx):
( "video/out/w32_common.c", "win32-desktop" ),
( "video/out/win32/displayconfig.c", "win32-desktop" ),
( "video/out/win32/droptarget.c", "win32-desktop" ),
+ ( "video/out/vulkan/utils.c", "vulkan" ),
+ ( "video/out/vulkan/malloc.c", "vulkan" ),
+ ( "video/out/vulkan/formats.c", "vulkan" ),
+ ( "video/out/vulkan/ra_vk.c", "vulkan" ),
+ ( "video/out/vulkan/context.c", "vulkan" ),
+ ( "video/out/vulkan/context_xlib.c", "vulkan && x11" ),
( "video/out/win32/exclusive_hack.c", "gl-win32" ),
( "video/out/wayland_common.c", "wayland" ),
( "video/out/wayland/buffer.c", "wayland" ),