From 68eac1a1e7ac931576a8b59dd159a7961189ca48 Mon Sep 17 00:00:00 2001 From: James Ross-Gowan Date: Thu, 7 Sep 2017 20:18:06 +1000 Subject: vo_gpu: d3d11: initial implementation This is a new RA/vo_gpu backend that uses Direct3D 11. The GLSL generated by vo_gpu is cross-compiled to HLSL with SPIRV-Cross. What works: - All of mpv's internal shaders should work, including compute shaders. - Some external shaders have been tested and work, including RAVU and adaptive-sharpen. - Non-dumb mode works, even on very old hardware. Most features work at feature level 9_3 and all features work at feature level 10_0. Some features also work at feature level 9_1 and 9_2, but without high-bit- depth FBOs, it's not very useful. (Hardware this old is probably not fast enough for advanced features anyway.) Note: This is more compatible than ANGLE, which requires 9_3 to work at all (GLES 2.0,) and 10_1 for non-dumb-mode (GLES 3.0.) - Hardware decoding with D3D11VA, including decoding of 10-bit formats without truncation to 8-bit. What doesn't work / can be improved: - PBO upload and direct rendering does not work yet. Direct rendering requires persistent-mapped PBOs because the decoder needs to be able to read data from images that have already been decoded and uploaded. Unfortunately, it seems like persistent-mapped PBOs are fundamentally incompatible with D3D11, which requires all resources to use driver- managed memory and requires memory to be unmapped (and hence pointers to be invalidated) when a resource is used in a draw or copy operation. However it might be possible to use D3D11's limited multithreading capabilities to emulate some features of PBOs, like asynchronous texture uploading. - The blit() and clear() operations don't have equivalents in the D3D11 API that handle all cases, so in most cases, they have to be emulated with a shader. 
This is currently done inside ra_d3d11, but ideally it would be done in generic code, so it can take advantage of mpv's shader generation utilities. - SPIRV-Cross is used through a NIH C-compatible wrapper library, since it does not expose a C interface itself. The library is available here: https://github.com/rossy/crossc - The D3D11 context could be made to support more modern DXGI features in future. For example, it should be possible to add support for high-bit-depth and HDR output with DXGI 1.5/1.6. --- DOCS/man/options.rst | 41 +- options/options.c | 5 + options/options.h | 1 + video/out/d3d11/context.c | 235 ++++ video/out/d3d11/hwdec_d3d11va.c | 195 ++++ video/out/d3d11/ra_d3d11.c | 2235 ++++++++++++++++++++++++++++++++++++++ video/out/d3d11/ra_d3d11.h | 34 + video/out/gpu/context.c | 7 + video/out/gpu/d3d11_helpers.c | 401 +++++++ video/out/gpu/d3d11_helpers.h | 79 ++ video/out/gpu/hwdec.c | 4 + video/out/opengl/context_angle.c | 2 +- video/out/opengl/d3d11_helpers.c | 383 ------- video/out/opengl/d3d11_helpers.h | 73 -- wscript | 17 +- wscript_build.py | 5 +- 16 files changed, 3249 insertions(+), 468 deletions(-) create mode 100644 video/out/d3d11/context.c create mode 100644 video/out/d3d11/hwdec_d3d11va.c create mode 100644 video/out/d3d11/ra_d3d11.c create mode 100644 video/out/d3d11/ra_d3d11.h create mode 100644 video/out/gpu/d3d11_helpers.c create mode 100644 video/out/gpu/d3d11_helpers.h delete mode 100644 video/out/opengl/d3d11_helpers.c delete mode 100644 video/out/opengl/d3d11_helpers.h diff --git a/DOCS/man/options.rst b/DOCS/man/options.rst index a6c2136947..d6dfc48535 100644 --- a/DOCS/man/options.rst +++ b/DOCS/man/options.rst @@ -686,8 +686,8 @@ Video :dxva2: requires ``--vo=gpu`` with ``--gpu-context=angle`` or ``--gpu-context=dxinterop`` (Windows only) :dxva2-copy: copies video back to system RAM (Windows only) - :d3d11va: requires ``--vo=gpu`` with ``--gpu-context=angle`` - (Windows 8+ only) + :d3d11va: requires ``--vo=gpu`` with 
``--gpu-context=d3d11`` or + ``--gpu-context=angle`` (Windows 8+ only) :d3d11va-copy: copies video back to system RAM (Windows 8+ only) :mediacodec: requires ``--vo=mediacodec_embed`` (Android only) :mediacodec-copy: copies video back to system RAM (Android only) @@ -775,10 +775,11 @@ Video BT.601 or BT.709, a forced, low-quality but correct RGB conversion is performed. Otherwise, the result will be totally incorrect. - ``d3d11va`` is usually safe (if used with ANGLE builds that support - ``EGL_KHR_stream path`` - otherwise, it converts to RGB), except that - 10 bit input (HEVC main 10 profiles) will be rounded down to 8 bits, - which results in reduced quality. + ``d3d11va`` is safe when used with the ``d3d11`` backend. If used with + ``angle`` it is usually safe, except that 10 bit input (HEVC main 10 + profiles) will be rounded down to 8 bits, which will result in reduced + quality. Also note that with very old ANGLE builds (without + ``EGL_KHR_stream path``,) all input will be converted to RGB. ``dxva2`` is not safe. It appears to always use BT.601 for forced RGB conversion, but actual behavior depends on the GPU drivers. Some drivers @@ -4272,6 +4273,30 @@ The following video options are currently all specific to ``--vo=gpu`` and as mpv's vulkan implementation currently does not try and protect textures against concurrent access. +``--d3d11-warp=<yes|no|auto>`` + Use WARP (Windows Advanced Rasterization Platform) with the D3D11 GPU + backend (default: auto). This is a high performance software renderer. By + default, it is only used when the system has no hardware adapters that + support D3D11. While the extended GPU features will work with WARP, they + can be very slow. + +``--d3d11-feature-level=<12_1|12_0|11_1|11_0|10_1|10_0|9_3|9_2|9_1>`` + Select a specific feature level when using the D3D11 GPU backend. By + default, the highest available feature level is used. This option can be + used to select a lower feature level, which is mainly useful for debugging.
+ Most extended GPU features will not work at 9_x feature levels. + +``--d3d11-flip=<yes|no>`` + Enable flip-model presentation, which avoids unnecessarily copying the + backbuffer by sharing surfaces with the DWM (default: yes). This may cause + performance issues with older drivers. If flip-model presentation is not + supported (for example, on Windows 7 without the platform update), mpv will + automatically fall back to the older bitblt presentation model. + +``--d3d11-sync-interval=<0..4>`` + Schedule each frame to be presented for this number of VBlank intervals. + (default: 1) Setting to 1 will enable VSync, setting to 0 will disable it. + ``--spirv-compiler=<compiler>`` Controls which compiler is used to translate GLSL to SPIR-V. This is (currently) only relevant for ``--gpu-api=vulkan``. The possible choices @@ -4694,6 +4719,8 @@ The following video options are currently all specific to ``--vo=gpu`` and Win32, using WGL for rendering and Direct3D 9Ex for presentation. Works on Nvidia and AMD. Newer Intel chips with the latest drivers may also work. + d3d11 + Win32, with native Direct3D 11 rendering.
x11 X11/GLX x11vk @@ -4728,6 +4755,8 @@ The following video options are currently all specific to ``--vo=gpu`` and Allow only OpenGL (requires OpenGL 2.1+ or GLES 2.0+) vulkan Allow only Vulkan (requires a valid/working ``--spirv-compiler``) + d3d11 + Allow only ``--gpu-context=d3d11`` ``--opengl-es=`` Controls which type of OpenGL context will be accepted: diff --git a/options/options.c b/options/options.c index 22e448d22f..3bf4ee1108 100644 --- a/options/options.c +++ b/options/options.c @@ -90,6 +90,7 @@ extern const struct m_obj_list ao_obj_list; extern const struct m_sub_options opengl_conf; extern const struct m_sub_options vulkan_conf; extern const struct m_sub_options spirv_conf; +extern const struct m_sub_options d3d11_conf; extern const struct m_sub_options angle_conf; extern const struct m_sub_options cocoa_conf; @@ -699,6 +700,10 @@ const m_option_t mp_opts[] = { OPT_SUBSTRUCT("", vulkan_opts, vulkan_conf, 0), #endif +#if HAVE_D3D11 + OPT_SUBSTRUCT("", d3d11_opts, d3d11_conf, 0), +#endif + #if HAVE_EGL_ANGLE_WIN32 OPT_SUBSTRUCT("", angle_opts, angle_conf, 0), #endif diff --git a/options/options.h b/options/options.h index 0d697a717c..47a4622430 100644 --- a/options/options.h +++ b/options/options.h @@ -332,6 +332,7 @@ typedef struct MPOpts { struct opengl_opts *opengl_opts; struct vulkan_opts *vulkan_opts; struct spirv_opts *spirv_opts; + struct d3d11_opts *d3d11_opts; struct cocoa_opts *cocoa_opts; struct dvd_opts *dvd_opts; diff --git a/video/out/d3d11/context.c b/video/out/d3d11/context.c new file mode 100644 index 0000000000..018fd99934 --- /dev/null +++ b/video/out/d3d11/context.c @@ -0,0 +1,235 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "common/msg.h" +#include "options/m_config.h" +#include "osdep/windows_utils.h" + +#include "video/out/gpu/context.h" +#include "video/out/gpu/d3d11_helpers.h" +#include "video/out/gpu/spirv.h" +#include "video/out/w32_common.h" +#include "ra_d3d11.h" + +struct d3d11_opts { + int feature_level; + int warp; + int flip; + int sync_interval; +}; + +#define OPT_BASE_STRUCT struct d3d11_opts +const struct m_sub_options d3d11_conf = { + .opts = (const struct m_option[]) { + OPT_CHOICE("d3d11-warp", warp, 0, + ({"auto", -1}, + {"no", 0}, + {"yes", 1})), + OPT_CHOICE("d3d11-feature-level", feature_level, 0, + ({"12_1", D3D_FEATURE_LEVEL_12_1}, + {"12_0", D3D_FEATURE_LEVEL_12_0}, + {"11_1", D3D_FEATURE_LEVEL_11_1}, + {"11_0", D3D_FEATURE_LEVEL_11_0}, + {"10_1", D3D_FEATURE_LEVEL_10_1}, + {"10_0", D3D_FEATURE_LEVEL_10_0}, + {"9_3", D3D_FEATURE_LEVEL_9_3}, + {"9_2", D3D_FEATURE_LEVEL_9_2}, + {"9_1", D3D_FEATURE_LEVEL_9_1})), + OPT_FLAG("d3d11-flip", flip, 0), + OPT_INTRANGE("d3d11-sync-interval", sync_interval, 0, 0, 4), + {0} + }, + .defaults = &(const struct d3d11_opts) { + .feature_level = D3D_FEATURE_LEVEL_12_1, + .warp = -1, + .flip = 1, + .sync_interval = 1, + }, + .size = sizeof(struct d3d11_opts) +}; + +struct priv { + struct d3d11_opts *opts; + + struct ra_tex *backbuffer; + ID3D11Device *device; + IDXGISwapChain *swapchain; +}; + +static struct ra_tex *get_backbuffer(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + ID3D11Texture2D *backbuffer = NULL; + struct ra_tex *tex = NULL; + HRESULT hr; + + hr = IDXGISwapChain_GetBuffer(p->swapchain, 0, &IID_ID3D11Texture2D, +
(void**)&backbuffer); + if (FAILED(hr)) { + MP_ERR(ctx, "Couldn't get swapchain image\n"); + goto done; + } + + tex = ra_d3d11_wrap_tex(ctx->ra, (ID3D11Resource *)backbuffer); +done: + SAFE_RELEASE(backbuffer); + return tex; +} + +static bool resize(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + HRESULT hr; + + ra_tex_free(ctx->ra, &p->backbuffer); + + hr = IDXGISwapChain_ResizeBuffers(p->swapchain, 0, ctx->vo->dwidth, + ctx->vo->dheight, DXGI_FORMAT_UNKNOWN, 0); + if (FAILED(hr)) { + MP_FATAL(ctx, "Couldn't resize swapchain: %s\n", mp_HRESULT_to_str(hr)); + return false; + } + + p->backbuffer = get_backbuffer(ctx); + + return true; +} + +static bool d3d11_reconfig(struct ra_ctx *ctx) +{ + vo_w32_config(ctx->vo); + return resize(ctx); +} + +static int d3d11_color_depth(struct ra_swapchain *sw) +{ + return 8; +} + +static bool d3d11_start_frame(struct ra_swapchain *sw, struct ra_fbo *out_fbo) +{ + struct priv *p = sw->priv; + *out_fbo = (struct ra_fbo) { + .tex = p->backbuffer, + .flip = false, + }; + return true; +} + +static bool d3d11_submit_frame(struct ra_swapchain *sw, + const struct vo_frame *frame) +{ + ra_d3d11_flush(sw->ctx->ra); + return true; +} + +static void d3d11_swap_buffers(struct ra_swapchain *sw) +{ + struct priv *p = sw->priv; + IDXGISwapChain_Present(p->swapchain, p->opts->sync_interval, 0); +} + +static int d3d11_control(struct ra_ctx *ctx, int *events, int request, void *arg) +{ + int ret = vo_w32_control(ctx->vo, events, request, arg); + if (*events & VO_EVENT_RESIZE) { + if (!resize(ctx)) + return VO_ERROR; + } + return ret; +} + +static void d3d11_uninit(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + + ra_tex_free(ctx->ra, &p->backbuffer); + SAFE_RELEASE(p->swapchain); + vo_w32_uninit(ctx->vo); + SAFE_RELEASE(p->device); + + // Destroy the RA last to prevent objects we hold from showing up in D3D's + // leak checker. ctx->ra may still be NULL if d3d11_init() bailed out early. + if (ctx->ra) ctx->ra->fns->destroy(ctx->ra); +} + +static const struct ra_swapchain_fns d3d11_swapchain = { +
.color_depth = d3d11_color_depth, + .start_frame = d3d11_start_frame, + .submit_frame = d3d11_submit_frame, + .swap_buffers = d3d11_swap_buffers, +}; + +static bool d3d11_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); + p->opts = mp_get_config_group(ctx, ctx->global, &d3d11_conf); + + struct ra_swapchain *sw = ctx->swapchain = talloc_zero(ctx, struct ra_swapchain); + sw->priv = p; + sw->ctx = ctx; + sw->fns = &d3d11_swapchain; + + struct d3d11_device_opts dopts = { + .debug = ctx->opts.debug, + .allow_warp = p->opts->warp != 0, + .force_warp = p->opts->warp == 1, + .max_feature_level = p->opts->feature_level, + .max_frame_latency = ctx->opts.swapchain_depth, + }; + if (!mp_d3d11_create_present_device(ctx->log, &dopts, &p->device)) + goto error; + + if (!spirv_compiler_init(ctx)) + goto error; + ctx->ra = ra_d3d11_create(p->device, ctx->log, ctx->spirv); + if (!ctx->ra) + goto error; + + if (!vo_w32_init(ctx->vo)) + goto error; + + struct d3d11_swapchain_opts scopts = { + .window = vo_w32_hwnd(ctx->vo), + .width = ctx->vo->dwidth, + .height = ctx->vo->dheight, + .flip = p->opts->flip, + // Add one frame for the backbuffer and one frame of "slack" to reduce + // contention with the window manager when acquiring the backbuffer + .length = ctx->opts.swapchain_depth + 2, + .usage = DXGI_USAGE_RENDER_TARGET_OUTPUT, + }; + if (!mp_d3d11_create_swapchain(p->device, ctx->log, &scopts, &p->swapchain)) + goto error; + + p->backbuffer = get_backbuffer(ctx); + + return true; + +error: + d3d11_uninit(ctx); + return false; +} + +const struct ra_ctx_fns ra_ctx_d3d11 = { + .type = "d3d11", + .name = "d3d11", + .reconfig = d3d11_reconfig, + .control = d3d11_control, + .init = d3d11_init, + .uninit = d3d11_uninit, +}; diff --git a/video/out/d3d11/hwdec_d3d11va.c b/video/out/d3d11/hwdec_d3d11va.c new file mode 100644 index 0000000000..f179298ac1 --- /dev/null +++ b/video/out/d3d11/hwdec_d3d11va.c @@ -0,0 +1,195 @@ +/* + * This file is part of 
mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <windows.h> +#include <d3d11.h> +#include <d3d11_1.h> + +#include "config.h" + +#include "common/common.h" +#include "osdep/windows_utils.h" +#include "video/hwdec.h" +#include "video/decode/d3d.h" +#include "video/out/d3d11/ra_d3d11.h" +#include "video/out/gpu/hwdec.h" + +struct priv_owner { + struct mp_hwdec_ctx hwctx; + ID3D11Device *device; + ID3D11Device1 *device1; +}; + +struct priv { + ID3D11DeviceContext1 *ctx; + ID3D11Texture2D *copy_tex; +}; + +static void uninit(struct ra_hwdec *hw) +{ + struct priv_owner *p = hw->priv; + if (p->hwctx.ctx) + hwdec_devices_remove(hw->devs, &p->hwctx); + SAFE_RELEASE(p->device); + SAFE_RELEASE(p->device1); +} + +static int init(struct ra_hwdec *hw) +{ + struct priv_owner *p = hw->priv; + HRESULT hr; + + if (!ra_is_d3d11(hw->ra)) + return -1; + p->device = ra_d3d11_get_device(hw->ra); + if (!p->device) + return -1; + + // D3D11VA requires Direct3D 11.1, so this should always succeed + hr = ID3D11Device_QueryInterface(p->device, &IID_ID3D11Device1, + (void**)&p->device1); + if (FAILED(hr)) { + MP_ERR(hw, "Failed to get D3D11.1 interface: %s\n", + mp_HRESULT_to_str(hr)); + return -1; + } + + ID3D10Multithread *multithread; + hr = ID3D11Device_QueryInterface(p->device, &IID_ID3D10Multithread, + (void **)&multithread); + if (FAILED(hr)) { + MP_ERR(hw, "Failed to get Multithread interface: %s\n", + mp_HRESULT_to_str(hr)); +
return -1; + } + ID3D10Multithread_SetMultithreadProtected(multithread, TRUE); + ID3D10Multithread_Release(multithread); + + p->hwctx = (struct mp_hwdec_ctx){ + .type = HWDEC_D3D11VA, + .driver_name = hw->driver->name, + .ctx = p->device, + .av_device_ref = d3d11_wrap_device_ref(p->device), + }; + hwdec_devices_add(hw->devs, &p->hwctx); + return 0; +} + +static void mapper_uninit(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + for (int i = 0; i < 4; i++) + ra_tex_free(mapper->ra, &mapper->tex[i]); + SAFE_RELEASE(p->copy_tex); + SAFE_RELEASE(p->ctx); +} + +static int mapper_init(struct ra_hwdec_mapper *mapper) +{ + struct priv_owner *o = mapper->owner->priv; + struct priv *p = mapper->priv; + HRESULT hr; + + mapper->dst_params = mapper->src_params; + mapper->dst_params.imgfmt = mapper->src_params.hw_subfmt; + mapper->dst_params.hw_subfmt = 0; + + struct ra_imgfmt_desc desc = {0}; + struct mp_image layout = {0}; + + if (!ra_get_imgfmt_desc(mapper->ra, mapper->dst_params.imgfmt, &desc)) + return -1; + + mp_image_set_params(&layout, &mapper->dst_params); + + DXGI_FORMAT copy_fmt; + switch (mapper->dst_params.imgfmt) { + case IMGFMT_NV12: copy_fmt = DXGI_FORMAT_NV12; break; + case IMGFMT_P010: copy_fmt = DXGI_FORMAT_P010; break; + default: return -1; + } + + // We copy decoder images to an intermediate texture. This is slower than + // the zero-copy path, but according to MSDN, decoder textures should not + // be bound to SRVs, so it is technically correct, and it works around some + // driver "bugs" that can happen with the zero-copy path. It also allows + // samplers to work correctly when the decoder image includes padding. 
+ D3D11_TEXTURE2D_DESC copy_desc = { + .Width = mapper->dst_params.w, + .Height = mapper->dst_params.h, + .MipLevels = 1, + .ArraySize = 1, + .SampleDesc.Count = 1, + .Format = copy_fmt, + .BindFlags = D3D11_BIND_SHADER_RESOURCE, + }; + hr = ID3D11Device_CreateTexture2D(o->device, &copy_desc, NULL, &p->copy_tex); + if (FAILED(hr)) { + MP_FATAL(mapper, "Could not create shader resource texture\n"); + return -1; + } + + for (int i = 0; i < desc.num_planes; i++) { + mapper->tex[i] = ra_d3d11_wrap_tex_video(mapper->ra, p->copy_tex, + mp_image_plane_w(&layout, i), + mp_image_plane_h(&layout, i), + desc.planes[i]); + if (!mapper->tex[i]) { + MP_FATAL(mapper, "Could not create RA texture view\n"); + return -1; + } + } + + ID3D11Device1_GetImmediateContext1(o->device1, &p->ctx); + + return 0; +} + +static int mapper_map(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + ID3D11Texture2D *tex = (void *)mapper->src->planes[0]; + int subresource = (intptr_t)mapper->src->planes[1]; + + ID3D11DeviceContext1_CopySubresourceRegion1(p->ctx, + (ID3D11Resource *)p->copy_tex, 0, 0, 0, 0, + (ID3D11Resource *)tex, subresource, (&(D3D11_BOX) { + .left = 0, + .top = 0, + .front = 0, + .right = mapper->dst_params.w, + .bottom = mapper->dst_params.h, + .back = 1, + }), D3D11_COPY_DISCARD); + + return 0; +} + +const struct ra_hwdec_driver ra_hwdec_d3d11va = { + .name = "d3d11va", + .priv_size = sizeof(struct priv_owner), + .api = HWDEC_D3D11VA, + .imgfmts = {IMGFMT_D3D11VA, IMGFMT_D3D11NV12, 0}, + .init = init, + .uninit = uninit, + .mapper = &(const struct ra_hwdec_mapper_driver){ + .priv_size = sizeof(struct priv), + .init = mapper_init, + .uninit = mapper_uninit, + .map = mapper_map, + }, +}; diff --git a/video/out/d3d11/ra_d3d11.c b/video/out/d3d11/ra_d3d11.c new file mode 100644 index 0000000000..372b65d49f --- /dev/null +++ b/video/out/d3d11/ra_d3d11.c @@ -0,0 +1,2235 @@ +#include <windows.h> +#include <versionhelpers.h> +#include <d3d11_1.h> +#include <d3d11sdklayers.h> +#include <dxgi1_2.h> +#include <d3dcompiler.h> +#include <crossc.h> + +#include "common/msg.h"
+#include "osdep/io.h" +#include "osdep/subprocess.h" +#include "osdep/windows_utils.h" +#include "video/out/gpu/spirv.h" +#include "video/out/gpu/utils.h" + +#include "ra_d3d11.h" + +#ifndef D3D11_1_UAV_SLOT_COUNT +#define D3D11_1_UAV_SLOT_COUNT (64) +#endif + +struct ra_d3d11 { + struct spirv_compiler *spirv; + + ID3D11Device *dev; + ID3D11Device1 *dev1; + ID3D11DeviceContext *ctx; + ID3D11DeviceContext1 *ctx1; + pD3DCompile D3DCompile; + + // Debug interfaces (--gpu-debug) + ID3D11Debug *debug; + ID3D11InfoQueue *iqueue; + + // Device capabilities + D3D_FEATURE_LEVEL fl; + bool has_clear_view; + int max_uavs; + + // Streaming dynamic vertex buffer, which is used for all renderpasses + ID3D11Buffer *vbuf; + size_t vbuf_size; + size_t vbuf_used; + + // clear() renderpass resources (only used when has_clear_view is false) + ID3D11PixelShader *clear_ps; + ID3D11VertexShader *clear_vs; + ID3D11InputLayout *clear_layout; + ID3D11Buffer *clear_vbuf; + ID3D11Buffer *clear_cbuf; + + // blit() renderpass resources + ID3D11PixelShader *blit_float_ps; + ID3D11VertexShader *blit_vs; + ID3D11InputLayout *blit_layout; + ID3D11Buffer *blit_vbuf; + ID3D11SamplerState *blit_sampler; +}; + +struct d3d_tex { + // res mirrors one of tex1d, tex2d or tex3d for convenience. It does not + // hold an additional reference to the texture object. 
+ ID3D11Resource *res; + + ID3D11Texture1D *tex1d; + ID3D11Texture2D *tex2d; + ID3D11Texture3D *tex3d; + + ID3D11ShaderResourceView *srv; + ID3D11RenderTargetView *rtv; + ID3D11UnorderedAccessView *uav; + ID3D11SamplerState *sampler; +}; + +struct d3d_buf { + ID3D11Buffer *buf; + ID3D11Buffer *staging; + ID3D11UnorderedAccessView *uav; + void *data; // Data for mapped staging texture +}; + +struct d3d_rpass { + ID3D11PixelShader *ps; + ID3D11VertexShader *vs; + ID3D11ComputeShader *cs; + ID3D11InputLayout *layout; + ID3D11BlendState *bstate; +}; + +struct d3d_timer { + ID3D11Query *ts_start; + ID3D11Query *ts_end; + ID3D11Query *disjoint; + uint64_t result; // Latches the result from the previous use of the timer +}; + +struct d3d_fmt { + const char *name; + int components; + int bytes; + int bits[4]; + DXGI_FORMAT fmt; + enum ra_ctype ctype; + bool unordered; +}; + +static const char clear_vs[] = "\ +float4 main(float2 pos : POSITION) : SV_Position\n\ +{\n\ + return float4(pos, 0.0, 1.0);\n\ +}\n\ +"; + +static const char clear_ps[] = "\ +cbuffer ps_cbuf : register(b0) {\n\ + float4 color : packoffset(c0);\n\ +}\n\ +\n\ +float4 main(float4 pos : SV_Position) : SV_Target\n\ +{\n\ + return color;\n\ +}\n\ +"; + +struct blit_vert { + float x, y, u, v; +}; + +static const char blit_vs[] = "\ +void main(float2 pos : POSITION, float2 coord : TEXCOORD0,\n\ + out float4 out_pos : SV_Position, out float2 out_coord : TEXCOORD0)\n\ +{\n\ + out_pos = float4(pos, 0.0, 1.0);\n\ + out_coord = coord;\n\ +}\n\ +"; + +static const char blit_float_ps[] = "\ +Texture2D tex : register(t0);\n\ +SamplerState samp : register(s0);\n\ +\n\ +float4 main(float4 pos : SV_Position, float2 coord : TEXCOORD0) : SV_Target\n\ +{\n\ + return tex.Sample(samp, coord);\n\ +}\n\ +"; + +#define DXFMT(f, t) .fmt = DXGI_FORMAT_##f##_##t, .ctype = RA_CTYPE_##t +static struct d3d_fmt formats[] = { + { "r8", 1, 1, { 8}, DXFMT(R8, UNORM) }, + { "rg8", 2, 2, { 8, 8}, DXFMT(R8G8, UNORM) }, + { "rgba8", 4, 4, { 
8, 8, 8, 8}, DXFMT(R8G8B8A8, UNORM) }, + { "r16", 1, 2, {16}, DXFMT(R16, UNORM) }, + { "rg16", 2, 4, {16, 16}, DXFMT(R16G16, UNORM) }, + { "rgba16", 4, 8, {16, 16, 16, 16}, DXFMT(R16G16B16A16, UNORM) }, + + { "r32ui", 1, 4, {32}, DXFMT(R32, UINT) }, + { "rg32ui", 2, 8, {32, 32}, DXFMT(R32G32, UINT) }, + { "rgb32ui", 3, 12, {32, 32, 32}, DXFMT(R32G32B32, UINT) }, + { "rgba32ui", 4, 16, {32, 32, 32, 32}, DXFMT(R32G32B32A32, UINT) }, + + { "r16hf", 1, 2, {16}, DXFMT(R16, FLOAT) }, + { "rg16hf", 2, 4, {16, 16}, DXFMT(R16G16, FLOAT) }, + { "rgba16hf", 4, 8, {16, 16, 16, 16}, DXFMT(R16G16B16A16, FLOAT) }, + { "r32f", 1, 4, {32}, DXFMT(R32, FLOAT) }, + { "rg32f", 2, 8, {32, 32}, DXFMT(R32G32, FLOAT) }, + { "rgb32f", 3, 12, {32, 32, 32}, DXFMT(R32G32B32, FLOAT) }, + { "rgba32f", 4, 16, {32, 32, 32, 32}, DXFMT(R32G32B32A32, FLOAT) }, + + { "rgb10_a2", 4, 4, {10, 10, 10, 2}, DXFMT(R10G10B10A2, UNORM) }, + { "bgra8", 4, 4, { 8, 8, 8, 8}, DXFMT(B8G8R8A8, UNORM), .unordered = false }, +}; + +static DXGI_FORMAT fmt_to_dxgi(const struct ra_format *fmt) +{ + struct d3d_fmt *d3d = fmt->priv; + return d3d->fmt; +} + +static void setup_formats(struct ra *ra) +{ + // All formats must be usable as a 2D texture + static const UINT sup_basic = D3D11_FORMAT_SUPPORT_TEXTURE2D; + // SHADER_SAMPLE indicates support for linear sampling, point always works + static const UINT sup_filter = D3D11_FORMAT_SUPPORT_SHADER_SAMPLE; + // RA requires renderable surfaces to be blendable as well + static const UINT sup_render = D3D11_FORMAT_SUPPORT_RENDER_TARGET | + D3D11_FORMAT_SUPPORT_BLENDABLE; + + struct ra_d3d11 *p = ra->priv; + HRESULT hr; + + for (int i = 0; i < MP_ARRAY_SIZE(formats); i++) { + struct d3d_fmt *d3dfmt = &formats[i]; + UINT support = 0; + hr = ID3D11Device_CheckFormatSupport(p->dev, d3dfmt->fmt, &support); + if (FAILED(hr)) + continue; + if ((support & sup_basic) != sup_basic) + continue; + + struct ra_format *fmt = talloc_zero(ra, struct ra_format); + *fmt = (struct ra_format) { + 
.name = d3dfmt->name, + .priv = d3dfmt, + .ctype = d3dfmt->ctype, + .ordered = !d3dfmt->unordered, + .num_components = d3dfmt->components, + .pixel_size = d3dfmt->bytes, + .linear_filter = (support & sup_filter) == sup_filter, + .renderable = (support & sup_render) == sup_render, + }; + + if (support & D3D11_FORMAT_SUPPORT_TEXTURE1D) + ra->caps |= RA_CAP_TEX_1D; + + for (int j = 0; j < d3dfmt->components; j++) + fmt->component_size[j] = fmt->component_depth[j] = d3dfmt->bits[j]; + + fmt->glsl_format = ra_fmt_glsl_format(fmt); + + MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, fmt); + } +} + +static bool tex_init(struct ra *ra, struct ra_tex *tex) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_tex *tex_p = tex->priv; + struct ra_tex_params *params = &tex->params; + HRESULT hr; + + // A SRV is required for renderpasses and blitting, since blitting can use + // a renderpass internally + if (params->render_src || params->blit_src) { + // Always specify the SRV format for simplicity. This will match the + // texture format for textures created with tex_create, but it can be + // different for wrapped planar video textures. 
+ D3D11_SHADER_RESOURCE_VIEW_DESC srvdesc = { + .Format = fmt_to_dxgi(params->format), + }; + switch (params->dimensions) { + case 1: + srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D; + srvdesc.Texture1D.MipLevels = 1; + break; + case 2: + srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + srvdesc.Texture2D.MipLevels = 1; + break; + case 3: + srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D; + srvdesc.Texture3D.MipLevels = 1; + break; + } + hr = ID3D11Device_CreateShaderResourceView(p->dev, tex_p->res, &srvdesc, + &tex_p->srv); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create SRV: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + } + + // Samplers are required for renderpasses, but not blitting, since the blit + // code uses its own point sampler + if (params->render_src) { + D3D11_SAMPLER_DESC sdesc = { + .AddressU = D3D11_TEXTURE_ADDRESS_CLAMP, + .AddressV = D3D11_TEXTURE_ADDRESS_CLAMP, + .AddressW = D3D11_TEXTURE_ADDRESS_CLAMP, + .ComparisonFunc = D3D11_COMPARISON_NEVER, + .MinLOD = 0, + .MaxLOD = D3D11_FLOAT32_MAX, + .MaxAnisotropy = 1, + }; + if (params->src_linear) + sdesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; + if (params->src_repeat) { + sdesc.AddressU = sdesc.AddressV = sdesc.AddressW = + D3D11_TEXTURE_ADDRESS_WRAP; + } + // The runtime pools sampler state objects internally, so we don't have + // to worry about resource usage when creating one for every ra_tex + hr = ID3D11Device_CreateSamplerState(p->dev, &sdesc, &tex_p->sampler); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create sampler: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + } + + // Like SRVs, an RTV is required for renderpass output and blitting + if (params->render_dst || params->blit_dst) { + hr = ID3D11Device_CreateRenderTargetView(p->dev, tex_p->res, NULL, + &tex_p->rtv); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create RTV: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + } + + if (p->fl >= D3D_FEATURE_LEVEL_11_0 && params->storage_dst) { + hr = 
ID3D11Device_CreateUnorderedAccessView(p->dev, tex_p->res, NULL, + &tex_p->uav); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create UAV: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + } + + return true; +error: + return false; +} + +static void tex_destroy(struct ra *ra, struct ra_tex *tex) +{ + if (!tex) + return; + struct d3d_tex *tex_p = tex->priv; + + SAFE_RELEASE(tex_p->srv); + SAFE_RELEASE(tex_p->rtv); + SAFE_RELEASE(tex_p->uav); + SAFE_RELEASE(tex_p->sampler); + SAFE_RELEASE(tex_p->res); + talloc_free(tex); +} + +static struct ra_tex *tex_create(struct ra *ra, + const struct ra_tex_params *params) +{ + struct ra_d3d11 *p = ra->priv; + HRESULT hr; + + struct ra_tex *tex = talloc_zero(NULL, struct ra_tex); + tex->params = *params; + tex->params.initial_data = NULL; + + struct d3d_tex *tex_p = tex->priv = talloc_zero(tex, struct d3d_tex); + DXGI_FORMAT fmt = fmt_to_dxgi(params->format); + + D3D11_SUBRESOURCE_DATA *pdata = NULL; + if (params->initial_data) { + pdata = &(D3D11_SUBRESOURCE_DATA) { + .pSysMem = params->initial_data, + .SysMemPitch = params->w * params->format->pixel_size, + }; + if (params->dimensions >= 3) + pdata->SysMemSlicePitch = pdata->SysMemPitch * params->h; + } + + D3D11_USAGE usage = D3D11_USAGE_DEFAULT; + D3D11_BIND_FLAG bind_flags = 0; + + if (params->render_src || params->blit_src) + bind_flags |= D3D11_BIND_SHADER_RESOURCE; + if (params->render_dst || params->blit_dst) + bind_flags |= D3D11_BIND_RENDER_TARGET; + if (p->fl >= D3D_FEATURE_LEVEL_11_0 && params->storage_dst) + bind_flags |= D3D11_BIND_UNORDERED_ACCESS; + + // Apparently IMMUTABLE textures are efficient, so try to infer whether we + // can use one + if (params->initial_data && !params->render_dst && !params->storage_dst && + !params->blit_dst && !params->host_mutable) + usage = D3D11_USAGE_IMMUTABLE; + + switch (params->dimensions) { + case 1:; + D3D11_TEXTURE1D_DESC desc1d = { + .Width = params->w, + .MipLevels = 1, + .ArraySize = 1, + .Format = fmt, + .Usage = usage, 
+ .BindFlags = bind_flags, + }; + hr = ID3D11Device_CreateTexture1D(p->dev, &desc1d, pdata, &tex_p->tex1d); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create Texture1D: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + tex_p->res = (ID3D11Resource *)tex_p->tex1d; + break; + case 2:; + D3D11_TEXTURE2D_DESC desc2d = { + .Width = params->w, + .Height = params->h, + .MipLevels = 1, + .ArraySize = 1, + .SampleDesc.Count = 1, + .Format = fmt, + .Usage = usage, + .BindFlags = bind_flags, + }; + hr = ID3D11Device_CreateTexture2D(p->dev, &desc2d, pdata, &tex_p->tex2d); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create Texture2D: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + tex_p->res = (ID3D11Resource *)tex_p->tex2d; + break; + case 3:; + D3D11_TEXTURE3D_DESC desc3d = { + .Width = params->w, + .Height = params->h, + .Depth = params->d, + .MipLevels = 1, + .Format = fmt, + .Usage = usage, + .BindFlags = bind_flags, + }; + hr = ID3D11Device_CreateTexture3D(p->dev, &desc3d, pdata, &tex_p->tex3d); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create Texture3D: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + tex_p->res = (ID3D11Resource *)tex_p->tex3d; + break; + default: + abort(); + } + + if (!tex_init(ra, tex)) + goto error; + + return tex; + +error: + tex_destroy(ra, tex); + return NULL; +} + +struct ra_tex *ra_d3d11_wrap_tex(struct ra *ra, ID3D11Resource *res) +{ + HRESULT hr; + + struct ra_tex *tex = talloc_zero(NULL, struct ra_tex); + struct ra_tex_params *params = &tex->params; + struct d3d_tex *tex_p = tex->priv = talloc_zero(tex, struct d3d_tex); + + DXGI_FORMAT fmt = DXGI_FORMAT_UNKNOWN; + D3D11_USAGE usage = D3D11_USAGE_DEFAULT; + D3D11_BIND_FLAG bind_flags = 0; + + D3D11_RESOURCE_DIMENSION type; + ID3D11Resource_GetType(res, &type); + switch (type) { + case D3D11_RESOURCE_DIMENSION_TEXTURE2D: + hr = ID3D11Resource_QueryInterface(res, &IID_ID3D11Texture2D, + (void**)&tex_p->tex2d); + if (FAILED(hr)) { + MP_ERR(ra, "Resource is not a 
ID3D11Texture2D\n"); + goto error; + } + tex_p->res = (ID3D11Resource *)tex_p->tex2d; + + D3D11_TEXTURE2D_DESC desc2d; + ID3D11Texture2D_GetDesc(tex_p->tex2d, &desc2d); + if (desc2d.MipLevels != 1 || desc2d.ArraySize != 1) + goto error; + if (desc2d.SampleDesc.Count != 1) + goto error; + + params->dimensions = 2; + params->w = desc2d.Width; + params->h = desc2d.Height; + params->d = 1; + usage = desc2d.Usage; + bind_flags = desc2d.BindFlags; + fmt = desc2d.Format; + break; + default: + // We could wrap Texture1D/3D as well, but keep it simple, since this + // function is only used for swapchain backbuffers at the moment + MP_ERR(ra, "Resource is not suitable to wrap\n"); + goto error; + } + + for (int i = 0; i < ra->num_formats; i++) { + DXGI_FORMAT target_fmt = fmt_to_dxgi(ra->formats[i]); + if (fmt == target_fmt) { + params->format = ra->formats[i]; + break; + } + } + if (!params->format) { + MP_ERR(ra, "Could not find a suitable RA format for wrapped resource\n"); + goto error; + } + + if (bind_flags & D3D11_BIND_SHADER_RESOURCE) + params->render_src = params->blit_src = true; + if (bind_flags & D3D11_BIND_RENDER_TARGET) + params->render_dst = params->blit_dst = true; + if (bind_flags & D3D11_BIND_UNORDERED_ACCESS) + params->storage_dst = true; + + if (usage != D3D11_USAGE_DEFAULT) { + MP_ERR(ra, "Resource is not D3D11_USAGE_DEFAULT\n"); + goto error; + } + + if (!tex_init(ra, tex)) + goto error; + + return tex; +error: + tex_destroy(ra, tex); + return NULL; +} + +struct ra_tex *ra_d3d11_wrap_tex_video(struct ra *ra, ID3D11Texture2D *res, + int w, int h, + const struct ra_format *fmt) +{ + struct ra_tex *tex = talloc_zero(NULL, struct ra_tex); + struct ra_tex_params *params = &tex->params; + struct d3d_tex *tex_p = tex->priv = talloc_zero(tex, struct d3d_tex); + + tex_p->tex2d = res; + tex_p->res = (ID3D11Resource *)tex_p->tex2d; + ID3D11Texture2D_AddRef(res); + + D3D11_TEXTURE2D_DESC desc2d; + ID3D11Texture2D_GetDesc(tex_p->tex2d, &desc2d); + if 
(!(desc2d.BindFlags & D3D11_BIND_SHADER_RESOURCE)) { + MP_ERR(ra, "Video resource is not bindable\n"); + goto error; + } + + params->dimensions = 2; + params->w = w; + params->h = h; + params->d = 1; + params->render_src = true; + params->src_linear = true; + // fmt can be different to the texture format for planar video textures + params->format = fmt; + + if (!tex_init(ra, tex)) + goto error; + + return tex; +error: + tex_destroy(ra, tex); + return NULL; +} + +static bool tex_upload(struct ra *ra, const struct ra_tex_upload_params *params) +{ + struct ra_d3d11 *p = ra->priv; + struct ra_tex *tex = params->tex; + struct d3d_tex *tex_p = tex->priv; + + if (!params->src) { + MP_ERR(ra, "Pixel buffers are not supported\n"); + return false; + } + + const char *src = params->src; + ptrdiff_t stride = tex->params.dimensions >= 2 ? tex->params.w : 0; + ptrdiff_t pitch = tex->params.dimensions >= 3 ? stride * tex->params.h : 0; + bool invalidate = true; + D3D11_BOX *rc = NULL; + + if (tex->params.dimensions == 2) { + stride = params->stride; + + // stride can be negative, but vo_gpu expects the RA backend to ignore + // the negative stride and upload the image "upside-down" for now + if (stride < 0) { + int h = params->rc ? mp_rect_h(*params->rc) : tex->params.h; + src += (h - 1) * stride; + stride = -stride; + } + + if (params->rc && (params->rc->x0 != 0 || params->rc->y0 != 0 || + params->rc->x1 != tex->params.w || params->rc->y1 != tex->params.h)) + { + rc = &(D3D11_BOX) { + .left = params->rc->x0, + .top = params->rc->y0, + .front = 0, + .right = params->rc->x1, + .bottom = params->rc->y1, + .back = 1, + }; + invalidate = params->invalidate; + } + } + + if (p->ctx1) { + ID3D11DeviceContext1_UpdateSubresource1(p->ctx1, tex_p->res, 0, rc, + src, stride, pitch, invalidate ? 
D3D11_COPY_DISCARD : 0); + } else { + ID3D11DeviceContext_UpdateSubresource(p->ctx, tex_p->res, 0, rc, + src, stride, pitch); + } + + return true; +} + +static void buf_destroy(struct ra *ra, struct ra_buf *buf) +{ + if (!buf) + return; + struct ra_d3d11 *p = ra->priv; + struct d3d_buf *buf_p = buf->priv; + + if (buf_p->data) + ID3D11DeviceContext_Unmap(p->ctx, (ID3D11Resource *)buf_p->staging, 0); + SAFE_RELEASE(buf_p->buf); + SAFE_RELEASE(buf_p->staging); + SAFE_RELEASE(buf_p->uav); + talloc_free(buf); +} + +static struct ra_buf *buf_create(struct ra *ra, + const struct ra_buf_params *params) +{ + // D3D11 does not support permanent mapping or pixel buffers + if (params->host_mapped || params->type == RA_BUF_TYPE_TEX_UPLOAD) + return NULL; + + struct ra_d3d11 *p = ra->priv; + HRESULT hr; + + struct ra_buf *buf = talloc_zero(NULL, struct ra_buf); + buf->params = *params; + buf->params.initial_data = NULL; + + struct d3d_buf *buf_p = buf->priv = talloc_zero(buf, struct d3d_buf); + + D3D11_SUBRESOURCE_DATA *pdata = NULL; + if (params->initial_data) + pdata = &(D3D11_SUBRESOURCE_DATA) { .pSysMem = params->initial_data }; + + D3D11_BUFFER_DESC desc = { .ByteWidth = params->size }; + switch (params->type) { + case RA_BUF_TYPE_SHADER_STORAGE: + desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS; + desc.ByteWidth = MP_ALIGN_UP(desc.ByteWidth, sizeof(float)); + desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS; + break; + case RA_BUF_TYPE_UNIFORM: + desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + desc.ByteWidth = MP_ALIGN_UP(desc.ByteWidth, sizeof(float[4])); + break; + } + + hr = ID3D11Device_CreateBuffer(p->dev, &desc, pdata, &buf_p->buf); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create buffer: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + + if (params->host_mutable) { + // D3D11 doesn't allow constant buffer updates that aren't aligned to a + // full constant boundary (vec4,) and some drivers don't allow partial + // constant buffer updates at all, but the 
RA consumer is allowed to + // partially update an ra_buf. The best way to handle partial updates + // without causing a pipeline stall is probably to keep a copy of the + // data in a staging buffer. + + desc.Usage = D3D11_USAGE_STAGING; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + desc.BindFlags = 0; + hr = ID3D11Device_CreateBuffer(p->dev, &desc, NULL, &buf_p->staging); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create staging buffer: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + } + + if (params->type == RA_BUF_TYPE_SHADER_STORAGE) { + D3D11_UNORDERED_ACCESS_VIEW_DESC udesc = { + .Format = DXGI_FORMAT_R32_TYPELESS, + .ViewDimension = D3D11_UAV_DIMENSION_BUFFER, + .Buffer = { + .NumElements = desc.ByteWidth / sizeof(float), + .Flags = D3D11_BUFFER_UAV_FLAG_RAW, + }, + }; + hr = ID3D11Device_CreateUnorderedAccessView(p->dev, + (ID3D11Resource *)buf_p->buf, &udesc, &buf_p->uav); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create UAV: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + } + + return buf; +error: + buf_destroy(ra, buf); + return NULL; +} + +static void buf_resolve(struct ra *ra, struct ra_buf *buf) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_buf *buf_p = buf->priv; + + assert(buf->params.host_mutable); + if (!buf_p->data) + return; + + ID3D11DeviceContext_Unmap(p->ctx, (ID3D11Resource *)buf_p->staging, 0); + buf_p->data = NULL; + + // Synchronize the GPU buffer with the staging buffer + ID3D11DeviceContext_CopyResource(p->ctx, (ID3D11Resource *)buf_p->buf, + (ID3D11Resource *)buf_p->staging); +} + +static void buf_update(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset, + const void *data, size_t size) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_buf *buf_p = buf->priv; + HRESULT hr; + + if (!buf_p->data) { + // If this is the first update after the buffer was created or after it + // has been used in a renderpass, it will be unmapped, so map it + D3D11_MAPPED_SUBRESOURCE map = {0}; + hr = ID3D11DeviceContext_Map(p->ctx, 
(ID3D11Resource *)buf_p->staging, + 0, D3D11_MAP_WRITE, 0, &map); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to map resource\n"); + return; + } + buf_p->data = map.pData; + } + + char *cdata = buf_p->data; + memcpy(cdata + offset, data, size); +} + +static const char *get_shader_target(struct ra *ra, enum glsl_shader type) +{ + struct ra_d3d11 *p = ra->priv; + switch (p->fl) { + default: + switch (type) { + case GLSL_SHADER_VERTEX: return "vs_5_0"; + case GLSL_SHADER_FRAGMENT: return "ps_5_0"; + case GLSL_SHADER_COMPUTE: return "cs_5_0"; + } + break; + case D3D_FEATURE_LEVEL_10_1: + switch (type) { + case GLSL_SHADER_VERTEX: return "vs_4_1"; + case GLSL_SHADER_FRAGMENT: return "ps_4_1"; + case GLSL_SHADER_COMPUTE: return "cs_4_1"; + } + break; + case D3D_FEATURE_LEVEL_10_0: + switch (type) { + case GLSL_SHADER_VERTEX: return "vs_4_0"; + case GLSL_SHADER_FRAGMENT: return "ps_4_0"; + case GLSL_SHADER_COMPUTE: return "cs_4_0"; + } + break; + case D3D_FEATURE_LEVEL_9_3: + switch (type) { + case GLSL_SHADER_VERTEX: return "vs_4_0_level_9_3"; + case GLSL_SHADER_FRAGMENT: return "ps_4_0_level_9_3"; + } + break; + case D3D_FEATURE_LEVEL_9_2: + case D3D_FEATURE_LEVEL_9_1: + switch (type) { + case GLSL_SHADER_VERTEX: return "vs_4_0_level_9_1"; + case GLSL_SHADER_FRAGMENT: return "ps_4_0_level_9_1"; + } + break; + } + return NULL; +} + +static bool setup_clear_rpass(struct ra *ra) +{ + struct ra_d3d11 *p = ra->priv; + ID3DBlob *vs_blob = NULL; + ID3DBlob *ps_blob = NULL; + HRESULT hr; + + hr = p->D3DCompile(clear_vs, sizeof(clear_vs), NULL, NULL, NULL, "main", + get_shader_target(ra, GLSL_SHADER_VERTEX), + D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &vs_blob, NULL); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to compile clear() vertex shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + hr = ID3D11Device_CreateVertexShader(p->dev, + ID3D10Blob_GetBufferPointer(vs_blob), ID3D10Blob_GetBufferSize(vs_blob), + NULL, &p->clear_vs); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create 
clear() vertex shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + hr = p->D3DCompile(clear_ps, sizeof(clear_ps), NULL, NULL, NULL, "main", + get_shader_target(ra, GLSL_SHADER_FRAGMENT), + D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &ps_blob, NULL); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to compile clear() pixel shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + hr = ID3D11Device_CreatePixelShader(p->dev, + ID3D10Blob_GetBufferPointer(ps_blob), ID3D10Blob_GetBufferSize(ps_blob), + NULL, &p->clear_ps); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create clear() pixel shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + D3D11_INPUT_ELEMENT_DESC in_descs[] = { + { "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0 }, + }; + hr = ID3D11Device_CreateInputLayout(p->dev, in_descs, + MP_ARRAY_SIZE(in_descs), ID3D10Blob_GetBufferPointer(vs_blob), + ID3D10Blob_GetBufferSize(vs_blob), &p->clear_layout); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create clear() IA layout: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + // clear() always draws to a quad covering the whole viewport + static const float verts[] = { + -1, -1, + 1, -1, + 1, 1, + -1, 1, + -1, -1, + 1, 1, + }; + D3D11_BUFFER_DESC vdesc = { + .ByteWidth = sizeof(verts), + .Usage = D3D11_USAGE_IMMUTABLE, + .BindFlags = D3D11_BIND_VERTEX_BUFFER, + }; + D3D11_SUBRESOURCE_DATA vdata = { + .pSysMem = verts, + }; + hr = ID3D11Device_CreateBuffer(p->dev, &vdesc, &vdata, &p->clear_vbuf); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create clear() vertex buffer: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + D3D11_BUFFER_DESC cdesc = { + .ByteWidth = sizeof(float[4]), + .BindFlags = D3D11_BIND_CONSTANT_BUFFER, + }; + hr = ID3D11Device_CreateBuffer(p->dev, &cdesc, NULL, &p->clear_cbuf); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create clear() constant buffer: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + SAFE_RELEASE(vs_blob); + SAFE_RELEASE(ps_blob); + return true; +error: + 
SAFE_RELEASE(vs_blob); + SAFE_RELEASE(ps_blob); + return false; +} + +static void clear_rpass(struct ra *ra, struct ra_tex *tex, float color[4], + struct mp_rect *rc) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_tex *tex_p = tex->priv; + struct ra_tex_params *params = &tex->params; + + ID3D11DeviceContext_UpdateSubresource(p->ctx, + (ID3D11Resource *)p->clear_cbuf, 0, NULL, color, 0, 0); + + ID3D11DeviceContext_IASetInputLayout(p->ctx, p->clear_layout); + ID3D11DeviceContext_IASetVertexBuffers(p->ctx, 0, 1, &p->clear_vbuf, + &(UINT) { sizeof(float[2]) }, &(UINT) { 0 }); + ID3D11DeviceContext_IASetPrimitiveTopology(p->ctx, + D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + + ID3D11DeviceContext_VSSetShader(p->ctx, p->clear_vs, NULL, 0); + + ID3D11DeviceContext_RSSetViewports(p->ctx, 1, (&(D3D11_VIEWPORT) { + .Width = params->w, + .Height = params->h, + .MinDepth = 0, + .MaxDepth = 1, + })); + ID3D11DeviceContext_RSSetScissorRects(p->ctx, 1, (&(D3D11_RECT) { + .left = rc->x0, + .top = rc->y0, + .right = rc->x1, + .bottom = rc->y1, + })); + ID3D11DeviceContext_PSSetShader(p->ctx, p->clear_ps, NULL, 0); + ID3D11DeviceContext_PSSetConstantBuffers(p->ctx, 0, 1, &p->clear_cbuf); + + ID3D11DeviceContext_OMSetRenderTargets(p->ctx, 1, &tex_p->rtv, NULL); + ID3D11DeviceContext_OMSetBlendState(p->ctx, NULL, NULL, + D3D11_DEFAULT_SAMPLE_MASK); + + ID3D11DeviceContext_Draw(p->ctx, 6, 0); + + ID3D11DeviceContext_PSSetConstantBuffers(p->ctx, 0, 1, + &(ID3D11Buffer *){ NULL }); + ID3D11DeviceContext_OMSetRenderTargets(p->ctx, 0, NULL, NULL); +} + +static void clear(struct ra *ra, struct ra_tex *tex, float color[4], + struct mp_rect *rc) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_tex *tex_p = tex->priv; + struct ra_tex_params *params = &tex->params; + + if (!tex_p->rtv) + return; + + if (rc->x0 || rc->y0 || rc->x1 != params->w || rc->y1 != params->h) { + if (p->has_clear_view) { + ID3D11DeviceContext1_ClearView(p->ctx1, (ID3D11View *)tex_p->rtv, + color, (&(D3D11_RECT) { + .left 
= rc->x0, + .top = rc->y0, + .right = rc->x1, + .bottom = rc->y1, + }), 1); + } else { + clear_rpass(ra, tex, color, rc); + } + } else { + ID3D11DeviceContext_ClearRenderTargetView(p->ctx, tex_p->rtv, color); + } +} + +static bool setup_blit_rpass(struct ra *ra) +{ + struct ra_d3d11 *p = ra->priv; + ID3DBlob *vs_blob = NULL; + ID3DBlob *float_ps_blob = NULL; + HRESULT hr; + + hr = p->D3DCompile(blit_vs, sizeof(blit_vs), NULL, NULL, NULL, "main", + get_shader_target(ra, GLSL_SHADER_VERTEX), + D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &vs_blob, NULL); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to compile blit() vertex shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + hr = ID3D11Device_CreateVertexShader(p->dev, + ID3D10Blob_GetBufferPointer(vs_blob), ID3D10Blob_GetBufferSize(vs_blob), + NULL, &p->blit_vs); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create blit() vertex shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + hr = p->D3DCompile(blit_float_ps, sizeof(blit_float_ps), NULL, NULL, NULL, + "main", get_shader_target(ra, GLSL_SHADER_FRAGMENT), + D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &float_ps_blob, NULL); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to compile blit() pixel shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + hr = ID3D11Device_CreatePixelShader(p->dev, + ID3D10Blob_GetBufferPointer(float_ps_blob), + ID3D10Blob_GetBufferSize(float_ps_blob), + NULL, &p->blit_float_ps); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create blit() pixel shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + D3D11_INPUT_ELEMENT_DESC in_descs[] = { + { "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0 }, + { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 8 }, + }; + hr = ID3D11Device_CreateInputLayout(p->dev, in_descs, + MP_ARRAY_SIZE(in_descs), ID3D10Blob_GetBufferPointer(vs_blob), + ID3D10Blob_GetBufferSize(vs_blob), &p->blit_layout); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create blit() IA layout: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + 
} + + D3D11_BUFFER_DESC vdesc = { + .ByteWidth = sizeof(struct blit_vert[6]), + .Usage = D3D11_USAGE_DEFAULT, + .BindFlags = D3D11_BIND_VERTEX_BUFFER, + }; + hr = ID3D11Device_CreateBuffer(p->dev, &vdesc, NULL, &p->blit_vbuf); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create blit() vertex buffer: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + // Blit always uses point sampling, regardless of the source texture + D3D11_SAMPLER_DESC sdesc = { + .AddressU = D3D11_TEXTURE_ADDRESS_CLAMP, + .AddressV = D3D11_TEXTURE_ADDRESS_CLAMP, + .AddressW = D3D11_TEXTURE_ADDRESS_CLAMP, + .ComparisonFunc = D3D11_COMPARISON_NEVER, + .MinLOD = 0, + .MaxLOD = D3D11_FLOAT32_MAX, + .MaxAnisotropy = 1, + }; + hr = ID3D11Device_CreateSamplerState(p->dev, &sdesc, &p->blit_sampler); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create blit() sampler: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + SAFE_RELEASE(vs_blob); + SAFE_RELEASE(float_ps_blob); + return true; +error: + SAFE_RELEASE(vs_blob); + SAFE_RELEASE(float_ps_blob); + return false; +} + +static void blit_rpass(struct ra *ra, struct ra_tex *dst, struct ra_tex *src, + struct mp_rect *dst_rc, struct mp_rect *src_rc) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_tex *dst_p = dst->priv; + struct d3d_tex *src_p = src->priv; + + float u_min = (double)src_rc->x0 / src->params.w; + float u_max = (double)src_rc->x1 / src->params.w; + float v_min = (double)src_rc->y0 / src->params.h; + float v_max = (double)src_rc->y1 / src->params.h; + + struct blit_vert verts[6] = { + { .x = -1, .y = -1, .u = u_min, .v = v_max }, + { .x = 1, .y = -1, .u = u_max, .v = v_max }, + { .x = 1, .y = 1, .u = u_max, .v = v_min }, + { .x = -1, .y = 1, .u = u_min, .v = v_min }, + }; + verts[4] = verts[0]; + verts[5] = verts[2]; + ID3D11DeviceContext_UpdateSubresource(p->ctx, + (ID3D11Resource *)p->blit_vbuf, 0, NULL, verts, 0, 0); + + ID3D11DeviceContext_IASetInputLayout(p->ctx, p->blit_layout); + ID3D11DeviceContext_IASetVertexBuffers(p->ctx, 0, 1, 
&p->blit_vbuf, + &(UINT) { sizeof(verts[0]) }, &(UINT) { 0 }); + ID3D11DeviceContext_IASetPrimitiveTopology(p->ctx, + D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + + ID3D11DeviceContext_VSSetShader(p->ctx, p->blit_vs, NULL, 0); + + ID3D11DeviceContext_RSSetViewports(p->ctx, 1, (&(D3D11_VIEWPORT) { + .TopLeftX = dst_rc->x0, + .TopLeftY = dst_rc->y0, + .Width = mp_rect_w(*dst_rc), + .Height = mp_rect_h(*dst_rc), + .MinDepth = 0, + .MaxDepth = 1, + })); + ID3D11DeviceContext_RSSetScissorRects(p->ctx, 1, (&(D3D11_RECT) { + .left = dst_rc->x0, + .top = dst_rc->y0, + .right = dst_rc->x1, + .bottom = dst_rc->y1, + })); + + ID3D11DeviceContext_PSSetShader(p->ctx, p->blit_float_ps, NULL, 0); + ID3D11DeviceContext_PSSetShaderResources(p->ctx, 0, 1, &src_p->srv); + ID3D11DeviceContext_PSSetSamplers(p->ctx, 0, 1, &p->blit_sampler); + + ID3D11DeviceContext_OMSetRenderTargets(p->ctx, 1, &dst_p->rtv, NULL); + ID3D11DeviceContext_OMSetBlendState(p->ctx, NULL, NULL, + D3D11_DEFAULT_SAMPLE_MASK); + + ID3D11DeviceContext_Draw(p->ctx, 6, 0); + + ID3D11DeviceContext_PSSetShaderResources(p->ctx, 0, 1, + &(ID3D11ShaderResourceView *) { NULL }); + ID3D11DeviceContext_PSSetSamplers(p->ctx, 0, 1, + &(ID3D11SamplerState *) { NULL }); + ID3D11DeviceContext_OMSetRenderTargets(p->ctx, 0, NULL, NULL); +} + +static void blit(struct ra *ra, struct ra_tex *dst, struct ra_tex *src, + struct mp_rect *dst_rc_ptr, struct mp_rect *src_rc_ptr) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_tex *dst_p = dst->priv; + struct d3d_tex *src_p = src->priv; + struct mp_rect dst_rc = *dst_rc_ptr; + struct mp_rect src_rc = *src_rc_ptr; + + assert(dst->params.dimensions == 2); + assert(src->params.dimensions == 2); + + // A zero-sized target rectangle is a no-op + if (!mp_rect_w(dst_rc) || !mp_rect_h(dst_rc)) + return; + + // ra.h seems to imply that both dst_rc and src_rc can be flipped, but it's + // easier for blit_rpass() if only src_rc can be flipped, so unflip dst_rc. 
+ if (dst_rc.x0 > dst_rc.x1) { + MPSWAP(int, dst_rc.x0, dst_rc.x1); + MPSWAP(int, src_rc.x0, src_rc.x1); + } + if (dst_rc.y0 > dst_rc.y1) { + MPSWAP(int, dst_rc.y0, dst_rc.y1); + MPSWAP(int, src_rc.y0, src_rc.y1); + } + + // If format conversion, stretching or flipping is required, a renderpass + // must be used + if (dst->params.format != src->params.format || + mp_rect_w(dst_rc) != mp_rect_w(src_rc) || + mp_rect_h(dst_rc) != mp_rect_h(src_rc)) + { + blit_rpass(ra, dst, src, &dst_rc, &src_rc); + } else { + ID3D11DeviceContext_CopySubresourceRegion(p->ctx, dst_p->res, 0, + dst_rc.x0, dst_rc.y0, 0, src_p->res, 0, (&(D3D11_BOX) { + .left = src_rc.x0, + .top = src_rc.y0, + .front = 0, + .right = src_rc.x1, + .bottom = src_rc.y1, + .back = 1, + })); + } +} + +static int desc_namespace(enum ra_vartype type) +{ + // Images and SSBOs both use UAV bindings + if (type == RA_VARTYPE_IMG_W) + type = RA_VARTYPE_BUF_RW; + return type; +} + +static bool compile_glsl(struct ra *ra, enum glsl_shader type, + const char *glsl, ID3DBlob **out) +{ + struct ra_d3d11 *p = ra->priv; + struct spirv_compiler *spirv = p->spirv; + void *ta_ctx = talloc_new(NULL); + crossc_compiler *cross = NULL; + const char *hlsl = NULL; + ID3DBlob *errors = NULL; + bool success = false; + HRESULT hr; + + int cross_shader_model; + if (p->fl >= D3D_FEATURE_LEVEL_11_0) { + cross_shader_model = 50; + } else if (p->fl >= D3D_FEATURE_LEVEL_10_1) { + cross_shader_model = 41; + } else { + cross_shader_model = 40; + } + + bstr spv_module; + if (!spirv->fns->compile_glsl(spirv, ta_ctx, type, glsl, &spv_module)) + goto done; + + cross = crossc_hlsl_create((uint32_t*)spv_module.start, + spv_module.len / sizeof(uint32_t)); + + crossc_hlsl_set_shader_model(cross, cross_shader_model); + crossc_set_flip_vert_y(cross, type == GLSL_SHADER_VERTEX); + + hlsl = crossc_compile(cross); + if (!hlsl) { + MP_ERR(ra, "SPIRV-Cross failed: %s\n", crossc_strerror(cross)); + goto done; + } + + hr = p->D3DCompile(hlsl, strlen(hlsl), 
NULL, NULL, NULL, "main", + get_shader_target(ra, type), D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, out, + &errors); + if (FAILED(hr)) { + MP_ERR(ra, "D3DCompile failed: %s\n%.*s", mp_HRESULT_to_str(hr), + (int)ID3D10Blob_GetBufferSize(errors), + (char*)ID3D10Blob_GetBufferPointer(errors)); + goto done; + } + + success = true; +done:; + int level = success ? MSGL_DEBUG : MSGL_ERR; + MP_MSG(ra, level, "GLSL source:\n"); + mp_log_source(ra->log, level, glsl); + if (hlsl) { + MP_MSG(ra, level, "HLSL source:\n"); + mp_log_source(ra->log, level, hlsl); + } + SAFE_RELEASE(errors); + crossc_destroy(cross); + talloc_free(ta_ctx); + return success; +} + +static void renderpass_destroy(struct ra *ra, struct ra_renderpass *pass) +{ + if (!pass) + return; + struct d3d_rpass *pass_p = pass->priv; + + SAFE_RELEASE(pass_p->vs); + SAFE_RELEASE(pass_p->ps); + SAFE_RELEASE(pass_p->cs); + SAFE_RELEASE(pass_p->layout); + SAFE_RELEASE(pass_p->bstate); + talloc_free(pass); +} + +static D3D11_BLEND map_ra_blend(enum ra_blend blend) +{ + switch (blend) { + default: + case RA_BLEND_ZERO: return D3D11_BLEND_ZERO; + case RA_BLEND_ONE: return D3D11_BLEND_ONE; + case RA_BLEND_SRC_ALPHA: return D3D11_BLEND_SRC_ALPHA; + case RA_BLEND_ONE_MINUS_SRC_ALPHA: return D3D11_BLEND_INV_SRC_ALPHA; + }; +} + +static size_t vbuf_upload(struct ra *ra, void *data, size_t size) +{ + struct ra_d3d11 *p = ra->priv; + HRESULT hr; + + // Arbitrary size limit in case there is an insane number of vertices + if (size > 1e9) { + MP_ERR(ra, "Vertex buffer is too large\n"); + return -1; + } + + // If the vertex data doesn't fit, realloc the vertex buffer + if (size > p->vbuf_size) { + size_t new_size = p->vbuf_size; + // Arbitrary base size + if (!new_size) + new_size = 64 * 1024; + while (new_size < size) + new_size *= 2; + + ID3D11Buffer *new_buf; + D3D11_BUFFER_DESC vbuf_desc = { + .ByteWidth = new_size, + .Usage = D3D11_USAGE_DYNAMIC, + .BindFlags = D3D11_BIND_VERTEX_BUFFER, + .CPUAccessFlags = D3D11_CPU_ACCESS_WRITE, + }; 
+ hr = ID3D11Device_CreateBuffer(p->dev, &vbuf_desc, NULL, &new_buf); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create vertex buffer: %s\n", + mp_HRESULT_to_str(hr)); + return -1; + } + + SAFE_RELEASE(p->vbuf); + p->vbuf = new_buf; + p->vbuf_size = new_size; + p->vbuf_used = 0; + } + + bool discard = false; + size_t offset = p->vbuf_used; + if (offset + size > p->vbuf_size) { + // We reached the end of the buffer, so discard and wrap around + discard = true; + offset = 0; + } + + D3D11_MAPPED_SUBRESOURCE map = { 0 }; + hr = ID3D11DeviceContext_Map(p->ctx, (ID3D11Resource *)p->vbuf, 0, + discard ? D3D11_MAP_WRITE_DISCARD : D3D11_MAP_WRITE_NO_OVERWRITE, + 0, &map); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to map vertex buffer: %s\n", mp_HRESULT_to_str(hr)); + return -1; + } + + char *cdata = map.pData; + memcpy(cdata + offset, data, size); + + ID3D11DeviceContext_Unmap(p->ctx, (ID3D11Resource *)p->vbuf, 0); + + p->vbuf_used = offset + size; + return offset; +} + +static const char cache_magic[4] = "RD11"; +static const int cache_version = 1; + +struct cache_header { + char magic[sizeof(cache_magic)]; + int cache_version; + char compiler[SPIRV_NAME_MAX_LEN]; + int compiler_version; + int feature_level; + size_t vert_bytecode_len; + size_t frag_bytecode_len; + size_t comp_bytecode_len; +}; + +static void load_cached_program(struct ra *ra, + const struct ra_renderpass_params *params, + bstr *vert_bc, + bstr *frag_bc, + bstr *comp_bc) +{ + struct ra_d3d11 *p = ra->priv; + struct spirv_compiler *spirv = p->spirv; + bstr cache = params->cached_program; + + if (cache.len < sizeof(struct cache_header)) + return; + + struct cache_header *header = (struct cache_header *)cache.start; + cache = bstr_cut(cache, sizeof(*header)); + + if (strncmp(header->magic, cache_magic, sizeof(cache_magic)) != 0) + return; + if (header->cache_version != cache_version) + return; + if (strncmp(header->compiler, spirv->name, sizeof(header->compiler)) != 0) + return; + if 
(header->compiler_version != spirv->compiler_version) + return; + if (header->feature_level != p->fl) + return; + + if (header->vert_bytecode_len && vert_bc) { + *vert_bc = bstr_splice(cache, 0, header->vert_bytecode_len); + MP_VERBOSE(ra, "Using cached vertex shader\n"); + } + cache = bstr_cut(cache, header->vert_bytecode_len); + + if (header->frag_bytecode_len && frag_bc) { + *frag_bc = bstr_splice(cache, 0, header->frag_bytecode_len); + MP_VERBOSE(ra, "Using cached fragment shader\n"); + } + cache = bstr_cut(cache, header->frag_bytecode_len); + + if (header->comp_bytecode_len && comp_bc) { + *comp_bc = bstr_splice(cache, 0, header->comp_bytecode_len); + MP_VERBOSE(ra, "Using cached compute shader\n"); + } + cache = bstr_cut(cache, header->comp_bytecode_len); +} + +static void save_cached_program(struct ra *ra, struct ra_renderpass *pass, + bstr vert_bc, + bstr frag_bc, + bstr comp_bc) +{ + struct ra_d3d11 *p = ra->priv; + struct spirv_compiler *spirv = p->spirv; + + struct cache_header header = { + .cache_version = cache_version, + .compiler_version = p->spirv->compiler_version, + .feature_level = p->fl, + .vert_bytecode_len = vert_bc.len, + .frag_bytecode_len = frag_bc.len, + .comp_bytecode_len = comp_bc.len, + }; + strncpy(header.magic, cache_magic, sizeof(header.magic)); + strncpy(header.compiler, spirv->name, sizeof(header.compiler)); + + struct bstr *prog = &pass->params.cached_program; + bstr_xappend(pass, prog, (bstr){ (char *) &header, sizeof(header) }); + bstr_xappend(pass, prog, vert_bc); + bstr_xappend(pass, prog, frag_bc); + bstr_xappend(pass, prog, comp_bc); +} + +static struct ra_renderpass *renderpass_create_raster(struct ra *ra, + struct ra_renderpass *pass, const struct ra_renderpass_params *params) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_rpass *pass_p = pass->priv; + ID3DBlob *vs_blob = NULL; + ID3DBlob *ps_blob = NULL; + HRESULT hr; + + // load_cached_program will load compiled shader bytecode into vert_bc and + // frag_bc if the 
cache is valid. If not, vert_bc/frag_bc will remain NULL. + bstr vert_bc = {0}; + bstr frag_bc = {0}; + load_cached_program(ra, params, &vert_bc, &frag_bc, NULL); + + if (!vert_bc.start) { + if (!compile_glsl(ra, GLSL_SHADER_VERTEX, params->vertex_shader, + &vs_blob)) + goto error; + vert_bc = (bstr){ + ID3D10Blob_GetBufferPointer(vs_blob), + ID3D10Blob_GetBufferSize(vs_blob), + }; + } + + hr = ID3D11Device_CreateVertexShader(p->dev, vert_bc.start, vert_bc.len, + NULL, &pass_p->vs); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create vertex shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + if (!frag_bc.start) { + if (!compile_glsl(ra, GLSL_SHADER_FRAGMENT, params->frag_shader, + &ps_blob)) + goto error; + frag_bc = (bstr){ + ID3D10Blob_GetBufferPointer(ps_blob), + ID3D10Blob_GetBufferSize(ps_blob), + }; + } + + hr = ID3D11Device_CreatePixelShader(p->dev, frag_bc.start, frag_bc.len, + NULL, &pass_p->ps); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create pixel shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + D3D11_INPUT_ELEMENT_DESC *in_descs = talloc_array(pass, + D3D11_INPUT_ELEMENT_DESC, params->num_vertex_attribs); + for (int i = 0; i < params->num_vertex_attribs; i++) { + struct ra_renderpass_input *inp = ¶ms->vertex_attribs[i]; + + DXGI_FORMAT fmt = DXGI_FORMAT_UNKNOWN; + switch (inp->type) { + case RA_VARTYPE_FLOAT: + switch (inp->dim_v) { + case 1: fmt = DXGI_FORMAT_R32_FLOAT; break; + case 2: fmt = DXGI_FORMAT_R32G32_FLOAT; break; + case 3: fmt = DXGI_FORMAT_R32G32B32_FLOAT; break; + case 4: fmt = DXGI_FORMAT_R32G32B32A32_FLOAT; break; + } + break; + case RA_VARTYPE_BYTE_UNORM: + switch (inp->dim_v) { + case 1: fmt = DXGI_FORMAT_R8_UNORM; break; + case 2: fmt = DXGI_FORMAT_R8G8_UNORM; break; + // There is no 3-component 8-bit DXGI format + case 4: fmt = DXGI_FORMAT_R8G8B8A8_UNORM; break; + } + break; + } + if (fmt == DXGI_FORMAT_UNKNOWN) { + MP_ERR(ra, "Could not find suitable vertex input format\n"); + goto error; + } + + 
in_descs[i] = (D3D11_INPUT_ELEMENT_DESC) { + // The semantic name doesn't mean much and is just used to verify + // the input description matches the shader. SPIRV-Cross always + // uses TEXCOORD, so we should too. + .SemanticName = "TEXCOORD", + .SemanticIndex = i, + .AlignedByteOffset = inp->offset, + .Format = fmt, + }; + } + + hr = ID3D11Device_CreateInputLayout(p->dev, in_descs, + params->num_vertex_attribs, vert_bc.start, vert_bc.len, + &pass_p->layout); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create IA layout: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + talloc_free(in_descs); + in_descs = NULL; + + D3D11_BLEND_DESC bdesc = { + .RenderTarget[0] = { + .BlendEnable = params->enable_blend, + .SrcBlend = map_ra_blend(params->blend_src_rgb), + .DestBlend = map_ra_blend(params->blend_dst_rgb), + .BlendOp = D3D11_BLEND_OP_ADD, + .SrcBlendAlpha = map_ra_blend(params->blend_src_alpha), + .DestBlendAlpha = map_ra_blend(params->blend_dst_alpha), + .BlendOpAlpha = D3D11_BLEND_OP_ADD, + .RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL, + }, + }; + hr = ID3D11Device_CreateBlendState(p->dev, &bdesc, &pass_p->bstate); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create blend state: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + + save_cached_program(ra, pass, vert_bc, frag_bc, (bstr){0}); + + SAFE_RELEASE(vs_blob); + SAFE_RELEASE(ps_blob); + return pass; + +error: + renderpass_destroy(ra, pass); + SAFE_RELEASE(vs_blob); + SAFE_RELEASE(ps_blob); + return NULL; +} + +static struct ra_renderpass *renderpass_create_compute(struct ra *ra, + struct ra_renderpass *pass, const struct ra_renderpass_params *params) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_rpass *pass_p = pass->priv; + ID3DBlob *cs_blob = NULL; + HRESULT hr; + + bstr comp_bc = {0}; + load_cached_program(ra, params, NULL, NULL, &comp_bc); + + if (!comp_bc.start) { + if (!compile_glsl(ra, GLSL_SHADER_COMPUTE, params->compute_shader, + &cs_blob)) + goto error; + comp_bc = (bstr){ + 
ID3D10Blob_GetBufferPointer(cs_blob), + ID3D10Blob_GetBufferSize(cs_blob), + }; + } + hr = ID3D11Device_CreateComputeShader(p->dev, comp_bc.start, comp_bc.len, + NULL, &pass_p->cs); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create compute shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + save_cached_program(ra, pass, (bstr){0}, (bstr){0}, comp_bc); + + SAFE_RELEASE(cs_blob); + return pass; +error: + renderpass_destroy(ra, pass); + SAFE_RELEASE(cs_blob); + return NULL; +} + +static struct ra_renderpass *renderpass_create(struct ra *ra, + const struct ra_renderpass_params *params) +{ + struct ra_renderpass *pass = talloc_zero(NULL, struct ra_renderpass); + pass->params = *ra_renderpass_params_copy(pass, params); + pass->params.cached_program = (bstr){0}; + pass->priv = talloc_zero(pass, struct d3d_rpass); + + if (params->type == RA_RENDERPASS_TYPE_COMPUTE) { + return renderpass_create_compute(ra, pass, params); + } else { + return renderpass_create_raster(ra, pass, params); + } +} + +static void renderpass_run_raster(struct ra *ra, + const struct ra_renderpass_run_params *params, + ID3D11Buffer *ubos[], int ubos_len, + ID3D11SamplerState *samplers[], + ID3D11ShaderResourceView *srvs[], + int samplers_len, + ID3D11UnorderedAccessView *uavs[], + int uavs_len) +{ + struct ra_d3d11 *p = ra->priv; + struct ra_renderpass *pass = params->pass; + struct d3d_rpass *pass_p = pass->priv; + + UINT vbuf_offset = vbuf_upload(ra, params->vertex_data, + pass->params.vertex_stride * params->vertex_count); + if (vbuf_offset == (UINT)-1) + return; + + ID3D11DeviceContext_IASetInputLayout(p->ctx, pass_p->layout); + ID3D11DeviceContext_IASetVertexBuffers(p->ctx, 0, 1, &p->vbuf, + &pass->params.vertex_stride, &vbuf_offset); + ID3D11DeviceContext_IASetPrimitiveTopology(p->ctx, + D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + + ID3D11DeviceContext_VSSetShader(p->ctx, pass_p->vs, NULL, 0); + + ID3D11DeviceContext_RSSetViewports(p->ctx, 1, (&(D3D11_VIEWPORT) { + .TopLeftX = 
params->viewport.x0, + .TopLeftY = params->viewport.y0, + .Width = mp_rect_w(params->viewport), + .Height = mp_rect_h(params->viewport), + .MinDepth = 0, + .MaxDepth = 1, + })); + ID3D11DeviceContext_RSSetScissorRects(p->ctx, 1, (&(D3D11_RECT) { + .left = params->scissors.x0, + .top = params->scissors.y0, + .right = params->scissors.x1, + .bottom = params->scissors.y1, + })); + ID3D11DeviceContext_PSSetShader(p->ctx, pass_p->ps, NULL, 0); + ID3D11DeviceContext_PSSetConstantBuffers(p->ctx, 0, ubos_len, ubos); + ID3D11DeviceContext_PSSetShaderResources(p->ctx, 0, samplers_len, srvs); + ID3D11DeviceContext_PSSetSamplers(p->ctx, 0, samplers_len, samplers); + + struct ra_tex *target = params->target; + struct d3d_tex *target_p = target->priv; + ID3D11DeviceContext_OMSetRenderTargetsAndUnorderedAccessViews(p->ctx, 1, + &target_p->rtv, NULL, 1, uavs_len, uavs, NULL); + ID3D11DeviceContext_OMSetBlendState(p->ctx, pass_p->bstate, NULL, + D3D11_DEFAULT_SAMPLE_MASK); + + ID3D11DeviceContext_Draw(p->ctx, params->vertex_count, 0); + + // Unbind everything. It's easier to do this than to actually track state, + // and if we leave the RTV bound, it could trip up D3D's conflict checker. 
+ for (int i = 0; i < ubos_len; i++) + ubos[i] = NULL; + for (int i = 0; i < samplers_len; i++) { + samplers[i] = NULL; + srvs[i] = NULL; + } + for (int i = 0; i < uavs_len; i++) + uavs[i] = NULL; + ID3D11DeviceContext_PSSetConstantBuffers(p->ctx, 0, ubos_len, ubos); + ID3D11DeviceContext_PSSetShaderResources(p->ctx, 0, samplers_len, srvs); + ID3D11DeviceContext_PSSetSamplers(p->ctx, 0, samplers_len, samplers); + ID3D11DeviceContext_OMSetRenderTargetsAndUnorderedAccessViews(p->ctx, 0, + NULL, NULL, 1, uavs_len, uavs, NULL); +} + +static void renderpass_run_compute(struct ra *ra, + const struct ra_renderpass_run_params *params, + ID3D11Buffer *ubos[], int ubos_len, + ID3D11SamplerState *samplers[], + ID3D11ShaderResourceView *srvs[], + int samplers_len, + ID3D11UnorderedAccessView *uavs[], + int uavs_len) +{ + struct ra_d3d11 *p = ra->priv; + struct ra_renderpass *pass = params->pass; + struct d3d_rpass *pass_p = pass->priv; + + ID3D11DeviceContext_CSSetShader(p->ctx, pass_p->cs, NULL, 0); + ID3D11DeviceContext_CSSetConstantBuffers(p->ctx, 0, ubos_len, ubos); + ID3D11DeviceContext_CSSetShaderResources(p->ctx, 0, samplers_len, srvs); + ID3D11DeviceContext_CSSetSamplers(p->ctx, 0, samplers_len, samplers); + ID3D11DeviceContext_CSSetUnorderedAccessViews(p->ctx, 0, uavs_len, uavs, + NULL); + + ID3D11DeviceContext_Dispatch(p->ctx, params->compute_groups[0], + params->compute_groups[1], + params->compute_groups[2]); + + for (int i = 0; i < ubos_len; i++) + ubos[i] = NULL; + for (int i = 0; i < samplers_len; i++) { + samplers[i] = NULL; + srvs[i] = NULL; + } + for (int i = 0; i < uavs_len; i++) + uavs[i] = NULL; + ID3D11DeviceContext_CSSetConstantBuffers(p->ctx, 0, ubos_len, ubos); + ID3D11DeviceContext_CSSetShaderResources(p->ctx, 0, samplers_len, srvs); + ID3D11DeviceContext_CSSetSamplers(p->ctx, 0, samplers_len, samplers); + ID3D11DeviceContext_CSSetUnorderedAccessViews(p->ctx, 0, uavs_len, uavs, + NULL); +} + +static void renderpass_run(struct ra *ra, + const struct 
ra_renderpass_run_params *params) +{ + struct ra_d3d11 *p = ra->priv; + struct ra_renderpass *pass = params->pass; + enum ra_renderpass_type type = pass->params.type; + + ID3D11Buffer *ubos[D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT] = {0}; + int ubos_len = 0; + + ID3D11SamplerState *samplers[D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT] = {0}; + ID3D11ShaderResourceView *srvs[D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT] = {0}; + int samplers_len = 0; + + ID3D11UnorderedAccessView *uavs[D3D11_1_UAV_SLOT_COUNT] = {0}; + int uavs_len = 0; + + // In a raster pass, one of the UAV slots is used by the runtime for the RTV + int uavs_max = type == RA_RENDERPASS_TYPE_COMPUTE ? p->max_uavs + : p->max_uavs - 1; + + // Gather the input variables used in this pass. These will be mapped to + // HLSL registers. + for (int i = 0; i < params->num_values; i++) { + struct ra_renderpass_input_val *val = &params->values[i]; + int binding = pass->params.inputs[val->index].binding; + switch (pass->params.inputs[val->index].type) { + case RA_VARTYPE_BUF_RO: + if (binding > MP_ARRAY_SIZE(ubos)) { + MP_ERR(ra, "Too many constant buffers in pass\n"); + return; + } + struct ra_buf *buf_ro = *(struct ra_buf **)val->data; + buf_resolve(ra, buf_ro); + struct d3d_buf *buf_ro_p = buf_ro->priv; + ubos[binding] = buf_ro_p->buf; + ubos_len = MPMAX(ubos_len, binding + 1); + break; + case RA_VARTYPE_BUF_RW: + if (binding > uavs_max) { + MP_ERR(ra, "Too many UAVs in pass\n"); + return; + } + struct ra_buf *buf_rw = *(struct ra_buf **)val->data; + buf_resolve(ra, buf_rw); + struct d3d_buf *buf_rw_p = buf_rw->priv; + uavs[binding] = buf_rw_p->uav; + uavs_len = MPMAX(uavs_len, binding + 1); + break; + case RA_VARTYPE_TEX: + if (binding > MP_ARRAY_SIZE(samplers)) { + MP_ERR(ra, "Too many textures in pass\n"); + return; + } + struct ra_tex *tex = *(struct ra_tex **)val->data; + struct d3d_tex *tex_p = tex->priv; + samplers[binding] = tex_p->sampler; + srvs[binding] = tex_p->srv; + samplers_len = MPMAX(samplers_len, 
binding + 1); + break; + case RA_VARTYPE_IMG_W: + if (binding > uavs_max) { + MP_ERR(ra, "Too many UAVs in pass\n"); + return; + } + struct ra_tex *img = *(struct ra_tex **)val->data; + struct d3d_tex *img_p = img->priv; + uavs[binding] = img_p->uav; + uavs_len = MPMAX(uavs_len, binding + 1); + break; + } + } + + if (type == RA_RENDERPASS_TYPE_COMPUTE) { + renderpass_run_compute(ra, params, ubos, ubos_len, samplers, srvs, + samplers_len, uavs, uavs_len); + } else { + renderpass_run_raster(ra, params, ubos, ubos_len, samplers, srvs, + samplers_len, uavs, uavs_len); + } +} + +static void timer_destroy(struct ra *ra, ra_timer *ratimer) +{ + if (!ratimer) + return; + struct d3d_timer *timer = ratimer; + + SAFE_RELEASE(timer->ts_start); + SAFE_RELEASE(timer->ts_end); + SAFE_RELEASE(timer->disjoint); + talloc_free(timer); +} + +static ra_timer *timer_create(struct ra *ra) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_timer *timer = talloc_zero(NULL, struct d3d_timer); + HRESULT hr; + + hr = ID3D11Device_CreateQuery(p->dev, + &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP }, &timer->ts_start); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create start query: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + + hr = ID3D11Device_CreateQuery(p->dev, + &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP }, &timer->ts_end); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create end query: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + + // Measuring duration in D3D11 requires three queries: start and end + // timestamps, and a disjoint query containing a flag which says whether + // the timestamps are usable or if a discontinuity occurred between them, + // like a change in power state or clock speed. The disjoint query also + // contains the timer frequency, so the timestamps are useless without it. 
+ hr = ID3D11Device_CreateQuery(p->dev, + &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP_DISJOINT }, &timer->disjoint); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create timer query: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + + return timer; +error: + timer_destroy(ra, timer); + return NULL; +} + +static uint64_t timestamp_to_ns(uint64_t timestamp, uint64_t freq) +{ + static const uint64_t ns_per_s = 1000000000llu; + return timestamp / freq * ns_per_s + timestamp % freq * ns_per_s / freq; +} + +static uint64_t timer_get_result(struct ra *ra, ra_timer *ratimer) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_timer *timer = ratimer; + HRESULT hr; + + UINT64 start, end; + D3D11_QUERY_DATA_TIMESTAMP_DISJOINT dj; + + hr = ID3D11DeviceContext_GetData(p->ctx, + (ID3D11Asynchronous *)timer->ts_end, &end, sizeof(end), + D3D11_ASYNC_GETDATA_DONOTFLUSH); + if (FAILED(hr) || hr == S_FALSE) + return 0; + hr = ID3D11DeviceContext_GetData(p->ctx, + (ID3D11Asynchronous *)timer->ts_start, &start, sizeof(start), + D3D11_ASYNC_GETDATA_DONOTFLUSH); + if (FAILED(hr) || hr == S_FALSE) + return 0; + hr = ID3D11DeviceContext_GetData(p->ctx, + (ID3D11Asynchronous *)timer->disjoint, &dj, sizeof(dj), + D3D11_ASYNC_GETDATA_DONOTFLUSH); + if (FAILED(hr) || hr == S_FALSE || dj.Disjoint || !dj.Frequency) + return 0; + + return timestamp_to_ns(end - start, dj.Frequency); +} + +static void timer_start(struct ra *ra, ra_timer *ratimer) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_timer *timer = ratimer; + + // Latch the last result of this ra_timer (returned by timer_stop) + timer->result = timer_get_result(ra, ratimer); + + ID3D11DeviceContext_Begin(p->ctx, (ID3D11Asynchronous *)timer->disjoint); + ID3D11DeviceContext_End(p->ctx, (ID3D11Asynchronous *)timer->ts_start); +} + +static uint64_t timer_stop(struct ra *ra, ra_timer *ratimer) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_timer *timer = ratimer; + + ID3D11DeviceContext_End(p->ctx, (ID3D11Asynchronous *)timer->ts_end); + 
ID3D11DeviceContext_End(p->ctx, (ID3D11Asynchronous *)timer->disjoint); + + return timer->result; +} + +static int map_msg_severity(D3D11_MESSAGE_SEVERITY sev) +{ + switch (sev) { + case D3D11_MESSAGE_SEVERITY_CORRUPTION: + return MSGL_FATAL; + case D3D11_MESSAGE_SEVERITY_ERROR: + return MSGL_ERR; + case D3D11_MESSAGE_SEVERITY_WARNING: + return MSGL_WARN; + default: + case D3D11_MESSAGE_SEVERITY_INFO: + case D3D11_MESSAGE_SEVERITY_MESSAGE: + return MSGL_DEBUG; + } +} + +static void debug_marker(struct ra *ra, const char *msg) +{ + struct ra_d3d11 *p = ra->priv; + void *talloc_ctx = talloc_new(NULL); + HRESULT hr; + + if (!p->iqueue) + goto done; + + // Copy debug-layer messages to mpv's log output + bool printed_header = false; + uint64_t messages = ID3D11InfoQueue_GetNumStoredMessages(p->iqueue); + for (uint64_t i = 0; i < messages; i++) { + size_t len; + hr = ID3D11InfoQueue_GetMessage(p->iqueue, i, NULL, &len); + if (FAILED(hr) || !len) + goto done; + + D3D11_MESSAGE *d3dmsg = talloc_size(talloc_ctx, len); + hr = ID3D11InfoQueue_GetMessage(p->iqueue, i, d3dmsg, &len); + if (FAILED(hr)) + goto done; + + int msgl = map_msg_severity(d3dmsg->Severity); + if (mp_msg_test(ra->log, msgl)) { + if (!printed_header) + MP_INFO(ra, "%s:\n", msg); + printed_header = true; + + MP_MSG(ra, msgl, "%d: %.*s\n", (int)d3dmsg->ID, + (int)d3dmsg->DescriptionByteLength, d3dmsg->pDescription); + talloc_free(d3dmsg); + } + } + + ID3D11InfoQueue_ClearStoredMessages(p->iqueue); +done: + talloc_free(talloc_ctx); +} + +static void destroy(struct ra *ra) +{ + struct ra_d3d11 *p = ra->priv; + + // Release everything except the interfaces needed to perform leak checking + SAFE_RELEASE(p->clear_ps); + SAFE_RELEASE(p->clear_vs); + SAFE_RELEASE(p->clear_layout); + SAFE_RELEASE(p->clear_vbuf); + SAFE_RELEASE(p->clear_cbuf); + SAFE_RELEASE(p->blit_float_ps); + SAFE_RELEASE(p->blit_vs); + SAFE_RELEASE(p->blit_layout); + SAFE_RELEASE(p->blit_vbuf); + SAFE_RELEASE(p->blit_sampler); + 
SAFE_RELEASE(p->vbuf); + SAFE_RELEASE(p->ctx1); + SAFE_RELEASE(p->dev1); + SAFE_RELEASE(p->dev); + + if (p->debug && p->ctx) { + // Destroy the device context synchronously so referenced objects don't + // show up in the leak check + ID3D11DeviceContext_ClearState(p->ctx); + ID3D11DeviceContext_Flush(p->ctx); + } + SAFE_RELEASE(p->ctx); + + if (p->debug) { + // Report any leaked objects + debug_marker(ra, "after destroy"); + ID3D11Debug_ReportLiveDeviceObjects(p->debug, D3D11_RLDO_DETAIL); + debug_marker(ra, "after leak check"); + ID3D11Debug_ReportLiveDeviceObjects(p->debug, D3D11_RLDO_SUMMARY); + debug_marker(ra, "after leak summary"); + } + SAFE_RELEASE(p->debug); + SAFE_RELEASE(p->iqueue); + + talloc_free(ra); +} + +static struct ra_fns ra_fns_d3d11 = { + .destroy = destroy, + .tex_create = tex_create, + .tex_destroy = tex_destroy, + .tex_upload = tex_upload, + .buf_create = buf_create, + .buf_destroy = buf_destroy, + .buf_update = buf_update, + .clear = clear, + .blit = blit, + .uniform_layout = std140_layout, + .desc_namespace = desc_namespace, + .renderpass_create = renderpass_create, + .renderpass_destroy = renderpass_destroy, + .renderpass_run = renderpass_run, + .timer_create = timer_create, + .timer_destroy = timer_destroy, + .timer_start = timer_start, + .timer_stop = timer_stop, + .debug_marker = debug_marker, +}; + +void ra_d3d11_flush(struct ra *ra) +{ + struct ra_d3d11 *p = ra->priv; + ID3D11DeviceContext_Flush(p->ctx); +} + +static void init_debug_layer(struct ra *ra) +{ + struct ra_d3d11 *p = ra->priv; + HRESULT hr; + + hr = ID3D11Device_QueryInterface(p->dev, &IID_ID3D11Debug, + (void**)&p->debug); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to get debug device: %s\n", mp_HRESULT_to_str(hr)); + return; + } + + hr = ID3D11Device_QueryInterface(p->dev, &IID_ID3D11InfoQueue, + (void**)&p->iqueue); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to get info queue: %s\n", mp_HRESULT_to_str(hr)); + return; + } + + // Store an unlimited amount of messages in the 
buffer. This is fine + // because we flush stored messages regularly (in debug_marker.) + ID3D11InfoQueue_SetMessageCountLimit(p->iqueue, -1); + + // Filter some annoying messages + D3D11_MESSAGE_ID deny_ids[] = { + // This error occurs during context creation when we try to figure out + // the real maximum texture size by attempting to create a texture + // larger than the current feature level allows. + D3D11_MESSAGE_ID_CREATETEXTURE2D_INVALIDDIMENSIONS, + + // These are normal. The RA timer queue habitually reuses timer objects + // without retrieving the results. + D3D11_MESSAGE_ID_QUERY_BEGIN_ABANDONING_PREVIOUS_RESULTS, + D3D11_MESSAGE_ID_QUERY_END_ABANDONING_PREVIOUS_RESULTS, + }; + D3D11_INFO_QUEUE_FILTER filter = { + .DenyList = { + .NumIDs = MP_ARRAY_SIZE(deny_ids), + .pIDList = deny_ids, + }, + }; + ID3D11InfoQueue_PushStorageFilter(p->iqueue, &filter); +} + +static bool load_d3d_compiler(struct ra *ra) +{ + struct ra_d3d11 *p = ra->priv; + HMODULE d3dcompiler = NULL; + + // Try the inbox D3DCompiler first (Windows 8.1 and up) + if (IsWindows8Point1OrGreater()) { + d3dcompiler = LoadLibraryExW(L"d3dcompiler_47.dll", NULL, + LOAD_LIBRARY_SEARCH_SYSTEM32); + } + // Check for a packaged version of d3dcompiler_47.dll + if (!d3dcompiler) + d3dcompiler = LoadLibraryW(L"d3dcompiler_47.dll"); + // Try d3dcompiler_46.dll from the Windows 8 SDK + if (!d3dcompiler) + d3dcompiler = LoadLibraryW(L"d3dcompiler_46.dll"); + // Try d3dcompiler_43.dll from the June 2010 DirectX SDK + if (!d3dcompiler) + d3dcompiler = LoadLibraryW(L"d3dcompiler_43.dll"); + // Can't find any compiler DLL, so give up + if (!d3dcompiler) + return false; + + p->D3DCompile = (pD3DCompile)GetProcAddress(d3dcompiler, "D3DCompile"); + if (!p->D3DCompile) + return false; + return true; +} + +static void find_max_texture_dimension(struct ra *ra) +{ + struct ra_d3d11 *p = ra->priv; + + D3D11_TEXTURE2D_DESC desc = { + .Width = ra->max_texture_wh, + .Height = ra->max_texture_wh, + .MipLevels = 1, + 
.ArraySize = 1, + .SampleDesc.Count = 1, + .Format = DXGI_FORMAT_R8_UNORM, + .BindFlags = D3D11_BIND_SHADER_RESOURCE, + }; + while (true) { + desc.Height = desc.Width *= 2; + if (desc.Width >= 0x8000000u) + return; + if (FAILED(ID3D11Device_CreateTexture2D(p->dev, &desc, NULL, NULL))) + return; + ra->max_texture_wh = desc.Width; + } +} + +struct ra *ra_d3d11_create(ID3D11Device *dev, struct mp_log *log, + struct spirv_compiler *spirv) +{ + HRESULT hr; + + struct ra *ra = talloc_zero(NULL, struct ra); + ra->log = log; + ra->fns = &ra_fns_d3d11; + + // Even Direct3D 10level9 supports 3D textures + ra->caps = RA_CAP_TEX_3D | RA_CAP_DIRECT_UPLOAD | RA_CAP_BUF_RO | + RA_CAP_BLIT | spirv->ra_caps; + + ra->glsl_version = spirv->glsl_version; + ra->glsl_vulkan = true; + + struct ra_d3d11 *p = ra->priv = talloc_zero(ra, struct ra_d3d11); + p->spirv = spirv; + + int minor = 0; + ID3D11Device_AddRef(dev); + p->dev = dev; + ID3D11Device_GetImmediateContext(p->dev, &p->ctx); + hr = ID3D11Device_QueryInterface(p->dev, &IID_ID3D11Device1, + (void**)&p->dev1); + if (SUCCEEDED(hr)) { + minor = 1; + ID3D11Device1_GetImmediateContext1(p->dev1, &p->ctx1); + + D3D11_FEATURE_DATA_D3D11_OPTIONS fopts = { 0 }; + hr = ID3D11Device_CheckFeatureSupport(p->dev, + D3D11_FEATURE_D3D11_OPTIONS, &fopts, sizeof(fopts)); + if (SUCCEEDED(hr)) { + p->has_clear_view = fopts.ClearView; + } + } + + MP_VERBOSE(ra, "Using Direct3D 11.%d runtime\n", minor); + + p->fl = ID3D11Device_GetFeatureLevel(p->dev); + if (p->fl >= D3D_FEATURE_LEVEL_11_0) { + ra->max_texture_wh = D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION; + } else if (p->fl >= D3D_FEATURE_LEVEL_10_0) { + ra->max_texture_wh = D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION; + } else if (p->fl >= D3D_FEATURE_LEVEL_9_3) { + ra->max_texture_wh = D3D_FL9_3_REQ_TEXTURE2D_U_OR_V_DIMENSION; + } else { + ra->max_texture_wh = D3D_FL9_1_REQ_TEXTURE2D_U_OR_V_DIMENSION; + } + + // Some 10_0 hardware has compute shaders, but only 11_0 has image load/store + if (p->fl >= 
D3D_FEATURE_LEVEL_11_0) { + ra->caps |= RA_CAP_COMPUTE | RA_CAP_BUF_RW; + ra->max_shmem = 32 * 1024; + } + + if (p->fl >= D3D_FEATURE_LEVEL_11_1 && minor >= 1) { + p->max_uavs = D3D11_1_UAV_SLOT_COUNT; + } else { + p->max_uavs = D3D11_PS_CS_UAV_REGISTER_COUNT; + } + + if (ID3D11Device_GetCreationFlags(p->dev) & D3D11_CREATE_DEVICE_DEBUG) + init_debug_layer(ra); + + // According to MSDN, the above texture sizes are just minimums and drivers + // may support larger textures. See: + // https://msdn.microsoft.com/en-us/library/windows/desktop/ff476874.aspx + find_max_texture_dimension(ra); + MP_VERBOSE(ra, "Maximum Texture2D size: %dx%d\n", ra->max_texture_wh, + ra->max_texture_wh); + + if (!load_d3d_compiler(ra)) { + MP_FATAL(ra, "Could not find D3DCompiler DLL\n"); + goto error; + } + + setup_formats(ra); + + // The rasterizer state never changes, so set it up here + ID3D11RasterizerState *rstate; + D3D11_RASTERIZER_DESC rdesc = { + .FillMode = D3D11_FILL_SOLID, + .CullMode = D3D11_CULL_NONE, + .FrontCounterClockwise = FALSE, + .DepthClipEnable = TRUE, // Required for 10level9 + .ScissorEnable = TRUE, + }; + hr = ID3D11Device_CreateRasterizerState(p->dev, &rdesc, &rstate); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create rasterizer state: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + ID3D11DeviceContext_RSSetState(p->ctx, rstate); + SAFE_RELEASE(rstate); + + // If the device doesn't support ClearView, we have to set up a + // shader-based clear() implementation + if (!p->has_clear_view && !setup_clear_rpass(ra)) + goto error; + + if (!setup_blit_rpass(ra)) + goto error; + + return ra; + +error: + destroy(ra); + return NULL; +} + +ID3D11Device *ra_d3d11_get_device(struct ra *ra) +{ + struct ra_d3d11 *p = ra->priv; + ID3D11Device_AddRef(p->dev); + return p->dev; +} + +bool ra_is_d3d11(struct ra *ra) +{ + return ra->fns == &ra_fns_d3d11; +} diff --git a/video/out/d3d11/ra_d3d11.h b/video/out/d3d11/ra_d3d11.h new file mode 100644 index 0000000000..eeadc7994e --- 
/dev/null +++ b/video/out/d3d11/ra_d3d11.h @@ -0,0 +1,34 @@ +#pragma once + +#include <stdbool.h> +#include <windows.h> +#include <d3d11.h> +#include <dxgi1_2.h> + +#include "video/out/gpu/ra.h" +#include "video/out/gpu/spirv.h" + +// Create an RA instance from a D3D11 device. This takes a reference to the +// device, which is released when the RA instance is destroyed. +struct ra *ra_d3d11_create(ID3D11Device *device, struct mp_log *log, + struct spirv_compiler *spirv); + +// Flush the immediate context of the wrapped D3D11 device +void ra_d3d11_flush(struct ra *ra); + +// Create an RA texture from a D3D11 resource. This takes a reference to the +// texture, which is released when the RA texture is destroyed. +struct ra_tex *ra_d3d11_wrap_tex(struct ra *ra, ID3D11Resource *res); + +// As above, but for a D3D11VA video resource. The fmt parameter selects which +// plane of a planar format will be mapped when the RA texture is used. +struct ra_tex *ra_d3d11_wrap_tex_video(struct ra *ra, ID3D11Texture2D *res, + int w, int h, + const struct ra_format *fmt); + +// Get the underlying D3D11 device from an RA instance. The returned device is +// refcounted and must be released by the caller. 
+ID3D11Device *ra_d3d11_get_device(struct ra *ra); + +// True if the RA instance was created with ra_d3d11_create() +bool ra_is_d3d11(struct ra *ra); diff --git a/video/out/gpu/context.c b/video/out/gpu/context.c index 8dc8a5c71f..36f9c2dad5 100644 --- a/video/out/gpu/context.c +++ b/video/out/gpu/context.c @@ -53,7 +53,14 @@ extern const struct ra_ctx_fns ra_ctx_vulkan_wayland; extern const struct ra_ctx_fns ra_ctx_vulkan_win; extern const struct ra_ctx_fns ra_ctx_vulkan_xlib; +/* Direct3D 11 */ +extern const struct ra_ctx_fns ra_ctx_d3d11; + static const struct ra_ctx_fns *contexts[] = { +#if HAVE_D3D11 + &ra_ctx_d3d11, +#endif + // OpenGL contexts: #if HAVE_ANDROID &ra_ctx_android, diff --git a/video/out/gpu/d3d11_helpers.c b/video/out/gpu/d3d11_helpers.c new file mode 100644 index 0000000000..7912a8c23a --- /dev/null +++ b/video/out/gpu/d3d11_helpers.c @@ -0,0 +1,401 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <windows.h> +#include <d3d11.h> +#include <dxgi1_2.h> +#include <pthread.h> + +#include "common/common.h" +#include "common/msg.h" +#include "osdep/io.h" +#include "osdep/windows_utils.h" + +#include "d3d11_helpers.h" + +// Windows 8 enum value, not present in mingw-w64 headers +#define DXGI_ADAPTER_FLAG_SOFTWARE (2) + +static pthread_once_t d3d11_once = PTHREAD_ONCE_INIT; +static PFN_D3D11_CREATE_DEVICE pD3D11CreateDevice = NULL; +static void d3d11_load(void) +{ + HMODULE d3d11 = LoadLibraryW(L"d3d11.dll"); + if (!d3d11) + return; + pD3D11CreateDevice = (PFN_D3D11_CREATE_DEVICE) + GetProcAddress(d3d11, "D3D11CreateDevice"); +} + +// Get a const array of D3D_FEATURE_LEVELs from max_fl to min_fl (inclusive) +static int get_feature_levels(int max_fl, int min_fl, + const D3D_FEATURE_LEVEL **out) +{ + static const D3D_FEATURE_LEVEL levels[] = { + D3D_FEATURE_LEVEL_12_1, + D3D_FEATURE_LEVEL_12_0, + D3D_FEATURE_LEVEL_11_1, + D3D_FEATURE_LEVEL_11_0, + D3D_FEATURE_LEVEL_10_1, + D3D_FEATURE_LEVEL_10_0, + D3D_FEATURE_LEVEL_9_3, + D3D_FEATURE_LEVEL_9_2, + D3D_FEATURE_LEVEL_9_1, + }; + static const int levels_len = MP_ARRAY_SIZE(levels); + + int start = 0; + for (; start < levels_len; start++) { + if (levels[start] <= max_fl) + break; + } + int len = 0; + for (; start + len < levels_len; len++) { + if (levels[start + len] < min_fl) + break; + } + *out = &levels[start]; + return len; +} + +static HRESULT create_device(struct mp_log *log, bool warp, bool bgra, + bool debug, int max_fl, int min_fl, + ID3D11Device **dev) +{ + const D3D_FEATURE_LEVEL *levels; + int levels_len = get_feature_levels(max_fl, min_fl, &levels); + if (!levels_len) { + mp_fatal(log, "No suitable Direct3D feature level found\n"); + return E_FAIL; + } + + D3D_DRIVER_TYPE type = warp ? 
D3D_DRIVER_TYPE_WARP + : D3D_DRIVER_TYPE_HARDWARE; + UINT flags = 0; + if (bgra) + flags |= D3D11_CREATE_DEVICE_BGRA_SUPPORT; + if (debug) + flags |= D3D11_CREATE_DEVICE_DEBUG; + return pD3D11CreateDevice(NULL, type, NULL, flags, levels, levels_len, + D3D11_SDK_VERSION, dev, NULL, NULL); +} + +// Create a Direct3D 11 device for rendering and presentation. This is meant to +// reduce boilerplate in backends that use D3D11, while also making sure they share +// the same device creation logic and log the same information. +bool mp_d3d11_create_present_device(struct mp_log *log, + struct d3d11_device_opts *opts, + ID3D11Device **dev_out) +{ + bool warp = opts->force_warp; + bool bgra = true; + int max_fl = opts->max_feature_level; + int min_fl = opts->min_feature_level; + ID3D11Device *dev = NULL; + IDXGIDevice1 *dxgi_dev = NULL; + IDXGIAdapter1 *adapter = NULL; + bool success = false; + HRESULT hr; + + pthread_once(&d3d11_once, d3d11_load); + if (!pD3D11CreateDevice) { + mp_fatal(log, "Failed to load d3d11.dll\n"); + goto done; + } + + // Return here to retry creating the device + do { + // Use these default feature levels if they are not set + max_fl = max_fl ? max_fl : D3D_FEATURE_LEVEL_11_0; + min_fl = min_fl ? min_fl : D3D_FEATURE_LEVEL_9_1; + + hr = create_device(log, warp, bgra, opts->debug, max_fl, min_fl, &dev); + if (SUCCEEDED(hr)) + break; + + // BGRA is recommended, but FL 10_0 hardware may not support it + if (bgra) { + mp_dbg(log, "Failed to create D3D device with BGRA support\n"); + bgra = false; + continue; + } + + // Trying to create a D3D_FEATURE_LEVEL_12_0 device on Windows 8.1 or + // below will not succeed. Try an 11_1 device. 
+ if (max_fl >= D3D_FEATURE_LEVEL_12_0 && + min_fl <= D3D_FEATURE_LEVEL_11_1) + { + mp_dbg(log, "Failed to create 12_0+ device, trying 11_1\n"); + max_fl = D3D_FEATURE_LEVEL_11_1; + bgra = true; + continue; + } + + // Trying to create a D3D_FEATURE_LEVEL_11_1 device on Windows 7 + // without the platform update will not succeed. 
Try an 11_0 device. + if (max_fl >= D3D_FEATURE_LEVEL_11_1 && + min_fl <= D3D_FEATURE_LEVEL_11_0) + { + mp_dbg(log, "Failed to create 11_1+ device, trying 11_0\n"); + max_fl = D3D_FEATURE_LEVEL_11_0; + bgra = true; + continue; + } + + // Retry with WARP if allowed + if (!warp && opts->allow_warp) { + mp_dbg(log, "Failed to create hardware device, trying WARP\n"); + warp = true; + max_fl = opts->max_feature_level; + min_fl = opts->min_feature_level; + bgra = true; + continue; + } + + mp_fatal(log, "Failed to create Direct3D 11 device: %s\n", + mp_HRESULT_to_str(hr)); + goto done; + } while (true); + + hr = ID3D11Device_QueryInterface(dev, &IID_IDXGIDevice1, (void**)&dxgi_dev); + if (FAILED(hr)) { + mp_fatal(log, "Failed to get DXGI device\n"); + goto done; + } + hr = IDXGIDevice1_GetParent(dxgi_dev, &IID_IDXGIAdapter1, (void**)&adapter); + if (FAILED(hr)) { + mp_fatal(log, "Failed to get DXGI adapter\n"); + goto done; + } + + IDXGIDevice1_SetMaximumFrameLatency(dxgi_dev, opts->max_frame_latency); + + DXGI_ADAPTER_DESC1 desc; + hr = IDXGIAdapter1_GetDesc1(adapter, &desc); + if (FAILED(hr)) { + mp_fatal(log, "Failed to get adapter description\n"); + goto done; + } + + D3D_FEATURE_LEVEL selected_level = ID3D11Device_GetFeatureLevel(dev); + mp_verbose(log, "Using Direct3D 11 feature level %u_%u\n", + ((unsigned)selected_level) >> 12, + (((unsigned)selected_level) >> 8) & 0xf); + + char *dev_name = mp_to_utf8(NULL, desc.Description); + mp_verbose(log, "Device: %s\n" + "VendorId: 0x%04d\n" + "DeviceId: 0x%04d\n" + "LUID: %08lx%08lx\n", + dev_name, desc.VendorId, desc.DeviceId, + desc.AdapterLuid.HighPart, desc.AdapterLuid.LowPart); + talloc_free(dev_name); + + if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) + warp = true; + // If the primary display adapter is a software adapter, the + // DXGI_ADAPTER_FLAG_SOFTWARE flag won't be set, but the device IDs should + // still match the Microsoft Basic Render Driver + if (desc.VendorId == 0x1414 && desc.DeviceId == 0x8c) + warp = 
true; + if (warp) { + mp_msg(log, opts->force_warp ? MSGL_V : MSGL_WARN, + "Using a software adapter\n"); + } + + *dev_out = dev; + dev = NULL; + success = true; + +done: + SAFE_RELEASE(adapter); + SAFE_RELEASE(dxgi_dev); + SAFE_RELEASE(dev); + return success; +} + +static HRESULT create_swapchain_1_2(ID3D11Device *dev, IDXGIFactory2 *factory, + struct mp_log *log, + struct d3d11_swapchain_opts *opts, + bool flip, DXGI_FORMAT format, + IDXGISwapChain **swapchain_out) +{ + IDXGISwapChain *swapchain = NULL; + IDXGISwapChain1 *swapchain1 = NULL; + HRESULT hr; + + DXGI_SWAP_CHAIN_DESC1 desc = { + .Width = opts->width ? opts->width : 1, + .Height = opts->height ? opts->height : 1, + .Format = format, + .SampleDesc = { .Count = 1 }, + .BufferUsage = opts->usage, + }; + + if (flip) { + desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL; + desc.BufferCount = opts->length; + } else { + desc.SwapEffect = DXGI_SWAP_EFFECT_DISCARD; + desc.BufferCount = 1; + } + + hr = IDXGIFactory2_CreateSwapChainForHwnd(factory, (IUnknown*)dev, + opts->window, &desc, NULL, NULL, &swapchain1); + if (FAILED(hr)) + goto done; + hr = IDXGISwapChain1_QueryInterface(swapchain1, &IID_IDXGISwapChain, + (void**)&swapchain); + if (FAILED(hr)) + goto done; + + *swapchain_out = swapchain; + swapchain = NULL; + +done: + SAFE_RELEASE(swapchain1); + SAFE_RELEASE(swapchain); + return hr; +} + +static HRESULT create_swapchain_1_1(ID3D11Device *dev, IDXGIFactory1 *factory, + struct mp_log *log, + struct d3d11_swapchain_opts *opts, + DXGI_FORMAT format, + IDXGISwapChain **swapchain_out) +{ + DXGI_SWAP_CHAIN_DESC desc = { + .BufferDesc = { + .Width = opts->width ? opts->width : 1, + .Height = opts->height ? 
opts->height : 1, + .Format = format, + }, + .SampleDesc = { .Count = 1 }, + .BufferUsage = opts->usage, + .BufferCount = 1, + .OutputWindow = opts->window, + .Windowed = TRUE, + .SwapEffect = DXGI_SWAP_EFFECT_DISCARD, + }; + + return IDXGIFactory1_CreateSwapChain(factory, (IUnknown*)dev, &desc, + swapchain_out); +} + +// Create a Direct3D 11 swapchain +bool mp_d3d11_create_swapchain(ID3D11Device *dev, struct mp_log *log, + struct d3d11_swapchain_opts *opts, + IDXGISwapChain **swapchain_out) +{ + IDXGIDevice1 *dxgi_dev = NULL; + IDXGIAdapter1 *adapter = NULL; + IDXGIFactory1 *factory = NULL; + IDXGIFactory2 *factory2 = NULL; + IDXGISwapChain *swapchain = NULL; + bool success = false; + HRESULT hr; + + hr = ID3D11Device_QueryInterface(dev, &IID_IDXGIDevice1, (void**)&dxgi_dev); + if (FAILED(hr)) { + mp_fatal(log, "Failed to get DXGI device\n"); + goto done; + } + hr = IDXGIDevice1_GetParent(dxgi_dev, &IID_IDXGIAdapter1, (void**)&adapter); + if (FAILED(hr)) { + mp_fatal(log, "Failed to get DXGI adapter\n"); + goto done; + } + hr = IDXGIAdapter1_GetParent(adapter, &IID_IDXGIFactory1, (void**)&factory); + if (FAILED(hr)) { + mp_fatal(log, "Failed to get DXGI factory\n"); + goto done; + } + hr = IDXGIFactory1_QueryInterface(factory, &IID_IDXGIFactory2, + (void**)&factory2); + if (FAILED(hr)) + factory2 = NULL; + + // Try B8G8R8A8_UNORM first, since at least in Windows 8, it's always the + // format of the desktop image + static const DXGI_FORMAT formats[] = { + DXGI_FORMAT_B8G8R8A8_UNORM, + DXGI_FORMAT_R8G8B8A8_UNORM, + }; + static const int formats_len = MP_ARRAY_SIZE(formats); + bool flip = factory2 && opts->flip; + + // Return here to retry creating the swapchain + do { + for (int i = 0; i < formats_len; i++) { + if (factory2) { + // Create a DXGI 1.2+ (Windows 8+) swap chain if possible + hr = create_swapchain_1_2(dev, factory2, log, opts, flip, + formats[i], &swapchain); + } else { + // Fall back to DXGI 1.1 (Windows 7) + hr = create_swapchain_1_1(dev, factory, 
log, opts, formats[i], + &swapchain); + } + if (SUCCEEDED(hr)) + break; + } + if (SUCCEEDED(hr)) + break; + + if (flip) { + mp_dbg(log, "Failed to create flip-model swapchain, trying bitblt\n"); + flip = false; + continue; + } + + mp_fatal(log, "Failed to create swapchain: %s\n", mp_HRESULT_to_str(hr)); + goto done; + } while (true); + + // Prevent DXGI from making changes to the VO window, otherwise it will + // hook the Alt+Enter keystroke and make it trigger an ugly transition to + // exclusive fullscreen mode instead of running the user-set command. + IDXGIFactory_MakeWindowAssociation(factory, opts->window, + DXGI_MWA_NO_WINDOW_CHANGES | DXGI_MWA_NO_ALT_ENTER | + DXGI_MWA_NO_PRINT_SCREEN); + + if (factory2) { + mp_verbose(log, "Using DXGI 1.2+\n"); + } else { + mp_verbose(log, "Using DXGI 1.1\n"); + } + + DXGI_SWAP_CHAIN_DESC scd = {0}; + IDXGISwapChain_GetDesc(swapchain, &scd); + if (scd.SwapEffect == DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL) { + mp_verbose(log, "Using flip-model presentation\n"); + } else { + mp_verbose(log, "Using bitblt-model presentation\n"); + } + + *swapchain_out = swapchain; + swapchain = NULL; + success = true; + +done: + SAFE_RELEASE(swapchain); + SAFE_RELEASE(factory2); + SAFE_RELEASE(factory); + SAFE_RELEASE(adapter); + SAFE_RELEASE(dxgi_dev); + return success; +} diff --git a/video/out/gpu/d3d11_helpers.h b/video/out/gpu/d3d11_helpers.h new file mode 100644 index 0000000000..6d99c62a51 --- /dev/null +++ b/video/out/gpu/d3d11_helpers.h @@ -0,0 +1,79 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef MP_D3D11_HELPERS_H_ +#define MP_D3D11_HELPERS_H_ + +#include <stdbool.h> +#include <windows.h> +#include <d3d11.h> +#include <dxgi1_2.h> + +#define D3D_FEATURE_LEVEL_12_0 (0xc000) +#define D3D_FEATURE_LEVEL_12_1 (0xc100) + +struct d3d11_device_opts { + // Enable the debug layer (D3D11_CREATE_DEVICE_DEBUG) + bool debug; + + // Allow a software (WARP) adapter. Note, sometimes a software adapter will + // be used even when allow_warp is false. This is because, on Windows 8 and + // up, if there are no hardware adapters, Windows will pretend the WARP + // adapter is the primary hardware adapter. + bool allow_warp; + + // Always use a WARP adapter. This is mainly for testing purposes. + bool force_warp; + + // The maximum number of pending frames allowed to be queued to a swapchain + int max_frame_latency; + + // The maximum Direct3D 11 feature level to attempt to create + // If unset, defaults to D3D_FEATURE_LEVEL_11_0 + int max_feature_level; + + // The minimum Direct3D 11 feature level to attempt to create. If this is + // not supported, device creation will fail. + // If unset, defaults to D3D_FEATURE_LEVEL_9_1 + int min_feature_level; +}; + +bool mp_d3d11_create_present_device(struct mp_log *log, + struct d3d11_device_opts *opts, + ID3D11Device **dev_out); + +struct d3d11_swapchain_opts { + HWND window; + int width; + int height; + + // Use DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL if possible + bool flip; + + // Number of surfaces in the swapchain + int length; + + // The BufferUsage value for swapchain surfaces. This should probably + // contain DXGI_USAGE_RENDER_TARGET_OUTPUT.
+ DXGI_USAGE usage; +}; + +bool mp_d3d11_create_swapchain(ID3D11Device *dev, struct mp_log *log, + struct d3d11_swapchain_opts *opts, + IDXGISwapChain **swapchain_out); + +#endif diff --git a/video/out/gpu/hwdec.c b/video/out/gpu/hwdec.c index 9c51a66183..0a6a4de32a 100644 --- a/video/out/gpu/hwdec.c +++ b/video/out/gpu/hwdec.c @@ -34,6 +34,7 @@ extern const struct ra_hwdec_driver ra_hwdec_d3d11egl; extern const struct ra_hwdec_driver ra_hwdec_d3d11eglrgb; extern const struct ra_hwdec_driver ra_hwdec_dxva2gldx; extern const struct ra_hwdec_driver ra_hwdec_dxva2; +extern const struct ra_hwdec_driver ra_hwdec_d3d11va; extern const struct ra_hwdec_driver ra_hwdec_cuda; extern const struct ra_hwdec_driver ra_hwdec_cuda_nvdec; extern const struct ra_hwdec_driver ra_hwdec_rpi_overlay; @@ -58,6 +59,9 @@ static const struct ra_hwdec_driver *const mpgl_hwdec_drivers[] = { #if HAVE_D3D9_HWACCEL &ra_hwdec_dxva2egl, #endif + #if HAVE_D3D11 + &ra_hwdec_d3d11va, + #endif #endif #if HAVE_GL_DXINTEROP_D3D9 &ra_hwdec_dxva2gldx, diff --git a/video/out/opengl/context_angle.c b/video/out/opengl/context_angle.c index 0b665e27bf..b504293db5 100644 --- a/video/out/opengl/context_angle.c +++ b/video/out/opengl/context_angle.c @@ -24,7 +24,7 @@ #include "angle_dynamic.h" #include "egl_helpers.h" -#include "d3d11_helpers.h" +#include "video/out/gpu/d3d11_helpers.h" #include "common/common.h" #include "options/m_config.h" diff --git a/video/out/opengl/d3d11_helpers.c b/video/out/opengl/d3d11_helpers.c deleted file mode 100644 index d9b7fc2804..0000000000 --- a/video/out/opengl/d3d11_helpers.c +++ /dev/null @@ -1,383 +0,0 @@ -/* - * This file is part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <windows.h> -#include <d3d11.h> -#include <dxgi1_2.h> -#include <pthread.h> - -#include "common/common.h" -#include "common/msg.h" -#include "osdep/io.h" -#include "osdep/windows_utils.h" - -#include "d3d11_helpers.h" - -// Windows 8 enum value, not present in mingw-w64 headers -#define DXGI_ADAPTER_FLAG_SOFTWARE (2) - -static pthread_once_t d3d11_once = PTHREAD_ONCE_INIT; -static PFN_D3D11_CREATE_DEVICE pD3D11CreateDevice = NULL; -static void d3d11_load(void) -{ - HMODULE d3d11 = LoadLibraryW(L"d3d11.dll"); - if (!d3d11) - return; - pD3D11CreateDevice = (PFN_D3D11_CREATE_DEVICE) - GetProcAddress(d3d11, "D3D11CreateDevice"); -} - -// Get a const array of D3D_FEATURE_LEVELs from max_fl to min_fl (inclusive) -static int get_feature_levels(int max_fl, int min_fl, - const D3D_FEATURE_LEVEL **out) -{ - static const D3D_FEATURE_LEVEL levels[] = { - D3D_FEATURE_LEVEL_11_1, - D3D_FEATURE_LEVEL_11_0, - D3D_FEATURE_LEVEL_10_1, - D3D_FEATURE_LEVEL_10_0, - D3D_FEATURE_LEVEL_9_3, - D3D_FEATURE_LEVEL_9_2, - D3D_FEATURE_LEVEL_9_1, - }; - static const int levels_len = MP_ARRAY_SIZE(levels); - - int start = 0; - for (; start < levels_len; start++) { - if (levels[start] <= max_fl) - break; - } - int len = 0; - for (; start + len < levels_len; len++) { - if (levels[start + len] < min_fl) - break; - } - *out = &levels[start]; - return len; -} - -static HRESULT create_device(struct mp_log *log, bool warp, bool bgra, - int max_fl, int min_fl, ID3D11Device **dev) -{ - const D3D_FEATURE_LEVEL *levels; - int levels_len = get_feature_levels(max_fl, min_fl, &levels); - if (!levels_len) { - mp_fatal(log, "No suitable Direct3D feature level found\n"); - return
E_FAIL; - } - - D3D_DRIVER_TYPE type = warp ? D3D_DRIVER_TYPE_WARP - : D3D_DRIVER_TYPE_HARDWARE; - UINT flags = bgra ? D3D11_CREATE_DEVICE_BGRA_SUPPORT : 0; - return pD3D11CreateDevice(NULL, type, NULL, flags, levels, levels_len, - D3D11_SDK_VERSION, dev, NULL, NULL); -} - -// Create a Direct3D 11 device for rendering and presentation. This is meant to -// reduce boilerplate in backends that D3D11, while also making sure they share -// the same device creation logic and log the same information. -bool mp_d3d11_create_present_device(struct mp_log *log, - struct d3d11_device_opts *opts, - ID3D11Device **dev_out) -{ - bool warp = opts->force_warp; - bool bgra = true; - int max_fl = opts->max_feature_level; - int min_fl = opts->min_feature_level; - ID3D11Device *dev = NULL; - IDXGIDevice1 *dxgi_dev = NULL; - IDXGIAdapter1 *adapter = NULL; - bool success = false; - HRESULT hr; - - pthread_once(&d3d11_once, d3d11_load); - if (!pD3D11CreateDevice) { - mp_fatal(log, "Failed to load d3d11.dll\n"); - goto done; - } - - // Return here to retry creating the device - do { - // Use these default feature levels if they are not set - max_fl = max_fl ? max_fl : D3D_FEATURE_LEVEL_11_0; - min_fl = min_fl ? min_fl : D3D_FEATURE_LEVEL_9_1; - - hr = create_device(log, warp, bgra, max_fl, min_fl, &dev); - if (SUCCEEDED(hr)) - break; - - // BGRA is recommended, but FL 10_0 hardware may not support it - if (bgra) { - mp_dbg(log, "Failed to create D3D device with BGRA support\n"); - bgra = false; - continue; - } - - // Trying to create a D3D_FEATURE_LEVEL_11_1 device on Windows 7 - // without the platform update will not succeed. Try a 11_0 device. 
- if (max_fl >= D3D_FEATURE_LEVEL_11_1 && - min_fl <= D3D_FEATURE_LEVEL_11_0) - { - mp_dbg(log, "Failed to create 11_1+ device, trying 11_0\n"); - max_fl = D3D_FEATURE_LEVEL_11_0; - bgra = true; - continue; - } - - // Retry with WARP if allowed - if (!warp && opts->allow_warp) { - mp_dbg(log, "Failed to create hardware device, trying WARP\n"); - warp = true; - max_fl = opts->max_feature_level; - min_fl = opts->min_feature_level; - bgra = true; - continue; - } - - mp_fatal(log, "Failed to create Direct3D 11 device: %s\n", - mp_HRESULT_to_str(hr)); - goto done; - } while (true); - - hr = ID3D11Device_QueryInterface(dev, &IID_IDXGIDevice1, (void**)&dxgi_dev); - if (FAILED(hr)) { - mp_fatal(log, "Failed to get DXGI device\n"); - goto done; - } - hr = IDXGIDevice1_GetParent(dxgi_dev, &IID_IDXGIAdapter1, (void**)&adapter); - if (FAILED(hr)) { - mp_fatal(log, "Failed to get DXGI adapter\n"); - goto done; - } - - IDXGIDevice1_SetMaximumFrameLatency(dxgi_dev, opts->max_frame_latency); - - DXGI_ADAPTER_DESC1 desc; - hr = IDXGIAdapter1_GetDesc1(adapter, &desc); - if (FAILED(hr)) { - mp_fatal(log, "Failed to get adapter description\n"); - goto done; - } - - D3D_FEATURE_LEVEL selected_level = ID3D11Device_GetFeatureLevel(dev); - mp_verbose(log, "Using Direct3D 11 feature level %u_%u\n", - ((unsigned)selected_level) >> 12, - (((unsigned)selected_level) >> 8) & 0xf); - - char *dev_name = mp_to_utf8(NULL, desc.Description); - mp_verbose(log, "Device: %s\n" - "VendorId: 0x%04d\n" - "DeviceId: 0x%04d\n" - "LUID: %08lx%08lx\n", - dev_name, desc.VendorId, desc.DeviceId, - desc.AdapterLuid.HighPart, desc.AdapterLuid.LowPart); - talloc_free(dev_name); - - if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) - warp = true; - // If the primary display adapter is a software adapter, the - // DXGI_ADAPTER_FLAG_SOFTWARE flag won't be set, but the device IDs should - // still match the Microsoft Basic Render Driver - if (desc.VendorId == 0x1414 && desc.DeviceId == 0x8c) - warp = true; - if (warp) { - 
mp_msg(log, opts->force_warp ? MSGL_V : MSGL_WARN, - "Using a software adapter\n"); - } - - *dev_out = dev; - dev = NULL; - success = true; - -done: - SAFE_RELEASE(adapter); - SAFE_RELEASE(dxgi_dev); - SAFE_RELEASE(dev); - return success; -} - -static HRESULT create_swapchain_1_2(ID3D11Device *dev, IDXGIFactory2 *factory, - struct mp_log *log, - struct d3d11_swapchain_opts *opts, - bool flip, DXGI_FORMAT format, - IDXGISwapChain **swapchain_out) -{ - IDXGISwapChain *swapchain = NULL; - IDXGISwapChain1 *swapchain1 = NULL; - HRESULT hr; - - DXGI_SWAP_CHAIN_DESC1 desc = { - .Width = opts->width ? opts->width : 1, - .Height = opts->height ? opts->height : 1, - .Format = format, - .SampleDesc = { .Count = 1 }, - .BufferUsage = opts->usage, - }; - - if (flip) { - desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL; - desc.BufferCount = opts->length; - } else { - desc.SwapEffect = DXGI_SWAP_EFFECT_DISCARD; - desc.BufferCount = 1; - } - - hr = IDXGIFactory2_CreateSwapChainForHwnd(factory, (IUnknown*)dev, - opts->window, &desc, NULL, NULL, &swapchain1); - if (FAILED(hr)) - goto done; - hr = IDXGISwapChain1_QueryInterface(swapchain1, &IID_IDXGISwapChain, - (void**)&swapchain); - if (FAILED(hr)) - goto done; - - *swapchain_out = swapchain; - swapchain = NULL; - -done: - SAFE_RELEASE(swapchain1); - SAFE_RELEASE(swapchain); - return hr; -} - -static HRESULT create_swapchain_1_1(ID3D11Device *dev, IDXGIFactory1 *factory, - struct mp_log *log, - struct d3d11_swapchain_opts *opts, - DXGI_FORMAT format, - IDXGISwapChain **swapchain_out) -{ - DXGI_SWAP_CHAIN_DESC desc = { - .BufferDesc = { - .Width = opts->width ? opts->width : 1, - .Height = opts->height ? 
opts->height : 1, - .Format = format, - }, - .SampleDesc = { .Count = 1 }, - .BufferUsage = opts->usage, - .BufferCount = 1, - .OutputWindow = opts->window, - .Windowed = TRUE, - .SwapEffect = DXGI_SWAP_EFFECT_DISCARD, - }; - - return IDXGIFactory1_CreateSwapChain(factory, (IUnknown*)dev, &desc, - swapchain_out); -} - -// Create a Direct3D 11 swapchain -bool mp_d3d11_create_swapchain(ID3D11Device *dev, struct mp_log *log, - struct d3d11_swapchain_opts *opts, - IDXGISwapChain **swapchain_out) -{ - IDXGIDevice1 *dxgi_dev = NULL; - IDXGIAdapter1 *adapter = NULL; - IDXGIFactory1 *factory = NULL; - IDXGIFactory2 *factory2 = NULL; - IDXGISwapChain *swapchain = NULL; - bool success = false; - HRESULT hr; - - hr = ID3D11Device_QueryInterface(dev, &IID_IDXGIDevice1, (void**)&dxgi_dev); - if (FAILED(hr)) { - mp_fatal(log, "Failed to get DXGI device\n"); - goto done; - } - hr = IDXGIDevice1_GetParent(dxgi_dev, &IID_IDXGIAdapter1, (void**)&adapter); - if (FAILED(hr)) { - mp_fatal(log, "Failed to get DXGI adapter\n"); - goto done; - } - hr = IDXGIAdapter1_GetParent(adapter, &IID_IDXGIFactory1, (void**)&factory); - if (FAILED(hr)) { - mp_fatal(log, "Failed to get DXGI factory\n"); - goto done; - } - hr = IDXGIFactory1_QueryInterface(factory, &IID_IDXGIFactory2, - (void**)&factory2); - if (FAILED(hr)) - factory2 = NULL; - - // Try B8G8R8A8_UNORM first, since at least in Windows 8, it's always the - // format of the desktop image - static const DXGI_FORMAT formats[] = { - DXGI_FORMAT_B8G8R8A8_UNORM, - DXGI_FORMAT_R8G8B8A8_UNORM, - }; - static const int formats_len = MP_ARRAY_SIZE(formats); - bool flip = factory2 && opts->flip; - - // Return here to retry creating the swapchain - do { - for (int i = 0; i < formats_len; i++) { - if (factory2) { - // Create a DXGI 1.2+ (Windows 8+) swap chain if possible - hr = create_swapchain_1_2(dev, factory2, log, opts, flip, - formats[i], &swapchain); - } else { - // Fall back to DXGI 1.1 (Windows 7) - hr = create_swapchain_1_1(dev, factory, 
log, opts, formats[i], - &swapchain); - } - if (SUCCEEDED(hr)) - break; - } - if (SUCCEEDED(hr)) - break; - - if (flip) { - mp_dbg(log, "Failed to create flip-model swapchain, trying bitblt\n"); - flip = false; - continue; - } - - mp_fatal(log, "Failed to create swapchain: %s\n", mp_HRESULT_to_str(hr)); - goto done; - } while (true); - - // Prevent DXGI from making changes to the VO window, otherwise it will - // hook the Alt+Enter keystroke and make it trigger an ugly transition to - // exclusive fullscreen mode instead of running the user-set command. - IDXGIFactory_MakeWindowAssociation(factory, opts->window, - DXGI_MWA_NO_WINDOW_CHANGES | DXGI_MWA_NO_ALT_ENTER | - DXGI_MWA_NO_PRINT_SCREEN); - - if (factory2) { - mp_verbose(log, "Using DXGI 1.2+\n"); - } else { - mp_verbose(log, "Using DXGI 1.1\n"); - } - - DXGI_SWAP_CHAIN_DESC scd = {0}; - IDXGISwapChain_GetDesc(swapchain, &scd); - if (scd.SwapEffect == DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL) { - mp_verbose(log, "Using flip-model presentation\n"); - } else { - mp_verbose(log, "Using bitblt-model presentation\n"); - } - - *swapchain_out = swapchain; - swapchain = NULL; - success = true; - -done: - SAFE_RELEASE(swapchain); - SAFE_RELEASE(factory2); - SAFE_RELEASE(factory); - SAFE_RELEASE(adapter); - SAFE_RELEASE(dxgi_dev); - return success; -} diff --git a/video/out/opengl/d3d11_helpers.h b/video/out/opengl/d3d11_helpers.h deleted file mode 100644 index f34d1d4def..0000000000 --- a/video/out/opengl/d3d11_helpers.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * This file is part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef MP_D3D11_HELPERS_H_ -#define MP_D3D11_HELPERS_H_ - -#include <stdbool.h> -#include <windows.h> -#include <d3d11.h> -#include <dxgi1_2.h> - -struct d3d11_device_opts { - // Allow a software (WARP) adapter. Note, sometimes a software adapter will - // be used even when allow_warp is false. This is because, on Windows 8 and - // up, if there are no hardware adapters, Windows will pretend the WARP - // adapter is the primary hardware adapter. - bool allow_warp; - - // Always use a WARP adapter. This is mainly for testing purposes. - bool force_warp; - - // The maximum number of pending frames allowed to be queued to a swapchain - int max_frame_latency; - - // The maximum Direct3D 11 feature level to attempt to create - // If unset, defaults to D3D_FEATURE_LEVEL_11_0 - int max_feature_level; - - // The minimum Direct3D 11 feature level to attempt to create. If this is - // not supported, device creation will fail. - // If unset, defaults to D3D_FEATURE_LEVEL_9_1 - int min_feature_level; -}; - -bool mp_d3d11_create_present_device(struct mp_log *log, - struct d3d11_device_opts *opts, - ID3D11Device **dev_out); - -struct d3d11_swapchain_opts { - HWND window; - int width; - int height; - - // Use DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL if possible - bool flip; - - // Number of surfaces in the swapchain - int length; - - // The BufferUsage value for swapchain surfaces. This should probably - // contain DXGI_USAGE_RENDER_TARGET_OUTPUT.
- DXGI_USAGE usage; -}; - -bool mp_d3d11_create_swapchain(ID3D11Device *dev, struct mp_log *log, - struct d3d11_swapchain_opts *opts, - IDXGISwapChain **swapchain_out); - -#endif diff --git a/wscript b/wscript index ad17012bec..50cbccf2ce 100644 --- a/wscript +++ b/wscript @@ -753,6 +753,19 @@ video_output_features = [ 'desc': 'Direct3D support', 'deps': 'win32-desktop && gpl', 'func': check_cc(header_name='d3d9.h'), + }, { + 'name': '--shaderc', + 'desc': 'libshaderc SPIR-V compiler', + 'func': check_cc(header_name='shaderc/shaderc.h', lib='shaderc_shared'), + }, { + 'name': '--crossc', + 'desc': 'libcrossc SPIR-V translator', + 'func': check_pkg_config('crossc'), + }, { + 'name': '--d3d11', + 'desc': 'Direct3D 11 video output', + 'deps': 'win32-desktop && shaderc && crossc', + 'func': check_cc(header_name=['d3d11_1.h', 'dxgi1_2.h']), }, { # We need MMAL/bcm_host/dispmanx APIs. Also, most RPI distros require # every project to hardcode the paths to the include directories. Also, @@ -806,10 +819,6 @@ video_output_features = [ 'name': '--vulkan', 'desc': 'Vulkan context support', 'func': check_pkg_config('vulkan'), - }, { - 'name': '--shaderc', - 'desc': 'libshaderc SPIR-V compiler', - 'func': check_cc(header_name='shaderc/shaderc.h', lib='shaderc_shared'), }, { 'name': 'egl-helpers', 'desc': 'EGL helper functions', diff --git a/wscript_build.py b/wscript_build.py index dbf27171d7..a2971176a3 100644 --- a/wscript_build.py +++ b/wscript_build.py @@ -401,8 +401,12 @@ def build(ctx): ( "video/out/cocoa_common.m", "cocoa" ), ( "video/out/dither.c" ), ( "video/out/filter_kernels.c" ), + ( "video/out/d3d11/context.c", "d3d11" ), + ( "video/out/d3d11/hwdec_d3d11va.c", "d3d11 && d3d-hwaccel" ), + ( "video/out/d3d11/ra_d3d11.c", "d3d11" ), ( "video/out/opengl/angle_dynamic.c", "egl-angle" ), ( "video/out/gpu/context.c" ), + ( "video/out/gpu/d3d11_helpers.c", "d3d11 || egl-angle-win32" ), ( "video/out/gpu/hwdec.c" ), ( "video/out/gpu/lcms.c" ), ( "video/out/gpu/osd.c" ), @@ 
-432,7 +436,6 @@ def build(ctx): ( "video/out/opengl/context_glx.c", "gl-x11" ), ( "video/out/opengl/context_x11egl.c", "egl-x11" ), ( "video/out/opengl/cuda_dynamic.c", "cuda-hwaccel" ), - ( "video/out/opengl/d3d11_helpers.c", "egl-angle-win32" ), ( "video/out/opengl/egl_helpers.c", "egl-helpers" ), ( "video/out/opengl/hwdec_cuda.c", "cuda-hwaccel" ), ( "video/out/opengl/hwdec_d3d11egl.c", "d3d-hwaccel" ), -- cgit v1.2.3