diff options
author | James Ross-Gowan <rossy@jrg.systems> | 2017-11-01 22:38:41 +1100 |
---|---|---|
committer | James Ross-Gowan <rossy@jrg.systems> | 2017-11-07 20:27:13 +1100 |
commit | e7bf5576e599593b1bba5bbf2a7cd6d4270c7809 (patch) | |
tree | 8a91e157cbd47bdd6f9bdbb40f2b0722ad5d3d53 /video/out | |
parent | b258d82d6eab5cec7f27f4d6780bf991d28aadef (diff) |
vo_gpu: hwdec_d3d11va: allow zero-copy video decoding
As the manual says, zero-copy decoding is technically undefined behaviour. See:
https://msdn.microsoft.com/en-us/library/windows/desktop/ff476085.aspx
In particular, MSDN says texture arrays created with the D3D11_BIND_DECODER
flag cannot be used with CreateShaderResourceView, which means they
can't be sampled through SRVs like normal Direct3D textures. However,
some programs (Google Chrome included) do this anyway for performance
and power-usage reasons, and it appears to work with most drivers.
Older AMD drivers had a "bug" with zero-copy decoding, but this appears
to have been fixed. See #3255, #3464 and http://crbug.com/623029.
Diffstat (limited to 'video/out')
-rw-r--r-- | video/out/d3d11/hwdec_d3d11va.c | 154 | ||||
-rw-r--r-- | video/out/d3d11/ra_d3d11.c | 64 | ||||
-rw-r--r-- | video/out/d3d11/ra_d3d11.h | 3 |
3 files changed, 159 insertions, 62 deletions
diff --git a/video/out/d3d11/hwdec_d3d11va.c b/video/out/d3d11/hwdec_d3d11va.c index f179298ac1..7de24dde28 100644 --- a/video/out/d3d11/hwdec_d3d11va.c +++ b/video/out/d3d11/hwdec_d3d11va.c @@ -22,21 +22,45 @@ #include "config.h" #include "common/common.h" +#include "options/m_config.h" #include "osdep/windows_utils.h" #include "video/hwdec.h" #include "video/decode/d3d.h" #include "video/out/d3d11/ra_d3d11.h" #include "video/out/gpu/hwdec.h" +struct d3d11va_opts { + int zero_copy; +}; + +#define OPT_BASE_STRUCT struct d3d11va_opts +const struct m_sub_options d3d11va_conf = { + .opts = (const struct m_option[]) { + OPT_FLAG("d3d11va-zero-copy", zero_copy, 0), + {0} + }, + .defaults = &(const struct d3d11va_opts) { + .zero_copy = 0, + }, + .size = sizeof(struct d3d11va_opts) +}; + struct priv_owner { + struct d3d11va_opts *opts; + struct mp_hwdec_ctx hwctx; ID3D11Device *device; ID3D11Device1 *device1; }; struct priv { + // 1-copy path ID3D11DeviceContext1 *ctx; ID3D11Texture2D *copy_tex; + + // zero-copy path + int num_planes; + const struct ra_format *fmt[4]; }; static void uninit(struct ra_hwdec *hw) @@ -59,6 +83,8 @@ static int init(struct ra_hwdec *hw) if (!p->device) return -1; + p->opts = mp_get_config_group(hw->priv, hw->global, &d3d11va_conf); + // D3D11VA requires Direct3D 11.1, so this should always succeed hr = ID3D11Device_QueryInterface(p->device, &IID_ID3D11Device1, (void**)&p->device1); @@ -109,52 +135,56 @@ static int mapper_init(struct ra_hwdec_mapper *mapper) mapper->dst_params.hw_subfmt = 0; struct ra_imgfmt_desc desc = {0}; - struct mp_image layout = {0}; if (!ra_get_imgfmt_desc(mapper->ra, mapper->dst_params.imgfmt, &desc)) return -1; - mp_image_set_params(&layout, &mapper->dst_params); - - DXGI_FORMAT copy_fmt; - switch (mapper->dst_params.imgfmt) { - case IMGFMT_NV12: copy_fmt = DXGI_FORMAT_NV12; break; - case IMGFMT_P010: copy_fmt = DXGI_FORMAT_P010; break; - default: return -1; - } - - // We copy decoder images to an intermediate texture. 
This is slower than - // the zero-copy path, but according to MSDN, decoder textures should not - // be bound to SRVs, so it is technically correct, and it works around some - // driver "bugs" that can happen with the zero-copy path. It also allows - // samplers to work correctly when the decoder image includes padding. - D3D11_TEXTURE2D_DESC copy_desc = { - .Width = mapper->dst_params.w, - .Height = mapper->dst_params.h, - .MipLevels = 1, - .ArraySize = 1, - .SampleDesc.Count = 1, - .Format = copy_fmt, - .BindFlags = D3D11_BIND_SHADER_RESOURCE, - }; - hr = ID3D11Device_CreateTexture2D(o->device, &copy_desc, NULL, &p->copy_tex); - if (FAILED(hr)) { - MP_FATAL(mapper, "Could not create shader resource texture\n"); - return -1; - } + if (o->opts->zero_copy) { + // In the zero-copy path, we create the ra_tex objects in the map + // operation, so we just need to store the format of each plane + p->num_planes = desc.num_planes; + for (int i = 0; i < desc.num_planes; i++) + p->fmt[i] = desc.planes[i]; + } else { + struct mp_image layout = {0}; + mp_image_set_params(&layout, &mapper->dst_params); + + DXGI_FORMAT copy_fmt; + switch (mapper->dst_params.imgfmt) { + case IMGFMT_NV12: copy_fmt = DXGI_FORMAT_NV12; break; + case IMGFMT_P010: copy_fmt = DXGI_FORMAT_P010; break; + default: return -1; + } - for (int i = 0; i < desc.num_planes; i++) { - mapper->tex[i] = ra_d3d11_wrap_tex_video(mapper->ra, p->copy_tex, - mp_image_plane_w(&layout, i), - mp_image_plane_h(&layout, i), - desc.planes[i]); - if (!mapper->tex[i]) { - MP_FATAL(mapper, "Could not create RA texture view\n"); + D3D11_TEXTURE2D_DESC copy_desc = { + .Width = mapper->dst_params.w, + .Height = mapper->dst_params.h, + .MipLevels = 1, + .ArraySize = 1, + .SampleDesc.Count = 1, + .Format = copy_fmt, + .BindFlags = D3D11_BIND_SHADER_RESOURCE, + }; + hr = ID3D11Device_CreateTexture2D(o->device, &copy_desc, NULL, + &p->copy_tex); + if (FAILED(hr)) { + MP_FATAL(mapper, "Could not create shader resource texture\n"); return -1; } - 
} - ID3D11Device1_GetImmediateContext1(o->device1, &p->ctx); + for (int i = 0; i < desc.num_planes; i++) { + mapper->tex[i] = ra_d3d11_wrap_tex_video(mapper->ra, p->copy_tex, + mp_image_plane_w(&layout, i), mp_image_plane_h(&layout, i), 0, + desc.planes[i]); + if (!mapper->tex[i]) { + MP_FATAL(mapper, "Could not create RA texture view\n"); + return -1; + } + } + + // A ref to the immediate context is needed for CopySubresourceRegion + ID3D11Device1_GetImmediateContext1(o->device1, &p->ctx); + } return 0; } @@ -165,20 +195,47 @@ static int mapper_map(struct ra_hwdec_mapper *mapper) ID3D11Texture2D *tex = (void *)mapper->src->planes[0]; int subresource = (intptr_t)mapper->src->planes[1]; - ID3D11DeviceContext1_CopySubresourceRegion1(p->ctx, - (ID3D11Resource *)p->copy_tex, 0, 0, 0, 0, - (ID3D11Resource *)tex, subresource, (&(D3D11_BOX) { - .left = 0, - .top = 0, - .front = 0, - .right = mapper->dst_params.w, - .bottom = mapper->dst_params.h, - .back = 1, - }), D3D11_COPY_DISCARD); + if (p->copy_tex) { + ID3D11DeviceContext1_CopySubresourceRegion1(p->ctx, + (ID3D11Resource *)p->copy_tex, 0, 0, 0, 0, + (ID3D11Resource *)tex, subresource, (&(D3D11_BOX) { + .left = 0, + .top = 0, + .front = 0, + .right = mapper->dst_params.w, + .bottom = mapper->dst_params.h, + .back = 1, + }), D3D11_COPY_DISCARD); + } else { + D3D11_TEXTURE2D_DESC desc2d; + ID3D11Texture2D_GetDesc(tex, &desc2d); + + for (int i = 0; i < p->num_planes; i++) { + // The video decode texture may include padding, so the size of the + // ra_tex needs to be determined by the actual size of the Tex2D + bool chroma = i >= 1; + int w = desc2d.Width / (chroma ? 2 : 1); + int h = desc2d.Height / (chroma ? 
2 : 1); + + mapper->tex[i] = ra_d3d11_wrap_tex_video(mapper->ra, tex, + w, h, subresource, p->fmt[i]); + if (!mapper->tex[i]) + return -1; + } + } return 0; } +static void mapper_unmap(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + if (p->copy_tex) + return; + for (int i = 0; i < 4; i++) + ra_tex_free(mapper->ra, &mapper->tex[i]); +} + const struct ra_hwdec_driver ra_hwdec_d3d11va = { .name = "d3d11va", .priv_size = sizeof(struct priv_owner), @@ -191,5 +248,6 @@ const struct ra_hwdec_driver ra_hwdec_d3d11va = { .init = mapper_init, .uninit = mapper_uninit, .map = mapper_map, + .unmap = mapper_unmap, }, }; diff --git a/video/out/d3d11/ra_d3d11.c b/video/out/d3d11/ra_d3d11.c index 394f99c70b..3037966f23 100644 --- a/video/out/d3d11/ra_d3d11.c +++ b/video/out/d3d11/ra_d3d11.c @@ -75,6 +75,7 @@ struct d3d_tex { ID3D11Texture1D *tex1d; ID3D11Texture2D *tex2d; ID3D11Texture3D *tex3d; + int array_slice; ID3D11ShaderResourceView *srv; ID3D11RenderTargetView *rtv; @@ -259,14 +260,29 @@ static bool tex_init(struct ra *ra, struct ra_tex *tex) }; switch (params->dimensions) { case 1: - srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D; - srvdesc.Texture1D.MipLevels = 1; + if (tex_p->array_slice >= 0) { + srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1DARRAY; + srvdesc.Texture1DArray.MipLevels = 1; + srvdesc.Texture1DArray.FirstArraySlice = tex_p->array_slice; + srvdesc.Texture1DArray.ArraySize = 1; + } else { + srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D; + srvdesc.Texture1D.MipLevels = 1; + } break; case 2: - srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; - srvdesc.Texture2D.MipLevels = 1; + if (tex_p->array_slice >= 0) { + srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DARRAY; + srvdesc.Texture2DArray.MipLevels = 1; + srvdesc.Texture2DArray.FirstArraySlice = tex_p->array_slice; + srvdesc.Texture2DArray.ArraySize = 1; + } else { + srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + srvdesc.Texture2D.MipLevels = 1; + 
} break; case 3: + // D3D11 does not have Texture3D arrays srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D; srvdesc.Texture3D.MipLevels = 1; break; @@ -442,6 +458,8 @@ static struct ra_tex *tex_create(struct ra *ra, abort(); } + tex_p->array_slice = -1; + if (!tex_init(ra, tex)) goto error; @@ -478,10 +496,18 @@ struct ra_tex *ra_d3d11_wrap_tex(struct ra *ra, ID3D11Resource *res) D3D11_TEXTURE2D_DESC desc2d; ID3D11Texture2D_GetDesc(tex_p->tex2d, &desc2d); - if (desc2d.MipLevels != 1 || desc2d.ArraySize != 1) + if (desc2d.MipLevels != 1) { + MP_ERR(ra, "Mipmapped textures not supported for wrapping\n"); goto error; - if (desc2d.SampleDesc.Count != 1) + } + if (desc2d.ArraySize != 1) { + MP_ERR(ra, "Texture arrays not supported for wrapping\n"); + goto error; + } + if (desc2d.SampleDesc.Count != 1) { + MP_ERR(ra, "Multisampled textures not supported for wrapping\n"); goto error; + } params->dimensions = 2; params->w = desc2d.Width; @@ -522,6 +548,8 @@ struct ra_tex *ra_d3d11_wrap_tex(struct ra *ra, ID3D11Resource *res) goto error; } + tex_p->array_slice = -1; + if (!tex_init(ra, tex)) goto error; @@ -532,7 +560,7 @@ error: } struct ra_tex *ra_d3d11_wrap_tex_video(struct ra *ra, ID3D11Texture2D *res, - int w, int h, + int w, int h, int array_slice, const struct ra_format *fmt) { struct ra_tex *tex = talloc_zero(NULL, struct ra_tex); @@ -559,6 +587,12 @@ struct ra_tex *ra_d3d11_wrap_tex_video(struct ra *ra, ID3D11Texture2D *res, // fmt can be different to the texture format for planar video textures params->format = fmt; + if (desc2d.ArraySize > 1) { + tex_p->array_slice = array_slice; + } else { + tex_p->array_slice = -1; + } + if (!tex_init(ra, tex)) goto error; @@ -611,12 +645,14 @@ static bool tex_upload(struct ra *ra, const struct ra_tex_upload_params *params) } } + int subresource = tex_p->array_slice >= 0 ? tex_p->array_slice : 0; if (p->ctx1) { - ID3D11DeviceContext1_UpdateSubresource1(p->ctx1, tex_p->res, 0, rc, - src, stride, pitch, invalidate ? 
D3D11_COPY_DISCARD : 0); + ID3D11DeviceContext1_UpdateSubresource1(p->ctx1, tex_p->res, + subresource, rc, src, stride, pitch, + invalidate ? D3D11_COPY_DISCARD : 0); } else { - ID3D11DeviceContext_UpdateSubresource(p->ctx, tex_p->res, 0, rc, - src, stride, pitch); + ID3D11DeviceContext_UpdateSubresource(p->ctx, tex_p->res, subresource, + rc, src, stride, pitch); } return true; @@ -1174,8 +1210,10 @@ static void blit(struct ra *ra, struct ra_tex *dst, struct ra_tex *src, { blit_rpass(ra, dst, src, &dst_rc, &src_rc); } else { - ID3D11DeviceContext_CopySubresourceRegion(p->ctx, dst_p->res, 0, - dst_rc.x0, dst_rc.y0, 0, src_p->res, 0, (&(D3D11_BOX) { + int dst_sr = dst_p->array_slice >= 0 ? dst_p->array_slice : 0; + int src_sr = src_p->array_slice >= 0 ? src_p->array_slice : 0; + ID3D11DeviceContext_CopySubresourceRegion(p->ctx, dst_p->res, dst_sr, + dst_rc.x0, dst_rc.y0, 0, src_p->res, src_sr, (&(D3D11_BOX) { .left = src_rc.x0, .top = src_rc.y0, .front = 0, diff --git a/video/out/d3d11/ra_d3d11.h b/video/out/d3d11/ra_d3d11.h index eeadc7994e..54033b6cee 100644 --- a/video/out/d3d11/ra_d3d11.h +++ b/video/out/d3d11/ra_d3d11.h @@ -22,8 +22,9 @@ struct ra_tex *ra_d3d11_wrap_tex(struct ra *ra, ID3D11Resource *res); // As above, but for a D3D11VA video resource. The fmt parameter selects which // plane of a planar format will be mapped when the RA texture is used. +// array_slice should be set for texture arrays and is ignored for non-arrays. struct ra_tex *ra_d3d11_wrap_tex_video(struct ra *ra, ID3D11Texture2D *res, - int w, int h, + int w, int h, int array_slice, const struct ra_format *fmt); // Get the underlying D3D11 device from an RA instance. The returned device is |