From 65979986a923a8f08019b257c3fe72cd5e8ecf68 Mon Sep 17 00:00:00 2001
From: Niklas Haas
Date: Thu, 14 Sep 2017 08:04:55 +0200
Subject: vo_opengl: refactor into vo_gpu

This is done in several steps:

1. refactor MPGLContext -> struct ra_ctx
2. move GL-specific stuff in vo_opengl into opengl/context.c
3. generalize context creation to support other APIs, and add --gpu-api
4. rename all of the --opengl- options that are no longer opengl-specific
5. move all of the stuff from opengl/* that isn't GL-specific into gpu/
   (note: opengl/gl_utils.h became opengl/utils.h)
6. rename vo_opengl to vo_gpu
7. to handle window screenshots, the short-term approach was to just add
   it to ra_swapchain_fns. Long term (and for vulkan) this has to be moved
   to ra itself (and vo_gpu altered to compensate), but this was a stop-gap
   measure to prevent this commit from getting too big
8. move ra->fns->flush to ra_gl_ctx instead
9. some other minor changes that I've probably already forgotten

Note: This is one half of a major refactor, the other half of which is
provided by rossy's following commit. This commit enables support for all
linux platforms, while his version enables support for all non-linux
platforms.

Note 2: vo_opengl_cb.c also re-uses ra_gl_ctx so it benefits from the
--opengl- options like --opengl-early-flush, --opengl-finish etc. Should be
a strict superset of the old functionality.

Disclaimer: Since I have no way of compiling mpv on all platforms, some of
these ports were done blindly. Specifically, the blind ports included
context_mali_fbdev.c and context_rpi.c. Since they're both based on
egl_helpers, the port should have gone smoothly without any major changes
required. But if somebody complains about a compile error on those
platforms (assuming anybody actually uses them), you know where to
complain.
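For orientation before the diff itself: the core of the new abstraction is the
ra_ctx / ra_swapchain pair declared in video/out/gpu/context.h below. The
following sketch is not part of the patch; it only uses functions and fields
this commit introduces, calls the swapchain fns directly, assumes the backend's
init() has filled in ctx->ra and ctx->swapchain, and leaves out error handling
and the actual rendering through ctx->ra:

    #include "video/out/gpu/context.h"

    // Illustrative only: in a real VO these steps are spread across the
    // init/reconfig/draw/uninit callbacks rather than one function.
    static void frame_sketch(struct vo *vo, struct vo_frame *frame)
    {
        struct ra_ctx_opts opts = { .swapchain_depth = 3 };

        // "auto"/"auto" = probe; otherwise --gpu-api / --gpu-context names.
        struct ra_ctx *ctx = ra_ctx_create(vo, "auto", "auto", opts);
        if (!ctx)
            return;

        ctx->fns->reconfig(ctx);                // create or resize the window

        struct ra_swapchain *sw = ctx->swapchain;
        struct ra_tex *fbo = sw->fns->start_frame(sw);
        if (fbo) {
            // ... render to fbo using the ctx->ra functions ...
            sw->fns->submit_frame(sw, frame);   // lockstep with start_frame
            sw->fns->swap_buffers(sw);          // blocks to honor swapchain_depth
        }

        ra_ctx_destroy(&ctx);
    }

The context_type argument corresponds to --gpu-api and context_name to
--gpu-context; passing "auto" for both makes ra_ctx_create() probe the
contexts[] table in the order it is declared.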
---
 video/out/gpu/context.c | 186 ++
 video/out/gpu/context.h | 95 +
 video/out/gpu/hwdec.c | 239 +++
 video/out/gpu/hwdec.h | 130 ++
 video/out/gpu/lcms.c | 531 +++++
 video/out/gpu/lcms.h | 43 +
 video/out/gpu/osd.c | 367 ++++
 video/out/gpu/osd.h | 25 +
 video/out/gpu/ra.c | 327 +++
 video/out/gpu/ra.h | 488 +++++
 video/out/gpu/shader_cache.c | 954 +++++++++
 video/out/gpu/shader_cache.h | 56 +
 video/out/gpu/user_shaders.c | 452 ++++
 video/out/gpu/user_shaders.h | 98 +
 video/out/gpu/utils.c | 372 ++++
 video/out/gpu/utils.h | 120 ++
 video/out/gpu/video.c | 3809 ++++++++++++++++++++++++++++++++
 video/out/gpu/video.h | 194 ++
 video/out/gpu/video_shaders.c | 872 ++++++++
 video/out/gpu/video_shaders.h | 56 +
 video/out/opengl/common.h | 4 +-
 video/out/opengl/context.c | 446 ++--
 video/out/opengl/context.h | 152 +-
 video/out/opengl/context_cocoa.c | 2 +-
 video/out/opengl/context_drm_egl.c | 194 +-
 video/out/opengl/context_glx.c | 376 ++++
 video/out/opengl/context_mali_fbdev.c | 58 +-
 video/out/opengl/context_rpi.c | 84 +-
 video/out/opengl/context_vdpau.c | 202 +-
 video/out/opengl/context_wayland.c | 74 +-
 video/out/opengl/context_x11.c | 358 ----
 video/out/opengl/context_x11egl.c | 84 +-
 video/out/opengl/egl_helpers.c | 114 +-
 video/out/opengl/egl_helpers.h | 19 +-
 video/out/opengl/formats.h | 1 -
 video/out/opengl/gl_utils.c | 291 ---
 video/out/opengl/gl_utils.h | 56 -
 video/out/opengl/hwdec.c | 239 ---
 video/out/opengl/hwdec.h | 130 --
 video/out/opengl/hwdec_cuda.c | 3 +-
 video/out/opengl/hwdec_ios.m | 2 +-
 video/out/opengl/hwdec_osx.c | 2 +-
 video/out/opengl/hwdec_rpi.c | 2 +-
 video/out/opengl/hwdec_vaegl.c | 4 +-
 video/out/opengl/hwdec_vaglx.c | 5 +-
 video/out/opengl/hwdec_vdpau.c | 2 +-
 video/out/opengl/lcms.c | 531 -----
 video/out/opengl/lcms.h | 43 -
 video/out/opengl/osd.c | 367 ----
 video/out/opengl/osd.h | 25 -
 video/out/opengl/ra.c | 327 ---
 video/out/opengl/ra.h | 491 -----
 video/out/opengl/ra_gl.c | 7 -
 video/out/opengl/ra_gl.h | 3 +-
 video/out/opengl/shader_cache.c | 955 ---------
 video/out/opengl/shader_cache.h | 56 -
 video/out/opengl/user_shaders.c | 452 ----
 video/out/opengl/user_shaders.h | 98 -
 video/out/opengl/utils.c | 524 ++---
 video/out/opengl/utils.h | 151 +-
 video/out/opengl/video.c | 3813 ---------------------------------
 video/out/opengl/video.h | 195 --
 video/out/opengl/video_shaders.c | 872 --------
 video/out/opengl/video_shaders.h | 56 -
 video/out/vo.c | 6 +-
 video/out/vo_gpu.c | 385 ++++
 video/out/vo_opengl.c | 470 ----
 video/out/vo_opengl_cb.c | 53 +-
 video/out/vo_rpi.c | 2 +-
 69 files changed, 11238 insertions(+), 10962 deletions(-)
 create mode 100644 video/out/gpu/context.c
 create mode 100644 video/out/gpu/context.h
 create mode 100644 video/out/gpu/hwdec.c
 create mode 100644 video/out/gpu/hwdec.h
 create mode 100644 video/out/gpu/lcms.c
 create mode 100644 video/out/gpu/lcms.h
 create mode 100644 video/out/gpu/osd.c
 create mode 100644 video/out/gpu/osd.h
 create mode 100644 video/out/gpu/ra.c
 create mode 100644 video/out/gpu/ra.h
 create mode 100644 video/out/gpu/shader_cache.c
 create mode 100644 video/out/gpu/shader_cache.h
 create mode 100644 video/out/gpu/user_shaders.c
 create mode 100644 video/out/gpu/user_shaders.h
 create mode 100644 video/out/gpu/utils.c
 create mode 100644 video/out/gpu/utils.h
 create mode 100644 video/out/gpu/video.c
 create mode 100644 video/out/gpu/video.h
 create mode 100644 video/out/gpu/video_shaders.c
 create mode 100644 video/out/gpu/video_shaders.h
 create mode 100644 video/out/opengl/context_glx.c
 delete mode 100644 video/out/opengl/context_x11.c
delete mode 100644 video/out/opengl/gl_utils.c delete mode 100644 video/out/opengl/gl_utils.h delete mode 100644 video/out/opengl/hwdec.c delete mode 100644 video/out/opengl/hwdec.h delete mode 100644 video/out/opengl/lcms.c delete mode 100644 video/out/opengl/lcms.h delete mode 100644 video/out/opengl/osd.c delete mode 100644 video/out/opengl/osd.h delete mode 100644 video/out/opengl/ra.c delete mode 100644 video/out/opengl/ra.h delete mode 100644 video/out/opengl/shader_cache.c delete mode 100644 video/out/opengl/shader_cache.h delete mode 100644 video/out/opengl/user_shaders.c delete mode 100644 video/out/opengl/user_shaders.h delete mode 100644 video/out/opengl/video.c delete mode 100644 video/out/opengl/video.h delete mode 100644 video/out/opengl/video_shaders.c delete mode 100644 video/out/opengl/video_shaders.h create mode 100644 video/out/vo_gpu.c delete mode 100644 video/out/vo_opengl.c (limited to 'video') diff --git a/video/out/gpu/context.c b/video/out/gpu/context.c new file mode 100644 index 0000000000..dbabba8b3b --- /dev/null +++ b/video/out/gpu/context.c @@ -0,0 +1,186 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "config.h" +#include "common/common.h" +#include "common/msg.h" +#include "options/options.h" +#include "options/m_option.h" +#include "video/out/vo.h" + +#include "context.h" + +extern const struct ra_ctx_fns ra_ctx_glx; +extern const struct ra_ctx_fns ra_ctx_glx_probe; +extern const struct ra_ctx_fns ra_ctx_x11_egl; +extern const struct ra_ctx_fns ra_ctx_drm_egl; +extern const struct ra_ctx_fns ra_ctx_cocoa; +extern const struct ra_ctx_fns ra_ctx_wayland_egl; +extern const struct ra_ctx_fns ra_ctx_wgl; +extern const struct ra_ctx_fns ra_ctx_angle; +extern const struct ra_ctx_fns ra_ctx_dxinterop; +extern const struct ra_ctx_fns ra_ctx_rpi; +extern const struct ra_ctx_fns ra_ctx_mali; +extern const struct ra_ctx_fns ra_ctx_vdpauglx; + +static const struct ra_ctx_fns *contexts[] = { +// OpenGL contexts: +#if HAVE_RPI + &ra_ctx_rpi, +#endif +/* +#if HAVE_GL_COCOA + &ra_ctx_cocoa, +#endif +#if HAVE_EGL_ANGLE_WIN32 + &ra_ctx_angle, +#endif +#if HAVE_GL_WIN32 + &ra_ctx_wgl, +#endif +#if HAVE_GL_DXINTEROP + &ra_ctx_dxinterop, +#endif +*/ +#if HAVE_GL_X11 + &ra_ctx_glx_probe, +#endif +#if HAVE_EGL_X11 + &ra_ctx_x11_egl, +#endif +#if HAVE_GL_X11 + &ra_ctx_glx, +#endif +#if HAVE_GL_WAYLAND + &ra_ctx_wayland_egl, +#endif +#if HAVE_EGL_DRM + &ra_ctx_drm_egl, +#endif +#if HAVE_MALI_FBDEV + &ra_ctx_mali, +#endif +#if HAVE_VDPAU_GL_X11 + &ra_ctx_vdpauglx, +#endif +}; + +static bool get_help(struct mp_log *log, struct bstr param) +{ + if (bstr_equals0(param, "help")) { + mp_info(log, "GPU contexts / APIs:\n"); + mp_info(log, " auto (autodetect)\n"); + for (int n = 0; n < MP_ARRAY_SIZE(contexts); n++) + mp_info(log, " %s (%s)\n", contexts[n]->name, contexts[n]->type); + return true; + } 
+ + return false; +} + +int ra_ctx_validate_api(struct mp_log *log, const struct m_option *opt, + struct bstr name, struct bstr param) +{ + if (get_help(log, param)) + return M_OPT_EXIT; + if (bstr_equals0(param, "auto")) + return 1; + for (int i = 0; i < MP_ARRAY_SIZE(contexts); i++) { + if (bstr_equals0(param, contexts[i]->type)) + return 1; + } + return M_OPT_INVALID; +} + +int ra_ctx_validate_context(struct mp_log *log, const struct m_option *opt, + struct bstr name, struct bstr param) +{ + if (get_help(log, param)) + return M_OPT_EXIT; + if (bstr_equals0(param, "auto")) + return 1; + for (int i = 0; i < MP_ARRAY_SIZE(contexts); i++) { + if (bstr_equals0(param, contexts[i]->name)) + return 1; + } + return M_OPT_INVALID; +} + +// Create a VO window and create a RA context on it. +// vo_flags: passed to the backend's create window function +struct ra_ctx *ra_ctx_create(struct vo *vo, const char *context_type, + const char *context_name, struct ra_ctx_opts opts) +{ + bool api_auto = !context_type || strcmp(context_type, "auto") == 0; + bool ctx_auto = !context_name || strcmp(context_name, "auto") == 0; + + if (ctx_auto) { + MP_VERBOSE(vo, "Probing for best GPU context.\n"); + opts.probing = true; + } + + // Hack to silence backend (X11/Wayland/etc.) errors. Kill it once backends + // are separate from `struct vo` + bool old_probing = vo->probing; + vo->probing = opts.probing; + + for (int i = 0; i < MP_ARRAY_SIZE(contexts); i++) { + if (!opts.probing && strcmp(contexts[i]->name, context_name) != 0) + continue; + if (!api_auto && strcmp(contexts[i]->type, context_type) != 0) + continue; + + struct ra_ctx *ctx = talloc_ptrtype(NULL, ctx); + *ctx = (struct ra_ctx) { + .vo = vo, + .global = vo->global, + .log = mp_log_new(ctx, vo->log, contexts[i]->type), + .opts = opts, + .fns = contexts[i], + }; + + MP_VERBOSE(ctx, "Initializing GPU context '%s'\n", ctx->fns->name); + if (contexts[i]->init(ctx)) { + vo->probing = old_probing; + return ctx; + } + + talloc_free(ctx); + } + + // If we've reached this point, then none of the contexts matched the name + // requested, or the backend creation failed for all of them. + MP_ERR(vo, "Failed initializing any suitable GPU context!\n"); + vo->probing = old_probing; + return NULL; +} + +void ra_ctx_destroy(struct ra_ctx **ctx) +{ + if (*ctx) + (*ctx)->fns->uninit(*ctx); + talloc_free(*ctx); + *ctx = NULL; +} diff --git a/video/out/gpu/context.h b/video/out/gpu/context.h new file mode 100644 index 0000000000..42de59b75f --- /dev/null +++ b/video/out/gpu/context.h @@ -0,0 +1,95 @@ +#pragma once + +#include "video/out/vo.h" + +#include "config.h" +#include "ra.h" + +struct ra_ctx_opts { + int allow_sw; // allow software renderers + int want_alpha; // create an alpha framebuffer if possible + int debug; // enable debugging layers/callbacks etc. + bool probing; // the backend was auto-probed + int swapchain_depth; // max number of images to render ahead +}; + +struct ra_ctx { + struct vo *vo; + struct ra *ra; + struct mpv_global *global; + struct mp_log *log; + + struct ra_ctx_opts opts; + const struct ra_ctx_fns *fns; + struct ra_swapchain *swapchain; + + void *priv; +}; + +// The functions that make up a ra_ctx. +struct ra_ctx_fns { + const char *type; // API type (for --gpu-api) + const char *name; // name (for --gpu-context) + + // Resize the window, or create a new window if there isn't one yet. + // Currently, there is an unfortunate interaction with ctx->vo, and + // display size etc. are determined by it. 
+ bool (*reconfig)(struct ra_ctx *ctx); + + // This behaves exactly like vo_driver.control(). + int (*control)(struct ra_ctx *ctx, int *events, int request, void *arg); + + // These behave exactly like vo_driver.wakeup/wait_events. They are + // optional. + void (*wakeup)(struct ra_ctx *ctx); + void (*wait_events)(struct ra_ctx *ctx, int64_t until_time_us); + + // Initialize/destroy the 'struct ra' and possibly the underlying VO backend. + // Not normally called by the user of the ra_ctx. + bool (*init)(struct ra_ctx *ctx); + void (*uninit)(struct ra_ctx *ctx); +}; + +// Extra struct for the swapchain-related functions so they can be easily +// inherited from helpers. +struct ra_swapchain { + struct ra_ctx *ctx; + struct priv *priv; + const struct ra_swapchain_fns *fns; + + bool flip_v; // flip the rendered image vertically (set by the swapchain) +}; + +struct ra_swapchain_fns { + // Gets the current framebuffer depth in bits (0 if unknown). Optional. + int (*color_depth)(struct ra_swapchain *sw); + + // Retrieves a screenshot of the framebuffer. These are always the right + // side up, regardless of ra_swapchain->flip_v. Optional. + struct mp_image *(*screenshot)(struct ra_swapchain *sw); + + // Called when rendering starts. Returns NULL on failure. This must be + // followed by submit_frame, to submit the rendered frame. + struct ra_tex *(*start_frame)(struct ra_swapchain *sw); + + // Present the frame. Issued in lockstep with start_frame, with rendering + // commands in between. The `frame` is just there for timing data, for + // swapchains smart enough to do something with it. + bool (*submit_frame)(struct ra_swapchain *sw, const struct vo_frame *frame); + + // Performs a buffer swap. This blocks for as long as necessary to meet + // params.swapchain_depth, or until the next vblank (for vsynced contexts) + void (*swap_buffers)(struct ra_swapchain *sw); +}; + +// Create and destroy a ra_ctx. This also takes care of creating and destroying +// the underlying `struct ra`, and perhaps the underlying VO backend. +struct ra_ctx *ra_ctx_create(struct vo *vo, const char *context_type, + const char *context_name, struct ra_ctx_opts opts); +void ra_ctx_destroy(struct ra_ctx **ctx); + +struct m_option; +int ra_ctx_validate_api(struct mp_log *log, const struct m_option *opt, + struct bstr name, struct bstr param); +int ra_ctx_validate_context(struct mp_log *log, const struct m_option *opt, + struct bstr name, struct bstr param); diff --git a/video/out/gpu/hwdec.c b/video/out/gpu/hwdec.c new file mode 100644 index 0000000000..5fbc1aa4a9 --- /dev/null +++ b/video/out/gpu/hwdec.c @@ -0,0 +1,239 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see . 
+ */ + +#include +#include + +#include "config.h" + +#include "common/common.h" +#include "common/msg.h" +#include "options/m_config.h" +#include "hwdec.h" + +extern const struct ra_hwdec_driver ra_hwdec_vaegl; +extern const struct ra_hwdec_driver ra_hwdec_vaglx; +extern const struct ra_hwdec_driver ra_hwdec_videotoolbox; +extern const struct ra_hwdec_driver ra_hwdec_vdpau; +extern const struct ra_hwdec_driver ra_hwdec_dxva2egl; +extern const struct ra_hwdec_driver ra_hwdec_d3d11egl; +extern const struct ra_hwdec_driver ra_hwdec_d3d11eglrgb; +extern const struct ra_hwdec_driver ra_hwdec_dxva2gldx; +extern const struct ra_hwdec_driver ra_hwdec_dxva2; +extern const struct ra_hwdec_driver ra_hwdec_cuda; +extern const struct ra_hwdec_driver ra_hwdec_rpi_overlay; + +static const struct ra_hwdec_driver *const mpgl_hwdec_drivers[] = { +#if HAVE_VAAPI_EGL + &ra_hwdec_vaegl, +#endif +#if HAVE_VAAPI_GLX + &ra_hwdec_vaglx, +#endif +#if HAVE_VDPAU_GL_X11 + &ra_hwdec_vdpau, +#endif +#if HAVE_VIDEOTOOLBOX_GL || HAVE_IOS_GL + &ra_hwdec_videotoolbox, +#endif +#if HAVE_D3D_HWACCEL + &ra_hwdec_d3d11egl, + &ra_hwdec_d3d11eglrgb, + #if HAVE_D3D9_HWACCEL + &ra_hwdec_dxva2egl, + #endif +#endif +#if HAVE_GL_DXINTEROP_D3D9 + &ra_hwdec_dxva2gldx, +#endif +#if HAVE_CUDA_HWACCEL + &ra_hwdec_cuda, +#endif +#if HAVE_RPI + &ra_hwdec_rpi_overlay, +#endif + NULL +}; + +static struct ra_hwdec *load_hwdec_driver(struct mp_log *log, struct ra *ra, + struct mpv_global *global, + struct mp_hwdec_devices *devs, + const struct ra_hwdec_driver *drv, + bool is_auto) +{ + struct ra_hwdec *hwdec = talloc(NULL, struct ra_hwdec); + *hwdec = (struct ra_hwdec) { + .driver = drv, + .log = mp_log_new(hwdec, log, drv->name), + .global = global, + .ra = ra, + .devs = devs, + .probing = is_auto, + .priv = talloc_zero_size(hwdec, drv->priv_size), + }; + mp_verbose(log, "Loading hwdec driver '%s'\n", drv->name); + if (hwdec->driver->init(hwdec) < 0) { + ra_hwdec_uninit(hwdec); + mp_verbose(log, "Loading failed.\n"); + return NULL; + } + return hwdec; +} + +struct ra_hwdec *ra_hwdec_load_api(struct mp_log *log, struct ra *ra, + struct mpv_global *g, + struct mp_hwdec_devices *devs, + enum hwdec_type api) +{ + bool is_auto = HWDEC_IS_AUTO(api); + for (int n = 0; mpgl_hwdec_drivers[n]; n++) { + const struct ra_hwdec_driver *drv = mpgl_hwdec_drivers[n]; + if ((is_auto || api == drv->api) && !drv->testing_only) { + struct ra_hwdec *r = load_hwdec_driver(log, ra, g, devs, drv, is_auto); + if (r) + return r; + } + } + return NULL; +} + +// Load by option name. 
+struct ra_hwdec *ra_hwdec_load(struct mp_log *log, struct ra *ra, + struct mpv_global *g, + struct mp_hwdec_devices *devs, + const char *name) +{ + int g_hwdec_api; + mp_read_option_raw(g, "hwdec", &m_option_type_choice, &g_hwdec_api); + if (!name || !name[0]) + name = m_opt_choice_str(mp_hwdec_names, g_hwdec_api); + + int api_id = HWDEC_NONE; + for (int n = 0; mp_hwdec_names[n].name; n++) { + if (name && strcmp(mp_hwdec_names[n].name, name) == 0) + api_id = mp_hwdec_names[n].value; + } + + for (int n = 0; mpgl_hwdec_drivers[n]; n++) { + const struct ra_hwdec_driver *drv = mpgl_hwdec_drivers[n]; + if (name && strcmp(drv->name, name) == 0) { + struct ra_hwdec *r = load_hwdec_driver(log, ra, g, devs, drv, false); + if (r) + return r; + } + } + + return ra_hwdec_load_api(log, ra, g, devs, api_id); +} + +int ra_hwdec_validate_opt(struct mp_log *log, const m_option_t *opt, + struct bstr name, struct bstr param) +{ + bool help = bstr_equals0(param, "help"); + if (help) + mp_info(log, "Available hwdecs:\n"); + for (int n = 0; mpgl_hwdec_drivers[n]; n++) { + const struct ra_hwdec_driver *drv = mpgl_hwdec_drivers[n]; + const char *api_name = m_opt_choice_str(mp_hwdec_names, drv->api); + if (help) { + mp_info(log, " %s [%s]\n", drv->name, api_name); + } else if (bstr_equals0(param, drv->name) || + bstr_equals0(param, api_name)) + { + return 1; + } + } + if (help) { + mp_info(log, " auto (loads best)\n" + " (other --hwdec values)\n" + "Setting an empty string means use --hwdec.\n"); + return M_OPT_EXIT; + } + if (!param.len) + return 1; // "" is treated specially + for (int n = 0; mp_hwdec_names[n].name; n++) { + if (bstr_equals0(param, mp_hwdec_names[n].name)) + return 1; + } + mp_fatal(log, "No hwdec backend named '%.*s' found!\n", BSTR_P(param)); + return M_OPT_INVALID; +} + +void ra_hwdec_uninit(struct ra_hwdec *hwdec) +{ + if (hwdec) + hwdec->driver->uninit(hwdec); + talloc_free(hwdec); +} + +bool ra_hwdec_test_format(struct ra_hwdec *hwdec, int imgfmt) +{ + for (int n = 0; hwdec->driver->imgfmts[n]; n++) { + if (hwdec->driver->imgfmts[n] == imgfmt) + return true; + } + return false; +} + +struct ra_hwdec_mapper *ra_hwdec_mapper_create(struct ra_hwdec *hwdec, + struct mp_image_params *params) +{ + assert(ra_hwdec_test_format(hwdec, params->imgfmt)); + + struct ra_hwdec_mapper *mapper = talloc_ptrtype(NULL, mapper); + *mapper = (struct ra_hwdec_mapper){ + .owner = hwdec, + .driver = hwdec->driver->mapper, + .log = hwdec->log, + .ra = hwdec->ra, + .priv = talloc_zero_size(mapper, hwdec->driver->mapper->priv_size), + .src_params = *params, + .dst_params = *params, + }; + if (mapper->driver->init(mapper) < 0) + ra_hwdec_mapper_free(&mapper); + return mapper; +} + +void ra_hwdec_mapper_free(struct ra_hwdec_mapper **mapper) +{ + struct ra_hwdec_mapper *p = *mapper; + if (p) { + ra_hwdec_mapper_unmap(p); + p->driver->uninit(p); + talloc_free(p); + } + *mapper = NULL; +} + +void ra_hwdec_mapper_unmap(struct ra_hwdec_mapper *mapper) +{ + if (mapper->driver->unmap) + mapper->driver->unmap(mapper); + mp_image_unrefp(&mapper->src); +} + +int ra_hwdec_mapper_map(struct ra_hwdec_mapper *mapper, struct mp_image *img) +{ + ra_hwdec_mapper_unmap(mapper); + mp_image_setrefp(&mapper->src, img); + if (mapper->driver->map(mapper) < 0) { + ra_hwdec_mapper_unmap(mapper); + return -1; + } + return 0; +} diff --git a/video/out/gpu/hwdec.h b/video/out/gpu/hwdec.h new file mode 100644 index 0000000000..20bbaae9eb --- /dev/null +++ b/video/out/gpu/hwdec.h @@ -0,0 +1,130 @@ +#ifndef MPGL_HWDEC_H_ +#define MPGL_HWDEC_H_ + 
+#include "video/mp_image.h" +#include "ra.h" +#include "video/hwdec.h" + +struct ra_hwdec { + const struct ra_hwdec_driver *driver; + struct mp_log *log; + struct mpv_global *global; + struct ra *ra; + struct mp_hwdec_devices *devs; + // GLSL extensions required to sample textures from this. + const char **glsl_extensions; + // For free use by hwdec driver + void *priv; + // For working around the vdpau vs. vaapi mess. + bool probing; + // Used in overlay mode only. + float overlay_colorkey[4]; +}; + +struct ra_hwdec_mapper { + const struct ra_hwdec_mapper_driver *driver; + struct mp_log *log; + struct ra *ra; + void *priv; + struct ra_hwdec *owner; + // Input frame parameters. (Set before init(), immutable.) + struct mp_image_params src_params; + // Output frame parameters (represents the format the textures return). Must + // be set by init(), immutable afterwards, + struct mp_image_params dst_params; + + // The currently mapped source image (or the image about to be mapped in + // ->map()). NULL if unmapped. The mapper can also clear this reference if + // the mapped textures contain a full copy. + struct mp_image *src; + + // The mapped textures and metadata about them. These fields change if a + // new frame is mapped (or unmapped), but otherwise remain constant. + // The common code won't mess with these, so you can e.g. set them in the + // .init() callback. + struct ra_tex *tex[4]; + bool vdpau_fields; +}; + +// This can be used to map frames of a specific hw format as GL textures. +struct ra_hwdec_mapper_driver { + // Used to create ra_hwdec_mapper.priv. + size_t priv_size; + + // Init the mapper implementation. At this point, the field src_params, + // fns, devs, priv are initialized. + int (*init)(struct ra_hwdec_mapper *mapper); + // Destroy the mapper. unmap is called before this. + void (*uninit)(struct ra_hwdec_mapper *mapper); + + // Map mapper->src as texture, and set mapper->frame to textures using it. + // It is expected that that the textures remain valid until the next unmap + // or uninit call. + // The function is allowed to unref mapper->src if it's not needed (i.e. + // this function creates a copy). + // The underlying format can change, so you might need to do some form + // of change detection. You also must reject unsupported formats with an + // error. + // On error, returns negative value on error and remains unmapped. + int (*map)(struct ra_hwdec_mapper *mapper); + // Unmap the frame. Does nothing if already unmapped. Optional. + void (*unmap)(struct ra_hwdec_mapper *mapper); +}; + +struct ra_hwdec_driver { + // Name of the interop backend. This is used for informational purposes only. + const char *name; + // Used to create ra_hwdec.priv. + size_t priv_size; + // Used to explicitly request a specific API. + enum hwdec_type api; + // One of the hardware surface IMGFMT_ that must be passed to map_image later. + // Terminated with a 0 entry. (Extend the array size as needed.) + const int imgfmts[3]; + // Dosn't load this unless requested by name. + bool testing_only; + + // Create the hwdec device. It must add it to hw->devs, if applicable. + int (*init)(struct ra_hwdec *hw); + void (*uninit)(struct ra_hwdec *hw); + + // This will be used to create a ra_hwdec_mapper from ra_hwdec. + const struct ra_hwdec_mapper_driver *mapper; + + // The following function provides an alternative API. Each ra_hwdec_driver + // must have either provide a mapper or overlay_frame (not both or none), and + // if overlay_frame is set, it operates in overlay mode. 
In this mode, + // OSD etc. is rendered via OpenGL, but the video is rendered as a separate + // layer below it. + // Non-overlay mode is strictly preferred, so try not to use overlay mode. + // Set the given frame as overlay, replacing the previous one. This can also + // just change the position of the overlay. + // hw_image==src==dst==NULL is passed to clear the overlay. + int (*overlay_frame)(struct ra_hwdec *hw, struct mp_image *hw_image, + struct mp_rect *src, struct mp_rect *dst, bool newframe); +}; + +struct ra_hwdec *ra_hwdec_load_api(struct mp_log *log, struct ra *ra, + struct mpv_global *g, + struct mp_hwdec_devices *devs, + enum hwdec_type api); + +struct ra_hwdec *ra_hwdec_load(struct mp_log *log, struct ra *ra, + struct mpv_global *g, + struct mp_hwdec_devices *devs, + const char *name); + +int ra_hwdec_validate_opt(struct mp_log *log, const m_option_t *opt, + struct bstr name, struct bstr param); + +void ra_hwdec_uninit(struct ra_hwdec *hwdec); + +bool ra_hwdec_test_format(struct ra_hwdec *hwdec, int imgfmt); + +struct ra_hwdec_mapper *ra_hwdec_mapper_create(struct ra_hwdec *hwdec, + struct mp_image_params *params); +void ra_hwdec_mapper_free(struct ra_hwdec_mapper **mapper); +void ra_hwdec_mapper_unmap(struct ra_hwdec_mapper *mapper); +int ra_hwdec_mapper_map(struct ra_hwdec_mapper *mapper, struct mp_image *img); + +#endif diff --git a/video/out/gpu/lcms.c b/video/out/gpu/lcms.c new file mode 100644 index 0000000000..8747ae6aa6 --- /dev/null +++ b/video/out/gpu/lcms.c @@ -0,0 +1,531 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see . 
+ */ + +#include +#include + +#include "mpv_talloc.h" + +#include "config.h" + +#include "stream/stream.h" +#include "common/common.h" +#include "misc/bstr.h" +#include "common/msg.h" +#include "options/m_option.h" +#include "options/path.h" +#include "video/csputils.h" +#include "lcms.h" + +#include "osdep/io.h" + +#if HAVE_LCMS2 + +#include +#include +#include + +struct gl_lcms { + void *icc_data; + size_t icc_size; + struct AVBufferRef *vid_profile; + char *current_profile; + bool using_memory_profile; + bool changed; + enum mp_csp_prim current_prim; + enum mp_csp_trc current_trc; + + struct mp_log *log; + struct mpv_global *global; + struct mp_icc_opts *opts; +}; + +static bool parse_3dlut_size(const char *arg, int *p1, int *p2, int *p3) +{ + if (sscanf(arg, "%dx%dx%d", p1, p2, p3) != 3) + return false; + for (int n = 0; n < 3; n++) { + int s = ((int[]) { *p1, *p2, *p3 })[n]; + if (s < 2 || s > 512) + return false; + } + return true; +} + +static int validate_3dlut_size_opt(struct mp_log *log, const m_option_t *opt, + struct bstr name, struct bstr param) +{ + int p1, p2, p3; + char s[20]; + snprintf(s, sizeof(s), "%.*s", BSTR_P(param)); + return parse_3dlut_size(s, &p1, &p2, &p3); +} + +#define OPT_BASE_STRUCT struct mp_icc_opts +const struct m_sub_options mp_icc_conf = { + .opts = (const m_option_t[]) { + OPT_FLAG("use-embedded-icc-profile", use_embedded, 0), + OPT_STRING("icc-profile", profile, M_OPT_FILE), + OPT_FLAG("icc-profile-auto", profile_auto, 0), + OPT_STRING("icc-cache-dir", cache_dir, M_OPT_FILE), + OPT_INT("icc-intent", intent, 0), + OPT_INTRANGE("icc-contrast", contrast, 0, 0, 100000), + OPT_STRING_VALIDATE("icc-3dlut-size", size_str, 0, validate_3dlut_size_opt), + + OPT_REPLACED("3dlut-size", "icc-3dlut-size"), + OPT_REMOVED("icc-cache", "see icc-cache-dir"), + {0} + }, + .size = sizeof(struct mp_icc_opts), + .defaults = &(const struct mp_icc_opts) { + .size_str = "64x64x64", + .intent = INTENT_RELATIVE_COLORIMETRIC, + .use_embedded = true, + }, +}; + +static void lcms2_error_handler(cmsContext ctx, cmsUInt32Number code, + const char *msg) +{ + struct gl_lcms *p = cmsGetContextUserData(ctx); + MP_ERR(p, "lcms2: %s\n", msg); +} + +static void load_profile(struct gl_lcms *p) +{ + talloc_free(p->icc_data); + p->icc_data = NULL; + p->icc_size = 0; + p->using_memory_profile = false; + talloc_free(p->current_profile); + p->current_profile = NULL; + + if (!p->opts->profile || !p->opts->profile[0]) + return; + + char *fname = mp_get_user_path(NULL, p->global, p->opts->profile); + MP_VERBOSE(p, "Opening ICC profile '%s'\n", fname); + struct bstr iccdata = stream_read_file(fname, p, p->global, + 100000000); // 100 MB + talloc_free(fname); + if (!iccdata.len) + return; + + talloc_free(p->icc_data); + + p->icc_data = iccdata.start; + p->icc_size = iccdata.len; + p->current_profile = talloc_strdup(p, p->opts->profile); +} + +static void gl_lcms_destructor(void *ptr) +{ + struct gl_lcms *p = ptr; + av_buffer_unref(&p->vid_profile); +} + +struct gl_lcms *gl_lcms_init(void *talloc_ctx, struct mp_log *log, + struct mpv_global *global, + struct mp_icc_opts *opts) +{ + struct gl_lcms *p = talloc_ptrtype(talloc_ctx, p); + talloc_set_destructor(p, gl_lcms_destructor); + *p = (struct gl_lcms) { + .global = global, + .log = log, + .opts = opts, + }; + gl_lcms_update_options(p); + return p; +} + +void gl_lcms_update_options(struct gl_lcms *p) +{ + if ((p->using_memory_profile && !p->opts->profile_auto) || + !bstr_equals(bstr0(p->opts->profile), bstr0(p->current_profile))) + { + 
load_profile(p); + } + + p->changed = true; // probably +} + +// Warning: profile.start must point to a ta allocation, and the function +// takes over ownership. +// Returns whether the internal profile was changed. +bool gl_lcms_set_memory_profile(struct gl_lcms *p, bstr profile) +{ + if (!p->opts->profile_auto || (p->opts->profile && p->opts->profile[0])) { + talloc_free(profile.start); + return false; + } + + if (p->using_memory_profile && + p->icc_data && profile.start && + profile.len == p->icc_size && + memcmp(profile.start, p->icc_data, p->icc_size) == 0) + { + talloc_free(profile.start); + return false; + } + + p->changed = true; + p->using_memory_profile = true; + + talloc_free(p->icc_data); + + p->icc_data = talloc_steal(p, profile.start); + p->icc_size = profile.len; + + return true; +} + +// Guards against NULL and uses bstr_equals to short-circuit some special cases +static bool vid_profile_eq(struct AVBufferRef *a, struct AVBufferRef *b) +{ + if (!a || !b) + return a == b; + + return bstr_equals((struct bstr){ a->data, a->size }, + (struct bstr){ b->data, b->size }); +} + +// Return whether the profile or config has changed since the last time it was +// retrieved. If it has changed, gl_lcms_get_lut3d() should be called. +bool gl_lcms_has_changed(struct gl_lcms *p, enum mp_csp_prim prim, + enum mp_csp_trc trc, struct AVBufferRef *vid_profile) +{ + if (p->changed || p->current_prim != prim || p->current_trc != trc) + return true; + + return !vid_profile_eq(p->vid_profile, vid_profile); +} + +// Whether a profile is set. (gl_lcms_get_lut3d() is expected to return a lut, +// but it could still fail due to runtime errors, such as invalid icc data.) +bool gl_lcms_has_profile(struct gl_lcms *p) +{ + return p->icc_size > 0; +} + +static cmsHPROFILE get_vid_profile(struct gl_lcms *p, cmsContext cms, + cmsHPROFILE disp_profile, + enum mp_csp_prim prim, enum mp_csp_trc trc) +{ + if (p->opts->use_embedded && p->vid_profile) { + // Try using the embedded ICC profile + cmsHPROFILE prof = cmsOpenProfileFromMemTHR(cms, p->vid_profile->data, + p->vid_profile->size); + if (prof) { + MP_VERBOSE(p, "Successfully opened embedded ICC profile\n"); + return prof; + } + + // Otherwise, warn the user and generate the profile as usual + MP_WARN(p, "Video contained an invalid ICC profile! 
Ignoring..\n"); + } + + // The input profile for the transformation is dependent on the video + // primaries and transfer characteristics + struct mp_csp_primaries csp = mp_get_csp_primaries(prim); + cmsCIExyY wp_xyY = {csp.white.x, csp.white.y, 1.0}; + cmsCIExyYTRIPLE prim_xyY = { + .Red = {csp.red.x, csp.red.y, 1.0}, + .Green = {csp.green.x, csp.green.y, 1.0}, + .Blue = {csp.blue.x, csp.blue.y, 1.0}, + }; + + cmsToneCurve *tonecurve[3] = {0}; + switch (trc) { + case MP_CSP_TRC_LINEAR: tonecurve[0] = cmsBuildGamma(cms, 1.0); break; + case MP_CSP_TRC_GAMMA18: tonecurve[0] = cmsBuildGamma(cms, 1.8); break; + case MP_CSP_TRC_GAMMA22: tonecurve[0] = cmsBuildGamma(cms, 2.2); break; + case MP_CSP_TRC_GAMMA28: tonecurve[0] = cmsBuildGamma(cms, 2.8); break; + + case MP_CSP_TRC_SRGB: + // Values copied from Little-CMS + tonecurve[0] = cmsBuildParametricToneCurve(cms, 4, + (double[5]){2.40, 1/1.055, 0.055/1.055, 1/12.92, 0.04045}); + break; + + case MP_CSP_TRC_PRO_PHOTO: + tonecurve[0] = cmsBuildParametricToneCurve(cms, 4, + (double[5]){1.8, 1.0, 0.0, 1/16.0, 0.03125}); + break; + + case MP_CSP_TRC_BT_1886: { + // To build an appropriate BT.1886 transformation we need access to + // the display's black point, so we LittleCMS' detection function. + // Relative colorimetric is used since we want to approximate the + // BT.1886 to the target device's actual black point even in e.g. + // perceptual mode + const int intent = MP_INTENT_RELATIVE_COLORIMETRIC; + cmsCIEXYZ bp_XYZ; + if (!cmsDetectBlackPoint(&bp_XYZ, disp_profile, intent, 0)) + return false; + + // Map this XYZ value back into the (linear) source space + cmsToneCurve *linear = cmsBuildGamma(cms, 1.0); + cmsHPROFILE rev_profile = cmsCreateRGBProfileTHR(cms, &wp_xyY, &prim_xyY, + (cmsToneCurve*[3]){linear, linear, linear}); + cmsHPROFILE xyz_profile = cmsCreateXYZProfile(); + cmsHTRANSFORM xyz2src = cmsCreateTransformTHR(cms, + xyz_profile, TYPE_XYZ_DBL, rev_profile, TYPE_RGB_DBL, + intent, 0); + cmsFreeToneCurve(linear); + cmsCloseProfile(rev_profile); + cmsCloseProfile(xyz_profile); + if (!xyz2src) + return false; + + double src_black[3]; + cmsDoTransform(xyz2src, &bp_XYZ, src_black, 1); + cmsDeleteTransform(xyz2src); + + // Contrast limiting + if (p->opts->contrast > 0) { + for (int i = 0; i < 3; i++) + src_black[i] = MPMAX(src_black[i], 1.0 / p->opts->contrast); + } + + // Built-in contrast failsafe + double contrast = 3.0 / (src_black[0] + src_black[1] + src_black[2]); + if (contrast > 100000) { + MP_WARN(p, "ICC profile detected contrast very high (>100000)," + " falling back to contrast 1000 for sanity. 
Set the" + " icc-contrast option to silence this warning.\n"); + src_black[0] = src_black[1] = src_black[2] = 1.0 / 1000; + } + + // Build the parametric BT.1886 transfer curve, one per channel + for (int i = 0; i < 3; i++) { + const double gamma = 2.40; + double binv = pow(src_black[i], 1.0/gamma); + tonecurve[i] = cmsBuildParametricToneCurve(cms, 6, + (double[4]){gamma, 1.0 - binv, binv, 0.0}); + } + break; + } + + default: + abort(); + } + + if (!tonecurve[0]) + return false; + + if (!tonecurve[1]) tonecurve[1] = tonecurve[0]; + if (!tonecurve[2]) tonecurve[2] = tonecurve[0]; + + cmsHPROFILE *vid_profile = cmsCreateRGBProfileTHR(cms, &wp_xyY, &prim_xyY, + tonecurve); + + if (tonecurve[2] != tonecurve[0]) cmsFreeToneCurve(tonecurve[2]); + if (tonecurve[1] != tonecurve[0]) cmsFreeToneCurve(tonecurve[1]); + cmsFreeToneCurve(tonecurve[0]); + + return vid_profile; +} + +bool gl_lcms_get_lut3d(struct gl_lcms *p, struct lut3d **result_lut3d, + enum mp_csp_prim prim, enum mp_csp_trc trc, + struct AVBufferRef *vid_profile) +{ + int s_r, s_g, s_b; + bool result = false; + + p->changed = false; + p->current_prim = prim; + p->current_trc = trc; + + // We need to hold on to a reference to the video's ICC profile for as long + // as we still need to perform equality checking, so generate a new + // reference here + av_buffer_unref(&p->vid_profile); + if (vid_profile) { + MP_VERBOSE(p, "Got an embedded ICC profile.\n"); + p->vid_profile = av_buffer_ref(vid_profile); + if (!p->vid_profile) + abort(); + } + + if (!parse_3dlut_size(p->opts->size_str, &s_r, &s_g, &s_b)) + return false; + + if (!gl_lcms_has_profile(p)) + return false; + + void *tmp = talloc_new(NULL); + uint16_t *output = talloc_array(tmp, uint16_t, s_r * s_g * s_b * 4); + struct lut3d *lut = NULL; + cmsContext cms = NULL; + + char *cache_file = NULL; + if (p->opts->cache_dir && p->opts->cache_dir[0]) { + // Gamma is included in the header to help uniquely identify it, + // because we may change the parameter in the future or make it + // customizable, same for the primaries. 
+ char *cache_info = talloc_asprintf(tmp, + "ver=1.4, intent=%d, size=%dx%dx%d, prim=%d, trc=%d, " + "contrast=%d\n", + p->opts->intent, s_r, s_g, s_b, prim, trc, p->opts->contrast); + + uint8_t hash[32]; + struct AVSHA *sha = av_sha_alloc(); + if (!sha) + abort(); + av_sha_init(sha, 256); + av_sha_update(sha, cache_info, strlen(cache_info)); + if (vid_profile) + av_sha_update(sha, vid_profile->data, vid_profile->size); + av_sha_update(sha, p->icc_data, p->icc_size); + av_sha_final(sha, hash); + av_free(sha); + + char *cache_dir = mp_get_user_path(tmp, p->global, p->opts->cache_dir); + cache_file = talloc_strdup(tmp, ""); + for (int i = 0; i < sizeof(hash); i++) + cache_file = talloc_asprintf_append(cache_file, "%02X", hash[i]); + cache_file = mp_path_join(tmp, cache_dir, cache_file); + + mp_mkdirp(cache_dir); + } + + // check cache + if (cache_file && stat(cache_file, &(struct stat){0}) == 0) { + MP_VERBOSE(p, "Opening 3D LUT cache in file '%s'.\n", cache_file); + struct bstr cachedata = stream_read_file(cache_file, tmp, p->global, + 1000000000); // 1 GB + if (cachedata.len == talloc_get_size(output)) { + memcpy(output, cachedata.start, cachedata.len); + goto done; + } else { + MP_WARN(p, "3D LUT cache invalid!\n"); + } + } + + cms = cmsCreateContext(NULL, p); + if (!cms) + goto error_exit; + cmsSetLogErrorHandlerTHR(cms, lcms2_error_handler); + + cmsHPROFILE profile = + cmsOpenProfileFromMemTHR(cms, p->icc_data, p->icc_size); + if (!profile) + goto error_exit; + + cmsHPROFILE vid_hprofile = get_vid_profile(p, cms, profile, prim, trc); + if (!vid_hprofile) { + cmsCloseProfile(profile); + goto error_exit; + } + + cmsHTRANSFORM trafo = cmsCreateTransformTHR(cms, vid_hprofile, TYPE_RGB_16, + profile, TYPE_RGBA_16, + p->opts->intent, + cmsFLAGS_HIGHRESPRECALC | + cmsFLAGS_BLACKPOINTCOMPENSATION); + cmsCloseProfile(profile); + cmsCloseProfile(vid_hprofile); + + if (!trafo) + goto error_exit; + + // transform a (s_r)x(s_g)x(s_b) cube, with 3 components per channel + uint16_t *input = talloc_array(tmp, uint16_t, s_r * 3); + for (int b = 0; b < s_b; b++) { + for (int g = 0; g < s_g; g++) { + for (int r = 0; r < s_r; r++) { + input[r * 3 + 0] = r * 65535 / (s_r - 1); + input[r * 3 + 1] = g * 65535 / (s_g - 1); + input[r * 3 + 2] = b * 65535 / (s_b - 1); + } + size_t base = (b * s_r * s_g + g * s_r) * 4; + cmsDoTransform(trafo, input, output + base, s_r); + } + } + + cmsDeleteTransform(trafo); + + if (cache_file) { + FILE *out = fopen(cache_file, "wb"); + if (out) { + fwrite(output, talloc_get_size(output), 1, out); + fclose(out); + } + } + +done: ; + + lut = talloc_ptrtype(NULL, lut); + *lut = (struct lut3d) { + .data = talloc_steal(lut, output), + .size = {s_r, s_g, s_b}, + }; + + *result_lut3d = lut; + result = true; + +error_exit: + + if (cms) + cmsDeleteContext(cms); + + if (!lut) + MP_FATAL(p, "Error loading ICC profile.\n"); + + talloc_free(tmp); + return result; +} + +#else /* HAVE_LCMS2 */ + +const struct m_sub_options mp_icc_conf = { + .opts = (const m_option_t[]) { {0} }, + .size = sizeof(struct mp_icc_opts), + .defaults = &(const struct mp_icc_opts) {0}, +}; + +struct gl_lcms *gl_lcms_init(void *talloc_ctx, struct mp_log *log, + struct mpv_global *global, + struct mp_icc_opts *opts) +{ + return (struct gl_lcms *) talloc_new(talloc_ctx); +} + +void gl_lcms_update_options(struct gl_lcms *p) { } +bool gl_lcms_set_memory_profile(struct gl_lcms *p, bstr profile) {return false;} + +bool gl_lcms_has_changed(struct gl_lcms *p, enum mp_csp_prim prim, + enum mp_csp_trc trc, struct AVBufferRef 
*vid_profile) +{ + return false; +} + +bool gl_lcms_has_profile(struct gl_lcms *p) +{ + return false; +} + +bool gl_lcms_get_lut3d(struct gl_lcms *p, struct lut3d **result_lut3d, + enum mp_csp_prim prim, enum mp_csp_trc trc, + struct AVBufferRef *vid_profile) +{ + return false; +} + +#endif diff --git a/video/out/gpu/lcms.h b/video/out/gpu/lcms.h new file mode 100644 index 0000000000..35bbd61fe0 --- /dev/null +++ b/video/out/gpu/lcms.h @@ -0,0 +1,43 @@ +#ifndef MP_GL_LCMS_H +#define MP_GL_LCMS_H + +#include +#include +#include "misc/bstr.h" +#include "video/csputils.h" +#include + +extern const struct m_sub_options mp_icc_conf; + +struct mp_icc_opts { + int use_embedded; + char *profile; + int profile_auto; + char *cache_dir; + char *size_str; + int intent; + int contrast; +}; + +struct lut3d { + uint16_t *data; + int size[3]; +}; + +struct mp_log; +struct mpv_global; +struct gl_lcms; + +struct gl_lcms *gl_lcms_init(void *talloc_ctx, struct mp_log *log, + struct mpv_global *global, + struct mp_icc_opts *opts); +void gl_lcms_update_options(struct gl_lcms *p); +bool gl_lcms_set_memory_profile(struct gl_lcms *p, bstr profile); +bool gl_lcms_has_profile(struct gl_lcms *p); +bool gl_lcms_get_lut3d(struct gl_lcms *p, struct lut3d **, + enum mp_csp_prim prim, enum mp_csp_trc trc, + struct AVBufferRef *vid_profile); +bool gl_lcms_has_changed(struct gl_lcms *p, enum mp_csp_prim prim, + enum mp_csp_trc trc, struct AVBufferRef *vid_profile); + +#endif diff --git a/video/out/gpu/osd.c b/video/out/gpu/osd.c new file mode 100644 index 0000000000..f7c325d1db --- /dev/null +++ b/video/out/gpu/osd.c @@ -0,0 +1,367 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see . 
+ */ + +#include +#include +#include + +#include + +#include "common/common.h" +#include "common/msg.h" +#include "video/csputils.h" +#include "video/mp_image.h" +#include "osd.h" + +#define GLSL(x) gl_sc_add(sc, #x "\n"); + +// glBlendFuncSeparate() arguments +static const int blend_factors[SUBBITMAP_COUNT][4] = { + [SUBBITMAP_LIBASS] = {RA_BLEND_SRC_ALPHA, RA_BLEND_ONE_MINUS_SRC_ALPHA, + RA_BLEND_ONE, RA_BLEND_ONE_MINUS_SRC_ALPHA}, + [SUBBITMAP_RGBA] = {RA_BLEND_ONE, RA_BLEND_ONE_MINUS_SRC_ALPHA, + RA_BLEND_ONE, RA_BLEND_ONE_MINUS_SRC_ALPHA}, +}; + +struct vertex { + float position[2]; + float texcoord[2]; + uint8_t ass_color[4]; +}; + +static const struct ra_renderpass_input vertex_vao[] = { + {"position", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, position)}, + {"texcoord" , RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord)}, + {"ass_color", RA_VARTYPE_BYTE_UNORM, 4, 1, offsetof(struct vertex, ass_color)}, + {0} +}; + +struct mpgl_osd_part { + enum sub_bitmap_format format; + int change_id; + struct ra_tex *texture; + int w, h; + int num_subparts; + int prev_num_subparts; + struct sub_bitmap *subparts; + int num_vertices; + struct vertex *vertices; +}; + +struct mpgl_osd { + struct mp_log *log; + struct osd_state *osd; + struct ra *ra; + struct mpgl_osd_part *parts[MAX_OSD_PARTS]; + const struct ra_format *fmt_table[SUBBITMAP_COUNT]; + bool formats[SUBBITMAP_COUNT]; + bool change_flag; // for reporting to API user only + // temporary + int stereo_mode; + struct mp_osd_res osd_res; + void *scratch; +}; + +struct mpgl_osd *mpgl_osd_init(struct ra *ra, struct mp_log *log, + struct osd_state *osd) +{ + struct mpgl_osd *ctx = talloc_ptrtype(NULL, ctx); + *ctx = (struct mpgl_osd) { + .log = log, + .osd = osd, + .ra = ra, + .change_flag = true, + .scratch = talloc_zero_size(ctx, 1), + }; + + ctx->fmt_table[SUBBITMAP_LIBASS] = ra_find_unorm_format(ra, 1, 1); + ctx->fmt_table[SUBBITMAP_RGBA] = ra_find_unorm_format(ra, 1, 4); + + for (int n = 0; n < MAX_OSD_PARTS; n++) + ctx->parts[n] = talloc_zero(ctx, struct mpgl_osd_part); + + for (int n = 0; n < SUBBITMAP_COUNT; n++) + ctx->formats[n] = !!ctx->fmt_table[n]; + + return ctx; +} + +void mpgl_osd_destroy(struct mpgl_osd *ctx) +{ + if (!ctx) + return; + + for (int n = 0; n < MAX_OSD_PARTS; n++) { + struct mpgl_osd_part *p = ctx->parts[n]; + ra_tex_free(ctx->ra, &p->texture); + } + talloc_free(ctx); +} + +static int next_pow2(int v) +{ + for (int x = 0; x < 30; x++) { + if ((1 << x) >= v) + return 1 << x; + } + return INT_MAX; +} + +static bool upload_osd(struct mpgl_osd *ctx, struct mpgl_osd_part *osd, + struct sub_bitmaps *imgs) +{ + struct ra *ra = ctx->ra; + bool ok = false; + + assert(imgs->packed); + + int req_w = next_pow2(imgs->packed_w); + int req_h = next_pow2(imgs->packed_h); + + const struct ra_format *fmt = ctx->fmt_table[imgs->format]; + assert(fmt); + + if (!osd->texture || req_w > osd->w || req_h > osd->h || + osd->format != imgs->format) + { + ra_tex_free(ra, &osd->texture); + + osd->format = imgs->format; + osd->w = FFMAX(32, req_w); + osd->h = FFMAX(32, req_h); + + MP_VERBOSE(ctx, "Reallocating OSD texture to %dx%d.\n", osd->w, osd->h); + + if (osd->w > ra->max_texture_wh || osd->h > ra->max_texture_wh) { + MP_ERR(ctx, "OSD bitmaps do not fit on a surface with the maximum " + "supported size %dx%d.\n", ra->max_texture_wh, + ra->max_texture_wh); + goto done; + } + + struct ra_tex_params params = { + .dimensions = 2, + .w = osd->w, + .h = osd->h, + .d = 1, + .format = fmt, + .render_src = true, + .src_linear = true, + 
.host_mutable = true, + }; + osd->texture = ra_tex_create(ra, ¶ms); + if (!osd->texture) + goto done; + } + + struct ra_tex_upload_params params = { + .tex = osd->texture, + .src = imgs->packed->planes[0], + .invalidate = true, + .rc = &(struct mp_rect){0, 0, imgs->packed_w, imgs->packed_h}, + .stride = imgs->packed->stride[0], + }; + + ok = ra->fns->tex_upload(ra, ¶ms); + +done: + return ok; +} + +static void gen_osd_cb(void *pctx, struct sub_bitmaps *imgs) +{ + struct mpgl_osd *ctx = pctx; + + if (imgs->num_parts == 0 || !ctx->formats[imgs->format]) + return; + + struct mpgl_osd_part *osd = ctx->parts[imgs->render_index]; + + bool ok = true; + if (imgs->change_id != osd->change_id) { + if (!upload_osd(ctx, osd, imgs)) + ok = false; + + osd->change_id = imgs->change_id; + ctx->change_flag = true; + } + osd->num_subparts = ok ? imgs->num_parts : 0; + + MP_TARRAY_GROW(osd, osd->subparts, osd->num_subparts); + memcpy(osd->subparts, imgs->parts, + osd->num_subparts * sizeof(osd->subparts[0])); +} + +bool mpgl_osd_draw_prepare(struct mpgl_osd *ctx, int index, + struct gl_shader_cache *sc) +{ + assert(index >= 0 && index < MAX_OSD_PARTS); + struct mpgl_osd_part *part = ctx->parts[index]; + + enum sub_bitmap_format fmt = part->format; + if (!fmt || !part->num_subparts) + return false; + + gl_sc_uniform_texture(sc, "osdtex", part->texture); + switch (fmt) { + case SUBBITMAP_RGBA: { + GLSL(color = texture(osdtex, texcoord).bgra;) + break; + } + case SUBBITMAP_LIBASS: { + GLSL(color = + vec4(ass_color.rgb, ass_color.a * texture(osdtex, texcoord).r);) + break; + } + default: + abort(); + } + + gl_sc_set_vertex_format(sc, vertex_vao, sizeof(struct vertex)); + + return true; +} + +static void write_quad(struct vertex *va, struct gl_transform t, + float x0, float y0, float x1, float y1, + float tx0, float ty0, float tx1, float ty1, + float tex_w, float tex_h, const uint8_t color[4]) +{ + gl_transform_vec(t, &x0, &y0); + gl_transform_vec(t, &x1, &y1); + +#define COLOR_INIT {color[0], color[1], color[2], color[3]} + va[0] = (struct vertex){ {x0, y0}, {tx0 / tex_w, ty0 / tex_h}, COLOR_INIT }; + va[1] = (struct vertex){ {x0, y1}, {tx0 / tex_w, ty1 / tex_h}, COLOR_INIT }; + va[2] = (struct vertex){ {x1, y0}, {tx1 / tex_w, ty0 / tex_h}, COLOR_INIT }; + va[3] = (struct vertex){ {x1, y1}, {tx1 / tex_w, ty1 / tex_h}, COLOR_INIT }; + va[4] = va[2]; + va[5] = va[1]; +#undef COLOR_INIT +} + +static void generate_verts(struct mpgl_osd_part *part, struct gl_transform t) +{ + int num_vertices = part->num_subparts * 6; + MP_TARRAY_GROW(part, part->vertices, part->num_vertices + num_vertices); + + for (int n = 0; n < part->num_subparts; n++) { + struct sub_bitmap *b = &part->subparts[n]; + struct vertex *va = &part->vertices[part->num_vertices]; + + // NOTE: the blend color is used with SUBBITMAP_LIBASS only, so it + // doesn't matter that we upload garbage for the other formats + uint32_t c = b->libass.color; + uint8_t color[4] = { c >> 24, (c >> 16) & 0xff, + (c >> 8) & 0xff, 255 - (c & 0xff) }; + + write_quad(&va[n * 6], t, + b->x, b->y, b->x + b->dw, b->y + b->dh, + b->src_x, b->src_y, b->src_x + b->w, b->src_y + b->h, + part->w, part->h, color); + } + + part->num_vertices += num_vertices; +} + +// number of screen divisions per axis (x=0, y=1) for the current 3D mode +static void get_3d_side_by_side(int stereo_mode, int div[2]) +{ + div[0] = div[1] = 1; + switch (stereo_mode) { + case MP_STEREO3D_SBS2L: + case MP_STEREO3D_SBS2R: div[0] = 2; break; + case MP_STEREO3D_AB2R: + case MP_STEREO3D_AB2L: div[1] = 2; 
break; + } +} + +void mpgl_osd_draw_finish(struct mpgl_osd *ctx, int index, + struct gl_shader_cache *sc, struct fbodst target) +{ + struct mpgl_osd_part *part = ctx->parts[index]; + + int div[2]; + get_3d_side_by_side(ctx->stereo_mode, div); + + part->num_vertices = 0; + + for (int x = 0; x < div[0]; x++) { + for (int y = 0; y < div[1]; y++) { + struct gl_transform t; + gl_transform_ortho_fbodst(&t, target); + + float a_x = ctx->osd_res.w * x; + float a_y = ctx->osd_res.h * y; + t.t[0] += a_x * t.m[0][0] + a_y * t.m[1][0]; + t.t[1] += a_x * t.m[0][1] + a_y * t.m[1][1]; + + generate_verts(part, t); + } + } + + const int *factors = &blend_factors[part->format][0]; + gl_sc_blend(sc, factors[0], factors[1], factors[2], factors[3]); + + gl_sc_dispatch_draw(sc, target.tex, part->vertices, part->num_vertices); +} + +static void set_res(struct mpgl_osd *ctx, struct mp_osd_res res, int stereo_mode) +{ + int div[2]; + get_3d_side_by_side(stereo_mode, div); + + res.w /= div[0]; + res.h /= div[1]; + ctx->osd_res = res; +} + +void mpgl_osd_generate(struct mpgl_osd *ctx, struct mp_osd_res res, double pts, + int stereo_mode, int draw_flags) +{ + for (int n = 0; n < MAX_OSD_PARTS; n++) + ctx->parts[n]->num_subparts = 0; + + set_res(ctx, res, stereo_mode); + + osd_draw(ctx->osd, ctx->osd_res, pts, draw_flags, ctx->formats, gen_osd_cb, ctx); + ctx->stereo_mode = stereo_mode; + + // Parts going away does not necessarily result in gen_osd_cb() being called + // (not even with num_parts==0), so check this separately. + for (int n = 0; n < MAX_OSD_PARTS; n++) { + struct mpgl_osd_part *part = ctx->parts[n]; + if (part->num_subparts != part->prev_num_subparts) + ctx->change_flag = true; + part->prev_num_subparts = part->num_subparts; + } +} + +// See osd_resize() for remarks. This function is an optional optimization too. 
+void mpgl_osd_resize(struct mpgl_osd *ctx, struct mp_osd_res res, int stereo_mode) +{ + set_res(ctx, res, stereo_mode); + osd_resize(ctx->osd, ctx->osd_res); +} + +bool mpgl_osd_check_change(struct mpgl_osd *ctx, struct mp_osd_res *res, + double pts) +{ + ctx->change_flag = false; + mpgl_osd_generate(ctx, *res, pts, 0, 0); + return ctx->change_flag; +} diff --git a/video/out/gpu/osd.h b/video/out/gpu/osd.h new file mode 100644 index 0000000000..6c2b886de3 --- /dev/null +++ b/video/out/gpu/osd.h @@ -0,0 +1,25 @@ +#ifndef MPLAYER_GL_OSD_H +#define MPLAYER_GL_OSD_H + +#include +#include + +#include "utils.h" +#include "shader_cache.h" +#include "sub/osd.h" + +struct mpgl_osd *mpgl_osd_init(struct ra *ra, struct mp_log *log, + struct osd_state *osd); +void mpgl_osd_destroy(struct mpgl_osd *ctx); + +void mpgl_osd_generate(struct mpgl_osd *ctx, struct mp_osd_res res, double pts, + int stereo_mode, int draw_flags); +void mpgl_osd_resize(struct mpgl_osd *ctx, struct mp_osd_res res, int stereo_mode); +bool mpgl_osd_draw_prepare(struct mpgl_osd *ctx, int index, + struct gl_shader_cache *sc); +void mpgl_osd_draw_finish(struct mpgl_osd *ctx, int index, + struct gl_shader_cache *sc, struct fbodst target); +bool mpgl_osd_check_change(struct mpgl_osd *ctx, struct mp_osd_res *res, + double pts); + +#endif diff --git a/video/out/gpu/ra.c b/video/out/gpu/ra.c new file mode 100644 index 0000000000..ef1de54d1a --- /dev/null +++ b/video/out/gpu/ra.c @@ -0,0 +1,327 @@ +#include "common/common.h" +#include "common/msg.h" +#include "video/img_format.h" + +#include "ra.h" + +struct ra_tex *ra_tex_create(struct ra *ra, const struct ra_tex_params *params) +{ + return ra->fns->tex_create(ra, params); +} + +void ra_tex_free(struct ra *ra, struct ra_tex **tex) +{ + if (*tex) + ra->fns->tex_destroy(ra, *tex); + *tex = NULL; +} + +struct ra_buf *ra_buf_create(struct ra *ra, const struct ra_buf_params *params) +{ + return ra->fns->buf_create(ra, params); +} + +void ra_buf_free(struct ra *ra, struct ra_buf **buf) +{ + if (*buf) + ra->fns->buf_destroy(ra, *buf); + *buf = NULL; +} + +void ra_free(struct ra **ra) +{ + if (*ra) + (*ra)->fns->destroy(*ra); + talloc_free(*ra); + *ra = NULL; +} + +size_t ra_vartype_size(enum ra_vartype type) +{ + switch (type) { + case RA_VARTYPE_INT: return sizeof(int); + case RA_VARTYPE_FLOAT: return sizeof(float); + case RA_VARTYPE_BYTE_UNORM: return 1; + default: return 0; + } +} + +struct ra_layout ra_renderpass_input_layout(struct ra_renderpass_input *input) +{ + size_t el_size = ra_vartype_size(input->type); + if (!el_size) + return (struct ra_layout){0}; + + // host data is always tightly packed + return (struct ra_layout) { + .align = 1, + .stride = el_size * input->dim_v, + .size = el_size * input->dim_v * input->dim_m, + }; +} + +static struct ra_renderpass_input *dup_inputs(void *ta_parent, + const struct ra_renderpass_input *inputs, int num_inputs) +{ + struct ra_renderpass_input *res = + talloc_memdup(ta_parent, (void *)inputs, num_inputs * sizeof(inputs[0])); + for (int n = 0; n < num_inputs; n++) + res[n].name = talloc_strdup(res, res[n].name); + return res; +} + +// Return a newly allocated deep-copy of params. 
+struct ra_renderpass_params *ra_renderpass_params_copy(void *ta_parent, + const struct ra_renderpass_params *params) +{ + struct ra_renderpass_params *res = talloc_ptrtype(ta_parent, res); + *res = *params; + res->inputs = dup_inputs(res, res->inputs, res->num_inputs); + res->vertex_attribs = + dup_inputs(res, res->vertex_attribs, res->num_vertex_attribs); + res->cached_program = bstrdup(res, res->cached_program); + res->vertex_shader = talloc_strdup(res, res->vertex_shader); + res->frag_shader = talloc_strdup(res, res->frag_shader); + res->compute_shader = talloc_strdup(res, res->compute_shader); + return res; +}; + + +// Return whether this is a tightly packed format with no external padding and +// with the same bit size/depth in all components, and the shader returns +// components in the same order as in memory. +static bool ra_format_is_regular(const struct ra_format *fmt) +{ + if (!fmt->pixel_size || !fmt->num_components || !fmt->ordered) + return false; + for (int n = 1; n < fmt->num_components; n++) { + if (fmt->component_size[n] != fmt->component_size[0] || + fmt->component_depth[n] != fmt->component_depth[0]) + return false; + } + if (fmt->component_size[0] * fmt->num_components != fmt->pixel_size * 8) + return false; + return true; +} + +// Return a regular filterable format using RA_CTYPE_UNORM. +const struct ra_format *ra_find_unorm_format(struct ra *ra, + int bytes_per_component, + int n_components) +{ + for (int n = 0; n < ra->num_formats; n++) { + const struct ra_format *fmt = ra->formats[n]; + if (fmt->ctype == RA_CTYPE_UNORM && fmt->num_components == n_components && + fmt->pixel_size == bytes_per_component * n_components && + fmt->component_depth[0] == bytes_per_component * 8 && + fmt->linear_filter && ra_format_is_regular(fmt)) + return fmt; + } + return NULL; +} + +// Return a regular format using RA_CTYPE_UINT. +const struct ra_format *ra_find_uint_format(struct ra *ra, + int bytes_per_component, + int n_components) +{ + for (int n = 0; n < ra->num_formats; n++) { + const struct ra_format *fmt = ra->formats[n]; + if (fmt->ctype == RA_CTYPE_UINT && fmt->num_components == n_components && + fmt->pixel_size == bytes_per_component * n_components && + fmt->component_depth[0] == bytes_per_component * 8 && + ra_format_is_regular(fmt)) + return fmt; + } + return NULL; +} + +// Find a float format of any precision that matches the C type of the same +// size for upload. +// May drop bits from the mantissa (such as selecting float16 even if +// bytes_per_component == 32); prefers possibly faster formats first. +static const struct ra_format *ra_find_float_format(struct ra *ra, + int bytes_per_component, + int n_components) +{ + // Assumes ra_format are ordered by performance. + // The >=16 check is to avoid catching fringe formats. + for (int n = 0; n < ra->num_formats; n++) { + const struct ra_format *fmt = ra->formats[n]; + if (fmt->ctype == RA_CTYPE_FLOAT && fmt->num_components == n_components && + fmt->pixel_size == bytes_per_component * n_components && + fmt->component_depth[0] >= 16 && + fmt->linear_filter && ra_format_is_regular(fmt)) + return fmt; + } + return NULL; +} + +// Return a filterable regular format that uses at least float16 internally, and +// uses a normal C float for transfer on the CPU side. (This is just so we don't +// need 32->16 bit conversion on CPU, which would be messy.) 
+const struct ra_format *ra_find_float16_format(struct ra *ra, int n_components) +{ + return ra_find_float_format(ra, sizeof(float), n_components); +} + +const struct ra_format *ra_find_named_format(struct ra *ra, const char *name) +{ + for (int n = 0; n < ra->num_formats; n++) { + const struct ra_format *fmt = ra->formats[n]; + if (strcmp(fmt->name, name) == 0) + return fmt; + } + return NULL; +} + +// Like ra_find_unorm_format(), but if no fixed point format is available, +// return an unsigned integer format. +static const struct ra_format *find_plane_format(struct ra *ra, int bytes, + int n_channels, + enum mp_component_type ctype) +{ + switch (ctype) { + case MP_COMPONENT_TYPE_UINT: { + const struct ra_format *f = ra_find_unorm_format(ra, bytes, n_channels); + if (f) + return f; + return ra_find_uint_format(ra, bytes, n_channels); + } + case MP_COMPONENT_TYPE_FLOAT: + return ra_find_float_format(ra, bytes, n_channels); + default: return NULL; + } +} + +// Put a mapping of imgfmt to texture formats into *out. Basically it selects +// the correct texture formats needed to represent an imgfmt in a shader, with +// textures using the same memory organization as on the CPU. +// Each plane is represented by a texture, and each texture has a RGBA +// component order. out->components describes the meaning of them. +// May return integer formats for >8 bit formats, if the driver has no +// normalized 16 bit formats. +// Returns false (and *out is not touched) if no format found. +bool ra_get_imgfmt_desc(struct ra *ra, int imgfmt, struct ra_imgfmt_desc *out) +{ + struct ra_imgfmt_desc res = {0}; + + struct mp_regular_imgfmt regfmt; + if (mp_get_regular_imgfmt(®fmt, imgfmt)) { + enum ra_ctype ctype = RA_CTYPE_UNKNOWN; + res.num_planes = regfmt.num_planes; + res.component_bits = regfmt.component_size * 8; + res.component_pad = regfmt.component_pad; + for (int n = 0; n < regfmt.num_planes; n++) { + struct mp_regular_imgfmt_plane *plane = ®fmt.planes[n]; + res.planes[n] = find_plane_format(ra, regfmt.component_size, + plane->num_components, + regfmt.component_type); + if (!res.planes[n]) + return false; + for (int i = 0; i < plane->num_components; i++) + res.components[n][i] = plane->components[i]; + // Dropping LSBs when shifting will lead to dropped MSBs. + if (res.component_bits > res.planes[n]->component_depth[0] && + res.component_pad < 0) + return false; + // Renderer restriction, but actually an unwanted corner case. + if (ctype != RA_CTYPE_UNKNOWN && ctype != res.planes[n]->ctype) + return false; + ctype = res.planes[n]->ctype; + } + res.chroma_w = regfmt.chroma_w; + res.chroma_h = regfmt.chroma_h; + goto supported; + } + + for (int n = 0; n < ra->num_formats; n++) { + if (imgfmt && ra->formats[n]->special_imgfmt == imgfmt) { + res = *ra->formats[n]->special_imgfmt_desc; + goto supported; + } + } + + // Unsupported format + return false; + +supported: + + *out = res; + return true; +} + +void ra_dump_tex_formats(struct ra *ra, int msgl) +{ + if (!mp_msg_test(ra->log, msgl)) + return; + MP_MSG(ra, msgl, "Texture formats:\n"); + MP_MSG(ra, msgl, " NAME COMP*TYPE SIZE DEPTH PER COMP.\n"); + for (int n = 0; n < ra->num_formats; n++) { + const struct ra_format *fmt = ra->formats[n]; + const char *ctype = "unknown"; + switch (fmt->ctype) { + case RA_CTYPE_UNORM: ctype = "unorm"; break; + case RA_CTYPE_UINT: ctype = "uint "; break; + case RA_CTYPE_FLOAT: ctype = "float"; break; + } + char cl[40] = ""; + for (int i = 0; i < fmt->num_components; i++) { + mp_snprintf_cat(cl, sizeof(cl), "%s%d", i ? 
" " : "", + fmt->component_size[i]); + if (fmt->component_size[i] != fmt->component_depth[i]) + mp_snprintf_cat(cl, sizeof(cl), "/%d", fmt->component_depth[i]); + } + MP_MSG(ra, msgl, " %-10s %d*%s %3dB %s %s %s {%s}\n", fmt->name, + fmt->num_components, ctype, fmt->pixel_size, + fmt->luminance_alpha ? "LA" : " ", + fmt->linear_filter ? "LF" : " ", + fmt->renderable ? "CR" : " ", cl); + } + MP_MSG(ra, msgl, " LA = LUMINANCE_ALPHA hack format\n"); + MP_MSG(ra, msgl, " LF = linear filterable\n"); + MP_MSG(ra, msgl, " CR = can be used for render targets\n"); +} + +void ra_dump_imgfmt_desc(struct ra *ra, const struct ra_imgfmt_desc *desc, + int msgl) +{ + char pl[80] = ""; + char pf[80] = ""; + for (int n = 0; n < desc->num_planes; n++) { + if (n > 0) { + mp_snprintf_cat(pl, sizeof(pl), "/"); + mp_snprintf_cat(pf, sizeof(pf), "/"); + } + char t[5] = {0}; + for (int i = 0; i < 4; i++) + t[i] = "_rgba"[desc->components[n][i]]; + for (int i = 3; i > 0 && t[i] == '_'; i--) + t[i] = '\0'; + mp_snprintf_cat(pl, sizeof(pl), "%s", t); + mp_snprintf_cat(pf, sizeof(pf), "%s", desc->planes[n]->name); + } + MP_MSG(ra, msgl, "%d planes %dx%d %d/%d [%s] (%s)\n", + desc->num_planes, desc->chroma_w, desc->chroma_h, + desc->component_bits, desc->component_pad, pf, pl); +} + +void ra_dump_img_formats(struct ra *ra, int msgl) +{ + if (!mp_msg_test(ra->log, msgl)) + return; + MP_MSG(ra, msgl, "Image formats:\n"); + for (int imgfmt = IMGFMT_START; imgfmt < IMGFMT_END; imgfmt++) { + const char *name = mp_imgfmt_to_name(imgfmt); + if (strcmp(name, "unknown") == 0) + continue; + MP_MSG(ra, msgl, " %s", name); + struct ra_imgfmt_desc desc; + if (ra_get_imgfmt_desc(ra, imgfmt, &desc)) { + MP_MSG(ra, msgl, " => "); + ra_dump_imgfmt_desc(ra, &desc, msgl); + } else { + MP_MSG(ra, msgl, "\n"); + } + } +} diff --git a/video/out/gpu/ra.h b/video/out/gpu/ra.h new file mode 100644 index 0000000000..76f98397f8 --- /dev/null +++ b/video/out/gpu/ra.h @@ -0,0 +1,488 @@ +#pragma once + +#include "common/common.h" +#include "misc/bstr.h" + +// Handle for a rendering API backend. +struct ra { + struct ra_fns *fns; + void *priv; + + int glsl_version; // GLSL version (e.g. 300 => 3.0) + bool glsl_es; // use ES dialect + bool glsl_vulkan; // use vulkan dialect + + struct mp_log *log; + + // RA_CAP_* bit field. The RA backend must set supported features at init + // time. + uint64_t caps; + + // Maximum supported width and height of a 2D texture. Set by the RA backend + // at init time. + int max_texture_wh; + + // Maximum shared memory for compute shaders. Set by the RA backend at init + // time. + size_t max_shmem; + + // Set of supported texture formats. Must be added by RA backend at init time. + // If there are equivalent formats with different caveats, the preferred + // formats should have a lower index. (E.g. GLES3 should put rg8 before la.) + struct ra_format **formats; + int num_formats; + + // Accelerate texture uploads via an extra PBO even when + // RA_CAP_DIRECT_UPLOAD is supported. This is basically only relevant for + // OpenGL. Set by the RA user. 
+ bool use_pbo; +}; + +enum { + RA_CAP_TEX_1D = 1 << 0, // supports 1D textures (as shader inputs) + RA_CAP_TEX_3D = 1 << 1, // supports 3D textures (as shader inputs) + RA_CAP_BLIT = 1 << 2, // supports ra_fns.blit + RA_CAP_COMPUTE = 1 << 3, // supports compute shaders + RA_CAP_DIRECT_UPLOAD = 1 << 4, // supports tex_upload without ra_buf + RA_CAP_BUF_RO = 1 << 5, // supports RA_VARTYPE_BUF_RO + RA_CAP_BUF_RW = 1 << 6, // supports RA_VARTYPE_BUF_RW + RA_CAP_NESTED_ARRAY = 1 << 7, // supports nested arrays + RA_CAP_SHARED_BINDING = 1 << 8, // sampler/image/buffer namespaces are disjoint + RA_CAP_GLOBAL_UNIFORM = 1 << 9, // supports using "naked" uniforms (not UBO) +}; + +enum ra_ctype { + RA_CTYPE_UNKNOWN = 0, // also used for inconsistent multi-component formats + RA_CTYPE_UNORM, // unsigned normalized integer (fixed point) formats + RA_CTYPE_UINT, // full integer formats + RA_CTYPE_FLOAT, // float formats (signed, any bit size) +}; + +// All formats must be useable as texture formats. All formats must be byte +// aligned (all pixels start and end on a byte boundary), at least as far CPU +// transfers are concerned. +struct ra_format { + // All fields are read-only after creation. + const char *name; // symbolic name for user interaction/debugging + void *priv; + enum ra_ctype ctype; // data type of each component + bool ordered; // components are sequential in memory, and returned + // by the shader in memory order (the shader can + // return arbitrary values for unused components) + int num_components; // component count, 0 if not applicable, max. 4 + int component_size[4]; // in bits, all entries 0 if not applicable + int component_depth[4]; // bits in use for each component, 0 if not applicable + // (_must_ be set if component_size[] includes padding, + // and the real procession as seen by shader is lower) + int pixel_size; // in bytes, total pixel size (0 if opaque) + bool luminance_alpha; // pre-GL_ARB_texture_rg hack for 2 component textures + // if this is set, shader must use .ra instead of .rg + // only applies to 2-component textures + bool linear_filter; // linear filtering available from shader + bool renderable; // can be used for render targets + + // If not 0, the format represents some sort of packed fringe format, whose + // shader representation is given by the special_imgfmt_desc pointer. + int special_imgfmt; + const struct ra_imgfmt_desc *special_imgfmt_desc; +}; + +struct ra_tex_params { + int dimensions; // 1-3 for 1D-3D textures + // Size of the texture. 1D textures require h=d=1, 2D textures require d=1. + int w, h, d; + const struct ra_format *format; + bool render_src; // must be useable as source texture in a shader + bool render_dst; // must be useable as target texture in a shader + bool storage_dst; // must be usable as a storage image (RA_VARTYPE_IMG_W) + bool blit_src; // must be usable as a blit source + bool blit_dst; // must be usable as a blit destination + bool host_mutable; // texture may be updated with tex_upload + // When used as render source texture. + bool src_linear; // if false, use nearest sampling (whether this can + // be true depends on ra_format.linear_filter) + bool src_repeat; // if false, clamp texture coordinates to edge + // if true, repeat texture coordinates + bool non_normalized; // hack for GL_TEXTURE_RECTANGLE OSX idiocy + // always set to false, except in OSX code + bool external_oes; // hack for GL_TEXTURE_EXTERNAL_OES idiocy + // If non-NULL, the texture will be created with these contents. 
Using + // this does *not* require setting host_mutable. Otherwise, the initial + // data is undefined. + void *initial_data; +}; + +// Conflates the following typical GPU API concepts: +// - texture itself +// - sampler state +// - staging buffers for texture upload +// - framebuffer objects +// - wrappers for swapchain framebuffers +// - synchronization needed for upload/rendering/etc. +struct ra_tex { + // All fields are read-only after creation. + struct ra_tex_params params; + void *priv; +}; + +struct ra_tex_upload_params { + struct ra_tex *tex; // Texture to upload to + bool invalidate; // Discard pre-existing data not in the region uploaded + // Uploading from buffer: + struct ra_buf *buf; // Buffer to upload from (mutually exclusive with `src`) + size_t buf_offset; // Start of data within buffer (bytes) + // Uploading directly: (Note: If RA_CAP_DIRECT_UPLOAD is not set, then this + // will be internally translated to a tex_upload buffer by the RA) + const void *src; // Address of data + // For 2D textures only: + struct mp_rect *rc; // Region to upload. NULL means entire image + ptrdiff_t stride; // The size of a horizontal line in bytes (*not* texels!) +}; + +// Buffer type hint. Setting this may result in more or less efficient +// operation, although it shouldn't technically prohibit anything +enum ra_buf_type { + RA_BUF_TYPE_INVALID, + RA_BUF_TYPE_TEX_UPLOAD, // texture upload buffer (pixel buffer object) + RA_BUF_TYPE_SHADER_STORAGE, // shader buffer (SSBO), for RA_VARTYPE_BUF_RW + RA_BUF_TYPE_UNIFORM, // uniform buffer (UBO), for RA_VARTYPE_BUF_RO +}; + +struct ra_buf_params { + enum ra_buf_type type; + size_t size; + bool host_mapped; // create a read-writable persistent mapping (ra_buf.data) + bool host_mutable; // contents may be updated via buf_update() + // If non-NULL, the buffer will be created with these contents. Otherwise, + // the initial data is undefined. + void *initial_data; +}; + +// A generic buffer, which can be used for many purposes (texture upload, +// storage buffer, uniform buffer, etc.) +struct ra_buf { + // All fields are read-only after creation. + struct ra_buf_params params; + void *data; // for persistently mapped buffers, points to the first byte + void *priv; +}; + +// Type of a shader uniform variable, or a vertex attribute. In all cases, +// vectors are matrices are done by having more than 1 value. +enum ra_vartype { + RA_VARTYPE_INVALID, + RA_VARTYPE_INT, // C: int, GLSL: int, ivec* + RA_VARTYPE_FLOAT, // C: float, GLSL: float, vec*, mat* + RA_VARTYPE_TEX, // C: ra_tex*, GLSL: various sampler types + // ra_tex.params.render_src must be true + RA_VARTYPE_IMG_W, // C: ra_tex*, GLSL: various image types + // write-only (W) image for compute shaders + // ra_tex.params.storage_dst must be true + RA_VARTYPE_BYTE_UNORM, // C: uint8_t, GLSL: int, vec* (vertex data only) + RA_VARTYPE_BUF_RO, // C: ra_buf*, GLSL: uniform buffer block + // buf type must be RA_BUF_TYPE_UNIFORM + RA_VARTYPE_BUF_RW, // C: ra_buf*, GLSL: shader storage buffer block + // buf type must be RA_BUF_TYPE_SHADER_STORAGE + RA_VARTYPE_COUNT +}; + +// Returns the host size of a ra_vartype, or 0 for abstract vartypes (e.g. tex) +size_t ra_vartype_size(enum ra_vartype type); + +// Represents a uniform, texture input parameter, and similar things. +struct ra_renderpass_input { + const char *name; // name as used in the shader + enum ra_vartype type; + // The total number of values is given by dim_v * dim_m. 
+ int dim_v; // vector dimension (1 for non-vector and non-matrix) + int dim_m; // additional matrix dimension (dim_v x dim_m) + // Vertex data: byte offset of the attribute into the vertex struct + size_t offset; + // RA_VARTYPE_TEX: texture unit + // RA_VARTYPE_IMG_W: image unit + // RA_VARTYPE_BUF_* buffer binding point + // Other uniforms: unused + // If RA_CAP_SHARED_BINDING is set, these may only be unique per input type. + // Otherwise, these must be unique for all input values. + int binding; +}; + +// Represents the layout requirements of an input value +struct ra_layout { + size_t align; // the alignment requirements (always a power of two) + size_t stride; // the delta between two rows of an array/matrix + size_t size; // the total size of the input +}; + +// Returns the host layout of a render pass input. Returns {0} for renderpass +// inputs without a corresponding host representation (e.g. textures/buffers) +struct ra_layout ra_renderpass_input_layout(struct ra_renderpass_input *input); + +enum ra_blend { + RA_BLEND_ZERO, + RA_BLEND_ONE, + RA_BLEND_SRC_ALPHA, + RA_BLEND_ONE_MINUS_SRC_ALPHA, +}; + +enum ra_renderpass_type { + RA_RENDERPASS_TYPE_INVALID, + RA_RENDERPASS_TYPE_RASTER, // vertex+fragment shader + RA_RENDERPASS_TYPE_COMPUTE, // compute shader +}; + +// Static part of a rendering pass. It conflates the following: +// - compiled shader and its list of uniforms +// - vertex attributes and its shader mappings +// - blending parameters +// (For Vulkan, this would be shader module + pipeline state.) +// Upon creation, the values of dynamic values such as uniform contents (whose +// initial values are not provided here) are required to be 0. +struct ra_renderpass_params { + enum ra_renderpass_type type; + + // Uniforms, including texture/sampler inputs. + struct ra_renderpass_input *inputs; + int num_inputs; + + // Highly implementation-specific byte array storing a compiled version + // of the program. Can be used to speed up shader compilation. A backend + // xan read this in renderpass_create, or set this on the newly created + // ra_renderpass params field. + bstr cached_program; + + // --- type==RA_RENDERPASS_TYPE_RASTER only + + // Describes the format of the vertex data. When using ra.glsl_vulkan, + // the order of this array must match the vertex attribute locations. + struct ra_renderpass_input *vertex_attribs; + int num_vertex_attribs; + int vertex_stride; + + // Format of the target texture + const struct ra_format *target_format; + + // Shader text, in GLSL. (Yes, you need a GLSL compiler.) + // These are complete shaders, including prelude and declarations. + const char *vertex_shader; + const char *frag_shader; + + // Target blending mode. If enable_blend is false, the blend_ fields can + // be ignored. + bool enable_blend; + enum ra_blend blend_src_rgb; + enum ra_blend blend_dst_rgb; + enum ra_blend blend_src_alpha; + enum ra_blend blend_dst_alpha; + + // --- type==RA_RENDERPASS_TYPE_COMPUTE only + + // Shader text, like vertex_shader/frag_shader. 
+ const char *compute_shader; +}; + +struct ra_renderpass_params *ra_renderpass_params_copy(void *ta_parent, + const struct ra_renderpass_params *params); + +// Conflates the following typical GPU API concepts: +// - various kinds of shaders +// - rendering pipelines +// - descriptor sets, uniforms, other bindings +// - all synchronization necessary +// - the current values of all uniforms (this one makes it relatively stateful +// from an API perspective) +struct ra_renderpass { + // All fields are read-only after creation. + struct ra_renderpass_params params; + void *priv; +}; + +// An input value (see ra_renderpass_input). +struct ra_renderpass_input_val { + int index; // index into ra_renderpass_params.inputs[] + void *data; // pointer to data according to ra_renderpass_input + // (e.g. type==RA_VARTYPE_FLOAT+dim_v=3,dim_m=3 => float[9]) +}; + +// Parameters for performing a rendering pass (basically the dynamic params). +// These change potentially every time. +struct ra_renderpass_run_params { + struct ra_renderpass *pass; + + // Generally this lists parameters only which changed since the last + // invocation and need to be updated. The ra_renderpass instance is + // supposed to keep unchanged values from the previous run. + // For non-primitive types like textures, these entries are always added, + // even if they do not change. + struct ra_renderpass_input_val *values; + int num_values; + + // --- pass->params.type==RA_RENDERPASS_TYPE_RASTER only + + // target->params.render_dst must be true, and target->params.format must + // match pass->params.target_format. + struct ra_tex *target; + struct mp_rect viewport; + struct mp_rect scissors; + + // (The primitive type is always a triangle list.) + void *vertex_data; + int vertex_count; // number of vertex elements, not bytes + + // --- pass->params.type==RA_RENDERPASS_TYPE_COMPUTE only + + // Number of work groups to be run in X/Y/Z dimensions. + int compute_groups[3]; +}; + +// This is an opaque type provided by the implementation, but we want to at +// least give it a saner name than void* for code readability purposes. +typedef void ra_timer; + +// Rendering API entrypoints. (Note: there are some additional hidden features +// you need to take care of. For example, hwdec mapping will be provided +// separately from ra, but might need to call into ra private code.) +struct ra_fns { + void (*destroy)(struct ra *ra); + + // Create a texture (with undefined contents). Return NULL on failure. + // This is a rare operation, and normally textures and even FBOs for + // temporary rendering intermediate data are cached. + struct ra_tex *(*tex_create)(struct ra *ra, + const struct ra_tex_params *params); + + void (*tex_destroy)(struct ra *ra, struct ra_tex *tex); + + // Upload data to a texture. This is an extremely common operation. When + // using a buffer, the contants of the buffer must exactly match the image + // - conversions between bit depth etc. are not supported. The buffer *may* + // be marked as "in use" while this operation is going on, and the contents + // must not be touched again by the API user until buf_poll returns true. + // Returns whether successful. + bool (*tex_upload)(struct ra *ra, const struct ra_tex_upload_params *params); + + // Create a buffer. This can be used as a persistently mapped buffer, + // a uniform buffer, a shader storage buffer or possibly others. + // Not all usage types must be supported; may return NULL if unavailable. 
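    // For instance, a staging buffer for texture uploads might be requested
    // with something like (sketch; size and flags are caller-chosen):
    //
    //   struct ra_buf_params params = {
    //       .type = RA_BUF_TYPE_TEX_UPLOAD,
    //       .size = upload_size,        // hypothetical size in bytes
    //       .host_mutable = true,       // so buf_update() can fill it later
    //   };
    //   struct ra_buf *buf = ra_buf_create(ra, &params);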
+ struct ra_buf *(*buf_create)(struct ra *ra, + const struct ra_buf_params *params); + + void (*buf_destroy)(struct ra *ra, struct ra_buf *buf); + + // Update the contents of a buffer, starting at a given offset and up to a + // given size, with the contents of *data. This is an extremely common + // operation. Calling this while the buffer is considered "in use" is an + // error. (See: buf_poll) + void (*buf_update)(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset, + const void *data, size_t size); + + // Returns if a buffer is currently "in use" or not. Updating the contents + // of a buffer (via buf_update or writing to buf->data) while it is still + // in use is an error and may result in graphical corruption. Optional, if + // NULL then all buffers are always usable. + bool (*buf_poll)(struct ra *ra, struct ra_buf *buf); + + // Returns the layout requirements of a uniform buffer element. Optional, + // but must be implemented if RA_CAP_BUF_RO is supported. + struct ra_layout (*uniform_layout)(struct ra_renderpass_input *inp); + + // Clear the dst with the given color (rgba) and within the given scissor. + // dst must have dst->params.render_dst==true. Content outside of the + // scissor is preserved. + void (*clear)(struct ra *ra, struct ra_tex *dst, float color[4], + struct mp_rect *scissor); + + // Copy a sub-rectangle from one texture to another. The source/dest region + // is always within the texture bounds. Areas outside the dest region are + // preserved. The formats of the textures must be losely compatible. The + // dst texture can be a swapchain framebuffer, but src can not. Only 2D + // textures are supported. + // The textures must have blit_src and blit_dst set, respectively. + // Rectangles with negative width/height lead to flipping, different src/dst + // sizes lead to point scaling. Coordinates are always in pixels. + // Optional. Only available if RA_CAP_BLIT is set (if it's not set, it must + // not be called, even if it's non-NULL). + void (*blit)(struct ra *ra, struct ra_tex *dst, struct ra_tex *src, + struct mp_rect *dst_rc, struct mp_rect *src_rc); + + // Compile a shader and create a pipeline. This is a rare operation. + // The params pointer and anything it points to must stay valid until + // renderpass_destroy. + struct ra_renderpass *(*renderpass_create)(struct ra *ra, + const struct ra_renderpass_params *params); + + void (*renderpass_destroy)(struct ra *ra, struct ra_renderpass *pass); + + // Perform a render pass, basically drawing a list of triangles to a FBO. + // This is an extremely common operation. + void (*renderpass_run)(struct ra *ra, + const struct ra_renderpass_run_params *params); + + // Create a timer object. Returns NULL on failure, or if timers are + // unavailable for some reason. Optional. + ra_timer *(*timer_create)(struct ra *ra); + + void (*timer_destroy)(struct ra *ra, ra_timer *timer); + + // Start recording a timer. Note that valid usage requires you to pair + // every start with a stop. Trying to start a timer twice, or trying to + // stop a timer before having started it, consistutes invalid usage. + void (*timer_start)(struct ra *ra, ra_timer *timer); + + // Stop recording a timer. This also returns any results that have been + // measured since the last usage of this ra_timer. It's important to note + // that GPU timer measurement are asynchronous, so this function does not + // always produce a value - and the values it does produce are typically + // delayed by a few frames. When no value is available, this returns 0. 
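    // A typical (hypothetical) usage pattern, assuming the backend provides
    // timers at all:
    //
    //   ra_timer *t = ra->fns->timer_create(ra);
    //   if (t) {
    //       ra->fns->timer_start(ra, t);
    //       /* ...issue rendering work... */
    //       uint64_t res = ra->fns->timer_stop(ra, t); // 0 until results arrive
    //   }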
+ uint64_t (*timer_stop)(struct ra *ra, ra_timer *timer); + + // Associates a marker with any past error messages, for debugging + // purposes. Optional. + void (*debug_marker)(struct ra *ra, const char *msg); +}; + +struct ra_tex *ra_tex_create(struct ra *ra, const struct ra_tex_params *params); +void ra_tex_free(struct ra *ra, struct ra_tex **tex); + +struct ra_buf *ra_buf_create(struct ra *ra, const struct ra_buf_params *params); +void ra_buf_free(struct ra *ra, struct ra_buf **buf); + +void ra_free(struct ra **ra); + +const struct ra_format *ra_find_unorm_format(struct ra *ra, + int bytes_per_component, + int n_components); +const struct ra_format *ra_find_uint_format(struct ra *ra, + int bytes_per_component, + int n_components); +const struct ra_format *ra_find_float16_format(struct ra *ra, int n_components); +const struct ra_format *ra_find_named_format(struct ra *ra, const char *name); + +struct ra_imgfmt_desc { + int num_planes; + const struct ra_format *planes[4]; + // Chroma pixel size (1x1 is 4:4:4) + uint8_t chroma_w, chroma_h; + // Component storage size in bits (possibly padded). For formats with + // different sizes per component, this is arbitrary. For padded formats + // like P010 or YUV420P10, padding is included. + int component_bits; + // Like mp_regular_imgfmt.component_pad. + int component_pad; + // For each texture and each texture output (rgba order) describe what + // component it returns. + // The values are like the values in mp_regular_imgfmt_plane.components[]. + // Access as components[plane_nr][component_index]. Set unused items to 0. + // For ra_format.luminance_alpha, this returns 1/2 ("rg") instead of 1/4 + // ("ra"). the logic is that the texture format has 2 channels, thus the + // data must be returned in the first two components. The renderer fixes + // this later. + uint8_t components[4][4]; +}; + +bool ra_get_imgfmt_desc(struct ra *ra, int imgfmt, struct ra_imgfmt_desc *out); + +void ra_dump_tex_formats(struct ra *ra, int msgl); +void ra_dump_imgfmt_desc(struct ra *ra, const struct ra_imgfmt_desc *desc, + int msgl); +void ra_dump_img_formats(struct ra *ra, int msgl); diff --git a/video/out/gpu/shader_cache.c b/video/out/gpu/shader_cache.c new file mode 100644 index 0000000000..afda9cc036 --- /dev/null +++ b/video/out/gpu/shader_cache.c @@ -0,0 +1,954 @@ +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "osdep/io.h" + +#include "common/common.h" +#include "options/path.h" +#include "stream/stream.h" +#include "shader_cache.h" +#include "utils.h" + +// Force cache flush if more than this number of shaders is created. 
+#define SC_MAX_ENTRIES 48 + +union uniform_val { + float f[9]; // RA_VARTYPE_FLOAT + int i[4]; // RA_VARTYPE_INT + struct ra_tex *tex; // RA_VARTYPE_TEX, RA_VARTYPE_IMG_* + struct ra_buf *buf; // RA_VARTYPE_BUF_* +}; + +enum sc_uniform_type { + SC_UNIFORM_TYPE_GLOBAL = 0, // global uniform (RA_CAP_GLOBAL_UNIFORM) + SC_UNIFORM_TYPE_UBO = 1, // uniform buffer (RA_CAP_BUF_RO) +}; + +struct sc_uniform { + enum sc_uniform_type type; + struct ra_renderpass_input input; + const char *glsl_type; + union uniform_val v; + char *buffer_format; + // for SC_UNIFORM_TYPE_UBO: + struct ra_layout layout; + size_t offset; // byte offset within the buffer +}; + +struct sc_cached_uniform { + union uniform_val v; + int index; // for ra_renderpass_input_val + bool set; // whether the uniform has ever been set +}; + +struct sc_entry { + struct ra_renderpass *pass; + struct sc_cached_uniform *cached_uniforms; + int num_cached_uniforms; + bstr total; + struct timer_pool *timer; + struct ra_buf *ubo; + int ubo_index; // for ra_renderpass_input_val.index +}; + +struct gl_shader_cache { + struct ra *ra; + struct mp_log *log; + + // permanent + char **exts; + int num_exts; + + // this is modified during use (gl_sc_add() etc.) and reset for each shader + bstr prelude_text; + bstr header_text; + bstr text; + + // Next binding point (texture unit, image unit, buffer binding, etc.) + // In OpenGL these are separate for each input type + int next_binding[RA_VARTYPE_COUNT]; + + struct ra_renderpass_params params; + + struct sc_entry **entries; + int num_entries; + + struct sc_entry *current_shader; // set by gl_sc_generate() + + struct sc_uniform *uniforms; + int num_uniforms; + + int ubo_binding; + size_t ubo_size; + + struct ra_renderpass_input_val *values; + int num_values; + + // For checking that the user is calling gl_sc_reset() properly. + bool needs_reset; + + bool error_state; // true if an error occurred + + // temporary buffers (avoids frequent reallocations) + bstr tmp[6]; + + // For the disk-cache. + char *cache_dir; + struct mpv_global *global; // can be NULL +}; + +static void gl_sc_reset(struct gl_shader_cache *sc); + +struct gl_shader_cache *gl_sc_create(struct ra *ra, struct mpv_global *global, + struct mp_log *log) +{ + struct gl_shader_cache *sc = talloc_ptrtype(NULL, sc); + *sc = (struct gl_shader_cache){ + .ra = ra, + .global = global, + .log = log, + }; + gl_sc_reset(sc); + return sc; +} + +// Reset the previous pass. This must be called after gl_sc_generate and before +// starting a new shader. 
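/*
 * Hypothetical caller-side sketch of one pass through the cache: uniforms and
 * fragment shader text are added, then gl_sc_dispatch_draw() generates (or
 * reuses) the renderpass, runs it, and resets the cache state for the next
 * shader. Assumes a vertex format including a "texcoord" attribute was
 * registered earlier via gl_sc_set_vertex_format(); names are illustrative.
 */
static void example_draw_textured(struct gl_shader_cache *sc,
                                  struct ra_tex *target, struct ra_tex *src,
                                  void *vertices, size_t num_vertices)
{
    gl_sc_uniform_texture(sc, "tex", src);
    gl_sc_uniform_f(sc, "alpha", 1.0f);
    gl_sc_add(sc, "color = texture(tex, texcoord);\n");
    gl_sc_add(sc, "color.a *= alpha;\n");
    gl_sc_dispatch_draw(sc, target, vertices, num_vertices);
}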
+static void gl_sc_reset(struct gl_shader_cache *sc) +{ + sc->prelude_text.len = 0; + sc->header_text.len = 0; + sc->text.len = 0; + for (int n = 0; n < sc->num_uniforms; n++) + talloc_free((void *)sc->uniforms[n].input.name); + sc->num_uniforms = 0; + sc->ubo_binding = 0; + sc->ubo_size = 0; + for (int i = 0; i < RA_VARTYPE_COUNT; i++) + sc->next_binding[i] = 0; + sc->current_shader = NULL; + sc->params = (struct ra_renderpass_params){0}; + sc->needs_reset = false; +} + +static void sc_flush_cache(struct gl_shader_cache *sc) +{ + MP_VERBOSE(sc, "flushing shader cache\n"); + + for (int n = 0; n < sc->num_entries; n++) { + struct sc_entry *e = sc->entries[n]; + ra_buf_free(sc->ra, &e->ubo); + if (e->pass) + sc->ra->fns->renderpass_destroy(sc->ra, e->pass); + timer_pool_destroy(e->timer); + talloc_free(e); + } + sc->num_entries = 0; +} + +void gl_sc_destroy(struct gl_shader_cache *sc) +{ + if (!sc) + return; + gl_sc_reset(sc); + sc_flush_cache(sc); + talloc_free(sc); +} + +bool gl_sc_error_state(struct gl_shader_cache *sc) +{ + return sc->error_state; +} + +void gl_sc_reset_error(struct gl_shader_cache *sc) +{ + sc->error_state = false; +} + +void gl_sc_enable_extension(struct gl_shader_cache *sc, char *name) +{ + for (int n = 0; n < sc->num_exts; n++) { + if (strcmp(sc->exts[n], name) == 0) + return; + } + MP_TARRAY_APPEND(sc, sc->exts, sc->num_exts, talloc_strdup(sc, name)); +} + +#define bstr_xappend0(sc, b, s) bstr_xappend(sc, b, bstr0(s)) + +void gl_sc_add(struct gl_shader_cache *sc, const char *text) +{ + bstr_xappend0(sc, &sc->text, text); +} + +void gl_sc_addf(struct gl_shader_cache *sc, const char *textf, ...) +{ + va_list ap; + va_start(ap, textf); + bstr_xappend_vasprintf(sc, &sc->text, textf, ap); + va_end(ap); +} + +void gl_sc_hadd(struct gl_shader_cache *sc, const char *text) +{ + bstr_xappend0(sc, &sc->header_text, text); +} + +void gl_sc_haddf(struct gl_shader_cache *sc, const char *textf, ...) +{ + va_list ap; + va_start(ap, textf); + bstr_xappend_vasprintf(sc, &sc->header_text, textf, ap); + va_end(ap); +} + +void gl_sc_hadd_bstr(struct gl_shader_cache *sc, struct bstr text) +{ + bstr_xappend(sc, &sc->header_text, text); +} + +void gl_sc_paddf(struct gl_shader_cache *sc, const char *textf, ...) +{ + va_list ap; + va_start(ap, textf); + bstr_xappend_vasprintf(sc, &sc->prelude_text, textf, ap); + va_end(ap); +} + +static struct sc_uniform *find_uniform(struct gl_shader_cache *sc, + const char *name) +{ + struct sc_uniform new = { + .input = { + .dim_v = 1, + .dim_m = 1, + }, + }; + + for (int n = 0; n < sc->num_uniforms; n++) { + struct sc_uniform *u = &sc->uniforms[n]; + if (strcmp(u->input.name, name) == 0) { + const char *allocname = u->input.name; + *u = new; + u->input.name = allocname; + return u; + } + } + + // not found -> add it + new.input.name = talloc_strdup(NULL, name); + MP_TARRAY_APPEND(sc, sc->uniforms, sc->num_uniforms, new); + return &sc->uniforms[sc->num_uniforms - 1]; +} + +static int gl_sc_next_binding(struct gl_shader_cache *sc, enum ra_vartype type) +{ + if (sc->ra->caps & RA_CAP_SHARED_BINDING) { + return sc->next_binding[type]++; + } else { + return sc->next_binding[0]++; + } +} + +// Updates the UBO metadata for the given sc_uniform. Assumes sc_uniform->input +// is already set. Also updates sc_uniform->type. +static void update_ubo_params(struct gl_shader_cache *sc, struct sc_uniform *u) +{ + if (!(sc->ra->caps & RA_CAP_BUF_RO)) + return; + + // Using UBOs with explicit layout(offset) like we do requires GLSL version + // 440 or higher. 
In theory the UBO code can also use older versions, but + // just try and avoid potential headaches. This also ensures they're only + // used on drivers that are probably modern enough to actually support them + // correctly. + if (sc->ra->glsl_version < 440) + return; + + u->type = SC_UNIFORM_TYPE_UBO; + u->layout = sc->ra->fns->uniform_layout(&u->input); + u->offset = MP_ALIGN_UP(sc->ubo_size, u->layout.align); + sc->ubo_size = u->offset + u->layout.size; +} + +void gl_sc_uniform_texture(struct gl_shader_cache *sc, char *name, + struct ra_tex *tex) +{ + const char *glsl_type = "sampler2D"; + if (tex->params.dimensions == 1) { + glsl_type = "sampler1D"; + } else if (tex->params.dimensions == 3) { + glsl_type = "sampler3D"; + } else if (tex->params.non_normalized) { + glsl_type = "sampler2DRect"; + } else if (tex->params.external_oes) { + glsl_type = "samplerExternalOES"; + } else if (tex->params.format->ctype == RA_CTYPE_UINT) { + glsl_type = sc->ra->glsl_es ? "highp usampler2D" : "usampler2D"; + } + + struct sc_uniform *u = find_uniform(sc, name); + u->input.type = RA_VARTYPE_TEX; + u->glsl_type = glsl_type; + u->input.binding = gl_sc_next_binding(sc, u->input.type); + u->v.tex = tex; +} + +void gl_sc_uniform_image2D_wo(struct gl_shader_cache *sc, const char *name, + struct ra_tex *tex) +{ + gl_sc_enable_extension(sc, "GL_ARB_shader_image_load_store"); + + struct sc_uniform *u = find_uniform(sc, name); + u->input.type = RA_VARTYPE_IMG_W; + u->glsl_type = "writeonly image2D"; + u->input.binding = gl_sc_next_binding(sc, u->input.type); + u->v.tex = tex; +} + +void gl_sc_ssbo(struct gl_shader_cache *sc, char *name, struct ra_buf *buf, + char *format, ...) +{ + assert(sc->ra->caps & RA_CAP_BUF_RW); + gl_sc_enable_extension(sc, "GL_ARB_shader_storage_buffer_object"); + + struct sc_uniform *u = find_uniform(sc, name); + u->input.type = RA_VARTYPE_BUF_RW; + u->glsl_type = ""; + u->input.binding = gl_sc_next_binding(sc, u->input.type); + u->v.buf = buf; + + va_list ap; + va_start(ap, format); + u->buffer_format = ta_vasprintf(sc, format, ap); + va_end(ap); +} + +void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, float f) +{ + struct sc_uniform *u = find_uniform(sc, name); + u->input.type = RA_VARTYPE_FLOAT; + u->glsl_type = "float"; + update_ubo_params(sc, u); + u->v.f[0] = f; +} + +void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, int i) +{ + struct sc_uniform *u = find_uniform(sc, name); + u->input.type = RA_VARTYPE_INT; + u->glsl_type = "int"; + update_ubo_params(sc, u); + u->v.i[0] = i; +} + +void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, float f[2]) +{ + struct sc_uniform *u = find_uniform(sc, name); + u->input.type = RA_VARTYPE_FLOAT; + u->input.dim_v = 2; + u->glsl_type = "vec2"; + update_ubo_params(sc, u); + u->v.f[0] = f[0]; + u->v.f[1] = f[1]; +} + +void gl_sc_uniform_vec3(struct gl_shader_cache *sc, char *name, float f[3]) +{ + struct sc_uniform *u = find_uniform(sc, name); + u->input.type = RA_VARTYPE_FLOAT; + u->input.dim_v = 3; + u->glsl_type = "vec3"; + update_ubo_params(sc, u); + u->v.f[0] = f[0]; + u->v.f[1] = f[1]; + u->v.f[2] = f[2]; +} + +static void transpose2x2(float r[2 * 2]) +{ + MPSWAP(float, r[0+2*1], r[1+2*0]); +} + +void gl_sc_uniform_mat2(struct gl_shader_cache *sc, char *name, + bool transpose, float *v) +{ + struct sc_uniform *u = find_uniform(sc, name); + u->input.type = RA_VARTYPE_FLOAT; + u->input.dim_v = 2; + u->input.dim_m = 2; + u->glsl_type = "mat2"; + update_ubo_params(sc, u); + for (int n = 0; n < 4; n++) + u->v.f[n] = 
v[n]; + if (transpose) + transpose2x2(&u->v.f[0]); +} + +static void transpose3x3(float r[3 * 3]) +{ + MPSWAP(float, r[0+3*1], r[1+3*0]); + MPSWAP(float, r[0+3*2], r[2+3*0]); + MPSWAP(float, r[1+3*2], r[2+3*1]); +} + +void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name, + bool transpose, float *v) +{ + struct sc_uniform *u = find_uniform(sc, name); + u->input.type = RA_VARTYPE_FLOAT; + u->input.dim_v = 3; + u->input.dim_m = 3; + u->glsl_type = "mat3"; + update_ubo_params(sc, u); + for (int n = 0; n < 9; n++) + u->v.f[n] = v[n]; + if (transpose) + transpose3x3(&u->v.f[0]); +} + +// Tell the shader generator (and later gl_sc_draw_data()) about the vertex +// data layout and attribute names. The entries array is terminated with a {0} +// entry. The array memory must remain valid indefinitely (for now). +void gl_sc_set_vertex_format(struct gl_shader_cache *sc, + const struct ra_renderpass_input *entries, + int vertex_stride) +{ + sc->params.vertex_attribs = (struct ra_renderpass_input *)entries; + sc->params.num_vertex_attribs = 0; + while (entries[sc->params.num_vertex_attribs].name) + sc->params.num_vertex_attribs++; + sc->params.vertex_stride = vertex_stride; +} + +void gl_sc_blend(struct gl_shader_cache *sc, + enum ra_blend blend_src_rgb, + enum ra_blend blend_dst_rgb, + enum ra_blend blend_src_alpha, + enum ra_blend blend_dst_alpha) +{ + sc->params.enable_blend = true; + sc->params.blend_src_rgb = blend_src_rgb; + sc->params.blend_dst_rgb = blend_dst_rgb; + sc->params.blend_src_alpha = blend_src_alpha; + sc->params.blend_dst_alpha = blend_dst_alpha; +} + +static const char *vao_glsl_type(const struct ra_renderpass_input *e) +{ + // pretty dumb... too dumb, but works for us + switch (e->dim_v) { + case 1: return "float"; + case 2: return "vec2"; + case 3: return "vec3"; + case 4: return "vec4"; + default: abort(); + } +} + +static void update_ubo(struct ra *ra, struct ra_buf *ubo, struct sc_uniform *u) +{ + uintptr_t src = (uintptr_t) &u->v; + size_t dst = u->offset; + struct ra_layout src_layout = ra_renderpass_input_layout(&u->input); + struct ra_layout dst_layout = u->layout; + + for (int i = 0; i < u->input.dim_m; i++) { + ra->fns->buf_update(ra, ubo, dst, (void *)src, src_layout.stride); + src += src_layout.stride; + dst += dst_layout.stride; + } +} + +static void update_uniform(struct gl_shader_cache *sc, struct sc_entry *e, + struct sc_uniform *u, int n) +{ + struct sc_cached_uniform *un = &e->cached_uniforms[n]; + struct ra_layout layout = ra_renderpass_input_layout(&u->input); + if (layout.size > 0 && un->set && memcmp(&un->v, &u->v, layout.size) == 0) + return; + + un->v = u->v; + un->set = true; + + switch (u->type) { + case SC_UNIFORM_TYPE_GLOBAL: { + struct ra_renderpass_input_val value = { + .index = un->index, + .data = &un->v, + }; + MP_TARRAY_APPEND(sc, sc->values, sc->num_values, value); + break; + } + case SC_UNIFORM_TYPE_UBO: + assert(e->ubo); + update_ubo(sc->ra, e->ubo, u); + break; + default: abort(); + } +} + +void gl_sc_set_cache_dir(struct gl_shader_cache *sc, const char *dir) +{ + talloc_free(sc->cache_dir); + sc->cache_dir = talloc_strdup(sc, dir); +} + +static bool create_pass(struct gl_shader_cache *sc, struct sc_entry *entry) +{ + bool ret = false; + + void *tmp = talloc_new(NULL); + struct ra_renderpass_params params = sc->params; + + MP_VERBOSE(sc, "new shader program:\n"); + if (sc->header_text.len) { + MP_VERBOSE(sc, "header:\n"); + mp_log_source(sc->log, MSGL_V, sc->header_text.start); + MP_VERBOSE(sc, "body:\n"); + } + if (sc->text.len) + 
mp_log_source(sc->log, MSGL_V, sc->text.start); + + // The vertex shader uses mangled names for the vertex attributes, so that + // the fragment shader can use the "real" names. But the shader is expecting + // the vertex attribute names (at least with older GLSL targets for GL). + params.vertex_attribs = talloc_memdup(tmp, params.vertex_attribs, + params.num_vertex_attribs * sizeof(params.vertex_attribs[0])); + for (int n = 0; n < params.num_vertex_attribs; n++) { + struct ra_renderpass_input *attrib = ¶ms.vertex_attribs[n]; + attrib->name = talloc_asprintf(tmp, "vertex_%s", attrib->name); + } + + const char *cache_header = "mpv shader cache v1\n"; + char *cache_filename = NULL; + char *cache_dir = NULL; + + if (sc->cache_dir && sc->cache_dir[0]) { + // Try to load it from a disk cache. + cache_dir = mp_get_user_path(tmp, sc->global, sc->cache_dir); + + struct AVSHA *sha = av_sha_alloc(); + if (!sha) + abort(); + av_sha_init(sha, 256); + av_sha_update(sha, entry->total.start, entry->total.len); + + uint8_t hash[256 / 8]; + av_sha_final(sha, hash); + av_free(sha); + + char hashstr[256 / 8 * 2 + 1]; + for (int n = 0; n < 256 / 8; n++) + snprintf(hashstr + n * 2, sizeof(hashstr) - n * 2, "%02X", hash[n]); + + cache_filename = mp_path_join(tmp, cache_dir, hashstr); + if (stat(cache_filename, &(struct stat){0}) == 0) { + MP_VERBOSE(sc, "Trying to load shader from disk...\n"); + struct bstr cachedata = + stream_read_file(cache_filename, tmp, sc->global, 1000000000); + if (bstr_eatstart0(&cachedata, cache_header)) + params.cached_program = cachedata; + } + } + + // If using a UBO, also make sure to add it as an input value so the RA + // can see it + if (sc->ubo_size) { + entry->ubo_index = sc->params.num_inputs; + struct ra_renderpass_input ubo_input = { + .name = "UBO", + .type = RA_VARTYPE_BUF_RO, + .dim_v = 1, + .dim_m = 1, + .binding = sc->ubo_binding, + }; + MP_TARRAY_APPEND(sc, params.inputs, params.num_inputs, ubo_input); + } + + entry->pass = sc->ra->fns->renderpass_create(sc->ra, ¶ms); + if (!entry->pass) + goto error; + + if (sc->ubo_size) { + struct ra_buf_params ubo_params = { + .type = RA_BUF_TYPE_UNIFORM, + .size = sc->ubo_size, + .host_mutable = true, + }; + + entry->ubo = ra_buf_create(sc->ra, &ubo_params); + if (!entry->ubo) { + MP_ERR(sc, "Failed creating uniform buffer!\n"); + goto error; + } + } + + if (entry->pass && cache_filename) { + bstr nc = entry->pass->params.cached_program; + if (nc.len && !bstr_equals(params.cached_program, nc)) { + mp_mkdirp(cache_dir); + + MP_VERBOSE(sc, "Writing shader cache file: %s\n", cache_filename); + FILE *out = fopen(cache_filename, "wb"); + if (out) { + fwrite(cache_header, strlen(cache_header), 1, out); + fwrite(nc.start, nc.len, 1, out); + fclose(out); + } + } + } + + ret = true; + +error: + talloc_free(tmp); + return ret; +} + +#define ADD(x, ...) 
bstr_xappend_asprintf(sc, (x), __VA_ARGS__) +#define ADD_BSTR(x, s) bstr_xappend(sc, (x), (s)) + +static void add_uniforms(struct gl_shader_cache *sc, bstr *dst) +{ + // Add all of the UBO entries separately as members of their own buffer + if (sc->ubo_size > 0) { + ADD(dst, "layout(std140, binding=%d) uniform UBO {\n", sc->ubo_binding); + for (int n = 0; n < sc->num_uniforms; n++) { + struct sc_uniform *u = &sc->uniforms[n]; + if (u->type != SC_UNIFORM_TYPE_UBO) + continue; + ADD(dst, "layout(offset=%zu) %s %s;\n", u->offset, + u->glsl_type, u->input.name); + } + ADD(dst, "};\n"); + } + + for (int n = 0; n < sc->num_uniforms; n++) { + struct sc_uniform *u = &sc->uniforms[n]; + if (u->type != SC_UNIFORM_TYPE_GLOBAL) + continue; + switch (u->input.type) { + case RA_VARTYPE_INT: + case RA_VARTYPE_FLOAT: + assert(sc->ra->caps & RA_CAP_GLOBAL_UNIFORM); + // fall through + case RA_VARTYPE_TEX: + case RA_VARTYPE_IMG_W: + // Vulkan requires explicitly assigning the bindings in the shader + // source. For OpenGL it's optional, but requires higher GL version + // so we don't do it (and instead have ra_gl update the bindings + // after program creation). + if (sc->ra->glsl_vulkan) + ADD(dst, "layout(binding=%d) ", u->input.binding); + ADD(dst, "uniform %s %s;\n", u->glsl_type, u->input.name); + break; + case RA_VARTYPE_BUF_RO: + ADD(dst, "layout(std140, binding=%d) uniform %s { %s };\n", + u->input.binding, u->input.name, u->buffer_format); + break; + case RA_VARTYPE_BUF_RW: + ADD(dst, "layout(std430, binding=%d) buffer %s { %s };\n", + u->input.binding, u->input.name, u->buffer_format); + break; + } + } +} + +// 1. Generate vertex and fragment shaders from the fragment shader text added +// with gl_sc_add(). The generated shader program is cached (based on the +// text), so actual compilation happens only the first time. +// 2. Update the uniforms and textures set with gl_sc_uniform_*. +// 3. Make the new shader program current (glUseProgram()). +// After that, you render, and then you call gc_sc_reset(), which does: +// 1. Unbind the program and all textures. +// 2. Reset the sc state and prepare for a new shader program. (All uniforms +// and fragment operations needed for the next program have to be re-added.) +static void gl_sc_generate(struct gl_shader_cache *sc, + enum ra_renderpass_type type, + const struct ra_format *target_format) +{ + int glsl_version = sc->ra->glsl_version; + int glsl_es = sc->ra->glsl_es ? glsl_version : 0; + + sc->params.type = type; + + // gl_sc_reset() must be called after ending the previous render process, + // and before starting a new one. + assert(!sc->needs_reset); + sc->needs_reset = true; + + // gl_sc_set_vertex_format() must always be called + assert(sc->params.vertex_attribs); + + // If using a UBO, pick a binding (needed for shader generation) + if (sc->ubo_size) + sc->ubo_binding = gl_sc_next_binding(sc, RA_VARTYPE_BUF_RO); + + for (int n = 0; n < MP_ARRAY_SIZE(sc->tmp); n++) + sc->tmp[n].len = 0; + + // set up shader text (header + uniforms + body) + bstr *header = &sc->tmp[0]; + ADD(header, "#version %d%s\n", glsl_version, glsl_es >= 300 ? " es" : ""); + if (type == RA_RENDERPASS_TYPE_COMPUTE) { + // This extension cannot be enabled in fragment shader. Enable it as + // an exception for compute shader. 
+ ADD(header, "#extension GL_ARB_compute_shader : enable\n"); + } + for (int n = 0; n < sc->num_exts; n++) + ADD(header, "#extension %s : enable\n", sc->exts[n]); + if (glsl_es) { + ADD(header, "precision mediump float;\n"); + ADD(header, "precision mediump sampler2D;\n"); + if (sc->ra->caps & RA_CAP_TEX_3D) + ADD(header, "precision mediump sampler3D;\n"); + } + + if (glsl_version >= 130) { + ADD(header, "#define tex1D texture\n"); + ADD(header, "#define tex3D texture\n"); + } else { + ADD(header, "#define tex1D texture1D\n"); + ADD(header, "#define tex3D texture3D\n"); + ADD(header, "#define texture texture2D\n"); + } + + if (sc->ra->glsl_vulkan && type == RA_RENDERPASS_TYPE_COMPUTE) { + ADD(header, "#define gl_GlobalInvocationIndex " + "(gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID)\n"); + } + + // Additional helpers. + ADD(header, "#define LUT_POS(x, lut_size)" + " mix(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x))\n"); + + char *vert_in = glsl_version >= 130 ? "in" : "attribute"; + char *vert_out = glsl_version >= 130 ? "out" : "varying"; + char *frag_in = glsl_version >= 130 ? "in" : "varying"; + + struct bstr *vert = NULL, *frag = NULL, *comp = NULL; + + if (type == RA_RENDERPASS_TYPE_RASTER) { + // vertex shader: we don't use the vertex shader, so just setup a + // dummy, which passes through the vertex array attributes. + bstr *vert_head = &sc->tmp[1]; + ADD_BSTR(vert_head, *header); + bstr *vert_body = &sc->tmp[2]; + ADD(vert_body, "void main() {\n"); + bstr *frag_vaos = &sc->tmp[3]; + for (int n = 0; n < sc->params.num_vertex_attribs; n++) { + const struct ra_renderpass_input *e = &sc->params.vertex_attribs[n]; + const char *glsl_type = vao_glsl_type(e); + char loc[32] = {0}; + if (sc->ra->glsl_vulkan) + snprintf(loc, sizeof(loc), "layout(location=%d) ", n); + if (strcmp(e->name, "position") == 0) { + // setting raster pos. requires setting gl_Position magic variable + assert(e->dim_v == 2 && e->type == RA_VARTYPE_FLOAT); + ADD(vert_head, "%s%s vec2 vertex_position;\n", loc, vert_in); + ADD(vert_body, "gl_Position = vec4(vertex_position, 1.0, 1.0);\n"); + } else { + ADD(vert_head, "%s%s %s vertex_%s;\n", loc, vert_in, glsl_type, e->name); + ADD(vert_head, "%s%s %s %s;\n", loc, vert_out, glsl_type, e->name); + ADD(vert_body, "%s = vertex_%s;\n", e->name, e->name); + ADD(frag_vaos, "%s%s %s %s;\n", loc, frag_in, glsl_type, e->name); + } + } + ADD(vert_body, "}\n"); + vert = vert_head; + ADD_BSTR(vert, *vert_body); + + // fragment shader; still requires adding used uniforms and VAO elements + frag = &sc->tmp[4]; + ADD_BSTR(frag, *header); + if (glsl_version >= 130) { + ADD(frag, "%sout vec4 out_color;\n", + sc->ra->glsl_vulkan ? 
"layout(location=0) " : ""); + } + ADD_BSTR(frag, *frag_vaos); + add_uniforms(sc, frag); + + ADD_BSTR(frag, sc->prelude_text); + ADD_BSTR(frag, sc->header_text); + + ADD(frag, "void main() {\n"); + // we require _all_ frag shaders to write to a "vec4 color" + ADD(frag, "vec4 color = vec4(0.0, 0.0, 0.0, 1.0);\n"); + ADD_BSTR(frag, sc->text); + if (glsl_version >= 130) { + ADD(frag, "out_color = color;\n"); + } else { + ADD(frag, "gl_FragColor = color;\n"); + } + ADD(frag, "}\n"); + + // We need to fix the format of the render dst at renderpass creation + // time + assert(target_format); + sc->params.target_format = target_format; + } + + if (type == RA_RENDERPASS_TYPE_COMPUTE) { + comp = &sc->tmp[4]; + ADD_BSTR(comp, *header); + + add_uniforms(sc, comp); + + ADD_BSTR(comp, sc->prelude_text); + ADD_BSTR(comp, sc->header_text); + + ADD(comp, "void main() {\n"); + ADD(comp, "vec4 color = vec4(0.0, 0.0, 0.0, 1.0);\n"); // convenience + ADD_BSTR(comp, sc->text); + ADD(comp, "}\n"); + } + + bstr *hash_total = &sc->tmp[5]; + + ADD(hash_total, "type %d\n", sc->params.type); + + if (frag) { + ADD_BSTR(hash_total, *frag); + sc->params.frag_shader = frag->start; + } + ADD(hash_total, "\n"); + if (vert) { + ADD_BSTR(hash_total, *vert); + sc->params.vertex_shader = vert->start; + } + ADD(hash_total, "\n"); + if (comp) { + ADD_BSTR(hash_total, *comp); + sc->params.compute_shader = comp->start; + } + ADD(hash_total, "\n"); + + if (sc->params.enable_blend) { + ADD(hash_total, "blend %d %d %d %d\n", + sc->params.blend_src_rgb, sc->params.blend_dst_rgb, + sc->params.blend_src_alpha, sc->params.blend_dst_alpha); + } + + if (sc->params.target_format) + ADD(hash_total, "format %s\n", sc->params.target_format->name); + + struct sc_entry *entry = NULL; + for (int n = 0; n < sc->num_entries; n++) { + struct sc_entry *cur = sc->entries[n]; + if (bstr_equals(cur->total, *hash_total)) { + entry = cur; + break; + } + } + if (!entry) { + if (sc->num_entries == SC_MAX_ENTRIES) + sc_flush_cache(sc); + entry = talloc_ptrtype(NULL, entry); + *entry = (struct sc_entry){ + .total = bstrdup(entry, *hash_total), + .timer = timer_pool_create(sc->ra), + }; + for (int n = 0; n < sc->num_uniforms; n++) { + struct sc_cached_uniform u = {0}; + if (sc->uniforms[n].type == SC_UNIFORM_TYPE_GLOBAL) { + // global uniforms need to be made visible to the ra_renderpass + u.index = sc->params.num_inputs; + MP_TARRAY_APPEND(sc, sc->params.inputs, sc->params.num_inputs, + sc->uniforms[n].input); + } + MP_TARRAY_APPEND(entry, entry->cached_uniforms, + entry->num_cached_uniforms, u); + } + if (!create_pass(sc, entry)) + sc->error_state = true; + MP_TARRAY_APPEND(sc, sc->entries, sc->num_entries, entry); + } + if (sc->error_state) + return; + + assert(sc->num_uniforms == entry->num_cached_uniforms); + + sc->num_values = 0; + for (int n = 0; n < sc->num_uniforms; n++) + update_uniform(sc, entry, &sc->uniforms[n], n); + + // If we're using a UBO, make sure to bind it as well + if (sc->ubo_size) { + struct ra_renderpass_input_val ubo_val = { + .index = entry->ubo_index, + .data = &entry->ubo, + }; + MP_TARRAY_APPEND(sc, sc->values, sc->num_values, ubo_val); + } + + sc->current_shader = entry; +} + +struct mp_pass_perf gl_sc_dispatch_draw(struct gl_shader_cache *sc, + struct ra_tex *target, + void *ptr, size_t num) +{ + struct timer_pool *timer = NULL; + + gl_sc_generate(sc, RA_RENDERPASS_TYPE_RASTER, target->params.format); + if (!sc->current_shader) + goto error; + + timer = sc->current_shader->timer; + + struct mp_rect full_rc = {0, 0, 
target->params.w, target->params.h}; + + struct ra_renderpass_run_params run = { + .pass = sc->current_shader->pass, + .values = sc->values, + .num_values = sc->num_values, + .target = target, + .vertex_data = ptr, + .vertex_count = num, + .viewport = full_rc, + .scissors = full_rc, + }; + + timer_pool_start(timer); + sc->ra->fns->renderpass_run(sc->ra, &run); + timer_pool_stop(timer); + +error: + gl_sc_reset(sc); + return timer_pool_measure(timer); +} + +struct mp_pass_perf gl_sc_dispatch_compute(struct gl_shader_cache *sc, + int w, int h, int d) +{ + struct timer_pool *timer = NULL; + + gl_sc_generate(sc, RA_RENDERPASS_TYPE_COMPUTE, NULL); + if (!sc->current_shader) + goto error; + + timer = sc->current_shader->timer; + + struct ra_renderpass_run_params run = { + .pass = sc->current_shader->pass, + .values = sc->values, + .num_values = sc->num_values, + .compute_groups = {w, h, d}, + }; + + timer_pool_start(timer); + sc->ra->fns->renderpass_run(sc->ra, &run); + timer_pool_stop(timer); + +error: + gl_sc_reset(sc); + return timer_pool_measure(timer); +} diff --git a/video/out/gpu/shader_cache.h b/video/out/gpu/shader_cache.h new file mode 100644 index 0000000000..82a078079b --- /dev/null +++ b/video/out/gpu/shader_cache.h @@ -0,0 +1,56 @@ +#pragma once + +#include "common/common.h" +#include "misc/bstr.h" +#include "ra.h" + +// For mp_pass_perf +#include "video/out/vo.h" + +struct mp_log; +struct mpv_global; +struct gl_shader_cache; + +struct gl_shader_cache *gl_sc_create(struct ra *ra, struct mpv_global *global, + struct mp_log *log); +void gl_sc_destroy(struct gl_shader_cache *sc); +bool gl_sc_error_state(struct gl_shader_cache *sc); +void gl_sc_reset_error(struct gl_shader_cache *sc); +void gl_sc_add(struct gl_shader_cache *sc, const char *text); +void gl_sc_addf(struct gl_shader_cache *sc, const char *textf, ...) + PRINTF_ATTRIBUTE(2, 3); +void gl_sc_hadd(struct gl_shader_cache *sc, const char *text); +void gl_sc_haddf(struct gl_shader_cache *sc, const char *textf, ...) + PRINTF_ATTRIBUTE(2, 3); +void gl_sc_hadd_bstr(struct gl_shader_cache *sc, struct bstr text); +void gl_sc_paddf(struct gl_shader_cache *sc, const char *textf, ...) + PRINTF_ATTRIBUTE(2, 3); +void gl_sc_uniform_texture(struct gl_shader_cache *sc, char *name, + struct ra_tex *tex); +void gl_sc_uniform_image2D_wo(struct gl_shader_cache *sc, const char *name, + struct ra_tex *tex); +void gl_sc_ssbo(struct gl_shader_cache *sc, char *name, struct ra_buf *buf, + char *format, ...) 
PRINTF_ATTRIBUTE(4, 5); +void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, float f); +void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, int f); +void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, float f[2]); +void gl_sc_uniform_vec3(struct gl_shader_cache *sc, char *name, float f[3]); +void gl_sc_uniform_mat2(struct gl_shader_cache *sc, char *name, + bool transpose, float *v); +void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name, + bool transpose, float *v); +void gl_sc_set_vertex_format(struct gl_shader_cache *sc, + const struct ra_renderpass_input *vertex_attribs, + int vertex_stride); +void gl_sc_blend(struct gl_shader_cache *sc, + enum ra_blend blend_src_rgb, + enum ra_blend blend_dst_rgb, + enum ra_blend blend_src_alpha, + enum ra_blend blend_dst_alpha); +void gl_sc_enable_extension(struct gl_shader_cache *sc, char *name); +struct mp_pass_perf gl_sc_dispatch_draw(struct gl_shader_cache *sc, + struct ra_tex *target, + void *ptr, size_t num); +struct mp_pass_perf gl_sc_dispatch_compute(struct gl_shader_cache *sc, + int w, int h, int d); +void gl_sc_set_cache_dir(struct gl_shader_cache *sc, const char *dir); diff --git a/video/out/gpu/user_shaders.c b/video/out/gpu/user_shaders.c new file mode 100644 index 0000000000..446941b03f --- /dev/null +++ b/video/out/gpu/user_shaders.c @@ -0,0 +1,452 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see . + */ + +#include + +#include "common/msg.h" +#include "misc/ctype.h" +#include "user_shaders.h" + +static bool parse_rpn_szexpr(struct bstr line, struct szexp out[MAX_SZEXP_SIZE]) +{ + int pos = 0; + + while (line.len > 0) { + struct bstr word = bstr_strip(bstr_splitchar(line, &line, ' ')); + if (word.len == 0) + continue; + + if (pos >= MAX_SZEXP_SIZE) + return false; + + struct szexp *exp = &out[pos++]; + + if (bstr_eatend0(&word, ".w") || bstr_eatend0(&word, ".width")) { + exp->tag = SZEXP_VAR_W; + exp->val.varname = word; + continue; + } + + if (bstr_eatend0(&word, ".h") || bstr_eatend0(&word, ".height")) { + exp->tag = SZEXP_VAR_H; + exp->val.varname = word; + continue; + } + + switch (word.start[0]) { + case '+': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_ADD; continue; + case '-': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_SUB; continue; + case '*': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_MUL; continue; + case '/': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_DIV; continue; + case '!': exp->tag = SZEXP_OP1; exp->val.op = SZEXP_OP_NOT; continue; + case '>': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_GT; continue; + case '<': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_LT; continue; + } + + if (mp_isdigit(word.start[0])) { + exp->tag = SZEXP_CONST; + if (bstr_sscanf(word, "%f", &exp->val.cval) != 1) + return false; + continue; + } + + // Some sort of illegal expression + return false; + } + + return true; +} + +// Returns whether successful. 
'result' is left untouched on failure +bool eval_szexpr(struct mp_log *log, void *priv, + bool (*lookup)(void *priv, struct bstr var, float size[2]), + struct szexp expr[MAX_SZEXP_SIZE], float *result) +{ + float stack[MAX_SZEXP_SIZE] = {0}; + int idx = 0; // points to next element to push + + for (int i = 0; i < MAX_SZEXP_SIZE; i++) { + switch (expr[i].tag) { + case SZEXP_END: + goto done; + + case SZEXP_CONST: + // Since our SZEXPs are bound by MAX_SZEXP_SIZE, it should be + // impossible to overflow the stack + assert(idx < MAX_SZEXP_SIZE); + stack[idx++] = expr[i].val.cval; + continue; + + case SZEXP_OP1: + if (idx < 1) { + mp_warn(log, "Stack underflow in RPN expression!\n"); + return false; + } + + switch (expr[i].val.op) { + case SZEXP_OP_NOT: stack[idx-1] = !stack[idx-1]; break; + default: abort(); + } + continue; + + case SZEXP_OP2: + if (idx < 2) { + mp_warn(log, "Stack underflow in RPN expression!\n"); + return false; + } + + // Pop the operands in reverse order + float op2 = stack[--idx]; + float op1 = stack[--idx]; + float res = 0.0; + switch (expr[i].val.op) { + case SZEXP_OP_ADD: res = op1 + op2; break; + case SZEXP_OP_SUB: res = op1 - op2; break; + case SZEXP_OP_MUL: res = op1 * op2; break; + case SZEXP_OP_DIV: res = op1 / op2; break; + case SZEXP_OP_GT: res = op1 > op2; break; + case SZEXP_OP_LT: res = op1 < op2; break; + default: abort(); + } + + if (!isfinite(res)) { + mp_warn(log, "Illegal operation in RPN expression!\n"); + return false; + } + + stack[idx++] = res; + continue; + + case SZEXP_VAR_W: + case SZEXP_VAR_H: { + struct bstr name = expr[i].val.varname; + float size[2]; + + if (!lookup(priv, name, size)) { + mp_warn(log, "Variable %.*s not found in RPN expression!\n", + BSTR_P(name)); + return false; + } + + stack[idx++] = (expr[i].tag == SZEXP_VAR_W) ? 
size[0] : size[1]; + continue; + } + } + } + +done: + // Return the single stack element + if (idx != 1) { + mp_warn(log, "Malformed stack after RPN expression!\n"); + return false; + } + + *result = stack[0]; + return true; +} + +static bool parse_hook(struct mp_log *log, struct bstr *body, + struct gl_user_shader_hook *out) +{ + *out = (struct gl_user_shader_hook){ + .pass_desc = bstr0("(unknown)"), + .offset = identity_trans, + .width = {{ SZEXP_VAR_W, { .varname = bstr0("HOOKED") }}}, + .height = {{ SZEXP_VAR_H, { .varname = bstr0("HOOKED") }}}, + .cond = {{ SZEXP_CONST, { .cval = 1.0 }}}, + }; + + int hook_idx = 0; + int bind_idx = 0; + + // Parse all headers + while (true) { + struct bstr rest; + struct bstr line = bstr_strip(bstr_getline(*body, &rest)); + + // Check for the presence of the magic line beginning + if (!bstr_eatstart0(&line, "//!")) + break; + + *body = rest; + + // Parse the supported commands + if (bstr_eatstart0(&line, "HOOK")) { + if (hook_idx == SHADER_MAX_HOOKS) { + mp_err(log, "Passes may only hook up to %d textures!\n", + SHADER_MAX_HOOKS); + return false; + } + out->hook_tex[hook_idx++] = bstr_strip(line); + continue; + } + + if (bstr_eatstart0(&line, "BIND")) { + if (bind_idx == SHADER_MAX_BINDS) { + mp_err(log, "Passes may only bind up to %d textures!\n", + SHADER_MAX_BINDS); + return false; + } + out->bind_tex[bind_idx++] = bstr_strip(line); + continue; + } + + if (bstr_eatstart0(&line, "SAVE")) { + out->save_tex = bstr_strip(line); + continue; + } + + if (bstr_eatstart0(&line, "DESC")) { + out->pass_desc = bstr_strip(line); + continue; + } + + if (bstr_eatstart0(&line, "OFFSET")) { + float ox, oy; + if (bstr_sscanf(line, "%f %f", &ox, &oy) != 2) { + mp_err(log, "Error while parsing OFFSET!\n"); + return false; + } + out->offset.t[0] = ox; + out->offset.t[1] = oy; + continue; + } + + if (bstr_eatstart0(&line, "WIDTH")) { + if (!parse_rpn_szexpr(line, out->width)) { + mp_err(log, "Error while parsing WIDTH!\n"); + return false; + } + continue; + } + + if (bstr_eatstart0(&line, "HEIGHT")) { + if (!parse_rpn_szexpr(line, out->height)) { + mp_err(log, "Error while parsing HEIGHT!\n"); + return false; + } + continue; + } + + if (bstr_eatstart0(&line, "WHEN")) { + if (!parse_rpn_szexpr(line, out->cond)) { + mp_err(log, "Error while parsing WHEN!\n"); + return false; + } + continue; + } + + if (bstr_eatstart0(&line, "COMPONENTS")) { + if (bstr_sscanf(line, "%d", &out->components) != 1) { + mp_err(log, "Error while parsing COMPONENTS!\n"); + return false; + } + continue; + } + + if (bstr_eatstart0(&line, "COMPUTE")) { + struct compute_info *ci = &out->compute; + int num = bstr_sscanf(line, "%d %d %d %d", &ci->block_w, &ci->block_h, + &ci->threads_w, &ci->threads_h); + + if (num == 2 || num == 4) { + ci->active = true; + ci->directly_writes = true; + } else { + mp_err(log, "Error while parsing COMPUTE!\n"); + return false; + } + continue; + } + + // Unknown command type + mp_err(log, "Unrecognized command '%.*s'!\n", BSTR_P(line)); + return false; + } + + // The rest of the file up until the next magic line beginning (if any) + // shall be the shader body + if (bstr_split_tok(*body, "//!", &out->pass_body, body)) { + // Make sure the magic line is part of the rest + body->start -= 3; + body->len += 3; + } + + // Sanity checking + if (hook_idx == 0) + mp_warn(log, "Pass has no hooked textures (will be ignored)!\n"); + + return true; +} + +static bool parse_tex(struct mp_log *log, struct ra *ra, struct bstr *body, + struct gl_user_shader_tex *out) +{ + *out = (struct 
gl_user_shader_tex){ + .name = bstr0("USER_TEX"), + .params = { + .dimensions = 2, + .w = 1, .h = 1, .d = 1, + .render_src = true, + .src_linear = true, + }, + }; + struct ra_tex_params *p = &out->params; + + while (true) { + struct bstr rest; + struct bstr line = bstr_strip(bstr_getline(*body, &rest)); + + if (!bstr_eatstart0(&line, "//!")) + break; + + *body = rest; + + if (bstr_eatstart0(&line, "TEXTURE")) { + out->name = bstr_strip(line); + continue; + } + + if (bstr_eatstart0(&line, "SIZE")) { + p->dimensions = bstr_sscanf(line, "%d %d %d", &p->w, &p->h, &p->d); + if (p->dimensions < 1 || p->dimensions > 3 || + p->w < 1 || p->h < 1 || p->d < 1) + { + mp_err(log, "Error while parsing SIZE!\n"); + return false; + } + continue; + } + + if (bstr_eatstart0(&line, "FORMAT ")) { + p->format = NULL; + for (int n = 0; n < ra->num_formats; n++) { + const struct ra_format *fmt = ra->formats[n]; + if (bstr_equals0(line, fmt->name)) { + p->format = fmt; + break; + } + } + // (pixel_size==0 is for opaque formats) + if (!p->format || !p->format->pixel_size) { + mp_err(log, "Unrecognized/unavailable FORMAT name: '%.*s'!\n", + BSTR_P(line)); + return false; + } + continue; + } + + if (bstr_eatstart0(&line, "FILTER")) { + line = bstr_strip(line); + if (bstr_equals0(line, "LINEAR")) { + p->src_linear = true; + } else if (bstr_equals0(line, "NEAREST")) { + p->src_linear = false; + } else { + mp_err(log, "Unrecognized FILTER: '%.*s'!\n", BSTR_P(line)); + return false; + } + continue; + } + + if (bstr_eatstart0(&line, "BORDER")) { + line = bstr_strip(line); + if (bstr_equals0(line, "CLAMP")) { + p->src_repeat = false; + } else if (bstr_equals0(line, "REPEAT")) { + p->src_repeat = true; + } else { + mp_err(log, "Unrecognized BORDER: '%.*s'!\n", BSTR_P(line)); + return false; + } + continue; + } + + mp_err(log, "Unrecognized command '%.*s'!\n", BSTR_P(line)); + return false; + } + + if (!p->format) { + mp_err(log, "No FORMAT specified.\n"); + return false; + } + + if (p->src_linear && !p->format->linear_filter) { + mp_err(log, "The specified texture format cannot be filtered!\n"); + return false; + } + + // Decode the rest of the section (up to the next //! marker) as raw hex + // data for the texture + struct bstr hexdata; + if (bstr_split_tok(*body, "//!", &hexdata, body)) { + // Make sure the magic line is part of the rest + body->start -= 3; + body->len += 3; + } + + struct bstr tex; + if (!bstr_decode_hex(NULL, bstr_strip(hexdata), &tex)) { + mp_err(log, "Error while parsing TEXTURE body: must be a valid " + "hexadecimal sequence, on a single line!\n"); + return false; + } + + int expected_len = p->w * p->h * p->d * p->format->pixel_size; + if (tex.len != expected_len) { + mp_err(log, "Shader TEXTURE size mismatch: got %zd bytes, expected %d!\n", + tex.len, expected_len); + talloc_free(tex.start); + return false; + } + + p->initial_data = tex.start; + return true; +} + +void parse_user_shader(struct mp_log *log, struct ra *ra, struct bstr shader, + void *priv, + bool (*dohook)(void *p, struct gl_user_shader_hook hook), + bool (*dotex)(void *p, struct gl_user_shader_tex tex)) +{ + if (!dohook || !dotex || !shader.len) + return; + + // Skip all garbage (e.g. 
comments) before the first header + int pos = bstr_find(shader, bstr0("//!")); + if (pos < 0) { + mp_warn(log, "Shader appears to contain no headers!\n"); + return; + } + shader = bstr_cut(shader, pos); + + // Loop over the file + while (shader.len > 0) + { + // Peek at the first header to dispatch the right type + if (bstr_startswith0(shader, "//!TEXTURE")) { + struct gl_user_shader_tex t; + if (!parse_tex(log, ra, &shader, &t) || !dotex(priv, t)) + return; + continue; + } + + struct gl_user_shader_hook h; + if (!parse_hook(log, &shader, &h) || !dohook(priv, h)) + return; + } +} diff --git a/video/out/gpu/user_shaders.h b/video/out/gpu/user_shaders.h new file mode 100644 index 0000000000..94a070c8e2 --- /dev/null +++ b/video/out/gpu/user_shaders.h @@ -0,0 +1,98 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see . + */ + +#ifndef MP_GL_USER_SHADERS_H +#define MP_GL_USER_SHADERS_H + +#include "utils.h" +#include "ra.h" + +#define SHADER_MAX_PASSES 32 +#define SHADER_MAX_HOOKS 16 +#define SHADER_MAX_BINDS 6 +#define SHADER_MAX_SAVED 64 +#define MAX_SZEXP_SIZE 32 + +enum szexp_op { + SZEXP_OP_ADD, + SZEXP_OP_SUB, + SZEXP_OP_MUL, + SZEXP_OP_DIV, + SZEXP_OP_NOT, + SZEXP_OP_GT, + SZEXP_OP_LT, +}; + +enum szexp_tag { + SZEXP_END = 0, // End of an RPN expression + SZEXP_CONST, // Push a constant value onto the stack + SZEXP_VAR_W, // Get the width/height of a named texture (variable) + SZEXP_VAR_H, + SZEXP_OP2, // Pop two elements and push the result of a dyadic operation + SZEXP_OP1, // Pop one element and push the result of a monadic operation +}; + +struct szexp { + enum szexp_tag tag; + union { + float cval; + struct bstr varname; + enum szexp_op op; + } val; +}; + +struct compute_info { + bool active; + int block_w, block_h; // Block size (each block corresponds to one WG) + int threads_w, threads_h; // How many threads form a working group + bool directly_writes; // If true, shader is assumed to imageStore(out_image) +}; + +struct gl_user_shader_hook { + struct bstr pass_desc; + struct bstr hook_tex[SHADER_MAX_HOOKS]; + struct bstr bind_tex[SHADER_MAX_BINDS]; + struct bstr save_tex; + struct bstr pass_body; + struct gl_transform offset; + struct szexp width[MAX_SZEXP_SIZE]; + struct szexp height[MAX_SZEXP_SIZE]; + struct szexp cond[MAX_SZEXP_SIZE]; + int components; + struct compute_info compute; +}; + +struct gl_user_shader_tex { + struct bstr name; + struct ra_tex_params params; + // for video.c + struct ra_tex *tex; +}; + +// Parse the next shader block from `body`. The callbacks are invoked on every +// valid shader block parsed. 
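// For illustration, a minimal user shader block of the kind parse_user_shader()
// accepts (an illustrative sketch only: the hook()/HOOKED_tex/HOOKED_pos GLSL
// entry-point conventions, and names like HOOKED/OUTPUT in size expressions,
// are assumed to be provided by the renderer in video.c, not by this header):
//
//   //!HOOK LUMA
//   //!BIND HOOKED
//   //!DESC illustrative 2x luma doubler
//   //!WIDTH HOOKED.w 2 *
//   //!HEIGHT HOOKED.h 2 *
//   //!WHEN OUTPUT.w HOOKED.w >
//   vec4 hook() {
//       return HOOKED_tex(HOOKED_pos);
//   }
//
// WIDTH/HEIGHT/WHEN take RPN size expressions (parsed by parse_rpn_szexpr,
// evaluated by eval_szexpr below): "HOOKED.w 2 *" pushes HOOKED's width and
// the constant 2, then multiplies; the WHEN line makes the pass conditional
// on the output being larger than the hooked texture.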
+void parse_user_shader(struct mp_log *log, struct ra *ra, struct bstr shader, + void *priv, + bool (*dohook)(void *p, struct gl_user_shader_hook hook), + bool (*dotex)(void *p, struct gl_user_shader_tex tex)); + +// Evaluate a szexp, given a lookup function for named textures +bool eval_szexpr(struct mp_log *log, void *priv, + bool (*lookup)(void *priv, struct bstr var, float size[2]), + struct szexp expr[MAX_SZEXP_SIZE], float *result); + +#endif diff --git a/video/out/gpu/utils.c b/video/out/gpu/utils.c new file mode 100644 index 0000000000..f8dcbaac60 --- /dev/null +++ b/video/out/gpu/utils.c @@ -0,0 +1,372 @@ +#include "common/msg.h" +#include "video/out/vo.h" +#include "utils.h" + +// Standard parallel 2D projection, except y1 < y0 means that the coordinate +// system is flipped, not the projection. +void gl_transform_ortho(struct gl_transform *t, float x0, float x1, + float y0, float y1) +{ + if (y1 < y0) { + float tmp = y0; + y0 = tmp - y1; + y1 = tmp; + } + + t->m[0][0] = 2.0f / (x1 - x0); + t->m[0][1] = 0.0f; + t->m[1][0] = 0.0f; + t->m[1][1] = 2.0f / (y1 - y0); + t->t[0] = -(x1 + x0) / (x1 - x0); + t->t[1] = -(y1 + y0) / (y1 - y0); +} + +// Apply the effects of one transformation to another, transforming it in the +// process. In other words: post-composes t onto x +void gl_transform_trans(struct gl_transform t, struct gl_transform *x) +{ + struct gl_transform xt = *x; + x->m[0][0] = t.m[0][0] * xt.m[0][0] + t.m[0][1] * xt.m[1][0]; + x->m[1][0] = t.m[1][0] * xt.m[0][0] + t.m[1][1] * xt.m[1][0]; + x->m[0][1] = t.m[0][0] * xt.m[0][1] + t.m[0][1] * xt.m[1][1]; + x->m[1][1] = t.m[1][0] * xt.m[0][1] + t.m[1][1] * xt.m[1][1]; + gl_transform_vec(t, &x->t[0], &x->t[1]); +} + +void gl_transform_ortho_fbodst(struct gl_transform *t, struct fbodst fbo) +{ + int y_dir = fbo.flip ? 
-1 : 1; + gl_transform_ortho(t, 0, fbo.tex->params.w, 0, fbo.tex->params.h * y_dir); +} + +void ra_buf_pool_uninit(struct ra *ra, struct ra_buf_pool *pool) +{ + for (int i = 0; i < pool->num_buffers; i++) + ra_buf_free(ra, &pool->buffers[i]); + + talloc_free(pool->buffers); + *pool = (struct ra_buf_pool){0}; +} + +static bool ra_buf_params_compatible(const struct ra_buf_params *new, + const struct ra_buf_params *old) +{ + return new->type == old->type && + new->size <= old->size && + new->host_mapped == old->host_mapped && + new->host_mutable == old->host_mutable; +} + +static bool ra_buf_pool_grow(struct ra *ra, struct ra_buf_pool *pool) +{ + struct ra_buf *buf = ra_buf_create(ra, &pool->current_params); + if (!buf) + return false; + + MP_TARRAY_INSERT_AT(NULL, pool->buffers, pool->num_buffers, pool->index, buf); + MP_VERBOSE(ra, "Resized buffer pool of type %u to size %d\n", + pool->current_params.type, pool->num_buffers); + return true; +} + +struct ra_buf *ra_buf_pool_get(struct ra *ra, struct ra_buf_pool *pool, + const struct ra_buf_params *params) +{ + assert(!params->initial_data); + + if (!ra_buf_params_compatible(params, &pool->current_params)) { + ra_buf_pool_uninit(ra, pool); + pool->current_params = *params; + } + + // Make sure we have at least one buffer available + if (!pool->buffers && !ra_buf_pool_grow(ra, pool)) + return NULL; + + // Make sure the next buffer is available for use + if (!ra->fns->buf_poll(ra, pool->buffers[pool->index]) && + !ra_buf_pool_grow(ra, pool)) + { + return NULL; + } + + struct ra_buf *buf = pool->buffers[pool->index++]; + pool->index %= pool->num_buffers; + + return buf; +} + +bool ra_tex_upload_pbo(struct ra *ra, struct ra_buf_pool *pbo, + const struct ra_tex_upload_params *params) +{ + if (params->buf) + return ra->fns->tex_upload(ra, params); + + struct ra_tex *tex = params->tex; + size_t row_size = tex->params.dimensions == 2 ? params->stride : + tex->params.w * tex->params.format->pixel_size; + + struct ra_buf_params bufparams = { + .type = RA_BUF_TYPE_TEX_UPLOAD, + .size = row_size * tex->params.h * tex->params.d, + .host_mutable = true, + }; + + struct ra_buf *buf = ra_buf_pool_get(ra, pbo, &bufparams); + if (!buf) + return false; + + ra->fns->buf_update(ra, buf, 0, params->src, bufparams.size); + + struct ra_tex_upload_params newparams = *params; + newparams.buf = buf; + newparams.src = NULL; + + return ra->fns->tex_upload(ra, &newparams); +} + +struct ra_layout std140_layout(struct ra_renderpass_input *inp) +{ + size_t el_size = ra_vartype_size(inp->type); + + // std140 packing rules: + // 1. The alignment of generic values is their size in bytes + // 2. The alignment of vectors is the vector length * the base count, with + // the exception of vec3 which is always aligned like vec4 + // 3. The alignment of arrays is that of the element size rounded up to + // the nearest multiple of vec4 + // 4. Matrices are treated like arrays of vectors + // 5. 
Arrays/matrices are laid out with a stride equal to the alignment + size_t size = el_size * inp->dim_v; + if (inp->dim_v == 3) + size += el_size; + if (inp->dim_m > 1) + size = MP_ALIGN_UP(size, sizeof(float[4])); + + return (struct ra_layout) { + .align = size, + .stride = size, + .size = size * inp->dim_m, + }; +} + +struct ra_layout std430_layout(struct ra_renderpass_input *inp) +{ + size_t el_size = ra_vartype_size(inp->type); + + // std430 packing rules: like std140, except arrays/matrices are always + // "tightly" packed, even arrays/matrices of vec3s + size_t size = el_size * inp->dim_v; + if (inp->dim_v == 3 && inp->dim_m == 1) + size += el_size; + + return (struct ra_layout) { + .align = size, + .stride = size, + .size = size * inp->dim_m, + }; +} + +// Create a texture and a FBO using the texture as color attachments. +// fmt: texture internal format +// If the parameters are the same as the previous call, do not touch it. +// flags can be 0, or a combination of FBOTEX_FUZZY_W and FBOTEX_FUZZY_H. +// Enabling FUZZY for W or H means the w or h does not need to be exact. +bool fbotex_change(struct fbotex *fbo, struct ra *ra, struct mp_log *log, + int w, int h, const struct ra_format *fmt, int flags) +{ + int lw = w, lh = h; + + if (fbo->tex) { + int cw = w, ch = h; + int rw = fbo->tex->params.w, rh = fbo->tex->params.h; + + if ((flags & FBOTEX_FUZZY_W) && cw < rw) + cw = rw; + if ((flags & FBOTEX_FUZZY_H) && ch < rh) + ch = rh; + + if (rw == cw && rh == ch && fbo->tex->params.format == fmt) + goto done; + } + + if (flags & FBOTEX_FUZZY_W) + w = MP_ALIGN_UP(w, 256); + if (flags & FBOTEX_FUZZY_H) + h = MP_ALIGN_UP(h, 256); + + mp_verbose(log, "Create FBO: %dx%d (%dx%d)\n", lw, lh, w, h); + + if (!fmt || !fmt->renderable || !fmt->linear_filter) { + mp_err(log, "Format %s not supported.\n", fmt ? 
fmt->name : "(unset)"); + return false; + } + + fbotex_uninit(fbo); + + *fbo = (struct fbotex) { + .ra = ra, + }; + + struct ra_tex_params params = { + .dimensions = 2, + .w = w, + .h = h, + .d = 1, + .format = fmt, + .src_linear = true, + .render_src = true, + .render_dst = true, + .storage_dst = true, + .blit_src = true, + }; + + fbo->tex = ra_tex_create(fbo->ra, ¶ms); + + if (!fbo->tex) { + mp_err(log, "Error: framebuffer could not be created.\n"); + fbotex_uninit(fbo); + return false; + } + +done: + + fbo->lw = lw; + fbo->lh = lh; + + fbo->fbo = (struct fbodst){ + .tex = fbo->tex, + }; + + return true; +} + +void fbotex_uninit(struct fbotex *fbo) +{ + if (fbo->ra) { + ra_tex_free(fbo->ra, &fbo->tex); + *fbo = (struct fbotex) {0}; + } +} + +struct timer_pool { + struct ra *ra; + ra_timer *timer; + bool running; // detect invalid usage + + uint64_t samples[VO_PERF_SAMPLE_COUNT]; + int sample_idx; + int sample_count; + + uint64_t sum; + uint64_t peak; +}; + +struct timer_pool *timer_pool_create(struct ra *ra) +{ + if (!ra->fns->timer_create) + return NULL; + + ra_timer *timer = ra->fns->timer_create(ra); + if (!timer) + return NULL; + + struct timer_pool *pool = talloc(NULL, struct timer_pool); + if (!pool) { + ra->fns->timer_destroy(ra, timer); + return NULL; + } + + *pool = (struct timer_pool){ .ra = ra, .timer = timer }; + return pool; +} + +void timer_pool_destroy(struct timer_pool *pool) +{ + if (!pool) + return; + + pool->ra->fns->timer_destroy(pool->ra, pool->timer); + talloc_free(pool); +} + +void timer_pool_start(struct timer_pool *pool) +{ + if (!pool) + return; + + assert(!pool->running); + pool->ra->fns->timer_start(pool->ra, pool->timer); + pool->running = true; +} + +void timer_pool_stop(struct timer_pool *pool) +{ + if (!pool) + return; + + assert(pool->running); + uint64_t res = pool->ra->fns->timer_stop(pool->ra, pool->timer); + pool->running = false; + + if (res) { + // Input res into the buffer and grab the previous value + uint64_t old = pool->samples[pool->sample_idx]; + pool->sample_count = MPMIN(pool->sample_count + 1, VO_PERF_SAMPLE_COUNT); + pool->samples[pool->sample_idx++] = res; + pool->sample_idx %= VO_PERF_SAMPLE_COUNT; + pool->sum = pool->sum + res - old; + + // Update peak if necessary + if (res >= pool->peak) { + pool->peak = res; + } else if (pool->peak == old) { + // It's possible that the last peak was the value we just removed, + // if so we need to scan for the new peak + uint64_t peak = res; + for (int i = 0; i < VO_PERF_SAMPLE_COUNT; i++) + peak = MPMAX(peak, pool->samples[i]); + pool->peak = peak; + } + } +} + +struct mp_pass_perf timer_pool_measure(struct timer_pool *pool) +{ + if (!pool) + return (struct mp_pass_perf){0}; + + struct mp_pass_perf res = { + .peak = pool->peak, + .count = pool->sample_count, + }; + + int idx = pool->sample_idx - pool->sample_count + VO_PERF_SAMPLE_COUNT; + for (int i = 0; i < res.count; i++) { + idx %= VO_PERF_SAMPLE_COUNT; + res.samples[i] = pool->samples[idx++]; + } + + if (res.count > 0) { + res.last = res.samples[res.count - 1]; + res.avg = pool->sum / res.count; + } + + return res; +} + +void mp_log_source(struct mp_log *log, int lev, const char *src) +{ + int line = 1; + if (!src) + return; + while (*src) { + const char *end = strchr(src, '\n'); + const char *next = end + 1; + if (!end) + next = end = src + strlen(src); + mp_msg(log, lev, "[%3d] %.*s\n", line, (int)(end - src), src); + line++; + src = next; + } +} diff --git a/video/out/gpu/utils.h b/video/out/gpu/utils.h new file mode 100644 index 
0000000000..04695f8085 --- /dev/null +++ b/video/out/gpu/utils.h @@ -0,0 +1,120 @@ +#pragma once + +#include +#include + +#include "ra.h" + +// A 3x2 matrix, with the translation part separate. +struct gl_transform { + // row-major, e.g. in mathematical notation: + // | m[0][0] m[0][1] | + // | m[1][0] m[1][1] | + float m[2][2]; + float t[2]; +}; + +static const struct gl_transform identity_trans = { + .m = {{1.0, 0.0}, {0.0, 1.0}}, + .t = {0.0, 0.0}, +}; + +void gl_transform_ortho(struct gl_transform *t, float x0, float x1, + float y0, float y1); + +// This treats m as an affine transformation, in other words m[2][n] gets +// added to the output. +static inline void gl_transform_vec(struct gl_transform t, float *x, float *y) +{ + float vx = *x, vy = *y; + *x = vx * t.m[0][0] + vy * t.m[0][1] + t.t[0]; + *y = vx * t.m[1][0] + vy * t.m[1][1] + t.t[1]; +} + +struct mp_rect_f { + float x0, y0, x1, y1; +}; + +// Semantic equality (fuzzy comparison) +static inline bool mp_rect_f_seq(struct mp_rect_f a, struct mp_rect_f b) +{ + return fabs(a.x0 - b.x0) < 1e-6 && fabs(a.x1 - b.x1) < 1e-6 && + fabs(a.y0 - b.y0) < 1e-6 && fabs(a.y1 - b.y1) < 1e-6; +} + +static inline void gl_transform_rect(struct gl_transform t, struct mp_rect_f *r) +{ + gl_transform_vec(t, &r->x0, &r->y0); + gl_transform_vec(t, &r->x1, &r->y1); +} + +static inline bool gl_transform_eq(struct gl_transform a, struct gl_transform b) +{ + for (int x = 0; x < 2; x++) { + for (int y = 0; y < 2; y++) { + if (a.m[x][y] != b.m[x][y]) + return false; + } + } + + return a.t[0] == b.t[0] && a.t[1] == b.t[1]; +} + +void gl_transform_trans(struct gl_transform t, struct gl_transform *x); + +struct fbodst { + struct ra_tex *tex; + bool flip; // mirror vertically +}; + +void gl_transform_ortho_fbodst(struct gl_transform *t, struct fbodst fbo); + +// A pool of buffers, which can grow as needed +struct ra_buf_pool { + struct ra_buf_params current_params; + struct ra_buf **buffers; + int num_buffers; + int index; +}; + +void ra_buf_pool_uninit(struct ra *ra, struct ra_buf_pool *pool); + +// Note: params->initial_data is *not* supported +struct ra_buf *ra_buf_pool_get(struct ra *ra, struct ra_buf_pool *pool, + const struct ra_buf_params *params); + +// Helper that wraps ra_tex_upload using texture upload buffers to ensure that +// params->buf is always set. This is intended for RA-internal usage. +bool ra_tex_upload_pbo(struct ra *ra, struct ra_buf_pool *pbo, + const struct ra_tex_upload_params *params); + +// Layout rules for GLSL's packing modes +struct ra_layout std140_layout(struct ra_renderpass_input *inp); +struct ra_layout std430_layout(struct ra_renderpass_input *inp); + +struct fbotex { + struct ra *ra; + struct ra_tex *tex; + int lw, lh; // logical (configured) size, <= than texture size + struct fbodst fbo; +}; + +void fbotex_uninit(struct fbotex *fbo); +bool fbotex_change(struct fbotex *fbo, struct ra *ra, struct mp_log *log, + int w, int h, const struct ra_format *fmt, int flags); +#define FBOTEX_FUZZY_W 1 +#define FBOTEX_FUZZY_H 2 +#define FBOTEX_FUZZY (FBOTEX_FUZZY_W | FBOTEX_FUZZY_H) + +// A wrapper around ra_timer that does result pooling, averaging etc. +struct timer_pool; + +struct timer_pool *timer_pool_create(struct ra *ra); +void timer_pool_destroy(struct timer_pool *pool); +void timer_pool_start(struct timer_pool *pool); +void timer_pool_stop(struct timer_pool *pool); +struct mp_pass_perf timer_pool_measure(struct timer_pool *pool); + +// print a multi line string with line numbers (e.g. 
for shader sources) +// log, lev: module and log level, as in mp_msg() +void mp_log_source(struct mp_log *log, int lev, const char *src); diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c new file mode 100644 index 0000000000..e36fde60e8 --- /dev/null +++ b/video/out/gpu/video.c @@ -0,0 +1,3809 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see . + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "video.h" + +#include "misc/bstr.h" +#include "options/m_config.h" +#include "common/global.h" +#include "options/options.h" +#include "utils.h" +#include "hwdec.h" +#include "osd.h" +#include "ra.h" +#include "stream/stream.h" +#include "video_shaders.h" +#include "user_shaders.h" +#include "video/out/filter_kernels.h" +#include "video/out/aspect.h" +#include "video/out/dither.h" +#include "video/out/vo.h" + +// scale/cscale arguments that map directly to shader filter routines. +// Note that the convolution filters are not included in this list. +static const char *const fixed_scale_filters[] = { + "bilinear", + "bicubic_fast", + "oversample", + NULL +}; +static const char *const fixed_tscale_filters[] = { + "oversample", + "linear", + NULL +}; + +// must be sorted, and terminated with 0 +int filter_sizes[] = + {2, 4, 6, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 0}; +int tscale_sizes[] = {2, 4, 6, 0}; // limited by TEXUNIT_VIDEO_NUM + +struct vertex_pt { + float x, y; +}; + +struct vertex { + struct vertex_pt position; + struct vertex_pt texcoord[TEXUNIT_VIDEO_NUM]; +}; + +static const struct ra_renderpass_input vertex_vao[] = { + {"position", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, position)}, + {"texcoord0", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[0])}, + {"texcoord1", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[1])}, + {"texcoord2", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[2])}, + {"texcoord3", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[3])}, + {"texcoord4", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[4])}, + {"texcoord5", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[5])}, + {0} +}; + +struct texplane { + struct ra_tex *tex; + int w, h; + bool flipped; +}; + +struct video_image { + struct texplane planes[4]; + struct mp_image *mpi; // original input image + uint64_t id; // unique ID identifying mpi contents + bool hwdec_mapped; +}; + +enum plane_type { + PLANE_NONE = 0, + PLANE_RGB, + PLANE_LUMA, + PLANE_CHROMA, + PLANE_ALPHA, + PLANE_XYZ, +}; + +static const char *plane_names[] = { + [PLANE_NONE] = "unknown", + [PLANE_RGB] = "rgb", + [PLANE_LUMA] = "luma", + [PLANE_CHROMA] = "chroma", + [PLANE_ALPHA] = "alpha", + [PLANE_XYZ] = "xyz", +}; + +// A self-contained description of a source image which can be bound to a +// texture unit and sampled from. 
Contains metadata about how it's to be used +struct img_tex { + enum plane_type type; // must be set to something non-zero + int components; // number of relevant coordinates + float multiplier; // multiplier to be used when sampling + struct ra_tex *tex; + int w, h; // logical size (after transformation) + struct gl_transform transform; // rendering transformation +}; + +// A named img_tex, for user scripting purposes +struct saved_tex { + const char *name; + struct img_tex tex; +}; + +// A texture hook. This is some operation that transforms a named texture as +// soon as it's generated +struct tex_hook { + const char *save_tex; + const char *hook_tex[SHADER_MAX_HOOKS]; + const char *bind_tex[TEXUNIT_VIDEO_NUM]; + int components; // how many components are relevant (0 = same as input) + void *priv; // this gets talloc_freed when the tex_hook is removed + void (*hook)(struct gl_video *p, struct img_tex tex, // generates GLSL + struct gl_transform *trans, void *priv); + bool (*cond)(struct gl_video *p, struct img_tex tex, void *priv); +}; + +struct fbosurface { + struct fbotex fbotex; + uint64_t id; + double pts; +}; + +#define FBOSURFACES_MAX 10 + +struct cached_file { + char *path; + struct bstr body; +}; + +struct pass_info { + struct bstr desc; + struct mp_pass_perf perf; +}; + +#define PASS_INFO_MAX (SHADER_MAX_PASSES + 32) + +struct dr_buffer { + struct ra_buf *buf; + // The mpi reference will keep the data from being recycled (or from other + // references gaining write access) while the GPU is accessing the buffer. + struct mp_image *mpi; +}; + +struct gl_video { + struct ra *ra; + + struct mpv_global *global; + struct mp_log *log; + struct gl_video_opts opts; + struct m_config_cache *opts_cache; + struct gl_lcms *cms; + + int fb_depth; // actual bits available in GL main framebuffer + struct m_color clear_color; + bool force_clear_color; + + struct gl_shader_cache *sc; + + struct osd_state *osd_state; + struct mpgl_osd *osd; + double osd_pts; + + struct ra_tex *lut_3d_texture; + bool use_lut_3d; + int lut_3d_size[3]; + + struct ra_tex *dither_texture; + + struct mp_image_params real_image_params; // configured format + struct mp_image_params image_params; // texture format (mind hwdec case) + struct ra_imgfmt_desc ra_format; // texture format + int plane_count; + + bool is_gray; + bool has_alpha; + char color_swizzle[5]; + bool use_integer_conversion; + + struct video_image image; + + struct dr_buffer *dr_buffers; + int num_dr_buffers; + + bool using_dr_path; + + bool dumb_mode; + bool forced_dumb_mode; + + const struct ra_format *fbo_format; + struct fbotex merge_fbo[4]; + struct fbotex scale_fbo[4]; + struct fbotex integer_fbo[4]; + struct fbotex indirect_fbo; + struct fbotex blend_subs_fbo; + struct fbotex screen_fbo; + struct fbotex output_fbo; + struct fbosurface surfaces[FBOSURFACES_MAX]; + struct fbotex vdpau_deinterleave_fbo[2]; + struct ra_buf *hdr_peak_ssbo; + + // user pass descriptions and textures + struct tex_hook tex_hooks[SHADER_MAX_PASSES]; + int tex_hook_num; + struct gl_user_shader_tex user_textures[SHADER_MAX_PASSES]; + int user_tex_num; + + int surface_idx; + int surface_now; + int frames_drawn; + bool is_interpolated; + bool output_fbo_valid; + + // state for configured scalers + struct scaler scaler[SCALER_COUNT]; + + struct mp_csp_equalizer_state *video_eq; + + struct mp_rect src_rect; // displayed part of the source video + struct mp_rect dst_rect; // video rectangle on output window + struct mp_osd_res osd_rect; // OSD size/margins + + // temporary during 
rendering + struct img_tex pass_tex[TEXUNIT_VIDEO_NUM]; + struct compute_info pass_compute; // compute shader metadata for this pass + int pass_tex_num; + int texture_w, texture_h; + struct gl_transform texture_offset; // texture transform without rotation + int components; + bool use_linear; + float user_gamma; + + // pass info / metrics + struct pass_info pass_fresh[PASS_INFO_MAX]; + struct pass_info pass_redraw[PASS_INFO_MAX]; + struct pass_info *pass; + int pass_idx; + struct timer_pool *upload_timer; + struct timer_pool *blit_timer; + struct timer_pool *osd_timer; + + // intermediate textures + struct saved_tex saved_tex[SHADER_MAX_SAVED]; + int saved_tex_num; + struct fbotex hook_fbos[SHADER_MAX_SAVED]; + int hook_fbo_num; + + int frames_uploaded; + int frames_rendered; + AVLFG lfg; + + // Cached because computing it can take relatively long + int last_dither_matrix_size; + float *last_dither_matrix; + + struct cached_file *files; + int num_files; + + struct ra_hwdec *hwdec; + struct ra_hwdec_mapper *hwdec_mapper; + bool hwdec_active; + + bool dsi_warned; + bool broken_frame; // temporary error state +}; + +static const struct gl_video_opts gl_video_opts_def = { + .dither_algo = DITHER_FRUIT, + .dither_depth = -1, + .dither_size = 6, + .temporal_dither_period = 1, + .fbo_format = "auto", + .sigmoid_center = 0.75, + .sigmoid_slope = 6.5, + .scaler = { + {{"bilinear", .params={NAN, NAN}}, {.params = {NAN, NAN}}, + .cutoff = 0.001}, // scale + {{NULL, .params={NAN, NAN}}, {.params = {NAN, NAN}}, + .cutoff = 0.001}, // dscale + {{"bilinear", .params={NAN, NAN}}, {.params = {NAN, NAN}}, + .cutoff = 0.001}, // cscale + {{"mitchell", .params={NAN, NAN}}, {.params = {NAN, NAN}}, + .clamp = 1, }, // tscale + }, + .scaler_resizes_only = 1, + .scaler_lut_size = 6, + .interpolation_threshold = 0.0001, + .alpha_mode = ALPHA_BLEND_TILES, + .background = {0, 0, 0, 255}, + .gamma = 1.0f, + .tone_mapping = TONE_MAPPING_MOBIUS, + .tone_mapping_param = NAN, + .tone_mapping_desat = 2.0, + .early_flush = -1, +}; + +static int validate_scaler_opt(struct mp_log *log, const m_option_t *opt, + struct bstr name, struct bstr param); + +static int validate_window_opt(struct mp_log *log, const m_option_t *opt, + struct bstr name, struct bstr param); + +#define OPT_BASE_STRUCT struct gl_video_opts + +#define SCALER_OPTS(n, i) \ + OPT_STRING_VALIDATE(n, scaler[i].kernel.name, 0, validate_scaler_opt), \ + OPT_FLOAT(n"-param1", scaler[i].kernel.params[0], 0), \ + OPT_FLOAT(n"-param2", scaler[i].kernel.params[1], 0), \ + OPT_FLOAT(n"-blur", scaler[i].kernel.blur, 0), \ + OPT_FLOATRANGE(n"-cutoff", scaler[i].cutoff, 0, 0.0, 1.0), \ + OPT_FLOATRANGE(n"-taper", scaler[i].kernel.taper, 0, 0.0, 1.0), \ + OPT_FLOAT(n"-wparam", scaler[i].window.params[0], 0), \ + OPT_FLOAT(n"-wblur", scaler[i].window.blur, 0), \ + OPT_FLOATRANGE(n"-wtaper", scaler[i].window.taper, 0, 0.0, 1.0), \ + OPT_FLOATRANGE(n"-clamp", scaler[i].clamp, 0, 0.0, 1.0), \ + OPT_FLOATRANGE(n"-radius", scaler[i].radius, 0, 0.5, 16.0), \ + OPT_FLOATRANGE(n"-antiring", scaler[i].antiring, 0, 0.0, 1.0), \ + OPT_STRING_VALIDATE(n"-window", scaler[i].window.name, 0, validate_window_opt) + +const struct m_sub_options gl_video_conf = { + .opts = (const m_option_t[]) { + OPT_CHOICE("gpu-dumb-mode", dumb_mode, 0, + ({"auto", 0}, {"yes", 1}, {"no", -1})), + OPT_FLOATRANGE("gamma-factor", gamma, 0, 0.1, 2.0), + OPT_FLAG("gamma-auto", gamma_auto, 0), + OPT_CHOICE_C("target-prim", target_prim, 0, mp_csp_prim_names), + OPT_CHOICE_C("target-trc", target_trc, 0, 
mp_csp_trc_names), + OPT_CHOICE("tone-mapping", tone_mapping, 0, + ({"clip", TONE_MAPPING_CLIP}, + {"mobius", TONE_MAPPING_MOBIUS}, + {"reinhard", TONE_MAPPING_REINHARD}, + {"hable", TONE_MAPPING_HABLE}, + {"gamma", TONE_MAPPING_GAMMA}, + {"linear", TONE_MAPPING_LINEAR})), + OPT_FLAG("hdr-compute-peak", compute_hdr_peak, 0), + OPT_FLOAT("tone-mapping-param", tone_mapping_param, 0), + OPT_FLOAT("tone-mapping-desaturate", tone_mapping_desat, 0), + OPT_FLAG("gamut-warning", gamut_warning, 0), + OPT_FLAG("opengl-pbo", pbo, 0), + SCALER_OPTS("scale", SCALER_SCALE), + SCALER_OPTS("dscale", SCALER_DSCALE), + SCALER_OPTS("cscale", SCALER_CSCALE), + SCALER_OPTS("tscale", SCALER_TSCALE), + OPT_INTRANGE("scaler-lut-size", scaler_lut_size, 0, 4, 10), + OPT_FLAG("scaler-resizes-only", scaler_resizes_only, 0), + OPT_FLAG("linear-scaling", linear_scaling, 0), + OPT_FLAG("correct-downscaling", correct_downscaling, 0), + OPT_FLAG("sigmoid-upscaling", sigmoid_upscaling, 0), + OPT_FLOATRANGE("sigmoid-center", sigmoid_center, 0, 0.0, 1.0), + OPT_FLOATRANGE("sigmoid-slope", sigmoid_slope, 0, 1.0, 20.0), + OPT_STRING("fbo-format", fbo_format, 0), + OPT_CHOICE_OR_INT("dither-depth", dither_depth, 0, -1, 16, + ({"no", -1}, {"auto", 0})), + OPT_CHOICE("dither", dither_algo, 0, + ({"fruit", DITHER_FRUIT}, + {"ordered", DITHER_ORDERED}, + {"no", DITHER_NONE})), + OPT_INTRANGE("dither-size-fruit", dither_size, 0, 2, 8), + OPT_FLAG("temporal-dither", temporal_dither, 0), + OPT_INTRANGE("temporal-dither-period", temporal_dither_period, 0, 1, 128), + OPT_CHOICE("alpha", alpha_mode, 0, + ({"no", ALPHA_NO}, + {"yes", ALPHA_YES}, + {"blend", ALPHA_BLEND}, + {"blend-tiles", ALPHA_BLEND_TILES})), + OPT_FLAG("opengl-rectangle-textures", use_rectangle, 0), + OPT_COLOR("background", background, 0), + OPT_FLAG("interpolation", interpolation, 0), + OPT_FLOAT("interpolation-threshold", interpolation_threshold, 0), + OPT_CHOICE("blend-subtitles", blend_subs, 0, + ({"no", BLEND_SUBS_NO}, + {"yes", BLEND_SUBS_YES}, + {"video", BLEND_SUBS_VIDEO})), + OPT_PATHLIST("glsl-shaders", user_shaders, 0), + OPT_CLI_ALIAS("glsl-shader", "glsl-shaders-append"), + OPT_FLAG("deband", deband, 0), + OPT_SUBSTRUCT("deband", deband_opts, deband_conf, 0), + OPT_FLOAT("sharpen", unsharp, 0), + OPT_INTRANGE("gpu-tex-pad-x", tex_pad_x, 0, 0, 4096), + OPT_INTRANGE("gpu-tex-pad-y", tex_pad_y, 0, 0, 4096), + OPT_SUBSTRUCT("", icc_opts, mp_icc_conf, 0), + OPT_STRING("gpu-shader-cache-dir", shader_cache_dir, 0), + OPT_REPLACED("hdr-tone-mapping", "tone-mapping"), + OPT_REPLACED("opengl-shaders", "glsl-shaders"), + OPT_CLI_ALIAS("opengl-shader", "glsl-shaders-append"), + OPT_REPLACED("opengl-shader-cache-dir", "gpu-shader-cache-dir"), + OPT_REPLACED("opengl-tex-pad-x", "gpu-tex-pad-x"), + OPT_REPLACED("opengl-tex-pad-y", "gpu-tex-pad-y"), + OPT_REPLACED("opengl-fbo-format", "fbo-format"), + OPT_REPLACED("opengl-dumb-mode", "gpu-dumb-mode"), + OPT_REPLACED("opengl-gamma", "gpu-gamma"), + {0} + }, + .size = sizeof(struct gl_video_opts), + .defaults = &gl_video_opts_def, +}; + +static void uninit_rendering(struct gl_video *p); +static void uninit_scaler(struct gl_video *p, struct scaler *scaler); +static void check_gl_features(struct gl_video *p); +static bool pass_upload_image(struct gl_video *p, struct mp_image *mpi, uint64_t id); +static const char *handle_scaler_opt(const char *name, bool tscale); +static void reinit_from_options(struct gl_video *p); +static void get_scale_factors(struct gl_video *p, bool transpose_rot, double xy[2]); +static void 
gl_video_setup_hooks(struct gl_video *p); + +#define GLSL(x) gl_sc_add(p->sc, #x "\n"); +#define GLSLF(...) gl_sc_addf(p->sc, __VA_ARGS__) +#define GLSLHF(...) gl_sc_haddf(p->sc, __VA_ARGS__) +#define PRELUDE(...) gl_sc_paddf(p->sc, __VA_ARGS__) + +static struct bstr load_cached_file(struct gl_video *p, const char *path) +{ + if (!path || !path[0]) + return (struct bstr){0}; + for (int n = 0; n < p->num_files; n++) { + if (strcmp(p->files[n].path, path) == 0) + return p->files[n].body; + } + // not found -> load it + struct bstr s = stream_read_file(path, p, p->global, 1024000); // 1024 kB + if (s.len) { + struct cached_file new = { + .path = talloc_strdup(p, path), + .body = s, + }; + MP_TARRAY_APPEND(p, p->files, p->num_files, new); + return new.body; + } + return (struct bstr){0}; +} + +static void debug_check_gl(struct gl_video *p, const char *msg) +{ + if (p->ra->fns->debug_marker) + p->ra->fns->debug_marker(p->ra, msg); +} + +static void gl_video_reset_surfaces(struct gl_video *p) +{ + for (int i = 0; i < FBOSURFACES_MAX; i++) { + p->surfaces[i].id = 0; + p->surfaces[i].pts = MP_NOPTS_VALUE; + } + p->surface_idx = 0; + p->surface_now = 0; + p->frames_drawn = 0; + p->output_fbo_valid = false; +} + +static void gl_video_reset_hooks(struct gl_video *p) +{ + for (int i = 0; i < p->tex_hook_num; i++) + talloc_free(p->tex_hooks[i].priv); + + for (int i = 0; i < p->user_tex_num; i++) + ra_tex_free(p->ra, &p->user_textures[i].tex); + + p->tex_hook_num = 0; + p->user_tex_num = 0; +} + +static inline int fbosurface_wrap(int id) +{ + id = id % FBOSURFACES_MAX; + return id < 0 ? id + FBOSURFACES_MAX : id; +} + +static void reinit_osd(struct gl_video *p) +{ + mpgl_osd_destroy(p->osd); + p->osd = NULL; + if (p->osd_state) + p->osd = mpgl_osd_init(p->ra, p->log, p->osd_state); +} + +static void uninit_rendering(struct gl_video *p) +{ + for (int n = 0; n < SCALER_COUNT; n++) + uninit_scaler(p, &p->scaler[n]); + + ra_tex_free(p->ra, &p->dither_texture); + + for (int n = 0; n < 4; n++) { + fbotex_uninit(&p->merge_fbo[n]); + fbotex_uninit(&p->scale_fbo[n]); + fbotex_uninit(&p->integer_fbo[n]); + } + + fbotex_uninit(&p->indirect_fbo); + fbotex_uninit(&p->blend_subs_fbo); + fbotex_uninit(&p->screen_fbo); + fbotex_uninit(&p->output_fbo); + + for (int n = 0; n < FBOSURFACES_MAX; n++) + fbotex_uninit(&p->surfaces[n].fbotex); + + for (int n = 0; n < SHADER_MAX_SAVED; n++) + fbotex_uninit(&p->hook_fbos[n]); + + for (int n = 0; n < 2; n++) + fbotex_uninit(&p->vdpau_deinterleave_fbo[n]); + + gl_video_reset_surfaces(p); + gl_video_reset_hooks(p); + + gl_sc_reset_error(p->sc); +} + +bool gl_video_gamma_auto_enabled(struct gl_video *p) +{ + return p->opts.gamma_auto; +} + +struct mp_colorspace gl_video_get_output_colorspace(struct gl_video *p) +{ + return (struct mp_colorspace) { + .primaries = p->opts.target_prim, + .gamma = p->opts.target_trc, + }; +} + +// Warning: profile.start must point to a ta allocation, and the function +// takes over ownership. +void gl_video_set_icc_profile(struct gl_video *p, bstr icc_data) +{ + if (gl_lcms_set_memory_profile(p->cms, icc_data)) + reinit_from_options(p); +} + +bool gl_video_icc_auto_enabled(struct gl_video *p) +{ + return p->opts.icc_opts ? 
p->opts.icc_opts->profile_auto : false; +} + +static bool gl_video_get_lut3d(struct gl_video *p, enum mp_csp_prim prim, + enum mp_csp_trc trc) +{ + if (!p->use_lut_3d) + return false; + + struct AVBufferRef *icc = NULL; + if (p->image.mpi) + icc = p->image.mpi->icc_profile; + + if (p->lut_3d_texture && !gl_lcms_has_changed(p->cms, prim, trc, icc)) + return true; + + // GLES3 doesn't provide filtered 16 bit integer textures + // GLES2 doesn't even provide 3D textures + const struct ra_format *fmt = ra_find_unorm_format(p->ra, 2, 4); + if (!fmt || !(p->ra->caps & RA_CAP_TEX_3D)) { + p->use_lut_3d = false; + MP_WARN(p, "Disabling color management (no RGBA16 3D textures).\n"); + return false; + } + + struct lut3d *lut3d = NULL; + if (!fmt || !gl_lcms_get_lut3d(p->cms, &lut3d, prim, trc, icc) || !lut3d) { + p->use_lut_3d = false; + return false; + } + + ra_tex_free(p->ra, &p->lut_3d_texture); + + struct ra_tex_params params = { + .dimensions = 3, + .w = lut3d->size[0], + .h = lut3d->size[1], + .d = lut3d->size[2], + .format = fmt, + .render_src = true, + .src_linear = true, + .initial_data = lut3d->data, + }; + p->lut_3d_texture = ra_tex_create(p->ra, ¶ms); + + debug_check_gl(p, "after 3d lut creation"); + + for (int i = 0; i < 3; i++) + p->lut_3d_size[i] = lut3d->size[i]; + + talloc_free(lut3d); + + return true; +} + +// Fill an img_tex struct from an FBO + some metadata +static struct img_tex img_tex_fbo(struct fbotex *fbo, enum plane_type type, + int components) +{ + assert(type != PLANE_NONE); + return (struct img_tex){ + .type = type, + .tex = fbo->tex, + .multiplier = 1.0, + .w = fbo->lw, + .h = fbo->lh, + .transform = identity_trans, + .components = components, + }; +} + +// Bind an img_tex to a free texture unit and return its ID. At most +// TEXUNIT_VIDEO_NUM texture units can be bound at once +static int pass_bind(struct gl_video *p, struct img_tex tex) +{ + assert(p->pass_tex_num < TEXUNIT_VIDEO_NUM); + p->pass_tex[p->pass_tex_num] = tex; + return p->pass_tex_num++; +} + +// Rotation by 90° and flipping. +// w/h is used for recentering. +static void get_transform(float w, float h, int rotate, bool flip, + struct gl_transform *out_tr) +{ + int a = rotate % 90 ? 0 : rotate / 90; + int sin90[4] = {0, 1, 0, -1}; // just to avoid rounding issues etc. + int cos90[4] = {1, 0, -1, 0}; + struct gl_transform tr = {{{ cos90[a], sin90[a]}, + {-sin90[a], cos90[a]}}}; + + // basically, recenter to keep the whole image in view + float b[2] = {1, 1}; + gl_transform_vec(tr, &b[0], &b[1]); + tr.t[0] += b[0] < 0 ? w : 0; + tr.t[1] += b[1] < 0 ? h : 0; + + if (flip) { + struct gl_transform fliptr = {{{1, 0}, {0, -1}}, {0, h}}; + gl_transform_trans(fliptr, &tr); + } + + *out_tr = tr; +} + +// Return the chroma plane upscaled to luma size, but with additional padding +// for image sizes not aligned to subsampling. +static int chroma_upsize(int size, int pixel) +{ + return (size + pixel - 1) / pixel * pixel; +} + +// If a and b are on the same plane, return what plane type should be used. +// If a or b are none, the other type always wins. +// Usually: LUMA/RGB/XYZ > CHROMA > ALPHA +static enum plane_type merge_plane_types(enum plane_type a, enum plane_type b) +{ + if (a == PLANE_NONE) + return b; + if (b == PLANE_LUMA || b == PLANE_RGB || b == PLANE_XYZ) + return b; + if (b != PLANE_NONE && a == PLANE_ALPHA) + return b; + return a; +} + +// Places a video_image's image textures + associated metadata into tex[]. The +// number of textures is equal to p->plane_count. 
Any necessary plane offsets +// are stored in off. (e.g. chroma position) +static void pass_get_img_tex(struct gl_video *p, struct video_image *vimg, + struct img_tex tex[4], struct gl_transform off[4]) +{ + assert(vimg->mpi); + + int w = p->image_params.w; + int h = p->image_params.h; + + // Determine the chroma offset + float ls_w = 1.0 / p->ra_format.chroma_w; + float ls_h = 1.0 / p->ra_format.chroma_h; + + struct gl_transform chroma = {{{ls_w, 0.0}, {0.0, ls_h}}}; + + if (p->image_params.chroma_location != MP_CHROMA_CENTER) { + int cx, cy; + mp_get_chroma_location(p->image_params.chroma_location, &cx, &cy); + // By default texture coordinates are such that chroma is centered with + // any chroma subsampling. If a specific direction is given, make it + // so that the luma and chroma sample line up exactly. + // For 4:4:4, setting chroma location should have no effect at all. + // luma sample size (in chroma coord. space) + chroma.t[0] = ls_w < 1 ? ls_w * -cx / 2 : 0; + chroma.t[1] = ls_h < 1 ? ls_h * -cy / 2 : 0; + } + + int msb_valid_bits = + p->ra_format.component_bits + MPMIN(p->ra_format.component_pad, 0); + // The existing code assumes we just have a single tex multiplier for + // all of the planes. This may change in the future + float tex_mul = 1.0 / mp_get_csp_mul(p->image_params.color.space, + msb_valid_bits, + p->ra_format.component_bits); + + memset(tex, 0, 4 * sizeof(tex[0])); + for (int n = 0; n < p->plane_count; n++) { + struct texplane *t = &vimg->planes[n]; + + enum plane_type type = PLANE_NONE; + for (int i = 0; i < 4; i++) { + int c = p->ra_format.components[n][i]; + enum plane_type ctype; + if (c == 0) { + ctype = PLANE_NONE; + } else if (c == 4) { + ctype = PLANE_ALPHA; + } else if (p->image_params.color.space == MP_CSP_RGB) { + ctype = PLANE_RGB; + } else if (p->image_params.color.space == MP_CSP_XYZ) { + ctype = PLANE_XYZ; + } else { + ctype = c == 1 ? PLANE_LUMA : PLANE_CHROMA; + } + type = merge_plane_types(type, ctype); + } + + tex[n] = (struct img_tex){ + .type = type, + .tex = t->tex, + .multiplier = tex_mul, + .w = t->w, + .h = t->h, + }; + + for (int i = 0; i < 4; i++) + tex[n].components += !!p->ra_format.components[n][i]; + + get_transform(t->w, t->h, p->image_params.rotate, t->flipped, + &tex[n].transform); + if (p->image_params.rotate % 180 == 90) + MPSWAP(int, tex[n].w, tex[n].h); + + off[n] = identity_trans; + + if (type == PLANE_CHROMA) { + struct gl_transform rot; + get_transform(0, 0, p->image_params.rotate, true, &rot); + + struct gl_transform tr = chroma; + gl_transform_vec(rot, &tr.t[0], &tr.t[1]); + + float dx = (chroma_upsize(w, p->ra_format.chroma_w) - w) * ls_w; + float dy = (chroma_upsize(h, p->ra_format.chroma_h) - h) * ls_h; + + // Adjust the chroma offset if the real chroma size is fractional + // due image sizes not aligned to chroma subsampling. + struct gl_transform rot2; + get_transform(0, 0, p->image_params.rotate, t->flipped, &rot2); + if (rot2.m[0][0] < 0) + tr.t[0] += dx; + if (rot2.m[1][0] < 0) + tr.t[0] += dy; + if (rot2.m[0][1] < 0) + tr.t[1] += dx; + if (rot2.m[1][1] < 0) + tr.t[1] += dy; + + off[n] = tr; + } + } +} + +// Return the index of the given component (assuming all non-padding components +// of all planes are concatenated into a linear list). 
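//
// As a worked example (assuming an NV12-style layout, where plane 0 carries
// component 1 = luma and plane 1 carries components 2 and 3 = chroma):
//   find_comp(desc, 1) == 0   // Y is the first non-padding component
//   find_comp(desc, 2) == 1   // U
//   find_comp(desc, 3) == 2   // V
//   find_comp(desc, 4) == -1  // no alpha component in this format
// init_video() below uses these indices to build p->color_swizzle, i.e. to
// map each logical component onto "rgba" in the order the planes are sampled.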
+static int find_comp(struct ra_imgfmt_desc *desc, int component) +{ + int cur = 0; + for (int n = 0; n < desc->num_planes; n++) { + for (int i = 0; i < 4; i++) { + if (desc->components[n][i]) { + if (desc->components[n][i] == component) + return cur; + cur++; + } + } + } + return -1; +} + +static void init_video(struct gl_video *p) +{ + p->use_integer_conversion = false; + + if (p->hwdec && ra_hwdec_test_format(p->hwdec, p->image_params.imgfmt)) { + if (p->hwdec->driver->overlay_frame) { + MP_WARN(p, "Using HW-overlay mode. No GL filtering is performed " + "on the video!\n"); + } else { + p->hwdec_mapper = ra_hwdec_mapper_create(p->hwdec, &p->image_params); + if (!p->hwdec_mapper) + MP_ERR(p, "Initializing texture for hardware decoding failed.\n"); + } + if (p->hwdec_mapper) + p->image_params = p->hwdec_mapper->dst_params; + const char **exts = p->hwdec->glsl_extensions; + for (int n = 0; exts && exts[n]; n++) + gl_sc_enable_extension(p->sc, (char *)exts[n]); + p->hwdec_active = true; + } + + p->ra_format = (struct ra_imgfmt_desc){0}; + ra_get_imgfmt_desc(p->ra, p->image_params.imgfmt, &p->ra_format); + + p->plane_count = p->ra_format.num_planes; + + p->has_alpha = false; + p->is_gray = true; + + for (int n = 0; n < p->ra_format.num_planes; n++) { + for (int i = 0; i < 4; i++) { + if (p->ra_format.components[n][i]) { + p->has_alpha |= p->ra_format.components[n][i] == 4; + p->is_gray &= p->ra_format.components[n][i] == 1 || + p->ra_format.components[n][i] == 4; + } + } + } + + for (int c = 0; c < 4; c++) { + int loc = find_comp(&p->ra_format, c + 1); + p->color_swizzle[c] = "rgba"[loc >= 0 && loc < 4 ? loc : 0]; + } + p->color_swizzle[4] = '\0'; + + // Format-dependent checks. + check_gl_features(p); + + mp_image_params_guess_csp(&p->image_params); + + av_lfg_init(&p->lfg, 1); + + debug_check_gl(p, "before video texture creation"); + + if (!p->hwdec_active) { + struct video_image *vimg = &p->image; + + struct mp_image layout = {0}; + mp_image_set_params(&layout, &p->image_params); + + for (int n = 0; n < p->plane_count; n++) { + struct texplane *plane = &vimg->planes[n]; + const struct ra_format *format = p->ra_format.planes[n]; + + plane->w = mp_image_plane_w(&layout, n); + plane->h = mp_image_plane_h(&layout, n); + + struct ra_tex_params params = { + .dimensions = 2, + .w = plane->w + p->opts.tex_pad_x, + .h = plane->h + p->opts.tex_pad_y, + .d = 1, + .format = format, + .render_src = true, + .src_linear = format->linear_filter, + .non_normalized = p->opts.use_rectangle, + .host_mutable = true, + }; + + MP_VERBOSE(p, "Texture for plane %d: %dx%d\n", n, + params.w, params.h); + + plane->tex = ra_tex_create(p->ra, ¶ms); + if (!plane->tex) + abort(); // shit happens + + p->use_integer_conversion |= format->ctype == RA_CTYPE_UINT; + } + } + + debug_check_gl(p, "after video texture creation"); + + gl_video_setup_hooks(p); +} + +// Release any texture mappings associated with the current frame. 
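// (A sketch of the expected map/unmap pairing for hardware decoding, under
// the assumption that mapping happens during frame upload via
// pass_upload_image() declared above; the plane textures are borrowed from
// the mapper rather than owned by gl_video:)
//
//   ra_hwdec_mapper_map(p->hwdec_mapper, vimg->mpi);   // on upload
//   ... render passes sample vimg->planes[n].tex ...
//   ra_hwdec_mapper_unmap(p->hwdec_mapper);            // in the function below
//
// Resetting vimg->id afterwards forces the next draw to map the frame again
// instead of sampling stale plane textures.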
+static void unmap_current_image(struct gl_video *p) +{ + struct video_image *vimg = &p->image; + + if (vimg->hwdec_mapped) { + assert(p->hwdec_active && p->hwdec_mapper); + ra_hwdec_mapper_unmap(p->hwdec_mapper); + memset(vimg->planes, 0, sizeof(vimg->planes)); + vimg->hwdec_mapped = false; + vimg->id = 0; // needs to be mapped again + } +} + +static struct dr_buffer *gl_find_dr_buffer(struct gl_video *p, uint8_t *ptr) +{ + for (int i = 0; i < p->num_dr_buffers; i++) { + struct dr_buffer *buffer = &p->dr_buffers[i]; + uint8_t *bufptr = buffer->buf->data; + size_t size = buffer->buf->params.size; + if (ptr >= bufptr && ptr < bufptr + size) + return buffer; + } + + return NULL; +} + +static void gc_pending_dr_fences(struct gl_video *p, bool force) +{ +again:; + for (int n = 0; n < p->num_dr_buffers; n++) { + struct dr_buffer *buffer = &p->dr_buffers[n]; + if (!buffer->mpi) + continue; + + bool res = p->ra->fns->buf_poll(p->ra, buffer->buf); + if (res || force) { + // Unreferencing the image could cause gl_video_dr_free_buffer() + // to be called by the talloc destructor (if it was the last + // reference). This will implicitly invalidate the buffer pointer + // and change the p->dr_buffers array. To make it worse, it could + // free multiple dr_buffers due to weird theoretical corner cases. + // This is also why we use the goto to iterate again from the + // start, because everything gets fucked up. Hail satan! + struct mp_image *ref = buffer->mpi; + buffer->mpi = NULL; + talloc_free(ref); + goto again; + } + } +} + +static void unref_current_image(struct gl_video *p) +{ + unmap_current_image(p); + p->image.id = 0; + + mp_image_unrefp(&p->image.mpi); + + // While we're at it, also garbage collect pending fences in here to + // get it out of the way. + gc_pending_dr_fences(p, false); +} + +// If overlay mode is used, make sure to remove the overlay. +// Be careful with this. Removing the overlay and adding another one will +// lead to flickering artifacts. +static void unmap_overlay(struct gl_video *p) +{ + if (p->hwdec_active && p->hwdec->driver->overlay_frame) + p->hwdec->driver->overlay_frame(p->hwdec, NULL, NULL, NULL, true); +} + +static void uninit_video(struct gl_video *p) +{ + uninit_rendering(p); + + struct video_image *vimg = &p->image; + + unmap_overlay(p); + unref_current_image(p); + + for (int n = 0; n < p->plane_count; n++) { + struct texplane *plane = &vimg->planes[n]; + ra_tex_free(p->ra, &plane->tex); + } + *vimg = (struct video_image){0}; + + // Invalidate image_params to ensure that gl_video_config() will call + // init_video() on uninitialized gl_video. + p->real_image_params = (struct mp_image_params){0}; + p->image_params = p->real_image_params; + p->hwdec_active = false; + ra_hwdec_mapper_free(&p->hwdec_mapper); +} + +static void pass_record(struct gl_video *p, struct mp_pass_perf perf) +{ + if (!p->pass || p->pass_idx == PASS_INFO_MAX) + return; + + struct pass_info *pass = &p->pass[p->pass_idx]; + pass->perf = perf; + + if (pass->desc.len == 0) + bstr_xappend(p, &pass->desc, bstr0("(unknown)")); + + p->pass_idx++; +} + +PRINTF_ATTRIBUTE(2, 3) +static void pass_describe(struct gl_video *p, const char *textf, ...) 
+{ + if (!p->pass || p->pass_idx == PASS_INFO_MAX) + return; + + struct pass_info *pass = &p->pass[p->pass_idx]; + + if (pass->desc.len > 0) + bstr_xappend(p, &pass->desc, bstr0(" + ")); + + va_list ap; + va_start(ap, textf); + bstr_xappend_vasprintf(p, &pass->desc, textf, ap); + va_end(ap); +} + +static void pass_info_reset(struct gl_video *p, bool is_redraw) +{ + p->pass = is_redraw ? p->pass_redraw : p->pass_fresh; + p->pass_idx = 0; + + for (int i = 0; i < PASS_INFO_MAX; i++) { + p->pass[i].desc.len = 0; + p->pass[i].perf = (struct mp_pass_perf){0}; + } +} + +static void pass_report_performance(struct gl_video *p) +{ + if (!p->pass) + return; + + for (int i = 0; i < PASS_INFO_MAX; i++) { + struct pass_info *pass = &p->pass[i]; + if (pass->desc.len) { + MP_DBG(p, "pass '%.*s': last %dus avg %dus peak %dus\n", + BSTR_P(pass->desc), + (int)pass->perf.last/1000, + (int)pass->perf.avg/1000, + (int)pass->perf.peak/1000); + } + } +} + +static void pass_prepare_src_tex(struct gl_video *p) +{ + struct gl_shader_cache *sc = p->sc; + + for (int n = 0; n < p->pass_tex_num; n++) { + struct img_tex *s = &p->pass_tex[n]; + if (!s->tex) + continue; + + char *texture_name = mp_tprintf(32, "texture%d", n); + char *texture_size = mp_tprintf(32, "texture_size%d", n); + char *texture_rot = mp_tprintf(32, "texture_rot%d", n); + char *texture_off = mp_tprintf(32, "texture_off%d", n); + char *pixel_size = mp_tprintf(32, "pixel_size%d", n); + + gl_sc_uniform_texture(sc, texture_name, s->tex); + float f[2] = {1, 1}; + if (!s->tex->params.non_normalized) { + f[0] = s->tex->params.w; + f[1] = s->tex->params.h; + } + gl_sc_uniform_vec2(sc, texture_size, f); + gl_sc_uniform_mat2(sc, texture_rot, true, (float *)s->transform.m); + gl_sc_uniform_vec2(sc, texture_off, (float *)s->transform.t); + gl_sc_uniform_vec2(sc, pixel_size, (float[]){1.0f / f[0], + 1.0f / f[1]}); + } +} + +// Sets the appropriate compute shader metadata for an implicit compute pass +// bw/bh: block size +static void pass_is_compute(struct gl_video *p, int bw, int bh) +{ + p->pass_compute = (struct compute_info){ + .active = true, + .block_w = bw, + .block_h = bh, + }; +} + +// w/h: the width/height of the compute shader's operating domain (e.g. the +// target target that needs to be written, or the source texture that needs to +// be reduced) +static void dispatch_compute(struct gl_video *p, int w, int h, + struct compute_info info) +{ + PRELUDE("layout (local_size_x = %d, local_size_y = %d) in;\n", + info.threads_w > 0 ? info.threads_w : info.block_w, + info.threads_h > 0 ? 
info.threads_h : info.block_h); + + pass_prepare_src_tex(p); + gl_sc_set_vertex_format(p->sc, vertex_vao, sizeof(struct vertex)); + + // Since we don't actually have vertices, we pretend for convenience + // reasons that we do and calculate the right texture coordinates based on + // the output sample ID + gl_sc_uniform_vec2(p->sc, "out_scale", (float[2]){ 1.0 / w, 1.0 / h }); + PRELUDE("#define outcoord(id) (out_scale * (vec2(id) + vec2(0.5)))\n"); + + for (int n = 0; n < TEXUNIT_VIDEO_NUM; n++) { + struct img_tex *s = &p->pass_tex[n]; + if (!s->tex) + continue; + + // We need to rescale the coordinates to the true texture size + char tex_scale[32]; + snprintf(tex_scale, sizeof(tex_scale), "tex_scale%d", n); + gl_sc_uniform_vec2(p->sc, tex_scale, (float[2]){ + (float)s->w / s->tex->params.w, + (float)s->h / s->tex->params.h, + }); + + PRELUDE("#define texcoord%d_raw(id) (tex_scale%d * outcoord(id))\n", n, n); + PRELUDE("#define texcoord%d_rot(id) (texture_rot%d * texcoord%d_raw(id) + " + "pixel_size%d * texture_off%d)\n", n, n, n, n, n); + // Clamp the texture coordinates to prevent sampling out-of-bounds in + // threads that exceed the requested width/height + PRELUDE("#define texmap%d(id) min(texcoord%d_rot(id), vec2(1.0))\n", n, n); + PRELUDE("#define texcoord%d texmap%d(gl_GlobalInvocationID)\n", n, n); + } + + // always round up when dividing to make sure we don't leave off a part of + // the image + int num_x = info.block_w > 0 ? (w + info.block_w - 1) / info.block_w : 1, + num_y = info.block_h > 0 ? (h + info.block_h - 1) / info.block_h : 1; + + pass_record(p, gl_sc_dispatch_compute(p->sc, num_x, num_y, 1)); + + memset(&p->pass_tex, 0, sizeof(p->pass_tex)); + p->pass_tex_num = 0; +} + +static struct mp_pass_perf render_pass_quad(struct gl_video *p, + struct fbodst target, + const struct mp_rect *dst) +{ + struct vertex va[6] = {0}; + + struct gl_transform t; + gl_transform_ortho_fbodst(&t, target); + + float x[2] = {dst->x0, dst->x1}; + float y[2] = {dst->y0, dst->y1}; + gl_transform_vec(t, &x[0], &y[0]); + gl_transform_vec(t, &x[1], &y[1]); + + for (int n = 0; n < 4; n++) { + struct vertex *v = &va[n]; + v->position.x = x[n / 2]; + v->position.y = y[n % 2]; + for (int i = 0; i < p->pass_tex_num; i++) { + struct img_tex *s = &p->pass_tex[i]; + if (!s->tex) + continue; + struct gl_transform tr = s->transform; + float tx = (n / 2) * s->w; + float ty = (n % 2) * s->h; + gl_transform_vec(tr, &tx, &ty); + bool rect = s->tex->params.non_normalized; + v->texcoord[i].x = tx / (rect ? 1 : s->tex->params.w); + v->texcoord[i].y = ty / (rect ? 
1 : s->tex->params.h); + } + } + + va[4] = va[2]; + va[5] = va[1]; + + return gl_sc_dispatch_draw(p->sc, target.tex, va, 6); +} + +static void finish_pass_direct(struct gl_video *p, struct fbodst target, + const struct mp_rect *dst) +{ + pass_prepare_src_tex(p); + gl_sc_set_vertex_format(p->sc, vertex_vao, sizeof(struct vertex)); + pass_record(p, render_pass_quad(p, target, dst)); + debug_check_gl(p, "after rendering"); + memset(&p->pass_tex, 0, sizeof(p->pass_tex)); + p->pass_tex_num = 0; +} + +// dst_fbo: this will be used for rendering; possibly reallocating the whole +// FBO, if the required parameters have changed +// w, h: required FBO target dimension, and also defines the target rectangle +// used for rasterization +// flags: 0 or combination of FBOTEX_FUZZY_W/FBOTEX_FUZZY_H (setting the fuzzy +// flags allows the FBO to be larger than the w/h parameters) +static void finish_pass_fbo(struct gl_video *p, struct fbotex *dst_fbo, + int w, int h, int flags) +{ + fbotex_change(dst_fbo, p->ra, p->log, w, h, p->fbo_format, flags); + + if (p->pass_compute.active) { + if (!dst_fbo->tex) + return; + gl_sc_uniform_image2D_wo(p->sc, "out_image", dst_fbo->tex); + if (!p->pass_compute.directly_writes) + GLSL(imageStore(out_image, ivec2(gl_GlobalInvocationID), color);) + + dispatch_compute(p, w, h, p->pass_compute); + p->pass_compute = (struct compute_info){0}; + + debug_check_gl(p, "after dispatching compute shader"); + } else { + finish_pass_direct(p, dst_fbo->fbo, &(struct mp_rect){0, 0, w, h}); + } +} + +static const char *get_tex_swizzle(struct img_tex *img) +{ + if (!img->tex) + return "rgba"; + return img->tex->params.format->luminance_alpha ? "raaa" : "rgba"; +} + +// Copy a texture to the vec4 color, while increasing offset. Also applies +// the texture multiplier to the sampled color +static void copy_img_tex(struct gl_video *p, int *offset, struct img_tex img) +{ + int count = img.components; + assert(*offset + count <= 4); + + int id = pass_bind(p, img); + char src[5] = {0}; + char dst[5] = {0}; + const char *tex_fmt = get_tex_swizzle(&img); + const char *dst_fmt = "rgba"; + for (int i = 0; i < count; i++) { + src[i] = tex_fmt[i]; + dst[i] = dst_fmt[*offset + i]; + } + + if (img.tex && img.tex->params.format->ctype == RA_CTYPE_UINT) { + uint64_t tex_max = 1ull << p->ra_format.component_bits; + img.multiplier *= 1.0 / (tex_max - 1); + } + + GLSLF("color.%s = %f * vec4(texture(texture%d, texcoord%d)).%s;\n", + dst, img.multiplier, id, id, src); + + *offset += count; +} + +static void skip_unused(struct gl_video *p, int num_components) +{ + for (int i = num_components; i < 4; i++) + GLSLF("color.%c = %f;\n", "rgba"[i], i < 3 ? 
0.0 : 1.0); +} + +static void uninit_scaler(struct gl_video *p, struct scaler *scaler) +{ + fbotex_uninit(&scaler->sep_fbo); + ra_tex_free(p->ra, &scaler->lut); + scaler->kernel = NULL; + scaler->initialized = false; +} + +static void hook_prelude(struct gl_video *p, const char *name, int id, + struct img_tex tex) +{ + GLSLHF("#define %s_raw texture%d\n", name, id); + GLSLHF("#define %s_pos texcoord%d\n", name, id); + GLSLHF("#define %s_size texture_size%d\n", name, id); + GLSLHF("#define %s_rot texture_rot%d\n", name, id); + GLSLHF("#define %s_pt pixel_size%d\n", name, id); + GLSLHF("#define %s_map texmap%d\n", name, id); + GLSLHF("#define %s_mul %f\n", name, tex.multiplier); + + // Set up the sampling functions + GLSLHF("#define %s_tex(pos) (%s_mul * vec4(texture(%s_raw, pos)).%s)\n", + name, name, name, get_tex_swizzle(&tex)); + + // Since the extra matrix multiplication impacts performance, + // skip it unless the texture was actually rotated + if (gl_transform_eq(tex.transform, identity_trans)) { + GLSLHF("#define %s_texOff(off) %s_tex(%s_pos + %s_pt * vec2(off))\n", + name, name, name, name); + } else { + GLSLHF("#define %s_texOff(off) " + "%s_tex(%s_pos + %s_rot * vec2(off)/%s_size)\n", + name, name, name, name, name); + } +} + +static bool saved_tex_find(struct gl_video *p, const char *name, + struct img_tex *out) +{ + if (!name || !out) + return false; + + for (int i = 0; i < p->saved_tex_num; i++) { + if (strcmp(p->saved_tex[i].name, name) == 0) { + *out = p->saved_tex[i].tex; + return true; + } + } + + return false; +} + +static void saved_tex_store(struct gl_video *p, const char *name, + struct img_tex tex) +{ + assert(name); + + for (int i = 0; i < p->saved_tex_num; i++) { + if (strcmp(p->saved_tex[i].name, name) == 0) { + p->saved_tex[i].tex = tex; + return; + } + } + + assert(p->saved_tex_num < SHADER_MAX_SAVED); + p->saved_tex[p->saved_tex_num++] = (struct saved_tex) { + .name = name, + .tex = tex + }; +} + +static bool pass_hook_setup_binds(struct gl_video *p, const char *name, + struct img_tex tex, struct tex_hook *hook) +{ + for (int t = 0; t < TEXUNIT_VIDEO_NUM; t++) { + char *bind_name = (char *)hook->bind_tex[t]; + + if (!bind_name) + continue; + + // This is a special name that means "currently hooked texture" + if (strcmp(bind_name, "HOOKED") == 0) { + int id = pass_bind(p, tex); + hook_prelude(p, "HOOKED", id, tex); + hook_prelude(p, name, id, tex); + continue; + } + + // BIND can also be used to load user-defined textures, in which + // case we will directly load them as a uniform instead of + // generating the hook_prelude boilerplate + for (int u = 0; u < p->user_tex_num; u++) { + struct gl_user_shader_tex *utex = &p->user_textures[u]; + if (bstr_equals0(utex->name, bind_name)) { + gl_sc_uniform_texture(p->sc, bind_name, utex->tex); + goto next_bind; + } + } + + struct img_tex bind_tex; + if (!saved_tex_find(p, bind_name, &bind_tex)) { + // Clean up texture bindings and move on to the next hook + MP_DBG(p, "Skipping hook on %s due to no texture named %s.\n", + name, bind_name); + p->pass_tex_num -= t; + return false; + } + + hook_prelude(p, bind_name, pass_bind(p, bind_tex), bind_tex); + +next_bind: ; + } + + return true; +} + +// Process hooks for a plane, saving the result and returning a new img_tex +// If 'trans' is NULL, the shader is forbidden from transforming tex +static struct img_tex pass_hook(struct gl_video *p, const char *name, + struct img_tex tex, struct gl_transform *trans) +{ + if (!name) + return tex; + + saved_tex_store(p, name, tex); + + 
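+    // The entries in p->tex_hooks come either from the built-in hooks set up
+    // by gl_video_setup_hooks() below (deband, unsharp) or from user shader
+    // passes loaded via parse_user_shader(). Purely for illustration, a
+    // minimal user pass hooking LUMA might look roughly like this (the
+    // //!-directive names come from user_shaders.c; the GLSL side uses the
+    // HOOKED_* macros defined by hook_prelude() above):
+    //
+    //   //!HOOK LUMA
+    //   //!BIND HOOKED
+    //   //!DESC identity example
+    //   vec4 hook() { return HOOKED_tex(HOOKED_pos); }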
MP_DBG(p, "Running hooks for %s\n", name); + for (int i = 0; i < p->tex_hook_num; i++) { + struct tex_hook *hook = &p->tex_hooks[i]; + + // Figure out if this pass hooks this texture + for (int h = 0; h < SHADER_MAX_HOOKS; h++) { + if (hook->hook_tex[h] && strcmp(hook->hook_tex[h], name) == 0) + goto found; + } + + continue; + +found: + // Check the hook's condition + if (hook->cond && !hook->cond(p, tex, hook->priv)) { + MP_DBG(p, "Skipping hook on %s due to condition.\n", name); + continue; + } + + if (!pass_hook_setup_binds(p, name, tex, hook)) + continue; + + // Run the actual hook. This generates a series of GLSL shader + // instructions sufficient for drawing the hook's output + struct gl_transform hook_off = identity_trans; + hook->hook(p, tex, &hook_off, hook->priv); + + int comps = hook->components ? hook->components : tex.components; + skip_unused(p, comps); + + // Compute the updated FBO dimensions and store the result + struct mp_rect_f sz = {0, 0, tex.w, tex.h}; + gl_transform_rect(hook_off, &sz); + int w = lroundf(fabs(sz.x1 - sz.x0)); + int h = lroundf(fabs(sz.y1 - sz.y0)); + + assert(p->hook_fbo_num < SHADER_MAX_SAVED); + struct fbotex *fbo = &p->hook_fbos[p->hook_fbo_num++]; + finish_pass_fbo(p, fbo, w, h, 0); + + const char *store_name = hook->save_tex ? hook->save_tex : name; + struct img_tex saved_tex = img_tex_fbo(fbo, tex.type, comps); + + // If the texture we're saving overwrites the "current" texture, also + // update the tex parameter so that the future loop cycles will use the + // updated values, and export the offset + if (strcmp(store_name, name) == 0) { + if (!trans && !gl_transform_eq(hook_off, identity_trans)) { + MP_ERR(p, "Hook tried changing size of unscalable texture %s!\n", + name); + return tex; + } + + tex = saved_tex; + if (trans) + gl_transform_trans(hook_off, trans); + } + + saved_tex_store(p, store_name, saved_tex); + } + + return tex; +} + +// This can be used at any time in the middle of rendering to specify an +// optional hook point, which if triggered will render out to a new FBO and +// load the result back into vec4 color. 
Offsets applied by the hooks are +// accumulated in tex_trans, and the FBO is dimensioned according +// to p->texture_w/h +static void pass_opt_hook_point(struct gl_video *p, const char *name, + struct gl_transform *tex_trans) +{ + if (!name) + return; + + for (int i = 0; i < p->tex_hook_num; i++) { + struct tex_hook *hook = &p->tex_hooks[i]; + + for (int h = 0; h < SHADER_MAX_HOOKS; h++) { + if (hook->hook_tex[h] && strcmp(hook->hook_tex[h], name) == 0) + goto found; + } + + for (int b = 0; b < TEXUNIT_VIDEO_NUM; b++) { + if (hook->bind_tex[b] && strcmp(hook->bind_tex[b], name) == 0) + goto found; + } + } + + // Nothing uses this texture, don't bother storing it + return; + +found: + assert(p->hook_fbo_num < SHADER_MAX_SAVED); + struct fbotex *fbo = &p->hook_fbos[p->hook_fbo_num++]; + finish_pass_fbo(p, fbo, p->texture_w, p->texture_h, 0); + + struct img_tex img = img_tex_fbo(fbo, PLANE_RGB, p->components); + img = pass_hook(p, name, img, tex_trans); + copy_img_tex(p, &(int){0}, img); + p->texture_w = img.w; + p->texture_h = img.h; + p->components = img.components; + pass_describe(p, "(remainder pass)"); +} + +static void load_shader(struct gl_video *p, struct bstr body) +{ + gl_sc_hadd_bstr(p->sc, body); + gl_sc_uniform_f(p->sc, "random", (double)av_lfg_get(&p->lfg) / UINT32_MAX); + gl_sc_uniform_i(p->sc, "frame", p->frames_uploaded); + gl_sc_uniform_vec2(p->sc, "input_size", + (float[]){(p->src_rect.x1 - p->src_rect.x0) * + p->texture_offset.m[0][0], + (p->src_rect.y1 - p->src_rect.y0) * + p->texture_offset.m[1][1]}); + gl_sc_uniform_vec2(p->sc, "target_size", + (float[]){p->dst_rect.x1 - p->dst_rect.x0, + p->dst_rect.y1 - p->dst_rect.y0}); + gl_sc_uniform_vec2(p->sc, "tex_offset", + (float[]){p->src_rect.x0 * p->texture_offset.m[0][0] + + p->texture_offset.t[0], + p->src_rect.y0 * p->texture_offset.m[1][1] + + p->texture_offset.t[1]}); +} + +// Semantic equality +static bool double_seq(double a, double b) +{ + return (isnan(a) && isnan(b)) || a == b; +} + +static bool scaler_fun_eq(struct scaler_fun a, struct scaler_fun b) +{ + if ((a.name && !b.name) || (b.name && !a.name)) + return false; + + return ((!a.name && !b.name) || strcmp(a.name, b.name) == 0) && + double_seq(a.params[0], b.params[0]) && + double_seq(a.params[1], b.params[1]) && + a.blur == b.blur && + a.taper == b.taper; +} + +static bool scaler_conf_eq(struct scaler_config a, struct scaler_config b) +{ + // Note: antiring isn't compared because it doesn't affect LUT + // generation + return scaler_fun_eq(a.kernel, b.kernel) && + scaler_fun_eq(a.window, b.window) && + a.radius == b.radius && + a.clamp == b.clamp; +} + +static void reinit_scaler(struct gl_video *p, struct scaler *scaler, + const struct scaler_config *conf, + double scale_factor, + int sizes[]) +{ + if (scaler_conf_eq(scaler->conf, *conf) && + scaler->scale_factor == scale_factor && + scaler->initialized) + return; + + uninit_scaler(p, scaler); + + scaler->conf = *conf; + bool is_tscale = scaler->index == SCALER_TSCALE; + scaler->conf.kernel.name = (char *)handle_scaler_opt(conf->kernel.name, is_tscale); + scaler->conf.window.name = (char *)handle_scaler_opt(conf->window.name, is_tscale); + scaler->scale_factor = scale_factor; + scaler->insufficient = false; + scaler->initialized = true; + + const struct filter_kernel *t_kernel = mp_find_filter_kernel(conf->kernel.name); + if (!t_kernel) + return; + + scaler->kernel_storage = *t_kernel; + scaler->kernel = &scaler->kernel_storage; + + const char *win = conf->window.name; + if (!win || !win[0]) + win = 
t_kernel->window; // fall back to the scaler's default window + const struct filter_window *t_window = mp_find_filter_window(win); + if (t_window) + scaler->kernel->w = *t_window; + + for (int n = 0; n < 2; n++) { + if (!isnan(conf->kernel.params[n])) + scaler->kernel->f.params[n] = conf->kernel.params[n]; + if (!isnan(conf->window.params[n])) + scaler->kernel->w.params[n] = conf->window.params[n]; + } + + if (conf->kernel.blur > 0.0) + scaler->kernel->f.blur = conf->kernel.blur; + if (conf->window.blur > 0.0) + scaler->kernel->w.blur = conf->window.blur; + + if (conf->kernel.taper > 0.0) + scaler->kernel->f.taper = conf->kernel.taper; + if (conf->window.taper > 0.0) + scaler->kernel->w.taper = conf->window.taper; + + if (scaler->kernel->f.resizable && conf->radius > 0.0) + scaler->kernel->f.radius = conf->radius; + + scaler->kernel->clamp = conf->clamp; + scaler->kernel->value_cutoff = conf->cutoff; + + scaler->insufficient = !mp_init_filter(scaler->kernel, sizes, scale_factor); + + int size = scaler->kernel->size; + int num_components = size > 2 ? 4 : size; + const struct ra_format *fmt = ra_find_float16_format(p->ra, num_components); + assert(fmt); + + int width = (size + num_components - 1) / num_components; // round up + int stride = width * num_components; + assert(size <= stride); + + scaler->lut_size = 1 << p->opts.scaler_lut_size; + + float *weights = talloc_array(NULL, float, scaler->lut_size * stride); + mp_compute_lut(scaler->kernel, scaler->lut_size, stride, weights); + + bool use_1d = scaler->kernel->polar && (p->ra->caps & RA_CAP_TEX_1D); + + struct ra_tex_params lut_params = { + .dimensions = use_1d ? 1 : 2, + .w = use_1d ? scaler->lut_size : width, + .h = use_1d ? 1 : scaler->lut_size, + .d = 1, + .format = fmt, + .render_src = true, + .src_linear = true, + .initial_data = weights, + }; + scaler->lut = ra_tex_create(p->ra, &lut_params); + + talloc_free(weights); + + debug_check_gl(p, "after initializing scaler"); +} + +// Special helper for sampling from two separated stages +static void pass_sample_separated(struct gl_video *p, struct img_tex src, + struct scaler *scaler, int w, int h) +{ + // Separate the transformation into x and y components, per pass + struct gl_transform t_x = { + .m = {{src.transform.m[0][0], 0.0}, {src.transform.m[1][0], 1.0}}, + .t = {src.transform.t[0], 0.0}, + }; + struct gl_transform t_y = { + .m = {{1.0, src.transform.m[0][1]}, {0.0, src.transform.m[1][1]}}, + .t = {0.0, src.transform.t[1]}, + }; + + // First pass (scale only in the y dir) + src.transform = t_y; + sampler_prelude(p->sc, pass_bind(p, src)); + GLSLF("// first pass\n"); + pass_sample_separated_gen(p->sc, scaler, 0, 1); + GLSLF("color *= %f;\n", src.multiplier); + finish_pass_fbo(p, &scaler->sep_fbo, src.w, h, FBOTEX_FUZZY_H); + + // Second pass (scale only in the x dir) + src = img_tex_fbo(&scaler->sep_fbo, src.type, src.components); + src.transform = t_x; + pass_describe(p, "%s second pass", scaler->conf.kernel.name); + sampler_prelude(p->sc, pass_bind(p, src)); + pass_sample_separated_gen(p->sc, scaler, 1, 0); +} + +// Picks either the compute shader version or the regular sampler version +// depending on hardware support +static void pass_dispatch_sample_polar(struct gl_video *p, struct scaler *scaler, + struct img_tex tex, int w, int h) +{ + uint64_t reqs = RA_CAP_COMPUTE | RA_CAP_NESTED_ARRAY; + if ((p->ra->caps & reqs) != reqs) + goto fallback; + + int bound = ceil(scaler->kernel->radius_cutoff); + int offset = bound - 1; // padding top/left + int padding = offset + bound; 
// total padding + + float ratiox = (float)w / tex.w, + ratioy = (float)h / tex.h; + + // For performance we want to load at least as many pixels + // horizontally as there are threads in a warp (32 for nvidia), as + // well as enough to take advantage of shmem parallelism + const int warp_size = 32, threads = 256; + int bw = warp_size; + int bh = threads / bw; + + // We need to sample everything from base_min to base_max, so make sure + // we have enough room in shmem + int iw = (int)ceil(bw / ratiox) + padding + 1, + ih = (int)ceil(bh / ratioy) + padding + 1; + + int shmem_req = iw * ih * tex.components * sizeof(float); + if (shmem_req > p->ra->max_shmem) + goto fallback; + + pass_is_compute(p, bw, bh); + pass_compute_polar(p->sc, scaler, tex.components, bw, bh, iw, ih); + return; + +fallback: + // Fall back to regular polar shader when compute shaders are unsupported + // or the kernel is too big for shmem + pass_sample_polar(p->sc, scaler, tex.components, p->ra->glsl_version); +} + +// Sample from img_tex, with the src rectangle given by it. +// The dst rectangle is implicit by what the caller will do next, but w and h +// must still be what is going to be used (to dimension FBOs correctly). +// This will write the scaled contents to the vec4 "color". +// The scaler unit is initialized by this function; in order to avoid cache +// thrashing, the scaler unit should usually use the same parameters. +static void pass_sample(struct gl_video *p, struct img_tex tex, + struct scaler *scaler, const struct scaler_config *conf, + double scale_factor, int w, int h) +{ + reinit_scaler(p, scaler, conf, scale_factor, filter_sizes); + + // Describe scaler + const char *scaler_opt[] = { + [SCALER_SCALE] = "scale", + [SCALER_DSCALE] = "dscale", + [SCALER_CSCALE] = "cscale", + [SCALER_TSCALE] = "tscale", + }; + + pass_describe(p, "%s=%s (%s)", scaler_opt[scaler->index], + scaler->conf.kernel.name, plane_names[tex.type]); + + bool is_separated = scaler->kernel && !scaler->kernel->polar; + + // Set up the transformation+prelude and bind the texture, for everything + // other than separated scaling (which does this in the subfunction) + if (!is_separated) + sampler_prelude(p->sc, pass_bind(p, tex)); + + // Dispatch the scaler. They're all wildly different. + const char *name = scaler->conf.kernel.name; + if (strcmp(name, "bilinear") == 0) { + GLSL(color = texture(tex, pos);) + } else if (strcmp(name, "bicubic_fast") == 0) { + pass_sample_bicubic_fast(p->sc); + } else if (strcmp(name, "oversample") == 0) { + pass_sample_oversample(p->sc, scaler, w, h); + } else if (scaler->kernel && scaler->kernel->polar) { + pass_dispatch_sample_polar(p, scaler, tex, w, h); + } else if (scaler->kernel) { + pass_sample_separated(p, tex, scaler, w, h); + } else { + // Should never happen + abort(); + } + + // Apply any required multipliers. 
Separated scaling already does this in + // its first stage + if (!is_separated) + GLSLF("color *= %f;\n", tex.multiplier); + + // Micro-optimization: Avoid scaling unneeded channels + skip_unused(p, tex.components); +} + +// Returns true if two img_texs are semantically equivalent (same metadata) +static bool img_tex_equiv(struct img_tex a, struct img_tex b) +{ + return a.type == b.type && + a.components == b.components && + a.multiplier == b.multiplier && + a.tex->params.format == b.tex->params.format && + a.tex->params.w == b.tex->params.w && + a.tex->params.h == b.tex->params.h && + a.w == b.w && + a.h == b.h && + gl_transform_eq(a.transform, b.transform); +} + +static bool add_hook(struct gl_video *p, struct tex_hook hook) +{ + if (p->tex_hook_num < SHADER_MAX_PASSES) { + p->tex_hooks[p->tex_hook_num++] = hook; + return true; + } else { + MP_ERR(p, "Too many passes! Limit is %d.\n", SHADER_MAX_PASSES); + talloc_free(hook.priv); + return false; + } +} + +static void deband_hook(struct gl_video *p, struct img_tex tex, + struct gl_transform *trans, void *priv) +{ + pass_describe(p, "debanding (%s)", plane_names[tex.type]); + pass_sample_deband(p->sc, p->opts.deband_opts, &p->lfg, + p->image_params.color.gamma); +} + +static void unsharp_hook(struct gl_video *p, struct img_tex tex, + struct gl_transform *trans, void *priv) +{ + pass_describe(p, "unsharp masking"); + pass_sample_unsharp(p->sc, p->opts.unsharp); +} + +struct szexp_ctx { + struct gl_video *p; + struct img_tex tex; +}; + +static bool szexp_lookup(void *priv, struct bstr var, float size[2]) +{ + struct szexp_ctx *ctx = priv; + struct gl_video *p = ctx->p; + + if (bstr_equals0(var, "NATIVE_CROPPED")) { + size[0] = (p->src_rect.x1 - p->src_rect.x0) * p->texture_offset.m[0][0]; + size[1] = (p->src_rect.y1 - p->src_rect.y0) * p->texture_offset.m[1][1]; + return true; + } + + // The size of OUTPUT is determined. It could be useful for certain + // user shaders to skip passes. 
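+    // (For illustration: together with user_hook_cond() below, this is what
+    // lets a user pass guard itself with a size condition, e.g. an expression
+    // comparing OUTPUT against HOOKED so an upscaling pass only runs when the
+    // output really is larger; the exact expression syntax is handled in
+    // user_shaders.c.)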
+ if (bstr_equals0(var, "OUTPUT")) { + size[0] = p->dst_rect.x1 - p->dst_rect.x0; + size[1] = p->dst_rect.y1 - p->dst_rect.y0; + return true; + } + + // HOOKED is a special case + if (bstr_equals0(var, "HOOKED")) { + size[0] = ctx->tex.w; + size[1] = ctx->tex.h; + return true; + } + + for (int o = 0; o < p->saved_tex_num; o++) { + if (bstr_equals0(var, p->saved_tex[o].name)) { + size[0] = p->saved_tex[o].tex.w; + size[1] = p->saved_tex[o].tex.h; + return true; + } + } + + return false; +} + +static bool user_hook_cond(struct gl_video *p, struct img_tex tex, void *priv) +{ + struct gl_user_shader_hook *shader = priv; + assert(shader); + + float res = false; + eval_szexpr(p->log, &(struct szexp_ctx){p, tex}, szexp_lookup, shader->cond, &res); + return res; +} + +static void user_hook(struct gl_video *p, struct img_tex tex, + struct gl_transform *trans, void *priv) +{ + struct gl_user_shader_hook *shader = priv; + assert(shader); + load_shader(p, shader->pass_body); + + pass_describe(p, "user shader: %.*s (%s)", BSTR_P(shader->pass_desc), + plane_names[tex.type]); + + if (shader->compute.active) { + p->pass_compute = shader->compute; + GLSLF("hook();\n"); + } else { + GLSLF("color = hook();\n"); + } + + // Make sure we at least create a legal FBO on failure, since it's better + // to do this and display an error message than just crash OpenGL + float w = 1.0, h = 1.0; + + eval_szexpr(p->log, &(struct szexp_ctx){p, tex}, szexp_lookup, shader->width, &w); + eval_szexpr(p->log, &(struct szexp_ctx){p, tex}, szexp_lookup, shader->height, &h); + + *trans = (struct gl_transform){{{w / tex.w, 0}, {0, h / tex.h}}}; + gl_transform_trans(shader->offset, trans); +} + +static bool add_user_hook(void *priv, struct gl_user_shader_hook hook) +{ + struct gl_video *p = priv; + struct gl_user_shader_hook *copy = talloc_ptrtype(p, copy); + *copy = hook; + + struct tex_hook texhook = { + .save_tex = bstrdup0(copy, hook.save_tex), + .components = hook.components, + .hook = user_hook, + .cond = user_hook_cond, + .priv = copy, + }; + + for (int h = 0; h < SHADER_MAX_HOOKS; h++) + texhook.hook_tex[h] = bstrdup0(copy, hook.hook_tex[h]); + for (int h = 0; h < SHADER_MAX_BINDS; h++) + texhook.bind_tex[h] = bstrdup0(copy, hook.bind_tex[h]); + + return add_hook(p, texhook); +} + +static bool add_user_tex(void *priv, struct gl_user_shader_tex tex) +{ + struct gl_video *p = priv; + + if (p->user_tex_num == SHADER_MAX_PASSES) { + MP_ERR(p, "Too many textures! 
Limit is %d.\n", SHADER_MAX_PASSES); + goto err; + } + + tex.tex = ra_tex_create(p->ra, &tex.params); + TA_FREEP(&tex.params.initial_data); + + p->user_textures[p->user_tex_num++] = tex; + return true; + +err: + talloc_free(tex.params.initial_data); + return false; +} + +static void load_user_shaders(struct gl_video *p, char **shaders) +{ + if (!shaders) + return; + + for (int n = 0; shaders[n] != NULL; n++) { + struct bstr file = load_cached_file(p, shaders[n]); + parse_user_shader(p->log, p->ra, file, p, add_user_hook, add_user_tex); + } +} + +static void gl_video_setup_hooks(struct gl_video *p) +{ + gl_video_reset_hooks(p); + + if (p->opts.deband) { + add_hook(p, (struct tex_hook) { + .hook_tex = {"LUMA", "CHROMA", "RGB", "XYZ"}, + .bind_tex = {"HOOKED"}, + .hook = deband_hook, + }); + } + + if (p->opts.unsharp != 0.0) { + add_hook(p, (struct tex_hook) { + .hook_tex = {"MAIN"}, + .bind_tex = {"HOOKED"}, + .hook = unsharp_hook, + }); + } + + load_user_shaders(p, p->opts.user_shaders); +} + +// sample from video textures, set "color" variable to yuv value +static void pass_read_video(struct gl_video *p) +{ + struct img_tex tex[4]; + struct gl_transform offsets[4]; + pass_get_img_tex(p, &p->image, tex, offsets); + + // To keep the code as simple as possibly, we currently run all shader + // stages even if they would be unnecessary (e.g. no hooks for a texture). + // In the future, deferred img_tex should optimize this away. + + // Merge semantically identical textures. This loop is done from back + // to front so that merged textures end up in the right order while + // simultaneously allowing us to skip unnecessary merges + for (int n = 3; n >= 0; n--) { + if (tex[n].type == PLANE_NONE) + continue; + + int first = n; + int num = 0; + + for (int i = 0; i < n; i++) { + if (img_tex_equiv(tex[n], tex[i]) && + gl_transform_eq(offsets[n], offsets[i])) + { + GLSLF("// merging plane %d ...\n", i); + copy_img_tex(p, &num, tex[i]); + first = MPMIN(first, i); + tex[i] = (struct img_tex){0}; + } + } + + if (num > 0) { + GLSLF("// merging plane %d ... into %d\n", n, first); + copy_img_tex(p, &num, tex[n]); + pass_describe(p, "merging planes"); + finish_pass_fbo(p, &p->merge_fbo[n], tex[n].w, tex[n].h, 0); + tex[first] = img_tex_fbo(&p->merge_fbo[n], tex[n].type, num); + tex[n] = (struct img_tex){0}; + } + } + + // If any textures are still in integer format by this point, we need + // to introduce an explicit conversion pass to avoid breaking hooks/scaling + for (int n = 0; n < 4; n++) { + if (tex[n].tex && tex[n].tex->params.format->ctype == RA_CTYPE_UINT) { + GLSLF("// use_integer fix for plane %d\n", n); + copy_img_tex(p, &(int){0}, tex[n]); + pass_describe(p, "use_integer fix"); + finish_pass_fbo(p, &p->integer_fbo[n], tex[n].w, tex[n].h, 0); + tex[n] = img_tex_fbo(&p->integer_fbo[n], tex[n].type, + tex[n].components); + } + } + + // Dispatch the hooks for all of these textures, saving and perhaps + // modifying them in the process + for (int n = 0; n < 4; n++) { + const char *name; + switch (tex[n].type) { + case PLANE_RGB: name = "RGB"; break; + case PLANE_LUMA: name = "LUMA"; break; + case PLANE_CHROMA: name = "CHROMA"; break; + case PLANE_ALPHA: name = "ALPHA"; break; + case PLANE_XYZ: name = "XYZ"; break; + default: continue; + } + + tex[n] = pass_hook(p, name, tex[n], &offsets[n]); + } + + // At this point all planes are finalized but they may not be at the + // required size yet. Furthermore, they may have texture offsets that + // require realignment. 
For lack of something better to do, we assume + // the rgb/luma texture is the "reference" and scale everything else + // to match. + for (int n = 0; n < 4; n++) { + switch (tex[n].type) { + case PLANE_RGB: + case PLANE_XYZ: + case PLANE_LUMA: break; + default: continue; + } + + p->texture_w = tex[n].w; + p->texture_h = tex[n].h; + p->texture_offset = offsets[n]; + break; + } + + // Compute the reference rect + struct mp_rect_f src = {0.0, 0.0, p->image_params.w, p->image_params.h}; + struct mp_rect_f ref = src; + gl_transform_rect(p->texture_offset, &ref); + MP_DBG(p, "ref rect: {%f %f} {%f %f}\n", ref.x0, ref.y0, ref.x1, ref.y1); + + // Explicitly scale all of the textures that don't match + for (int n = 0; n < 4; n++) { + if (tex[n].type == PLANE_NONE) + continue; + + // If the planes are aligned identically, we will end up with the + // exact same source rectangle. + struct mp_rect_f rect = src; + gl_transform_rect(offsets[n], &rect); + MP_DBG(p, "rect[%d]: {%f %f} {%f %f}\n", n, + rect.x0, rect.y0, rect.x1, rect.y1); + + if (mp_rect_f_seq(ref, rect)) + continue; + + // If the rectangles differ, then our planes have a different + // alignment and/or size. First of all, we have to compute the + // corrections required to meet the target rectangle + struct gl_transform fix = { + .m = {{(ref.x1 - ref.x0) / (rect.x1 - rect.x0), 0.0}, + {0.0, (ref.y1 - ref.y0) / (rect.y1 - rect.y0)}}, + .t = {ref.x0, ref.y0}, + }; + MP_DBG(p, "-> fix[%d] = {%f %f} + off {%f %f}\n", n, + fix.m[0][0], fix.m[1][1], fix.t[0], fix.t[1]); + + // Since the scale in texture space is different from the scale in + // absolute terms, we have to scale the coefficients down to be + // relative to the texture's physical dimensions and local offset + struct gl_transform scale = { + .m = {{(float)tex[n].w / p->texture_w, 0.0}, + {0.0, (float)tex[n].h / p->texture_h}}, + .t = {-rect.x0, -rect.y0}, + }; + if (p->image_params.rotate % 180 == 90) + MPSWAP(double, scale.m[0][0], scale.m[1][1]); + + gl_transform_trans(scale, &fix); + MP_DBG(p, "-> scaled[%d] = {%f %f} + off {%f %f}\n", n, + fix.m[0][0], fix.m[1][1], fix.t[0], fix.t[1]); + + // Since the texture transform is a function of the texture coordinates + // to texture space, rather than the other way around, we have to + // actually apply the *inverse* of this. Fortunately, calculating + // the inverse is relatively easy here. 
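+        // (Since "fix" is a purely diagonal affine map y = m*x + t, its
+        // inverse is x = (1/m)*y - t/m, which is exactly what the next four
+        // assignments compute; e.g. m = 2, t = 4 inverts to m = 0.5, t = -2.)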
+ fix.m[0][0] = 1.0 / fix.m[0][0]; + fix.m[1][1] = 1.0 / fix.m[1][1]; + fix.t[0] = fix.m[0][0] * -fix.t[0]; + fix.t[1] = fix.m[1][1] * -fix.t[1]; + gl_transform_trans(fix, &tex[n].transform); + + int scaler_id = -1; + const char *name = NULL; + switch (tex[n].type) { + case PLANE_RGB: + case PLANE_LUMA: + case PLANE_XYZ: + scaler_id = SCALER_SCALE; + // these aren't worth hooking, fringe hypothetical cases only + break; + case PLANE_CHROMA: + scaler_id = SCALER_CSCALE; + name = "CHROMA_SCALED"; + break; + case PLANE_ALPHA: + // alpha always uses bilinear + name = "ALPHA_SCALED"; + } + + if (scaler_id < 0) + continue; + + const struct scaler_config *conf = &p->opts.scaler[scaler_id]; + struct scaler *scaler = &p->scaler[scaler_id]; + + // bilinear scaling is a free no-op thanks to GPU sampling + if (strcmp(conf->kernel.name, "bilinear") != 0) { + GLSLF("// upscaling plane %d\n", n); + pass_sample(p, tex[n], scaler, conf, 1.0, p->texture_w, p->texture_h); + finish_pass_fbo(p, &p->scale_fbo[n], p->texture_w, p->texture_h, 0); + tex[n] = img_tex_fbo(&p->scale_fbo[n], tex[n].type, tex[n].components); + } + + // Run any post-scaling hooks + tex[n] = pass_hook(p, name, tex[n], NULL); + } + + // All planes are of the same size and properly aligned at this point + GLSLF("// combining planes\n"); + int coord = 0; + for (int i = 0; i < 4; i++) { + if (tex[i].type != PLANE_NONE) + copy_img_tex(p, &coord, tex[i]); + } + p->components = coord; +} + +// Utility function that simply binds an FBO and reads from it, without any +// transformations. +static void pass_read_fbo(struct gl_video *p, struct fbotex *fbo) +{ + struct img_tex tex = img_tex_fbo(fbo, PLANE_RGB, p->components); + copy_img_tex(p, &(int){0}, tex); +} + +// yuv conversion, and any other conversions before main up/down-scaling +static void pass_convert_yuv(struct gl_video *p) +{ + struct gl_shader_cache *sc = p->sc; + + struct mp_csp_params cparams = MP_CSP_PARAMS_DEFAULTS; + cparams.gray = p->is_gray; + mp_csp_set_image_params(&cparams, &p->image_params); + mp_csp_equalizer_state_get(p->video_eq, &cparams); + p->user_gamma = 1.0 / (cparams.gamma * p->opts.gamma); + + pass_describe(p, "color conversion"); + + if (p->color_swizzle[0]) + GLSLF("color = color.%s;\n", p->color_swizzle); + + // Pre-colormatrix input gamma correction + if (cparams.color.space == MP_CSP_XYZ) + GLSL(color.rgb = pow(color.rgb, vec3(2.6));) // linear light + + // We always explicitly normalize the range in pass_read_video + cparams.input_bits = cparams.texture_bits = 0; + + // Conversion to RGB. For RGB itself, this still applies e.g. brightness + // and contrast controls, or expansion of e.g. LSB-packed 10 bit data. + struct mp_cmat m = {{{0}}}; + mp_get_csp_matrix(&cparams, &m); + gl_sc_uniform_mat3(sc, "colormatrix", true, &m.m[0][0]); + gl_sc_uniform_vec3(sc, "colormatrix_c", m.c); + + GLSL(color.rgb = mat3(colormatrix) * color.rgb + colormatrix_c;) + + if (p->image_params.color.space == MP_CSP_BT_2020_C) { + // Conversion for C'rcY'cC'bc via the BT.2020 CL system: + // C'bc = (B'-Y'c) / 1.9404 | C'bc <= 0 + // = (B'-Y'c) / 1.5816 | C'bc > 0 + // + // C'rc = (R'-Y'c) / 1.7184 | C'rc <= 0 + // = (R'-Y'c) / 0.9936 | C'rc > 0 + // + // as per the BT.2020 specification, table 4. This is a non-linear + // transformation because (constant) luminance receives non-equal + // contributions from the three different channels. 
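+        // (The divisors above are simply twice the negative/positive extents
+        // that table 4 permits for B'-Y'c and R'-Y'c, e.g. 1.9404 = 2 * 0.9702,
+        // so the resulting C'bc/C'rc always land in [-0.5, 0.5].)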
+ GLSLF("// constant luminance conversion\n"); + GLSL(color.br = color.br * mix(vec2(1.5816, 0.9936), + vec2(1.9404, 1.7184), + lessThanEqual(color.br, vec2(0))) + + color.gg;) + // Expand channels to camera-linear light. This shader currently just + // assumes everything uses the BT.2020 12-bit gamma function, since the + // difference between 10 and 12-bit is negligible for anything other + // than 12-bit content. + GLSL(color.rgb = mix(color.rgb * vec3(1.0/4.5), + pow((color.rgb + vec3(0.0993))*vec3(1.0/1.0993), + vec3(1.0/0.45)), + lessThanEqual(vec3(0.08145), color.rgb));) + // Calculate the green channel from the expanded RYcB + // The BT.2020 specification says Yc = 0.2627*R + 0.6780*G + 0.0593*B + GLSL(color.g = (color.g - 0.2627*color.r - 0.0593*color.b)*1.0/0.6780;) + // Recompress to receive the R'G'B' result, same as other systems + GLSL(color.rgb = mix(color.rgb * vec3(4.5), + vec3(1.0993) * pow(color.rgb, vec3(0.45)) - vec3(0.0993), + lessThanEqual(vec3(0.0181), color.rgb));) + } + + p->components = 3; + if (!p->has_alpha || p->opts.alpha_mode == ALPHA_NO) { + GLSL(color.a = 1.0;) + } else { // alpha present in image + p->components = 4; + GLSL(color = vec4(color.rgb * color.a, color.a);) + } +} + +static void get_scale_factors(struct gl_video *p, bool transpose_rot, double xy[2]) +{ + double target_w = p->src_rect.x1 - p->src_rect.x0; + double target_h = p->src_rect.y1 - p->src_rect.y0; + if (transpose_rot && p->image_params.rotate % 180 == 90) + MPSWAP(double, target_w, target_h); + xy[0] = (p->dst_rect.x1 - p->dst_rect.x0) / target_w; + xy[1] = (p->dst_rect.y1 - p->dst_rect.y0) / target_h; +} + +// Cropping. +static void compute_src_transform(struct gl_video *p, struct gl_transform *tr) +{ + float sx = (p->src_rect.x1 - p->src_rect.x0) / (float)p->texture_w, + sy = (p->src_rect.y1 - p->src_rect.y0) / (float)p->texture_h, + ox = p->src_rect.x0, + oy = p->src_rect.y0; + struct gl_transform transform = {{{sx, 0}, {0, sy}}, {ox, oy}}; + + gl_transform_trans(p->texture_offset, &transform); + + *tr = transform; +} + +// Takes care of the main scaling and pre/post-conversions +static void pass_scale_main(struct gl_video *p) +{ + // Figure out the main scaler. + double xy[2]; + get_scale_factors(p, true, xy); + + // actual scale factor should be divided by the scale factor of prescaling. 
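+    // (For illustration: upscaling a 1280x720 source to a 2560x1440 target
+    // rect gives xy = {2.0, 2.0}; if a prescaling hook has already doubled
+    // the image, p->texture_offset.m[0][0]/m[1][1] are 2.0 and the main
+    // scaler is left with an effective factor of 1.0.)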
+ xy[0] /= p->texture_offset.m[0][0]; + xy[1] /= p->texture_offset.m[1][1]; + + bool downscaling = xy[0] < 1.0 || xy[1] < 1.0; + bool upscaling = !downscaling && (xy[0] > 1.0 || xy[1] > 1.0); + double scale_factor = 1.0; + + struct scaler *scaler = &p->scaler[SCALER_SCALE]; + struct scaler_config scaler_conf = p->opts.scaler[SCALER_SCALE]; + if (p->opts.scaler_resizes_only && !downscaling && !upscaling) { + scaler_conf.kernel.name = "bilinear"; + // For scaler-resizes-only, we round the texture offset to + // the nearest round value in order to prevent ugly blurriness + // (in exchange for slightly shifting the image by up to half a + // subpixel) + p->texture_offset.t[0] = roundf(p->texture_offset.t[0]); + p->texture_offset.t[1] = roundf(p->texture_offset.t[1]); + } + if (downscaling && p->opts.scaler[SCALER_DSCALE].kernel.name) { + scaler_conf = p->opts.scaler[SCALER_DSCALE]; + scaler = &p->scaler[SCALER_DSCALE]; + } + + // When requesting correct-downscaling and the clip is anamorphic, and + // because only a single scale factor is used for both axes, enable it only + // when both axes are downscaled, and use the milder of the factors to not + // end up with too much blur on one axis (even if we end up with sub-optimal + // scale factor on the other axis). This is better than not respecting + // correct scaling at all for anamorphic clips. + double f = MPMAX(xy[0], xy[1]); + if (p->opts.correct_downscaling && f < 1.0) + scale_factor = 1.0 / f; + + // Pre-conversion, like linear light/sigmoidization + GLSLF("// scaler pre-conversion\n"); + bool use_linear = p->opts.linear_scaling || p->opts.sigmoid_upscaling; + + // Linear light downscaling results in nasty artifacts for HDR curves due + // to the potentially extreme brightness differences severely compounding + // any ringing. So just scale in gamma light instead. + if (mp_trc_is_hdr(p->image_params.color.gamma) && downscaling) + use_linear = false; + + if (use_linear) { + p->use_linear = true; + pass_linearize(p->sc, p->image_params.color.gamma); + pass_opt_hook_point(p, "LINEAR", NULL); + } + + bool use_sigmoid = use_linear && p->opts.sigmoid_upscaling && upscaling; + float sig_center, sig_slope, sig_offset, sig_scale; + if (use_sigmoid) { + // Coefficients for the sigmoidal transform are taken from the + // formula here: http://www.imagemagick.org/Usage/color_mods/#sigmoidal + sig_center = p->opts.sigmoid_center; + sig_slope = p->opts.sigmoid_slope; + // This function needs to go through (0,0) and (1,1) so we compute the + // values at 1 and 0, and then scale/shift them, respectively. + sig_offset = 1.0/(1+expf(sig_slope * sig_center)); + sig_scale = 1.0/(1+expf(sig_slope * (sig_center-1))) - sig_offset; + GLSLF("color.rgb = %f - log(1.0/(color.rgb * %f + %f) - 1.0) * 1.0/%f;\n", + sig_center, sig_scale, sig_offset, sig_slope); + pass_opt_hook_point(p, "SIGMOID", NULL); + } + + pass_opt_hook_point(p, "PREKERNEL", NULL); + + int vp_w = p->dst_rect.x1 - p->dst_rect.x0; + int vp_h = p->dst_rect.y1 - p->dst_rect.y0; + struct gl_transform transform; + compute_src_transform(p, &transform); + + GLSLF("// main scaling\n"); + finish_pass_fbo(p, &p->indirect_fbo, p->texture_w, p->texture_h, 0); + struct img_tex src = img_tex_fbo(&p->indirect_fbo, PLANE_RGB, p->components); + gl_transform_trans(transform, &src.transform); + pass_sample(p, src, scaler, &scaler_conf, scale_factor, vp_w, vp_h); + + // Changes the texture size to display size after main scaler. 
+    p->texture_w = vp_w;
+    p->texture_h = vp_h;
+
+    pass_opt_hook_point(p, "POSTKERNEL", NULL);
+
+    GLSLF("// scaler post-conversion\n");
+    if (use_sigmoid) {
+        // Inverse of the transformation above
+        GLSLF("color.rgb = (1.0/(1.0 + exp(%f * (%f - color.rgb))) - %f) * 1.0/%f;\n",
+              sig_slope, sig_center, sig_offset, sig_scale);
+    }
+}
+
+// Adapts the colors to the right output color space. (Final pass during
+// rendering)
+// If OSD is true, ignore any changes that may have been made to the video
+// by previous passes (i.e. linear scaling)
+static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool osd)
+{
+    struct ra *ra = p->ra;
+
+    // Figure out the target color space from the options, or auto-guess if
+    // none were set
+    struct mp_colorspace dst = {
+        .gamma = p->opts.target_trc,
+        .primaries = p->opts.target_prim,
+        .light = MP_CSP_LIGHT_DISPLAY,
+    };
+
+    if (p->use_lut_3d) {
+        // The 3DLUT is always generated against the video's original source
+        // space, *not* the reference space. (To avoid having to regenerate
+        // the 3DLUT for the OSD on every frame)
+        enum mp_csp_prim prim_orig = p->image_params.color.primaries;
+        enum mp_csp_trc trc_orig = p->image_params.color.gamma;
+
+        // One exception: HDR is not implemented by LittleCMS for technical
+        // limitation reasons, so we use a gamma 2.2 input curve here instead.
+        // We could pick any value we want here, the difference is just coding
+        // efficiency.
+        if (mp_trc_is_hdr(trc_orig))
+            trc_orig = MP_CSP_TRC_GAMMA22;
+
+        if (gl_video_get_lut3d(p, prim_orig, trc_orig)) {
+            dst.primaries = prim_orig;
+            dst.gamma = trc_orig;
+        }
+    }
+
+    if (dst.primaries == MP_CSP_PRIM_AUTO) {
+        // The vast majority of people are on sRGB or BT.709 displays, so pick
+        // this as the default output color space.
+        dst.primaries = MP_CSP_PRIM_BT_709;
+
+        if (src.primaries == MP_CSP_PRIM_BT_601_525 ||
+            src.primaries == MP_CSP_PRIM_BT_601_625)
+        {
+            // Since we auto-pick BT.601 and BT.709 based on the dimensions,
+            // combined with the fact that they're very similar to begin with,
+            // and to avoid confusing the average user, just don't adapt BT.601
+            // content automatically at all.
+            dst.primaries = src.primaries;
+        }
+    }
+
+    if (dst.gamma == MP_CSP_TRC_AUTO) {
+        // Most people seem to complain when the image is darker or brighter
+        // than what they're "used to", so just avoid changing the gamma
+        // altogether by default. The only exceptions to this rule apply to
+        // very unusual TRCs, which even hardcore technoluddites would probably
+        // not enjoy viewing unaltered.
+        dst.gamma = src.gamma;
+
+        // Avoid outputting linear light or HDR content "by default".
For these + // just pick gamma 2.2 as a default, since it's a good estimate for + // the response of typical displays + if (dst.gamma == MP_CSP_TRC_LINEAR || mp_trc_is_hdr(dst.gamma)) + dst.gamma = MP_CSP_TRC_GAMMA22; + } + + bool detect_peak = p->opts.compute_hdr_peak && mp_trc_is_hdr(src.gamma); + if (detect_peak && !p->hdr_peak_ssbo) { + struct { + unsigned int sig_peak_raw; + unsigned int index; + unsigned int frame_max[PEAK_DETECT_FRAMES+1]; + } peak_ssbo = {0}; + + // Prefill with safe values + int safe = MP_REF_WHITE * mp_trc_nom_peak(p->image_params.color.gamma); + peak_ssbo.sig_peak_raw = PEAK_DETECT_FRAMES * safe; + for (int i = 0; i < PEAK_DETECT_FRAMES+1; i++) + peak_ssbo.frame_max[i] = safe; + + struct ra_buf_params params = { + .type = RA_BUF_TYPE_SHADER_STORAGE, + .size = sizeof(peak_ssbo), + .initial_data = &peak_ssbo, + }; + + p->hdr_peak_ssbo = ra_buf_create(ra, ¶ms); + if (!p->hdr_peak_ssbo) { + MP_WARN(p, "Failed to create HDR peak detection SSBO, disabling.\n"); + detect_peak = (p->opts.compute_hdr_peak = false); + } + } + + if (detect_peak) { + pass_describe(p, "detect HDR peak"); + pass_is_compute(p, 8, 8); // 8x8 is good for performance + gl_sc_ssbo(p->sc, "PeakDetect", p->hdr_peak_ssbo, + "uint sig_peak_raw;" + "uint index;" + "uint frame_max[%d];", PEAK_DETECT_FRAMES + 1 + ); + } + + // Adapt from src to dst as necessary + pass_color_map(p->sc, src, dst, p->opts.tone_mapping, + p->opts.tone_mapping_param, p->opts.tone_mapping_desat, + detect_peak, p->opts.gamut_warning, p->use_linear && !osd); + + if (p->use_lut_3d) { + gl_sc_uniform_texture(p->sc, "lut_3d", p->lut_3d_texture); + GLSL(vec3 cpos;) + for (int i = 0; i < 3; i++) + GLSLF("cpos[%d] = LUT_POS(color[%d], %d.0);\n", i, i, p->lut_3d_size[i]); + GLSL(color.rgb = tex3D(lut_3d, cpos).rgb;) + } +} + +void gl_video_set_fb_depth(struct gl_video *p, int fb_depth) +{ + p->fb_depth = fb_depth; +} + +static void pass_dither(struct gl_video *p) +{ + // Assume 8 bits per component if unknown. + int dst_depth = p->fb_depth > 0 ? p->fb_depth : 8; + if (p->opts.dither_depth > 0) + dst_depth = p->opts.dither_depth; + + if (p->opts.dither_depth < 0 || p->opts.dither_algo == DITHER_NONE) + return; + + if (!p->dither_texture) { + MP_VERBOSE(p, "Dither to %d.\n", dst_depth); + + int tex_size = 0; + void *tex_data = NULL; + const struct ra_format *fmt = NULL; + void *temp = NULL; + + if (p->opts.dither_algo == DITHER_FRUIT) { + int sizeb = p->opts.dither_size; + int size = 1 << sizeb; + + if (p->last_dither_matrix_size != size) { + p->last_dither_matrix = talloc_realloc(p, p->last_dither_matrix, + float, size * size); + mp_make_fruit_dither_matrix(p->last_dither_matrix, sizeb); + p->last_dither_matrix_size = size; + } + + // Prefer R16 texture since they provide higher precision. + fmt = ra_find_unorm_format(p->ra, 2, 1); + if (!fmt) + fmt = ra_find_float16_format(p->ra, 1); + if (fmt) { + tex_size = size; + tex_data = p->last_dither_matrix; + if (fmt->ctype == RA_CTYPE_UNORM) { + uint16_t *t = temp = talloc_array(NULL, uint16_t, size * size); + for (int n = 0; n < size * size; n++) + t[n] = p->last_dither_matrix[n] * UINT16_MAX; + tex_data = t; + } + } else { + MP_VERBOSE(p, "GL too old. 
Falling back to ordered dither.\n"); + p->opts.dither_algo = DITHER_ORDERED; + } + } + + if (p->opts.dither_algo == DITHER_ORDERED) { + temp = talloc_array(NULL, char, 8 * 8); + mp_make_ordered_dither_matrix(temp, 8); + + fmt = ra_find_unorm_format(p->ra, 1, 1); + tex_size = 8; + tex_data = temp; + } + + struct ra_tex_params params = { + .dimensions = 2, + .w = tex_size, + .h = tex_size, + .d = 1, + .format = fmt, + .render_src = true, + .src_repeat = true, + .initial_data = tex_data, + }; + p->dither_texture = ra_tex_create(p->ra, ¶ms); + + debug_check_gl(p, "dither setup"); + + talloc_free(temp); + } + + GLSLF("// dithering\n"); + + // This defines how many bits are considered significant for output on + // screen. The superfluous bits will be used for rounding according to the + // dither matrix. The precision of the source implicitly decides how many + // dither patterns can be visible. + int dither_quantization = (1 << dst_depth) - 1; + int dither_size = p->dither_texture->params.w; + + gl_sc_uniform_texture(p->sc, "dither", p->dither_texture); + + GLSLF("vec2 dither_pos = gl_FragCoord.xy * 1.0/%d.0;\n", dither_size); + + if (p->opts.temporal_dither) { + int phase = (p->frames_rendered / p->opts.temporal_dither_period) % 8u; + float r = phase * (M_PI / 2); // rotate + float m = phase < 4 ? 1 : -1; // mirror + + float matrix[2][2] = {{cos(r), -sin(r) }, + {sin(r) * m, cos(r) * m}}; + gl_sc_uniform_mat2(p->sc, "dither_trafo", true, &matrix[0][0]); + + GLSL(dither_pos = dither_trafo * dither_pos;) + } + + GLSL(float dither_value = texture(dither, dither_pos).r;) + GLSLF("color = floor(color * %d.0 + dither_value + 0.5 / %d.0) * 1.0/%d.0;\n", + dither_quantization, dither_size * dither_size, dither_quantization); +} + +// Draws the OSD, in scene-referred colors.. If cms is true, subtitles are +// instead adapted to the display's gamut. +static void pass_draw_osd(struct gl_video *p, int draw_flags, double pts, + struct mp_osd_res rect, struct fbodst target, bool cms) +{ + mpgl_osd_generate(p->osd, rect, pts, p->image_params.stereo_out, draw_flags); + + timer_pool_start(p->osd_timer); + for (int n = 0; n < MAX_OSD_PARTS; n++) { + // (This returns false if this part is empty with nothing to draw.) + if (!mpgl_osd_draw_prepare(p->osd, n, p->sc)) + continue; + // When subtitles need to be color managed, assume they're in sRGB + // (for lack of anything saner to do) + if (cms) { + static const struct mp_colorspace csp_srgb = { + .primaries = MP_CSP_PRIM_BT_709, + .gamma = MP_CSP_TRC_SRGB, + .light = MP_CSP_LIGHT_DISPLAY, + }; + + pass_colormanage(p, csp_srgb, true); + } + mpgl_osd_draw_finish(p->osd, n, p->sc, target); + } + + timer_pool_stop(p->osd_timer); + pass_describe(p, "drawing osd"); + pass_record(p, timer_pool_measure(p->osd_timer)); +} + +static float chroma_realign(int size, int pixel) +{ + return size / (float)chroma_upsize(size, pixel); +} + +// Minimal rendering code path, for GLES or OpenGL 2.1 without proper FBOs. +static void pass_render_frame_dumb(struct gl_video *p) +{ + struct img_tex tex[4]; + struct gl_transform off[4]; + pass_get_img_tex(p, &p->image, tex, off); + + struct gl_transform transform; + compute_src_transform(p, &transform); + + int index = 0; + for (int i = 0; i < p->plane_count; i++) { + int cw = tex[i].type == PLANE_CHROMA ? p->ra_format.chroma_w : 1; + int ch = tex[i].type == PLANE_CHROMA ? 
p->ra_format.chroma_h : 1; + if (p->image_params.rotate % 180 == 90) + MPSWAP(int, cw, ch); + + struct gl_transform t = transform; + t.m[0][0] *= chroma_realign(p->texture_w, cw); + t.m[1][1] *= chroma_realign(p->texture_h, ch); + + t.t[0] /= cw; + t.t[1] /= ch; + + t.t[0] += off[i].t[0]; + t.t[1] += off[i].t[1]; + + gl_transform_trans(tex[i].transform, &t); + tex[i].transform = t; + + copy_img_tex(p, &index, tex[i]); + } + + pass_convert_yuv(p); +} + +// The main rendering function, takes care of everything up to and including +// upscaling. p->image is rendered. +static bool pass_render_frame(struct gl_video *p, struct mp_image *mpi, uint64_t id) +{ + // initialize the texture parameters and temporary variables + p->texture_w = p->image_params.w; + p->texture_h = p->image_params.h; + p->texture_offset = identity_trans; + p->components = 0; + p->saved_tex_num = 0; + p->hook_fbo_num = 0; + p->use_linear = false; + + // try uploading the frame + if (!pass_upload_image(p, mpi, id)) + return false; + + if (p->image_params.rotate % 180 == 90) + MPSWAP(int, p->texture_w, p->texture_h); + + if (p->dumb_mode) + return true; + + pass_read_video(p); + pass_opt_hook_point(p, "NATIVE", &p->texture_offset); + pass_convert_yuv(p); + pass_opt_hook_point(p, "MAINPRESUB", &p->texture_offset); + + // For subtitles + double vpts = p->image.mpi->pts; + if (vpts == MP_NOPTS_VALUE) + vpts = p->osd_pts; + + if (p->osd && p->opts.blend_subs == BLEND_SUBS_VIDEO) { + double scale[2]; + get_scale_factors(p, false, scale); + struct mp_osd_res rect = { + .w = p->texture_w, .h = p->texture_h, + .display_par = scale[1] / scale[0], // counter compensate scaling + }; + finish_pass_fbo(p, &p->blend_subs_fbo, rect.w, rect.h, 0); + pass_draw_osd(p, OSD_DRAW_SUB_ONLY, vpts, rect, + p->blend_subs_fbo.fbo, false); + pass_read_fbo(p, &p->blend_subs_fbo); + pass_describe(p, "blend subs video"); + } + pass_opt_hook_point(p, "MAIN", &p->texture_offset); + + pass_scale_main(p); + + int vp_w = p->dst_rect.x1 - p->dst_rect.x0, + vp_h = p->dst_rect.y1 - p->dst_rect.y0; + if (p->osd && p->opts.blend_subs == BLEND_SUBS_YES) { + // Recreate the real video size from the src/dst rects + struct mp_osd_res rect = { + .w = vp_w, .h = vp_h, + .ml = -p->src_rect.x0, .mr = p->src_rect.x1 - p->image_params.w, + .mt = -p->src_rect.y0, .mb = p->src_rect.y1 - p->image_params.h, + .display_par = 1.0, + }; + // Adjust margins for scale + double scale[2]; + get_scale_factors(p, true, scale); + rect.ml *= scale[0]; rect.mr *= scale[0]; + rect.mt *= scale[1]; rect.mb *= scale[1]; + // We should always blend subtitles in non-linear light + if (p->use_linear) { + pass_delinearize(p->sc, p->image_params.color.gamma); + p->use_linear = false; + } + finish_pass_fbo(p, &p->blend_subs_fbo, p->texture_w, p->texture_h, 0); + pass_draw_osd(p, OSD_DRAW_SUB_ONLY, vpts, rect, + p->blend_subs_fbo.fbo, false); + pass_read_fbo(p, &p->blend_subs_fbo); + pass_describe(p, "blend subs"); + } + + pass_opt_hook_point(p, "SCALED", NULL); + + return true; +} + +static void pass_draw_to_screen(struct gl_video *p, struct fbodst fbo) +{ + if (p->dumb_mode) + pass_render_frame_dumb(p); + + // Adjust the overall gamma before drawing to screen + if (p->user_gamma != 1) { + gl_sc_uniform_f(p->sc, "user_gamma", p->user_gamma); + GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);) + GLSL(color.rgb = pow(color.rgb, vec3(user_gamma));) + } + + pass_colormanage(p, p->image_params.color, false); + + // Since finish_pass_direct doesn't work with compute shaders, and neither + // does the 
checkerboard/dither code, we may need an indirection via + // p->screen_fbo here. + if (p->pass_compute.active) { + int o_w = p->dst_rect.x1 - p->dst_rect.x0, + o_h = p->dst_rect.y1 - p->dst_rect.y0; + finish_pass_fbo(p, &p->screen_fbo, o_w, o_h, FBOTEX_FUZZY); + struct img_tex tmp = img_tex_fbo(&p->screen_fbo, PLANE_RGB, p->components); + copy_img_tex(p, &(int){0}, tmp); + } + + if (p->has_alpha){ + if (p->opts.alpha_mode == ALPHA_BLEND_TILES) { + // Draw checkerboard pattern to indicate transparency + GLSLF("// transparency checkerboard\n"); + GLSL(bvec2 tile = lessThan(fract(gl_FragCoord.xy * 1.0/32.0), vec2(0.5));) + GLSL(vec3 background = vec3(tile.x == tile.y ? 1.0 : 0.75);) + GLSL(color.rgb = mix(background, color.rgb, color.a);) + } else if (p->opts.alpha_mode == ALPHA_BLEND) { + // Blend into background color (usually black) + struct m_color c = p->opts.background; + GLSLF("vec4 background = vec4(%f, %f, %f, %f);\n", + c.r / 255.0, c.g / 255.0, c.b / 255.0, c.a / 255.0); + GLSL(color = mix(background, vec4(color.rgb, 1.0), color.a);) + } + } + + pass_opt_hook_point(p, "OUTPUT", NULL); + + pass_dither(p); + pass_describe(p, "output to screen"); + finish_pass_direct(p, fbo, &p->dst_rect); +} + +static bool update_fbosurface(struct gl_video *p, struct mp_image *mpi, + uint64_t id, struct fbosurface *surf) +{ + int vp_w = p->dst_rect.x1 - p->dst_rect.x0, + vp_h = p->dst_rect.y1 - p->dst_rect.y0; + + pass_info_reset(p, false); + if (!pass_render_frame(p, mpi, id)) + return false; + + // Frame blending should always be done in linear light to preserve the + // overall brightness, otherwise this will result in flashing dark frames + // because mixing in compressed light artificially darkens the results + if (!p->use_linear) { + p->use_linear = true; + pass_linearize(p->sc, p->image_params.color.gamma); + } + + finish_pass_fbo(p, &surf->fbotex, vp_w, vp_h, FBOTEX_FUZZY); + surf->id = id; + surf->pts = mpi->pts; + return true; +} + +// Draws an interpolate frame to fbo, based on the frame timing in t +static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t, + struct fbodst fbo) +{ + bool is_new = false; + + // Reset the queue completely if this is a still image, to avoid any + // interpolation artifacts from surrounding frames when unpausing or + // framestepping + if (t->still) + gl_video_reset_surfaces(p); + + // First of all, figure out if we have a frame available at all, and draw + // it manually + reset the queue if not + if (p->surfaces[p->surface_now].id == 0) { + struct fbosurface *now = &p->surfaces[p->surface_now]; + if (!update_fbosurface(p, t->current, t->frame_id, now)) + return; + p->surface_idx = p->surface_now; + is_new = true; + } + + // Find the right frame for this instant + if (t->current) { + int next = fbosurface_wrap(p->surface_now + 1); + while (p->surfaces[next].id && + p->surfaces[next].id > p->surfaces[p->surface_now].id && + p->surfaces[p->surface_now].id < t->frame_id) + { + p->surface_now = next; + next = fbosurface_wrap(next + 1); + } + } + + // Figure out the queue size. For illustration, a filter radius of 2 would + // look like this: _ A [B] C D _ + // A is surface_bse, B is surface_now, C is surface_now+1 and D is + // surface_end. 
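+    // (Continuing that radius-2 illustration with the code below: size = 4,
+    // radius = 2, surface_bse = wrap(surface_now - 1) is A, and
+    // surface_end = wrap(surface_now + 2) is D, i.e. four consecutive
+    // fbosurface entries are bound and blended together.)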
+ struct scaler *tscale = &p->scaler[SCALER_TSCALE]; + reinit_scaler(p, tscale, &p->opts.scaler[SCALER_TSCALE], 1, tscale_sizes); + bool oversample = strcmp(tscale->conf.kernel.name, "oversample") == 0; + bool linear = strcmp(tscale->conf.kernel.name, "linear") == 0; + int size; + + if (oversample || linear) { + size = 2; + } else { + assert(tscale->kernel && !tscale->kernel->polar); + size = ceil(tscale->kernel->size); + assert(size <= TEXUNIT_VIDEO_NUM); + } + + int radius = size/2; + int surface_now = p->surface_now; + int surface_bse = fbosurface_wrap(surface_now - (radius-1)); + int surface_end = fbosurface_wrap(surface_now + radius); + assert(fbosurface_wrap(surface_bse + size-1) == surface_end); + + // Render new frames while there's room in the queue. Note that technically, + // this should be done before the step where we find the right frame, but + // it only barely matters at the very beginning of playback, and this way + // makes the code much more linear. + int surface_dst = fbosurface_wrap(p->surface_idx + 1); + for (int i = 0; i < t->num_frames; i++) { + // Avoid overwriting data we might still need + if (surface_dst == surface_bse - 1) + break; + + struct mp_image *f = t->frames[i]; + uint64_t f_id = t->frame_id + i; + if (!mp_image_params_equal(&f->params, &p->real_image_params)) + continue; + + if (f_id > p->surfaces[p->surface_idx].id) { + struct fbosurface *dst = &p->surfaces[surface_dst]; + if (!update_fbosurface(p, f, f_id, dst)) + return; + p->surface_idx = surface_dst; + surface_dst = fbosurface_wrap(surface_dst + 1); + is_new = true; + } + } + + // Figure out whether the queue is "valid". A queue is invalid if the + // frames' PTS is not monotonically increasing. Anything else is invalid, + // so avoid blending incorrect data and just draw the latest frame as-is. + // Possible causes for failure of this condition include seeks, pausing, + // end of playback or start of playback. + bool valid = true; + for (int i = surface_bse, ii; valid && i != surface_end; i = ii) { + ii = fbosurface_wrap(i + 1); + if (p->surfaces[i].id == 0 || p->surfaces[ii].id == 0) { + valid = false; + } else if (p->surfaces[ii].id < p->surfaces[i].id) { + valid = false; + MP_DBG(p, "interpolation queue underrun\n"); + } + } + + // Update OSD PTS to synchronize subtitles with the displayed frame + p->osd_pts = p->surfaces[surface_now].pts; + + // Finally, draw the right mix of frames to the screen. + if (!is_new) + pass_info_reset(p, true); + pass_describe(p, "interpolation"); + if (!valid || t->still) { + // surface_now is guaranteed to be valid, so we can safely use it. + pass_read_fbo(p, &p->surfaces[surface_now].fbotex); + p->is_interpolated = false; + } else { + double mix = t->vsync_offset / t->ideal_frame_duration; + // The scaler code always wants the fcoord to be between 0 and 1, + // so we try to adjust by using the previous set of N frames instead + // (which requires some extra checking to make sure it's valid) + if (mix < 0.0) { + int prev = fbosurface_wrap(surface_bse - 1); + if (p->surfaces[prev].id != 0 && + p->surfaces[prev].id < p->surfaces[surface_bse].id) + { + mix += 1.0; + surface_bse = prev; + } else { + mix = 0.0; // at least don't blow up, this should only + // ever happen at the start of playback + } + } + + if (oversample) { + // Oversample uses the frame area as mix ratio, not the the vsync + // position itself + double vsync_dist = t->vsync_interval / t->ideal_frame_duration, + threshold = tscale->conf.kernel.params[0]; + threshold = isnan(threshold) ? 
0.0 : threshold; + mix = (1 - mix) / vsync_dist; + mix = mix <= 0 + threshold ? 0 : mix; + mix = mix >= 1 - threshold ? 1 : mix; + mix = 1 - mix; + } + + // Blend the frames together + if (oversample || linear) { + gl_sc_uniform_f(p->sc, "inter_coeff", mix); + GLSL(color = mix(texture(texture0, texcoord0), + texture(texture1, texcoord1), + inter_coeff);) + } else { + gl_sc_uniform_f(p->sc, "fcoord", mix); + pass_sample_separated_gen(p->sc, tscale, 0, 0); + } + + // Load all the required frames + for (int i = 0; i < size; i++) { + struct img_tex img = + img_tex_fbo(&p->surfaces[fbosurface_wrap(surface_bse+i)].fbotex, + PLANE_RGB, p->components); + // Since the code in pass_sample_separated currently assumes + // the textures are bound in-order and starting at 0, we just + // assert to make sure this is the case (which it should always be) + int id = pass_bind(p, img); + assert(id == i); + } + + MP_DBG(p, "inter frame dur: %f vsync: %f, mix: %f\n", + t->ideal_frame_duration, t->vsync_interval, mix); + p->is_interpolated = true; + } + pass_draw_to_screen(p, fbo); + + p->frames_drawn += 1; +} + +void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, + struct fbodst target) +{ + struct mp_rect target_rc = {0, 0, target.tex->params.w, target.tex->params.h}; + + p->broken_frame = false; + + bool has_frame = !!frame->current; + + if (!has_frame || !mp_rect_equals(&p->dst_rect, &target_rc)) { + struct m_color c = p->clear_color; + float color[4] = {c.r / 255.0, c.g / 255.0, c.b / 255.0, c.a / 255.0}; + p->ra->fns->clear(p->ra, target.tex, color, &target_rc); + } + + if (p->hwdec_active && p->hwdec->driver->overlay_frame) { + if (has_frame) { + float *color = p->hwdec->overlay_colorkey; + p->ra->fns->clear(p->ra, target.tex, color, &p->dst_rect); + } + + p->hwdec->driver->overlay_frame(p->hwdec, frame->current, + &p->src_rect, &p->dst_rect, + frame->frame_id != p->image.id); + + if (frame->current) + p->osd_pts = frame->current->pts; + + // Disable GL rendering + has_frame = false; + } + + if (has_frame) { + bool interpolate = p->opts.interpolation && frame->display_synced && + (p->frames_drawn || !frame->still); + if (interpolate) { + double ratio = frame->ideal_frame_duration / frame->vsync_interval; + if (fabs(ratio - 1.0) < p->opts.interpolation_threshold) + interpolate = false; + } + + if (interpolate) { + gl_video_interpolate_frame(p, frame, target); + } else { + bool is_new = frame->frame_id != p->image.id; + + // Redrawing a frame might update subtitles. 
+ if (frame->still && p->opts.blend_subs) + is_new = true; + + if (is_new || !p->output_fbo_valid) { + p->output_fbo_valid = false; + + pass_info_reset(p, !is_new); + if (!pass_render_frame(p, frame->current, frame->frame_id)) + goto done; + + // For the non-interpolation case, we draw to a single "cache" + // FBO to speed up subsequent re-draws (if any exist) + struct fbodst dest_fbo = target; + if (frame->num_vsyncs > 1 && frame->display_synced && + !p->dumb_mode && (p->ra->caps & RA_CAP_BLIT)) + { + fbotex_change(&p->output_fbo, p->ra, p->log, + target.tex->params.w, target.tex->params.h, + p->fbo_format, FBOTEX_FUZZY); + dest_fbo = p->output_fbo.fbo; + p->output_fbo_valid = true; + } + pass_draw_to_screen(p, dest_fbo); + } + + // "output fbo valid" and "output fbo needed" are equivalent + if (p->output_fbo_valid) { + pass_info_reset(p, true); + pass_describe(p, "redraw cached frame"); + struct mp_rect src = p->dst_rect; + struct mp_rect dst = src; + if (target.flip) { + dst.y0 = target.tex->params.h - src.y0; + dst.y1 = target.tex->params.h - src.y1; + } + timer_pool_start(p->blit_timer); + p->ra->fns->blit(p->ra, target.tex, p->output_fbo.tex, + &dst, &src); + timer_pool_stop(p->blit_timer); + pass_record(p, timer_pool_measure(p->blit_timer)); + } + } + } + +done: + + unmap_current_image(p); + + debug_check_gl(p, "after video rendering"); + + if (p->osd) { + // If we haven't actually drawn anything so far, then we technically + // need to consider this the start of a new pass. Let's call it a + // redraw just because, since it's basically a blank frame anyway + if (!has_frame) + pass_info_reset(p, true); + + pass_draw_osd(p, p->opts.blend_subs ? OSD_DRAW_OSD_ONLY : 0, + p->osd_pts, p->osd_rect, target, true); + debug_check_gl(p, "after OSD rendering"); + } + + if (gl_sc_error_state(p->sc) || p->broken_frame) { + // Make the screen solid blue to make it visually clear that an + // error has occurred + float color[4] = {0.0, 0.05, 0.5, 1.0}; + p->ra->fns->clear(p->ra, target.tex, color, &target_rc); + } + + p->frames_rendered++; + pass_report_performance(p); +} + +// Use this color instead of the global option. +void gl_video_set_clear_color(struct gl_video *p, struct m_color c) +{ + p->force_clear_color = true; + p->clear_color = c; +} + +void gl_video_set_osd_pts(struct gl_video *p, double pts) +{ + p->osd_pts = pts; +} + +bool gl_video_check_osd_change(struct gl_video *p, struct mp_osd_res *res, + double pts) +{ + return p->osd ? mpgl_osd_check_change(p->osd, res, pts) : false; +} + +void gl_video_resize(struct gl_video *p, + struct mp_rect *src, struct mp_rect *dst, + struct mp_osd_res *osd) +{ + if (mp_rect_equals(&p->src_rect, src) && + mp_rect_equals(&p->dst_rect, dst) && + osd_res_equals(p->osd_rect, *osd)) + return; + + p->src_rect = *src; + p->dst_rect = *dst; + p->osd_rect = *osd; + + gl_video_reset_surfaces(p); + + if (p->osd) + mpgl_osd_resize(p->osd, p->osd_rect, p->image_params.stereo_out); +} + +static void frame_perf_data(struct pass_info pass[], struct mp_frame_perf *out) +{ + for (int i = 0; i < PASS_INFO_MAX; i++) { + if (!pass[i].desc.len) + break; + out->perf[out->count] = pass[i].perf; + out->desc[out->count] = pass[i].desc.start; + out->count++; + } +} + +void gl_video_perfdata(struct gl_video *p, struct voctrl_performance_data *out) +{ + *out = (struct voctrl_performance_data){0}; + frame_perf_data(p->pass_fresh, &out->fresh); + frame_perf_data(p->pass_redraw, &out->redraw); +} + +// This assumes nv12, with textures set to GL_NEAREST filtering. 
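/*
 * Illustrative sketch (editorial aside, not part of this patch): the row
 * mapping implemented by the reinterleave_vdpau() shader below. For an
 * output plane of height 2*h, even rows come from the first field texture
 * and odd rows from the second, each at source row y/2 (GL_NEAREST lookup).
 * Plain C, no GL; h is an arbitrary example height.
 */
#include <stdio.h>

int main(void)
{
    int h = 4; /* hypothetical field height; merged output height is 2*h */
    for (int y = 0; y < 2 * h; y++) {
        /* fract(gl_FragCoord.y * 0.5) < 0.5  <=>  y is even */
        int field   = (y % 2 == 0) ? 0 : 1;
        int src_row = y / 2;
        printf("out row %d <- field %d, row %d\n", y, field, src_row);
    }
    return 0;
}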
+static void reinterleave_vdpau(struct gl_video *p, + struct ra_tex *input[4], struct ra_tex *output[2]) +{ + for (int n = 0; n < 2; n++) { + struct fbotex *fbo = &p->vdpau_deinterleave_fbo[n]; + // This is an array of the 2 to-merge planes. + struct ra_tex **src = &input[n * 2]; + int w = src[0]->params.w; + int h = src[0]->params.h; + int ids[2]; + for (int t = 0; t < 2; t++) { + ids[t] = pass_bind(p, (struct img_tex){ + .tex = src[t], + .multiplier = 1.0, + .transform = identity_trans, + .w = w, + .h = h, + }); + } + + GLSLF("color = fract(gl_FragCoord.y * 0.5) < 0.5\n"); + GLSLF(" ? texture(texture%d, texcoord%d)\n", ids[0], ids[0]); + GLSLF(" : texture(texture%d, texcoord%d);", ids[1], ids[1]); + + const struct ra_format *fmt = + ra_find_unorm_format(p->ra, 1, n == 0 ? 1 : 2); + fbotex_change(fbo, p->ra, p->log, w, h * 2, fmt, 0); + + pass_describe(p, "vdpau reinterleaving"); + finish_pass_direct(p, fbo->fbo, &(struct mp_rect){0, 0, w, h * 2}); + + output[n] = fbo->tex; + } +} + +// Returns false on failure. +static bool pass_upload_image(struct gl_video *p, struct mp_image *mpi, uint64_t id) +{ + struct video_image *vimg = &p->image; + + if (vimg->id == id) + return true; + + unref_current_image(p); + + mpi = mp_image_new_ref(mpi); + if (!mpi) + goto error; + + vimg->mpi = mpi; + vimg->id = id; + p->osd_pts = mpi->pts; + p->frames_uploaded++; + + if (p->hwdec_active) { + // Hardware decoding + + if (!p->hwdec_mapper) + goto error; + + pass_describe(p, "map frame (hwdec)"); + timer_pool_start(p->upload_timer); + bool ok = ra_hwdec_mapper_map(p->hwdec_mapper, vimg->mpi) >= 0; + timer_pool_stop(p->upload_timer); + pass_record(p, timer_pool_measure(p->upload_timer)); + + vimg->hwdec_mapped = true; + if (ok) { + struct mp_image layout = {0}; + mp_image_set_params(&layout, &p->image_params); + struct ra_tex **tex = p->hwdec_mapper->tex; + struct ra_tex *tmp[4] = {0}; + if (p->hwdec_mapper->vdpau_fields) { + reinterleave_vdpau(p, tex, tmp); + tex = tmp; + } + for (int n = 0; n < p->plane_count; n++) { + vimg->planes[n] = (struct texplane){ + .w = mp_image_plane_w(&layout, n), + .h = mp_image_plane_h(&layout, n), + .tex = tex[n], + }; + } + } else { + MP_FATAL(p, "Mapping hardware decoded surface failed.\n"); + goto error; + } + return true; + } + + // Software decoding + assert(mpi->num_planes == p->plane_count); + + timer_pool_start(p->upload_timer); + for (int n = 0; n < p->plane_count; n++) { + struct texplane *plane = &vimg->planes[n]; + + plane->flipped = mpi->stride[0] < 0; + + struct ra_tex_upload_params params = { + .tex = plane->tex, + .src = mpi->planes[n], + .invalidate = true, + .stride = mpi->stride[n], + }; + + struct dr_buffer *mapped = gl_find_dr_buffer(p, mpi->planes[n]); + if (mapped) { + params.buf = mapped->buf; + params.buf_offset = (uintptr_t)params.src - + (uintptr_t)mapped->buf->data; + params.src = NULL; + } + + if (p->using_dr_path != !!mapped) { + p->using_dr_path = !!mapped; + MP_VERBOSE(p, "DR enabled: %s\n", p->using_dr_path ? "yes" : "no"); + } + + if (!p->ra->fns->tex_upload(p->ra, ¶ms)) { + timer_pool_stop(p->upload_timer); + goto error; + } + + if (mapped && !mapped->mpi) + mapped->mpi = mp_image_new_ref(mpi); + } + timer_pool_stop(p->upload_timer); + + bool using_pbo = p->ra->use_pbo || !(p->ra->caps & RA_CAP_DIRECT_UPLOAD); + const char *mode = p->using_dr_path ? "DR" : using_pbo ? 
"PBO" : "naive"; + pass_describe(p, "upload frame (%s)", mode); + pass_record(p, timer_pool_measure(p->upload_timer)); + + return true; + +error: + unref_current_image(p); + p->broken_frame = true; + return false; +} + +static bool test_fbo(struct gl_video *p, const struct ra_format *fmt) +{ + MP_VERBOSE(p, "Testing FBO format %s\n", fmt->name); + struct fbotex fbo = {0}; + bool success = fbotex_change(&fbo, p->ra, p->log, 16, 16, fmt, 0); + fbotex_uninit(&fbo); + return success; +} + +// Return whether dumb-mode can be used without disabling any features. +// Essentially, vo_opengl with mostly default settings will return true. +static bool check_dumb_mode(struct gl_video *p) +{ + struct gl_video_opts *o = &p->opts; + if (p->use_integer_conversion) + return false; + if (o->dumb_mode > 0) // requested by user + return true; + if (o->dumb_mode < 0) // disabled by user + return false; + + // otherwise, use auto-detection + if (o->target_prim || o->target_trc || o->linear_scaling || + o->correct_downscaling || o->sigmoid_upscaling || o->interpolation || + o->blend_subs || o->deband || o->unsharp) + return false; + // check remaining scalers (tscale is already implicitly excluded above) + for (int i = 0; i < SCALER_COUNT; i++) { + if (i != SCALER_TSCALE) { + const char *name = o->scaler[i].kernel.name; + if (name && strcmp(name, "bilinear") != 0) + return false; + } + } + if (o->user_shaders && o->user_shaders[0]) + return false; + if (p->use_lut_3d) + return false; + return true; +} + +// Disable features that are not supported with the current OpenGL version. +static void check_gl_features(struct gl_video *p) +{ + struct ra *ra = p->ra; + bool have_float_tex = !!ra_find_float16_format(ra, 1); + bool have_mglsl = ra->glsl_version >= 130; // modern GLSL + const struct ra_format *rg_tex = ra_find_unorm_format(p->ra, 1, 2); + bool have_texrg = rg_tex && !rg_tex->luminance_alpha; + bool have_compute = ra->caps & RA_CAP_COMPUTE; + bool have_ssbo = ra->caps & RA_CAP_BUF_RW; + + const char *auto_fbo_fmts[] = {"rgba16", "rgba16f", "rgb10_a2", "rgba8", 0}; + const char *user_fbo_fmts[] = {p->opts.fbo_format, 0}; + const char **fbo_fmts = user_fbo_fmts[0] && strcmp(user_fbo_fmts[0], "auto") + ? user_fbo_fmts : auto_fbo_fmts; + bool have_fbo = false; + p->fbo_format = NULL; + for (int n = 0; fbo_fmts[n]; n++) { + const char *fmt = fbo_fmts[n]; + const struct ra_format *f = ra_find_named_format(p->ra, fmt); + if (!f && fbo_fmts == user_fbo_fmts) + MP_WARN(p, "FBO format '%s' not found!\n", fmt); + if (f && f->renderable && f->linear_filter && test_fbo(p, f)) { + MP_VERBOSE(p, "Using FBO format %s.\n", f->name); + have_fbo = true; + p->fbo_format = f; + break; + } + } + + p->forced_dumb_mode = p->opts.dumb_mode > 0 || !have_fbo || !have_texrg; + bool voluntarily_dumb = check_dumb_mode(p); + if (p->forced_dumb_mode || voluntarily_dumb) { + if (voluntarily_dumb) { + MP_VERBOSE(p, "No advanced processing required. Enabling dumb mode.\n"); + } else if (p->opts.dumb_mode <= 0) { + MP_WARN(p, "High bit depth FBOs unsupported. Enabling dumb mode.\n" + "Most extended features will be disabled.\n"); + } + p->dumb_mode = true; + p->use_lut_3d = false; + // Most things don't work, so whitelist all options that still work. 
+ p->opts = (struct gl_video_opts){ + .gamma = p->opts.gamma, + .gamma_auto = p->opts.gamma_auto, + .pbo = p->opts.pbo, + .fbo_format = p->opts.fbo_format, + .alpha_mode = p->opts.alpha_mode, + .use_rectangle = p->opts.use_rectangle, + .background = p->opts.background, + .dither_algo = p->opts.dither_algo, + .dither_depth = p->opts.dither_depth, + .dither_size = p->opts.dither_size, + .temporal_dither = p->opts.temporal_dither, + .temporal_dither_period = p->opts.temporal_dither_period, + .tex_pad_x = p->opts.tex_pad_x, + .tex_pad_y = p->opts.tex_pad_y, + .tone_mapping = p->opts.tone_mapping, + .tone_mapping_param = p->opts.tone_mapping_param, + .tone_mapping_desat = p->opts.tone_mapping_desat, + .early_flush = p->opts.early_flush, + }; + for (int n = 0; n < SCALER_COUNT; n++) + p->opts.scaler[n] = gl_video_opts_def.scaler[n]; + return; + } + p->dumb_mode = false; + + // Normally, we want to disable them by default if FBOs are unavailable, + // because they will be slow (not critically slow, but still slower). + // Without FP textures, we must always disable them. + // I don't know if luminance alpha float textures exist, so disregard them. + for (int n = 0; n < SCALER_COUNT; n++) { + const struct filter_kernel *kernel = + mp_find_filter_kernel(p->opts.scaler[n].kernel.name); + if (kernel) { + char *reason = NULL; + if (!have_float_tex) + reason = "(float tex. missing)"; + if (!have_mglsl) + reason = "(GLSL version too old)"; + if (reason) { + MP_WARN(p, "Disabling scaler #%d %s %s.\n", n, + p->opts.scaler[n].kernel.name, reason); + // p->opts is a copy => we can just mess with it. + p->opts.scaler[n].kernel.name = "bilinear"; + if (n == SCALER_TSCALE) + p->opts.interpolation = 0; + } + } + } + + int use_cms = p->opts.target_prim != MP_CSP_PRIM_AUTO || + p->opts.target_trc != MP_CSP_TRC_AUTO || p->use_lut_3d; + + // mix() is needed for some gamma functions + if (!have_mglsl && (p->opts.linear_scaling || p->opts.sigmoid_upscaling)) { + p->opts.linear_scaling = false; + p->opts.sigmoid_upscaling = false; + MP_WARN(p, "Disabling linear/sigmoid scaling (GLSL version too old).\n"); + } + if (!have_mglsl && use_cms) { + p->opts.target_prim = MP_CSP_PRIM_AUTO; + p->opts.target_trc = MP_CSP_TRC_AUTO; + p->use_lut_3d = false; + MP_WARN(p, "Disabling color management (GLSL version too old).\n"); + } + if (!have_mglsl && p->opts.deband) { + p->opts.deband = 0; + MP_WARN(p, "Disabling debanding (GLSL version too old).\n"); + } + if ((!have_compute || !have_ssbo) && p->opts.compute_hdr_peak) { + p->opts.compute_hdr_peak = 0; + MP_WARN(p, "Disabling HDR peak computation (no compute shaders).\n"); + } +} + +static void init_gl(struct gl_video *p) +{ + debug_check_gl(p, "before init_gl"); + + p->upload_timer = timer_pool_create(p->ra); + p->blit_timer = timer_pool_create(p->ra); + p->osd_timer = timer_pool_create(p->ra); + + debug_check_gl(p, "after init_gl"); + + ra_dump_tex_formats(p->ra, MSGL_DEBUG); + ra_dump_img_formats(p->ra, MSGL_DEBUG); +} + +void gl_video_uninit(struct gl_video *p) +{ + if (!p) + return; + + uninit_video(p); + + gl_sc_destroy(p->sc); + + ra_tex_free(p->ra, &p->lut_3d_texture); + ra_buf_free(p->ra, &p->hdr_peak_ssbo); + + timer_pool_destroy(p->upload_timer); + timer_pool_destroy(p->blit_timer); + timer_pool_destroy(p->osd_timer); + + for (int i = 0; i < PASS_INFO_MAX; i++) { + talloc_free(p->pass_fresh[i].desc.start); + talloc_free(p->pass_redraw[i].desc.start); + } + + mpgl_osd_destroy(p->osd); + + // Forcibly destroy possibly remaining image references. 
This should also + // cause gl_video_dr_free_buffer() to be called for the remaining buffers. + gc_pending_dr_fences(p, true); + + // Should all have been unreffed already. + assert(!p->num_dr_buffers); + + talloc_free(p); +} + +void gl_video_reset(struct gl_video *p) +{ + gl_video_reset_surfaces(p); +} + +bool gl_video_showing_interpolated_frame(struct gl_video *p) +{ + return p->is_interpolated; +} + +static bool is_imgfmt_desc_supported(struct gl_video *p, + const struct ra_imgfmt_desc *desc) +{ + if (!desc->num_planes) + return false; + + if (desc->planes[0]->ctype == RA_CTYPE_UINT && p->forced_dumb_mode) + return false; + + return true; +} + +bool gl_video_check_format(struct gl_video *p, int mp_format) +{ + struct ra_imgfmt_desc desc; + if (ra_get_imgfmt_desc(p->ra, mp_format, &desc) && + is_imgfmt_desc_supported(p, &desc)) + return true; + if (p->hwdec && ra_hwdec_test_format(p->hwdec, mp_format)) + return true; + return false; +} + +void gl_video_config(struct gl_video *p, struct mp_image_params *params) +{ + unmap_overlay(p); + unref_current_image(p); + + if (!mp_image_params_equal(&p->real_image_params, params)) { + uninit_video(p); + p->real_image_params = *params; + p->image_params = *params; + if (params->imgfmt) + init_video(p); + } + + gl_video_reset_surfaces(p); +} + +void gl_video_set_osd_source(struct gl_video *p, struct osd_state *osd) +{ + mpgl_osd_destroy(p->osd); + p->osd = NULL; + p->osd_state = osd; + reinit_osd(p); +} + +struct gl_video *gl_video_init(struct ra *ra, struct mp_log *log, + struct mpv_global *g) +{ + struct gl_video *p = talloc_ptrtype(NULL, p); + *p = (struct gl_video) { + .ra = ra, + .global = g, + .log = log, + .sc = gl_sc_create(ra, g, log), + .video_eq = mp_csp_equalizer_create(p, g), + .opts_cache = m_config_cache_alloc(p, g, &gl_video_conf), + }; + // make sure this variable is initialized to *something* + p->pass = p->pass_fresh; + struct gl_video_opts *opts = p->opts_cache->opts; + p->cms = gl_lcms_init(p, log, g, opts->icc_opts), + p->opts = *opts; + for (int n = 0; n < SCALER_COUNT; n++) + p->scaler[n] = (struct scaler){.index = n}; + init_gl(p); + reinit_from_options(p); + return p; +} + +// Get static string for scaler shader. If "tscale" is set to true, the +// scaler must be a separable convolution filter. +static const char *handle_scaler_opt(const char *name, bool tscale) +{ + if (name && name[0]) { + const struct filter_kernel *kernel = mp_find_filter_kernel(name); + if (kernel && (!tscale || !kernel->polar)) + return kernel->f.name; + + for (const char *const *filter = tscale ? fixed_tscale_filters + : fixed_scale_filters; + *filter; filter++) { + if (strcmp(*filter, name) == 0) + return *filter; + } + } + return NULL; +} + +void gl_video_update_options(struct gl_video *p) +{ + if (m_config_cache_update(p->opts_cache)) { + gl_lcms_update_options(p->cms); + reinit_from_options(p); + } +} + +static void reinit_from_options(struct gl_video *p) +{ + p->use_lut_3d = gl_lcms_has_profile(p->cms); + + // Copy the option fields, so that check_gl_features() can mutate them. + // This works only for the fields themselves of course, not for any memory + // referenced by them. 
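/*
 * Illustrative sketch (editorial aside, not part of this patch): why the
 * copy mentioned in the comment above is only "field deep". Assigning a
 * struct duplicates scalar members and pointer values, but not the memory
 * those pointers reference. The struct here is a hypothetical stand-in,
 * not the real gl_video_opts.
 */
#include <stdio.h>

struct opts {
    int deband;              /* scalar: private to each copy */
    const char *fbo_format;  /* pointer: the string storage stays shared */
};

int main(void)
{
    struct opts cached = { .deband = 1, .fbo_format = "auto" };
    struct opts copy = cached;   /* same idea as p->opts = *opts_cache->opts */

    copy.deband = 0;             /* safe: does not touch the cached options */
    printf("cached.deband=%d copy.deband=%d\n", cached.deband, copy.deband);
    printf("string storage shared: %d\n", cached.fbo_format == copy.fbo_format);
    return 0;
}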
+ p->opts = *(struct gl_video_opts *)p->opts_cache->opts; + + if (!p->force_clear_color) + p->clear_color = p->opts.background; + + check_gl_features(p); + uninit_rendering(p); + gl_sc_set_cache_dir(p->sc, p->opts.shader_cache_dir); + p->ra->use_pbo = p->opts.pbo; + gl_video_setup_hooks(p); + reinit_osd(p); + + if (p->opts.interpolation && !p->global->opts->video_sync && !p->dsi_warned) { + MP_WARN(p, "Interpolation now requires enabling display-sync mode.\n" + "E.g.: --video-sync=display-resample\n"); + p->dsi_warned = true; + } +} + +void gl_video_configure_queue(struct gl_video *p, struct vo *vo) +{ + int queue_size = 1; + + // Figure out an adequate size for the interpolation queue. The larger + // the radius, the earlier we need to queue frames. + if (p->opts.interpolation) { + const struct filter_kernel *kernel = + mp_find_filter_kernel(p->opts.scaler[SCALER_TSCALE].kernel.name); + if (kernel) { + // filter_scale wouldn't be correctly initialized were we to use it here. + // This is fine since we're always upsampling, but beware if downsampling + // is added! + double radius = kernel->f.radius; + radius = radius > 0 ? radius : p->opts.scaler[SCALER_TSCALE].radius; + queue_size += 1 + ceil(radius); + } else { + // Oversample/linear case + queue_size += 2; + } + } + + vo_set_queue_params(vo, 0, queue_size); +} + +static int validate_scaler_opt(struct mp_log *log, const m_option_t *opt, + struct bstr name, struct bstr param) +{ + char s[20] = {0}; + int r = 1; + bool tscale = bstr_equals0(name, "tscale"); + if (bstr_equals0(param, "help")) { + r = M_OPT_EXIT; + } else { + snprintf(s, sizeof(s), "%.*s", BSTR_P(param)); + if (!handle_scaler_opt(s, tscale)) + r = M_OPT_INVALID; + } + if (r < 1) { + mp_info(log, "Available scalers:\n"); + for (const char *const *filter = tscale ? 
fixed_tscale_filters + : fixed_scale_filters; + *filter; filter++) { + mp_info(log, " %s\n", *filter); + } + for (int n = 0; mp_filter_kernels[n].f.name; n++) { + if (!tscale || !mp_filter_kernels[n].polar) + mp_info(log, " %s\n", mp_filter_kernels[n].f.name); + } + if (s[0]) + mp_fatal(log, "No scaler named '%s' found!\n", s); + } + return r; +} + +static int validate_window_opt(struct mp_log *log, const m_option_t *opt, + struct bstr name, struct bstr param) +{ + char s[20] = {0}; + int r = 1; + if (bstr_equals0(param, "help")) { + r = M_OPT_EXIT; + } else { + snprintf(s, sizeof(s), "%.*s", BSTR_P(param)); + const struct filter_window *window = mp_find_filter_window(s); + if (!window) + r = M_OPT_INVALID; + } + if (r < 1) { + mp_info(log, "Available windows:\n"); + for (int n = 0; mp_filter_windows[n].name; n++) + mp_info(log, " %s\n", mp_filter_windows[n].name); + if (s[0]) + mp_fatal(log, "No window named '%s' found!\n", s); + } + return r; +} + +float gl_video_scale_ambient_lux(float lmin, float lmax, + float rmin, float rmax, float lux) +{ + assert(lmax > lmin); + + float num = (rmax - rmin) * (log10(lux) - log10(lmin)); + float den = log10(lmax) - log10(lmin); + float result = num / den + rmin; + + // clamp the result + float max = MPMAX(rmax, rmin); + float min = MPMIN(rmax, rmin); + return MPMAX(MPMIN(result, max), min); +} + +void gl_video_set_ambient_lux(struct gl_video *p, int lux) +{ + if (p->opts.gamma_auto) { + float gamma = gl_video_scale_ambient_lux(16.0, 64.0, 2.40, 1.961, lux); + MP_VERBOSE(p, "ambient light changed: %dlux (gamma: %f)\n", lux, gamma); + p->opts.gamma = MPMIN(1.0, 1.961 / gamma); + } +} + +void gl_video_set_hwdec(struct gl_video *p, struct ra_hwdec *hwdec) +{ + unref_current_image(p); + ra_hwdec_mapper_free(&p->hwdec_mapper); + p->hwdec = hwdec; +} + +static void *gl_video_dr_alloc_buffer(struct gl_video *p, size_t size) +{ + struct ra_buf_params params = { + .type = RA_BUF_TYPE_TEX_UPLOAD, + .host_mapped = true, + .size = size, + }; + + struct ra_buf *buf = ra_buf_create(p->ra, ¶ms); + if (!buf) + return NULL; + + MP_TARRAY_GROW(p, p->dr_buffers, p->num_dr_buffers); + p->dr_buffers[p->num_dr_buffers++] = (struct dr_buffer){ .buf = buf }; + + return buf->data; +}; + +static void gl_video_dr_free_buffer(void *opaque, uint8_t *data) +{ + struct gl_video *p = opaque; + + for (int n = 0; n < p->num_dr_buffers; n++) { + struct dr_buffer *buffer = &p->dr_buffers[n]; + if (buffer->buf->data == data) { + assert(!buffer->mpi); // can't be freed while it has a ref + ra_buf_free(p->ra, &buffer->buf); + MP_TARRAY_REMOVE_AT(p->dr_buffers, p->num_dr_buffers, n); + return; + } + } + // not found - must not happen + assert(0); +} + +struct mp_image *gl_video_get_image(struct gl_video *p, int imgfmt, int w, int h, + int stride_align) +{ + int size = mp_image_get_alloc_size(imgfmt, w, h, stride_align); + if (size < 0) + return NULL; + + int alloc_size = size + stride_align; + void *ptr = gl_video_dr_alloc_buffer(p, alloc_size); + if (!ptr) + return NULL; + + // (we expect vo.c to proxy the free callback, so it happens in the same + // thread it was allocated in, removing the need for synchronization) + struct mp_image *res = mp_image_from_buffer(imgfmt, w, h, stride_align, + ptr, alloc_size, p, + gl_video_dr_free_buffer); + if (!res) + gl_video_dr_free_buffer(p, ptr); + return res; +} diff --git a/video/out/gpu/video.h b/video/out/gpu/video.h new file mode 100644 index 0000000000..884f5914fd --- /dev/null +++ b/video/out/gpu/video.h @@ -0,0 +1,194 @@ +/* + * This file is 
part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see . + */ + +#ifndef MP_GL_VIDEO_H +#define MP_GL_VIDEO_H + +#include + +#include "options/m_option.h" +#include "sub/osd.h" +#include "utils.h" +#include "lcms.h" +#include "shader_cache.h" +#include "video/csputils.h" +#include "video/out/filter_kernels.h" + +// Assume we have this many texture units for sourcing additional passes. +// The actual texture unit assignment is dynamic. +#define TEXUNIT_VIDEO_NUM 6 + +struct scaler_fun { + char *name; + float params[2]; + float blur; + float taper; +}; + +struct scaler_config { + struct scaler_fun kernel; + struct scaler_fun window; + float radius; + float antiring; + float cutoff; + float clamp; +}; + +struct scaler { + int index; + struct scaler_config conf; + double scale_factor; + bool initialized; + struct filter_kernel *kernel; + struct ra_tex *lut; + struct fbotex sep_fbo; + bool insufficient; + int lut_size; + + // kernel points here + struct filter_kernel kernel_storage; +}; + +enum scaler_unit { + SCALER_SCALE, // luma/video + SCALER_DSCALE, // luma-video downscaling + SCALER_CSCALE, // chroma upscaling + SCALER_TSCALE, // temporal scaling (interpolation) + SCALER_COUNT +}; + +enum dither_algo { + DITHER_NONE = 0, + DITHER_FRUIT, + DITHER_ORDERED, +}; + +enum alpha_mode { + ALPHA_NO = 0, + ALPHA_YES, + ALPHA_BLEND, + ALPHA_BLEND_TILES, +}; + +enum blend_subs_mode { + BLEND_SUBS_NO = 0, + BLEND_SUBS_YES, + BLEND_SUBS_VIDEO, +}; + +enum tone_mapping { + TONE_MAPPING_CLIP, + TONE_MAPPING_MOBIUS, + TONE_MAPPING_REINHARD, + TONE_MAPPING_HABLE, + TONE_MAPPING_GAMMA, + TONE_MAPPING_LINEAR, +}; + +// How many frames to average over for HDR peak detection +#define PEAK_DETECT_FRAMES 100 + +struct gl_video_opts { + int dumb_mode; + struct scaler_config scaler[4]; + int scaler_lut_size; + float gamma; + int gamma_auto; + int target_prim; + int target_trc; + int target_brightness; + int tone_mapping; + int compute_hdr_peak; + float tone_mapping_param; + float tone_mapping_desat; + int gamut_warning; + int linear_scaling; + int correct_downscaling; + int sigmoid_upscaling; + float sigmoid_center; + float sigmoid_slope; + int scaler_resizes_only; + int pbo; + int dither_depth; + int dither_algo; + int dither_size; + int temporal_dither; + int temporal_dither_period; + char *fbo_format; + int alpha_mode; + int use_rectangle; + struct m_color background; + int interpolation; + float interpolation_threshold; + int blend_subs; + char **user_shaders; + int deband; + struct deband_opts *deband_opts; + float unsharp; + int tex_pad_x, tex_pad_y; + struct mp_icc_opts *icc_opts; + int early_flush; + char *shader_cache_dir; +}; + +extern const struct m_sub_options gl_video_conf; + +struct gl_video; +struct vo_frame; + +struct gl_video *gl_video_init(struct ra *ra, struct mp_log *log, + struct mpv_global *g); +void gl_video_uninit(struct gl_video *p); +void gl_video_set_osd_source(struct gl_video *p, struct osd_state *osd); +void 
gl_video_update_options(struct gl_video *p); +bool gl_video_check_format(struct gl_video *p, int mp_format); +void gl_video_config(struct gl_video *p, struct mp_image_params *params); +void gl_video_set_output_depth(struct gl_video *p, int r, int g, int b); +void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, + struct fbodst target); +void gl_video_resize(struct gl_video *p, + struct mp_rect *src, struct mp_rect *dst, + struct mp_osd_res *osd); +void gl_video_set_fb_depth(struct gl_video *p, int fb_depth); +void gl_video_perfdata(struct gl_video *p, struct voctrl_performance_data *out); +void gl_video_set_clear_color(struct gl_video *p, struct m_color color); +void gl_video_set_osd_pts(struct gl_video *p, double pts); +bool gl_video_check_osd_change(struct gl_video *p, struct mp_osd_res *osd, + double pts); + +float gl_video_scale_ambient_lux(float lmin, float lmax, + float rmin, float rmax, float lux); +void gl_video_set_ambient_lux(struct gl_video *p, int lux); +void gl_video_set_icc_profile(struct gl_video *p, bstr icc_data); +bool gl_video_icc_auto_enabled(struct gl_video *p); +bool gl_video_gamma_auto_enabled(struct gl_video *p); +struct mp_colorspace gl_video_get_output_colorspace(struct gl_video *p); + +void gl_video_reset(struct gl_video *p); +bool gl_video_showing_interpolated_frame(struct gl_video *p); + +struct ra_hwdec; +void gl_video_set_hwdec(struct gl_video *p, struct ra_hwdec *hwdec); + +struct vo; +void gl_video_configure_queue(struct gl_video *p, struct vo *vo); + +struct mp_image *gl_video_get_image(struct gl_video *p, int imgfmt, int w, int h, + int stride_align); + + +#endif diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c new file mode 100644 index 0000000000..60c5ce82ac --- /dev/null +++ b/video/out/gpu/video_shaders.c @@ -0,0 +1,872 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see . + */ + +#include + +#include "video_shaders.h" +#include "video.h" + +#define GLSL(x) gl_sc_add(sc, #x "\n"); +#define GLSLF(...) gl_sc_addf(sc, __VA_ARGS__) +#define GLSLH(x) gl_sc_hadd(sc, #x "\n"); +#define GLSLHF(...) 
gl_sc_haddf(sc, __VA_ARGS__) + +// Set up shared/commonly used variables and macros +void sampler_prelude(struct gl_shader_cache *sc, int tex_num) +{ + GLSLF("#undef tex\n"); + GLSLF("#undef texmap\n"); + GLSLF("#define tex texture%d\n", tex_num); + GLSLF("#define texmap texmap%d\n", tex_num); + GLSLF("vec2 pos = texcoord%d;\n", tex_num); + GLSLF("vec2 size = texture_size%d;\n", tex_num); + GLSLF("vec2 pt = pixel_size%d;\n", tex_num); +} + +static void pass_sample_separated_get_weights(struct gl_shader_cache *sc, + struct scaler *scaler) +{ + gl_sc_uniform_texture(sc, "lut", scaler->lut); + GLSLF("float ypos = LUT_POS(fcoord, %d.0);\n", scaler->lut_size); + + int N = scaler->kernel->size; + int width = (N + 3) / 4; // round up + + GLSLF("float weights[%d];\n", N); + for (int i = 0; i < N; i++) { + if (i % 4 == 0) + GLSLF("c = texture(lut, vec2(%f, ypos));\n", (i / 4 + 0.5) / width); + GLSLF("weights[%d] = c[%d];\n", i, i % 4); + } +} + +// Handle a single pass (either vertical or horizontal). The direction is given +// by the vector (d_x, d_y). If the vector is 0, then planar interpolation is +// used instead (samples from texture0 through textureN) +void pass_sample_separated_gen(struct gl_shader_cache *sc, struct scaler *scaler, + int d_x, int d_y) +{ + int N = scaler->kernel->size; + bool use_ar = scaler->conf.antiring > 0; + bool planar = d_x == 0 && d_y == 0; + GLSL(color = vec4(0.0);) + GLSLF("{\n"); + if (!planar) { + GLSLF("vec2 dir = vec2(%d.0, %d.0);\n", d_x, d_y); + GLSL(pt *= dir;) + GLSL(float fcoord = dot(fract(pos * size - vec2(0.5)), dir);) + GLSLF("vec2 base = pos - fcoord * pt - pt * vec2(%d.0);\n", N / 2 - 1); + } + GLSL(vec4 c;) + if (use_ar) { + GLSL(vec4 hi = vec4(0.0);) + GLSL(vec4 lo = vec4(1.0);) + } + pass_sample_separated_get_weights(sc, scaler); + GLSLF("// scaler samples\n"); + for (int n = 0; n < N; n++) { + if (planar) { + GLSLF("c = texture(texture%d, texcoord%d);\n", n, n); + } else { + GLSLF("c = texture(tex, base + pt * vec2(%d.0));\n", n); + } + GLSLF("color += vec4(weights[%d]) * c;\n", n); + if (use_ar && (n == N/2-1 || n == N/2)) { + GLSL(lo = min(lo, c);) + GLSL(hi = max(hi, c);) + } + } + if (use_ar) + GLSLF("color = mix(color, clamp(color, lo, hi), %f);\n", + scaler->conf.antiring); + GLSLF("}\n"); +} + +// Subroutine for computing and adding an individual texel contribution +// If subtexel < 0 and offset < 0, samples directly. +// If subtexel >= 0, takes the texel from cN[subtexel] +// If offset >= 0, takes the texel from inN[rel.y+y+offset][rel.x+x+offset] +static void polar_sample(struct gl_shader_cache *sc, struct scaler *scaler, + int x, int y, int subtexel, int offset, int components) +{ + double radius = scaler->kernel->f.radius * scaler->kernel->filter_scale; + double radius_cutoff = scaler->kernel->radius_cutoff; + + // Since we can't know the subpixel position in advance, assume a + // worst case scenario + int yy = y > 0 ? y-1 : y; + int xx = x > 0 ? 
x-1 : x; + double dmax = sqrt(xx*xx + yy*yy); + // Skip samples definitely outside the radius + if (dmax >= radius_cutoff) + return; + GLSLF("d = length(vec2(%d.0, %d.0) - fcoord);\n", x, y); + // Check for samples that might be skippable + bool maybe_skippable = dmax >= radius_cutoff - M_SQRT2; + if (maybe_skippable) + GLSLF("if (d < %f) {\n", radius_cutoff); + + // get the weight for this pixel + if (scaler->lut->params.dimensions == 1) { + GLSLF("w = tex1D(lut, LUT_POS(d * 1.0/%f, %d.0)).r;\n", + radius, scaler->lut_size); + } else { + GLSLF("w = texture(lut, vec2(0.5, LUT_POS(d * 1.0/%f, %d.0))).r;\n", + radius, scaler->lut_size); + } + GLSL(wsum += w;) + + if (subtexel < 0 && offset < 0) { + GLSLF("c0 = texture(tex, base + pt * vec2(%d.0, %d.0));\n", x, y); + GLSL(color += vec4(w) * c0;) + } else if (subtexel >= 0) { + for (int n = 0; n < components; n++) + GLSLF("color[%d] += w * c%d[%d];\n", n, n, subtexel); + } else if (offset >= 0) { + for (int n = 0; n lut); + + GLSLF("// scaler samples\n"); + int bound = ceil(scaler->kernel->radius_cutoff); + for (int y = 1-bound; y <= bound; y += 2) { + for (int x = 1-bound; x <= bound; x += 2) { + // First we figure out whether it's more efficient to use direct + // sampling or gathering. The problem is that gathering 4 texels + // only to discard some of them is very wasteful, so only do it if + // we suspect it will be a win rather than a loss. This is the case + // exactly when all four texels are within bounds + bool use_gather = sqrt(x*x + y*y) < scaler->kernel->radius_cutoff; + + // textureGather is only supported in GLSL 400+ + if (glsl_version < 400) + use_gather = false; + + if (use_gather) { + // Gather the four surrounding texels simultaneously + for (int n = 0; n < components; n++) { + GLSLF("c%d = textureGatherOffset(tex, base, ivec2(%d, %d), %d);\n", + n, x, y, n); + } + + // Mix in all of the points with their weights + for (int p = 0; p < 4; p++) { + // The four texels are gathered counterclockwise starting + // from the bottom left + static const int xo[4] = {0, 1, 1, 0}; + static const int yo[4] = {1, 1, 0, 0}; + if (x+xo[p] > bound || y+yo[p] > bound) + continue; + polar_sample(sc, scaler, x+xo[p], y+yo[p], p, -1, components); + } + } else { + // switch to direct sampling instead, for efficiency/compatibility + for (int yy = y; yy <= bound && yy <= y+1; yy++) { + for (int xx = x; xx <= bound && xx <= x+1; xx++) + polar_sample(sc, scaler, xx, yy, -1, -1, components); + } + } + } + } + + GLSL(color = color / vec4(wsum);) + GLSLF("}\n"); +} + +// bw/bh: block size +// iw/ih: input size (pre-calculated to fit all required texels) +void pass_compute_polar(struct gl_shader_cache *sc, struct scaler *scaler, + int components, int bw, int bh, int iw, int ih) +{ + int bound = ceil(scaler->kernel->radius_cutoff); + int offset = bound - 1; // padding top/left + + GLSL(color = vec4(0.0);) + GLSLF("{\n"); + GLSL(vec2 wpos = texmap(gl_WorkGroupID * gl_WorkGroupSize);) + GLSL(vec2 wbase = wpos - pt * fract(wpos * size - vec2(0.5));) + GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));) + GLSL(vec2 base = pos - pt * fcoord;) + GLSL(ivec2 rel = ivec2(round((base - wbase) * size));) + GLSLF("float w, d, wsum = 0.0;\n"); + gl_sc_uniform_texture(sc, "lut", scaler->lut); + + // Load all relevant texels into shmem + gl_sc_enable_extension(sc, "GL_ARB_arrays_of_arrays"); + for (int c = 0; c < components; c++) + GLSLHF("shared float in%d[%d][%d];\n", c, ih, iw); + + GLSL(vec4 c;) + GLSLF("for (int y = int(gl_LocalInvocationID.y); y < %d; y += %d) 
{\n", ih, bh); + GLSLF("for (int x = int(gl_LocalInvocationID.x); x < %d; x += %d) {\n", iw, bw); + GLSLF("c = texture(tex, wbase + pt * vec2(x - %d, y - %d));\n", offset, offset); + for (int c = 0; c < components; c++) + GLSLF("in%d[y][x] = c[%d];\n", c, c); + GLSLF("}}\n"); + GLSL(groupMemoryBarrier();) + GLSL(barrier();) + + // Dispatch the actual samples + GLSLF("// scaler samples\n"); + for (int y = 1-bound; y <= bound; y++) { + for (int x = 1-bound; x <= bound; x++) + polar_sample(sc, scaler, x, y, -1, offset, components); + } + + GLSL(color = color / vec4(wsum);) + GLSLF("}\n"); +} + +static void bicubic_calcweights(struct gl_shader_cache *sc, const char *t, const char *s) +{ + // Explanation of how bicubic scaling with only 4 texel fetches is done: + // http://www.mate.tue.nl/mate/pdfs/10318.pdf + // 'Efficient GPU-Based Texture Interpolation using Uniform B-Splines' + // Explanation why this algorithm normally always blurs, even with unit + // scaling: + // http://bigwww.epfl.ch/preprints/ruijters1001p.pdf + // 'GPU Prefilter for Accurate Cubic B-spline Interpolation' + GLSLF("vec4 %s = vec4(-0.5, 0.1666, 0.3333, -0.3333) * %s" + " + vec4(1, 0, -0.5, 0.5);\n", t, s); + GLSLF("%s = %s * %s + vec4(0, 0, -0.5, 0.5);\n", t, t, s); + GLSLF("%s = %s * %s + vec4(-0.6666, 0, 0.8333, 0.1666);\n", t, t, s); + GLSLF("%s.xy *= vec2(1, 1) / vec2(%s.z, %s.w);\n", t, t, t); + GLSLF("%s.xy += vec2(1.0 + %s, 1.0 - %s);\n", t, s, s); +} + +void pass_sample_bicubic_fast(struct gl_shader_cache *sc) +{ + GLSLF("{\n"); + GLSL(vec2 fcoord = fract(pos * size + vec2(0.5, 0.5));) + bicubic_calcweights(sc, "parmx", "fcoord.x"); + bicubic_calcweights(sc, "parmy", "fcoord.y"); + GLSL(vec4 cdelta;) + GLSL(cdelta.xz = parmx.rg * vec2(-pt.x, pt.x);) + GLSL(cdelta.yw = parmy.rg * vec2(-pt.y, pt.y);) + // first y-interpolation + GLSL(vec4 ar = texture(tex, pos + cdelta.xy);) + GLSL(vec4 ag = texture(tex, pos + cdelta.xw);) + GLSL(vec4 ab = mix(ag, ar, parmy.b);) + // second y-interpolation + GLSL(vec4 br = texture(tex, pos + cdelta.zy);) + GLSL(vec4 bg = texture(tex, pos + cdelta.zw);) + GLSL(vec4 aa = mix(bg, br, parmy.b);) + // x-interpolation + GLSL(color = mix(aa, ab, parmx.b);) + GLSLF("}\n"); +} + +void pass_sample_oversample(struct gl_shader_cache *sc, struct scaler *scaler, + int w, int h) +{ + GLSLF("{\n"); + GLSL(vec2 pos = pos - vec2(0.5) * pt;) // round to nearest + GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));) + // Determine the mixing coefficient vector + gl_sc_uniform_vec2(sc, "output_size", (float[2]){w, h}); + GLSL(vec2 coeff = fcoord * output_size/size;) + float threshold = scaler->conf.kernel.params[0]; + threshold = isnan(threshold) ? 
0.0 : threshold; + GLSLF("coeff = (coeff - %f) * 1.0/%f;\n", threshold, 1.0 - 2 * threshold); + GLSL(coeff = clamp(coeff, 0.0, 1.0);) + // Compute the right blend of colors + GLSL(color = texture(tex, pos + pt * (coeff - fcoord));) + GLSLF("}\n"); +} + +// Common constants for SMPTE ST.2084 (HDR) +static const float PQ_M1 = 2610./4096 * 1./4, + PQ_M2 = 2523./4096 * 128, + PQ_C1 = 3424./4096, + PQ_C2 = 2413./4096 * 32, + PQ_C3 = 2392./4096 * 32; + +// Common constants for ARIB STD-B67 (HLG) +static const float HLG_A = 0.17883277, + HLG_B = 0.28466892, + HLG_C = 0.55991073; + +// Common constants for Panasonic V-Log +static const float VLOG_B = 0.00873, + VLOG_C = 0.241514, + VLOG_D = 0.598206; + +// Common constants for Sony S-Log +static const float SLOG_A = 0.432699, + SLOG_B = 0.037584, + SLOG_C = 0.616596 + 0.03, + SLOG_P = 3.538813, + SLOG_Q = 0.030001, + SLOG_K2 = 155.0 / 219.0; + +// Linearize (expand), given a TRC as input. In essence, this is the ITU-R +// EOTF, calculated on an idealized (reference) monitor with a white point of +// MP_REF_WHITE and infinite contrast. +void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc) +{ + if (trc == MP_CSP_TRC_LINEAR) + return; + + GLSLF("// linearize\n"); + + // Note that this clamp may technically violate the definition of + // ITU-R BT.2100, which allows for sub-blacks and super-whites to be + // displayed on the display where such would be possible. That said, the + // problem is that not all gamma curves are well-defined on the values + // outside this range, so we ignore it and just clip anyway for sanity. + GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);) + + switch (trc) { + case MP_CSP_TRC_SRGB: + GLSL(color.rgb = mix(color.rgb * vec3(1.0/12.92), + pow((color.rgb + vec3(0.055))/vec3(1.055), vec3(2.4)), + lessThan(vec3(0.04045), color.rgb));) + break; + case MP_CSP_TRC_BT_1886: + GLSL(color.rgb = pow(color.rgb, vec3(2.4));) + break; + case MP_CSP_TRC_GAMMA18: + GLSL(color.rgb = pow(color.rgb, vec3(1.8));) + break; + case MP_CSP_TRC_GAMMA22: + GLSL(color.rgb = pow(color.rgb, vec3(2.2));) + break; + case MP_CSP_TRC_GAMMA28: + GLSL(color.rgb = pow(color.rgb, vec3(2.8));) + break; + case MP_CSP_TRC_PRO_PHOTO: + GLSL(color.rgb = mix(color.rgb * vec3(1.0/16.0), + pow(color.rgb, vec3(1.8)), + lessThan(vec3(0.03125), color.rgb));) + break; + case MP_CSP_TRC_PQ: + GLSLF("color.rgb = pow(color.rgb, vec3(1.0/%f));\n", PQ_M2); + GLSLF("color.rgb = max(color.rgb - vec3(%f), vec3(0.0)) \n" + " / (vec3(%f) - vec3(%f) * color.rgb);\n", + PQ_C1, PQ_C2, PQ_C3); + GLSLF("color.rgb = pow(color.rgb, vec3(1.0/%f));\n", PQ_M1); + // PQ's output range is 0-10000, but we need it to be relative to to + // MP_REF_WHITE instead, so rescale + GLSLF("color.rgb *= vec3(%f);\n", 10000 / MP_REF_WHITE); + break; + case MP_CSP_TRC_HLG: + GLSLF("color.rgb = mix(vec3(4.0) * color.rgb * color.rgb,\n" + " exp((color.rgb - vec3(%f)) * vec3(1.0/%f)) + vec3(%f),\n" + " lessThan(vec3(0.5), color.rgb));\n", + HLG_C, HLG_A, HLG_B); + break; + case MP_CSP_TRC_V_LOG: + GLSLF("color.rgb = mix((color.rgb - vec3(0.125)) * vec3(1.0/5.6), \n" + " pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n" + " - vec3(%f), \n" + " lessThanEqual(vec3(0.181), color.rgb)); \n", + VLOG_D, VLOG_C, VLOG_B); + break; + case MP_CSP_TRC_S_LOG1: + GLSLF("color.rgb = pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f))\n" + " - vec3(%f);\n", + SLOG_C, SLOG_A, SLOG_B); + break; + case MP_CSP_TRC_S_LOG2: + GLSLF("color.rgb = mix((color.rgb - vec3(%f)) * vec3(1.0/%f), \n" + " 
(pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n" + " - vec3(%f)) * vec3(1.0/%f), \n" + " lessThanEqual(vec3(%f), color.rgb)); \n", + SLOG_Q, SLOG_P, SLOG_C, SLOG_A, SLOG_B, SLOG_K2, SLOG_Q); + break; + default: + abort(); + } + + // Rescale to prevent clipping on non-float textures + GLSLF("color.rgb *= vec3(1.0/%f);\n", mp_trc_nom_peak(trc)); +} + +// Delinearize (compress), given a TRC as output. This corresponds to the +// inverse EOTF (not the OETF) in ITU-R terminology, again assuming a +// reference monitor. +void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc) +{ + if (trc == MP_CSP_TRC_LINEAR) + return; + + GLSLF("// delinearize\n"); + GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);) + GLSLF("color.rgb *= vec3(%f);\n", mp_trc_nom_peak(trc)); + + switch (trc) { + case MP_CSP_TRC_SRGB: + GLSL(color.rgb = mix(color.rgb * vec3(12.92), + vec3(1.055) * pow(color.rgb, vec3(1.0/2.4)) + - vec3(0.055), + lessThanEqual(vec3(0.0031308), color.rgb));) + break; + case MP_CSP_TRC_BT_1886: + GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.4));) + break; + case MP_CSP_TRC_GAMMA18: + GLSL(color.rgb = pow(color.rgb, vec3(1.0/1.8));) + break; + case MP_CSP_TRC_GAMMA22: + GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.2));) + break; + case MP_CSP_TRC_GAMMA28: + GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.8));) + break; + case MP_CSP_TRC_PRO_PHOTO: + GLSL(color.rgb = mix(color.rgb * vec3(16.0), + pow(color.rgb, vec3(1.0/1.8)), + lessThanEqual(vec3(0.001953), color.rgb));) + break; + case MP_CSP_TRC_PQ: + GLSLF("color.rgb *= vec3(1.0/%f);\n", 10000 / MP_REF_WHITE); + GLSLF("color.rgb = pow(color.rgb, vec3(%f));\n", PQ_M1); + GLSLF("color.rgb = (vec3(%f) + vec3(%f) * color.rgb) \n" + " / (vec3(1.0) + vec3(%f) * color.rgb);\n", + PQ_C1, PQ_C2, PQ_C3); + GLSLF("color.rgb = pow(color.rgb, vec3(%f));\n", PQ_M2); + break; + case MP_CSP_TRC_HLG: + GLSLF("color.rgb = mix(vec3(0.5) * sqrt(color.rgb),\n" + " vec3(%f) * log(color.rgb - vec3(%f)) + vec3(%f),\n" + " lessThan(vec3(1.0), color.rgb));\n", + HLG_A, HLG_B, HLG_C); + break; + case MP_CSP_TRC_V_LOG: + GLSLF("color.rgb = mix(vec3(5.6) * color.rgb + vec3(0.125), \n" + " vec3(%f) * log(color.rgb + vec3(%f)) \n" + " + vec3(%f), \n" + " lessThanEqual(vec3(0.01), color.rgb)); \n", + VLOG_C / M_LN10, VLOG_B, VLOG_D); + break; + case MP_CSP_TRC_S_LOG1: + GLSLF("color.rgb = vec3(%f) * log(color.rgb + vec3(%f)) + vec3(%f);\n", + SLOG_A / M_LN10, SLOG_B, SLOG_C); + break; + case MP_CSP_TRC_S_LOG2: + GLSLF("color.rgb = mix(vec3(%f) * color.rgb + vec3(%f), \n" + " vec3(%f) * log(vec3(%f) * color.rgb + vec3(%f)) \n" + " + vec3(%f), \n" + " lessThanEqual(vec3(0.0), color.rgb)); \n", + SLOG_P, SLOG_Q, SLOG_A / M_LN10, SLOG_K2, SLOG_B, SLOG_C); + break; + default: + abort(); + } +} + +// Apply the OOTF mapping from a given light type to display-referred light. 
+// The extra peak parameter is used to scale the values before and after +// the OOTF, and can be inferred using mp_trc_nom_peak +void pass_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, float peak) +{ + if (light == MP_CSP_LIGHT_DISPLAY) + return; + + GLSLF("// apply ootf\n"); + GLSLF("color.rgb *= vec3(%f);\n", peak); + + switch (light) + { + case MP_CSP_LIGHT_SCENE_HLG: + // HLG OOTF from BT.2100, assuming a reference display with a + // peak of 1000 cd/m² -> gamma = 1.2 + GLSLF("color.rgb *= vec3(%f * pow(dot(src_luma, color.rgb), 0.2));\n", + (1000 / MP_REF_WHITE) / pow(12, 1.2)); + break; + case MP_CSP_LIGHT_SCENE_709_1886: + // This OOTF is defined by encoding the result as 709 and then decoding + // it as 1886; although this is called 709_1886 we actually use the + // more precise (by one decimal) values from BT.2020 instead + GLSL(color.rgb = mix(color.rgb * vec3(4.5), + vec3(1.0993) * pow(color.rgb, vec3(0.45)) - vec3(0.0993), + lessThan(vec3(0.0181), color.rgb));) + GLSL(color.rgb = pow(color.rgb, vec3(2.4));) + break; + case MP_CSP_LIGHT_SCENE_1_2: + GLSL(color.rgb = pow(color.rgb, vec3(1.2));) + break; + default: + abort(); + } + + GLSLF("color.rgb *= vec3(1.0/%f);\n", peak); +} + +// Inverse of the function pass_ootf, for completeness' sake. +void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, float peak) +{ + if (light == MP_CSP_LIGHT_DISPLAY) + return; + + GLSLF("// apply inverse ootf\n"); + GLSLF("color.rgb *= vec3(%f);\n", peak); + + switch (light) + { + case MP_CSP_LIGHT_SCENE_HLG: + GLSLF("color.rgb *= vec3(1.0/%f);\n", (1000 / MP_REF_WHITE) / pow(12, 1.2)); + GLSL(color.rgb /= vec3(max(1e-6, pow(dot(src_luma, color.rgb), 0.2/1.2)));) + break; + case MP_CSP_LIGHT_SCENE_709_1886: + GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.4));) + GLSL(color.rgb = mix(color.rgb * vec3(1.0/4.5), + pow((color.rgb + vec3(0.0993)) * vec3(1.0/1.0993), + vec3(1/0.45)), + lessThan(vec3(0.08145), color.rgb));) + break; + case MP_CSP_LIGHT_SCENE_1_2: + GLSL(color.rgb = pow(color.rgb, vec3(1.0/1.2));) + break; + default: + abort(); + } + + GLSLF("color.rgb *= vec3(1.0/%f);\n", peak); +} + +// Tone map from a known peak brightness to the range [0,1]. If ref_peak +// is 0, we will use peak detection instead +static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak, + enum tone_mapping algo, float param, float desat) +{ + GLSLF("// HDR tone mapping\n"); + + // Desaturate the color using a coefficient dependent on the luminance + GLSL(float luma = dot(dst_luma, color.rgb);) + if (desat > 0) { + GLSLF("float overbright = max(luma - %f, 1e-6) / max(luma, 1e-6);\n", desat); + GLSL(color.rgb = mix(color.rgb, vec3(luma), overbright);) + } + + // To prevent discoloration due to out-of-bounds clipping, we need to make + // sure to reduce the value range as far as necessary to keep the entire + // signal in range, so tone map based on the brightest component. + GLSL(float sig = max(max(color.r, color.g), color.b);) + GLSL(float sig_orig = sig;) + + if (!ref_peak) { + // For performance, we want to do as few atomic operations on global + // memory as possible, so use an atomic in shmem for the work group. 
+ // We also want slightly more stable values, so use the group average + // instead of the group max + GLSLHF("shared uint group_sum = 0;\n"); + GLSLF("atomicAdd(group_sum, uint(sig * %f));\n", MP_REF_WHITE); + + // Have one thread in each work group update the frame maximum + GLSL(memoryBarrierBuffer();) + GLSL(barrier();) + GLSL(if (gl_LocalInvocationIndex == 0)) + GLSL(atomicMax(frame_max[index], group_sum / + (gl_WorkGroupSize.x * gl_WorkGroupSize.y));) + + // Finally, have one thread per invocation update the total maximum + // and advance the index + GLSL(memoryBarrierBuffer();) + GLSL(barrier();) + GLSL(if (gl_GlobalInvocationID == ivec3(0)) {) // do this once per invocation + GLSLF("uint next = (index + 1) %% %d;\n", PEAK_DETECT_FRAMES+1); + GLSLF("sig_peak_raw = sig_peak_raw + frame_max[index] - frame_max[next];\n"); + GLSLF("frame_max[next] = %d;\n", (int)MP_REF_WHITE); + GLSL(index = next;) + GLSL(}) + + GLSL(memoryBarrierBuffer();) + GLSL(barrier();) + GLSLF("float sig_peak = 1.0/%f * float(sig_peak_raw);\n", + MP_REF_WHITE * PEAK_DETECT_FRAMES); + } else { + GLSLHF("const float sig_peak = %f;\n", ref_peak); + } + + switch (algo) { + case TONE_MAPPING_CLIP: + GLSLF("sig = %f * sig;\n", isnan(param) ? 1.0 : param); + break; + + case TONE_MAPPING_MOBIUS: + GLSLF("const float j = %f;\n", isnan(param) ? 0.3 : param); + // solve for M(j) = j; M(sig_peak) = 1.0; M'(j) = 1.0 + // where M(x) = scale * (x+a)/(x+b) + GLSLF("float a = -j*j * (sig_peak - 1.0) / (j*j - 2.0*j + sig_peak);\n"); + GLSLF("float b = (j*j - 2.0*j*sig_peak + sig_peak) / " + "max(1e-6, sig_peak - 1.0);\n"); + GLSLF("float scale = (b*b + 2.0*b*j + j*j) / (b-a);\n"); + GLSL(sig = mix(sig, scale * (sig + a) / (sig + b), sig > j);) + break; + + case TONE_MAPPING_REINHARD: { + float contrast = isnan(param) ? 0.5 : param, + offset = (1.0 - contrast) / contrast; + GLSLF("sig = sig / (sig + %f);\n", offset); + GLSLF("float scale = (sig_peak + %f) / sig_peak;\n", offset); + GLSL(sig *= scale;) + break; + } + + case TONE_MAPPING_HABLE: { + float A = 0.15, B = 0.50, C = 0.10, D = 0.20, E = 0.02, F = 0.30; + GLSLHF("float hable(float x) {\n"); + GLSLHF("return ((x * (%f*x + %f)+%f)/(x * (%f*x + %f) + %f)) - %f;\n", + A, C*B, D*E, A, B, D*F, E/F); + GLSLHF("}\n"); + GLSL(sig = hable(sig) / hable(sig_peak);) + break; + } + + case TONE_MAPPING_GAMMA: { + float gamma = isnan(param) ? 1.8 : param; + GLSLF("const float cutoff = 0.05, gamma = %f;\n", 1.0/gamma); + GLSL(float scale = pow(cutoff / sig_peak, gamma) / cutoff;) + GLSL(sig = sig > cutoff ? pow(sig / sig_peak, gamma) : scale * sig;) + break; + } + + case TONE_MAPPING_LINEAR: { + float coeff = isnan(param) ? 1.0 : param; + GLSLF("sig = %f / sig_peak * sig;\n", coeff); + break; + } + + default: + abort(); + } + + // Apply the computed scale factor to the color, linearly to prevent + // discoloration + GLSL(color.rgb *= sig / sig_orig;) +} + +// Map colors from one source space to another. These source spaces must be +// known (i.e. not MP_CSP_*_AUTO), as this function won't perform any +// auto-guessing. If is_linear is true, we assume the input has already been +// linearized (e.g. for linear-scaling). If `detect_peak` is true, we will +// detect the peak instead of relying on metadata. 
Note that this requires +// the caller to have already bound the appropriate SSBO and set up the +// compute shader metadata +void pass_color_map(struct gl_shader_cache *sc, + struct mp_colorspace src, struct mp_colorspace dst, + enum tone_mapping algo, float tone_mapping_param, + float tone_mapping_desat, bool detect_peak, + bool gamut_warning, bool is_linear) +{ + GLSLF("// color mapping\n"); + + // Compute the highest encodable level + float src_range = mp_trc_nom_peak(src.gamma), + dst_range = mp_trc_nom_peak(dst.gamma); + float ref_peak = src.sig_peak / dst_range; + + // Some operations need access to the video's luma coefficients, so make + // them available + float rgb2xyz[3][3]; + mp_get_rgb2xyz_matrix(mp_get_csp_primaries(src.primaries), rgb2xyz); + gl_sc_uniform_vec3(sc, "src_luma", rgb2xyz[1]); + mp_get_rgb2xyz_matrix(mp_get_csp_primaries(dst.primaries), rgb2xyz); + gl_sc_uniform_vec3(sc, "dst_luma", rgb2xyz[1]); + + // All operations from here on require linear light as a starting point, + // so we linearize even if src.gamma == dst.gamma when one of the other + // operations needs it + bool need_gamma = src.gamma != dst.gamma || + src.primaries != dst.primaries || + src_range != dst_range || + src.sig_peak > dst_range || + src.light != dst.light; + + if (need_gamma && !is_linear) { + pass_linearize(sc, src.gamma); + is_linear= true; + } + + if (src.light != dst.light) + pass_ootf(sc, src.light, mp_trc_nom_peak(src.gamma)); + + // Rescale the signal to compensate for differences in the encoding range + // and reference white level. This is necessary because of how mpv encodes + // brightness in textures. + if (src_range != dst_range) { + GLSLF("// rescale value range;\n"); + GLSLF("color.rgb *= vec3(%f);\n", src_range / dst_range); + } + + // Adapt to the right colorspace if necessary + if (src.primaries != dst.primaries) { + struct mp_csp_primaries csp_src = mp_get_csp_primaries(src.primaries), + csp_dst = mp_get_csp_primaries(dst.primaries); + float m[3][3] = {{0}}; + mp_get_cms_matrix(csp_src, csp_dst, MP_INTENT_RELATIVE_COLORIMETRIC, m); + gl_sc_uniform_mat3(sc, "cms_matrix", true, &m[0][0]); + GLSL(color.rgb = cms_matrix * color.rgb;) + // Since this can reduce the gamut, figure out by how much + for (int c = 0; c < 3; c++) + ref_peak = MPMAX(ref_peak, m[c][c]); + } + + // Tone map to prevent clipping when the source signal peak exceeds the + // encodable range or we've reduced the gamut + if (ref_peak > 1) { + pass_tone_map(sc, detect_peak ? 0 : ref_peak, algo, + tone_mapping_param, tone_mapping_desat); + } + + if (src.light != dst.light) + pass_inverse_ootf(sc, dst.light, mp_trc_nom_peak(dst.gamma)); + + // Warn for remaining out-of-gamut colors is enabled + if (gamut_warning) { + GLSL(if (any(greaterThan(color.rgb, vec3(1.01))))) + GLSL(color.rgb = vec3(1.0) - color.rgb;) // invert + } + + if (is_linear) + pass_delinearize(sc, dst.gamma); +} + +// Wide usage friendly PRNG, shamelessly stolen from a GLSL tricks forum post. +// Obtain random numbers by calling rand(h), followed by h = permute(h) to +// update the state. Assumes the texture was hooked. 
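/*
 * Illustrative sketch (editorial aside, not part of this patch): the same
 * permute/rand scheme as prng_init() below, evaluated on the CPU so the
 * state update ("call rand(h), then h = permute(h)") is easy to follow.
 * The seed values are arbitrary.
 */
#include <math.h>
#include <stdio.h>

static float mod289(float x)  { return x - floorf(x * (1.0f / 289.0f)) * 289.0f; }
static float permute(float x) { return mod289((34.0f * x + 1.0f) * x); }
static float prng_rand(float x)
{
    float v = x * (1.0f / 41.0f);
    return v - floorf(v); /* fract() */
}

int main(void)
{
    /* hash an arbitrary "position" plus a per-frame uniform, as in prng_init */
    float m[3] = {0.25f + 1.0f, 0.75f + 1.0f, 0.4321f + 1.0f};
    float h = permute(permute(permute(m[0]) + m[1]) + m[2]);

    for (int i = 0; i < 4; i++) {
        printf("sample %d: %f\n", i, prng_rand(h));
        h = permute(h); /* advance the state */
    }
    return 0;
}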
+static void prng_init(struct gl_shader_cache *sc, AVLFG *lfg) +{ + GLSLH(float mod289(float x) { return x - floor(x * 1.0/289.0) * 289.0; }) + GLSLH(float permute(float x) { return mod289((34.0*x + 1.0) * x); }) + GLSLH(float rand(float x) { return fract(x * 1.0/41.0); }) + + // Initialize the PRNG by hashing the position + a random uniform + GLSL(vec3 _m = vec3(HOOKED_pos, random) + vec3(1.0);) + GLSL(float h = permute(permute(permute(_m.x)+_m.y)+_m.z);) + gl_sc_uniform_f(sc, "random", (double)av_lfg_get(lfg) / UINT32_MAX); +} + +struct deband_opts { + int enabled; + int iterations; + float threshold; + float range; + float grain; +}; + +const struct deband_opts deband_opts_def = { + .iterations = 1, + .threshold = 64.0, + .range = 16.0, + .grain = 48.0, +}; + +#define OPT_BASE_STRUCT struct deband_opts +const struct m_sub_options deband_conf = { + .opts = (const m_option_t[]) { + OPT_INTRANGE("iterations", iterations, 0, 1, 16), + OPT_FLOATRANGE("threshold", threshold, 0, 0.0, 4096.0), + OPT_FLOATRANGE("range", range, 0, 1.0, 64.0), + OPT_FLOATRANGE("grain", grain, 0, 0.0, 4096.0), + {0} + }, + .size = sizeof(struct deband_opts), + .defaults = &deband_opts_def, +}; + +// Stochastically sample a debanded result from a hooked texture. +void pass_sample_deband(struct gl_shader_cache *sc, struct deband_opts *opts, + AVLFG *lfg, enum mp_csp_trc trc) +{ + // Initialize the PRNG + GLSLF("{\n"); + prng_init(sc, lfg); + + // Helper: Compute a stochastic approximation of the avg color around a + // pixel + GLSLHF("vec4 average(float range, inout float h) {\n"); + // Compute a random rangle and distance + GLSLH(float dist = rand(h) * range; h = permute(h);) + GLSLH(float dir = rand(h) * 6.2831853; h = permute(h);) + GLSLH(vec2 o = dist * vec2(cos(dir), sin(dir));) + + // Sample at quarter-turn intervals around the source pixel + GLSLH(vec4 ref[4];) + GLSLH(ref[0] = HOOKED_texOff(vec2( o.x, o.y));) + GLSLH(ref[1] = HOOKED_texOff(vec2(-o.y, o.x));) + GLSLH(ref[2] = HOOKED_texOff(vec2(-o.x, -o.y));) + GLSLH(ref[3] = HOOKED_texOff(vec2( o.y, -o.x));) + + // Return the (normalized) average + GLSLH(return (ref[0] + ref[1] + ref[2] + ref[3])*0.25;) + GLSLHF("}\n"); + + // Sample the source pixel + GLSL(color = HOOKED_tex(HOOKED_pos);) + GLSLF("vec4 avg, diff;\n"); + for (int i = 1; i <= opts->iterations; i++) { + // Sample the average pixel and use it instead of the original if + // the difference is below the given threshold + GLSLF("avg = average(%f, h);\n", i * opts->range); + GLSL(diff = abs(color - avg);) + GLSLF("color = mix(avg, color, greaterThan(diff, vec4(%f)));\n", + opts->threshold / (i * 16384.0)); + } + + // Add some random noise to smooth out residual differences + GLSL(vec3 noise;) + GLSL(noise.x = rand(h); h = permute(h);) + GLSL(noise.y = rand(h); h = permute(h);) + GLSL(noise.z = rand(h); h = permute(h);) + + // Noise is scaled to the signal level to prevent extreme noise for HDR + float gain = opts->grain/8192.0 / mp_trc_nom_peak(trc); + GLSLF("color.xyz += %f * (noise - vec3(0.5));\n", gain); + GLSLF("}\n"); +} + +// Assumes the texture was hooked +void pass_sample_unsharp(struct gl_shader_cache *sc, float param) { + GLSLF("{\n"); + GLSL(float st1 = 1.2;) + GLSL(vec4 p = HOOKED_tex(HOOKED_pos);) + GLSL(vec4 sum1 = HOOKED_texOff(st1 * vec2(+1, +1)) + + HOOKED_texOff(st1 * vec2(+1, -1)) + + HOOKED_texOff(st1 * vec2(-1, +1)) + + HOOKED_texOff(st1 * vec2(-1, -1));) + GLSL(float st2 = 1.5;) + GLSL(vec4 sum2 = HOOKED_texOff(st2 * vec2(+1, 0)) + + HOOKED_texOff(st2 * vec2( 0, +1)) + + 
+                     HOOKED_texOff(st2 * vec2(-1, 0)) +
+                     HOOKED_texOff(st2 * vec2( 0, -1));)
+    GLSL(vec4 t = p * 0.859375 + sum2 * -0.1171875 + sum1 * -0.09765625;)
+    GLSLF("color = p + t * %f;\n", param);
+    GLSLF("}\n");
+}
diff --git a/video/out/gpu/video_shaders.h b/video/out/gpu/video_shaders.h
new file mode 100644
index 0000000000..8345e4c598
--- /dev/null
+++ b/video/out/gpu/video_shaders.h
@@ -0,0 +1,56 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MP_GL_VIDEO_SHADERS_H
+#define MP_GL_VIDEO_SHADERS_H
+
+#include <libavutil/lfg.h>
+
+#include "utils.h"
+#include "video.h"
+
+extern const struct deband_opts deband_opts_def;
+extern const struct m_sub_options deband_conf;
+
+void sampler_prelude(struct gl_shader_cache *sc, int tex_num);
+void pass_sample_separated_gen(struct gl_shader_cache *sc, struct scaler *scaler,
+                               int d_x, int d_y);
+void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler,
+                       int components, int glsl_version);
+void pass_compute_polar(struct gl_shader_cache *sc, struct scaler *scaler,
+                        int components, int bw, int bh, int iw, int ih);
+void pass_sample_bicubic_fast(struct gl_shader_cache *sc);
+void pass_sample_oversample(struct gl_shader_cache *sc, struct scaler *scaler,
+                            int w, int h);
+
+void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc);
+void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc);
+void pass_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, float peak);
+void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, float peak);
+
+void pass_color_map(struct gl_shader_cache *sc,
+                    struct mp_colorspace src, struct mp_colorspace dst,
+                    enum tone_mapping algo, float tone_mapping_param,
+                    float tone_mapping_desat, bool use_detected_peak,
+                    bool gamut_warning, bool is_linear);
+
+void pass_sample_deband(struct gl_shader_cache *sc, struct deband_opts *opts,
+                        AVLFG *lfg, enum mp_csp_trc trc);
+
+void pass_sample_unsharp(struct gl_shader_cache *sc, float param);
+
+#endif
diff --git a/video/out/opengl/common.h b/video/out/opengl/common.h
index 7b2e3ed497..b9f582b79f 100644
--- a/video/out/opengl/common.h
+++ b/video/out/opengl/common.h
@@ -26,10 +26,10 @@
 #include "common/msg.h"
 #include "misc/bstr.h"
-#include "video/out/vo.h"
 #include "video/csputils.h"
-
 #include "video/mp_image.h"
+#include "video/out/vo.h"
+#include "video/out/gpu/ra.h"
 
 #include "gl_headers.h"
diff --git a/video/out/opengl/context.c b/video/out/opengl/context.c
index fe454e9741..d3cdcac3b7 100644
--- a/video/out/opengl/context.c
+++ b/video/out/opengl/context.c
@@ -1,10 +1,4 @@
 /*
- * common OpenGL routines
- *
- * copyleft (C) 2005-2010 Reimar Döffinger
- * Special thanks go to the xine team and Matthias Hopf, whose video_out_opengl.c
- * gave me lots of good ideas.
- *
  * This file is part of mpv.
  *
  * mpv is free software; you can redistribute it and/or
@@ -21,73 +15,10 @@
  * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
*/ -#include -#include -#include -#include -#include -#include -#include - +#include "options/m_config.h" #include "context.h" -#include "common/common.h" -#include "options/options.h" -#include "options/m_option.h" - -extern const struct mpgl_driver mpgl_driver_x11; -extern const struct mpgl_driver mpgl_driver_x11egl; -extern const struct mpgl_driver mpgl_driver_x11_probe; -extern const struct mpgl_driver mpgl_driver_drm_egl; -extern const struct mpgl_driver mpgl_driver_drm; -extern const struct mpgl_driver mpgl_driver_cocoa; -extern const struct mpgl_driver mpgl_driver_wayland; -extern const struct mpgl_driver mpgl_driver_w32; -extern const struct mpgl_driver mpgl_driver_angle; -extern const struct mpgl_driver mpgl_driver_angle_es2; -extern const struct mpgl_driver mpgl_driver_dxinterop; -extern const struct mpgl_driver mpgl_driver_rpi; -extern const struct mpgl_driver mpgl_driver_mali; -extern const struct mpgl_driver mpgl_driver_vdpauglx; - -static const struct mpgl_driver *const backends[] = { -#if HAVE_RPI - &mpgl_driver_rpi, -#endif -#if HAVE_GL_COCOA - &mpgl_driver_cocoa, -#endif -#if HAVE_EGL_ANGLE_WIN32 - &mpgl_driver_angle, -#endif -#if HAVE_GL_WIN32 - &mpgl_driver_w32, -#endif -#if HAVE_GL_DXINTEROP - &mpgl_driver_dxinterop, -#endif -#if HAVE_GL_X11 - &mpgl_driver_x11_probe, -#endif -#if HAVE_EGL_X11 - &mpgl_driver_x11egl, -#endif -#if HAVE_GL_X11 - &mpgl_driver_x11, -#endif -#if HAVE_GL_WAYLAND - &mpgl_driver_wayland, -#endif -#if HAVE_EGL_DRM - &mpgl_driver_drm, - &mpgl_driver_drm_egl, -#endif -#if HAVE_MALI_FBDEV - &mpgl_driver_mali, -#endif -#if HAVE_VDPAU_GL_X11 - &mpgl_driver_vdpauglx, -#endif -}; +#include "ra_gl.h" +#include "utils.h" // 0-terminated list of desktop GL versions a backend should try to // initialize. The first entry is the most preferred version. 
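[Editor's sketch, not part of the patch: the 0-terminated version list described above is meant to be walked by a context backend, filtering each candidate through ra_gl_ctx_test_version() before attempting creation; context_glx.c further down in this commit does exactly this. try_create_context() below is a hypothetical placeholder for the platform-specific creation code.]

    static bool probe_gl_versions(struct ra_ctx *ctx, GL *gl)
    {
        for (int n = 0; mpgl_preferred_gl_versions[n]; n++) {
            int ver = mpgl_preferred_gl_versions[n];
            // Skip versions rejected by --opengl-es / --opengl-restrict
            if (!ra_gl_ctx_test_version(ctx, ver, false))
                continue;
            if (try_create_context(ctx, gl, ver)) // hypothetical helper
                return true;
        }
        return false; // the caller may still retry with a GLES version
    }
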
@@ -103,140 +34,319 @@ const int mpgl_preferred_gl_versions[] = { 0 }; -int mpgl_find_backend(const char *name) +enum { + FLUSH_NO = 0, + FLUSH_YES, + FLUSH_AUTO, +}; + +enum { + GLES_AUTO = 0, + GLES_YES, + GLES_NO, +}; + +struct opengl_opts { + int use_glfinish; + int waitvsync; + int vsync_pattern[2]; + int swapinterval; + int early_flush; + int restrict_version; + int gles_mode; +}; + +#define OPT_BASE_STRUCT struct opengl_opts +const struct m_sub_options opengl_conf = { + .opts = (const struct m_option[]) { + OPT_FLAG("opengl-glfinish", use_glfinish, 0), + OPT_FLAG("opengl-waitvsync", waitvsync, 0), + OPT_INT("opengl-swapinterval", swapinterval, 0), + OPT_INTPAIR("opengl-check-pattern", vsync_pattern, 0), + OPT_INT("opengl-restrict", restrict_version, 0), + OPT_CHOICE("opengl-es", gles_mode, 0, + ({"auto", GLES_AUTO}, {"yes", GLES_YES}, {"no", GLES_NO})), + OPT_CHOICE("opengl-early-flush", early_flush, 0, + ({"no", FLUSH_NO}, {"yes", FLUSH_YES}, {"auto", FLUSH_AUTO})), + + OPT_REPLACED("opengl-debug", "gpu-debug"), + OPT_REPLACED("opengl-sw", "gpu-sw"), + OPT_REPLACED("opengl-vsync-fences", "swapchain-depth"), + OPT_REPLACED("opengl-backend", "gpu-context"), + {0}, + }, + .defaults = &(const struct opengl_opts) { + .swapinterval = 1, + }, + .size = sizeof(struct opengl_opts), +}; + +struct priv { + GL *gl; + struct mp_log *log; + struct ra_gl_ctx_params params; + struct opengl_opts *opts; + struct ra_swapchain_fns fns; + GLuint main_fb; + struct ra_tex *wrapped_fb; // corresponds to main_fb + // for debugging: + int frames_rendered; + unsigned int prev_sgi_sync_count; + // for gl_vsync_pattern + int last_pattern; + int matches, mismatches; + // for swapchain_depth simulation + GLsync *vsync_fences; + int num_vsync_fences; +}; + +bool ra_gl_ctx_test_version(struct ra_ctx *ctx, int version, bool es) { - if (name == NULL || strcmp(name, "auto") == 0) - return -1; - for (int n = 0; n < MP_ARRAY_SIZE(backends); n++) { - if (strcmp(backends[n]->name, name) == 0) - return n; + bool ret; + struct opengl_opts *opts; + void *tmp = talloc_new(NULL); + opts = mp_get_config_group(tmp, ctx->global, &opengl_conf); + + // Version too high + if (opts->restrict_version && version >= opts->restrict_version) { + ret = false; + goto done; } - return -2; -} -int mpgl_validate_backend_opt(struct mp_log *log, const struct m_option *opt, - struct bstr name, struct bstr param) -{ - if (bstr_equals0(param, "help")) { - mp_info(log, "OpenGL windowing backends:\n"); - mp_info(log, " auto (autodetect)\n"); - for (int n = 0; n < MP_ARRAY_SIZE(backends); n++) - mp_info(log, " %s\n", backends[n]->name); - return M_OPT_EXIT; + switch (opts->gles_mode) { + case GLES_YES: ret = es; goto done; + case GLES_NO: ret = !es; goto done; + case GLES_AUTO: ret = true; goto done; + default: abort(); } - char s[20]; - snprintf(s, sizeof(s), "%.*s", BSTR_P(param)); - return mpgl_find_backend(s) >= -1 ? 1 : M_OPT_INVALID; + +done: + talloc_free(tmp); + return ret; } -static void *get_native_display(void *pctx, const char *name) +static void *get_native_display(void *priv, const char *name) { - MPGLContext *ctx = pctx; - if (!ctx->native_display_type || !name) + struct priv *p = priv; + if (!p->params.native_display_type || !name) + return NULL; + if (strcmp(p->params.native_display_type, name) != 0) return NULL; - return strcmp(ctx->native_display_type, name) == 0 ? 
ctx->native_display : NULL; + + return p->params.native_display; } -static MPGLContext *init_backend(struct vo *vo, const struct mpgl_driver *driver, - bool probing, int vo_flags) +void ra_gl_ctx_uninit(struct ra_ctx *ctx) { - MPGLContext *ctx = talloc_ptrtype(NULL, ctx); - *ctx = (MPGLContext) { - .gl = talloc_zero(ctx, GL), - .vo = vo, - .global = vo->global, - .driver = driver, - .log = vo->log, + if (ctx->ra) + ctx->ra->fns->destroy(ctx->ra); + if (ctx->swapchain) { + talloc_free(ctx->swapchain); + ctx->swapchain = NULL; + } +} + +static const struct ra_swapchain_fns ra_gl_swapchain_fns; + +bool ra_gl_ctx_init(struct ra_ctx *ctx, GL *gl, struct ra_gl_ctx_params params) +{ + struct ra_swapchain *sw = ctx->swapchain = talloc_ptrtype(NULL, sw); + *sw = (struct ra_swapchain) { + .ctx = ctx, + .flip_v = !params.flipped, // OpenGL framebuffers are normally inverted }; - if (probing) - vo_flags |= VOFLAG_PROBING; - bool old_probing = vo->probing; - vo->probing = probing; // hack; kill it once backends are separate - MP_VERBOSE(vo, "Initializing OpenGL backend '%s'\n", ctx->driver->name); - ctx->priv = talloc_zero_size(ctx, ctx->driver->priv_size); - if (ctx->driver->init(ctx, vo_flags) < 0) { - vo->probing = old_probing; - talloc_free(ctx); - return NULL; + + struct priv *p = sw->priv = talloc_ptrtype(sw, p); + *p = (struct priv) { + .gl = gl, + .log = ctx->log, + .params = params, + .opts = mp_get_config_group(p, ctx->global, &opengl_conf), + .fns = ra_gl_swapchain_fns, + }; + + sw->fns = &p->fns; + + const struct ra_swapchain_fns *ext = p->params.external_swapchain; + if (ext) { + if (ext->color_depth) + p->fns.color_depth = ext->color_depth; + if (ext->screenshot) + p->fns.screenshot = ext->screenshot; + if (ext->start_frame) + p->fns.start_frame = ext->start_frame; + if (ext->submit_frame) + p->fns.submit_frame = ext->submit_frame; + if (ext->swap_buffers) + p->fns.swap_buffers = ext->swap_buffers; } - vo->probing = old_probing; - if (!ctx->gl->version && !ctx->gl->es) - goto cleanup; + if (!gl->version && !gl->es) + return false; - if (probing && ctx->gl->es && (vo_flags & VOFLAG_NO_GLES)) { - MP_VERBOSE(ctx->vo, "Skipping GLES backend.\n"); - goto cleanup; + if (gl->mpgl_caps & MPGL_CAP_SW) { + MP_WARN(p, "Suspected software renderer or indirect context.\n"); + if (ctx->opts.probing && !ctx->opts.allow_sw) + return false; } - if (ctx->gl->mpgl_caps & MPGL_CAP_SW) { - MP_WARN(ctx->vo, "Suspected software renderer or indirect context.\n"); - if (vo->probing && !(vo_flags & VOFLAG_SW)) - goto cleanup; + gl->debug_context = ctx->opts.debug; + gl->get_native_display_ctx = p; + gl->get_native_display = get_native_display; + + if (gl->SwapInterval) { + gl->SwapInterval(p->opts->swapinterval); + } else { + MP_VERBOSE(p, "GL_*_swap_control extension missing.\n"); } - ctx->gl->debug_context = !!(vo_flags & VOFLAG_GL_DEBUG); + ctx->ra = ra_create_gl(p->gl, ctx->log); + return !!ctx->ra; +} - ctx->gl->get_native_display_ctx = ctx; - ctx->gl->get_native_display = get_native_display; +void ra_gl_ctx_resize(struct ra_swapchain *sw, int w, int h, int fbo) +{ + struct priv *p = sw->priv; + if (p->main_fb == fbo && p->wrapped_fb && p->wrapped_fb->params.w == w + && p->wrapped_fb->params.h == h) + return; - return ctx; + if (p->wrapped_fb) + ra_tex_free(sw->ctx->ra, &p->wrapped_fb); -cleanup: - mpgl_uninit(ctx); - return NULL; + p->main_fb = fbo; + p->wrapped_fb = ra_create_wrapped_fb(sw->ctx->ra, fbo, w, h); } -// Create a VO window and create a GL context on it. 
-// vo_flags: passed to the backend's create window function -MPGLContext *mpgl_init(struct vo *vo, const char *backend_name, int vo_flags) +int ra_gl_ctx_color_depth(struct ra_swapchain *sw) { - MPGLContext *ctx = NULL; - int index = mpgl_find_backend(backend_name); - if (index == -1) { - for (int n = 0; n < MP_ARRAY_SIZE(backends); n++) { - ctx = init_backend(vo, backends[n], true, vo_flags); - if (ctx) - break; - } - // VO forced, but no backend is ok => force the first that works at all - if (!ctx && !vo->probing) { - for (int n = 0; n < MP_ARRAY_SIZE(backends); n++) { - ctx = init_backend(vo, backends[n], false, vo_flags); - if (ctx) - break; - } - } - } else if (index >= 0) { - ctx = init_backend(vo, backends[index], false, vo_flags); - } - return ctx; + struct priv *p = sw->priv; + GL *gl = p->gl; + + if (!p->wrapped_fb) + return 0; + + if ((gl->es < 300 && !gl->version) || !(gl->mpgl_caps & MPGL_CAP_FB)) + return 0; + + gl->BindFramebuffer(GL_FRAMEBUFFER, p->main_fb); + + GLenum obj = gl->version ? GL_BACK_LEFT : GL_BACK; + if (p->main_fb) + obj = GL_COLOR_ATTACHMENT0; + + GLint depth_g = 0; + + gl->GetFramebufferAttachmentParameteriv(GL_FRAMEBUFFER, obj, + GL_FRAMEBUFFER_ATTACHMENT_GREEN_SIZE, &depth_g); + + gl->BindFramebuffer(GL_FRAMEBUFFER, 0); + + return depth_g; } -int mpgl_reconfig_window(struct MPGLContext *ctx) +struct mp_image *ra_gl_ctx_screenshot(struct ra_swapchain *sw) { - return ctx->driver->reconfig(ctx); + struct priv *p = sw->priv; + + assert(p->wrapped_fb); + struct mp_image *screen = gl_read_fbo_contents(p->gl, p->main_fb, + p->wrapped_fb->params.w, + p->wrapped_fb->params.h); + + // OpenGL FB is also read in flipped order, so we need to flip when the + // rendering is *not* flipped, which in our case is whenever + // p->params.flipped is true. 
I hope that made sense + if (p->params.flipped) + mp_image_vflip(screen); + + return screen; } -int mpgl_control(struct MPGLContext *ctx, int *events, int request, void *arg) +struct ra_tex *ra_gl_ctx_start_frame(struct ra_swapchain *sw) { - return ctx->driver->control(ctx, events, request, arg); + struct priv *p = sw->priv; + + return p->wrapped_fb; } -void mpgl_start_frame(struct MPGLContext *ctx) +bool ra_gl_ctx_submit_frame(struct ra_swapchain *sw, const struct vo_frame *frame) { - if (ctx->driver->start_frame) - ctx->driver->start_frame(ctx); + struct priv *p = sw->priv; + GL *gl = p->gl; + + if (p->opts->use_glfinish) + gl->Finish(); + + if (gl->FenceSync && !p->params.external_swapchain) { + GLsync fence = gl->FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + if (fence) + MP_TARRAY_APPEND(p, p->vsync_fences, p->num_vsync_fences, fence); + } + + switch (p->opts->early_flush) { + case FLUSH_AUTO: + if (frame->display_synced) + break; + // fall through + case FLUSH_YES: + gl->Flush(); + } + + return true; } -void mpgl_swap_buffers(struct MPGLContext *ctx) +static void check_pattern(struct priv *p, int item) { - ctx->driver->swap_buffers(ctx); + int expected = p->opts->vsync_pattern[p->last_pattern]; + if (item == expected) { + p->last_pattern++; + if (p->last_pattern >= 2) + p->last_pattern = 0; + p->matches++; + } else { + p->mismatches++; + MP_WARN(p, "wrong pattern, expected %d got %d (hit: %d, mis: %d)\n", + expected, item, p->matches, p->mismatches); + } } -void mpgl_uninit(MPGLContext *ctx) +void ra_gl_ctx_swap_buffers(struct ra_swapchain *sw) { - if (ctx) - ctx->driver->uninit(ctx); - talloc_free(ctx); + struct priv *p = sw->priv; + GL *gl = p->gl; + + p->params.swap_buffers(sw->ctx); + p->frames_rendered++; + + if (p->frames_rendered > 5 && !sw->ctx->opts.debug) + ra_gl_set_debug(sw->ctx->ra, false); + + if ((p->opts->waitvsync || p->opts->vsync_pattern[0]) + && gl->GetVideoSync) + { + unsigned int n1 = 0, n2 = 0; + gl->GetVideoSync(&n1); + if (p->opts->waitvsync) + gl->WaitVideoSync(2, (n1 + 1) % 2, &n2); + int step = n1 - p->prev_sgi_sync_count; + p->prev_sgi_sync_count = n1; + MP_DBG(p, "Flip counts: %u->%u, step=%d\n", n1, n2, step); + if (p->opts->vsync_pattern[0]) + check_pattern(p, step); + } + + while (p->num_vsync_fences >= sw->ctx->opts.swapchain_depth) { + gl->ClientWaitSync(p->vsync_fences[0], GL_SYNC_FLUSH_COMMANDS_BIT, 1e9); + gl->DeleteSync(p->vsync_fences[0]); + MP_TARRAY_REMOVE_AT(p->vsync_fences, p->num_vsync_fences, 0); + } } + +static const struct ra_swapchain_fns ra_gl_swapchain_fns = { + .color_depth = ra_gl_ctx_color_depth, + .screenshot = ra_gl_ctx_screenshot, + .start_frame = ra_gl_ctx_start_frame, + .submit_frame = ra_gl_ctx_submit_frame, + .swap_buffers = ra_gl_ctx_swap_buffers, +}; diff --git a/video/out/opengl/context.h b/video/out/opengl/context.h index 229c5ef54f..bdf426b9b4 100644 --- a/video/out/opengl/context.h +++ b/video/out/opengl/context.h @@ -1,116 +1,56 @@ -/* - * common OpenGL routines - * - * copyleft (C) 2005-2010 Reimar Döffinger - * Special thanks go to the xine team and Matthias Hopf, whose video_out_opengl.c - * gave me lots of good ideas. - * - * This file is part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see . - */ - -#ifndef MP_GL_CONTEXT_H_ -#define MP_GL_CONTEXT_H_ +#pragma once +#include "common/global.h" +#include "video/out/gpu/context.h" #include "common.h" -enum { - VOFLAG_GLES = 1 << 0, // Hint to create a GLES context - VOFLAG_NO_GLES = 1 << 1, // Hint to create a desktop GL context - VOFLAG_GL_DEBUG = 1 << 2, // Hint to request debug OpenGL context - VOFLAG_ALPHA = 1 << 3, // Hint to request alpha framebuffer - VOFLAG_SW = 1 << 4, // Hint to accept a software GL renderer - VOFLAG_PROBING = 1 << 6, // The backend is being auto-probed. - VOFLAG_GLES2 = 1 << 7, // Hint for GLESv2 (needs VOFLAG_GLES) -}; - extern const int mpgl_preferred_gl_versions[]; -struct MPGLContext; - -// A windowing backend (like X11, win32, ...), which provides OpenGL rendering. -struct mpgl_driver { - const char *name; - - // Size of the struct allocated for MPGLContext.priv - int priv_size; - - // Init the GL context and possibly the underlying VO backend. - // The created context should be compatible to GL 3.2 core profile, but - // some other GL versions are supported as well (e.g. GL 2.1 or GLES 2). - // Return 0 on success, negative value (-1) on error. - int (*init)(struct MPGLContext *ctx, int vo_flags); - - // Resize the window, or create a new window if there isn't one yet. - // Currently, there is an unfortunate interaction with ctx->vo, and - // display size etc. are determined by it. - // Return 0 on success, negative value (-1) on error. - int (*reconfig)(struct MPGLContext *ctx); - - // Called when rendering starts. The backend can map or resize the - // framebuffer, or update GL.main_fb. swap_buffers() ends the frame. - // Optional. - void (*start_frame)(struct MPGLContext *ctx); - - // Present the frame. - void (*swap_buffers)(struct MPGLContext *ctx); - - // This behaves exactly like vo_driver.control(). - int (*control)(struct MPGLContext *ctx, int *events, int request, void *arg); - - // These behave exactly like vo_driver.wakeup/wait_events. They are - // optional. - void (*wakeup)(struct MPGLContext *ctx); - void (*wait_events)(struct MPGLContext *ctx, int64_t until_time_us); - - // Destroy the GL context and possibly the underlying VO backend. - void (*uninit)(struct MPGLContext *ctx); -}; - -typedef struct MPGLContext { - GL *gl; - struct vo *vo; - const struct mpgl_driver *driver; - struct mpv_global *global; - struct mp_log *log; - - // For hwdec_vaegl.c. +// Returns whether or not a candidate GL version should be accepted or not +// (based on the --opengl opts). Implementations may call this before +// ra_gl_ctx_init if they wish to probe for multiple possible GL versions. +bool ra_gl_ctx_test_version(struct ra_ctx *ctx, int version, bool es); + +// These are a set of helpers for ra_ctx providers based on ra_gl. +// The init function also initializes ctx->ra and ctx->swapchain, so the user +// doesn't have to do this manually. (Similarly, the uninit function will +// clean them up) + +struct ra_gl_ctx_params { + // Set to the platform-specific function to swap buffers, like + // glXSwapBuffers, eglSwapBuffers etc. This will be called by + // ra_gl_ctx_swap_buffers. 
Required unless you either never call that
+    // function or override it yourself.
+    void (*swap_buffers)(struct ra_ctx *ctx);
+
+    // Set to false if the implementation follows normal GL semantics, which is
+    // upside down. Set to true if it does *not*, i.e. if rendering is right
+    // side up.
+    bool flipped;
+
+    // If this is set to non-NULL, then the ra_gl_ctx will consider the GL
+    // implementation to be using an external swapchain, which disables the
+    // software simulation of --swapchain-depth. Any functions defined by this
+    // ra_swapchain_fns struct will entirely replace the equivalent ra_gl_ctx
+    // functions in the resulting ra_swapchain.
+    const struct ra_swapchain_fns *external_swapchain;
+
+    // For hwdec_vaegl.c:
     const char *native_display_type;
     void *native_display;
+};
-    // Flip the rendered image vertically. This is useful for dxinterop.
-    bool flip_v;
-
-    // framebuffer to render to (normally 0)
-    GLuint main_fb;
-
-    // For free use by the mpgl_driver.
-    void *priv;
-} MPGLContext;
-MPGLContext *mpgl_init(struct vo *vo, const char *backend_name, int vo_flags);
-void mpgl_uninit(MPGLContext *ctx);
-int mpgl_reconfig_window(struct MPGLContext *ctx);
-int mpgl_control(struct MPGLContext *ctx, int *events, int request, void *arg);
-void mpgl_start_frame(struct MPGLContext *ctx);
-void mpgl_swap_buffers(struct MPGLContext *ctx);
-
-int mpgl_find_backend(const char *name);
+void ra_gl_ctx_uninit(struct ra_ctx *ctx);
+bool ra_gl_ctx_init(struct ra_ctx *ctx, GL *gl, struct ra_gl_ctx_params params);
-struct m_option;
-int mpgl_validate_backend_opt(struct mp_log *log, const struct m_option *opt,
-                              struct bstr name, struct bstr param);
+// Call this any time the window size or main framebuffer changes
+void ra_gl_ctx_resize(struct ra_swapchain *sw, int w, int h, int fbo);
-#endif
+// These functions are normally set in the ra_swapchain->fns, but if an
+// implementation has a need to override this fns struct with custom functions
+// for whatever reason, these can be used to inherit the original behavior.
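[Editor's sketch, not part of the patch: a stripped-down backend using the helpers declared above. The my_* names are hypothetical placeholders; the real ports below (context_mali_fbdev.c, context_wayland.c, context_rpi.c) follow the same shape.]

    static void my_swap_buffers(struct ra_ctx *ctx)
    {
        // e.g. eglSwapBuffers()/glXSwapBuffers() for the platform in question
        my_platform_present(ctx);
    }

    static bool my_init(struct ra_ctx *ctx)
    {
        GL *gl = my_platform_create_gl(ctx); // create GL context + make current
        if (!gl)
            return false;

        struct ra_gl_ctx_params params = {
            .swap_buffers = my_swap_buffers,
        };
        // On success this fills in ctx->ra and ctx->swapchain
        return ra_gl_ctx_init(ctx, gl, params);
    }

    static bool my_reconfig(struct ra_ctx *ctx)
    {
        my_platform_resize_window(ctx);
        ra_gl_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight, 0);
        return true;
    }

    static void my_uninit(struct ra_ctx *ctx)
    {
        ra_gl_ctx_uninit(ctx); // tears down ctx->ra and ctx->swapchain again
        my_platform_destroy(ctx);
    }
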
+int ra_gl_ctx_color_depth(struct ra_swapchain *sw); +struct mp_image *ra_gl_ctx_screenshot(struct ra_swapchain *sw); +struct ra_tex *ra_gl_ctx_start_frame(struct ra_swapchain *sw); +bool ra_gl_ctx_submit_frame(struct ra_swapchain *sw, const struct vo_frame *frame); +void ra_gl_ctx_swap_buffers(struct ra_swapchain *sw); diff --git a/video/out/opengl/context_cocoa.c b/video/out/opengl/context_cocoa.c index 1d9a10cf38..cdf6faffcd 100644 --- a/video/out/opengl/context_cocoa.c +++ b/video/out/opengl/context_cocoa.c @@ -188,4 +188,4 @@ const struct mpgl_driver mpgl_driver_cocoa = { .swap_buffers = cocoa_swap_buffers, .control = cocoa_control, .uninit = cocoa_uninit, -}; \ No newline at end of file +}; diff --git a/video/out/opengl/context_drm_egl.c b/video/out/opengl/context_drm_egl.c index e52fec451b..21b16a52d5 100644 --- a/video/out/opengl/context_drm_egl.c +++ b/video/out/opengl/context_drm_egl.c @@ -28,10 +28,12 @@ #include #include -#include "context.h" -#include "egl_helpers.h" -#include "common/common.h" #include "video/out/drm_common.h" +#include "common/common.h" + +#include "egl_helpers.h" +#include "common.h" +#include "context.h" #define USE_MASTER 0 @@ -59,6 +61,7 @@ struct egl }; struct priv { + GL gl; struct kms *kms; drmEventContext ev; @@ -75,34 +78,33 @@ struct priv { struct vt_switcher vt_switcher; }; -static bool init_egl(struct MPGLContext *ctx, int flags) +static bool init_egl(struct ra_ctx *ctx) { struct priv *p = ctx->priv; - MP_VERBOSE(ctx->vo, "Initializing EGL\n"); + MP_VERBOSE(ctx, "Initializing EGL\n"); p->egl.display = eglGetDisplay(p->gbm.device); if (p->egl.display == EGL_NO_DISPLAY) { - MP_ERR(ctx->vo, "Failed to get EGL display.\n"); + MP_ERR(ctx, "Failed to get EGL display.\n"); return false; } if (!eglInitialize(p->egl.display, NULL, NULL)) { - MP_ERR(ctx->vo, "Failed to initialize EGL.\n"); + MP_ERR(ctx, "Failed to initialize EGL.\n"); return false; } EGLConfig config; - if (!mpegl_create_context(p->egl.display, ctx->vo->log, flags, - &p->egl.context, &config)) - return -1; - MP_VERBOSE(ctx->vo, "Initializing EGL surface\n"); + if (!mpegl_create_context(ctx, p->egl.display, &p->egl.context, &config)) + return false; + MP_VERBOSE(ctx, "Initializing EGL surface\n"); p->egl.surface = eglCreateWindowSurface(p->egl.display, config, p->gbm.surface, NULL); if (p->egl.surface == EGL_NO_SURFACE) { - MP_ERR(ctx->vo, "Failed to create EGL surface.\n"); + MP_ERR(ctx, "Failed to create EGL surface.\n"); return false; } return true; } -static bool init_gbm(struct MPGLContext *ctx) +static bool init_gbm(struct ra_ctx *ctx) { struct priv *p = ctx->priv; MP_VERBOSE(ctx->vo, "Creating GBM device\n"); @@ -136,7 +138,7 @@ static void framebuffer_destroy_callback(struct gbm_bo *bo, void *data) } static void update_framebuffer_from_bo( - const struct MPGLContext *ctx, struct gbm_bo *bo) + const struct ra_ctx *ctx, struct gbm_bo *bo) { struct priv *p = ctx->priv; p->fb.bo = bo; @@ -161,7 +163,7 @@ static void page_flipped(int fd, unsigned int frame, unsigned int sec, p->waiting_for_flip = false; } -static bool crtc_setup(struct MPGLContext *ctx) +static bool crtc_setup(struct ra_ctx *ctx) { struct priv *p = ctx->priv; if (p->active) @@ -174,7 +176,7 @@ static bool crtc_setup(struct MPGLContext *ctx) return ret == 0; } -static void crtc_release(struct MPGLContext *ctx) +static void crtc_release(struct ra_ctx *ctx) { struct priv *p = ctx->priv; @@ -204,7 +206,7 @@ static void crtc_release(struct MPGLContext *ctx) static void release_vt(void *data) { - struct MPGLContext *ctx = 
data; + struct ra_ctx *ctx = data; MP_VERBOSE(ctx->vo, "Releasing VT"); crtc_release(ctx); if (USE_MASTER) { @@ -221,7 +223,7 @@ static void release_vt(void *data) static void acquire_vt(void *data) { - struct MPGLContext *ctx = data; + struct ra_ctx *ctx = data; MP_VERBOSE(ctx->vo, "Acquiring VT"); if (USE_MASTER) { struct priv *p = ctx->priv; @@ -234,11 +236,41 @@ static void acquire_vt(void *data) crtc_setup(ctx); } -static void drm_egl_uninit(MPGLContext *ctx) +static void drm_egl_swap_buffers(struct ra_ctx *ctx) { struct priv *p = ctx->priv; - crtc_release(ctx); + eglSwapBuffers(p->egl.display, p->egl.surface); + p->gbm.next_bo = gbm_surface_lock_front_buffer(p->gbm.surface); + p->waiting_for_flip = true; + update_framebuffer_from_bo(ctx, p->gbm.next_bo); + int ret = drmModePageFlip(p->kms->fd, p->kms->crtc_id, p->fb.id, + DRM_MODE_PAGE_FLIP_EVENT, p); + if (ret) { + MP_WARN(ctx->vo, "Failed to queue page flip: %s\n", mp_strerror(errno)); + } + + // poll page flip finish event + const int timeout_ms = 3000; + struct pollfd fds[1] = { { .events = POLLIN, .fd = p->kms->fd } }; + poll(fds, 1, timeout_ms); + if (fds[0].revents & POLLIN) { + ret = drmHandleEvent(p->kms->fd, &p->ev); + if (ret != 0) { + MP_ERR(ctx->vo, "drmHandleEvent failed: %i\n", ret); + return; + } + } + + gbm_surface_release_buffer(p->gbm.surface, p->gbm.bo); + p->gbm.bo = p->gbm.next_bo; +} +static void drm_egl_uninit(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + ra_gl_ctx_uninit(ctx); + + crtc_release(ctx); if (p->vt_switcher_active) vt_switcher_destroy(&p->vt_switcher); @@ -258,19 +290,14 @@ static void drm_egl_uninit(MPGLContext *ctx) } } -static int drm_egl_init(struct MPGLContext *ctx, int flags) +static bool drm_egl_init(struct ra_ctx *ctx) { - if (ctx->vo->probing) { - MP_VERBOSE(ctx->vo, "DRM EGL backend can be activated only manually.\n"); - return -1; + if (ctx->opts.probing) { + MP_VERBOSE(ctx, "DRM EGL backend can be activated only manually.\n"); + return false; } - struct priv *p = ctx->priv; - p->kms = NULL; - p->old_crtc = NULL; - p->gbm.surface = NULL; - p->gbm.device = NULL; - p->active = false; - p->waiting_for_flip = false; + + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); p->ev.version = DRM_EVENT_CONTEXT_VERSION; p->ev.page_flip_handler = page_flipped; @@ -279,79 +306,76 @@ static int drm_egl_init(struct MPGLContext *ctx, int flags) vt_switcher_acquire(&p->vt_switcher, acquire_vt, ctx); vt_switcher_release(&p->vt_switcher, release_vt, ctx); } else { - MP_WARN(ctx->vo, "Failed to set up VT switcher. Terminal switching will be unavailable.\n"); + MP_WARN(ctx, "Failed to set up VT switcher. 
Terminal switching will be unavailable.\n"); } - MP_VERBOSE(ctx->vo, "Initializing KMS\n"); - p->kms = kms_create(ctx->vo->log, ctx->vo->opts->drm_connector_spec, + MP_VERBOSE(ctx, "Initializing KMS\n"); + p->kms = kms_create(ctx->log, ctx->vo->opts->drm_connector_spec, ctx->vo->opts->drm_mode_id); if (!p->kms) { MP_ERR(ctx->vo, "Failed to create KMS.\n"); - return -1; + return false; } if (!init_gbm(ctx)) { MP_ERR(ctx->vo, "Failed to setup GBM.\n"); - return -1; + return false; } - if (!init_egl(ctx, flags)) { + if (!init_egl(ctx)) { MP_ERR(ctx->vo, "Failed to setup EGL.\n"); - return -1; + return false; } if (!eglMakeCurrent(p->egl.display, p->egl.surface, p->egl.surface, p->egl.context)) { MP_ERR(ctx->vo, "Failed to make context current.\n"); - return -1; + return false; } - mpegl_load_functions(ctx->gl, ctx->vo->log); - - ctx->native_display_type = "drm"; - ctx->native_display = (void *)(intptr_t)p->kms->fd; - + mpegl_load_functions(&p->gl, ctx->vo->log); // required by gbm_surface_lock_front_buffer eglSwapBuffers(p->egl.display, p->egl.surface); - MP_VERBOSE(ctx->vo, "Preparing framebuffer\n"); + MP_VERBOSE(ctx, "Preparing framebuffer\n"); p->gbm.bo = gbm_surface_lock_front_buffer(p->gbm.surface); if (!p->gbm.bo) { - MP_ERR(ctx->vo, "Failed to lock GBM surface.\n"); - return -1; + MP_ERR(ctx, "Failed to lock GBM surface.\n"); + return false; } update_framebuffer_from_bo(ctx, p->gbm.bo); if (!p->fb.id) { - MP_ERR(ctx->vo, "Failed to create framebuffer.\n"); - return -1; + MP_ERR(ctx, "Failed to create framebuffer.\n"); + return false; } if (!crtc_setup(ctx)) { - MP_ERR(ctx->vo, "Failed to set CRTC for connector %u: %s\n", + MP_ERR(ctx, "Failed to set CRTC for connector %u: %s\n", p->kms->connector->connector_id, mp_strerror(errno)); - return -1; + return false; } - return 0; -} + struct ra_gl_ctx_params params = { + .swap_buffers = drm_egl_swap_buffers, + .native_display_type = "drm", + .native_display = (void *)(intptr_t)p->kms->fd, + }; + if (!ra_gl_ctx_init(ctx, &p->gl, params)) + return false; -static int drm_egl_init_deprecated(struct MPGLContext *ctx, int flags) -{ - if (ctx->vo->probing) - return -1; - MP_WARN(ctx->vo, "'drm-egl' is deprecated, use 'drm' instead.\n"); - return drm_egl_init(ctx, flags); + return true; } -static int drm_egl_reconfig(struct MPGLContext *ctx) +static bool drm_egl_reconfig(struct ra_ctx *ctx) { struct priv *p = ctx->priv; ctx->vo->dwidth = p->fb.width; ctx->vo->dheight = p->fb.height; - return 0; + ra_gl_ctx_resize(ctx->swapchain, p->fb.width, p->fb.height, 0); + return true; } -static int drm_egl_control(struct MPGLContext *ctx, int *events, int request, +static int drm_egl_control(struct ra_ctx *ctx, int *events, int request, void *arg) { struct priv *p = ctx->priv; @@ -367,51 +391,11 @@ static int drm_egl_control(struct MPGLContext *ctx, int *events, int request, return VO_NOTIMPL; } -static void drm_egl_swap_buffers(MPGLContext *ctx) -{ - struct priv *p = ctx->priv; - eglSwapBuffers(p->egl.display, p->egl.surface); - p->gbm.next_bo = gbm_surface_lock_front_buffer(p->gbm.surface); - p->waiting_for_flip = true; - update_framebuffer_from_bo(ctx, p->gbm.next_bo); - int ret = drmModePageFlip(p->kms->fd, p->kms->crtc_id, p->fb.id, - DRM_MODE_PAGE_FLIP_EVENT, p); - if (ret) { - MP_WARN(ctx->vo, "Failed to queue page flip: %s\n", mp_strerror(errno)); - } - - // poll page flip finish event - const int timeout_ms = 3000; - struct pollfd fds[1] = { { .events = POLLIN, .fd = p->kms->fd } }; - poll(fds, 1, timeout_ms); - if (fds[0].revents & POLLIN) { - ret = 
drmHandleEvent(p->kms->fd, &p->ev); - if (ret != 0) { - MP_ERR(ctx->vo, "drmHandleEvent failed: %i\n", ret); - return; - } - } - - gbm_surface_release_buffer(p->gbm.surface, p->gbm.bo); - p->gbm.bo = p->gbm.next_bo; -} - -const struct mpgl_driver mpgl_driver_drm = { +const struct ra_ctx_fns ra_ctx_drm_egl = { + .type = "opengl", .name = "drm", - .priv_size = sizeof(struct priv), - .init = drm_egl_init, .reconfig = drm_egl_reconfig, - .swap_buffers = drm_egl_swap_buffers, - .control = drm_egl_control, - .uninit = drm_egl_uninit, -}; - -const struct mpgl_driver mpgl_driver_drm_egl = { - .name = "drm-egl", - .priv_size = sizeof(struct priv), - .init = drm_egl_init_deprecated, - .reconfig = drm_egl_reconfig, - .swap_buffers = drm_egl_swap_buffers, .control = drm_egl_control, + .init = drm_egl_init, .uninit = drm_egl_uninit, }; diff --git a/video/out/opengl/context_glx.c b/video/out/opengl/context_glx.c new file mode 100644 index 0000000000..462f2cf592 --- /dev/null +++ b/video/out/opengl/context_glx.c @@ -0,0 +1,376 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see . + */ + +#include +#include + +// FreeBSD 10.0-CURRENT lacks the GLX_ARB_create_context extension completely +#ifndef GLX_CONTEXT_MAJOR_VERSION_ARB +#define GLX_CONTEXT_MAJOR_VERSION_ARB 0x2091 +#define GLX_CONTEXT_MINOR_VERSION_ARB 0x2092 +#define GLX_CONTEXT_FLAGS_ARB 0x2094 +#define GLX_CONTEXT_PROFILE_MASK_ARB 0x9126 +#ifndef __APPLE__ +// These are respectively 0x00000001 and 0x00000002 on OSX +#define GLX_CONTEXT_DEBUG_BIT_ARB 0x0001 +#define GLX_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB 0x0002 +#endif +#define GLX_CONTEXT_CORE_PROFILE_BIT_ARB 0x00000001 +#define GLX_CONTEXT_COMPATIBILITY_PROFILE_BIT_ARB 0x00000002 +#endif +// GLX_EXT_create_context_es2_profile +#ifndef GLX_CONTEXT_ES2_PROFILE_BIT_EXT +#define GLX_CONTEXT_ES2_PROFILE_BIT_EXT 0x00000004 +#endif + +#include "video/out/x11_common.h" +#include "context.h" +#include "utils.h" + +struct priv { + GL gl; + XVisualInfo *vinfo; + GLXContext context; + GLXFBConfig fbc; +}; + +static void glx_uninit(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + ra_gl_ctx_uninit(ctx); + + if (p->vinfo) + XFree(p->vinfo); + if (p->context) { + Display *display = ctx->vo->x11->display; + glXMakeCurrent(display, None, NULL); + glXDestroyContext(display, p->context); + } + + vo_x11_uninit(ctx->vo); +} + +static bool create_context_x11_old(struct ra_ctx *ctx, GL *gl) +{ + struct priv *p = ctx->priv; + Display *display = ctx->vo->x11->display; + struct vo *vo = ctx->vo; + + if (p->context) + return true; + + if (!p->vinfo) { + MP_FATAL(vo, "Can't create a legacy GLX context without X visual\n"); + return false; + } + + GLXContext new_context = glXCreateContext(display, p->vinfo, NULL, True); + if (!new_context) { + MP_FATAL(vo, "Could not create GLX context!\n"); + return false; + } + + if (!glXMakeCurrent(display, ctx->vo->x11->window, new_context)) { + MP_FATAL(vo, "Could not set GLX 
context!\n"); + glXDestroyContext(display, new_context); + return false; + } + + const char *glxstr = glXQueryExtensionsString(display, ctx->vo->x11->screen); + + mpgl_load_functions(gl, (void *)glXGetProcAddressARB, glxstr, vo->log); + + p->context = new_context; + + return true; +} + +typedef GLXContext (*glXCreateContextAttribsARBProc) + (Display*, GLXFBConfig, GLXContext, Bool, const int*); + +static bool create_context_x11_gl3(struct ra_ctx *ctx, GL *gl, int gl_version, + bool es) +{ + struct priv *p = ctx->priv; + struct vo *vo = ctx->vo; + + if (p->context) + return true; + + if (!ra_gl_ctx_test_version(ctx, gl_version, es)) + return false; + + glXCreateContextAttribsARBProc glXCreateContextAttribsARB = + (glXCreateContextAttribsARBProc) + glXGetProcAddressARB((const GLubyte *)"glXCreateContextAttribsARB"); + + const char *glxstr = + glXQueryExtensionsString(vo->x11->display, vo->x11->screen); + bool have_ctx_ext = glxstr && !!strstr(glxstr, "GLX_ARB_create_context"); + + if (!(have_ctx_ext && glXCreateContextAttribsARB)) { + return false; + } + + int ctx_flags = ctx->opts.debug ? GLX_CONTEXT_DEBUG_BIT_ARB : 0; + int profile_mask = GLX_CONTEXT_CORE_PROFILE_BIT_ARB; + + if (es) { + profile_mask = GLX_CONTEXT_ES2_PROFILE_BIT_EXT; + if (!(glxstr && strstr(glxstr, "GLX_EXT_create_context_es2_profile"))) + return false; + } + + int context_attribs[] = { + GLX_CONTEXT_MAJOR_VERSION_ARB, MPGL_VER_GET_MAJOR(gl_version), + GLX_CONTEXT_MINOR_VERSION_ARB, MPGL_VER_GET_MINOR(gl_version), + GLX_CONTEXT_PROFILE_MASK_ARB, profile_mask, + GLX_CONTEXT_FLAGS_ARB, ctx_flags, + None + }; + vo_x11_silence_xlib(1); + GLXContext context = glXCreateContextAttribsARB(vo->x11->display, + p->fbc, 0, True, + context_attribs); + vo_x11_silence_xlib(-1); + if (!context) + return false; + + // set context + if (!glXMakeCurrent(vo->x11->display, vo->x11->window, context)) { + MP_FATAL(vo, "Could not set GLX context!\n"); + glXDestroyContext(vo->x11->display, context); + return false; + } + + p->context = context; + + mpgl_load_functions(gl, (void *)glXGetProcAddressARB, glxstr, vo->log); + + return true; +} + +// The GL3/FBC initialization code roughly follows/copies from: +// http://www.opengl.org/wiki/Tutorial:_OpenGL_3.0_Context_Creation_(GLX) +// but also uses some of the old code. + +static GLXFBConfig select_fb_config(struct vo *vo, const int *attribs, bool alpha) +{ + int fbcount; + GLXFBConfig *fbc = glXChooseFBConfig(vo->x11->display, vo->x11->screen, + attribs, &fbcount); + if (!fbc) + return NULL; + + // The list in fbc is sorted (so that the first element is the best). + GLXFBConfig fbconfig = fbcount > 0 ? 
fbc[0] : NULL; + + if (alpha) { + for (int n = 0; n < fbcount; n++) { + XVisualInfo *v = glXGetVisualFromFBConfig(vo->x11->display, fbc[n]); + if (v) { + bool is_rgba = vo_x11_is_rgba_visual(v); + XFree(v); + if (is_rgba) { + fbconfig = fbc[n]; + break; + } + } + } + } + + XFree(fbc); + + return fbconfig; +} + +static void set_glx_attrib(int *attribs, int name, int value) +{ + for (int n = 0; attribs[n * 2 + 0] != None; n++) { + if (attribs[n * 2 + 0] == name) { + attribs[n * 2 + 1] = value; + break; + } + } +} + +static void glx_swap_buffers(struct ra_ctx *ctx) +{ + glXSwapBuffers(ctx->vo->x11->display, ctx->vo->x11->window); +} + +static bool glx_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); + struct vo *vo = ctx->vo; + GL *gl = &p->gl; + + if (!vo_x11_init(ctx->vo)) + goto uninit; + + int glx_major, glx_minor; + + if (!glXQueryVersion(vo->x11->display, &glx_major, &glx_minor)) { + MP_ERR(ctx, "GLX not found.\n"); + goto uninit; + } + // FBConfigs were added in GLX version 1.3. + if (MPGL_VER(glx_major, glx_minor) < MPGL_VER(1, 3)) { + MP_ERR(ctx, "GLX version older than 1.3.\n"); + goto uninit; + } + + int glx_attribs[] = { + GLX_X_RENDERABLE, True, + GLX_X_VISUAL_TYPE, GLX_TRUE_COLOR, + GLX_RED_SIZE, 1, + GLX_GREEN_SIZE, 1, + GLX_BLUE_SIZE, 1, + GLX_ALPHA_SIZE, 0, + GLX_DOUBLEBUFFER, True, + None + }; + GLXFBConfig fbc = NULL; + if (ctx->opts.want_alpha) { + set_glx_attrib(glx_attribs, GLX_ALPHA_SIZE, 1); + fbc = select_fb_config(vo, glx_attribs, true); + if (!fbc) + set_glx_attrib(glx_attribs, GLX_ALPHA_SIZE, 0); + } + if (!fbc) + fbc = select_fb_config(vo, glx_attribs, false); + if (!fbc) { + MP_ERR(ctx, "no GLX support present\n"); + goto uninit; + } + + int fbid = -1; + if (!glXGetFBConfigAttrib(vo->x11->display, fbc, GLX_FBCONFIG_ID, &fbid)) + MP_VERBOSE(ctx, "GLX chose FB config with ID 0x%x\n", fbid); + + p->fbc = fbc; + p->vinfo = glXGetVisualFromFBConfig(vo->x11->display, fbc); + if (p->vinfo) { + MP_VERBOSE(ctx, "GLX chose visual with ID 0x%x\n", + (int)p->vinfo->visualid); + } else { + MP_WARN(ctx, "Selected GLX FB config has no associated X visual\n"); + } + + if (!vo_x11_create_vo_window(vo, p->vinfo, "gl")) + goto uninit; + + bool success = false; + for (int n = 0; mpgl_preferred_gl_versions[n]; n++) { + int version = mpgl_preferred_gl_versions[n]; + MP_VERBOSE(ctx, "Creating OpenGL %d.%d context...\n", + MPGL_VER_P(version)); + if (version >= 300) { + success = create_context_x11_gl3(ctx, gl, version, false); + } else { + success = create_context_x11_old(ctx, gl); + } + if (success) + break; + } + if (!success) // try again for GLES + success = create_context_x11_gl3(ctx, gl, 200, true); + if (success && !glXIsDirect(vo->x11->display, p->context)) + gl->mpgl_caps |= MPGL_CAP_SW; + if (!success) + goto uninit; + + struct ra_gl_ctx_params params = { + .swap_buffers = glx_swap_buffers, + }; + + if (!ra_gl_ctx_init(ctx, gl, params)) + goto uninit; + + return true; + +uninit: + glx_uninit(ctx); + return false; +} + +static bool glx_init_probe(struct ra_ctx *ctx) +{ + if (!glx_init(ctx)) + return false; + + struct priv *p = ctx->priv; + if (!(p->gl.mpgl_caps & MPGL_CAP_VDPAU)) { + MP_VERBOSE(ctx, "No vdpau support found - probing more things.\n"); + glx_uninit(ctx); + return false; + } + + return true; +} + +static void resize(struct ra_ctx *ctx) +{ + ra_gl_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight, 0); +} + +static bool glx_reconfig(struct ra_ctx *ctx) +{ + vo_x11_config_vo_window(ctx->vo); + resize(ctx); + return 
true; +} + +static int glx_control(struct ra_ctx *ctx, int *events, int request, void *arg) +{ + int ret = vo_x11_control(ctx->vo, events, request, arg); + if (*events & VO_EVENT_RESIZE) + resize(ctx); + return ret; +} + +static void glx_wakeup(struct ra_ctx *ctx) +{ + vo_x11_wakeup(ctx->vo); +} + +static void glx_wait_events(struct ra_ctx *ctx, int64_t until_time_us) +{ + vo_x11_wait_events(ctx->vo, until_time_us); +} + +const struct ra_ctx_fns ra_ctx_glx = { + .type = "opengl", + .name = "x11", + .reconfig = glx_reconfig, + .control = glx_control, + .wakeup = glx_wakeup, + .wait_events = glx_wait_events, + .init = glx_init, + .uninit = glx_uninit, +}; + +const struct ra_ctx_fns ra_ctx_glx_probe = { + .type = "opengl", + .name = "x11probe", + .reconfig = glx_reconfig, + .control = glx_control, + .wakeup = glx_wakeup, + .wait_events = glx_wait_events, + .init = glx_init_probe, + .uninit = glx_uninit, +}; diff --git a/video/out/opengl/context_mali_fbdev.c b/video/out/opengl/context_mali_fbdev.c index 66daa7f9ee..8576e536d3 100644 --- a/video/out/opengl/context_mali_fbdev.c +++ b/video/out/opengl/context_mali_fbdev.c @@ -50,8 +50,7 @@ static bool get_fbdev_size(int *w, int *h) } struct priv { - struct mp_log *log; - struct GL *gl; + struct GL gl; EGLDisplay egl_display; EGLConfig egl_config; EGLContext egl_context; @@ -60,9 +59,10 @@ struct priv { int w, h; }; -static void mali_uninit(struct MPGLContext *ctx) +static void mali_uninit(struct ra_ctx *ctx) { struct priv *p = ctx->priv; + ra_gl_ctx_uninit(ctx); if (p->egl_surface) { eglMakeCurrent(p->egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, @@ -74,25 +74,29 @@ static void mali_uninit(struct MPGLContext *ctx) eglReleaseThread(); } -static int mali_init(struct MPGLContext *ctx, int flags) +static void mali_swap_buffers(struct ra_ctx *ctx) { struct priv *p = ctx->priv; - p->log = ctx->vo->log; + eglSwapBuffers(p->egl_display, p->egl_surface); +} + +static bool mali_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); if (!get_fbdev_size(&p->w, &p->h)) { - MP_FATAL(p, "Could not get fbdev size.\n"); + MP_FATAL(ctx, "Could not get fbdev size.\n"); goto fail; } p->egl_display = eglGetDisplay(EGL_DEFAULT_DISPLAY); if (!eglInitialize(p->egl_display, NULL, NULL)) { - MP_FATAL(p, "EGL failed to initialize.\n"); + MP_FATAL(ctx, "EGL failed to initialize.\n"); goto fail; } EGLConfig config; - if (!mpegl_create_context(p->egl_display, p->log, flags, &p->egl_context, - &config)) + if (!mpegl_create_context(ctx, p->egl_display, &p->egl_context, &config)) goto fail; p->egl_window = (struct fbdev_window){ @@ -104,53 +108,51 @@ static int mali_init(struct MPGLContext *ctx, int flags) (EGLNativeWindowType)&p->egl_window, NULL); if (p->egl_surface == EGL_NO_SURFACE) { - MP_FATAL(p, "Could not create EGL surface!\n"); + MP_FATAL(ctx, "Could not create EGL surface!\n"); goto fail; } if (!eglMakeCurrent(p->egl_display, p->egl_surface, p->egl_surface, p->egl_context)) { - MP_FATAL(p, "Failed to set context!\n"); + MP_FATAL(ctx, "Failed to set context!\n"); goto fail; } - ctx->gl = talloc_zero(ctx, GL); + mpegl_load_functions(&p->gl, ctx->log); - mpegl_load_functions(ctx->gl, p->log); + struct ra_gl_ctx_params params = { + .swap_buffers = mali_swap_buffers, + }; + + if (!ra_gl_ctx_init(ctx, &p->gl, params)) + goto fail; - return 0; + return true; fail: mali_uninit(ctx); - return -1; + return false; } -static int mali_reconfig(struct MPGLContext *ctx) +static bool mali_reconfig(struct ra_ctx *ctx) { struct priv *p = ctx->priv; 
ctx->vo->dwidth = p->w; ctx->vo->dheight = p->h; - return 0; + ra_gl_ctx_resize(ctx->swapchain, p->w, p->h, 0); } -static void mali_swap_buffers(MPGLContext *ctx) -{ - struct priv *p = ctx->priv; - eglSwapBuffers(p->egl_display, p->egl_surface); -} - -static int mali_control(MPGLContext *ctx, int *events, int request, void *arg) +static int mali_control(struct ra_ctx *ctx, int *events, int request, void *arg) { return VO_NOTIMPL; } -const struct mpgl_driver mpgl_driver_mali = { +const struct ra_ctx_fns ra_ctx_mali_fbdev = { + .type = "opengl", .name = "mali-fbdev", - .priv_size = sizeof(struct priv), - .init = mali_init, .reconfig = mali_reconfig, - .swap_buffers = mali_swap_buffers, .control = mali_control, + .init = mali_init, .uninit = mali_uninit, }; diff --git a/video/out/opengl/context_rpi.c b/video/out/opengl/context_rpi.c index e79622be5d..8b447d0bfc 100644 --- a/video/out/opengl/context_rpi.c +++ b/video/out/opengl/context_rpi.c @@ -30,7 +30,7 @@ #include "egl_helpers.h" struct priv { - struct mp_log *log; + struct GL gl; DISPMANX_DISPLAY_HANDLE_T display; DISPMANX_ELEMENT_HANDLE_T window; DISPMANX_UPDATE_HANDLE_T update; @@ -49,13 +49,13 @@ struct priv { static void tv_callback(void *callback_data, uint32_t reason, uint32_t param1, uint32_t param2) { - struct MPGLContext *ctx = callback_data; + struct ra_ctx *ctx = callback_data; struct priv *p = ctx->priv; atomic_store(&p->reload_display, true); vo_wakeup(ctx->vo); } -static void destroy_dispmanx(struct MPGLContext *ctx) +static void destroy_dispmanx(struct ra_ctx *ctx) { struct priv *p = ctx->priv; @@ -77,9 +77,10 @@ static void destroy_dispmanx(struct MPGLContext *ctx) p->update = 0; } -static void rpi_uninit(MPGLContext *ctx) +static void rpi_uninit(struct ra_ctx *ctx) { struct priv *p = ctx->priv; + ra_gl_ctx_uninit(ctx); vc_tv_unregister_callback_full(tv_callback, ctx); @@ -92,26 +93,26 @@ static void rpi_uninit(MPGLContext *ctx) p->egl_display = EGL_NO_DISPLAY; } -static int recreate_dispmanx(struct MPGLContext *ctx) +static bool recreate_dispmanx(struct ra_ctx *ctx) { struct priv *p = ctx->priv; int display_nr = 0; int layer = 0; - MP_VERBOSE(ctx->vo, "Recreating DISPMANX state...\n"); + MP_VERBOSE(ctx, "Recreating DISPMANX state...\n"); destroy_dispmanx(ctx); p->display = vc_dispmanx_display_open(display_nr); p->update = vc_dispmanx_update_start(0); if (!p->display || !p->update) { - MP_FATAL(ctx->vo, "Could not get DISPMANX objects.\n"); + MP_FATAL(ctx, "Could not get DISPMANX objects.\n"); goto fail; } uint32_t dispw, disph; if (graphics_get_display_size(0, &dispw, &disph) < 0) { - MP_FATAL(ctx->vo, "Could not get display size.\n"); + MP_FATAL(ctx, "Could not get display size.\n"); goto fail; } p->w = dispw; @@ -145,7 +146,7 @@ static int recreate_dispmanx(struct MPGLContext *ctx) &src, DISPMANX_PROTECTION_NONE, &alpha, 0, 0); if (!p->window) { - MP_FATAL(ctx->vo, "Could not add DISPMANX element.\n"); + MP_FATAL(ctx, "Could not add DISPMANX element.\n"); goto fail; } @@ -161,14 +162,14 @@ static int recreate_dispmanx(struct MPGLContext *ctx) &p->egl_window, NULL); if (p->egl_surface == EGL_NO_SURFACE) { - MP_FATAL(p, "Could not create EGL surface!\n"); + MP_FATAL(ctx, "Could not create EGL surface!\n"); goto fail; } if (!eglMakeCurrent(p->egl_display, p->egl_surface, p->egl_surface, p->egl_context)) { - MP_FATAL(p, "Failed to set context!\n"); + MP_FATAL(ctx, "Failed to set context!\n"); goto fail; } @@ -197,21 +198,27 @@ static int recreate_dispmanx(struct MPGLContext *ctx) ctx->vo->dwidth = p->w; ctx->vo->dheight = 
p->h; + ra_gl_ctx_resize(ctx->swapchain, p->w, p->h, 0); ctx->vo->want_redraw = true; vo_event(ctx->vo, VO_EVENT_WIN_STATE); - return 0; + return true; fail: destroy_dispmanx(ctx); - return -1; + return false; } -static int rpi_init(struct MPGLContext *ctx, int flags) +static void rpi_swap_buffers(struct ra_ctx *ctx) { struct priv *p = ctx->priv; - p->log = ctx->vo->log; + eglSwapBuffers(p->egl_display, p->egl_surface); +} + +static bool rpi_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); bcm_host_init(); @@ -219,43 +226,40 @@ static int rpi_init(struct MPGLContext *ctx, int flags) p->egl_display = eglGetDisplay(EGL_DEFAULT_DISPLAY); if (!eglInitialize(p->egl_display, NULL, NULL)) { - MP_FATAL(p, "EGL failed to initialize.\n"); + MP_FATAL(ctx, "EGL failed to initialize.\n"); goto fail; } - if (!mpegl_create_context(p->egl_display, p->log, 0, &p->egl_context, - &p->egl_config)) + if (!mpegl_create_context(ctx, p->egl_display, &p->egl_context, &p->egl_config)) goto fail; if (recreate_dispmanx(ctx) < 0) goto fail; - ctx->gl = talloc_zero(ctx, GL); + mpegl_load_functions(&p->gl, ctx->log); - mpegl_load_functions(ctx->gl, p->log); + struct ra_gl_ctx_params params = { + .swap_buffers = rpi_swap_buffers, + .native_display_type = "MPV_RPI_WINDOW", + .native_display = p->win_params, + }; - ctx->native_display_type = "MPV_RPI_WINDOW"; - ctx->native_display = p->win_params; + if (!ra_gl_ctx_init(ctx, &p->gl, params)) + goto fail; - return 0; + return true; fail: rpi_uninit(ctx); - return -1; + return false; } -static int rpi_reconfig(struct MPGLContext *ctx) +static bool rpi_reconfig(struct ra_ctx *ctx) { return recreate_dispmanx(ctx); } -static void rpi_swap_buffers(MPGLContext *ctx) -{ - struct priv *p = ctx->priv; - eglSwapBuffers(p->egl_display, p->egl_surface); -} - -static struct mp_image *take_screenshot(struct MPGLContext *ctx) +static struct mp_image *take_screenshot(struct ra_ctx *ctx) { struct priv *p = ctx->priv; @@ -289,21 +293,20 @@ fail: return NULL; } - -static int rpi_control(MPGLContext *ctx, int *events, int request, void *arg) +static int rpi_control(struct ra_ctx *ctx, int *events, int request, void *arg) { struct priv *p = ctx->priv; switch (request) { case VOCTRL_SCREENSHOT_WIN: *(struct mp_image **)arg = take_screenshot(ctx); - return true; + return VO_TRUE; case VOCTRL_FULLSCREEN: recreate_dispmanx(ctx); return VO_TRUE; case VOCTRL_CHECK_EVENTS: if (atomic_fetch_and(&p->reload_display, 0)) { - MP_WARN(ctx->vo, "Recovering from display mode switch...\n"); + MP_WARN(ctx, "Recovering from display mode switch...\n"); recreate_dispmanx(ctx); } return VO_TRUE; @@ -315,12 +318,11 @@ static int rpi_control(MPGLContext *ctx, int *events, int request, void *arg) return VO_NOTIMPL; } -const struct mpgl_driver mpgl_driver_rpi = { +const struct ra_ctx_fns ra_ctx_rpi = { + .type = "opengl", .name = "rpi", - .priv_size = sizeof(struct priv), - .init = rpi_init, .reconfig = rpi_reconfig, - .swap_buffers = rpi_swap_buffers, .control = rpi_control, + .init = rpi_init, .uninit = rpi_uninit, -}; \ No newline at end of file +}; diff --git a/video/out/opengl/context_vdpau.c b/video/out/opengl/context_vdpau.c index 40d21ab65c..a2321f78dd 100644 --- a/video/out/opengl/context_vdpau.c +++ b/video/out/opengl/context_vdpau.c @@ -26,8 +26,6 @@ // follow it. I'm not sure about the original nvidia headers. 
#define BRAINDEATH(x) ((void *)(uintptr_t)(x)) -#define NUM_SURFACES 4 - struct surface { int w, h; VdpOutputSurface surface; @@ -39,21 +37,22 @@ struct surface { }; struct priv { + GL gl; GLXContext context; struct mp_vdpau_ctx *vdp; VdpPresentationQueueTarget vdp_target; VdpPresentationQueue vdp_queue; + struct surface *surfaces; int num_surfaces; - struct surface surfaces[NUM_SURFACES]; - int current_surface; + int idx_surfaces; }; typedef GLXContext (*glXCreateContextAttribsARBProc) (Display*, GLXFBConfig, GLXContext, Bool, const int*); -static bool create_context_x11(struct MPGLContext *ctx, int vo_flags) +static bool create_context_x11(struct ra_ctx *ctx) { - struct priv *glx_ctx = ctx->priv; + struct priv *p = ctx->priv; struct vo *vo = ctx->vo; int glx_major, glx_minor; @@ -62,6 +61,9 @@ static bool create_context_x11(struct MPGLContext *ctx, int vo_flags) return false; } + if (!ra_gl_ctx_test_version(ctx, MPGL_VER(glx_major, glx_minor), false)) + return false; + int glx_attribs[] = { GLX_X_RENDERABLE, True, GLX_X_VISUAL_TYPE, GLX_TRUE_COLOR, @@ -96,7 +98,7 @@ static bool create_context_x11(struct MPGLContext *ctx, int vo_flags) return false; } - int ctx_flags = vo_flags & VOFLAG_GL_DEBUG ? GLX_CONTEXT_DEBUG_BIT_ARB : 0; + int ctx_flags = ctx->opts.debug ? GLX_CONTEXT_DEBUG_BIT_ARB : 0; int context_attribs[] = { GLX_CONTEXT_MAJOR_VERSION_ARB, 4, GLX_CONTEXT_MINOR_VERSION_ARB, 0, @@ -117,19 +119,20 @@ static bool create_context_x11(struct MPGLContext *ctx, int vo_flags) return false; } - glx_ctx->context = context; - mpgl_load_functions(ctx->gl, (void *)glXGetProcAddressARB, glxstr, vo->log); + p->context = context; + mpgl_load_functions(&p->gl, (void *)glXGetProcAddressARB, glxstr, vo->log); return true; } -static int create_vdpau_objects(struct MPGLContext *ctx) +static int create_vdpau_objects(struct ra_ctx *ctx) { struct priv *p = ctx->priv; + struct GL *gl = &p->gl; VdpDevice dev = p->vdp->vdp_device; struct vdp_functions *vdp = &p->vdp->vdp; VdpStatus vdp_st; - ctx->gl->VDPAUInitNV(BRAINDEATH(dev), p->vdp->get_proc_address); + gl->VDPAUInitNV(BRAINDEATH(dev), p->vdp->get_proc_address); vdp_st = vdp->presentation_queue_target_create_x11(dev, ctx->vo->x11->window, &p->vdp_target); @@ -141,13 +144,13 @@ static int create_vdpau_objects(struct MPGLContext *ctx) return 0; } -static void destroy_vdpau_surface(struct MPGLContext *ctx, +static void destroy_vdpau_surface(struct ra_ctx *ctx, struct surface *surface) { struct priv *p = ctx->priv; struct vdp_functions *vdp = &p->vdp->vdp; VdpStatus vdp_st; - GL *gl = ctx->gl; + GL *gl = &p->gl; if (surface->mapped) gl->VDPAUUnmapSurfacesNV(1, &surface->registered); @@ -168,14 +171,14 @@ static void destroy_vdpau_surface(struct MPGLContext *ctx, }; } -static int recreate_vdpau_surface(struct MPGLContext *ctx, - struct surface *surface) +static bool recreate_vdpau_surface(struct ra_ctx *ctx, + struct surface *surface) { struct priv *p = ctx->priv; VdpDevice dev = p->vdp->vdp_device; struct vdp_functions *vdp = &p->vdp->vdp; VdpStatus vdp_st; - GL *gl = ctx->gl; + GL *gl = &p->gl; destroy_vdpau_surface(ctx, surface); @@ -219,16 +222,37 @@ static int recreate_vdpau_surface(struct MPGLContext *ctx, gl->VDPAUUnmapSurfacesNV(1, &surface->registered); surface->mapped = false; - return 0; + return true; error: destroy_vdpau_surface(ctx, surface); - return -1; + return false; +} + +static void vdpau_swap_buffers(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + struct vdp_functions *vdp = &p->vdp->vdp; + VdpStatus vdp_st; + + // This is the 
*next* surface we will be rendering to. By delaying the + // block_until_idle, we're essentially allowing p->num_surfaces - 1 + // in-flight surfaces, plus the one currently visible surface. + struct surface *surf = &p->surfaces[p->idx_surfaces]; + if (surf->surface == VDP_INVALID_HANDLE) + return; + + VdpTime prev_vsync_time; + vdp_st = vdp->presentation_queue_block_until_surface_idle(p->vdp_queue, + surf->surface, + &prev_vsync_time); + CHECK_VDP_WARNING(ctx, "waiting for surface failed"); } -static void glx_uninit(MPGLContext *ctx) +static void vdpau_uninit(struct ra_ctx *ctx) { struct priv *p = ctx->priv; + ra_gl_ctx_uninit(ctx); if (p->vdp) { struct vdp_functions *vdp = &p->vdp->vdp; @@ -259,10 +283,12 @@ static void glx_uninit(MPGLContext *ctx) vo_x11_uninit(ctx->vo); } -static int glx_init(struct MPGLContext *ctx, int flags) +static const struct ra_swapchain_fns vdpau_swapchain; + +static bool vdpau_init(struct ra_ctx *ctx) { struct vo *vo = ctx->vo; - struct priv *p = ctx->priv; + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); p->vdp_queue = VDP_INVALID_HANDLE; p->vdp_target = VDP_INVALID_HANDLE; @@ -280,110 +306,112 @@ static int glx_init(struct MPGLContext *ctx, int flags) if (!vo_x11_create_vo_window(vo, NULL, "vdpauglx")) goto uninit; - if (!create_context_x11(ctx, flags)) + if (!create_context_x11(ctx)) goto uninit; - if (!(ctx->gl->mpgl_caps & MPGL_CAP_VDPAU)) + if (!(p->gl.mpgl_caps & MPGL_CAP_VDPAU)) goto uninit; if (create_vdpau_objects(ctx) < 0) goto uninit; - p->num_surfaces = NUM_SURFACES; + p->num_surfaces = ctx->opts.swapchain_depth + 1; // +1 for the visible image + p->surfaces = talloc_zero_array(p, struct surface, p->num_surfaces); for (int n = 0; n < p->num_surfaces; n++) p->surfaces[n].surface = VDP_INVALID_HANDLE; - ctx->flip_v = true; + struct ra_gl_ctx_params params = { + .swap_buffers = vdpau_swap_buffers, + .external_swapchain = &vdpau_swapchain, + .flipped = true, + }; - return 0; + if (!ra_gl_ctx_init(ctx, &p->gl, params)) + goto uninit; + + return true; uninit: - glx_uninit(ctx); - return -1; + vdpau_uninit(ctx); + return false; } -static int glx_reconfig(struct MPGLContext *ctx) +static struct ra_tex *vdpau_start_frame(struct ra_swapchain *sw) { - vo_x11_config_vo_window(ctx->vo); - return 0; -} + struct priv *p = sw->ctx->priv; + struct vo *vo = sw->ctx->vo; + GL *gl = &p->gl; + + struct surface *surf = &p->surfaces[p->idx_surfaces]; + if (surf->w != vo->dwidth || surf->h != vo->dheight || + surf->surface == VDP_INVALID_HANDLE) + { + if (!recreate_vdpau_surface(sw->ctx, surf)) + return NULL; + } -static int glx_control(struct MPGLContext *ctx, int *events, int request, - void *arg) -{ - return vo_x11_control(ctx->vo, events, request, arg); + assert(!surf->mapped); + gl->VDPAUMapSurfacesNV(1, &surf->registered); + surf->mapped = true; + + ra_gl_ctx_resize(sw, surf->w, surf->h, surf->fbo); + return ra_gl_ctx_start_frame(sw); } -static void glx_start_frame(struct MPGLContext *ctx) +static bool vdpau_submit_frame(struct ra_swapchain *sw, + const struct vo_frame *frame) { - struct priv *p = ctx->priv; + struct priv *p = sw->ctx->priv; + GL *gl = &p->gl; struct vdp_functions *vdp = &p->vdp->vdp; VdpStatus vdp_st; - GL *gl = ctx->gl; - - struct surface *surface = &p->surfaces[p->current_surface]; - - if (surface->surface != VDP_INVALID_HANDLE) { - VdpTime prev_vsync_time; - vdp_st = vdp->presentation_queue_block_until_surface_idle(p->vdp_queue, - surface->surface, - &prev_vsync_time); - CHECK_VDP_WARNING(ctx, "waiting for surface failed"); - } - 
if (surface->w != ctx->vo->dwidth || surface->h != ctx->vo->dheight) - recreate_vdpau_surface(ctx, surface); + struct surface *surf = &p->surfaces[p->idx_surfaces]; + assert(surf->surface != VDP_INVALID_HANDLE); + assert(surf->mapped); + gl->VDPAUUnmapSurfacesNV(1, &surf->registered); + surf->mapped = false; + vdp_st = vdp->presentation_queue_display(p->vdp_queue, surf->surface, 0, 0, 0); + CHECK_VDP_WARNING(sw->ctx, "trying to present vdp surface"); - ctx->main_fb = surface->fbo; // 0 if creating the surface failed - - if (surface->surface != VDP_INVALID_HANDLE) { - gl->VDPAUMapSurfacesNV(1, &surface->registered); - surface->mapped = true; - } + p->idx_surfaces = (p->idx_surfaces + 1) % p->num_surfaces; + return ra_gl_ctx_submit_frame(sw, frame) && vdp_st == VDP_STATUS_OK; } -static void glx_swap_buffers(struct MPGLContext *ctx) +static bool vdpau_reconfig(struct ra_ctx *ctx) { - struct priv *p = ctx->priv; - struct vdp_functions *vdp = &p->vdp->vdp; - VdpStatus vdp_st; - GL *gl = ctx->gl; - - struct surface *surface = &p->surfaces[p->current_surface]; - if (surface->surface == VDP_INVALID_HANDLE) - return; // surface alloc probably failed before - - if (surface->mapped) - gl->VDPAUUnmapSurfacesNV(1, &surface->registered); - surface->mapped = false; - - vdp_st = vdp->presentation_queue_display(p->vdp_queue, surface->surface, - 0, 0, 0); - CHECK_VDP_WARNING(ctx, "trying to present vdp surface"); + vo_x11_config_vo_window(ctx->vo); + return true; +} - p->current_surface = (p->current_surface + 1) % p->num_surfaces; +static int vdpau_control(struct ra_ctx *ctx, int *events, int request, void *arg) +{ + return vo_x11_control(ctx->vo, events, request, arg); } -static void glx_wakeup(struct MPGLContext *ctx) +static void vdpau_wakeup(struct ra_ctx *ctx) { vo_x11_wakeup(ctx->vo); } -static void glx_wait_events(struct MPGLContext *ctx, int64_t until_time_us) +static void vdpau_wait_events(struct ra_ctx *ctx, int64_t until_time_us) { vo_x11_wait_events(ctx->vo, until_time_us); } -const struct mpgl_driver mpgl_driver_vdpauglx = { +static const struct ra_swapchain_fns vdpau_swapchain = { + .start_frame = vdpau_start_frame, + .submit_frame = vdpau_submit_frame, +}; + +const struct ra_ctx_fns ra_ctx_vdpauglx = { + .type = "opengl", .name = "vdpauglx", - .priv_size = sizeof(struct priv), - .init = glx_init, - .reconfig = glx_reconfig, - .start_frame = glx_start_frame, - .swap_buffers = glx_swap_buffers, - .control = glx_control, - .wakeup = glx_wakeup, - .wait_events = glx_wait_events, - .uninit = glx_uninit, + .reconfig = vdpau_reconfig, + .control = vdpau_control, + .wakeup = vdpau_wakeup, + .wait_events = vdpau_wait_events, + .init = vdpau_init, + .uninit = vdpau_uninit, }; diff --git a/video/out/opengl/context_wayland.c b/video/out/opengl/context_wayland.c index 87e98cd64f..6ddc550306 100644 --- a/video/out/opengl/context_wayland.c +++ b/video/out/opengl/context_wayland.c @@ -19,6 +19,7 @@ #include "video/out/wayland_common.h" #include "context.h" #include "egl_helpers.h" +#include "utils.h" static void egl_resize(struct vo_wayland_state *wl) { @@ -63,30 +64,42 @@ static void egl_resize(struct vo_wayland_state *wl) wl->vo->want_redraw = true; } -static int egl_create_context(struct vo_wayland_state *wl, MPGLContext *ctx, - int flags) +static void waylandgl_swap_buffers(struct ra_ctx *ctx) { - GL *gl = ctx->gl; + struct vo_wayland_state *wl = ctx->vo->wayland; + vo_wayland_wait_events(ctx->vo, 0); + eglSwapBuffers(wl->egl_context.egl.dpy, wl->egl_context.egl_surface); +} + +static bool 
egl_create_context(struct ra_ctx *ctx, struct vo_wayland_state *wl) +{ + GL *gl = ctx->priv = talloc_zero(ctx, GL); if (!(wl->egl_context.egl.dpy = eglGetDisplay(wl->display.display))) - return -1; + return false; if (eglInitialize(wl->egl_context.egl.dpy, NULL, NULL) != EGL_TRUE) - return -1; + return false; - if (!mpegl_create_context(wl->egl_context.egl.dpy, wl->log, flags, + if (!mpegl_create_context(ctx, wl->egl_context.egl.dpy, &wl->egl_context.egl.ctx, &wl->egl_context.egl.conf)) - return -1; + return false; eglMakeCurrent(wl->egl_context.egl.dpy, NULL, NULL, wl->egl_context.egl.ctx); mpegl_load_functions(gl, wl->log); - ctx->native_display_type = "wl"; - ctx->native_display = wl->display.display; + struct ra_gl_ctx_params params = { + .swap_buffers = waylandgl_swap_buffers, + .native_display_type = "wl", + .native_display = wl->display.display, + }; + + if (!ra_gl_ctx_init(ctx, gl, params)) + return false; - return 0; + return true; } static void egl_create_window(struct vo_wayland_state *wl) @@ -122,23 +135,25 @@ static void egl_create_window(struct vo_wayland_state *wl) eglSwapInterval(wl->egl_context.egl.dpy, 0); } -static int waylandgl_reconfig(struct MPGLContext *ctx) +static bool waylandgl_reconfig(struct ra_ctx *ctx) { struct vo_wayland_state * wl = ctx->vo->wayland; if (!vo_wayland_config(ctx->vo)) - return -1; + return false; if (!wl->egl_context.egl_window) egl_create_window(wl); - return 0; + return true; } -static void waylandgl_uninit(MPGLContext *ctx) +static void waylandgl_uninit(struct ra_ctx *ctx) { struct vo_wayland_state *wl = ctx->vo->wayland; + ra_gl_ctx_uninit(ctx); + if (wl->egl_context.egl.ctx) { eglReleaseThread(); if (wl->egl_context.egl_window) @@ -153,52 +168,45 @@ static void waylandgl_uninit(MPGLContext *ctx) vo_wayland_uninit(ctx->vo); } -static void waylandgl_swap_buffers(MPGLContext *ctx) -{ - struct vo_wayland_state *wl = ctx->vo->wayland; - - vo_wayland_wait_events(ctx->vo, 0); - - eglSwapBuffers(wl->egl_context.egl.dpy, wl->egl_context.egl_surface); -} - -static int waylandgl_control(MPGLContext *ctx, int *events, int request, +static int waylandgl_control(struct ra_ctx *ctx, int *events, int request, void *data) { struct vo_wayland_state *wl = ctx->vo->wayland; int r = vo_wayland_control(ctx->vo, events, request, data); - if (*events & VO_EVENT_RESIZE) + if (*events & VO_EVENT_RESIZE) { egl_resize(wl); + ra_gl_ctx_resize(ctx->swapchain, wl->vo->dwidth, wl->vo->dheight, 0); + } return r; } -static void wayland_wakeup(struct MPGLContext *ctx) +static void wayland_wakeup(struct ra_ctx *ctx) { vo_wayland_wakeup(ctx->vo); } -static void wayland_wait_events(struct MPGLContext *ctx, int64_t until_time_us) +static void wayland_wait_events(struct ra_ctx *ctx, int64_t until_time_us) { vo_wayland_wait_events(ctx->vo, until_time_us); } -static int waylandgl_init(struct MPGLContext *ctx, int flags) +static bool waylandgl_init(struct ra_ctx *ctx) { if (!vo_wayland_init(ctx->vo)) - return -1; + return false; - return egl_create_context(ctx->vo->wayland, ctx, flags); + return egl_create_context(ctx, ctx->vo->wayland); } -const struct mpgl_driver mpgl_driver_wayland = { +const struct ra_ctx_fns ra_ctx_wayland_egl = { + .type = "opengl", .name = "wayland", - .init = waylandgl_init, .reconfig = waylandgl_reconfig, - .swap_buffers = waylandgl_swap_buffers, .control = waylandgl_control, .wakeup = wayland_wakeup, .wait_events = wayland_wait_events, + .init = waylandgl_init, .uninit = waylandgl_uninit, }; diff --git a/video/out/opengl/context_x11.c 
b/video/out/opengl/context_x11.c deleted file mode 100644 index 4d8dac1ea5..0000000000 --- a/video/out/opengl/context_x11.c +++ /dev/null @@ -1,358 +0,0 @@ -/* - * This file is part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see . - */ - -#include -#include - -// FreeBSD 10.0-CURRENT lacks the GLX_ARB_create_context extension completely -#ifndef GLX_CONTEXT_MAJOR_VERSION_ARB -#define GLX_CONTEXT_MAJOR_VERSION_ARB 0x2091 -#define GLX_CONTEXT_MINOR_VERSION_ARB 0x2092 -#define GLX_CONTEXT_FLAGS_ARB 0x2094 -#define GLX_CONTEXT_PROFILE_MASK_ARB 0x9126 -#ifndef __APPLE__ -// These are respectively 0x00000001 and 0x00000002 on OSX -#define GLX_CONTEXT_DEBUG_BIT_ARB 0x0001 -#define GLX_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB 0x0002 -#endif -#define GLX_CONTEXT_CORE_PROFILE_BIT_ARB 0x00000001 -#define GLX_CONTEXT_COMPATIBILITY_PROFILE_BIT_ARB 0x00000002 -#endif -// GLX_EXT_create_context_es2_profile -#ifndef GLX_CONTEXT_ES2_PROFILE_BIT_EXT -#define GLX_CONTEXT_ES2_PROFILE_BIT_EXT 0x00000004 -#endif - -#include "video/out/x11_common.h" -#include "context.h" - -struct glx_context { - XVisualInfo *vinfo; - GLXContext context; - GLXFBConfig fbc; -}; - -static void glx_uninit(MPGLContext *ctx) -{ - struct glx_context *glx_ctx = ctx->priv; - if (glx_ctx->vinfo) - XFree(glx_ctx->vinfo); - if (glx_ctx->context) { - Display *display = ctx->vo->x11->display; - glXMakeCurrent(display, None, NULL); - glXDestroyContext(display, glx_ctx->context); - } - vo_x11_uninit(ctx->vo); -} - -static bool create_context_x11_old(struct MPGLContext *ctx) -{ - struct glx_context *glx_ctx = ctx->priv; - Display *display = ctx->vo->x11->display; - struct vo *vo = ctx->vo; - GL *gl = ctx->gl; - - if (glx_ctx->context) - return true; - - if (!glx_ctx->vinfo) { - MP_FATAL(vo, "Can't create a legacy GLX context without X visual\n"); - return false; - } - - GLXContext new_context = glXCreateContext(display, glx_ctx->vinfo, NULL, - True); - if (!new_context) { - MP_FATAL(vo, "Could not create GLX context!\n"); - return false; - } - - if (!glXMakeCurrent(display, ctx->vo->x11->window, new_context)) { - MP_FATAL(vo, "Could not set GLX context!\n"); - glXDestroyContext(display, new_context); - return false; - } - - const char *glxstr = glXQueryExtensionsString(display, ctx->vo->x11->screen); - - mpgl_load_functions(gl, (void *)glXGetProcAddressARB, glxstr, vo->log); - - glx_ctx->context = new_context; - - return true; -} - -typedef GLXContext (*glXCreateContextAttribsARBProc) - (Display*, GLXFBConfig, GLXContext, Bool, const int*); - -static bool create_context_x11_gl3(struct MPGLContext *ctx, int vo_flags, - int gl_version, bool es) -{ - struct glx_context *glx_ctx = ctx->priv; - struct vo *vo = ctx->vo; - - if (glx_ctx->context) - return true; - - glXCreateContextAttribsARBProc glXCreateContextAttribsARB = - (glXCreateContextAttribsARBProc) - glXGetProcAddressARB((const GLubyte *)"glXCreateContextAttribsARB"); - - const char *glxstr = - 
glXQueryExtensionsString(vo->x11->display, vo->x11->screen); - bool have_ctx_ext = glxstr && !!strstr(glxstr, "GLX_ARB_create_context"); - - if (!(have_ctx_ext && glXCreateContextAttribsARB)) { - return false; - } - - int ctx_flags = vo_flags & VOFLAG_GL_DEBUG ? GLX_CONTEXT_DEBUG_BIT_ARB : 0; - int profile_mask = GLX_CONTEXT_CORE_PROFILE_BIT_ARB; - - if (es) { - profile_mask = GLX_CONTEXT_ES2_PROFILE_BIT_EXT; - if (!(glxstr && strstr(glxstr, "GLX_EXT_create_context_es2_profile"))) - return false; - } - - int context_attribs[] = { - GLX_CONTEXT_MAJOR_VERSION_ARB, MPGL_VER_GET_MAJOR(gl_version), - GLX_CONTEXT_MINOR_VERSION_ARB, MPGL_VER_GET_MINOR(gl_version), - GLX_CONTEXT_PROFILE_MASK_ARB, profile_mask, - GLX_CONTEXT_FLAGS_ARB, ctx_flags, - None - }; - vo_x11_silence_xlib(1); - GLXContext context = glXCreateContextAttribsARB(vo->x11->display, - glx_ctx->fbc, 0, True, - context_attribs); - vo_x11_silence_xlib(-1); - if (!context) - return false; - - // set context - if (!glXMakeCurrent(vo->x11->display, vo->x11->window, context)) { - MP_FATAL(vo, "Could not set GLX context!\n"); - glXDestroyContext(vo->x11->display, context); - return false; - } - - glx_ctx->context = context; - - mpgl_load_functions(ctx->gl, (void *)glXGetProcAddressARB, glxstr, vo->log); - - return true; -} - -// The GL3/FBC initialization code roughly follows/copies from: -// http://www.opengl.org/wiki/Tutorial:_OpenGL_3.0_Context_Creation_(GLX) -// but also uses some of the old code. - -static GLXFBConfig select_fb_config(struct vo *vo, const int *attribs, int flags) -{ - int fbcount; - GLXFBConfig *fbc = glXChooseFBConfig(vo->x11->display, vo->x11->screen, - attribs, &fbcount); - if (!fbc) - return NULL; - - // The list in fbc is sorted (so that the first element is the best). - GLXFBConfig fbconfig = fbcount > 0 ? fbc[0] : NULL; - - if (flags & VOFLAG_ALPHA) { - for (int n = 0; n < fbcount; n++) { - XVisualInfo *v = glXGetVisualFromFBConfig(vo->x11->display, fbc[n]); - if (v) { - bool is_rgba = vo_x11_is_rgba_visual(v); - XFree(v); - if (is_rgba) { - fbconfig = fbc[n]; - break; - } - } - } - } - - XFree(fbc); - - return fbconfig; -} - -static void set_glx_attrib(int *attribs, int name, int value) -{ - for (int n = 0; attribs[n * 2 + 0] != None; n++) { - if (attribs[n * 2 + 0] == name) { - attribs[n * 2 + 1] = value; - break; - } - } -} - -static int glx_init(struct MPGLContext *ctx, int flags) -{ - struct vo *vo = ctx->vo; - struct glx_context *glx_ctx = ctx->priv; - - if (!vo_x11_init(ctx->vo)) - goto uninit; - - int glx_major, glx_minor; - - if (!glXQueryVersion(vo->x11->display, &glx_major, &glx_minor)) { - MP_ERR(vo, "GLX not found.\n"); - goto uninit; - } - // FBConfigs were added in GLX version 1.3. 
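/* MPGL_VER() (defined in opengl/common.h, not shown in this patch) appears to
 * pack a version as major * 100 + minor * 10 -- which is why bare integers
 * such as 140 (1.4), 200 (2.0) and 300 (3.0) are used interchangeably with it
 * elsewhere in these files, and why
 * MPGL_VER(glx_major, glx_minor) < MPGL_VER(1, 3) is a plain integer
 * comparison. */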
- if (MPGL_VER(glx_major, glx_minor) < MPGL_VER(1, 3)) { - MP_ERR(vo, "GLX version older than 1.3.\n"); - goto uninit; - } - - int glx_attribs[] = { - GLX_X_RENDERABLE, True, - GLX_X_VISUAL_TYPE, GLX_TRUE_COLOR, - GLX_RED_SIZE, 1, - GLX_GREEN_SIZE, 1, - GLX_BLUE_SIZE, 1, - GLX_ALPHA_SIZE, 0, - GLX_DOUBLEBUFFER, True, - None - }; - GLXFBConfig fbc = NULL; - if (flags & VOFLAG_ALPHA) { - set_glx_attrib(glx_attribs, GLX_ALPHA_SIZE, 1); - fbc = select_fb_config(vo, glx_attribs, flags); - if (!fbc) { - set_glx_attrib(glx_attribs, GLX_ALPHA_SIZE, 0); - flags &= ~VOFLAG_ALPHA; - } - } - if (!fbc) - fbc = select_fb_config(vo, glx_attribs, flags); - if (!fbc) { - MP_ERR(vo, "no GLX support present\n"); - goto uninit; - } - - int fbid = -1; - if (!glXGetFBConfigAttrib(vo->x11->display, fbc, GLX_FBCONFIG_ID, &fbid)) - MP_VERBOSE(vo, "GLX chose FB config with ID 0x%x\n", fbid); - - glx_ctx->fbc = fbc; - glx_ctx->vinfo = glXGetVisualFromFBConfig(vo->x11->display, fbc); - if (glx_ctx->vinfo) { - MP_VERBOSE(vo, "GLX chose visual with ID 0x%x\n", - (int)glx_ctx->vinfo->visualid); - } else { - MP_WARN(vo, "Selected GLX FB config has no associated X visual\n"); - } - - if (!vo_x11_create_vo_window(vo, glx_ctx->vinfo, "gl")) - goto uninit; - - bool success = false; - if (!(flags & VOFLAG_GLES)) { - for (int n = 0; mpgl_preferred_gl_versions[n]; n++) { - int version = mpgl_preferred_gl_versions[n]; - MP_VERBOSE(vo, "Creating OpenGL %d.%d context...\n", - MPGL_VER_P(version)); - if (version >= 300) { - success = create_context_x11_gl3(ctx, flags, version, false); - } else { - success = create_context_x11_old(ctx); - } - if (success) - break; - } - } - if (!success) // try ES - success = create_context_x11_gl3(ctx, flags, 200, true); - if (success && !glXIsDirect(vo->x11->display, glx_ctx->context)) - ctx->gl->mpgl_caps |= MPGL_CAP_SW; - if (!success) - goto uninit; - - return 0; - -uninit: - glx_uninit(ctx); - return -1; -} - -static int glx_init_probe(struct MPGLContext *ctx, int flags) -{ - int r = glx_init(ctx, flags); - if (r >= 0) { - if (!(ctx->gl->mpgl_caps & MPGL_CAP_VDPAU)) { - MP_VERBOSE(ctx->vo, "No vdpau support found - probing more things.\n"); - glx_uninit(ctx); - r = -1; - } - } - return r; -} - -static int glx_reconfig(struct MPGLContext *ctx) -{ - vo_x11_config_vo_window(ctx->vo); - return 0; -} - -static int glx_control(struct MPGLContext *ctx, int *events, int request, - void *arg) -{ - return vo_x11_control(ctx->vo, events, request, arg); -} - -static void glx_swap_buffers(struct MPGLContext *ctx) -{ - glXSwapBuffers(ctx->vo->x11->display, ctx->vo->x11->window); -} - -static void glx_wakeup(struct MPGLContext *ctx) -{ - vo_x11_wakeup(ctx->vo); -} - -static void glx_wait_events(struct MPGLContext *ctx, int64_t until_time_us) -{ - vo_x11_wait_events(ctx->vo, until_time_us); -} - -const struct mpgl_driver mpgl_driver_x11 = { - .name = "x11", - .priv_size = sizeof(struct glx_context), - .init = glx_init, - .reconfig = glx_reconfig, - .swap_buffers = glx_swap_buffers, - .control = glx_control, - .wakeup = glx_wakeup, - .wait_events = glx_wait_events, - .uninit = glx_uninit, -}; - -const struct mpgl_driver mpgl_driver_x11_probe = { - .name = "x11probe", - .priv_size = sizeof(struct glx_context), - .init = glx_init_probe, - .reconfig = glx_reconfig, - .swap_buffers = glx_swap_buffers, - .control = glx_control, - .wakeup = glx_wakeup, - .wait_events = glx_wait_events, - .uninit = glx_uninit, -}; diff --git a/video/out/opengl/context_x11egl.c b/video/out/opengl/context_x11egl.c index 
2b68007a33..7ab4fe0579 100644 --- a/video/out/opengl/context_x11egl.c +++ b/video/out/opengl/context_x11egl.c @@ -32,14 +32,17 @@ #include "egl_helpers.h" struct priv { + GL gl; EGLDisplay egl_display; EGLContext egl_context; EGLSurface egl_surface; }; -static void mpegl_uninit(MPGLContext *ctx) +static void mpegl_uninit(struct ra_ctx *ctx) { struct priv *p = ctx->priv; + ra_gl_ctx_uninit(ctx); + if (p->egl_context) { eglMakeCurrent(p->egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT); @@ -51,7 +54,7 @@ static void mpegl_uninit(MPGLContext *ctx) static int pick_xrgba_config(void *user_data, EGLConfig *configs, int num_configs) { - struct MPGLContext *ctx = user_data; + struct ra_ctx *ctx = user_data; struct priv *p = ctx->priv; struct vo *vo = ctx->vo; @@ -72,40 +75,44 @@ static int pick_xrgba_config(void *user_data, EGLConfig *configs, int num_config return 0; } -static int mpegl_init(struct MPGLContext *ctx, int flags) +static void mpegl_swap_buffers(struct ra_ctx *ctx) { struct priv *p = ctx->priv; + eglSwapBuffers(p->egl_display, p->egl_surface); +} + +static bool mpegl_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); struct vo *vo = ctx->vo; - int msgl = vo->probing ? MSGL_V : MSGL_FATAL; + int msgl = ctx->opts.probing ? MSGL_V : MSGL_FATAL; if (!vo_x11_init(vo)) goto uninit; p->egl_display = eglGetDisplay(vo->x11->display); if (!eglInitialize(p->egl_display, NULL, NULL)) { - mp_msg(vo->log, msgl, "Could not initialize EGL.\n"); + MP_MSG(ctx, msgl, "Could not initialize EGL.\n"); goto uninit; } - struct mpegl_opts opts = { - .vo_flags = flags, + struct mpegl_cb cb = { .user_data = ctx, - .refine_config = (flags & VOFLAG_ALPHA) ? pick_xrgba_config : NULL, + .refine_config = ctx->opts.want_alpha ? 
pick_xrgba_config : NULL, }; EGLConfig config; - if (!mpegl_create_context_opts(p->egl_display, vo->log, &opts, - &p->egl_context, &config)) + if (!mpegl_create_context_cb(ctx, p->egl_display, cb, &p->egl_context, &config)) goto uninit; int vID, n; eglGetConfigAttrib(p->egl_display, config, EGL_NATIVE_VISUAL_ID, &vID); - MP_VERBOSE(vo, "chose visual 0x%x\n", vID); + MP_VERBOSE(ctx, "chose visual 0x%x\n", vID); XVisualInfo template = {.visualid = vID}; XVisualInfo *vi = XGetVisualInfo(vo->x11->display, VisualIDMask, &template, &n); if (!vi) { - MP_FATAL(vo, "Getting X visual failed!\n"); + MP_FATAL(ctx, "Getting X visual failed!\n"); goto uninit; } @@ -120,64 +127,73 @@ static int mpegl_init(struct MPGLContext *ctx, int flags) (EGLNativeWindowType)vo->x11->window, NULL); if (p->egl_surface == EGL_NO_SURFACE) { - MP_FATAL(ctx->vo, "Could not create EGL surface!\n"); + MP_FATAL(ctx, "Could not create EGL surface!\n"); goto uninit; } if (!eglMakeCurrent(p->egl_display, p->egl_surface, p->egl_surface, p->egl_context)) { - MP_FATAL(ctx->vo, "Could not make context current!\n"); + MP_FATAL(ctx, "Could not make context current!\n"); goto uninit; } - mpegl_load_functions(ctx->gl, vo->log); + mpegl_load_functions(&p->gl, ctx->log); - ctx->native_display_type = "x11"; - ctx->native_display = vo->x11->display; - return 0; + struct ra_gl_ctx_params params = { + .swap_buffers = mpegl_swap_buffers, + .native_display_type = "x11", + .native_display = vo->x11->display, + }; + + if (!ra_gl_ctx_init(ctx, &p->gl, params)) + goto uninit; + + return true; uninit: mpegl_uninit(ctx); - return -1; + return false; } -static int mpegl_reconfig(struct MPGLContext *ctx) +static void resize(struct ra_ctx *ctx) { - vo_x11_config_vo_window(ctx->vo); - return 0; + ra_gl_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight, 0); } -static int mpegl_control(struct MPGLContext *ctx, int *events, int request, - void *arg) +static bool mpegl_reconfig(struct ra_ctx *ctx) { - return vo_x11_control(ctx->vo, events, request, arg); + vo_x11_config_vo_window(ctx->vo); + resize(ctx); + return true; } -static void mpegl_swap_buffers(MPGLContext *ctx) +static int mpegl_control(struct ra_ctx *ctx, int *events, int request, + void *arg) { - struct priv *p = ctx->priv; - eglSwapBuffers(p->egl_display, p->egl_surface); + int ret = vo_x11_control(ctx->vo, events, request, arg); + if (*events & VO_EVENT_RESIZE) + resize(ctx); + return ret; } -static void mpegl_wakeup(struct MPGLContext *ctx) +static void mpegl_wakeup(struct ra_ctx *ctx) { vo_x11_wakeup(ctx->vo); } -static void mpegl_wait_events(struct MPGLContext *ctx, int64_t until_time_us) +static void mpegl_wait_events(struct ra_ctx *ctx, int64_t until_time_us) { vo_x11_wait_events(ctx->vo, until_time_us); } -const struct mpgl_driver mpgl_driver_x11egl = { +const struct ra_ctx_fns ra_ctx_x11_egl = { + .type = "opengl", .name = "x11egl", - .priv_size = sizeof(struct priv), - .init = mpegl_init, .reconfig = mpegl_reconfig, - .swap_buffers = mpegl_swap_buffers, .control = mpegl_control, .wakeup = mpegl_wakeup, .wait_events = mpegl_wait_events, + .init = mpegl_init, .uninit = mpegl_uninit, }; diff --git a/video/out/opengl/egl_helpers.c b/video/out/opengl/egl_helpers.c index ac152df06a..0033bf1e33 100644 --- a/video/out/opengl/egl_helpers.c +++ b/video/out/opengl/egl_helpers.c @@ -25,6 +25,7 @@ #include "egl_helpers.h" #include "common.h" +#include "utils.h" #include "context.h" #if HAVE_EGL_ANGLE @@ -43,41 +44,49 @@ #define EGL_OPENGL_ES3_BIT 0x00000040 #endif -// es_version = 0 
(desktop), 2/3 (ES major version) -static bool create_context(EGLDisplay display, struct mp_log *log, bool probing, - int es_version, struct mpegl_opts *opts, +// es_version: 0 (core), 2 or 3 +static bool create_context(struct ra_ctx *ctx, EGLDisplay display, + int es_version, struct mpegl_cb cb, EGLContext *out_context, EGLConfig *out_config) { - int msgl = probing ? MSGL_V : MSGL_FATAL; - - EGLenum api = EGL_OPENGL_API; - EGLint rend = EGL_OPENGL_BIT; - const char *name = "Desktop OpenGL"; - if (es_version == 2) { + int msgl = ctx->opts.probing ? MSGL_V : MSGL_FATAL; + + EGLenum api; + EGLint rend; + const char *name; + + switch (es_version) { + case 0: + api = EGL_OPENGL_API; + rend = EGL_OPENGL_BIT; + name = "Desktop OpenGL"; + break; + case 2: api = EGL_OPENGL_ES_API; rend = EGL_OPENGL_ES2_BIT; - name = "GLES 2.0"; - } - if (es_version == 3) { + name = "GLES 2.x"; + break; + case 3: api = EGL_OPENGL_ES_API; rend = EGL_OPENGL_ES3_BIT; name = "GLES 3.x"; + break; + default: abort(); } - mp_msg(log, MSGL_V, "Trying to create %s context.\n", name); + MP_VERBOSE(ctx, "Trying to create %s context.\n", name); if (!eglBindAPI(api)) { - mp_msg(log, MSGL_V, "Could not bind API!\n"); + MP_VERBOSE(ctx, "Could not bind API!\n"); return false; } - EGLint attributes[] = { EGL_SURFACE_TYPE, EGL_WINDOW_BIT, EGL_RED_SIZE, 1, EGL_GREEN_SIZE, 1, EGL_BLUE_SIZE, 1, - EGL_ALPHA_SIZE, (opts->vo_flags & VOFLAG_ALPHA ) ? 1 : 0, + EGL_ALPHA_SIZE, ctx->opts.want_alpha ? 1 : 0, EGL_RENDERABLE_TYPE, rend, EGL_NONE }; @@ -92,29 +101,34 @@ static bool create_context(EGLDisplay display, struct mp_log *log, bool probing, if (!num_configs) { talloc_free(configs); - mp_msg(log, msgl, "Could not choose EGLConfig!\n"); + MP_MSG(ctx, msgl, "Could not choose EGLConfig!\n"); return false; } int chosen = 0; - if (opts->refine_config) - chosen = opts->refine_config(opts->user_data, configs, num_configs); + if (cb.refine_config) + chosen = cb.refine_config(cb.user_data, configs, num_configs); EGLConfig config = configs[chosen]; talloc_free(configs); - EGLContext *ctx = NULL; + EGLContext *egl_ctx = NULL; if (es_version) { + if (!ra_gl_ctx_test_version(ctx, MPGL_VER(es_version, 0), true)) + return false; + EGLint attrs[] = { EGL_CONTEXT_CLIENT_VERSION, es_version, EGL_NONE }; - ctx = eglCreateContext(display, config, EGL_NO_CONTEXT, attrs); + egl_ctx = eglCreateContext(display, config, EGL_NO_CONTEXT, attrs); } else { for (int n = 0; mpgl_preferred_gl_versions[n]; n++) { int ver = mpgl_preferred_gl_versions[n]; + if (!ra_gl_ctx_test_version(ctx, ver, false)) + continue; EGLint attrs[] = { EGL_CONTEXT_MAJOR_VERSION, MPGL_VER_GET_MAJOR(ver), @@ -124,25 +138,25 @@ static bool create_context(EGLDisplay display, struct mp_log *log, bool probing, EGL_NONE }; - ctx = eglCreateContext(display, config, EGL_NO_CONTEXT, attrs); - if (ctx) + egl_ctx = eglCreateContext(display, config, EGL_NO_CONTEXT, attrs); + if (egl_ctx) break; } - if (!ctx) { + if (!egl_ctx && ra_gl_ctx_test_version(ctx, 140, false)) { // Fallback for EGL 1.4 without EGL_KHR_create_context. 
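/* (Plain EGL 1.4 offers no way to request a specific desktop GL version --
 * the version attributes for EGL_OPENGL_API come from EGL_KHR_create_context
 * -- so the attribute list below is left empty and the implementation returns
 * whatever default desktop context it provides.) */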
EGLint attrs[] = { EGL_NONE }; - ctx = eglCreateContext(display, config, EGL_NO_CONTEXT, attrs); + egl_ctx = eglCreateContext(display, config, EGL_NO_CONTEXT, attrs); } } - if (!ctx) { - mp_msg(log, msgl, "Could not create EGL context!\n"); + if (!egl_ctx) { + MP_MSG(ctx, msgl, "Could not create EGL context!\n"); return false; } - *out_context = ctx; + *out_context = egl_ctx; *out_config = config; return true; } @@ -152,56 +166,36 @@ static bool create_context(EGLDisplay display, struct mp_log *log, bool probing, // Create a context and return it and the config it was created with. If it // returns false, the out_* pointers are set to NULL. // vo_flags is a combination of VOFLAG_* values. -bool mpegl_create_context(EGLDisplay display, struct mp_log *log, int vo_flags, +bool mpegl_create_context(struct ra_ctx *ctx, EGLDisplay display, EGLContext *out_context, EGLConfig *out_config) { - return mpegl_create_context_opts(display, log, - &(struct mpegl_opts){.vo_flags = vo_flags}, out_context, out_config); + return mpegl_create_context_cb(ctx, display, (struct mpegl_cb){0}, + out_context, out_config); } // Create a context and return it and the config it was created with. If it // returns false, the out_* pointers are set to NULL. -bool mpegl_create_context_opts(EGLDisplay display, struct mp_log *log, - struct mpegl_opts *opts, - EGLContext *out_context, EGLConfig *out_config) +bool mpegl_create_context_cb(struct ra_ctx *ctx, EGLDisplay display, + struct mpegl_cb cb, EGLContext *out_context, + EGLConfig *out_config) { - assert(opts); - *out_context = NULL; *out_config = NULL; const char *version = eglQueryString(display, EGL_VERSION); const char *vendor = eglQueryString(display, EGL_VENDOR); const char *apis = eglQueryString(display, EGL_CLIENT_APIS); - mp_verbose(log, "EGL_VERSION=%s\nEGL_VENDOR=%s\nEGL_CLIENT_APIS=%s\n", + MP_VERBOSE(ctx, "EGL_VERSION=%s\nEGL_VENDOR=%s\nEGL_CLIENT_APIS=%s\n", STR_OR_ERR(version), STR_OR_ERR(vendor), STR_OR_ERR(apis)); - bool probing = opts->vo_flags & VOFLAG_PROBING; - int msgl = probing ? MSGL_V : MSGL_FATAL; - bool try_gles = !(opts->vo_flags & VOFLAG_NO_GLES); - - if (!(opts->vo_flags & VOFLAG_GLES)) { - // Desktop OpenGL - if (create_context(display, log, try_gles | probing, 0, opts, - out_context, out_config)) - return true; - } - - if (try_gles && !(opts->vo_flags & VOFLAG_GLES2)) { - // ES 3.x - if (create_context(display, log, true, 3, opts, - out_context, out_config)) - return true; - } - - if (try_gles) { - // ES 2.0 - if (create_context(display, log, probing, 2, opts, - out_context, out_config)) + int es[] = {0, 3, 2}; // preference order + for (int i = 0; i < MP_ARRAY_SIZE(es); i++) { + if (create_context(ctx, display, es[i], cb, out_context, out_config)) return true; } - mp_msg(log, msgl, "Could not create a GL context.\n"); + int msgl = ctx->opts.probing ? MSGL_V : MSGL_ERR; + MP_MSG(ctx, msgl, "Could not create a GL context.\n"); return false; } diff --git a/video/out/opengl/egl_helpers.h b/video/out/opengl/egl_helpers.h index 05f9dccb70..eaaf9d7a48 100644 --- a/video/out/opengl/egl_helpers.h +++ b/video/out/opengl/egl_helpers.h @@ -6,26 +6,23 @@ #include #include +#include "video/out/gpu/context.h" + struct mp_log; -bool mpegl_create_context(EGLDisplay display, struct mp_log *log, int vo_flags, +bool mpegl_create_context(struct ra_ctx *ctx, EGLDisplay display, EGLContext *out_context, EGLConfig *out_config); -struct mpegl_opts { - // combination of VOFLAG_* values. 
- int vo_flags; - - // for callbacks - void *user_data; - +struct mpegl_cb { // if set, pick the desired config from the given list and return its index // defaults to 0 (they are sorted by eglChooseConfig) int (*refine_config)(void *user_data, EGLConfig *configs, int num_configs); + void *user_data; }; -bool mpegl_create_context_opts(EGLDisplay display, struct mp_log *log, - struct mpegl_opts *opts, - EGLContext *out_context, EGLConfig *out_config); +bool mpegl_create_context_cb(struct ra_ctx *ctx, EGLDisplay display, + struct mpegl_cb cb, EGLContext *out_context, + EGLConfig *out_config); struct GL; void mpegl_load_functions(struct GL *gl, struct mp_log *log); diff --git a/video/out/opengl/formats.h b/video/out/opengl/formats.h index 3da6ede82a..f727a3b6ef 100644 --- a/video/out/opengl/formats.h +++ b/video/out/opengl/formats.h @@ -2,7 +2,6 @@ #define MPGL_FORMATS_H_ #include "common.h" -#include "ra.h" struct gl_format { const char *name; // symbolic name for user interaction/debugging diff --git a/video/out/opengl/gl_utils.c b/video/out/opengl/gl_utils.c deleted file mode 100644 index bce2dabe5d..0000000000 --- a/video/out/opengl/gl_utils.c +++ /dev/null @@ -1,291 +0,0 @@ -/* - * This file is part of mpv. - * Parts based on MPlayer code by Reimar Döffinger. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see . 
- */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "osdep/io.h" - -#include "common/common.h" -#include "options/path.h" -#include "stream/stream.h" -#include "formats.h" -#include "ra_gl.h" -#include "gl_utils.h" - -// GLU has this as gluErrorString (we don't use GLU, as it is legacy-OpenGL) -static const char *gl_error_to_string(GLenum error) -{ - switch (error) { - case GL_INVALID_ENUM: return "INVALID_ENUM"; - case GL_INVALID_VALUE: return "INVALID_VALUE"; - case GL_INVALID_OPERATION: return "INVALID_OPERATION"; - case GL_INVALID_FRAMEBUFFER_OPERATION: return "INVALID_FRAMEBUFFER_OPERATION"; - case GL_OUT_OF_MEMORY: return "OUT_OF_MEMORY"; - default: return "unknown"; - } -} - -void gl_check_error(GL *gl, struct mp_log *log, const char *info) -{ - for (;;) { - GLenum error = gl->GetError(); - if (error == GL_NO_ERROR) - break; - mp_msg(log, MSGL_ERR, "%s: OpenGL error %s.\n", info, - gl_error_to_string(error)); - } -} - -static int get_alignment(int stride) -{ - if (stride % 8 == 0) - return 8; - if (stride % 4 == 0) - return 4; - if (stride % 2 == 0) - return 2; - return 1; -} - -// upload a texture, handling things like stride and slices -// target: texture target, usually GL_TEXTURE_2D -// format, type: texture parameters -// dataptr, stride: image data -// x, y, width, height: part of the image to upload -void gl_upload_tex(GL *gl, GLenum target, GLenum format, GLenum type, - const void *dataptr, int stride, - int x, int y, int w, int h) -{ - int bpp = gl_bytes_per_pixel(format, type); - const uint8_t *data = dataptr; - int y_max = y + h; - if (w <= 0 || h <= 0 || !bpp) - return; - if (stride < 0) { - data += (h - 1) * stride; - stride = -stride; - } - gl->PixelStorei(GL_UNPACK_ALIGNMENT, get_alignment(stride)); - int slice = h; - if (gl->mpgl_caps & MPGL_CAP_ROW_LENGTH) { - // this is not always correct, but should work for MPlayer - gl->PixelStorei(GL_UNPACK_ROW_LENGTH, stride / bpp); - } else { - if (stride != bpp * w) - slice = 1; // very inefficient, but at least it works - } - for (; y + slice <= y_max; y += slice) { - gl->TexSubImage2D(target, 0, x, y, w, slice, format, type, data); - data += stride * slice; - } - if (y < y_max) - gl->TexSubImage2D(target, 0, x, y, w, y_max - y, format, type, data); - if (gl->mpgl_caps & MPGL_CAP_ROW_LENGTH) - gl->PixelStorei(GL_UNPACK_ROW_LENGTH, 0); - gl->PixelStorei(GL_UNPACK_ALIGNMENT, 4); -} - -mp_image_t *gl_read_fbo_contents(GL *gl, int fbo, int w, int h) -{ - if (gl->es) - return NULL; // ES can't read from front buffer - mp_image_t *image = mp_image_alloc(IMGFMT_RGB24, w, h); - if (!image) - return NULL; - gl->BindFramebuffer(GL_FRAMEBUFFER, fbo); - GLenum obj = fbo ? 
GL_COLOR_ATTACHMENT0 : GL_FRONT; - gl->PixelStorei(GL_PACK_ALIGNMENT, 1); - gl->ReadBuffer(obj); - //flip image while reading (and also avoid stride-related trouble) - for (int y = 0; y < h; y++) { - gl->ReadPixels(0, h - y - 1, w, 1, GL_RGB, GL_UNSIGNED_BYTE, - image->planes[0] + y * image->stride[0]); - } - gl->PixelStorei(GL_PACK_ALIGNMENT, 4); - gl->BindFramebuffer(GL_FRAMEBUFFER, 0); - return image; -} - -static void gl_vao_enable_attribs(struct gl_vao *vao) -{ - GL *gl = vao->gl; - - for (int n = 0; n < vao->num_entries; n++) { - const struct ra_renderpass_input *e = &vao->entries[n]; - GLenum type = 0; - bool normalized = false; - switch (e->type) { - case RA_VARTYPE_INT: - type = GL_INT; - break; - case RA_VARTYPE_FLOAT: - type = GL_FLOAT; - break; - case RA_VARTYPE_BYTE_UNORM: - type = GL_UNSIGNED_BYTE; - normalized = true; - break; - default: - abort(); - } - assert(e->dim_m == 1); - - gl->EnableVertexAttribArray(n); - gl->VertexAttribPointer(n, e->dim_v, type, normalized, - vao->stride, (void *)(intptr_t)e->offset); - } -} - -void gl_vao_init(struct gl_vao *vao, GL *gl, int stride, - const struct ra_renderpass_input *entries, - int num_entries) -{ - assert(!vao->vao); - assert(!vao->buffer); - - *vao = (struct gl_vao){ - .gl = gl, - .stride = stride, - .entries = entries, - .num_entries = num_entries, - }; - - gl->GenBuffers(1, &vao->buffer); - - if (gl->BindVertexArray) { - gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer); - - gl->GenVertexArrays(1, &vao->vao); - gl->BindVertexArray(vao->vao); - gl_vao_enable_attribs(vao); - gl->BindVertexArray(0); - - gl->BindBuffer(GL_ARRAY_BUFFER, 0); - } -} - -void gl_vao_uninit(struct gl_vao *vao) -{ - GL *gl = vao->gl; - if (!gl) - return; - - if (gl->DeleteVertexArrays) - gl->DeleteVertexArrays(1, &vao->vao); - gl->DeleteBuffers(1, &vao->buffer); - - *vao = (struct gl_vao){0}; -} - -static void gl_vao_bind(struct gl_vao *vao) -{ - GL *gl = vao->gl; - - if (gl->BindVertexArray) { - gl->BindVertexArray(vao->vao); - } else { - gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer); - gl_vao_enable_attribs(vao); - gl->BindBuffer(GL_ARRAY_BUFFER, 0); - } -} - -static void gl_vao_unbind(struct gl_vao *vao) -{ - GL *gl = vao->gl; - - if (gl->BindVertexArray) { - gl->BindVertexArray(0); - } else { - for (int n = 0; n < vao->num_entries; n++) - gl->DisableVertexAttribArray(n); - } -} - -// Draw the vertex data (as described by the gl_vao_entry entries) in ptr -// to the screen. num is the number of vertexes. prim is usually GL_TRIANGLES. -// If ptr is NULL, then skip the upload, and use the data uploaded with the -// previous call. 
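/* Illustrative call sequence for these VAO helpers ("struct vertex", entries
 * and verts are placeholders; the signatures are the ones declared in
 * gl_utils.h):
 *
 *     struct gl_vao vao = {0};
 *     gl_vao_init(&vao, gl, sizeof(struct vertex), entries, num_entries);
 *     gl_vao_draw_data(&vao, GL_TRIANGLES, verts, num_verts); // upload + draw
 *     gl_vao_draw_data(&vao, GL_TRIANGLES, NULL, num_verts);  // redraw last upload
 *     gl_vao_uninit(&vao);
 */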
-void gl_vao_draw_data(struct gl_vao *vao, GLenum prim, void *ptr, size_t num) -{ - GL *gl = vao->gl; - - if (ptr) { - gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer); - gl->BufferData(GL_ARRAY_BUFFER, num * vao->stride, ptr, GL_STREAM_DRAW); - gl->BindBuffer(GL_ARRAY_BUFFER, 0); - } - - gl_vao_bind(vao); - - gl->DrawArrays(prim, 0, num); - - gl_vao_unbind(vao); -} - -static void GLAPIENTRY gl_debug_cb(GLenum source, GLenum type, GLuint id, - GLenum severity, GLsizei length, - const GLchar *message, const void *userParam) -{ - // keep in mind that the debug callback can be asynchronous - struct mp_log *log = (void *)userParam; - int level = MSGL_ERR; - switch (severity) { - case GL_DEBUG_SEVERITY_NOTIFICATION:level = MSGL_V; break; - case GL_DEBUG_SEVERITY_LOW: level = MSGL_INFO; break; - case GL_DEBUG_SEVERITY_MEDIUM: level = MSGL_WARN; break; - case GL_DEBUG_SEVERITY_HIGH: level = MSGL_ERR; break; - } - mp_msg(log, level, "GL: %s\n", message); -} - -void gl_set_debug_logger(GL *gl, struct mp_log *log) -{ - if (gl->DebugMessageCallback) - gl->DebugMessageCallback(log ? gl_debug_cb : NULL, log); -} - -int gl_get_fb_depth(GL *gl, int fbo) -{ - if ((gl->es < 300 && !gl->version) || !(gl->mpgl_caps & MPGL_CAP_FB)) - return -1; - - gl->BindFramebuffer(GL_FRAMEBUFFER, fbo); - - GLenum obj = gl->version ? GL_BACK_LEFT : GL_BACK; - if (fbo) - obj = GL_COLOR_ATTACHMENT0; - - GLint depth_g = -1; - - gl->GetFramebufferAttachmentParameteriv(GL_FRAMEBUFFER, obj, - GL_FRAMEBUFFER_ATTACHMENT_GREEN_SIZE, &depth_g); - - gl->BindFramebuffer(GL_FRAMEBUFFER, 0); - - return depth_g > 0 ? depth_g : -1; -} diff --git a/video/out/opengl/gl_utils.h b/video/out/opengl/gl_utils.h deleted file mode 100644 index 306ee23f65..0000000000 --- a/video/out/opengl/gl_utils.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * This file is part of mpv. - * Parts based on MPlayer code by Reimar Döffinger. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see . 
- */ - -#ifndef MP_GL_UTILS_ -#define MP_GL_UTILS_ - -#include - -#include "common.h" -#include "ra.h" - -struct mp_log; - -void gl_check_error(GL *gl, struct mp_log *log, const char *info); - -void gl_upload_tex(GL *gl, GLenum target, GLenum format, GLenum type, - const void *dataptr, int stride, - int x, int y, int w, int h); - -mp_image_t *gl_read_fbo_contents(GL *gl, int fbo, int w, int h); - -struct gl_vao { - GL *gl; - GLuint vao; // the VAO object, or 0 if unsupported by driver - GLuint buffer; // GL_ARRAY_BUFFER used for the data - int stride; // size of each element (interleaved elements are assumed) - const struct ra_renderpass_input *entries; - int num_entries; -}; - -void gl_vao_init(struct gl_vao *vao, GL *gl, int stride, - const struct ra_renderpass_input *entries, - int num_entries); -void gl_vao_uninit(struct gl_vao *vao); -void gl_vao_draw_data(struct gl_vao *vao, GLenum prim, void *ptr, size_t num); - -void gl_set_debug_logger(GL *gl, struct mp_log *log); - -int gl_get_fb_depth(GL *gl, int fbo); - -#endif diff --git a/video/out/opengl/hwdec.c b/video/out/opengl/hwdec.c deleted file mode 100644 index 5fbc1aa4a9..0000000000 --- a/video/out/opengl/hwdec.c +++ /dev/null @@ -1,239 +0,0 @@ -/* - * This file is part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see . 
- */ - -#include -#include - -#include "config.h" - -#include "common/common.h" -#include "common/msg.h" -#include "options/m_config.h" -#include "hwdec.h" - -extern const struct ra_hwdec_driver ra_hwdec_vaegl; -extern const struct ra_hwdec_driver ra_hwdec_vaglx; -extern const struct ra_hwdec_driver ra_hwdec_videotoolbox; -extern const struct ra_hwdec_driver ra_hwdec_vdpau; -extern const struct ra_hwdec_driver ra_hwdec_dxva2egl; -extern const struct ra_hwdec_driver ra_hwdec_d3d11egl; -extern const struct ra_hwdec_driver ra_hwdec_d3d11eglrgb; -extern const struct ra_hwdec_driver ra_hwdec_dxva2gldx; -extern const struct ra_hwdec_driver ra_hwdec_dxva2; -extern const struct ra_hwdec_driver ra_hwdec_cuda; -extern const struct ra_hwdec_driver ra_hwdec_rpi_overlay; - -static const struct ra_hwdec_driver *const mpgl_hwdec_drivers[] = { -#if HAVE_VAAPI_EGL - &ra_hwdec_vaegl, -#endif -#if HAVE_VAAPI_GLX - &ra_hwdec_vaglx, -#endif -#if HAVE_VDPAU_GL_X11 - &ra_hwdec_vdpau, -#endif -#if HAVE_VIDEOTOOLBOX_GL || HAVE_IOS_GL - &ra_hwdec_videotoolbox, -#endif -#if HAVE_D3D_HWACCEL - &ra_hwdec_d3d11egl, - &ra_hwdec_d3d11eglrgb, - #if HAVE_D3D9_HWACCEL - &ra_hwdec_dxva2egl, - #endif -#endif -#if HAVE_GL_DXINTEROP_D3D9 - &ra_hwdec_dxva2gldx, -#endif -#if HAVE_CUDA_HWACCEL - &ra_hwdec_cuda, -#endif -#if HAVE_RPI - &ra_hwdec_rpi_overlay, -#endif - NULL -}; - -static struct ra_hwdec *load_hwdec_driver(struct mp_log *log, struct ra *ra, - struct mpv_global *global, - struct mp_hwdec_devices *devs, - const struct ra_hwdec_driver *drv, - bool is_auto) -{ - struct ra_hwdec *hwdec = talloc(NULL, struct ra_hwdec); - *hwdec = (struct ra_hwdec) { - .driver = drv, - .log = mp_log_new(hwdec, log, drv->name), - .global = global, - .ra = ra, - .devs = devs, - .probing = is_auto, - .priv = talloc_zero_size(hwdec, drv->priv_size), - }; - mp_verbose(log, "Loading hwdec driver '%s'\n", drv->name); - if (hwdec->driver->init(hwdec) < 0) { - ra_hwdec_uninit(hwdec); - mp_verbose(log, "Loading failed.\n"); - return NULL; - } - return hwdec; -} - -struct ra_hwdec *ra_hwdec_load_api(struct mp_log *log, struct ra *ra, - struct mpv_global *g, - struct mp_hwdec_devices *devs, - enum hwdec_type api) -{ - bool is_auto = HWDEC_IS_AUTO(api); - for (int n = 0; mpgl_hwdec_drivers[n]; n++) { - const struct ra_hwdec_driver *drv = mpgl_hwdec_drivers[n]; - if ((is_auto || api == drv->api) && !drv->testing_only) { - struct ra_hwdec *r = load_hwdec_driver(log, ra, g, devs, drv, is_auto); - if (r) - return r; - } - } - return NULL; -} - -// Load by option name. 
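/* Illustrative caller (variable names are placeholders; the entry points are
 * the ones declared in hwdec.h): a renderer asking for the user-selected
 * interop would do roughly
 *
 *     struct ra_hwdec *hw = ra_hwdec_load(log, ra, global, devs, interop_name);
 *     if (hw) {
 *         ...
 *         ra_hwdec_uninit(hw);
 *     }
 *
 * where a NULL or empty interop_name falls back to whatever --hwdec selects. */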
-struct ra_hwdec *ra_hwdec_load(struct mp_log *log, struct ra *ra, - struct mpv_global *g, - struct mp_hwdec_devices *devs, - const char *name) -{ - int g_hwdec_api; - mp_read_option_raw(g, "hwdec", &m_option_type_choice, &g_hwdec_api); - if (!name || !name[0]) - name = m_opt_choice_str(mp_hwdec_names, g_hwdec_api); - - int api_id = HWDEC_NONE; - for (int n = 0; mp_hwdec_names[n].name; n++) { - if (name && strcmp(mp_hwdec_names[n].name, name) == 0) - api_id = mp_hwdec_names[n].value; - } - - for (int n = 0; mpgl_hwdec_drivers[n]; n++) { - const struct ra_hwdec_driver *drv = mpgl_hwdec_drivers[n]; - if (name && strcmp(drv->name, name) == 0) { - struct ra_hwdec *r = load_hwdec_driver(log, ra, g, devs, drv, false); - if (r) - return r; - } - } - - return ra_hwdec_load_api(log, ra, g, devs, api_id); -} - -int ra_hwdec_validate_opt(struct mp_log *log, const m_option_t *opt, - struct bstr name, struct bstr param) -{ - bool help = bstr_equals0(param, "help"); - if (help) - mp_info(log, "Available hwdecs:\n"); - for (int n = 0; mpgl_hwdec_drivers[n]; n++) { - const struct ra_hwdec_driver *drv = mpgl_hwdec_drivers[n]; - const char *api_name = m_opt_choice_str(mp_hwdec_names, drv->api); - if (help) { - mp_info(log, " %s [%s]\n", drv->name, api_name); - } else if (bstr_equals0(param, drv->name) || - bstr_equals0(param, api_name)) - { - return 1; - } - } - if (help) { - mp_info(log, " auto (loads best)\n" - " (other --hwdec values)\n" - "Setting an empty string means use --hwdec.\n"); - return M_OPT_EXIT; - } - if (!param.len) - return 1; // "" is treated specially - for (int n = 0; mp_hwdec_names[n].name; n++) { - if (bstr_equals0(param, mp_hwdec_names[n].name)) - return 1; - } - mp_fatal(log, "No hwdec backend named '%.*s' found!\n", BSTR_P(param)); - return M_OPT_INVALID; -} - -void ra_hwdec_uninit(struct ra_hwdec *hwdec) -{ - if (hwdec) - hwdec->driver->uninit(hwdec); - talloc_free(hwdec); -} - -bool ra_hwdec_test_format(struct ra_hwdec *hwdec, int imgfmt) -{ - for (int n = 0; hwdec->driver->imgfmts[n]; n++) { - if (hwdec->driver->imgfmts[n] == imgfmt) - return true; - } - return false; -} - -struct ra_hwdec_mapper *ra_hwdec_mapper_create(struct ra_hwdec *hwdec, - struct mp_image_params *params) -{ - assert(ra_hwdec_test_format(hwdec, params->imgfmt)); - - struct ra_hwdec_mapper *mapper = talloc_ptrtype(NULL, mapper); - *mapper = (struct ra_hwdec_mapper){ - .owner = hwdec, - .driver = hwdec->driver->mapper, - .log = hwdec->log, - .ra = hwdec->ra, - .priv = talloc_zero_size(mapper, hwdec->driver->mapper->priv_size), - .src_params = *params, - .dst_params = *params, - }; - if (mapper->driver->init(mapper) < 0) - ra_hwdec_mapper_free(&mapper); - return mapper; -} - -void ra_hwdec_mapper_free(struct ra_hwdec_mapper **mapper) -{ - struct ra_hwdec_mapper *p = *mapper; - if (p) { - ra_hwdec_mapper_unmap(p); - p->driver->uninit(p); - talloc_free(p); - } - *mapper = NULL; -} - -void ra_hwdec_mapper_unmap(struct ra_hwdec_mapper *mapper) -{ - if (mapper->driver->unmap) - mapper->driver->unmap(mapper); - mp_image_unrefp(&mapper->src); -} - -int ra_hwdec_mapper_map(struct ra_hwdec_mapper *mapper, struct mp_image *img) -{ - ra_hwdec_mapper_unmap(mapper); - mp_image_setrefp(&mapper->src, img); - if (mapper->driver->map(mapper) < 0) { - ra_hwdec_mapper_unmap(mapper); - return -1; - } - return 0; -} diff --git a/video/out/opengl/hwdec.h b/video/out/opengl/hwdec.h deleted file mode 100644 index 20bbaae9eb..0000000000 --- a/video/out/opengl/hwdec.h +++ /dev/null @@ -1,130 +0,0 @@ -#ifndef MPGL_HWDEC_H_ -#define 
MPGL_HWDEC_H_ - -#include "video/mp_image.h" -#include "ra.h" -#include "video/hwdec.h" - -struct ra_hwdec { - const struct ra_hwdec_driver *driver; - struct mp_log *log; - struct mpv_global *global; - struct ra *ra; - struct mp_hwdec_devices *devs; - // GLSL extensions required to sample textures from this. - const char **glsl_extensions; - // For free use by hwdec driver - void *priv; - // For working around the vdpau vs. vaapi mess. - bool probing; - // Used in overlay mode only. - float overlay_colorkey[4]; -}; - -struct ra_hwdec_mapper { - const struct ra_hwdec_mapper_driver *driver; - struct mp_log *log; - struct ra *ra; - void *priv; - struct ra_hwdec *owner; - // Input frame parameters. (Set before init(), immutable.) - struct mp_image_params src_params; - // Output frame parameters (represents the format the textures return). Must - // be set by init(), immutable afterwards, - struct mp_image_params dst_params; - - // The currently mapped source image (or the image about to be mapped in - // ->map()). NULL if unmapped. The mapper can also clear this reference if - // the mapped textures contain a full copy. - struct mp_image *src; - - // The mapped textures and metadata about them. These fields change if a - // new frame is mapped (or unmapped), but otherwise remain constant. - // The common code won't mess with these, so you can e.g. set them in the - // .init() callback. - struct ra_tex *tex[4]; - bool vdpau_fields; -}; - -// This can be used to map frames of a specific hw format as GL textures. -struct ra_hwdec_mapper_driver { - // Used to create ra_hwdec_mapper.priv. - size_t priv_size; - - // Init the mapper implementation. At this point, the field src_params, - // fns, devs, priv are initialized. - int (*init)(struct ra_hwdec_mapper *mapper); - // Destroy the mapper. unmap is called before this. - void (*uninit)(struct ra_hwdec_mapper *mapper); - - // Map mapper->src as texture, and set mapper->frame to textures using it. - // It is expected that that the textures remain valid until the next unmap - // or uninit call. - // The function is allowed to unref mapper->src if it's not needed (i.e. - // this function creates a copy). - // The underlying format can change, so you might need to do some form - // of change detection. You also must reject unsupported formats with an - // error. - // On error, returns negative value on error and remains unmapped. - int (*map)(struct ra_hwdec_mapper *mapper); - // Unmap the frame. Does nothing if already unmapped. Optional. - void (*unmap)(struct ra_hwdec_mapper *mapper); -}; - -struct ra_hwdec_driver { - // Name of the interop backend. This is used for informational purposes only. - const char *name; - // Used to create ra_hwdec.priv. - size_t priv_size; - // Used to explicitly request a specific API. - enum hwdec_type api; - // One of the hardware surface IMGFMT_ that must be passed to map_image later. - // Terminated with a 0 entry. (Extend the array size as needed.) - const int imgfmts[3]; - // Dosn't load this unless requested by name. - bool testing_only; - - // Create the hwdec device. It must add it to hw->devs, if applicable. - int (*init)(struct ra_hwdec *hw); - void (*uninit)(struct ra_hwdec *hw); - - // This will be used to create a ra_hwdec_mapper from ra_hwdec. - const struct ra_hwdec_mapper_driver *mapper; - - // The following function provides an alternative API. Each ra_hwdec_driver - // must have either provide a mapper or overlay_frame (not both or none), and - // if overlay_frame is set, it operates in overlay mode. 
In this mode, - // OSD etc. is rendered via OpenGL, but the video is rendered as a separate - // layer below it. - // Non-overlay mode is strictly preferred, so try not to use overlay mode. - // Set the given frame as overlay, replacing the previous one. This can also - // just change the position of the overlay. - // hw_image==src==dst==NULL is passed to clear the overlay. - int (*overlay_frame)(struct ra_hwdec *hw, struct mp_image *hw_image, - struct mp_rect *src, struct mp_rect *dst, bool newframe); -}; - -struct ra_hwdec *ra_hwdec_load_api(struct mp_log *log, struct ra *ra, - struct mpv_global *g, - struct mp_hwdec_devices *devs, - enum hwdec_type api); - -struct ra_hwdec *ra_hwdec_load(struct mp_log *log, struct ra *ra, - struct mpv_global *g, - struct mp_hwdec_devices *devs, - const char *name); - -int ra_hwdec_validate_opt(struct mp_log *log, const m_option_t *opt, - struct bstr name, struct bstr param); - -void ra_hwdec_uninit(struct ra_hwdec *hwdec); - -bool ra_hwdec_test_format(struct ra_hwdec *hwdec, int imgfmt); - -struct ra_hwdec_mapper *ra_hwdec_mapper_create(struct ra_hwdec *hwdec, - struct mp_image_params *params); -void ra_hwdec_mapper_free(struct ra_hwdec_mapper **mapper); -void ra_hwdec_mapper_unmap(struct ra_hwdec_mapper *mapper); -int ra_hwdec_mapper_map(struct ra_hwdec_mapper *mapper, struct mp_image *img); - -#endif diff --git a/video/out/opengl/hwdec_cuda.c b/video/out/opengl/hwdec_cuda.c index d40bafee24..d9c4c199f1 100644 --- a/video/out/opengl/hwdec_cuda.c +++ b/video/out/opengl/hwdec_cuda.c @@ -32,11 +32,10 @@ #include #include +#include "video/out/gpu/hwdec.h" #include "formats.h" -#include "hwdec.h" #include "options/m_config.h" #include "ra_gl.h" -#include "video.h" struct priv_owner { struct mp_hwdec_ctx hwctx; diff --git a/video/out/opengl/hwdec_ios.m b/video/out/opengl/hwdec_ios.m index 8e020ded63..71b205b583 100644 --- a/video/out/opengl/hwdec_ios.m +++ b/video/out/opengl/hwdec_ios.m @@ -27,10 +27,10 @@ #include "config.h" +#include "video/out/gpu/hwdec.h" #include "video/mp_image_pool.h" #include "video/vt.h" #include "ra_gl.h" -#include "hwdec.h" struct priv_owner { struct mp_hwdec_ctx hwctx; diff --git a/video/out/opengl/hwdec_osx.c b/video/out/opengl/hwdec_osx.c index 348a5e19c5..cfd5f52e7b 100644 --- a/video/out/opengl/hwdec_osx.c +++ b/video/out/opengl/hwdec_osx.c @@ -29,9 +29,9 @@ #include "config.h" #include "video/mp_image_pool.h" +#include "video/out/gpu/hwdec.h" #include "video/vt.h" #include "ra_gl.h" -#include "hwdec.h" struct priv_owner { struct mp_hwdec_ctx hwctx; diff --git a/video/out/opengl/hwdec_rpi.c b/video/out/opengl/hwdec_rpi.c index 6f39c3e330..ea8312a179 100644 --- a/video/out/opengl/hwdec_rpi.c +++ b/video/out/opengl/hwdec_rpi.c @@ -33,8 +33,8 @@ #include "common/common.h" #include "common/msg.h" #include "video/mp_image.h" +#include "video/out/gpu/hwdec.h" -#include "hwdec.h" #include "common.h" #include "ra_gl.h" diff --git a/video/out/opengl/hwdec_vaegl.c b/video/out/opengl/hwdec_vaegl.c index a0e3222cfc..6078222bd5 100644 --- a/video/out/opengl/hwdec_vaegl.c +++ b/video/out/opengl/hwdec_vaegl.c @@ -30,9 +30,9 @@ #include "config.h" -#include "hwdec.h" -#include "video/vaapi.h" +#include "video/out/gpu/hwdec.h" #include "video/mp_image_pool.h" +#include "video/vaapi.h" #include "common.h" #include "ra_gl.h" diff --git a/video/out/opengl/hwdec_vaglx.c b/video/out/opengl/hwdec_vaglx.c index 8db15c4468..d5bc0b6ee7 100644 --- a/video/out/opengl/hwdec_vaglx.c +++ b/video/out/opengl/hwdec_vaglx.c @@ -25,10 +25,11 @@ #include 
#include "video/out/x11_common.h" -#include "ra_gl.h" -#include "hwdec.h" +#include "video/out/gpu/hwdec.h" #include "video/vaapi.h" +#include "ra_gl.h" + struct priv_owner { struct mp_vaapi_ctx *ctx; VADisplay *display; diff --git a/video/out/opengl/hwdec_vdpau.c b/video/out/opengl/hwdec_vdpau.c index d733650328..e0618e425e 100644 --- a/video/out/opengl/hwdec_vdpau.c +++ b/video/out/opengl/hwdec_vdpau.c @@ -20,7 +20,7 @@ #include -#include "hwdec.h" +#include "video/out/gpu/hwdec.h" #include "ra_gl.h" #include "video/vdpau.h" #include "video/vdpau_mixer.h" diff --git a/video/out/opengl/lcms.c b/video/out/opengl/lcms.c deleted file mode 100644 index 8747ae6aa6..0000000000 --- a/video/out/opengl/lcms.c +++ /dev/null @@ -1,531 +0,0 @@ -/* - * This file is part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see . - */ - -#include -#include - -#include "mpv_talloc.h" - -#include "config.h" - -#include "stream/stream.h" -#include "common/common.h" -#include "misc/bstr.h" -#include "common/msg.h" -#include "options/m_option.h" -#include "options/path.h" -#include "video/csputils.h" -#include "lcms.h" - -#include "osdep/io.h" - -#if HAVE_LCMS2 - -#include -#include -#include - -struct gl_lcms { - void *icc_data; - size_t icc_size; - struct AVBufferRef *vid_profile; - char *current_profile; - bool using_memory_profile; - bool changed; - enum mp_csp_prim current_prim; - enum mp_csp_trc current_trc; - - struct mp_log *log; - struct mpv_global *global; - struct mp_icc_opts *opts; -}; - -static bool parse_3dlut_size(const char *arg, int *p1, int *p2, int *p3) -{ - if (sscanf(arg, "%dx%dx%d", p1, p2, p3) != 3) - return false; - for (int n = 0; n < 3; n++) { - int s = ((int[]) { *p1, *p2, *p3 })[n]; - if (s < 2 || s > 512) - return false; - } - return true; -} - -static int validate_3dlut_size_opt(struct mp_log *log, const m_option_t *opt, - struct bstr name, struct bstr param) -{ - int p1, p2, p3; - char s[20]; - snprintf(s, sizeof(s), "%.*s", BSTR_P(param)); - return parse_3dlut_size(s, &p1, &p2, &p3); -} - -#define OPT_BASE_STRUCT struct mp_icc_opts -const struct m_sub_options mp_icc_conf = { - .opts = (const m_option_t[]) { - OPT_FLAG("use-embedded-icc-profile", use_embedded, 0), - OPT_STRING("icc-profile", profile, M_OPT_FILE), - OPT_FLAG("icc-profile-auto", profile_auto, 0), - OPT_STRING("icc-cache-dir", cache_dir, M_OPT_FILE), - OPT_INT("icc-intent", intent, 0), - OPT_INTRANGE("icc-contrast", contrast, 0, 0, 100000), - OPT_STRING_VALIDATE("icc-3dlut-size", size_str, 0, validate_3dlut_size_opt), - - OPT_REPLACED("3dlut-size", "icc-3dlut-size"), - OPT_REMOVED("icc-cache", "see icc-cache-dir"), - {0} - }, - .size = sizeof(struct mp_icc_opts), - .defaults = &(const struct mp_icc_opts) { - .size_str = "64x64x64", - .intent = INTENT_RELATIVE_COLORIMETRIC, - .use_embedded = true, - }, -}; - -static void lcms2_error_handler(cmsContext ctx, cmsUInt32Number code, - const char *msg) -{ - struct gl_lcms *p = cmsGetContextUserData(ctx); - 
MP_ERR(p, "lcms2: %s\n", msg); -} - -static void load_profile(struct gl_lcms *p) -{ - talloc_free(p->icc_data); - p->icc_data = NULL; - p->icc_size = 0; - p->using_memory_profile = false; - talloc_free(p->current_profile); - p->current_profile = NULL; - - if (!p->opts->profile || !p->opts->profile[0]) - return; - - char *fname = mp_get_user_path(NULL, p->global, p->opts->profile); - MP_VERBOSE(p, "Opening ICC profile '%s'\n", fname); - struct bstr iccdata = stream_read_file(fname, p, p->global, - 100000000); // 100 MB - talloc_free(fname); - if (!iccdata.len) - return; - - talloc_free(p->icc_data); - - p->icc_data = iccdata.start; - p->icc_size = iccdata.len; - p->current_profile = talloc_strdup(p, p->opts->profile); -} - -static void gl_lcms_destructor(void *ptr) -{ - struct gl_lcms *p = ptr; - av_buffer_unref(&p->vid_profile); -} - -struct gl_lcms *gl_lcms_init(void *talloc_ctx, struct mp_log *log, - struct mpv_global *global, - struct mp_icc_opts *opts) -{ - struct gl_lcms *p = talloc_ptrtype(talloc_ctx, p); - talloc_set_destructor(p, gl_lcms_destructor); - *p = (struct gl_lcms) { - .global = global, - .log = log, - .opts = opts, - }; - gl_lcms_update_options(p); - return p; -} - -void gl_lcms_update_options(struct gl_lcms *p) -{ - if ((p->using_memory_profile && !p->opts->profile_auto) || - !bstr_equals(bstr0(p->opts->profile), bstr0(p->current_profile))) - { - load_profile(p); - } - - p->changed = true; // probably -} - -// Warning: profile.start must point to a ta allocation, and the function -// takes over ownership. -// Returns whether the internal profile was changed. -bool gl_lcms_set_memory_profile(struct gl_lcms *p, bstr profile) -{ - if (!p->opts->profile_auto || (p->opts->profile && p->opts->profile[0])) { - talloc_free(profile.start); - return false; - } - - if (p->using_memory_profile && - p->icc_data && profile.start && - profile.len == p->icc_size && - memcmp(profile.start, p->icc_data, p->icc_size) == 0) - { - talloc_free(profile.start); - return false; - } - - p->changed = true; - p->using_memory_profile = true; - - talloc_free(p->icc_data); - - p->icc_data = talloc_steal(p, profile.start); - p->icc_size = profile.len; - - return true; -} - -// Guards against NULL and uses bstr_equals to short-circuit some special cases -static bool vid_profile_eq(struct AVBufferRef *a, struct AVBufferRef *b) -{ - if (!a || !b) - return a == b; - - return bstr_equals((struct bstr){ a->data, a->size }, - (struct bstr){ b->data, b->size }); -} - -// Return whether the profile or config has changed since the last time it was -// retrieved. If it has changed, gl_lcms_get_lut3d() should be called. -bool gl_lcms_has_changed(struct gl_lcms *p, enum mp_csp_prim prim, - enum mp_csp_trc trc, struct AVBufferRef *vid_profile) -{ - if (p->changed || p->current_prim != prim || p->current_trc != trc) - return true; - - return !vid_profile_eq(p->vid_profile, vid_profile); -} - -// Whether a profile is set. (gl_lcms_get_lut3d() is expected to return a lut, -// but it could still fail due to runtime errors, such as invalid icc data.) 
-bool gl_lcms_has_profile(struct gl_lcms *p) -{ - return p->icc_size > 0; -} - -static cmsHPROFILE get_vid_profile(struct gl_lcms *p, cmsContext cms, - cmsHPROFILE disp_profile, - enum mp_csp_prim prim, enum mp_csp_trc trc) -{ - if (p->opts->use_embedded && p->vid_profile) { - // Try using the embedded ICC profile - cmsHPROFILE prof = cmsOpenProfileFromMemTHR(cms, p->vid_profile->data, - p->vid_profile->size); - if (prof) { - MP_VERBOSE(p, "Successfully opened embedded ICC profile\n"); - return prof; - } - - // Otherwise, warn the user and generate the profile as usual - MP_WARN(p, "Video contained an invalid ICC profile! Ignoring..\n"); - } - - // The input profile for the transformation is dependent on the video - // primaries and transfer characteristics - struct mp_csp_primaries csp = mp_get_csp_primaries(prim); - cmsCIExyY wp_xyY = {csp.white.x, csp.white.y, 1.0}; - cmsCIExyYTRIPLE prim_xyY = { - .Red = {csp.red.x, csp.red.y, 1.0}, - .Green = {csp.green.x, csp.green.y, 1.0}, - .Blue = {csp.blue.x, csp.blue.y, 1.0}, - }; - - cmsToneCurve *tonecurve[3] = {0}; - switch (trc) { - case MP_CSP_TRC_LINEAR: tonecurve[0] = cmsBuildGamma(cms, 1.0); break; - case MP_CSP_TRC_GAMMA18: tonecurve[0] = cmsBuildGamma(cms, 1.8); break; - case MP_CSP_TRC_GAMMA22: tonecurve[0] = cmsBuildGamma(cms, 2.2); break; - case MP_CSP_TRC_GAMMA28: tonecurve[0] = cmsBuildGamma(cms, 2.8); break; - - case MP_CSP_TRC_SRGB: - // Values copied from Little-CMS - tonecurve[0] = cmsBuildParametricToneCurve(cms, 4, - (double[5]){2.40, 1/1.055, 0.055/1.055, 1/12.92, 0.04045}); - break; - - case MP_CSP_TRC_PRO_PHOTO: - tonecurve[0] = cmsBuildParametricToneCurve(cms, 4, - (double[5]){1.8, 1.0, 0.0, 1/16.0, 0.03125}); - break; - - case MP_CSP_TRC_BT_1886: { - // To build an appropriate BT.1886 transformation we need access to - // the display's black point, so we LittleCMS' detection function. - // Relative colorimetric is used since we want to approximate the - // BT.1886 to the target device's actual black point even in e.g. - // perceptual mode - const int intent = MP_INTENT_RELATIVE_COLORIMETRIC; - cmsCIEXYZ bp_XYZ; - if (!cmsDetectBlackPoint(&bp_XYZ, disp_profile, intent, 0)) - return false; - - // Map this XYZ value back into the (linear) source space - cmsToneCurve *linear = cmsBuildGamma(cms, 1.0); - cmsHPROFILE rev_profile = cmsCreateRGBProfileTHR(cms, &wp_xyY, &prim_xyY, - (cmsToneCurve*[3]){linear, linear, linear}); - cmsHPROFILE xyz_profile = cmsCreateXYZProfile(); - cmsHTRANSFORM xyz2src = cmsCreateTransformTHR(cms, - xyz_profile, TYPE_XYZ_DBL, rev_profile, TYPE_RGB_DBL, - intent, 0); - cmsFreeToneCurve(linear); - cmsCloseProfile(rev_profile); - cmsCloseProfile(xyz_profile); - if (!xyz2src) - return false; - - double src_black[3]; - cmsDoTransform(xyz2src, &bp_XYZ, src_black, 1); - cmsDeleteTransform(xyz2src); - - // Contrast limiting - if (p->opts->contrast > 0) { - for (int i = 0; i < 3; i++) - src_black[i] = MPMAX(src_black[i], 1.0 / p->opts->contrast); - } - - // Built-in contrast failsafe - double contrast = 3.0 / (src_black[0] + src_black[1] + src_black[2]); - if (contrast > 100000) { - MP_WARN(p, "ICC profile detected contrast very high (>100000)," - " falling back to contrast 1000 for sanity. 
Set the" - " icc-contrast option to silence this warning.\n"); - src_black[0] = src_black[1] = src_black[2] = 1.0 / 1000; - } - - // Build the parametric BT.1886 transfer curve, one per channel - for (int i = 0; i < 3; i++) { - const double gamma = 2.40; - double binv = pow(src_black[i], 1.0/gamma); - tonecurve[i] = cmsBuildParametricToneCurve(cms, 6, - (double[4]){gamma, 1.0 - binv, binv, 0.0}); - } - break; - } - - default: - abort(); - } - - if (!tonecurve[0]) - return false; - - if (!tonecurve[1]) tonecurve[1] = tonecurve[0]; - if (!tonecurve[2]) tonecurve[2] = tonecurve[0]; - - cmsHPROFILE *vid_profile = cmsCreateRGBProfileTHR(cms, &wp_xyY, &prim_xyY, - tonecurve); - - if (tonecurve[2] != tonecurve[0]) cmsFreeToneCurve(tonecurve[2]); - if (tonecurve[1] != tonecurve[0]) cmsFreeToneCurve(tonecurve[1]); - cmsFreeToneCurve(tonecurve[0]); - - return vid_profile; -} - -bool gl_lcms_get_lut3d(struct gl_lcms *p, struct lut3d **result_lut3d, - enum mp_csp_prim prim, enum mp_csp_trc trc, - struct AVBufferRef *vid_profile) -{ - int s_r, s_g, s_b; - bool result = false; - - p->changed = false; - p->current_prim = prim; - p->current_trc = trc; - - // We need to hold on to a reference to the video's ICC profile for as long - // as we still need to perform equality checking, so generate a new - // reference here - av_buffer_unref(&p->vid_profile); - if (vid_profile) { - MP_VERBOSE(p, "Got an embedded ICC profile.\n"); - p->vid_profile = av_buffer_ref(vid_profile); - if (!p->vid_profile) - abort(); - } - - if (!parse_3dlut_size(p->opts->size_str, &s_r, &s_g, &s_b)) - return false; - - if (!gl_lcms_has_profile(p)) - return false; - - void *tmp = talloc_new(NULL); - uint16_t *output = talloc_array(tmp, uint16_t, s_r * s_g * s_b * 4); - struct lut3d *lut = NULL; - cmsContext cms = NULL; - - char *cache_file = NULL; - if (p->opts->cache_dir && p->opts->cache_dir[0]) { - // Gamma is included in the header to help uniquely identify it, - // because we may change the parameter in the future or make it - // customizable, same for the primaries. 
- char *cache_info = talloc_asprintf(tmp, - "ver=1.4, intent=%d, size=%dx%dx%d, prim=%d, trc=%d, " - "contrast=%d\n", - p->opts->intent, s_r, s_g, s_b, prim, trc, p->opts->contrast); - - uint8_t hash[32]; - struct AVSHA *sha = av_sha_alloc(); - if (!sha) - abort(); - av_sha_init(sha, 256); - av_sha_update(sha, cache_info, strlen(cache_info)); - if (vid_profile) - av_sha_update(sha, vid_profile->data, vid_profile->size); - av_sha_update(sha, p->icc_data, p->icc_size); - av_sha_final(sha, hash); - av_free(sha); - - char *cache_dir = mp_get_user_path(tmp, p->global, p->opts->cache_dir); - cache_file = talloc_strdup(tmp, ""); - for (int i = 0; i < sizeof(hash); i++) - cache_file = talloc_asprintf_append(cache_file, "%02X", hash[i]); - cache_file = mp_path_join(tmp, cache_dir, cache_file); - - mp_mkdirp(cache_dir); - } - - // check cache - if (cache_file && stat(cache_file, &(struct stat){0}) == 0) { - MP_VERBOSE(p, "Opening 3D LUT cache in file '%s'.\n", cache_file); - struct bstr cachedata = stream_read_file(cache_file, tmp, p->global, - 1000000000); // 1 GB - if (cachedata.len == talloc_get_size(output)) { - memcpy(output, cachedata.start, cachedata.len); - goto done; - } else { - MP_WARN(p, "3D LUT cache invalid!\n"); - } - } - - cms = cmsCreateContext(NULL, p); - if (!cms) - goto error_exit; - cmsSetLogErrorHandlerTHR(cms, lcms2_error_handler); - - cmsHPROFILE profile = - cmsOpenProfileFromMemTHR(cms, p->icc_data, p->icc_size); - if (!profile) - goto error_exit; - - cmsHPROFILE vid_hprofile = get_vid_profile(p, cms, profile, prim, trc); - if (!vid_hprofile) { - cmsCloseProfile(profile); - goto error_exit; - } - - cmsHTRANSFORM trafo = cmsCreateTransformTHR(cms, vid_hprofile, TYPE_RGB_16, - profile, TYPE_RGBA_16, - p->opts->intent, - cmsFLAGS_HIGHRESPRECALC | - cmsFLAGS_BLACKPOINTCOMPENSATION); - cmsCloseProfile(profile); - cmsCloseProfile(vid_hprofile); - - if (!trafo) - goto error_exit; - - // transform a (s_r)x(s_g)x(s_b) cube, with 3 components per channel - uint16_t *input = talloc_array(tmp, uint16_t, s_r * 3); - for (int b = 0; b < s_b; b++) { - for (int g = 0; g < s_g; g++) { - for (int r = 0; r < s_r; r++) { - input[r * 3 + 0] = r * 65535 / (s_r - 1); - input[r * 3 + 1] = g * 65535 / (s_g - 1); - input[r * 3 + 2] = b * 65535 / (s_b - 1); - } - size_t base = (b * s_r * s_g + g * s_r) * 4; - cmsDoTransform(trafo, input, output + base, s_r); - } - } - - cmsDeleteTransform(trafo); - - if (cache_file) { - FILE *out = fopen(cache_file, "wb"); - if (out) { - fwrite(output, talloc_get_size(output), 1, out); - fclose(out); - } - } - -done: ; - - lut = talloc_ptrtype(NULL, lut); - *lut = (struct lut3d) { - .data = talloc_steal(lut, output), - .size = {s_r, s_g, s_b}, - }; - - *result_lut3d = lut; - result = true; - -error_exit: - - if (cms) - cmsDeleteContext(cms); - - if (!lut) - MP_FATAL(p, "Error loading ICC profile.\n"); - - talloc_free(tmp); - return result; -} - -#else /* HAVE_LCMS2 */ - -const struct m_sub_options mp_icc_conf = { - .opts = (const m_option_t[]) { {0} }, - .size = sizeof(struct mp_icc_opts), - .defaults = &(const struct mp_icc_opts) {0}, -}; - -struct gl_lcms *gl_lcms_init(void *talloc_ctx, struct mp_log *log, - struct mpv_global *global, - struct mp_icc_opts *opts) -{ - return (struct gl_lcms *) talloc_new(talloc_ctx); -} - -void gl_lcms_update_options(struct gl_lcms *p) { } -bool gl_lcms_set_memory_profile(struct gl_lcms *p, bstr profile) {return false;} - -bool gl_lcms_has_changed(struct gl_lcms *p, enum mp_csp_prim prim, - enum mp_csp_trc trc, struct AVBufferRef 
*vid_profile) -{ - return false; -} - -bool gl_lcms_has_profile(struct gl_lcms *p) -{ - return false; -} - -bool gl_lcms_get_lut3d(struct gl_lcms *p, struct lut3d **result_lut3d, - enum mp_csp_prim prim, enum mp_csp_trc trc, - struct AVBufferRef *vid_profile) -{ - return false; -} - -#endif diff --git a/video/out/opengl/lcms.h b/video/out/opengl/lcms.h deleted file mode 100644 index 35bbd61fe0..0000000000 --- a/video/out/opengl/lcms.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef MP_GL_LCMS_H -#define MP_GL_LCMS_H - -#include -#include -#include "misc/bstr.h" -#include "video/csputils.h" -#include - -extern const struct m_sub_options mp_icc_conf; - -struct mp_icc_opts { - int use_embedded; - char *profile; - int profile_auto; - char *cache_dir; - char *size_str; - int intent; - int contrast; -}; - -struct lut3d { - uint16_t *data; - int size[3]; -}; - -struct mp_log; -struct mpv_global; -struct gl_lcms; - -struct gl_lcms *gl_lcms_init(void *talloc_ctx, struct mp_log *log, - struct mpv_global *global, - struct mp_icc_opts *opts); -void gl_lcms_update_options(struct gl_lcms *p); -bool gl_lcms_set_memory_profile(struct gl_lcms *p, bstr profile); -bool gl_lcms_has_profile(struct gl_lcms *p); -bool gl_lcms_get_lut3d(struct gl_lcms *p, struct lut3d **, - enum mp_csp_prim prim, enum mp_csp_trc trc, - struct AVBufferRef *vid_profile); -bool gl_lcms_has_changed(struct gl_lcms *p, enum mp_csp_prim prim, - enum mp_csp_trc trc, struct AVBufferRef *vid_profile); - -#endif diff --git a/video/out/opengl/osd.c b/video/out/opengl/osd.c deleted file mode 100644 index f7c325d1db..0000000000 --- a/video/out/opengl/osd.c +++ /dev/null @@ -1,367 +0,0 @@ -/* - * This file is part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see . 
- */ - -#include -#include -#include - -#include - -#include "common/common.h" -#include "common/msg.h" -#include "video/csputils.h" -#include "video/mp_image.h" -#include "osd.h" - -#define GLSL(x) gl_sc_add(sc, #x "\n"); - -// glBlendFuncSeparate() arguments -static const int blend_factors[SUBBITMAP_COUNT][4] = { - [SUBBITMAP_LIBASS] = {RA_BLEND_SRC_ALPHA, RA_BLEND_ONE_MINUS_SRC_ALPHA, - RA_BLEND_ONE, RA_BLEND_ONE_MINUS_SRC_ALPHA}, - [SUBBITMAP_RGBA] = {RA_BLEND_ONE, RA_BLEND_ONE_MINUS_SRC_ALPHA, - RA_BLEND_ONE, RA_BLEND_ONE_MINUS_SRC_ALPHA}, -}; - -struct vertex { - float position[2]; - float texcoord[2]; - uint8_t ass_color[4]; -}; - -static const struct ra_renderpass_input vertex_vao[] = { - {"position", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, position)}, - {"texcoord" , RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord)}, - {"ass_color", RA_VARTYPE_BYTE_UNORM, 4, 1, offsetof(struct vertex, ass_color)}, - {0} -}; - -struct mpgl_osd_part { - enum sub_bitmap_format format; - int change_id; - struct ra_tex *texture; - int w, h; - int num_subparts; - int prev_num_subparts; - struct sub_bitmap *subparts; - int num_vertices; - struct vertex *vertices; -}; - -struct mpgl_osd { - struct mp_log *log; - struct osd_state *osd; - struct ra *ra; - struct mpgl_osd_part *parts[MAX_OSD_PARTS]; - const struct ra_format *fmt_table[SUBBITMAP_COUNT]; - bool formats[SUBBITMAP_COUNT]; - bool change_flag; // for reporting to API user only - // temporary - int stereo_mode; - struct mp_osd_res osd_res; - void *scratch; -}; - -struct mpgl_osd *mpgl_osd_init(struct ra *ra, struct mp_log *log, - struct osd_state *osd) -{ - struct mpgl_osd *ctx = talloc_ptrtype(NULL, ctx); - *ctx = (struct mpgl_osd) { - .log = log, - .osd = osd, - .ra = ra, - .change_flag = true, - .scratch = talloc_zero_size(ctx, 1), - }; - - ctx->fmt_table[SUBBITMAP_LIBASS] = ra_find_unorm_format(ra, 1, 1); - ctx->fmt_table[SUBBITMAP_RGBA] = ra_find_unorm_format(ra, 1, 4); - - for (int n = 0; n < MAX_OSD_PARTS; n++) - ctx->parts[n] = talloc_zero(ctx, struct mpgl_osd_part); - - for (int n = 0; n < SUBBITMAP_COUNT; n++) - ctx->formats[n] = !!ctx->fmt_table[n]; - - return ctx; -} - -void mpgl_osd_destroy(struct mpgl_osd *ctx) -{ - if (!ctx) - return; - - for (int n = 0; n < MAX_OSD_PARTS; n++) { - struct mpgl_osd_part *p = ctx->parts[n]; - ra_tex_free(ctx->ra, &p->texture); - } - talloc_free(ctx); -} - -static int next_pow2(int v) -{ - for (int x = 0; x < 30; x++) { - if ((1 << x) >= v) - return 1 << x; - } - return INT_MAX; -} - -static bool upload_osd(struct mpgl_osd *ctx, struct mpgl_osd_part *osd, - struct sub_bitmaps *imgs) -{ - struct ra *ra = ctx->ra; - bool ok = false; - - assert(imgs->packed); - - int req_w = next_pow2(imgs->packed_w); - int req_h = next_pow2(imgs->packed_h); - - const struct ra_format *fmt = ctx->fmt_table[imgs->format]; - assert(fmt); - - if (!osd->texture || req_w > osd->w || req_h > osd->h || - osd->format != imgs->format) - { - ra_tex_free(ra, &osd->texture); - - osd->format = imgs->format; - osd->w = FFMAX(32, req_w); - osd->h = FFMAX(32, req_h); - - MP_VERBOSE(ctx, "Reallocating OSD texture to %dx%d.\n", osd->w, osd->h); - - if (osd->w > ra->max_texture_wh || osd->h > ra->max_texture_wh) { - MP_ERR(ctx, "OSD bitmaps do not fit on a surface with the maximum " - "supported size %dx%d.\n", ra->max_texture_wh, - ra->max_texture_wh); - goto done; - } - - struct ra_tex_params params = { - .dimensions = 2, - .w = osd->w, - .h = osd->h, - .d = 1, - .format = fmt, - .render_src = true, - .src_linear = true, - 
.host_mutable = true, - }; - osd->texture = ra_tex_create(ra, ¶ms); - if (!osd->texture) - goto done; - } - - struct ra_tex_upload_params params = { - .tex = osd->texture, - .src = imgs->packed->planes[0], - .invalidate = true, - .rc = &(struct mp_rect){0, 0, imgs->packed_w, imgs->packed_h}, - .stride = imgs->packed->stride[0], - }; - - ok = ra->fns->tex_upload(ra, ¶ms); - -done: - return ok; -} - -static void gen_osd_cb(void *pctx, struct sub_bitmaps *imgs) -{ - struct mpgl_osd *ctx = pctx; - - if (imgs->num_parts == 0 || !ctx->formats[imgs->format]) - return; - - struct mpgl_osd_part *osd = ctx->parts[imgs->render_index]; - - bool ok = true; - if (imgs->change_id != osd->change_id) { - if (!upload_osd(ctx, osd, imgs)) - ok = false; - - osd->change_id = imgs->change_id; - ctx->change_flag = true; - } - osd->num_subparts = ok ? imgs->num_parts : 0; - - MP_TARRAY_GROW(osd, osd->subparts, osd->num_subparts); - memcpy(osd->subparts, imgs->parts, - osd->num_subparts * sizeof(osd->subparts[0])); -} - -bool mpgl_osd_draw_prepare(struct mpgl_osd *ctx, int index, - struct gl_shader_cache *sc) -{ - assert(index >= 0 && index < MAX_OSD_PARTS); - struct mpgl_osd_part *part = ctx->parts[index]; - - enum sub_bitmap_format fmt = part->format; - if (!fmt || !part->num_subparts) - return false; - - gl_sc_uniform_texture(sc, "osdtex", part->texture); - switch (fmt) { - case SUBBITMAP_RGBA: { - GLSL(color = texture(osdtex, texcoord).bgra;) - break; - } - case SUBBITMAP_LIBASS: { - GLSL(color = - vec4(ass_color.rgb, ass_color.a * texture(osdtex, texcoord).r);) - break; - } - default: - abort(); - } - - gl_sc_set_vertex_format(sc, vertex_vao, sizeof(struct vertex)); - - return true; -} - -static void write_quad(struct vertex *va, struct gl_transform t, - float x0, float y0, float x1, float y1, - float tx0, float ty0, float tx1, float ty1, - float tex_w, float tex_h, const uint8_t color[4]) -{ - gl_transform_vec(t, &x0, &y0); - gl_transform_vec(t, &x1, &y1); - -#define COLOR_INIT {color[0], color[1], color[2], color[3]} - va[0] = (struct vertex){ {x0, y0}, {tx0 / tex_w, ty0 / tex_h}, COLOR_INIT }; - va[1] = (struct vertex){ {x0, y1}, {tx0 / tex_w, ty1 / tex_h}, COLOR_INIT }; - va[2] = (struct vertex){ {x1, y0}, {tx1 / tex_w, ty0 / tex_h}, COLOR_INIT }; - va[3] = (struct vertex){ {x1, y1}, {tx1 / tex_w, ty1 / tex_h}, COLOR_INIT }; - va[4] = va[2]; - va[5] = va[1]; -#undef COLOR_INIT -} - -static void generate_verts(struct mpgl_osd_part *part, struct gl_transform t) -{ - int num_vertices = part->num_subparts * 6; - MP_TARRAY_GROW(part, part->vertices, part->num_vertices + num_vertices); - - for (int n = 0; n < part->num_subparts; n++) { - struct sub_bitmap *b = &part->subparts[n]; - struct vertex *va = &part->vertices[part->num_vertices]; - - // NOTE: the blend color is used with SUBBITMAP_LIBASS only, so it - // doesn't matter that we upload garbage for the other formats - uint32_t c = b->libass.color; - uint8_t color[4] = { c >> 24, (c >> 16) & 0xff, - (c >> 8) & 0xff, 255 - (c & 0xff) }; - - write_quad(&va[n * 6], t, - b->x, b->y, b->x + b->dw, b->y + b->dh, - b->src_x, b->src_y, b->src_x + b->w, b->src_y + b->h, - part->w, part->h, color); - } - - part->num_vertices += num_vertices; -} - -// number of screen divisions per axis (x=0, y=1) for the current 3D mode -static void get_3d_side_by_side(int stereo_mode, int div[2]) -{ - div[0] = div[1] = 1; - switch (stereo_mode) { - case MP_STEREO3D_SBS2L: - case MP_STEREO3D_SBS2R: div[0] = 2; break; - case MP_STEREO3D_AB2R: - case MP_STEREO3D_AB2L: div[1] = 2; 
break; - } -} - -void mpgl_osd_draw_finish(struct mpgl_osd *ctx, int index, - struct gl_shader_cache *sc, struct fbodst target) -{ - struct mpgl_osd_part *part = ctx->parts[index]; - - int div[2]; - get_3d_side_by_side(ctx->stereo_mode, div); - - part->num_vertices = 0; - - for (int x = 0; x < div[0]; x++) { - for (int y = 0; y < div[1]; y++) { - struct gl_transform t; - gl_transform_ortho_fbodst(&t, target); - - float a_x = ctx->osd_res.w * x; - float a_y = ctx->osd_res.h * y; - t.t[0] += a_x * t.m[0][0] + a_y * t.m[1][0]; - t.t[1] += a_x * t.m[0][1] + a_y * t.m[1][1]; - - generate_verts(part, t); - } - } - - const int *factors = &blend_factors[part->format][0]; - gl_sc_blend(sc, factors[0], factors[1], factors[2], factors[3]); - - gl_sc_dispatch_draw(sc, target.tex, part->vertices, part->num_vertices); -} - -static void set_res(struct mpgl_osd *ctx, struct mp_osd_res res, int stereo_mode) -{ - int div[2]; - get_3d_side_by_side(stereo_mode, div); - - res.w /= div[0]; - res.h /= div[1]; - ctx->osd_res = res; -} - -void mpgl_osd_generate(struct mpgl_osd *ctx, struct mp_osd_res res, double pts, - int stereo_mode, int draw_flags) -{ - for (int n = 0; n < MAX_OSD_PARTS; n++) - ctx->parts[n]->num_subparts = 0; - - set_res(ctx, res, stereo_mode); - - osd_draw(ctx->osd, ctx->osd_res, pts, draw_flags, ctx->formats, gen_osd_cb, ctx); - ctx->stereo_mode = stereo_mode; - - // Parts going away does not necessarily result in gen_osd_cb() being called - // (not even with num_parts==0), so check this separately. - for (int n = 0; n < MAX_OSD_PARTS; n++) { - struct mpgl_osd_part *part = ctx->parts[n]; - if (part->num_subparts != part->prev_num_subparts) - ctx->change_flag = true; - part->prev_num_subparts = part->num_subparts; - } -} - -// See osd_resize() for remarks. This function is an optional optimization too. 
-void mpgl_osd_resize(struct mpgl_osd *ctx, struct mp_osd_res res, int stereo_mode) -{ - set_res(ctx, res, stereo_mode); - osd_resize(ctx->osd, ctx->osd_res); -} - -bool mpgl_osd_check_change(struct mpgl_osd *ctx, struct mp_osd_res *res, - double pts) -{ - ctx->change_flag = false; - mpgl_osd_generate(ctx, *res, pts, 0, 0); - return ctx->change_flag; -} diff --git a/video/out/opengl/osd.h b/video/out/opengl/osd.h deleted file mode 100644 index 6c2b886de3..0000000000 --- a/video/out/opengl/osd.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef MPLAYER_GL_OSD_H -#define MPLAYER_GL_OSD_H - -#include -#include - -#include "utils.h" -#include "shader_cache.h" -#include "sub/osd.h" - -struct mpgl_osd *mpgl_osd_init(struct ra *ra, struct mp_log *log, - struct osd_state *osd); -void mpgl_osd_destroy(struct mpgl_osd *ctx); - -void mpgl_osd_generate(struct mpgl_osd *ctx, struct mp_osd_res res, double pts, - int stereo_mode, int draw_flags); -void mpgl_osd_resize(struct mpgl_osd *ctx, struct mp_osd_res res, int stereo_mode); -bool mpgl_osd_draw_prepare(struct mpgl_osd *ctx, int index, - struct gl_shader_cache *sc); -void mpgl_osd_draw_finish(struct mpgl_osd *ctx, int index, - struct gl_shader_cache *sc, struct fbodst target); -bool mpgl_osd_check_change(struct mpgl_osd *ctx, struct mp_osd_res *res, - double pts); - -#endif diff --git a/video/out/opengl/ra.c b/video/out/opengl/ra.c deleted file mode 100644 index ef1de54d1a..0000000000 --- a/video/out/opengl/ra.c +++ /dev/null @@ -1,327 +0,0 @@ -#include "common/common.h" -#include "common/msg.h" -#include "video/img_format.h" - -#include "ra.h" - -struct ra_tex *ra_tex_create(struct ra *ra, const struct ra_tex_params *params) -{ - return ra->fns->tex_create(ra, params); -} - -void ra_tex_free(struct ra *ra, struct ra_tex **tex) -{ - if (*tex) - ra->fns->tex_destroy(ra, *tex); - *tex = NULL; -} - -struct ra_buf *ra_buf_create(struct ra *ra, const struct ra_buf_params *params) -{ - return ra->fns->buf_create(ra, params); -} - -void ra_buf_free(struct ra *ra, struct ra_buf **buf) -{ - if (*buf) - ra->fns->buf_destroy(ra, *buf); - *buf = NULL; -} - -void ra_free(struct ra **ra) -{ - if (*ra) - (*ra)->fns->destroy(*ra); - talloc_free(*ra); - *ra = NULL; -} - -size_t ra_vartype_size(enum ra_vartype type) -{ - switch (type) { - case RA_VARTYPE_INT: return sizeof(int); - case RA_VARTYPE_FLOAT: return sizeof(float); - case RA_VARTYPE_BYTE_UNORM: return 1; - default: return 0; - } -} - -struct ra_layout ra_renderpass_input_layout(struct ra_renderpass_input *input) -{ - size_t el_size = ra_vartype_size(input->type); - if (!el_size) - return (struct ra_layout){0}; - - // host data is always tightly packed - return (struct ra_layout) { - .align = 1, - .stride = el_size * input->dim_v, - .size = el_size * input->dim_v * input->dim_m, - }; -} - -static struct ra_renderpass_input *dup_inputs(void *ta_parent, - const struct ra_renderpass_input *inputs, int num_inputs) -{ - struct ra_renderpass_input *res = - talloc_memdup(ta_parent, (void *)inputs, num_inputs * sizeof(inputs[0])); - for (int n = 0; n < num_inputs; n++) - res[n].name = talloc_strdup(res, res[n].name); - return res; -} - -// Return a newly allocated deep-copy of params. 
-struct ra_renderpass_params *ra_renderpass_params_copy(void *ta_parent, - const struct ra_renderpass_params *params) -{ - struct ra_renderpass_params *res = talloc_ptrtype(ta_parent, res); - *res = *params; - res->inputs = dup_inputs(res, res->inputs, res->num_inputs); - res->vertex_attribs = - dup_inputs(res, res->vertex_attribs, res->num_vertex_attribs); - res->cached_program = bstrdup(res, res->cached_program); - res->vertex_shader = talloc_strdup(res, res->vertex_shader); - res->frag_shader = talloc_strdup(res, res->frag_shader); - res->compute_shader = talloc_strdup(res, res->compute_shader); - return res; -}; - - -// Return whether this is a tightly packed format with no external padding and -// with the same bit size/depth in all components, and the shader returns -// components in the same order as in memory. -static bool ra_format_is_regular(const struct ra_format *fmt) -{ - if (!fmt->pixel_size || !fmt->num_components || !fmt->ordered) - return false; - for (int n = 1; n < fmt->num_components; n++) { - if (fmt->component_size[n] != fmt->component_size[0] || - fmt->component_depth[n] != fmt->component_depth[0]) - return false; - } - if (fmt->component_size[0] * fmt->num_components != fmt->pixel_size * 8) - return false; - return true; -} - -// Return a regular filterable format using RA_CTYPE_UNORM. -const struct ra_format *ra_find_unorm_format(struct ra *ra, - int bytes_per_component, - int n_components) -{ - for (int n = 0; n < ra->num_formats; n++) { - const struct ra_format *fmt = ra->formats[n]; - if (fmt->ctype == RA_CTYPE_UNORM && fmt->num_components == n_components && - fmt->pixel_size == bytes_per_component * n_components && - fmt->component_depth[0] == bytes_per_component * 8 && - fmt->linear_filter && ra_format_is_regular(fmt)) - return fmt; - } - return NULL; -} - -// Return a regular format using RA_CTYPE_UINT. -const struct ra_format *ra_find_uint_format(struct ra *ra, - int bytes_per_component, - int n_components) -{ - for (int n = 0; n < ra->num_formats; n++) { - const struct ra_format *fmt = ra->formats[n]; - if (fmt->ctype == RA_CTYPE_UINT && fmt->num_components == n_components && - fmt->pixel_size == bytes_per_component * n_components && - fmt->component_depth[0] == bytes_per_component * 8 && - ra_format_is_regular(fmt)) - return fmt; - } - return NULL; -} - -// Find a float format of any precision that matches the C type of the same -// size for upload. -// May drop bits from the mantissa (such as selecting float16 even if -// bytes_per_component == 32); prefers possibly faster formats first. -static const struct ra_format *ra_find_float_format(struct ra *ra, - int bytes_per_component, - int n_components) -{ - // Assumes ra_format are ordered by performance. - // The >=16 check is to avoid catching fringe formats. - for (int n = 0; n < ra->num_formats; n++) { - const struct ra_format *fmt = ra->formats[n]; - if (fmt->ctype == RA_CTYPE_FLOAT && fmt->num_components == n_components && - fmt->pixel_size == bytes_per_component * n_components && - fmt->component_depth[0] >= 16 && - fmt->linear_filter && ra_format_is_regular(fmt)) - return fmt; - } - return NULL; -} - -// Return a filterable regular format that uses at least float16 internally, and -// uses a normal C float for transfer on the CPU side. (This is just so we don't -// need 32->16 bit conversion on CPU, which would be messy.) 
-const struct ra_format *ra_find_float16_format(struct ra *ra, int n_components) -{ - return ra_find_float_format(ra, sizeof(float), n_components); -} - -const struct ra_format *ra_find_named_format(struct ra *ra, const char *name) -{ - for (int n = 0; n < ra->num_formats; n++) { - const struct ra_format *fmt = ra->formats[n]; - if (strcmp(fmt->name, name) == 0) - return fmt; - } - return NULL; -} - -// Like ra_find_unorm_format(), but if no fixed point format is available, -// return an unsigned integer format. -static const struct ra_format *find_plane_format(struct ra *ra, int bytes, - int n_channels, - enum mp_component_type ctype) -{ - switch (ctype) { - case MP_COMPONENT_TYPE_UINT: { - const struct ra_format *f = ra_find_unorm_format(ra, bytes, n_channels); - if (f) - return f; - return ra_find_uint_format(ra, bytes, n_channels); - } - case MP_COMPONENT_TYPE_FLOAT: - return ra_find_float_format(ra, bytes, n_channels); - default: return NULL; - } -} - -// Put a mapping of imgfmt to texture formats into *out. Basically it selects -// the correct texture formats needed to represent an imgfmt in a shader, with -// textures using the same memory organization as on the CPU. -// Each plane is represented by a texture, and each texture has a RGBA -// component order. out->components describes the meaning of them. -// May return integer formats for >8 bit formats, if the driver has no -// normalized 16 bit formats. -// Returns false (and *out is not touched) if no format found. -bool ra_get_imgfmt_desc(struct ra *ra, int imgfmt, struct ra_imgfmt_desc *out) -{ - struct ra_imgfmt_desc res = {0}; - - struct mp_regular_imgfmt regfmt; - if (mp_get_regular_imgfmt(®fmt, imgfmt)) { - enum ra_ctype ctype = RA_CTYPE_UNKNOWN; - res.num_planes = regfmt.num_planes; - res.component_bits = regfmt.component_size * 8; - res.component_pad = regfmt.component_pad; - for (int n = 0; n < regfmt.num_planes; n++) { - struct mp_regular_imgfmt_plane *plane = ®fmt.planes[n]; - res.planes[n] = find_plane_format(ra, regfmt.component_size, - plane->num_components, - regfmt.component_type); - if (!res.planes[n]) - return false; - for (int i = 0; i < plane->num_components; i++) - res.components[n][i] = plane->components[i]; - // Dropping LSBs when shifting will lead to dropped MSBs. - if (res.component_bits > res.planes[n]->component_depth[0] && - res.component_pad < 0) - return false; - // Renderer restriction, but actually an unwanted corner case. - if (ctype != RA_CTYPE_UNKNOWN && ctype != res.planes[n]->ctype) - return false; - ctype = res.planes[n]->ctype; - } - res.chroma_w = regfmt.chroma_w; - res.chroma_h = regfmt.chroma_h; - goto supported; - } - - for (int n = 0; n < ra->num_formats; n++) { - if (imgfmt && ra->formats[n]->special_imgfmt == imgfmt) { - res = *ra->formats[n]->special_imgfmt_desc; - goto supported; - } - } - - // Unsupported format - return false; - -supported: - - *out = res; - return true; -} - -void ra_dump_tex_formats(struct ra *ra, int msgl) -{ - if (!mp_msg_test(ra->log, msgl)) - return; - MP_MSG(ra, msgl, "Texture formats:\n"); - MP_MSG(ra, msgl, " NAME COMP*TYPE SIZE DEPTH PER COMP.\n"); - for (int n = 0; n < ra->num_formats; n++) { - const struct ra_format *fmt = ra->formats[n]; - const char *ctype = "unknown"; - switch (fmt->ctype) { - case RA_CTYPE_UNORM: ctype = "unorm"; break; - case RA_CTYPE_UINT: ctype = "uint "; break; - case RA_CTYPE_FLOAT: ctype = "float"; break; - } - char cl[40] = ""; - for (int i = 0; i < fmt->num_components; i++) { - mp_snprintf_cat(cl, sizeof(cl), "%s%d", i ? 
" " : "", - fmt->component_size[i]); - if (fmt->component_size[i] != fmt->component_depth[i]) - mp_snprintf_cat(cl, sizeof(cl), "/%d", fmt->component_depth[i]); - } - MP_MSG(ra, msgl, " %-10s %d*%s %3dB %s %s %s {%s}\n", fmt->name, - fmt->num_components, ctype, fmt->pixel_size, - fmt->luminance_alpha ? "LA" : " ", - fmt->linear_filter ? "LF" : " ", - fmt->renderable ? "CR" : " ", cl); - } - MP_MSG(ra, msgl, " LA = LUMINANCE_ALPHA hack format\n"); - MP_MSG(ra, msgl, " LF = linear filterable\n"); - MP_MSG(ra, msgl, " CR = can be used for render targets\n"); -} - -void ra_dump_imgfmt_desc(struct ra *ra, const struct ra_imgfmt_desc *desc, - int msgl) -{ - char pl[80] = ""; - char pf[80] = ""; - for (int n = 0; n < desc->num_planes; n++) { - if (n > 0) { - mp_snprintf_cat(pl, sizeof(pl), "/"); - mp_snprintf_cat(pf, sizeof(pf), "/"); - } - char t[5] = {0}; - for (int i = 0; i < 4; i++) - t[i] = "_rgba"[desc->components[n][i]]; - for (int i = 3; i > 0 && t[i] == '_'; i--) - t[i] = '\0'; - mp_snprintf_cat(pl, sizeof(pl), "%s", t); - mp_snprintf_cat(pf, sizeof(pf), "%s", desc->planes[n]->name); - } - MP_MSG(ra, msgl, "%d planes %dx%d %d/%d [%s] (%s)\n", - desc->num_planes, desc->chroma_w, desc->chroma_h, - desc->component_bits, desc->component_pad, pf, pl); -} - -void ra_dump_img_formats(struct ra *ra, int msgl) -{ - if (!mp_msg_test(ra->log, msgl)) - return; - MP_MSG(ra, msgl, "Image formats:\n"); - for (int imgfmt = IMGFMT_START; imgfmt < IMGFMT_END; imgfmt++) { - const char *name = mp_imgfmt_to_name(imgfmt); - if (strcmp(name, "unknown") == 0) - continue; - MP_MSG(ra, msgl, " %s", name); - struct ra_imgfmt_desc desc; - if (ra_get_imgfmt_desc(ra, imgfmt, &desc)) { - MP_MSG(ra, msgl, " => "); - ra_dump_imgfmt_desc(ra, &desc, msgl); - } else { - MP_MSG(ra, msgl, "\n"); - } - } -} diff --git a/video/out/opengl/ra.h b/video/out/opengl/ra.h deleted file mode 100644 index ae7fb9aea7..0000000000 --- a/video/out/opengl/ra.h +++ /dev/null @@ -1,491 +0,0 @@ -#pragma once - -#include "common/common.h" -#include "misc/bstr.h" - -// Handle for a rendering API backend. -struct ra { - struct ra_fns *fns; - void *priv; - - int glsl_version; // GLSL version (e.g. 300 => 3.0) - bool glsl_es; // use ES dialect - bool glsl_vulkan; // use vulkan dialect - - struct mp_log *log; - - // RA_CAP_* bit field. The RA backend must set supported features at init - // time. - uint64_t caps; - - // Maximum supported width and height of a 2D texture. Set by the RA backend - // at init time. - int max_texture_wh; - - // Maximum shared memory for compute shaders. Set by the RA backend at init - // time. - size_t max_shmem; - - // Set of supported texture formats. Must be added by RA backend at init time. - // If there are equivalent formats with different caveats, the preferred - // formats should have a lower index. (E.g. GLES3 should put rg8 before la.) - struct ra_format **formats; - int num_formats; - - // Accelerate texture uploads via an extra PBO even when - // RA_CAP_DIRECT_UPLOAD is supported. This is basically only relevant for - // OpenGL. Set by the RA user. 
- bool use_pbo; -}; - -enum { - RA_CAP_TEX_1D = 1 << 0, // supports 1D textures (as shader inputs) - RA_CAP_TEX_3D = 1 << 1, // supports 3D textures (as shader inputs) - RA_CAP_BLIT = 1 << 2, // supports ra_fns.blit - RA_CAP_COMPUTE = 1 << 3, // supports compute shaders - RA_CAP_DIRECT_UPLOAD = 1 << 4, // supports tex_upload without ra_buf - RA_CAP_BUF_RO = 1 << 5, // supports RA_VARTYPE_BUF_RO - RA_CAP_BUF_RW = 1 << 6, // supports RA_VARTYPE_BUF_RW - RA_CAP_NESTED_ARRAY = 1 << 7, // supports nested arrays - RA_CAP_SHARED_BINDING = 1 << 8, // sampler/image/buffer namespaces are disjoint - RA_CAP_GLOBAL_UNIFORM = 1 << 9, // supports using "naked" uniforms (not UBO) -}; - -enum ra_ctype { - RA_CTYPE_UNKNOWN = 0, // also used for inconsistent multi-component formats - RA_CTYPE_UNORM, // unsigned normalized integer (fixed point) formats - RA_CTYPE_UINT, // full integer formats - RA_CTYPE_FLOAT, // float formats (signed, any bit size) -}; - -// All formats must be useable as texture formats. All formats must be byte -// aligned (all pixels start and end on a byte boundary), at least as far CPU -// transfers are concerned. -struct ra_format { - // All fields are read-only after creation. - const char *name; // symbolic name for user interaction/debugging - void *priv; - enum ra_ctype ctype; // data type of each component - bool ordered; // components are sequential in memory, and returned - // by the shader in memory order (the shader can - // return arbitrary values for unused components) - int num_components; // component count, 0 if not applicable, max. 4 - int component_size[4]; // in bits, all entries 0 if not applicable - int component_depth[4]; // bits in use for each component, 0 if not applicable - // (_must_ be set if component_size[] includes padding, - // and the real procession as seen by shader is lower) - int pixel_size; // in bytes, total pixel size (0 if opaque) - bool luminance_alpha; // pre-GL_ARB_texture_rg hack for 2 component textures - // if this is set, shader must use .ra instead of .rg - // only applies to 2-component textures - bool linear_filter; // linear filtering available from shader - bool renderable; // can be used for render targets - - // If not 0, the format represents some sort of packed fringe format, whose - // shader representation is given by the special_imgfmt_desc pointer. - int special_imgfmt; - const struct ra_imgfmt_desc *special_imgfmt_desc; -}; - -struct ra_tex_params { - int dimensions; // 1-3 for 1D-3D textures - // Size of the texture. 1D textures require h=d=1, 2D textures require d=1. - int w, h, d; - const struct ra_format *format; - bool render_src; // must be useable as source texture in a shader - bool render_dst; // must be useable as target texture in a shader - bool storage_dst; // must be usable as a storage image (RA_VARTYPE_IMG_W) - bool blit_src; // must be usable as a blit source - bool blit_dst; // must be usable as a blit destination - bool host_mutable; // texture may be updated with tex_upload - // When used as render source texture. - bool src_linear; // if false, use nearest sampling (whether this can - // be true depends on ra_format.linear_filter) - bool src_repeat; // if false, clamp texture coordinates to edge - // if true, repeat texture coordinates - bool non_normalized; // hack for GL_TEXTURE_RECTANGLE OSX idiocy - // always set to false, except in OSX code - bool external_oes; // hack for GL_TEXTURE_EXTERNAL_OES idiocy - // If non-NULL, the texture will be created with these contents. 
Using - // this does *not* require setting host_mutable. Otherwise, the initial - // data is undefined. - void *initial_data; -}; - -// Conflates the following typical GPU API concepts: -// - texture itself -// - sampler state -// - staging buffers for texture upload -// - framebuffer objects -// - wrappers for swapchain framebuffers -// - synchronization needed for upload/rendering/etc. -struct ra_tex { - // All fields are read-only after creation. - struct ra_tex_params params; - void *priv; -}; - -struct ra_tex_upload_params { - struct ra_tex *tex; // Texture to upload to - bool invalidate; // Discard pre-existing data not in the region uploaded - // Uploading from buffer: - struct ra_buf *buf; // Buffer to upload from (mutually exclusive with `src`) - size_t buf_offset; // Start of data within buffer (bytes) - // Uploading directly: (Note: If RA_CAP_DIRECT_UPLOAD is not set, then this - // will be internally translated to a tex_upload buffer by the RA) - const void *src; // Address of data - // For 2D textures only: - struct mp_rect *rc; // Region to upload. NULL means entire image - ptrdiff_t stride; // The size of a horizontal line in bytes (*not* texels!) -}; - -// Buffer type hint. Setting this may result in more or less efficient -// operation, although it shouldn't technically prohibit anything -enum ra_buf_type { - RA_BUF_TYPE_INVALID, - RA_BUF_TYPE_TEX_UPLOAD, // texture upload buffer (pixel buffer object) - RA_BUF_TYPE_SHADER_STORAGE, // shader buffer (SSBO), for RA_VARTYPE_BUF_RW - RA_BUF_TYPE_UNIFORM, // uniform buffer (UBO), for RA_VARTYPE_BUF_RO -}; - -struct ra_buf_params { - enum ra_buf_type type; - size_t size; - bool host_mapped; // create a read-writable persistent mapping (ra_buf.data) - bool host_mutable; // contents may be updated via buf_update() - // If non-NULL, the buffer will be created with these contents. Otherwise, - // the initial data is undefined. - void *initial_data; -}; - -// A generic buffer, which can be used for many purposes (texture upload, -// storage buffer, uniform buffer, etc.) -struct ra_buf { - // All fields are read-only after creation. - struct ra_buf_params params; - void *data; // for persistently mapped buffers, points to the first byte - void *priv; -}; - -// Type of a shader uniform variable, or a vertex attribute. In all cases, -// vectors are matrices are done by having more than 1 value. -enum ra_vartype { - RA_VARTYPE_INVALID, - RA_VARTYPE_INT, // C: int, GLSL: int, ivec* - RA_VARTYPE_FLOAT, // C: float, GLSL: float, vec*, mat* - RA_VARTYPE_TEX, // C: ra_tex*, GLSL: various sampler types - // ra_tex.params.render_src must be true - RA_VARTYPE_IMG_W, // C: ra_tex*, GLSL: various image types - // write-only (W) image for compute shaders - // ra_tex.params.storage_dst must be true - RA_VARTYPE_BYTE_UNORM, // C: uint8_t, GLSL: int, vec* (vertex data only) - RA_VARTYPE_BUF_RO, // C: ra_buf*, GLSL: uniform buffer block - // buf type must be RA_BUF_TYPE_UNIFORM - RA_VARTYPE_BUF_RW, // C: ra_buf*, GLSL: shader storage buffer block - // buf type must be RA_BUF_TYPE_SHADER_STORAGE - RA_VARTYPE_COUNT -}; - -// Returns the host size of a ra_vartype, or 0 for abstract vartypes (e.g. tex) -size_t ra_vartype_size(enum ra_vartype type); - -// Represents a uniform, texture input parameter, and similar things. -struct ra_renderpass_input { - const char *name; // name as used in the shader - enum ra_vartype type; - // The total number of values is given by dim_v * dim_m. 
- int dim_v; // vector dimension (1 for non-vector and non-matrix) - int dim_m; // additional matrix dimension (dim_v x dim_m) - // Vertex data: byte offset of the attribute into the vertex struct - size_t offset; - // RA_VARTYPE_TEX: texture unit - // RA_VARTYPE_IMG_W: image unit - // RA_VARTYPE_BUF_* buffer binding point - // Other uniforms: unused - // If RA_CAP_SHARED_BINDING is set, these may only be unique per input type. - // Otherwise, these must be unique for all input values. - int binding; -}; - -// Represents the layout requirements of an input value -struct ra_layout { - size_t align; // the alignment requirements (always a power of two) - size_t stride; // the delta between two rows of an array/matrix - size_t size; // the total size of the input -}; - -// Returns the host layout of a render pass input. Returns {0} for renderpass -// inputs without a corresponding host representation (e.g. textures/buffers) -struct ra_layout ra_renderpass_input_layout(struct ra_renderpass_input *input); - -enum ra_blend { - RA_BLEND_ZERO, - RA_BLEND_ONE, - RA_BLEND_SRC_ALPHA, - RA_BLEND_ONE_MINUS_SRC_ALPHA, -}; - -enum ra_renderpass_type { - RA_RENDERPASS_TYPE_INVALID, - RA_RENDERPASS_TYPE_RASTER, // vertex+fragment shader - RA_RENDERPASS_TYPE_COMPUTE, // compute shader -}; - -// Static part of a rendering pass. It conflates the following: -// - compiled shader and its list of uniforms -// - vertex attributes and its shader mappings -// - blending parameters -// (For Vulkan, this would be shader module + pipeline state.) -// Upon creation, the values of dynamic values such as uniform contents (whose -// initial values are not provided here) are required to be 0. -struct ra_renderpass_params { - enum ra_renderpass_type type; - - // Uniforms, including texture/sampler inputs. - struct ra_renderpass_input *inputs; - int num_inputs; - - // Highly implementation-specific byte array storing a compiled version - // of the program. Can be used to speed up shader compilation. A backend - // xan read this in renderpass_create, or set this on the newly created - // ra_renderpass params field. - bstr cached_program; - - // --- type==RA_RENDERPASS_TYPE_RASTER only - - // Describes the format of the vertex data. When using ra.glsl_vulkan, - // the order of this array must match the vertex attribute locations. - struct ra_renderpass_input *vertex_attribs; - int num_vertex_attribs; - int vertex_stride; - - // Format of the target texture - const struct ra_format *target_format; - - // Shader text, in GLSL. (Yes, you need a GLSL compiler.) - // These are complete shaders, including prelude and declarations. - const char *vertex_shader; - const char *frag_shader; - - // Target blending mode. If enable_blend is false, the blend_ fields can - // be ignored. - bool enable_blend; - enum ra_blend blend_src_rgb; - enum ra_blend blend_dst_rgb; - enum ra_blend blend_src_alpha; - enum ra_blend blend_dst_alpha; - - // --- type==RA_RENDERPASS_TYPE_COMPUTE only - - // Shader text, like vertex_shader/frag_shader. 
- const char *compute_shader; -}; - -struct ra_renderpass_params *ra_renderpass_params_copy(void *ta_parent, - const struct ra_renderpass_params *params); - -// Conflates the following typical GPU API concepts: -// - various kinds of shaders -// - rendering pipelines -// - descriptor sets, uniforms, other bindings -// - all synchronization necessary -// - the current values of all uniforms (this one makes it relatively stateful -// from an API perspective) -struct ra_renderpass { - // All fields are read-only after creation. - struct ra_renderpass_params params; - void *priv; -}; - -// An input value (see ra_renderpass_input). -struct ra_renderpass_input_val { - int index; // index into ra_renderpass_params.inputs[] - void *data; // pointer to data according to ra_renderpass_input - // (e.g. type==RA_VARTYPE_FLOAT+dim_v=3,dim_m=3 => float[9]) -}; - -// Parameters for performing a rendering pass (basically the dynamic params). -// These change potentially every time. -struct ra_renderpass_run_params { - struct ra_renderpass *pass; - - // Generally this lists parameters only which changed since the last - // invocation and need to be updated. The ra_renderpass instance is - // supposed to keep unchanged values from the previous run. - // For non-primitive types like textures, these entries are always added, - // even if they do not change. - struct ra_renderpass_input_val *values; - int num_values; - - // --- pass->params.type==RA_RENDERPASS_TYPE_RASTER only - - // target->params.render_dst must be true, and target->params.format must - // match pass->params.target_format. - struct ra_tex *target; - struct mp_rect viewport; - struct mp_rect scissors; - - // (The primitive type is always a triangle list.) - void *vertex_data; - int vertex_count; // number of vertex elements, not bytes - - // --- pass->params.type==RA_RENDERPASS_TYPE_COMPUTE only - - // Number of work groups to be run in X/Y/Z dimensions. - int compute_groups[3]; -}; - -// This is an opaque type provided by the implementation, but we want to at -// least give it a saner name than void* for code readability purposes. -typedef void ra_timer; - -// Rendering API entrypoints. (Note: there are some additional hidden features -// you need to take care of. For example, hwdec mapping will be provided -// separately from ra, but might need to call into ra private code.) -struct ra_fns { - void (*destroy)(struct ra *ra); - - // Create a texture (with undefined contents). Return NULL on failure. - // This is a rare operation, and normally textures and even FBOs for - // temporary rendering intermediate data are cached. - struct ra_tex *(*tex_create)(struct ra *ra, - const struct ra_tex_params *params); - - void (*tex_destroy)(struct ra *ra, struct ra_tex *tex); - - // Upload data to a texture. This is an extremely common operation. When - // using a buffer, the contants of the buffer must exactly match the image - // - conversions between bit depth etc. are not supported. The buffer *may* - // be marked as "in use" while this operation is going on, and the contents - // must not be touched again by the API user until buf_poll returns true. - // Returns whether successful. - bool (*tex_upload)(struct ra *ra, const struct ra_tex_upload_params *params); - - // Create a buffer. This can be used as a persistently mapped buffer, - // a uniform buffer, a shader storage buffer or possibly others. - // Not all usage types must be supported; may return NULL if unavailable. 
- struct ra_buf *(*buf_create)(struct ra *ra, - const struct ra_buf_params *params); - - void (*buf_destroy)(struct ra *ra, struct ra_buf *buf); - - // Update the contents of a buffer, starting at a given offset and up to a - // given size, with the contents of *data. This is an extremely common - // operation. Calling this while the buffer is considered "in use" is an - // error. (See: buf_poll) - void (*buf_update)(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset, - const void *data, size_t size); - - // Returns if a buffer is currently "in use" or not. Updating the contents - // of a buffer (via buf_update or writing to buf->data) while it is still - // in use is an error and may result in graphical corruption. Optional, if - // NULL then all buffers are always usable. - bool (*buf_poll)(struct ra *ra, struct ra_buf *buf); - - // Returns the layout requirements of a uniform buffer element. Optional, - // but must be implemented if RA_CAP_BUF_RO is supported. - struct ra_layout (*uniform_layout)(struct ra_renderpass_input *inp); - - // Clear the dst with the given color (rgba) and within the given scissor. - // dst must have dst->params.render_dst==true. Content outside of the - // scissor is preserved. - void (*clear)(struct ra *ra, struct ra_tex *dst, float color[4], - struct mp_rect *scissor); - - // Copy a sub-rectangle from one texture to another. The source/dest region - // is always within the texture bounds. Areas outside the dest region are - // preserved. The formats of the textures must be losely compatible. The - // dst texture can be a swapchain framebuffer, but src can not. Only 2D - // textures are supported. - // The textures must have blit_src and blit_dst set, respectively. - // Rectangles with negative width/height lead to flipping, different src/dst - // sizes lead to point scaling. Coordinates are always in pixels. - // Optional. Only available if RA_CAP_BLIT is set (if it's not set, it must - // not be called, even if it's non-NULL). - void (*blit)(struct ra *ra, struct ra_tex *dst, struct ra_tex *src, - struct mp_rect *dst_rc, struct mp_rect *src_rc); - - // Compile a shader and create a pipeline. This is a rare operation. - // The params pointer and anything it points to must stay valid until - // renderpass_destroy. - struct ra_renderpass *(*renderpass_create)(struct ra *ra, - const struct ra_renderpass_params *params); - - void (*renderpass_destroy)(struct ra *ra, struct ra_renderpass *pass); - - // Perform a render pass, basically drawing a list of triangles to a FBO. - // This is an extremely common operation. - void (*renderpass_run)(struct ra *ra, - const struct ra_renderpass_run_params *params); - - // Create a timer object. Returns NULL on failure, or if timers are - // unavailable for some reason. Optional. - ra_timer *(*timer_create)(struct ra *ra); - - void (*timer_destroy)(struct ra *ra, ra_timer *timer); - - // Start recording a timer. Note that valid usage requires you to pair - // every start with a stop. Trying to start a timer twice, or trying to - // stop a timer before having started it, consistutes invalid usage. - void (*timer_start)(struct ra *ra, ra_timer *timer); - - // Stop recording a timer. This also returns any results that have been - // measured since the last usage of this ra_timer. It's important to note - // that GPU timer measurement are asynchronous, so this function does not - // always produce a value - and the values it does produce are typically - // delayed by a few frames. When no value is available, this returns 0. 
- uint64_t (*timer_stop)(struct ra *ra, ra_timer *timer); - - // Hint that possibly queued up commands should be sent to the GPU. Optional. - void (*flush)(struct ra *ra); - - // Associates a marker with any past error messages, for debugging - // purposes. Optional. - void (*debug_marker)(struct ra *ra, const char *msg); -}; - -struct ra_tex *ra_tex_create(struct ra *ra, const struct ra_tex_params *params); -void ra_tex_free(struct ra *ra, struct ra_tex **tex); - -struct ra_buf *ra_buf_create(struct ra *ra, const struct ra_buf_params *params); -void ra_buf_free(struct ra *ra, struct ra_buf **buf); - -void ra_free(struct ra **ra); - -const struct ra_format *ra_find_unorm_format(struct ra *ra, - int bytes_per_component, - int n_components); -const struct ra_format *ra_find_uint_format(struct ra *ra, - int bytes_per_component, - int n_components); -const struct ra_format *ra_find_float16_format(struct ra *ra, int n_components); -const struct ra_format *ra_find_named_format(struct ra *ra, const char *name); - -struct ra_imgfmt_desc { - int num_planes; - const struct ra_format *planes[4]; - // Chroma pixel size (1x1 is 4:4:4) - uint8_t chroma_w, chroma_h; - // Component storage size in bits (possibly padded). For formats with - // different sizes per component, this is arbitrary. For padded formats - // like P010 or YUV420P10, padding is included. - int component_bits; - // Like mp_regular_imgfmt.component_pad. - int component_pad; - // For each texture and each texture output (rgba order) describe what - // component it returns. - // The values are like the values in mp_regular_imgfmt_plane.components[]. - // Access as components[plane_nr][component_index]. Set unused items to 0. - // For ra_format.luminance_alpha, this returns 1/2 ("rg") instead of 1/4 - // ("ra"). the logic is that the texture format has 2 channels, thus the - // data must be returned in the first two components. The renderer fixes - // this later. 
- uint8_t components[4][4]; -}; - -bool ra_get_imgfmt_desc(struct ra *ra, int imgfmt, struct ra_imgfmt_desc *out); - -void ra_dump_tex_formats(struct ra *ra, int msgl); -void ra_dump_imgfmt_desc(struct ra *ra, const struct ra_imgfmt_desc *desc, - int msgl); -void ra_dump_img_formats(struct ra *ra, int msgl); diff --git a/video/out/opengl/ra_gl.c b/video/out/opengl/ra_gl.c index 0d99877a9e..ccb8755ba6 100644 --- a/video/out/opengl/ra_gl.c +++ b/video/out/opengl/ra_gl.c @@ -1097,12 +1097,6 @@ static uint64_t gl_timer_stop(struct ra *ra, ra_timer *ratimer) return timer->result; } -static void gl_flush(struct ra *ra) -{ - GL *gl = ra_gl_get(ra); - gl->Flush(); -} - static void gl_debug_marker(struct ra *ra, const char *msg) { struct ra_gl *p = ra->priv; @@ -1130,6 +1124,5 @@ static struct ra_fns ra_fns_gl = { .timer_destroy = gl_timer_destroy, .timer_start = gl_timer_start, .timer_stop = gl_timer_stop, - .flush = gl_flush, .debug_marker = gl_debug_marker, }; diff --git a/video/out/opengl/ra_gl.h b/video/out/opengl/ra_gl.h index e5e09a0197..9844977801 100644 --- a/video/out/opengl/ra_gl.h +++ b/video/out/opengl/ra_gl.h @@ -1,8 +1,7 @@ #pragma once #include "common.h" -#include "ra.h" -#include "gl_utils.h" +#include "utils.h" struct ra *ra_create_gl(GL *gl, struct mp_log *log); struct ra_tex *ra_create_wrapped_tex(struct ra *ra, diff --git a/video/out/opengl/shader_cache.c b/video/out/opengl/shader_cache.c deleted file mode 100644 index 90a757617b..0000000000 --- a/video/out/opengl/shader_cache.c +++ /dev/null @@ -1,955 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "osdep/io.h" - -#include "common/common.h" -#include "options/path.h" -#include "stream/stream.h" -#include "shader_cache.h" -#include "formats.h" -#include "utils.h" - -// Force cache flush if more than this number of shaders is created. -#define SC_MAX_ENTRIES 48 - -union uniform_val { - float f[9]; // RA_VARTYPE_FLOAT - int i[4]; // RA_VARTYPE_INT - struct ra_tex *tex; // RA_VARTYPE_TEX, RA_VARTYPE_IMG_* - struct ra_buf *buf; // RA_VARTYPE_BUF_* -}; - -enum sc_uniform_type { - SC_UNIFORM_TYPE_GLOBAL = 0, // global uniform (RA_CAP_GLOBAL_UNIFORM) - SC_UNIFORM_TYPE_UBO = 1, // uniform buffer (RA_CAP_BUF_RO) -}; - -struct sc_uniform { - enum sc_uniform_type type; - struct ra_renderpass_input input; - const char *glsl_type; - union uniform_val v; - char *buffer_format; - // for SC_UNIFORM_TYPE_UBO: - struct ra_layout layout; - size_t offset; // byte offset within the buffer -}; - -struct sc_cached_uniform { - union uniform_val v; - int index; // for ra_renderpass_input_val - bool set; // whether the uniform has ever been set -}; - -struct sc_entry { - struct ra_renderpass *pass; - struct sc_cached_uniform *cached_uniforms; - int num_cached_uniforms; - bstr total; - struct timer_pool *timer; - struct ra_buf *ubo; - int ubo_index; // for ra_renderpass_input_val.index -}; - -struct gl_shader_cache { - struct ra *ra; - struct mp_log *log; - - // permanent - char **exts; - int num_exts; - - // this is modified during use (gl_sc_add() etc.) and reset for each shader - bstr prelude_text; - bstr header_text; - bstr text; - - // Next binding point (texture unit, image unit, buffer binding, etc.) 
- // In OpenGL these are separate for each input type - int next_binding[RA_VARTYPE_COUNT]; - - struct ra_renderpass_params params; - - struct sc_entry **entries; - int num_entries; - - struct sc_entry *current_shader; // set by gl_sc_generate() - - struct sc_uniform *uniforms; - int num_uniforms; - - int ubo_binding; - size_t ubo_size; - - struct ra_renderpass_input_val *values; - int num_values; - - // For checking that the user is calling gl_sc_reset() properly. - bool needs_reset; - - bool error_state; // true if an error occurred - - // temporary buffers (avoids frequent reallocations) - bstr tmp[6]; - - // For the disk-cache. - char *cache_dir; - struct mpv_global *global; // can be NULL -}; - -static void gl_sc_reset(struct gl_shader_cache *sc); - -struct gl_shader_cache *gl_sc_create(struct ra *ra, struct mpv_global *global, - struct mp_log *log) -{ - struct gl_shader_cache *sc = talloc_ptrtype(NULL, sc); - *sc = (struct gl_shader_cache){ - .ra = ra, - .global = global, - .log = log, - }; - gl_sc_reset(sc); - return sc; -} - -// Reset the previous pass. This must be called after gl_sc_generate and before -// starting a new shader. -static void gl_sc_reset(struct gl_shader_cache *sc) -{ - sc->prelude_text.len = 0; - sc->header_text.len = 0; - sc->text.len = 0; - for (int n = 0; n < sc->num_uniforms; n++) - talloc_free((void *)sc->uniforms[n].input.name); - sc->num_uniforms = 0; - sc->ubo_binding = 0; - sc->ubo_size = 0; - for (int i = 0; i < RA_VARTYPE_COUNT; i++) - sc->next_binding[i] = 0; - sc->current_shader = NULL; - sc->params = (struct ra_renderpass_params){0}; - sc->needs_reset = false; -} - -static void sc_flush_cache(struct gl_shader_cache *sc) -{ - MP_VERBOSE(sc, "flushing shader cache\n"); - - for (int n = 0; n < sc->num_entries; n++) { - struct sc_entry *e = sc->entries[n]; - ra_buf_free(sc->ra, &e->ubo); - if (e->pass) - sc->ra->fns->renderpass_destroy(sc->ra, e->pass); - timer_pool_destroy(e->timer); - talloc_free(e); - } - sc->num_entries = 0; -} - -void gl_sc_destroy(struct gl_shader_cache *sc) -{ - if (!sc) - return; - gl_sc_reset(sc); - sc_flush_cache(sc); - talloc_free(sc); -} - -bool gl_sc_error_state(struct gl_shader_cache *sc) -{ - return sc->error_state; -} - -void gl_sc_reset_error(struct gl_shader_cache *sc) -{ - sc->error_state = false; -} - -void gl_sc_enable_extension(struct gl_shader_cache *sc, char *name) -{ - for (int n = 0; n < sc->num_exts; n++) { - if (strcmp(sc->exts[n], name) == 0) - return; - } - MP_TARRAY_APPEND(sc, sc->exts, sc->num_exts, talloc_strdup(sc, name)); -} - -#define bstr_xappend0(sc, b, s) bstr_xappend(sc, b, bstr0(s)) - -void gl_sc_add(struct gl_shader_cache *sc, const char *text) -{ - bstr_xappend0(sc, &sc->text, text); -} - -void gl_sc_addf(struct gl_shader_cache *sc, const char *textf, ...) -{ - va_list ap; - va_start(ap, textf); - bstr_xappend_vasprintf(sc, &sc->text, textf, ap); - va_end(ap); -} - -void gl_sc_hadd(struct gl_shader_cache *sc, const char *text) -{ - bstr_xappend0(sc, &sc->header_text, text); -} - -void gl_sc_haddf(struct gl_shader_cache *sc, const char *textf, ...) -{ - va_list ap; - va_start(ap, textf); - bstr_xappend_vasprintf(sc, &sc->header_text, textf, ap); - va_end(ap); -} - -void gl_sc_hadd_bstr(struct gl_shader_cache *sc, struct bstr text) -{ - bstr_xappend(sc, &sc->header_text, text); -} - -void gl_sc_paddf(struct gl_shader_cache *sc, const char *textf, ...) 
-{ - va_list ap; - va_start(ap, textf); - bstr_xappend_vasprintf(sc, &sc->prelude_text, textf, ap); - va_end(ap); -} - -static struct sc_uniform *find_uniform(struct gl_shader_cache *sc, - const char *name) -{ - struct sc_uniform new = { - .input = { - .dim_v = 1, - .dim_m = 1, - }, - }; - - for (int n = 0; n < sc->num_uniforms; n++) { - struct sc_uniform *u = &sc->uniforms[n]; - if (strcmp(u->input.name, name) == 0) { - const char *allocname = u->input.name; - *u = new; - u->input.name = allocname; - return u; - } - } - - // not found -> add it - new.input.name = talloc_strdup(NULL, name); - MP_TARRAY_APPEND(sc, sc->uniforms, sc->num_uniforms, new); - return &sc->uniforms[sc->num_uniforms - 1]; -} - -static int gl_sc_next_binding(struct gl_shader_cache *sc, enum ra_vartype type) -{ - if (sc->ra->caps & RA_CAP_SHARED_BINDING) { - return sc->next_binding[type]++; - } else { - return sc->next_binding[0]++; - } -} - -// Updates the UBO metadata for the given sc_uniform. Assumes sc_uniform->input -// is already set. Also updates sc_uniform->type. -static void update_ubo_params(struct gl_shader_cache *sc, struct sc_uniform *u) -{ - if (!(sc->ra->caps & RA_CAP_BUF_RO)) - return; - - // Using UBOs with explicit layout(offset) like we do requires GLSL version - // 440 or higher. In theory the UBO code can also use older versions, but - // just try and avoid potential headaches. This also ensures they're only - // used on drivers that are probably modern enough to actually support them - // correctly. - if (sc->ra->glsl_version < 440) - return; - - u->type = SC_UNIFORM_TYPE_UBO; - u->layout = sc->ra->fns->uniform_layout(&u->input); - u->offset = MP_ALIGN_UP(sc->ubo_size, u->layout.align); - sc->ubo_size = u->offset + u->layout.size; -} - -void gl_sc_uniform_texture(struct gl_shader_cache *sc, char *name, - struct ra_tex *tex) -{ - const char *glsl_type = "sampler2D"; - if (tex->params.dimensions == 1) { - glsl_type = "sampler1D"; - } else if (tex->params.dimensions == 3) { - glsl_type = "sampler3D"; - } else if (tex->params.non_normalized) { - glsl_type = "sampler2DRect"; - } else if (tex->params.external_oes) { - glsl_type = "samplerExternalOES"; - } else if (tex->params.format->ctype == RA_CTYPE_UINT) { - glsl_type = sc->ra->glsl_es ? "highp usampler2D" : "usampler2D"; - } - - struct sc_uniform *u = find_uniform(sc, name); - u->input.type = RA_VARTYPE_TEX; - u->glsl_type = glsl_type; - u->input.binding = gl_sc_next_binding(sc, u->input.type); - u->v.tex = tex; -} - -void gl_sc_uniform_image2D_wo(struct gl_shader_cache *sc, const char *name, - struct ra_tex *tex) -{ - gl_sc_enable_extension(sc, "GL_ARB_shader_image_load_store"); - - struct sc_uniform *u = find_uniform(sc, name); - u->input.type = RA_VARTYPE_IMG_W; - u->glsl_type = "writeonly image2D"; - u->input.binding = gl_sc_next_binding(sc, u->input.type); - u->v.tex = tex; -} - -void gl_sc_ssbo(struct gl_shader_cache *sc, char *name, struct ra_buf *buf, - char *format, ...) 
-{ - assert(sc->ra->caps & RA_CAP_BUF_RW); - gl_sc_enable_extension(sc, "GL_ARB_shader_storage_buffer_object"); - - struct sc_uniform *u = find_uniform(sc, name); - u->input.type = RA_VARTYPE_BUF_RW; - u->glsl_type = ""; - u->input.binding = gl_sc_next_binding(sc, u->input.type); - u->v.buf = buf; - - va_list ap; - va_start(ap, format); - u->buffer_format = ta_vasprintf(sc, format, ap); - va_end(ap); -} - -void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, float f) -{ - struct sc_uniform *u = find_uniform(sc, name); - u->input.type = RA_VARTYPE_FLOAT; - u->glsl_type = "float"; - update_ubo_params(sc, u); - u->v.f[0] = f; -} - -void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, int i) -{ - struct sc_uniform *u = find_uniform(sc, name); - u->input.type = RA_VARTYPE_INT; - u->glsl_type = "int"; - update_ubo_params(sc, u); - u->v.i[0] = i; -} - -void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, float f[2]) -{ - struct sc_uniform *u = find_uniform(sc, name); - u->input.type = RA_VARTYPE_FLOAT; - u->input.dim_v = 2; - u->glsl_type = "vec2"; - update_ubo_params(sc, u); - u->v.f[0] = f[0]; - u->v.f[1] = f[1]; -} - -void gl_sc_uniform_vec3(struct gl_shader_cache *sc, char *name, GLfloat f[3]) -{ - struct sc_uniform *u = find_uniform(sc, name); - u->input.type = RA_VARTYPE_FLOAT; - u->input.dim_v = 3; - u->glsl_type = "vec3"; - update_ubo_params(sc, u); - u->v.f[0] = f[0]; - u->v.f[1] = f[1]; - u->v.f[2] = f[2]; -} - -static void transpose2x2(float r[2 * 2]) -{ - MPSWAP(float, r[0+2*1], r[1+2*0]); -} - -void gl_sc_uniform_mat2(struct gl_shader_cache *sc, char *name, - bool transpose, GLfloat *v) -{ - struct sc_uniform *u = find_uniform(sc, name); - u->input.type = RA_VARTYPE_FLOAT; - u->input.dim_v = 2; - u->input.dim_m = 2; - u->glsl_type = "mat2"; - update_ubo_params(sc, u); - for (int n = 0; n < 4; n++) - u->v.f[n] = v[n]; - if (transpose) - transpose2x2(&u->v.f[0]); -} - -static void transpose3x3(float r[3 * 3]) -{ - MPSWAP(float, r[0+3*1], r[1+3*0]); - MPSWAP(float, r[0+3*2], r[2+3*0]); - MPSWAP(float, r[1+3*2], r[2+3*1]); -} - -void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name, - bool transpose, GLfloat *v) -{ - struct sc_uniform *u = find_uniform(sc, name); - u->input.type = RA_VARTYPE_FLOAT; - u->input.dim_v = 3; - u->input.dim_m = 3; - u->glsl_type = "mat3"; - update_ubo_params(sc, u); - for (int n = 0; n < 9; n++) - u->v.f[n] = v[n]; - if (transpose) - transpose3x3(&u->v.f[0]); -} - -// Tell the shader generator (and later gl_sc_draw_data()) about the vertex -// data layout and attribute names. The entries array is terminated with a {0} -// entry. The array memory must remain valid indefinitely (for now). 
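Outside the patch, a minimal sketch of the shader-cache workflow around gl_sc_set_vertex_format(): declare the vertex layout once, set uniforms, append the fragment body, and dispatch. The vertex struct, the "tint" uniform and draw_tinted_quad are hypothetical, and the gpu/shader_cache.h path assumes the post-refactor layout.

#include <stddef.h>
#include "video/out/gpu/shader_cache.h"

struct vertex {
    float position[2]; // clip-space position, becomes gl_Position
    float texcoord[2]; // passed through to the fragment shader by name
};

// Static storage: the entries array must stay valid for the shader cache.
static const struct ra_renderpass_input vertex_attribs[] = {
    {.name = "position", .type = RA_VARTYPE_FLOAT, .dim_v = 2, .dim_m = 1,
     .offset = offsetof(struct vertex, position)},
    {.name = "texcoord", .type = RA_VARTYPE_FLOAT, .dim_v = 2, .dim_m = 1,
     .offset = offsetof(struct vertex, texcoord)},
    {0}
};

static void draw_tinted_quad(struct gl_shader_cache *sc, struct ra_tex *src,
                             struct ra_tex *target, float tint)
{
    gl_sc_set_vertex_format(sc, vertex_attribs, sizeof(struct vertex));
    gl_sc_uniform_texture(sc, "src", src);
    gl_sc_uniform_f(sc, "tint", tint);
    // The body only has to write to "color"; #version, uniform declarations
    // and main() are generated (and the compiled pass cached) for us.
    gl_sc_add(sc, "color = vec4(tint) * texture(src, texcoord);\n");

    struct vertex quad[6] = {
        {{-1, -1}, {0, 0}}, {{ 1, -1}, {1, 0}}, {{-1,  1}, {0, 1}},
        {{ 1, -1}, {1, 0}}, {{ 1,  1}, {1, 1}}, {{-1,  1}, {0, 1}},
    };
    gl_sc_dispatch_draw(sc, target, quad, 6);
}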
-void gl_sc_set_vertex_format(struct gl_shader_cache *sc, - const struct ra_renderpass_input *entries, - int vertex_stride) -{ - sc->params.vertex_attribs = (struct ra_renderpass_input *)entries; - sc->params.num_vertex_attribs = 0; - while (entries[sc->params.num_vertex_attribs].name) - sc->params.num_vertex_attribs++; - sc->params.vertex_stride = vertex_stride; -} - -void gl_sc_blend(struct gl_shader_cache *sc, - enum ra_blend blend_src_rgb, - enum ra_blend blend_dst_rgb, - enum ra_blend blend_src_alpha, - enum ra_blend blend_dst_alpha) -{ - sc->params.enable_blend = true; - sc->params.blend_src_rgb = blend_src_rgb; - sc->params.blend_dst_rgb = blend_dst_rgb; - sc->params.blend_src_alpha = blend_src_alpha; - sc->params.blend_dst_alpha = blend_dst_alpha; -} - -static const char *vao_glsl_type(const struct ra_renderpass_input *e) -{ - // pretty dumb... too dumb, but works for us - switch (e->dim_v) { - case 1: return "float"; - case 2: return "vec2"; - case 3: return "vec3"; - case 4: return "vec4"; - default: abort(); - } -} - -static void update_ubo(struct ra *ra, struct ra_buf *ubo, struct sc_uniform *u) -{ - uintptr_t src = (uintptr_t) &u->v; - size_t dst = u->offset; - struct ra_layout src_layout = ra_renderpass_input_layout(&u->input); - struct ra_layout dst_layout = u->layout; - - for (int i = 0; i < u->input.dim_m; i++) { - ra->fns->buf_update(ra, ubo, dst, (void *)src, src_layout.stride); - src += src_layout.stride; - dst += dst_layout.stride; - } -} - -static void update_uniform(struct gl_shader_cache *sc, struct sc_entry *e, - struct sc_uniform *u, int n) -{ - struct sc_cached_uniform *un = &e->cached_uniforms[n]; - struct ra_layout layout = ra_renderpass_input_layout(&u->input); - if (layout.size > 0 && un->set && memcmp(&un->v, &u->v, layout.size) == 0) - return; - - un->v = u->v; - un->set = true; - - switch (u->type) { - case SC_UNIFORM_TYPE_GLOBAL: { - struct ra_renderpass_input_val value = { - .index = un->index, - .data = &un->v, - }; - MP_TARRAY_APPEND(sc, sc->values, sc->num_values, value); - break; - } - case SC_UNIFORM_TYPE_UBO: - assert(e->ubo); - update_ubo(sc->ra, e->ubo, u); - break; - default: abort(); - } -} - -void gl_sc_set_cache_dir(struct gl_shader_cache *sc, const char *dir) -{ - talloc_free(sc->cache_dir); - sc->cache_dir = talloc_strdup(sc, dir); -} - -static bool create_pass(struct gl_shader_cache *sc, struct sc_entry *entry) -{ - bool ret = false; - - void *tmp = talloc_new(NULL); - struct ra_renderpass_params params = sc->params; - - MP_VERBOSE(sc, "new shader program:\n"); - if (sc->header_text.len) { - MP_VERBOSE(sc, "header:\n"); - mp_log_source(sc->log, MSGL_V, sc->header_text.start); - MP_VERBOSE(sc, "body:\n"); - } - if (sc->text.len) - mp_log_source(sc->log, MSGL_V, sc->text.start); - - // The vertex shader uses mangled names for the vertex attributes, so that - // the fragment shader can use the "real" names. But the shader is expecting - // the vertex attribute names (at least with older GLSL targets for GL). - params.vertex_attribs = talloc_memdup(tmp, params.vertex_attribs, - params.num_vertex_attribs * sizeof(params.vertex_attribs[0])); - for (int n = 0; n < params.num_vertex_attribs; n++) { - struct ra_renderpass_input *attrib = ¶ms.vertex_attribs[n]; - attrib->name = talloc_asprintf(tmp, "vertex_%s", attrib->name); - } - - const char *cache_header = "mpv shader cache v1\n"; - char *cache_filename = NULL; - char *cache_dir = NULL; - - if (sc->cache_dir && sc->cache_dir[0]) { - // Try to load it from a disk cache. 
- cache_dir = mp_get_user_path(tmp, sc->global, sc->cache_dir); - - struct AVSHA *sha = av_sha_alloc(); - if (!sha) - abort(); - av_sha_init(sha, 256); - av_sha_update(sha, entry->total.start, entry->total.len); - - uint8_t hash[256 / 8]; - av_sha_final(sha, hash); - av_free(sha); - - char hashstr[256 / 8 * 2 + 1]; - for (int n = 0; n < 256 / 8; n++) - snprintf(hashstr + n * 2, sizeof(hashstr) - n * 2, "%02X", hash[n]); - - cache_filename = mp_path_join(tmp, cache_dir, hashstr); - if (stat(cache_filename, &(struct stat){0}) == 0) { - MP_VERBOSE(sc, "Trying to load shader from disk...\n"); - struct bstr cachedata = - stream_read_file(cache_filename, tmp, sc->global, 1000000000); - if (bstr_eatstart0(&cachedata, cache_header)) - params.cached_program = cachedata; - } - } - - // If using a UBO, also make sure to add it as an input value so the RA - // can see it - if (sc->ubo_size) { - entry->ubo_index = sc->params.num_inputs; - struct ra_renderpass_input ubo_input = { - .name = "UBO", - .type = RA_VARTYPE_BUF_RO, - .dim_v = 1, - .dim_m = 1, - .binding = sc->ubo_binding, - }; - MP_TARRAY_APPEND(sc, params.inputs, params.num_inputs, ubo_input); - } - - entry->pass = sc->ra->fns->renderpass_create(sc->ra, ¶ms); - if (!entry->pass) - goto error; - - if (sc->ubo_size) { - struct ra_buf_params ubo_params = { - .type = RA_BUF_TYPE_UNIFORM, - .size = sc->ubo_size, - .host_mutable = true, - }; - - entry->ubo = ra_buf_create(sc->ra, &ubo_params); - if (!entry->ubo) { - MP_ERR(sc, "Failed creating uniform buffer!\n"); - goto error; - } - } - - if (entry->pass && cache_filename) { - bstr nc = entry->pass->params.cached_program; - if (nc.len && !bstr_equals(params.cached_program, nc)) { - mp_mkdirp(cache_dir); - - MP_VERBOSE(sc, "Writing shader cache file: %s\n", cache_filename); - FILE *out = fopen(cache_filename, "wb"); - if (out) { - fwrite(cache_header, strlen(cache_header), 1, out); - fwrite(nc.start, nc.len, 1, out); - fclose(out); - } - } - } - - ret = true; - -error: - talloc_free(tmp); - return ret; -} - -#define ADD(x, ...) bstr_xappend_asprintf(sc, (x), __VA_ARGS__) -#define ADD_BSTR(x, s) bstr_xappend(sc, (x), (s)) - -static void add_uniforms(struct gl_shader_cache *sc, bstr *dst) -{ - // Add all of the UBO entries separately as members of their own buffer - if (sc->ubo_size > 0) { - ADD(dst, "layout(std140, binding=%d) uniform UBO {\n", sc->ubo_binding); - for (int n = 0; n < sc->num_uniforms; n++) { - struct sc_uniform *u = &sc->uniforms[n]; - if (u->type != SC_UNIFORM_TYPE_UBO) - continue; - ADD(dst, "layout(offset=%zu) %s %s;\n", u->offset, - u->glsl_type, u->input.name); - } - ADD(dst, "};\n"); - } - - for (int n = 0; n < sc->num_uniforms; n++) { - struct sc_uniform *u = &sc->uniforms[n]; - if (u->type != SC_UNIFORM_TYPE_GLOBAL) - continue; - switch (u->input.type) { - case RA_VARTYPE_INT: - case RA_VARTYPE_FLOAT: - assert(sc->ra->caps & RA_CAP_GLOBAL_UNIFORM); - // fall through - case RA_VARTYPE_TEX: - case RA_VARTYPE_IMG_W: - // Vulkan requires explicitly assigning the bindings in the shader - // source. For OpenGL it's optional, but requires higher GL version - // so we don't do it (and instead have ra_gl update the bindings - // after program creation). 
- if (sc->ra->glsl_vulkan) - ADD(dst, "layout(binding=%d) ", u->input.binding); - ADD(dst, "uniform %s %s;\n", u->glsl_type, u->input.name); - break; - case RA_VARTYPE_BUF_RO: - ADD(dst, "layout(std140, binding=%d) uniform %s { %s };\n", - u->input.binding, u->input.name, u->buffer_format); - break; - case RA_VARTYPE_BUF_RW: - ADD(dst, "layout(std430, binding=%d) buffer %s { %s };\n", - u->input.binding, u->input.name, u->buffer_format); - break; - } - } -} - -// 1. Generate vertex and fragment shaders from the fragment shader text added -// with gl_sc_add(). The generated shader program is cached (based on the -// text), so actual compilation happens only the first time. -// 2. Update the uniforms and textures set with gl_sc_uniform_*. -// 3. Make the new shader program current (glUseProgram()). -// After that, you render, and then you call gc_sc_reset(), which does: -// 1. Unbind the program and all textures. -// 2. Reset the sc state and prepare for a new shader program. (All uniforms -// and fragment operations needed for the next program have to be re-added.) -static void gl_sc_generate(struct gl_shader_cache *sc, - enum ra_renderpass_type type, - const struct ra_format *target_format) -{ - int glsl_version = sc->ra->glsl_version; - int glsl_es = sc->ra->glsl_es ? glsl_version : 0; - - sc->params.type = type; - - // gl_sc_reset() must be called after ending the previous render process, - // and before starting a new one. - assert(!sc->needs_reset); - sc->needs_reset = true; - - // gl_sc_set_vertex_format() must always be called - assert(sc->params.vertex_attribs); - - // If using a UBO, pick a binding (needed for shader generation) - if (sc->ubo_size) - sc->ubo_binding = gl_sc_next_binding(sc, RA_VARTYPE_BUF_RO); - - for (int n = 0; n < MP_ARRAY_SIZE(sc->tmp); n++) - sc->tmp[n].len = 0; - - // set up shader text (header + uniforms + body) - bstr *header = &sc->tmp[0]; - ADD(header, "#version %d%s\n", glsl_version, glsl_es >= 300 ? " es" : ""); - if (type == RA_RENDERPASS_TYPE_COMPUTE) { - // This extension cannot be enabled in fragment shader. Enable it as - // an exception for compute shader. - ADD(header, "#extension GL_ARB_compute_shader : enable\n"); - } - for (int n = 0; n < sc->num_exts; n++) - ADD(header, "#extension %s : enable\n", sc->exts[n]); - if (glsl_es) { - ADD(header, "precision mediump float;\n"); - ADD(header, "precision mediump sampler2D;\n"); - if (sc->ra->caps & RA_CAP_TEX_3D) - ADD(header, "precision mediump sampler3D;\n"); - } - - if (glsl_version >= 130) { - ADD(header, "#define tex1D texture\n"); - ADD(header, "#define tex3D texture\n"); - } else { - ADD(header, "#define tex1D texture1D\n"); - ADD(header, "#define tex3D texture3D\n"); - ADD(header, "#define texture texture2D\n"); - } - - if (sc->ra->glsl_vulkan && type == RA_RENDERPASS_TYPE_COMPUTE) { - ADD(header, "#define gl_GlobalInvocationIndex " - "(gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID)\n"); - } - - // Additional helpers. - ADD(header, "#define LUT_POS(x, lut_size)" - " mix(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x))\n"); - - char *vert_in = glsl_version >= 130 ? "in" : "attribute"; - char *vert_out = glsl_version >= 130 ? "out" : "varying"; - char *frag_in = glsl_version >= 130 ? "in" : "varying"; - - struct bstr *vert = NULL, *frag = NULL, *comp = NULL; - - if (type == RA_RENDERPASS_TYPE_RASTER) { - // vertex shader: we don't use the vertex shader, so just setup a - // dummy, which passes through the vertex array attributes. 
- bstr *vert_head = &sc->tmp[1]; - ADD_BSTR(vert_head, *header); - bstr *vert_body = &sc->tmp[2]; - ADD(vert_body, "void main() {\n"); - bstr *frag_vaos = &sc->tmp[3]; - for (int n = 0; n < sc->params.num_vertex_attribs; n++) { - const struct ra_renderpass_input *e = &sc->params.vertex_attribs[n]; - const char *glsl_type = vao_glsl_type(e); - char loc[32] = {0}; - if (sc->ra->glsl_vulkan) - snprintf(loc, sizeof(loc), "layout(location=%d) ", n); - if (strcmp(e->name, "position") == 0) { - // setting raster pos. requires setting gl_Position magic variable - assert(e->dim_v == 2 && e->type == RA_VARTYPE_FLOAT); - ADD(vert_head, "%s%s vec2 vertex_position;\n", loc, vert_in); - ADD(vert_body, "gl_Position = vec4(vertex_position, 1.0, 1.0);\n"); - } else { - ADD(vert_head, "%s%s %s vertex_%s;\n", loc, vert_in, glsl_type, e->name); - ADD(vert_head, "%s%s %s %s;\n", loc, vert_out, glsl_type, e->name); - ADD(vert_body, "%s = vertex_%s;\n", e->name, e->name); - ADD(frag_vaos, "%s%s %s %s;\n", loc, frag_in, glsl_type, e->name); - } - } - ADD(vert_body, "}\n"); - vert = vert_head; - ADD_BSTR(vert, *vert_body); - - // fragment shader; still requires adding used uniforms and VAO elements - frag = &sc->tmp[4]; - ADD_BSTR(frag, *header); - if (glsl_version >= 130) { - ADD(frag, "%sout vec4 out_color;\n", - sc->ra->glsl_vulkan ? "layout(location=0) " : ""); - } - ADD_BSTR(frag, *frag_vaos); - add_uniforms(sc, frag); - - ADD_BSTR(frag, sc->prelude_text); - ADD_BSTR(frag, sc->header_text); - - ADD(frag, "void main() {\n"); - // we require _all_ frag shaders to write to a "vec4 color" - ADD(frag, "vec4 color = vec4(0.0, 0.0, 0.0, 1.0);\n"); - ADD_BSTR(frag, sc->text); - if (glsl_version >= 130) { - ADD(frag, "out_color = color;\n"); - } else { - ADD(frag, "gl_FragColor = color;\n"); - } - ADD(frag, "}\n"); - - // We need to fix the format of the render dst at renderpass creation - // time - assert(target_format); - sc->params.target_format = target_format; - } - - if (type == RA_RENDERPASS_TYPE_COMPUTE) { - comp = &sc->tmp[4]; - ADD_BSTR(comp, *header); - - add_uniforms(sc, comp); - - ADD_BSTR(comp, sc->prelude_text); - ADD_BSTR(comp, sc->header_text); - - ADD(comp, "void main() {\n"); - ADD(comp, "vec4 color = vec4(0.0, 0.0, 0.0, 1.0);\n"); // convenience - ADD_BSTR(comp, sc->text); - ADD(comp, "}\n"); - } - - bstr *hash_total = &sc->tmp[5]; - - ADD(hash_total, "type %d\n", sc->params.type); - - if (frag) { - ADD_BSTR(hash_total, *frag); - sc->params.frag_shader = frag->start; - } - ADD(hash_total, "\n"); - if (vert) { - ADD_BSTR(hash_total, *vert); - sc->params.vertex_shader = vert->start; - } - ADD(hash_total, "\n"); - if (comp) { - ADD_BSTR(hash_total, *comp); - sc->params.compute_shader = comp->start; - } - ADD(hash_total, "\n"); - - if (sc->params.enable_blend) { - ADD(hash_total, "blend %d %d %d %d\n", - sc->params.blend_src_rgb, sc->params.blend_dst_rgb, - sc->params.blend_src_alpha, sc->params.blend_dst_alpha); - } - - if (sc->params.target_format) - ADD(hash_total, "format %s\n", sc->params.target_format->name); - - struct sc_entry *entry = NULL; - for (int n = 0; n < sc->num_entries; n++) { - struct sc_entry *cur = sc->entries[n]; - if (bstr_equals(cur->total, *hash_total)) { - entry = cur; - break; - } - } - if (!entry) { - if (sc->num_entries == SC_MAX_ENTRIES) - sc_flush_cache(sc); - entry = talloc_ptrtype(NULL, entry); - *entry = (struct sc_entry){ - .total = bstrdup(entry, *hash_total), - .timer = timer_pool_create(sc->ra), - }; - for (int n = 0; n < sc->num_uniforms; n++) { - struct 
sc_cached_uniform u = {0}; - if (sc->uniforms[n].type == SC_UNIFORM_TYPE_GLOBAL) { - // global uniforms need to be made visible to the ra_renderpass - u.index = sc->params.num_inputs; - MP_TARRAY_APPEND(sc, sc->params.inputs, sc->params.num_inputs, - sc->uniforms[n].input); - } - MP_TARRAY_APPEND(entry, entry->cached_uniforms, - entry->num_cached_uniforms, u); - } - if (!create_pass(sc, entry)) - sc->error_state = true; - MP_TARRAY_APPEND(sc, sc->entries, sc->num_entries, entry); - } - if (sc->error_state) - return; - - assert(sc->num_uniforms == entry->num_cached_uniforms); - - sc->num_values = 0; - for (int n = 0; n < sc->num_uniforms; n++) - update_uniform(sc, entry, &sc->uniforms[n], n); - - // If we're using a UBO, make sure to bind it as well - if (sc->ubo_size) { - struct ra_renderpass_input_val ubo_val = { - .index = entry->ubo_index, - .data = &entry->ubo, - }; - MP_TARRAY_APPEND(sc, sc->values, sc->num_values, ubo_val); - } - - sc->current_shader = entry; -} - -struct mp_pass_perf gl_sc_dispatch_draw(struct gl_shader_cache *sc, - struct ra_tex *target, - void *ptr, size_t num) -{ - struct timer_pool *timer = NULL; - - gl_sc_generate(sc, RA_RENDERPASS_TYPE_RASTER, target->params.format); - if (!sc->current_shader) - goto error; - - timer = sc->current_shader->timer; - - struct mp_rect full_rc = {0, 0, target->params.w, target->params.h}; - - struct ra_renderpass_run_params run = { - .pass = sc->current_shader->pass, - .values = sc->values, - .num_values = sc->num_values, - .target = target, - .vertex_data = ptr, - .vertex_count = num, - .viewport = full_rc, - .scissors = full_rc, - }; - - timer_pool_start(timer); - sc->ra->fns->renderpass_run(sc->ra, &run); - timer_pool_stop(timer); - -error: - gl_sc_reset(sc); - return timer_pool_measure(timer); -} - -struct mp_pass_perf gl_sc_dispatch_compute(struct gl_shader_cache *sc, - int w, int h, int d) -{ - struct timer_pool *timer = NULL; - - gl_sc_generate(sc, RA_RENDERPASS_TYPE_COMPUTE, NULL); - if (!sc->current_shader) - goto error; - - timer = sc->current_shader->timer; - - struct ra_renderpass_run_params run = { - .pass = sc->current_shader->pass, - .values = sc->values, - .num_values = sc->num_values, - .compute_groups = {w, h, d}, - }; - - timer_pool_start(timer); - sc->ra->fns->renderpass_run(sc->ra, &run); - timer_pool_stop(timer); - -error: - gl_sc_reset(sc); - return timer_pool_measure(timer); -} diff --git a/video/out/opengl/shader_cache.h b/video/out/opengl/shader_cache.h deleted file mode 100644 index 82a078079b..0000000000 --- a/video/out/opengl/shader_cache.h +++ /dev/null @@ -1,56 +0,0 @@ -#pragma once - -#include "common/common.h" -#include "misc/bstr.h" -#include "ra.h" - -// For mp_pass_perf -#include "video/out/vo.h" - -struct mp_log; -struct mpv_global; -struct gl_shader_cache; - -struct gl_shader_cache *gl_sc_create(struct ra *ra, struct mpv_global *global, - struct mp_log *log); -void gl_sc_destroy(struct gl_shader_cache *sc); -bool gl_sc_error_state(struct gl_shader_cache *sc); -void gl_sc_reset_error(struct gl_shader_cache *sc); -void gl_sc_add(struct gl_shader_cache *sc, const char *text); -void gl_sc_addf(struct gl_shader_cache *sc, const char *textf, ...) - PRINTF_ATTRIBUTE(2, 3); -void gl_sc_hadd(struct gl_shader_cache *sc, const char *text); -void gl_sc_haddf(struct gl_shader_cache *sc, const char *textf, ...) - PRINTF_ATTRIBUTE(2, 3); -void gl_sc_hadd_bstr(struct gl_shader_cache *sc, struct bstr text); -void gl_sc_paddf(struct gl_shader_cache *sc, const char *textf, ...) 
- PRINTF_ATTRIBUTE(2, 3); -void gl_sc_uniform_texture(struct gl_shader_cache *sc, char *name, - struct ra_tex *tex); -void gl_sc_uniform_image2D_wo(struct gl_shader_cache *sc, const char *name, - struct ra_tex *tex); -void gl_sc_ssbo(struct gl_shader_cache *sc, char *name, struct ra_buf *buf, - char *format, ...) PRINTF_ATTRIBUTE(4, 5); -void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, float f); -void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, int f); -void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, float f[2]); -void gl_sc_uniform_vec3(struct gl_shader_cache *sc, char *name, float f[3]); -void gl_sc_uniform_mat2(struct gl_shader_cache *sc, char *name, - bool transpose, float *v); -void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name, - bool transpose, float *v); -void gl_sc_set_vertex_format(struct gl_shader_cache *sc, - const struct ra_renderpass_input *vertex_attribs, - int vertex_stride); -void gl_sc_blend(struct gl_shader_cache *sc, - enum ra_blend blend_src_rgb, - enum ra_blend blend_dst_rgb, - enum ra_blend blend_src_alpha, - enum ra_blend blend_dst_alpha); -void gl_sc_enable_extension(struct gl_shader_cache *sc, char *name); -struct mp_pass_perf gl_sc_dispatch_draw(struct gl_shader_cache *sc, - struct ra_tex *target, - void *ptr, size_t num); -struct mp_pass_perf gl_sc_dispatch_compute(struct gl_shader_cache *sc, - int w, int h, int d); -void gl_sc_set_cache_dir(struct gl_shader_cache *sc, const char *dir); diff --git a/video/out/opengl/user_shaders.c b/video/out/opengl/user_shaders.c deleted file mode 100644 index 58a1ac9e64..0000000000 --- a/video/out/opengl/user_shaders.c +++ /dev/null @@ -1,452 +0,0 @@ -/* - * This file is part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see . 
- */ - -#include - -#include "misc/ctype.h" -#include "user_shaders.h" -#include "formats.h" - -static bool parse_rpn_szexpr(struct bstr line, struct szexp out[MAX_SZEXP_SIZE]) -{ - int pos = 0; - - while (line.len > 0) { - struct bstr word = bstr_strip(bstr_splitchar(line, &line, ' ')); - if (word.len == 0) - continue; - - if (pos >= MAX_SZEXP_SIZE) - return false; - - struct szexp *exp = &out[pos++]; - - if (bstr_eatend0(&word, ".w") || bstr_eatend0(&word, ".width")) { - exp->tag = SZEXP_VAR_W; - exp->val.varname = word; - continue; - } - - if (bstr_eatend0(&word, ".h") || bstr_eatend0(&word, ".height")) { - exp->tag = SZEXP_VAR_H; - exp->val.varname = word; - continue; - } - - switch (word.start[0]) { - case '+': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_ADD; continue; - case '-': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_SUB; continue; - case '*': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_MUL; continue; - case '/': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_DIV; continue; - case '!': exp->tag = SZEXP_OP1; exp->val.op = SZEXP_OP_NOT; continue; - case '>': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_GT; continue; - case '<': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_LT; continue; - } - - if (mp_isdigit(word.start[0])) { - exp->tag = SZEXP_CONST; - if (bstr_sscanf(word, "%f", &exp->val.cval) != 1) - return false; - continue; - } - - // Some sort of illegal expression - return false; - } - - return true; -} - -// Returns whether successful. 'result' is left untouched on failure -bool eval_szexpr(struct mp_log *log, void *priv, - bool (*lookup)(void *priv, struct bstr var, float size[2]), - struct szexp expr[MAX_SZEXP_SIZE], float *result) -{ - float stack[MAX_SZEXP_SIZE] = {0}; - int idx = 0; // points to next element to push - - for (int i = 0; i < MAX_SZEXP_SIZE; i++) { - switch (expr[i].tag) { - case SZEXP_END: - goto done; - - case SZEXP_CONST: - // Since our SZEXPs are bound by MAX_SZEXP_SIZE, it should be - // impossible to overflow the stack - assert(idx < MAX_SZEXP_SIZE); - stack[idx++] = expr[i].val.cval; - continue; - - case SZEXP_OP1: - if (idx < 1) { - mp_warn(log, "Stack underflow in RPN expression!\n"); - return false; - } - - switch (expr[i].val.op) { - case SZEXP_OP_NOT: stack[idx-1] = !stack[idx-1]; break; - default: abort(); - } - continue; - - case SZEXP_OP2: - if (idx < 2) { - mp_warn(log, "Stack underflow in RPN expression!\n"); - return false; - } - - // Pop the operands in reverse order - float op2 = stack[--idx]; - float op1 = stack[--idx]; - float res = 0.0; - switch (expr[i].val.op) { - case SZEXP_OP_ADD: res = op1 + op2; break; - case SZEXP_OP_SUB: res = op1 - op2; break; - case SZEXP_OP_MUL: res = op1 * op2; break; - case SZEXP_OP_DIV: res = op1 / op2; break; - case SZEXP_OP_GT: res = op1 > op2; break; - case SZEXP_OP_LT: res = op1 < op2; break; - default: abort(); - } - - if (!isfinite(res)) { - mp_warn(log, "Illegal operation in RPN expression!\n"); - return false; - } - - stack[idx++] = res; - continue; - - case SZEXP_VAR_W: - case SZEXP_VAR_H: { - struct bstr name = expr[i].val.varname; - float size[2]; - - if (!lookup(priv, name, size)) { - mp_warn(log, "Variable %.*s not found in RPN expression!\n", - BSTR_P(name)); - return false; - } - - stack[idx++] = (expr[i].tag == SZEXP_VAR_W) ? 
size[0] : size[1]; - continue; - } - } - } - -done: - // Return the single stack element - if (idx != 1) { - mp_warn(log, "Malformed stack after RPN expression!\n"); - return false; - } - - *result = stack[0]; - return true; -} - -static bool parse_hook(struct mp_log *log, struct bstr *body, - struct gl_user_shader_hook *out) -{ - *out = (struct gl_user_shader_hook){ - .pass_desc = bstr0("(unknown)"), - .offset = identity_trans, - .width = {{ SZEXP_VAR_W, { .varname = bstr0("HOOKED") }}}, - .height = {{ SZEXP_VAR_H, { .varname = bstr0("HOOKED") }}}, - .cond = {{ SZEXP_CONST, { .cval = 1.0 }}}, - }; - - int hook_idx = 0; - int bind_idx = 0; - - // Parse all headers - while (true) { - struct bstr rest; - struct bstr line = bstr_strip(bstr_getline(*body, &rest)); - - // Check for the presence of the magic line beginning - if (!bstr_eatstart0(&line, "//!")) - break; - - *body = rest; - - // Parse the supported commands - if (bstr_eatstart0(&line, "HOOK")) { - if (hook_idx == SHADER_MAX_HOOKS) { - mp_err(log, "Passes may only hook up to %d textures!\n", - SHADER_MAX_HOOKS); - return false; - } - out->hook_tex[hook_idx++] = bstr_strip(line); - continue; - } - - if (bstr_eatstart0(&line, "BIND")) { - if (bind_idx == SHADER_MAX_BINDS) { - mp_err(log, "Passes may only bind up to %d textures!\n", - SHADER_MAX_BINDS); - return false; - } - out->bind_tex[bind_idx++] = bstr_strip(line); - continue; - } - - if (bstr_eatstart0(&line, "SAVE")) { - out->save_tex = bstr_strip(line); - continue; - } - - if (bstr_eatstart0(&line, "DESC")) { - out->pass_desc = bstr_strip(line); - continue; - } - - if (bstr_eatstart0(&line, "OFFSET")) { - float ox, oy; - if (bstr_sscanf(line, "%f %f", &ox, &oy) != 2) { - mp_err(log, "Error while parsing OFFSET!\n"); - return false; - } - out->offset.t[0] = ox; - out->offset.t[1] = oy; - continue; - } - - if (bstr_eatstart0(&line, "WIDTH")) { - if (!parse_rpn_szexpr(line, out->width)) { - mp_err(log, "Error while parsing WIDTH!\n"); - return false; - } - continue; - } - - if (bstr_eatstart0(&line, "HEIGHT")) { - if (!parse_rpn_szexpr(line, out->height)) { - mp_err(log, "Error while parsing HEIGHT!\n"); - return false; - } - continue; - } - - if (bstr_eatstart0(&line, "WHEN")) { - if (!parse_rpn_szexpr(line, out->cond)) { - mp_err(log, "Error while parsing WHEN!\n"); - return false; - } - continue; - } - - if (bstr_eatstart0(&line, "COMPONENTS")) { - if (bstr_sscanf(line, "%d", &out->components) != 1) { - mp_err(log, "Error while parsing COMPONENTS!\n"); - return false; - } - continue; - } - - if (bstr_eatstart0(&line, "COMPUTE")) { - struct compute_info *ci = &out->compute; - int num = bstr_sscanf(line, "%d %d %d %d", &ci->block_w, &ci->block_h, - &ci->threads_w, &ci->threads_h); - - if (num == 2 || num == 4) { - ci->active = true; - ci->directly_writes = true; - } else { - mp_err(log, "Error while parsing COMPUTE!\n"); - return false; - } - continue; - } - - // Unknown command type - mp_err(log, "Unrecognized command '%.*s'!\n", BSTR_P(line)); - return false; - } - - // The rest of the file up until the next magic line beginning (if any) - // shall be the shader body - if (bstr_split_tok(*body, "//!", &out->pass_body, body)) { - // Make sure the magic line is part of the rest - body->start -= 3; - body->len += 3; - } - - // Sanity checking - if (hook_idx == 0) - mp_warn(log, "Pass has no hooked textures (will be ignored)!\n"); - - return true; -} - -static bool parse_tex(struct mp_log *log, struct ra *ra, struct bstr *body, - struct gl_user_shader_tex *out) -{ - *out = (struct 
gl_user_shader_tex){ - .name = bstr0("USER_TEX"), - .params = { - .dimensions = 2, - .w = 1, .h = 1, .d = 1, - .render_src = true, - .src_linear = true, - }, - }; - struct ra_tex_params *p = &out->params; - - while (true) { - struct bstr rest; - struct bstr line = bstr_strip(bstr_getline(*body, &rest)); - - if (!bstr_eatstart0(&line, "//!")) - break; - - *body = rest; - - if (bstr_eatstart0(&line, "TEXTURE")) { - out->name = bstr_strip(line); - continue; - } - - if (bstr_eatstart0(&line, "SIZE")) { - p->dimensions = bstr_sscanf(line, "%d %d %d", &p->w, &p->h, &p->d); - if (p->dimensions < 1 || p->dimensions > 3 || - p->w < 1 || p->h < 1 || p->d < 1) - { - mp_err(log, "Error while parsing SIZE!\n"); - return false; - } - continue; - } - - if (bstr_eatstart0(&line, "FORMAT ")) { - p->format = NULL; - for (int n = 0; n < ra->num_formats; n++) { - const struct ra_format *fmt = ra->formats[n]; - if (bstr_equals0(line, fmt->name)) { - p->format = fmt; - break; - } - } - // (pixel_size==0 is for opaque formats) - if (!p->format || !p->format->pixel_size) { - mp_err(log, "Unrecognized/unavailable FORMAT name: '%.*s'!\n", - BSTR_P(line)); - return false; - } - continue; - } - - if (bstr_eatstart0(&line, "FILTER")) { - line = bstr_strip(line); - if (bstr_equals0(line, "LINEAR")) { - p->src_linear = true; - } else if (bstr_equals0(line, "NEAREST")) { - p->src_linear = false; - } else { - mp_err(log, "Unrecognized FILTER: '%.*s'!\n", BSTR_P(line)); - return false; - } - continue; - } - - if (bstr_eatstart0(&line, "BORDER")) { - line = bstr_strip(line); - if (bstr_equals0(line, "CLAMP")) { - p->src_repeat = false; - } else if (bstr_equals0(line, "REPEAT")) { - p->src_repeat = true; - } else { - mp_err(log, "Unrecognized BORDER: '%.*s'!\n", BSTR_P(line)); - return false; - } - continue; - } - - mp_err(log, "Unrecognized command '%.*s'!\n", BSTR_P(line)); - return false; - } - - if (!p->format) { - mp_err(log, "No FORMAT specified.\n"); - return false; - } - - if (p->src_linear && !p->format->linear_filter) { - mp_err(log, "The specified texture format cannot be filtered!\n"); - return false; - } - - // Decode the rest of the section (up to the next //! marker) as raw hex - // data for the texture - struct bstr hexdata; - if (bstr_split_tok(*body, "//!", &hexdata, body)) { - // Make sure the magic line is part of the rest - body->start -= 3; - body->len += 3; - } - - struct bstr tex; - if (!bstr_decode_hex(NULL, bstr_strip(hexdata), &tex)) { - mp_err(log, "Error while parsing TEXTURE body: must be a valid " - "hexadecimal sequence, on a single line!\n"); - return false; - } - - int expected_len = p->w * p->h * p->d * p->format->pixel_size; - if (tex.len != expected_len) { - mp_err(log, "Shader TEXTURE size mismatch: got %zd bytes, expected %d!\n", - tex.len, expected_len); - talloc_free(tex.start); - return false; - } - - p->initial_data = tex.start; - return true; -} - -void parse_user_shader(struct mp_log *log, struct ra *ra, struct bstr shader, - void *priv, - bool (*dohook)(void *p, struct gl_user_shader_hook hook), - bool (*dotex)(void *p, struct gl_user_shader_tex tex)) -{ - if (!dohook || !dotex || !shader.len) - return; - - // Skip all garbage (e.g. 
comments) before the first header - int pos = bstr_find(shader, bstr0("//!")); - if (pos < 0) { - mp_warn(log, "Shader appears to contain no headers!\n"); - return; - } - shader = bstr_cut(shader, pos); - - // Loop over the file - while (shader.len > 0) - { - // Peek at the first header to dispatch the right type - if (bstr_startswith0(shader, "//!TEXTURE")) { - struct gl_user_shader_tex t; - if (!parse_tex(log, ra, &shader, &t) || !dotex(priv, t)) - return; - continue; - } - - struct gl_user_shader_hook h; - if (!parse_hook(log, &shader, &h) || !dohook(priv, h)) - return; - } -} diff --git a/video/out/opengl/user_shaders.h b/video/out/opengl/user_shaders.h deleted file mode 100644 index 94a070c8e2..0000000000 --- a/video/out/opengl/user_shaders.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * This file is part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see . - */ - -#ifndef MP_GL_USER_SHADERS_H -#define MP_GL_USER_SHADERS_H - -#include "utils.h" -#include "ra.h" - -#define SHADER_MAX_PASSES 32 -#define SHADER_MAX_HOOKS 16 -#define SHADER_MAX_BINDS 6 -#define SHADER_MAX_SAVED 64 -#define MAX_SZEXP_SIZE 32 - -enum szexp_op { - SZEXP_OP_ADD, - SZEXP_OP_SUB, - SZEXP_OP_MUL, - SZEXP_OP_DIV, - SZEXP_OP_NOT, - SZEXP_OP_GT, - SZEXP_OP_LT, -}; - -enum szexp_tag { - SZEXP_END = 0, // End of an RPN expression - SZEXP_CONST, // Push a constant value onto the stack - SZEXP_VAR_W, // Get the width/height of a named texture (variable) - SZEXP_VAR_H, - SZEXP_OP2, // Pop two elements and push the result of a dyadic operation - SZEXP_OP1, // Pop one element and push the result of a monadic operation -}; - -struct szexp { - enum szexp_tag tag; - union { - float cval; - struct bstr varname; - enum szexp_op op; - } val; -}; - -struct compute_info { - bool active; - int block_w, block_h; // Block size (each block corresponds to one WG) - int threads_w, threads_h; // How many threads form a working group - bool directly_writes; // If true, shader is assumed to imageStore(out_image) -}; - -struct gl_user_shader_hook { - struct bstr pass_desc; - struct bstr hook_tex[SHADER_MAX_HOOKS]; - struct bstr bind_tex[SHADER_MAX_BINDS]; - struct bstr save_tex; - struct bstr pass_body; - struct gl_transform offset; - struct szexp width[MAX_SZEXP_SIZE]; - struct szexp height[MAX_SZEXP_SIZE]; - struct szexp cond[MAX_SZEXP_SIZE]; - int components; - struct compute_info compute; -}; - -struct gl_user_shader_tex { - struct bstr name; - struct ra_tex_params params; - // for video.c - struct ra_tex *tex; -}; - -// Parse the next shader block from `body`. The callbacks are invoked on every -// valid shader block parsed. 
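Outside the patch, a minimal sketch of driving the size-expression evaluator directly, mirroring what a //!WIDTH HOOKED.w 2 / header parses into. The gpu/user_shaders.h path assumes the post-refactor layout; lookup_size and half_hooked_width are hypothetical helpers.

#include "misc/bstr.h"
#include "video/out/gpu/user_shaders.h"

// Size lookup callback: only the "HOOKED" texture is known in this sketch.
static bool lookup_size(void *priv, struct bstr var, float size[2])
{
    if (!bstr_equals0(var, "HOOKED"))
        return false;
    size[0] = 1920.0f; // width
    size[1] = 1080.0f; // height
    return true;
}

static bool half_hooked_width(struct mp_log *log, float *result)
{
    // RPN: push HOOKED.w, push 2, divide. Unused entries stay SZEXP_END (0).
    struct szexp expr[MAX_SZEXP_SIZE] = {
        {SZEXP_VAR_W, { .varname = bstr0("HOOKED") }},
        {SZEXP_CONST, { .cval = 2.0 }},
        {SZEXP_OP2,   { .op = SZEXP_OP_DIV }},
    };
    return eval_szexpr(log, NULL, lookup_size, expr, result); // *result == 960
}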
-void parse_user_shader(struct mp_log *log, struct ra *ra, struct bstr shader, - void *priv, - bool (*dohook)(void *p, struct gl_user_shader_hook hook), - bool (*dotex)(void *p, struct gl_user_shader_tex tex)); - -// Evaluate a szexp, given a lookup function for named textures -bool eval_szexpr(struct mp_log *log, void *priv, - bool (*lookup)(void *priv, struct bstr var, float size[2]), - struct szexp expr[MAX_SZEXP_SIZE], float *result); - -#endif diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c index b8fc24a52e..3b296d52de 100644 --- a/video/out/opengl/utils.c +++ b/video/out/opengl/utils.c @@ -1,371 +1,269 @@ -#include "common/msg.h" -#include "video/out/vo.h" +/* + * This file is part of mpv. + * Parts based on MPlayer code by Reimar Döffinger. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see . + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "osdep/io.h" + +#include "common/common.h" +#include "options/path.h" +#include "stream/stream.h" +#include "formats.h" #include "utils.h" -// Standard parallel 2D projection, except y1 < y0 means that the coordinate -// system is flipped, not the projection. -void gl_transform_ortho(struct gl_transform *t, float x0, float x1, - float y0, float y1) +// GLU has this as gluErrorString (we don't use GLU, as it is legacy-OpenGL) +static const char *gl_error_to_string(GLenum error) { - if (y1 < y0) { - float tmp = y0; - y0 = tmp - y1; - y1 = tmp; + switch (error) { + case GL_INVALID_ENUM: return "INVALID_ENUM"; + case GL_INVALID_VALUE: return "INVALID_VALUE"; + case GL_INVALID_OPERATION: return "INVALID_OPERATION"; + case GL_INVALID_FRAMEBUFFER_OPERATION: return "INVALID_FRAMEBUFFER_OPERATION"; + case GL_OUT_OF_MEMORY: return "OUT_OF_MEMORY"; + default: return "unknown"; } - - t->m[0][0] = 2.0f / (x1 - x0); - t->m[0][1] = 0.0f; - t->m[1][0] = 0.0f; - t->m[1][1] = 2.0f / (y1 - y0); - t->t[0] = -(x1 + x0) / (x1 - x0); - t->t[1] = -(y1 + y0) / (y1 - y0); -} - -// Apply the effects of one transformation to another, transforming it in the -// process. In other words: post-composes t onto x -void gl_transform_trans(struct gl_transform t, struct gl_transform *x) -{ - struct gl_transform xt = *x; - x->m[0][0] = t.m[0][0] * xt.m[0][0] + t.m[0][1] * xt.m[1][0]; - x->m[1][0] = t.m[1][0] * xt.m[0][0] + t.m[1][1] * xt.m[1][0]; - x->m[0][1] = t.m[0][0] * xt.m[0][1] + t.m[0][1] * xt.m[1][1]; - x->m[1][1] = t.m[1][0] * xt.m[0][1] + t.m[1][1] * xt.m[1][1]; - gl_transform_vec(t, &x->t[0], &x->t[1]); -} - -void gl_transform_ortho_fbodst(struct gl_transform *t, struct fbodst fbo) -{ - int y_dir = fbo.flip ? 
-1 : 1; - gl_transform_ortho(t, 0, fbo.tex->params.w, 0, fbo.tex->params.h * y_dir); } -void ra_buf_pool_uninit(struct ra *ra, struct ra_buf_pool *pool) +void gl_check_error(GL *gl, struct mp_log *log, const char *info) { - for (int i = 0; i < pool->num_buffers; i++) - ra_buf_free(ra, &pool->buffers[i]); - - talloc_free(pool->buffers); - *pool = (struct ra_buf_pool){0}; + for (;;) { + GLenum error = gl->GetError(); + if (error == GL_NO_ERROR) + break; + mp_msg(log, MSGL_ERR, "%s: OpenGL error %s.\n", info, + gl_error_to_string(error)); + } } -static bool ra_buf_params_compatible(const struct ra_buf_params *new, - const struct ra_buf_params *old) +static int get_alignment(int stride) { - return new->type == old->type && - new->size <= old->size && - new->host_mapped == old->host_mapped && - new->host_mutable == old->host_mutable; + if (stride % 8 == 0) + return 8; + if (stride % 4 == 0) + return 4; + if (stride % 2 == 0) + return 2; + return 1; } -static bool ra_buf_pool_grow(struct ra *ra, struct ra_buf_pool *pool) +// upload a texture, handling things like stride and slices +// target: texture target, usually GL_TEXTURE_2D +// format, type: texture parameters +// dataptr, stride: image data +// x, y, width, height: part of the image to upload +void gl_upload_tex(GL *gl, GLenum target, GLenum format, GLenum type, + const void *dataptr, int stride, + int x, int y, int w, int h) { - struct ra_buf *buf = ra_buf_create(ra, &pool->current_params); - if (!buf) - return false; - - MP_TARRAY_INSERT_AT(NULL, pool->buffers, pool->num_buffers, pool->index, buf); - MP_VERBOSE(ra, "Resized buffer pool to size %d\n", pool->num_buffers); - return true; + int bpp = gl_bytes_per_pixel(format, type); + const uint8_t *data = dataptr; + int y_max = y + h; + if (w <= 0 || h <= 0 || !bpp) + return; + if (stride < 0) { + data += (h - 1) * stride; + stride = -stride; + } + gl->PixelStorei(GL_UNPACK_ALIGNMENT, get_alignment(stride)); + int slice = h; + if (gl->mpgl_caps & MPGL_CAP_ROW_LENGTH) { + // this is not always correct, but should work for MPlayer + gl->PixelStorei(GL_UNPACK_ROW_LENGTH, stride / bpp); + } else { + if (stride != bpp * w) + slice = 1; // very inefficient, but at least it works + } + for (; y + slice <= y_max; y += slice) { + gl->TexSubImage2D(target, 0, x, y, w, slice, format, type, data); + data += stride * slice; + } + if (y < y_max) + gl->TexSubImage2D(target, 0, x, y, w, y_max - y, format, type, data); + if (gl->mpgl_caps & MPGL_CAP_ROW_LENGTH) + gl->PixelStorei(GL_UNPACK_ROW_LENGTH, 0); + gl->PixelStorei(GL_UNPACK_ALIGNMENT, 4); } -struct ra_buf *ra_buf_pool_get(struct ra *ra, struct ra_buf_pool *pool, - const struct ra_buf_params *params) +mp_image_t *gl_read_fbo_contents(GL *gl, int fbo, int w, int h) { - assert(!params->initial_data); - - if (!ra_buf_params_compatible(params, &pool->current_params)) { - ra_buf_pool_uninit(ra, pool); - pool->current_params = *params; - } - - // Make sure we have at least one buffer available - if (!pool->buffers && !ra_buf_pool_grow(ra, pool)) - return NULL; - - // Make sure the next buffer is available for use - if (!ra->fns->buf_poll(ra, pool->buffers[pool->index]) && - !ra_buf_pool_grow(ra, pool)) - { + if (gl->es) + return NULL; // ES can't read from front buffer + mp_image_t *image = mp_image_alloc(IMGFMT_RGB24, w, h); + if (!image) return NULL; + gl->BindFramebuffer(GL_FRAMEBUFFER, fbo); + GLenum obj = fbo ? 
GL_COLOR_ATTACHMENT0 : GL_FRONT; + gl->PixelStorei(GL_PACK_ALIGNMENT, 1); + gl->ReadBuffer(obj); + //flip image while reading (and also avoid stride-related trouble) + for (int y = 0; y < h; y++) { + gl->ReadPixels(0, h - y - 1, w, 1, GL_RGB, GL_UNSIGNED_BYTE, + image->planes[0] + y * image->stride[0]); } - - struct ra_buf *buf = pool->buffers[pool->index++]; - pool->index %= pool->num_buffers; - - return buf; + gl->PixelStorei(GL_PACK_ALIGNMENT, 4); + gl->BindFramebuffer(GL_FRAMEBUFFER, 0); + return image; } -bool ra_tex_upload_pbo(struct ra *ra, struct ra_buf_pool *pbo, - const struct ra_tex_upload_params *params) +static void gl_vao_enable_attribs(struct gl_vao *vao) { - if (params->buf) - return ra->fns->tex_upload(ra, params); - - struct ra_tex *tex = params->tex; - size_t row_size = tex->params.dimensions == 2 ? params->stride : - tex->params.w * tex->params.format->pixel_size; - - struct ra_buf_params bufparams = { - .type = RA_BUF_TYPE_TEX_UPLOAD, - .size = row_size * tex->params.h * tex->params.d, - .host_mutable = true, - }; - - struct ra_buf *buf = ra_buf_pool_get(ra, pbo, &bufparams); - if (!buf) - return false; - - ra->fns->buf_update(ra, buf, 0, params->src, bufparams.size); - - struct ra_tex_upload_params newparams = *params; - newparams.buf = buf; - newparams.src = NULL; - - return ra->fns->tex_upload(ra, &newparams); -} + GL *gl = vao->gl; + + for (int n = 0; n < vao->num_entries; n++) { + const struct ra_renderpass_input *e = &vao->entries[n]; + GLenum type = 0; + bool normalized = false; + switch (e->type) { + case RA_VARTYPE_INT: + type = GL_INT; + break; + case RA_VARTYPE_FLOAT: + type = GL_FLOAT; + break; + case RA_VARTYPE_BYTE_UNORM: + type = GL_UNSIGNED_BYTE; + normalized = true; + break; + default: + abort(); + } + assert(e->dim_m == 1); -struct ra_layout std140_layout(struct ra_renderpass_input *inp) -{ - size_t el_size = ra_vartype_size(inp->type); - - // std140 packing rules: - // 1. The alignment of generic values is their size in bytes - // 2. The alignment of vectors is the vector length * the base count, with - // the exception of vec3 which is always aligned like vec4 - // 3. The alignment of arrays is that of the element size rounded up to - // the nearest multiple of vec4 - // 4. Matrices are treated like arrays of vectors - // 5. 
Arrays/matrices are laid out with a stride equal to the alignment - size_t size = el_size * inp->dim_v; - if (inp->dim_v == 3) - size += el_size; - if (inp->dim_m > 1) - size = MP_ALIGN_UP(size, sizeof(float[4])); - - return (struct ra_layout) { - .align = size, - .stride = size, - .size = size * inp->dim_m, - }; + gl->EnableVertexAttribArray(n); + gl->VertexAttribPointer(n, e->dim_v, type, normalized, + vao->stride, (void *)(intptr_t)e->offset); + } } -struct ra_layout std430_layout(struct ra_renderpass_input *inp) +void gl_vao_init(struct gl_vao *vao, GL *gl, int stride, + const struct ra_renderpass_input *entries, + int num_entries) { - size_t el_size = ra_vartype_size(inp->type); - - // std430 packing rules: like std140, except arrays/matrices are always - // "tightly" packed, even arrays/matrices of vec3s - size_t align = el_size * inp->dim_v; - if (inp->dim_v == 3 && inp->dim_m == 1) - align += el_size; - - return (struct ra_layout) { - .align = align, - .stride = align, - .size = align * inp->dim_m, + assert(!vao->vao); + assert(!vao->buffer); + + *vao = (struct gl_vao){ + .gl = gl, + .stride = stride, + .entries = entries, + .num_entries = num_entries, }; -} - -// Create a texture and a FBO using the texture as color attachments. -// fmt: texture internal format -// If the parameters are the same as the previous call, do not touch it. -// flags can be 0, or a combination of FBOTEX_FUZZY_W and FBOTEX_FUZZY_H. -// Enabling FUZZY for W or H means the w or h does not need to be exact. -bool fbotex_change(struct fbotex *fbo, struct ra *ra, struct mp_log *log, - int w, int h, const struct ra_format *fmt, int flags) -{ - int lw = w, lh = h; - - if (fbo->tex) { - int cw = w, ch = h; - int rw = fbo->tex->params.w, rh = fbo->tex->params.h; - if ((flags & FBOTEX_FUZZY_W) && cw < rw) - cw = rw; - if ((flags & FBOTEX_FUZZY_H) && ch < rh) - ch = rh; + gl->GenBuffers(1, &vao->buffer); - if (rw == cw && rh == ch && fbo->tex->params.format == fmt) - goto done; - } + if (gl->BindVertexArray) { + gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer); - if (flags & FBOTEX_FUZZY_W) - w = MP_ALIGN_UP(w, 256); - if (flags & FBOTEX_FUZZY_H) - h = MP_ALIGN_UP(h, 256); + gl->GenVertexArrays(1, &vao->vao); + gl->BindVertexArray(vao->vao); + gl_vao_enable_attribs(vao); + gl->BindVertexArray(0); - mp_verbose(log, "Create FBO: %dx%d (%dx%d)\n", lw, lh, w, h); - - if (!fmt || !fmt->renderable || !fmt->linear_filter) { - mp_err(log, "Format %s not supported.\n", fmt ? 
fmt->name : "(unset)"); - return false; + gl->BindBuffer(GL_ARRAY_BUFFER, 0); } +} - fbotex_uninit(fbo); - - *fbo = (struct fbotex) { - .ra = ra, - }; - - struct ra_tex_params params = { - .dimensions = 2, - .w = w, - .h = h, - .d = 1, - .format = fmt, - .src_linear = true, - .render_src = true, - .render_dst = true, - .storage_dst = true, - .blit_src = true, - }; - - fbo->tex = ra_tex_create(fbo->ra, ¶ms); - - if (!fbo->tex) { - mp_err(log, "Error: framebuffer could not be created.\n"); - fbotex_uninit(fbo); - return false; - } - -done: - - fbo->lw = lw; - fbo->lh = lh; +void gl_vao_uninit(struct gl_vao *vao) +{ + GL *gl = vao->gl; + if (!gl) + return; - fbo->fbo = (struct fbodst){ - .tex = fbo->tex, - }; + if (gl->DeleteVertexArrays) + gl->DeleteVertexArrays(1, &vao->vao); + gl->DeleteBuffers(1, &vao->buffer); - return true; + *vao = (struct gl_vao){0}; } -void fbotex_uninit(struct fbotex *fbo) +static void gl_vao_bind(struct gl_vao *vao) { - if (fbo->ra) { - ra_tex_free(fbo->ra, &fbo->tex); - *fbo = (struct fbotex) {0}; + GL *gl = vao->gl; + + if (gl->BindVertexArray) { + gl->BindVertexArray(vao->vao); + } else { + gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer); + gl_vao_enable_attribs(vao); + gl->BindBuffer(GL_ARRAY_BUFFER, 0); } } -struct timer_pool { - struct ra *ra; - ra_timer *timer; - bool running; // detect invalid usage - - uint64_t samples[VO_PERF_SAMPLE_COUNT]; - int sample_idx; - int sample_count; - - uint64_t sum; - uint64_t peak; -}; - -struct timer_pool *timer_pool_create(struct ra *ra) +static void gl_vao_unbind(struct gl_vao *vao) { - if (!ra->fns->timer_create) - return NULL; - - ra_timer *timer = ra->fns->timer_create(ra); - if (!timer) - return NULL; + GL *gl = vao->gl; - struct timer_pool *pool = talloc(NULL, struct timer_pool); - if (!pool) { - ra->fns->timer_destroy(ra, timer); - return NULL; + if (gl->BindVertexArray) { + gl->BindVertexArray(0); + } else { + for (int n = 0; n < vao->num_entries; n++) + gl->DisableVertexAttribArray(n); } - - *pool = (struct timer_pool){ .ra = ra, .timer = timer }; - return pool; } -void timer_pool_destroy(struct timer_pool *pool) +// Draw the vertex data (as described by the gl_vao_entry entries) in ptr +// to the screen. num is the number of vertexes. prim is usually GL_TRIANGLES. +// If ptr is NULL, then skip the upload, and use the data uploaded with the +// previous call. 
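Outside the patch, a minimal sketch of the gl_vao helper for plain GL code that draws without going through ra/shader_cache. The vertex layout and draw_colored_tris are hypothetical; a real user would keep the gl_vao alive across frames rather than recreating it per draw.

#include <stddef.h>
#include "common/common.h"
#include "video/out/opengl/utils.h"

struct simple_vertex {
    float position[2];
    float color[4];
};

static const struct ra_renderpass_input simple_vao[] = {
    {.name = "position", .type = RA_VARTYPE_FLOAT, .dim_v = 2, .dim_m = 1,
     .offset = offsetof(struct simple_vertex, position)},
    {.name = "color", .type = RA_VARTYPE_FLOAT, .dim_v = 4, .dim_m = 1,
     .offset = offsetof(struct simple_vertex, color)},
};

static void draw_colored_tris(GL *gl, struct simple_vertex *verts, size_t num)
{
    struct gl_vao vao = {0};
    gl_vao_init(&vao, gl, sizeof(struct simple_vertex), simple_vao,
                MP_ARRAY_SIZE(simple_vao));
    // Uploads the vertex data into the VAO's GL_ARRAY_BUFFER and issues the
    // draw; attribute locations follow the order of the entries (0, 1, ...).
    gl_vao_draw_data(&vao, GL_TRIANGLES, verts, num);
    gl_vao_uninit(&vao);
}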
+void gl_vao_draw_data(struct gl_vao *vao, GLenum prim, void *ptr, size_t num) { - if (!pool) - return; - - pool->ra->fns->timer_destroy(pool->ra, pool->timer); - talloc_free(pool); -} + GL *gl = vao->gl; -void timer_pool_start(struct timer_pool *pool) -{ - if (!pool) - return; + if (ptr) { + gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer); + gl->BufferData(GL_ARRAY_BUFFER, num * vao->stride, ptr, GL_STREAM_DRAW); + gl->BindBuffer(GL_ARRAY_BUFFER, 0); + } - assert(!pool->running); - pool->ra->fns->timer_start(pool->ra, pool->timer); - pool->running = true; -} + gl_vao_bind(vao); -void timer_pool_stop(struct timer_pool *pool) -{ - if (!pool) - return; + gl->DrawArrays(prim, 0, num); - assert(pool->running); - uint64_t res = pool->ra->fns->timer_stop(pool->ra, pool->timer); - pool->running = false; - - if (res) { - // Input res into the buffer and grab the previous value - uint64_t old = pool->samples[pool->sample_idx]; - pool->sample_count = MPMIN(pool->sample_count + 1, VO_PERF_SAMPLE_COUNT); - pool->samples[pool->sample_idx++] = res; - pool->sample_idx %= VO_PERF_SAMPLE_COUNT; - pool->sum = pool->sum + res - old; - - // Update peak if necessary - if (res >= pool->peak) { - pool->peak = res; - } else if (pool->peak == old) { - // It's possible that the last peak was the value we just removed, - // if so we need to scan for the new peak - uint64_t peak = res; - for (int i = 0; i < VO_PERF_SAMPLE_COUNT; i++) - peak = MPMAX(peak, pool->samples[i]); - pool->peak = peak; - } - } + gl_vao_unbind(vao); } -struct mp_pass_perf timer_pool_measure(struct timer_pool *pool) +static void GLAPIENTRY gl_debug_cb(GLenum source, GLenum type, GLuint id, + GLenum severity, GLsizei length, + const GLchar *message, const void *userParam) { - if (!pool) - return (struct mp_pass_perf){0}; - - struct mp_pass_perf res = { - .peak = pool->peak, - .count = pool->sample_count, - }; - - int idx = pool->sample_idx - pool->sample_count + VO_PERF_SAMPLE_COUNT; - for (int i = 0; i < res.count; i++) { - idx %= VO_PERF_SAMPLE_COUNT; - res.samples[i] = pool->samples[idx++]; + // keep in mind that the debug callback can be asynchronous + struct mp_log *log = (void *)userParam; + int level = MSGL_ERR; + switch (severity) { + case GL_DEBUG_SEVERITY_NOTIFICATION:level = MSGL_V; break; + case GL_DEBUG_SEVERITY_LOW: level = MSGL_INFO; break; + case GL_DEBUG_SEVERITY_MEDIUM: level = MSGL_WARN; break; + case GL_DEBUG_SEVERITY_HIGH: level = MSGL_ERR; break; } - - if (res.count > 0) { - res.last = res.samples[res.count - 1]; - res.avg = pool->sum / res.count; - } - - return res; + mp_msg(log, level, "GL: %s\n", message); } -void mp_log_source(struct mp_log *log, int lev, const char *src) +void gl_set_debug_logger(GL *gl, struct mp_log *log) { - int line = 1; - if (!src) - return; - while (*src) { - const char *end = strchr(src, '\n'); - const char *next = end + 1; - if (!end) - next = end = src + strlen(src); - mp_msg(log, lev, "[%3d] %.*s\n", line, (int)(end - src), src); - line++; - src = next; - } + if (gl->DebugMessageCallback) + gl->DebugMessageCallback(log ? gl_debug_cb : NULL, log); } diff --git a/video/out/opengl/utils.h b/video/out/opengl/utils.h index 7d00d26cf5..18cab476ed 100644 --- a/video/out/opengl/utils.h +++ b/video/out/opengl/utils.h @@ -1,121 +1,54 @@ -#pragma once +/* + * This file is part of mpv. + * Parts based on MPlayer code by Reimar Döffinger. 
+ * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see . + */ + +#ifndef MP_GL_UTILS_ +#define MP_GL_UTILS_ -#include #include -#include "video/out/vo.h" -#include "ra.h" +#include "video/out/gpu/utils.h" +#include "common.h" -// A 3x2 matrix, with the translation part separate. -struct gl_transform { - // row-major, e.g. in mathematical notation: - // | m[0][0] m[0][1] | - // | m[1][0] m[1][1] | - float m[2][2]; - float t[2]; -}; - -static const struct gl_transform identity_trans = { - .m = {{1.0, 0.0}, {0.0, 1.0}}, - .t = {0.0, 0.0}, -}; - -void gl_transform_ortho(struct gl_transform *t, float x0, float x1, - float y0, float y1); - -// This treats m as an affine transformation, in other words m[2][n] gets -// added to the output. -static inline void gl_transform_vec(struct gl_transform t, float *x, float *y) -{ - float vx = *x, vy = *y; - *x = vx * t.m[0][0] + vy * t.m[0][1] + t.t[0]; - *y = vx * t.m[1][0] + vy * t.m[1][1] + t.t[1]; -} +struct mp_log; -struct mp_rect_f { - float x0, y0, x1, y1; -}; - -// Semantic equality (fuzzy comparison) -static inline bool mp_rect_f_seq(struct mp_rect_f a, struct mp_rect_f b) -{ - return fabs(a.x0 - b.x0) < 1e-6 && fabs(a.x1 - b.x1) < 1e-6 && - fabs(a.y0 - b.y0) < 1e-6 && fabs(a.y1 - b.y1) < 1e-6; -} - -static inline void gl_transform_rect(struct gl_transform t, struct mp_rect_f *r) -{ - gl_transform_vec(t, &r->x0, &r->y0); - gl_transform_vec(t, &r->x1, &r->y1); -} +void gl_check_error(GL *gl, struct mp_log *log, const char *info); -static inline bool gl_transform_eq(struct gl_transform a, struct gl_transform b) -{ - for (int x = 0; x < 2; x++) { - for (int y = 0; y < 2; y++) { - if (a.m[x][y] != b.m[x][y]) - return false; - } - } +void gl_upload_tex(GL *gl, GLenum target, GLenum format, GLenum type, + const void *dataptr, int stride, + int x, int y, int w, int h); - return a.t[0] == b.t[0] && a.t[1] == b.t[1]; -} +mp_image_t *gl_read_fbo_contents(GL *gl, int fbo, int w, int h); -void gl_transform_trans(struct gl_transform t, struct gl_transform *x); - -struct fbodst { - struct ra_tex *tex; - bool flip; // mirror vertically +struct gl_vao { + GL *gl; + GLuint vao; // the VAO object, or 0 if unsupported by driver + GLuint buffer; // GL_ARRAY_BUFFER used for the data + int stride; // size of each element (interleaved elements are assumed) + const struct ra_renderpass_input *entries; + int num_entries; }; -void gl_transform_ortho_fbodst(struct gl_transform *t, struct fbodst fbo); - -// A pool of buffers, which can grow as needed -struct ra_buf_pool { - struct ra_buf_params current_params; - struct ra_buf **buffers; - int num_buffers; - int index; -}; - -void ra_buf_pool_uninit(struct ra *ra, struct ra_buf_pool *pool); - -// Note: params->initial_data is *not* supported -struct ra_buf *ra_buf_pool_get(struct ra *ra, struct ra_buf_pool *pool, - const struct ra_buf_params *params); - -// Helper that wraps ra_tex_upload using texture upload buffers to ensure that -// params->buf is always set. 
This is intended for RA-internal usage. -bool ra_tex_upload_pbo(struct ra *ra, struct ra_buf_pool *pbo, - const struct ra_tex_upload_params *params); - -// Layout rules for GLSL's packing modes -struct ra_layout std140_layout(struct ra_renderpass_input *inp); -struct ra_layout std430_layout(struct ra_renderpass_input *inp); - -struct fbotex { - struct ra *ra; - struct ra_tex *tex; - int lw, lh; // logical (configured) size, <= than texture size - struct fbodst fbo; -}; - -void fbotex_uninit(struct fbotex *fbo); -bool fbotex_change(struct fbotex *fbo, struct ra *ra, struct mp_log *log, - int w, int h, const struct ra_format *fmt, int flags); -#define FBOTEX_FUZZY_W 1 -#define FBOTEX_FUZZY_H 2 -#define FBOTEX_FUZZY (FBOTEX_FUZZY_W | FBOTEX_FUZZY_H) - -// A wrapper around ra_timer that does result pooling, averaging etc. -struct timer_pool; +void gl_vao_init(struct gl_vao *vao, GL *gl, int stride, + const struct ra_renderpass_input *entries, + int num_entries); +void gl_vao_uninit(struct gl_vao *vao); +void gl_vao_draw_data(struct gl_vao *vao, GLenum prim, void *ptr, size_t num); -struct timer_pool *timer_pool_create(struct ra *ra); -void timer_pool_destroy(struct timer_pool *pool); -void timer_pool_start(struct timer_pool *pool); -void timer_pool_stop(struct timer_pool *pool); -struct mp_pass_perf timer_pool_measure(struct timer_pool *pool); +void gl_set_debug_logger(GL *gl, struct mp_log *log); -// print a multi line string with line numbers (e.g. for shader sources) -// log, lev: module and log level, as in mp_msg() -void mp_log_source(struct mp_log *log, int lev, const char *src); +#endif diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c deleted file mode 100644 index 3362381eff..0000000000 --- a/video/out/opengl/video.c +++ /dev/null @@ -1,3813 +0,0 @@ -/* - * This file is part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see . - */ - -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "video.h" - -#include "misc/bstr.h" -#include "options/m_config.h" -#include "common/global.h" -#include "options/options.h" -#include "utils.h" -#include "hwdec.h" -#include "osd.h" -#include "ra.h" -#include "stream/stream.h" -#include "video_shaders.h" -#include "user_shaders.h" -#include "video/out/filter_kernels.h" -#include "video/out/aspect.h" -#include "video/out/dither.h" -#include "video/out/vo.h" - -// scale/cscale arguments that map directly to shader filter routines. -// Note that the convolution filters are not included in this list. 
-static const char *const fixed_scale_filters[] = { - "bilinear", - "bicubic_fast", - "oversample", - NULL -}; -static const char *const fixed_tscale_filters[] = { - "oversample", - "linear", - NULL -}; - -// must be sorted, and terminated with 0 -int filter_sizes[] = - {2, 4, 6, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 0}; -int tscale_sizes[] = {2, 4, 6, 0}; // limited by TEXUNIT_VIDEO_NUM - -struct vertex_pt { - float x, y; -}; - -struct vertex { - struct vertex_pt position; - struct vertex_pt texcoord[TEXUNIT_VIDEO_NUM]; -}; - -static const struct ra_renderpass_input vertex_vao[] = { - {"position", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, position)}, - {"texcoord0", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[0])}, - {"texcoord1", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[1])}, - {"texcoord2", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[2])}, - {"texcoord3", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[3])}, - {"texcoord4", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[4])}, - {"texcoord5", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[5])}, - {0} -}; - -struct texplane { - struct ra_tex *tex; - int w, h; - bool flipped; -}; - -struct video_image { - struct texplane planes[4]; - struct mp_image *mpi; // original input image - uint64_t id; // unique ID identifying mpi contents - bool hwdec_mapped; -}; - -enum plane_type { - PLANE_NONE = 0, - PLANE_RGB, - PLANE_LUMA, - PLANE_CHROMA, - PLANE_ALPHA, - PLANE_XYZ, -}; - -static const char *plane_names[] = { - [PLANE_NONE] = "unknown", - [PLANE_RGB] = "rgb", - [PLANE_LUMA] = "luma", - [PLANE_CHROMA] = "chroma", - [PLANE_ALPHA] = "alpha", - [PLANE_XYZ] = "xyz", -}; - -// A self-contained description of a source image which can be bound to a -// texture unit and sampled from. Contains metadata about how it's to be used -struct img_tex { - enum plane_type type; // must be set to something non-zero - int components; // number of relevant coordinates - float multiplier; // multiplier to be used when sampling - struct ra_tex *tex; - int w, h; // logical size (after transformation) - struct gl_transform transform; // rendering transformation -}; - -// A named img_tex, for user scripting purposes -struct saved_tex { - const char *name; - struct img_tex tex; -}; - -// A texture hook. This is some operation that transforms a named texture as -// soon as it's generated -struct tex_hook { - const char *save_tex; - const char *hook_tex[SHADER_MAX_HOOKS]; - const char *bind_tex[TEXUNIT_VIDEO_NUM]; - int components; // how many components are relevant (0 = same as input) - void *priv; // this gets talloc_freed when the tex_hook is removed - void (*hook)(struct gl_video *p, struct img_tex tex, // generates GLSL - struct gl_transform *trans, void *priv); - bool (*cond)(struct gl_video *p, struct img_tex tex, void *priv); -}; - -struct fbosurface { - struct fbotex fbotex; - uint64_t id; - double pts; -}; - -#define FBOSURFACES_MAX 10 - -struct cached_file { - char *path; - struct bstr body; -}; - -struct pass_info { - struct bstr desc; - struct mp_pass_perf perf; -}; - -#define PASS_INFO_MAX (SHADER_MAX_PASSES + 32) - -struct dr_buffer { - struct ra_buf *buf; - // The mpi reference will keep the data from being recycled (or from other - // references gaining write access) while the GPU is accessing the buffer. 
- struct mp_image *mpi; -}; - -struct gl_video { - struct ra *ra; - - struct mpv_global *global; - struct mp_log *log; - struct gl_video_opts opts; - struct m_config_cache *opts_cache; - struct gl_lcms *cms; - - int fb_depth; // actual bits available in GL main framebuffer - struct m_color clear_color; - bool force_clear_color; - - struct gl_shader_cache *sc; - - struct osd_state *osd_state; - struct mpgl_osd *osd; - double osd_pts; - - struct ra_tex *lut_3d_texture; - bool use_lut_3d; - int lut_3d_size[3]; - - struct ra_tex *dither_texture; - - struct mp_image_params real_image_params; // configured format - struct mp_image_params image_params; // texture format (mind hwdec case) - struct ra_imgfmt_desc ra_format; // texture format - int plane_count; - - bool is_gray; - bool has_alpha; - char color_swizzle[5]; - bool use_integer_conversion; - - struct video_image image; - - struct dr_buffer *dr_buffers; - int num_dr_buffers; - - bool using_dr_path; - - bool dumb_mode; - bool forced_dumb_mode; - - const struct ra_format *fbo_format; - struct fbotex merge_fbo[4]; - struct fbotex scale_fbo[4]; - struct fbotex integer_fbo[4]; - struct fbotex indirect_fbo; - struct fbotex blend_subs_fbo; - struct fbotex screen_fbo; - struct fbotex output_fbo; - struct fbosurface surfaces[FBOSURFACES_MAX]; - struct fbotex vdpau_deinterleave_fbo[2]; - struct ra_buf *hdr_peak_ssbo; - - // user pass descriptions and textures - struct tex_hook tex_hooks[SHADER_MAX_PASSES]; - int tex_hook_num; - struct gl_user_shader_tex user_textures[SHADER_MAX_PASSES]; - int user_tex_num; - - int surface_idx; - int surface_now; - int frames_drawn; - bool is_interpolated; - bool output_fbo_valid; - - // state for configured scalers - struct scaler scaler[SCALER_COUNT]; - - struct mp_csp_equalizer_state *video_eq; - - struct mp_rect src_rect; // displayed part of the source video - struct mp_rect dst_rect; // video rectangle on output window - struct mp_osd_res osd_rect; // OSD size/margins - - // temporary during rendering - struct img_tex pass_tex[TEXUNIT_VIDEO_NUM]; - struct compute_info pass_compute; // compute shader metadata for this pass - int pass_tex_num; - int texture_w, texture_h; - struct gl_transform texture_offset; // texture transform without rotation - int components; - bool use_linear; - float user_gamma; - - // pass info / metrics - struct pass_info pass_fresh[PASS_INFO_MAX]; - struct pass_info pass_redraw[PASS_INFO_MAX]; - struct pass_info *pass; - int pass_idx; - struct timer_pool *upload_timer; - struct timer_pool *blit_timer; - struct timer_pool *osd_timer; - - // intermediate textures - struct saved_tex saved_tex[SHADER_MAX_SAVED]; - int saved_tex_num; - struct fbotex hook_fbos[SHADER_MAX_SAVED]; - int hook_fbo_num; - - int frames_uploaded; - int frames_rendered; - AVLFG lfg; - - // Cached because computing it can take relatively long - int last_dither_matrix_size; - float *last_dither_matrix; - - struct cached_file *files; - int num_files; - - struct ra_hwdec *hwdec; - struct ra_hwdec_mapper *hwdec_mapper; - bool hwdec_active; - - bool dsi_warned; - bool broken_frame; // temporary error state -}; - -static const struct gl_video_opts gl_video_opts_def = { - .dither_algo = DITHER_FRUIT, - .dither_depth = -1, - .dither_size = 6, - .temporal_dither_period = 1, - .fbo_format = "auto", - .sigmoid_center = 0.75, - .sigmoid_slope = 6.5, - .scaler = { - {{"bilinear", .params={NAN, NAN}}, {.params = {NAN, NAN}}, - .cutoff = 0.001}, // scale - {{NULL, .params={NAN, NAN}}, {.params = {NAN, NAN}}, - .cutoff = 0.001}, // 
dscale - {{"bilinear", .params={NAN, NAN}}, {.params = {NAN, NAN}}, - .cutoff = 0.001}, // cscale - {{"mitchell", .params={NAN, NAN}}, {.params = {NAN, NAN}}, - .clamp = 1, }, // tscale - }, - .scaler_resizes_only = 1, - .scaler_lut_size = 6, - .interpolation_threshold = 0.0001, - .alpha_mode = ALPHA_BLEND_TILES, - .background = {0, 0, 0, 255}, - .gamma = 1.0f, - .tone_mapping = TONE_MAPPING_MOBIUS, - .tone_mapping_param = NAN, - .tone_mapping_desat = 2.0, - .early_flush = -1, -}; - -static int validate_scaler_opt(struct mp_log *log, const m_option_t *opt, - struct bstr name, struct bstr param); - -static int validate_window_opt(struct mp_log *log, const m_option_t *opt, - struct bstr name, struct bstr param); - -#define OPT_BASE_STRUCT struct gl_video_opts - -#define SCALER_OPTS(n, i) \ - OPT_STRING_VALIDATE(n, scaler[i].kernel.name, 0, validate_scaler_opt), \ - OPT_FLOAT(n"-param1", scaler[i].kernel.params[0], 0), \ - OPT_FLOAT(n"-param2", scaler[i].kernel.params[1], 0), \ - OPT_FLOAT(n"-blur", scaler[i].kernel.blur, 0), \ - OPT_FLOATRANGE(n"-cutoff", scaler[i].cutoff, 0, 0.0, 1.0), \ - OPT_FLOATRANGE(n"-taper", scaler[i].kernel.taper, 0, 0.0, 1.0), \ - OPT_FLOAT(n"-wparam", scaler[i].window.params[0], 0), \ - OPT_FLOAT(n"-wblur", scaler[i].window.blur, 0), \ - OPT_FLOATRANGE(n"-wtaper", scaler[i].window.taper, 0, 0.0, 1.0), \ - OPT_FLOATRANGE(n"-clamp", scaler[i].clamp, 0, 0.0, 1.0), \ - OPT_FLOATRANGE(n"-radius", scaler[i].radius, 0, 0.5, 16.0), \ - OPT_FLOATRANGE(n"-antiring", scaler[i].antiring, 0, 0.0, 1.0), \ - OPT_STRING_VALIDATE(n"-window", scaler[i].window.name, 0, validate_window_opt) - -const struct m_sub_options gl_video_conf = { - .opts = (const m_option_t[]) { - OPT_CHOICE("opengl-dumb-mode", dumb_mode, 0, - ({"auto", 0}, {"yes", 1}, {"no", -1})), - OPT_FLOATRANGE("opengl-gamma", gamma, 0, 0.1, 2.0), - OPT_FLAG("gamma-auto", gamma_auto, 0), - OPT_CHOICE_C("target-prim", target_prim, 0, mp_csp_prim_names), - OPT_CHOICE_C("target-trc", target_trc, 0, mp_csp_trc_names), - OPT_CHOICE("tone-mapping", tone_mapping, 0, - ({"clip", TONE_MAPPING_CLIP}, - {"mobius", TONE_MAPPING_MOBIUS}, - {"reinhard", TONE_MAPPING_REINHARD}, - {"hable", TONE_MAPPING_HABLE}, - {"gamma", TONE_MAPPING_GAMMA}, - {"linear", TONE_MAPPING_LINEAR})), - OPT_FLAG("hdr-compute-peak", compute_hdr_peak, 0), - OPT_FLOAT("tone-mapping-param", tone_mapping_param, 0), - OPT_FLOAT("tone-mapping-desaturate", tone_mapping_desat, 0), - OPT_FLAG("gamut-warning", gamut_warning, 0), - OPT_FLAG("opengl-pbo", pbo, 0), - SCALER_OPTS("scale", SCALER_SCALE), - SCALER_OPTS("dscale", SCALER_DSCALE), - SCALER_OPTS("cscale", SCALER_CSCALE), - SCALER_OPTS("tscale", SCALER_TSCALE), - OPT_INTRANGE("scaler-lut-size", scaler_lut_size, 0, 4, 10), - OPT_FLAG("scaler-resizes-only", scaler_resizes_only, 0), - OPT_FLAG("linear-scaling", linear_scaling, 0), - OPT_FLAG("correct-downscaling", correct_downscaling, 0), - OPT_FLAG("sigmoid-upscaling", sigmoid_upscaling, 0), - OPT_FLOATRANGE("sigmoid-center", sigmoid_center, 0, 0.0, 1.0), - OPT_FLOATRANGE("sigmoid-slope", sigmoid_slope, 0, 1.0, 20.0), - OPT_STRING("opengl-fbo-format", fbo_format, 0), - OPT_CHOICE_OR_INT("dither-depth", dither_depth, 0, -1, 16, - ({"no", -1}, {"auto", 0})), - OPT_CHOICE("dither", dither_algo, 0, - ({"fruit", DITHER_FRUIT}, - {"ordered", DITHER_ORDERED}, - {"no", DITHER_NONE})), - OPT_INTRANGE("dither-size-fruit", dither_size, 0, 2, 8), - OPT_FLAG("temporal-dither", temporal_dither, 0), - OPT_INTRANGE("temporal-dither-period", temporal_dither_period, 0, 1, 128), - 
OPT_CHOICE("alpha", alpha_mode, 0, - ({"no", ALPHA_NO}, - {"yes", ALPHA_YES}, - {"blend", ALPHA_BLEND}, - {"blend-tiles", ALPHA_BLEND_TILES})), - OPT_FLAG("opengl-rectangle-textures", use_rectangle, 0), - OPT_COLOR("background", background, 0), - OPT_FLAG("interpolation", interpolation, 0), - OPT_FLOAT("interpolation-threshold", interpolation_threshold, 0), - OPT_CHOICE("blend-subtitles", blend_subs, 0, - ({"no", BLEND_SUBS_NO}, - {"yes", BLEND_SUBS_YES}, - {"video", BLEND_SUBS_VIDEO})), - OPT_PATHLIST("opengl-shaders", user_shaders, 0), - OPT_CLI_ALIAS("opengl-shader", "opengl-shaders-append"), - OPT_FLAG("deband", deband, 0), - OPT_SUBSTRUCT("deband", deband_opts, deband_conf, 0), - OPT_FLOAT("sharpen", unsharp, 0), - OPT_INTRANGE("opengl-tex-pad-x", tex_pad_x, 0, 0, 4096), - OPT_INTRANGE("opengl-tex-pad-y", tex_pad_y, 0, 0, 4096), - OPT_SUBSTRUCT("", icc_opts, mp_icc_conf, 0), - OPT_CHOICE("opengl-early-flush", early_flush, 0, - ({"no", 0}, {"yes", 1}, {"auto", -1})), - OPT_STRING("opengl-shader-cache-dir", shader_cache_dir, 0), - OPT_REPLACED("hdr-tone-mapping", "tone-mapping"), - {0} - }, - .size = sizeof(struct gl_video_opts), - .defaults = &gl_video_opts_def, -}; - -static void uninit_rendering(struct gl_video *p); -static void uninit_scaler(struct gl_video *p, struct scaler *scaler); -static void check_gl_features(struct gl_video *p); -static bool pass_upload_image(struct gl_video *p, struct mp_image *mpi, uint64_t id); -static const char *handle_scaler_opt(const char *name, bool tscale); -static void reinit_from_options(struct gl_video *p); -static void get_scale_factors(struct gl_video *p, bool transpose_rot, double xy[2]); -static void gl_video_setup_hooks(struct gl_video *p); - -#define GLSL(x) gl_sc_add(p->sc, #x "\n"); -#define GLSLF(...) gl_sc_addf(p->sc, __VA_ARGS__) -#define GLSLHF(...) gl_sc_haddf(p->sc, __VA_ARGS__) -#define PRELUDE(...) gl_sc_paddf(p->sc, __VA_ARGS__) - -static struct bstr load_cached_file(struct gl_video *p, const char *path) -{ - if (!path || !path[0]) - return (struct bstr){0}; - for (int n = 0; n < p->num_files; n++) { - if (strcmp(p->files[n].path, path) == 0) - return p->files[n].body; - } - // not found -> load it - struct bstr s = stream_read_file(path, p, p->global, 1024000); // 1024 kB - if (s.len) { - struct cached_file new = { - .path = talloc_strdup(p, path), - .body = s, - }; - MP_TARRAY_APPEND(p, p->files, p->num_files, new); - return new.body; - } - return (struct bstr){0}; -} - -static void debug_check_gl(struct gl_video *p, const char *msg) -{ - if (p->ra->fns->debug_marker) - p->ra->fns->debug_marker(p->ra, msg); -} - -static void gl_video_reset_surfaces(struct gl_video *p) -{ - for (int i = 0; i < FBOSURFACES_MAX; i++) { - p->surfaces[i].id = 0; - p->surfaces[i].pts = MP_NOPTS_VALUE; - } - p->surface_idx = 0; - p->surface_now = 0; - p->frames_drawn = 0; - p->output_fbo_valid = false; -} - -static void gl_video_reset_hooks(struct gl_video *p) -{ - for (int i = 0; i < p->tex_hook_num; i++) - talloc_free(p->tex_hooks[i].priv); - - for (int i = 0; i < p->user_tex_num; i++) - ra_tex_free(p->ra, &p->user_textures[i].tex); - - p->tex_hook_num = 0; - p->user_tex_num = 0; -} - -static inline int fbosurface_wrap(int id) -{ - id = id % FBOSURFACES_MAX; - return id < 0 ? 
id + FBOSURFACES_MAX : id; -} - -static void reinit_osd(struct gl_video *p) -{ - mpgl_osd_destroy(p->osd); - p->osd = NULL; - if (p->osd_state) - p->osd = mpgl_osd_init(p->ra, p->log, p->osd_state); -} - -static void uninit_rendering(struct gl_video *p) -{ - for (int n = 0; n < SCALER_COUNT; n++) - uninit_scaler(p, &p->scaler[n]); - - ra_tex_free(p->ra, &p->dither_texture); - - for (int n = 0; n < 4; n++) { - fbotex_uninit(&p->merge_fbo[n]); - fbotex_uninit(&p->scale_fbo[n]); - fbotex_uninit(&p->integer_fbo[n]); - } - - fbotex_uninit(&p->indirect_fbo); - fbotex_uninit(&p->blend_subs_fbo); - fbotex_uninit(&p->screen_fbo); - fbotex_uninit(&p->output_fbo); - - for (int n = 0; n < FBOSURFACES_MAX; n++) - fbotex_uninit(&p->surfaces[n].fbotex); - - for (int n = 0; n < SHADER_MAX_SAVED; n++) - fbotex_uninit(&p->hook_fbos[n]); - - for (int n = 0; n < 2; n++) - fbotex_uninit(&p->vdpau_deinterleave_fbo[n]); - - gl_video_reset_surfaces(p); - gl_video_reset_hooks(p); - - gl_sc_reset_error(p->sc); -} - -bool gl_video_gamma_auto_enabled(struct gl_video *p) -{ - return p->opts.gamma_auto; -} - -struct mp_colorspace gl_video_get_output_colorspace(struct gl_video *p) -{ - return (struct mp_colorspace) { - .primaries = p->opts.target_prim, - .gamma = p->opts.target_trc, - }; -} - -// Warning: profile.start must point to a ta allocation, and the function -// takes over ownership. -void gl_video_set_icc_profile(struct gl_video *p, bstr icc_data) -{ - if (gl_lcms_set_memory_profile(p->cms, icc_data)) - reinit_from_options(p); -} - -bool gl_video_icc_auto_enabled(struct gl_video *p) -{ - return p->opts.icc_opts ? p->opts.icc_opts->profile_auto : false; -} - -static bool gl_video_get_lut3d(struct gl_video *p, enum mp_csp_prim prim, - enum mp_csp_trc trc) -{ - if (!p->use_lut_3d) - return false; - - struct AVBufferRef *icc = NULL; - if (p->image.mpi) - icc = p->image.mpi->icc_profile; - - if (p->lut_3d_texture && !gl_lcms_has_changed(p->cms, prim, trc, icc)) - return true; - - // GLES3 doesn't provide filtered 16 bit integer textures - // GLES2 doesn't even provide 3D textures - const struct ra_format *fmt = ra_find_unorm_format(p->ra, 2, 4); - if (!fmt || !(p->ra->caps & RA_CAP_TEX_3D)) { - p->use_lut_3d = false; - MP_WARN(p, "Disabling color management (no RGBA16 3D textures).\n"); - return false; - } - - struct lut3d *lut3d = NULL; - if (!fmt || !gl_lcms_get_lut3d(p->cms, &lut3d, prim, trc, icc) || !lut3d) { - p->use_lut_3d = false; - return false; - } - - ra_tex_free(p->ra, &p->lut_3d_texture); - - struct ra_tex_params params = { - .dimensions = 3, - .w = lut3d->size[0], - .h = lut3d->size[1], - .d = lut3d->size[2], - .format = fmt, - .render_src = true, - .src_linear = true, - .initial_data = lut3d->data, - }; - p->lut_3d_texture = ra_tex_create(p->ra, ¶ms); - - debug_check_gl(p, "after 3d lut creation"); - - for (int i = 0; i < 3; i++) - p->lut_3d_size[i] = lut3d->size[i]; - - talloc_free(lut3d); - - return true; -} - -// Fill an img_tex struct from an FBO + some metadata -static struct img_tex img_tex_fbo(struct fbotex *fbo, enum plane_type type, - int components) -{ - assert(type != PLANE_NONE); - return (struct img_tex){ - .type = type, - .tex = fbo->tex, - .multiplier = 1.0, - .w = fbo->lw, - .h = fbo->lh, - .transform = identity_trans, - .components = components, - }; -} - -// Bind an img_tex to a free texture unit and return its ID. 
At most -// TEXUNIT_VIDEO_NUM texture units can be bound at once -static int pass_bind(struct gl_video *p, struct img_tex tex) -{ - assert(p->pass_tex_num < TEXUNIT_VIDEO_NUM); - p->pass_tex[p->pass_tex_num] = tex; - return p->pass_tex_num++; -} - -// Rotation by 90° and flipping. -// w/h is used for recentering. -static void get_transform(float w, float h, int rotate, bool flip, - struct gl_transform *out_tr) -{ - int a = rotate % 90 ? 0 : rotate / 90; - int sin90[4] = {0, 1, 0, -1}; // just to avoid rounding issues etc. - int cos90[4] = {1, 0, -1, 0}; - struct gl_transform tr = {{{ cos90[a], sin90[a]}, - {-sin90[a], cos90[a]}}}; - - // basically, recenter to keep the whole image in view - float b[2] = {1, 1}; - gl_transform_vec(tr, &b[0], &b[1]); - tr.t[0] += b[0] < 0 ? w : 0; - tr.t[1] += b[1] < 0 ? h : 0; - - if (flip) { - struct gl_transform fliptr = {{{1, 0}, {0, -1}}, {0, h}}; - gl_transform_trans(fliptr, &tr); - } - - *out_tr = tr; -} - -// Return the chroma plane upscaled to luma size, but with additional padding -// for image sizes not aligned to subsampling. -static int chroma_upsize(int size, int pixel) -{ - return (size + pixel - 1) / pixel * pixel; -} - -// If a and b are on the same plane, return what plane type should be used. -// If a or b are none, the other type always wins. -// Usually: LUMA/RGB/XYZ > CHROMA > ALPHA -static enum plane_type merge_plane_types(enum plane_type a, enum plane_type b) -{ - if (a == PLANE_NONE) - return b; - if (b == PLANE_LUMA || b == PLANE_RGB || b == PLANE_XYZ) - return b; - if (b != PLANE_NONE && a == PLANE_ALPHA) - return b; - return a; -} - -// Places a video_image's image textures + associated metadata into tex[]. The -// number of textures is equal to p->plane_count. Any necessary plane offsets -// are stored in off. (e.g. chroma position) -static void pass_get_img_tex(struct gl_video *p, struct video_image *vimg, - struct img_tex tex[4], struct gl_transform off[4]) -{ - assert(vimg->mpi); - - int w = p->image_params.w; - int h = p->image_params.h; - - // Determine the chroma offset - float ls_w = 1.0 / p->ra_format.chroma_w; - float ls_h = 1.0 / p->ra_format.chroma_h; - - struct gl_transform chroma = {{{ls_w, 0.0}, {0.0, ls_h}}}; - - if (p->image_params.chroma_location != MP_CHROMA_CENTER) { - int cx, cy; - mp_get_chroma_location(p->image_params.chroma_location, &cx, &cy); - // By default texture coordinates are such that chroma is centered with - // any chroma subsampling. If a specific direction is given, make it - // so that the luma and chroma sample line up exactly. - // For 4:4:4, setting chroma location should have no effect at all. - // luma sample size (in chroma coord. space) - chroma.t[0] = ls_w < 1 ? ls_w * -cx / 2 : 0; - chroma.t[1] = ls_h < 1 ? ls_h * -cy / 2 : 0; - } - - int msb_valid_bits = - p->ra_format.component_bits + MPMIN(p->ra_format.component_pad, 0); - // The existing code assumes we just have a single tex multiplier for - // all of the planes. 
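/* A worked example of get_transform() above: for rotate == 90 we get a == 1,
 * so m = {{cos90[1], sin90[1]}, {-sin90[1], cos90[1]}} = {{0, 1}, {-1, 0}}.
 * Probing the corner b = (1, 1) yields (1, -1), so only t[1] gets the +h
 * recentering, and a point (x, y) ends up at (y, h - x). The extra flip
 * matrix {{1, 0}, {0, -1}} with t = {0, h} is simply a vertical mirror,
 * mapping (x, y) to (x, h - y). */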
This may change in the future - float tex_mul = 1.0 / mp_get_csp_mul(p->image_params.color.space, - msb_valid_bits, - p->ra_format.component_bits); - - memset(tex, 0, 4 * sizeof(tex[0])); - for (int n = 0; n < p->plane_count; n++) { - struct texplane *t = &vimg->planes[n]; - - enum plane_type type = PLANE_NONE; - for (int i = 0; i < 4; i++) { - int c = p->ra_format.components[n][i]; - enum plane_type ctype; - if (c == 0) { - ctype = PLANE_NONE; - } else if (c == 4) { - ctype = PLANE_ALPHA; - } else if (p->image_params.color.space == MP_CSP_RGB) { - ctype = PLANE_RGB; - } else if (p->image_params.color.space == MP_CSP_XYZ) { - ctype = PLANE_XYZ; - } else { - ctype = c == 1 ? PLANE_LUMA : PLANE_CHROMA; - } - type = merge_plane_types(type, ctype); - } - - tex[n] = (struct img_tex){ - .type = type, - .tex = t->tex, - .multiplier = tex_mul, - .w = t->w, - .h = t->h, - }; - - for (int i = 0; i < 4; i++) - tex[n].components += !!p->ra_format.components[n][i]; - - get_transform(t->w, t->h, p->image_params.rotate, t->flipped, - &tex[n].transform); - if (p->image_params.rotate % 180 == 90) - MPSWAP(int, tex[n].w, tex[n].h); - - off[n] = identity_trans; - - if (type == PLANE_CHROMA) { - struct gl_transform rot; - get_transform(0, 0, p->image_params.rotate, true, &rot); - - struct gl_transform tr = chroma; - gl_transform_vec(rot, &tr.t[0], &tr.t[1]); - - float dx = (chroma_upsize(w, p->ra_format.chroma_w) - w) * ls_w; - float dy = (chroma_upsize(h, p->ra_format.chroma_h) - h) * ls_h; - - // Adjust the chroma offset if the real chroma size is fractional - // due image sizes not aligned to chroma subsampling. - struct gl_transform rot2; - get_transform(0, 0, p->image_params.rotate, t->flipped, &rot2); - if (rot2.m[0][0] < 0) - tr.t[0] += dx; - if (rot2.m[1][0] < 0) - tr.t[0] += dy; - if (rot2.m[0][1] < 0) - tr.t[1] += dx; - if (rot2.m[1][1] < 0) - tr.t[1] += dy; - - off[n] = tr; - } - } -} - -// Return the index of the given component (assuming all non-padding components -// of all planes are concatenated into a linear list). -static int find_comp(struct ra_imgfmt_desc *desc, int component) -{ - int cur = 0; - for (int n = 0; n < desc->num_planes; n++) { - for (int i = 0; i < 4; i++) { - if (desc->components[n][i]) { - if (desc->components[n][i] == component) - return cur; - cur++; - } - } - } - return -1; -} - -static void init_video(struct gl_video *p) -{ - p->use_integer_conversion = false; - - if (p->hwdec && ra_hwdec_test_format(p->hwdec, p->image_params.imgfmt)) { - if (p->hwdec->driver->overlay_frame) { - MP_WARN(p, "Using HW-overlay mode. 
No GL filtering is performed " - "on the video!\n"); - } else { - p->hwdec_mapper = ra_hwdec_mapper_create(p->hwdec, &p->image_params); - if (!p->hwdec_mapper) - MP_ERR(p, "Initializing texture for hardware decoding failed.\n"); - } - if (p->hwdec_mapper) - p->image_params = p->hwdec_mapper->dst_params; - const char **exts = p->hwdec->glsl_extensions; - for (int n = 0; exts && exts[n]; n++) - gl_sc_enable_extension(p->sc, (char *)exts[n]); - p->hwdec_active = true; - } - - p->ra_format = (struct ra_imgfmt_desc){0}; - ra_get_imgfmt_desc(p->ra, p->image_params.imgfmt, &p->ra_format); - - p->plane_count = p->ra_format.num_planes; - - p->has_alpha = false; - p->is_gray = true; - - for (int n = 0; n < p->ra_format.num_planes; n++) { - for (int i = 0; i < 4; i++) { - if (p->ra_format.components[n][i]) { - p->has_alpha |= p->ra_format.components[n][i] == 4; - p->is_gray &= p->ra_format.components[n][i] == 1 || - p->ra_format.components[n][i] == 4; - } - } - } - - for (int c = 0; c < 4; c++) { - int loc = find_comp(&p->ra_format, c + 1); - p->color_swizzle[c] = "rgba"[loc >= 0 && loc < 4 ? loc : 0]; - } - p->color_swizzle[4] = '\0'; - - // Format-dependent checks. - check_gl_features(p); - - mp_image_params_guess_csp(&p->image_params); - - av_lfg_init(&p->lfg, 1); - - debug_check_gl(p, "before video texture creation"); - - if (!p->hwdec_active) { - struct video_image *vimg = &p->image; - - struct mp_image layout = {0}; - mp_image_set_params(&layout, &p->image_params); - - for (int n = 0; n < p->plane_count; n++) { - struct texplane *plane = &vimg->planes[n]; - const struct ra_format *format = p->ra_format.planes[n]; - - plane->w = mp_image_plane_w(&layout, n); - plane->h = mp_image_plane_h(&layout, n); - - struct ra_tex_params params = { - .dimensions = 2, - .w = plane->w + p->opts.tex_pad_x, - .h = plane->h + p->opts.tex_pad_y, - .d = 1, - .format = format, - .render_src = true, - .src_linear = format->linear_filter, - .non_normalized = p->opts.use_rectangle, - .host_mutable = true, - }; - - MP_VERBOSE(p, "Texture for plane %d: %dx%d\n", n, - params.w, params.h); - - plane->tex = ra_tex_create(p->ra, ¶ms); - if (!plane->tex) - abort(); // shit happens - - p->use_integer_conversion |= format->ctype == RA_CTYPE_UINT; - } - } - - debug_check_gl(p, "after video texture creation"); - - gl_video_setup_hooks(p); -} - -// Release any texture mappings associated with the current frame. -static void unmap_current_image(struct gl_video *p) -{ - struct video_image *vimg = &p->image; - - if (vimg->hwdec_mapped) { - assert(p->hwdec_active && p->hwdec_mapper); - ra_hwdec_mapper_unmap(p->hwdec_mapper); - memset(vimg->planes, 0, sizeof(vimg->planes)); - vimg->hwdec_mapped = false; - vimg->id = 0; // needs to be mapped again - } -} - -static struct dr_buffer *gl_find_dr_buffer(struct gl_video *p, uint8_t *ptr) -{ - for (int i = 0; i < p->num_dr_buffers; i++) { - struct dr_buffer *buffer = &p->dr_buffers[i]; - uint8_t *bufptr = buffer->buf->data; - size_t size = buffer->buf->params.size; - if (ptr >= bufptr && ptr < bufptr + size) - return buffer; - } - - return NULL; -} - -static void gc_pending_dr_fences(struct gl_video *p, bool force) -{ -again:; - for (int n = 0; n < p->num_dr_buffers; n++) { - struct dr_buffer *buffer = &p->dr_buffers[n]; - if (!buffer->mpi) - continue; - - bool res = p->ra->fns->buf_poll(p->ra, buffer->buf); - if (res || force) { - // Unreferencing the image could cause gl_video_dr_free_buffer() - // to be called by the talloc destructor (if it was the last - // reference). 
This will implicitly invalidate the buffer pointer - // and change the p->dr_buffers array. To make it worse, it could - // free multiple dr_buffers due to weird theoretical corner cases. - // This is also why we use the goto to iterate again from the - // start, because everything gets fucked up. Hail satan! - struct mp_image *ref = buffer->mpi; - buffer->mpi = NULL; - talloc_free(ref); - goto again; - } - } -} - -static void unref_current_image(struct gl_video *p) -{ - unmap_current_image(p); - p->image.id = 0; - - mp_image_unrefp(&p->image.mpi); - - // While we're at it, also garbage collect pending fences in here to - // get it out of the way. - gc_pending_dr_fences(p, false); -} - -// If overlay mode is used, make sure to remove the overlay. -// Be careful with this. Removing the overlay and adding another one will -// lead to flickering artifacts. -static void unmap_overlay(struct gl_video *p) -{ - if (p->hwdec_active && p->hwdec->driver->overlay_frame) - p->hwdec->driver->overlay_frame(p->hwdec, NULL, NULL, NULL, true); -} - -static void uninit_video(struct gl_video *p) -{ - uninit_rendering(p); - - struct video_image *vimg = &p->image; - - unmap_overlay(p); - unref_current_image(p); - - for (int n = 0; n < p->plane_count; n++) { - struct texplane *plane = &vimg->planes[n]; - ra_tex_free(p->ra, &plane->tex); - } - *vimg = (struct video_image){0}; - - // Invalidate image_params to ensure that gl_video_config() will call - // init_video() on uninitialized gl_video. - p->real_image_params = (struct mp_image_params){0}; - p->image_params = p->real_image_params; - p->hwdec_active = false; - ra_hwdec_mapper_free(&p->hwdec_mapper); -} - -static void pass_record(struct gl_video *p, struct mp_pass_perf perf) -{ - if (!p->pass || p->pass_idx == PASS_INFO_MAX) - return; - - struct pass_info *pass = &p->pass[p->pass_idx]; - pass->perf = perf; - - if (pass->desc.len == 0) - bstr_xappend(p, &pass->desc, bstr0("(unknown)")); - - p->pass_idx++; -} - -PRINTF_ATTRIBUTE(2, 3) -static void pass_describe(struct gl_video *p, const char *textf, ...) -{ - if (!p->pass || p->pass_idx == PASS_INFO_MAX) - return; - - struct pass_info *pass = &p->pass[p->pass_idx]; - - if (pass->desc.len > 0) - bstr_xappend(p, &pass->desc, bstr0(" + ")); - - va_list ap; - va_start(ap, textf); - bstr_xappend_vasprintf(p, &pass->desc, textf, ap); - va_end(ap); -} - -static void pass_info_reset(struct gl_video *p, bool is_redraw) -{ - p->pass = is_redraw ? 
p->pass_redraw : p->pass_fresh; - p->pass_idx = 0; - - for (int i = 0; i < PASS_INFO_MAX; i++) { - p->pass[i].desc.len = 0; - p->pass[i].perf = (struct mp_pass_perf){0}; - } -} - -static void pass_report_performance(struct gl_video *p) -{ - if (!p->pass) - return; - - for (int i = 0; i < PASS_INFO_MAX; i++) { - struct pass_info *pass = &p->pass[i]; - if (pass->desc.len) { - MP_DBG(p, "pass '%.*s': last %dus avg %dus peak %dus\n", - BSTR_P(pass->desc), - (int)pass->perf.last/1000, - (int)pass->perf.avg/1000, - (int)pass->perf.peak/1000); - } - } -} - -static void pass_prepare_src_tex(struct gl_video *p) -{ - struct gl_shader_cache *sc = p->sc; - - for (int n = 0; n < p->pass_tex_num; n++) { - struct img_tex *s = &p->pass_tex[n]; - if (!s->tex) - continue; - - char *texture_name = mp_tprintf(32, "texture%d", n); - char *texture_size = mp_tprintf(32, "texture_size%d", n); - char *texture_rot = mp_tprintf(32, "texture_rot%d", n); - char *texture_off = mp_tprintf(32, "texture_off%d", n); - char *pixel_size = mp_tprintf(32, "pixel_size%d", n); - - gl_sc_uniform_texture(sc, texture_name, s->tex); - float f[2] = {1, 1}; - if (!s->tex->params.non_normalized) { - f[0] = s->tex->params.w; - f[1] = s->tex->params.h; - } - gl_sc_uniform_vec2(sc, texture_size, f); - gl_sc_uniform_mat2(sc, texture_rot, true, (float *)s->transform.m); - gl_sc_uniform_vec2(sc, texture_off, (float *)s->transform.t); - gl_sc_uniform_vec2(sc, pixel_size, (float[]){1.0f / f[0], - 1.0f / f[1]}); - } -} - -// Sets the appropriate compute shader metadata for an implicit compute pass -// bw/bh: block size -static void pass_is_compute(struct gl_video *p, int bw, int bh) -{ - p->pass_compute = (struct compute_info){ - .active = true, - .block_w = bw, - .block_h = bh, - }; -} - -// w/h: the width/height of the compute shader's operating domain (e.g. the -// target target that needs to be written, or the source texture that needs to -// be reduced) -static void dispatch_compute(struct gl_video *p, int w, int h, - struct compute_info info) -{ - PRELUDE("layout (local_size_x = %d, local_size_y = %d) in;\n", - info.threads_w > 0 ? info.threads_w : info.block_w, - info.threads_h > 0 ? 
info.threads_h : info.block_h); - - pass_prepare_src_tex(p); - gl_sc_set_vertex_format(p->sc, vertex_vao, sizeof(struct vertex)); - - // Since we don't actually have vertices, we pretend for convenience - // reasons that we do and calculate the right texture coordinates based on - // the output sample ID - gl_sc_uniform_vec2(p->sc, "out_scale", (float[2]){ 1.0 / w, 1.0 / h }); - PRELUDE("#define outcoord(id) (out_scale * (vec2(id) + vec2(0.5)))\n"); - - for (int n = 0; n < TEXUNIT_VIDEO_NUM; n++) { - struct img_tex *s = &p->pass_tex[n]; - if (!s->tex) - continue; - - // We need to rescale the coordinates to the true texture size - char tex_scale[32]; - snprintf(tex_scale, sizeof(tex_scale), "tex_scale%d", n); - gl_sc_uniform_vec2(p->sc, tex_scale, (float[2]){ - (float)s->w / s->tex->params.w, - (float)s->h / s->tex->params.h, - }); - - PRELUDE("#define texcoord%d_raw(id) (tex_scale%d * outcoord(id))\n", n, n); - PRELUDE("#define texcoord%d_rot(id) (texture_rot%d * texcoord%d_raw(id) + " - "pixel_size%d * texture_off%d)\n", n, n, n, n, n); - // Clamp the texture coordinates to prevent sampling out-of-bounds in - // threads that exceed the requested width/height - PRELUDE("#define texmap%d(id) min(texcoord%d_rot(id), vec2(1.0))\n", n, n); - PRELUDE("#define texcoord%d texmap%d(gl_GlobalInvocationID)\n", n, n); - } - - // always round up when dividing to make sure we don't leave off a part of - // the image - int num_x = info.block_w > 0 ? (w + info.block_w - 1) / info.block_w : 1, - num_y = info.block_h > 0 ? (h + info.block_h - 1) / info.block_h : 1; - - pass_record(p, gl_sc_dispatch_compute(p->sc, num_x, num_y, 1)); - - memset(&p->pass_tex, 0, sizeof(p->pass_tex)); - p->pass_tex_num = 0; -} - -static struct mp_pass_perf render_pass_quad(struct gl_video *p, - struct fbodst target, - const struct mp_rect *dst) -{ - struct vertex va[6] = {0}; - - struct gl_transform t; - gl_transform_ortho_fbodst(&t, target); - - float x[2] = {dst->x0, dst->x1}; - float y[2] = {dst->y0, dst->y1}; - gl_transform_vec(t, &x[0], &y[0]); - gl_transform_vec(t, &x[1], &y[1]); - - for (int n = 0; n < 4; n++) { - struct vertex *v = &va[n]; - v->position.x = x[n / 2]; - v->position.y = y[n % 2]; - for (int i = 0; i < p->pass_tex_num; i++) { - struct img_tex *s = &p->pass_tex[i]; - if (!s->tex) - continue; - struct gl_transform tr = s->transform; - float tx = (n / 2) * s->w; - float ty = (n % 2) * s->h; - gl_transform_vec(tr, &tx, &ty); - bool rect = s->tex->params.non_normalized; - v->texcoord[i].x = tx / (rect ? 1 : s->tex->params.w); - v->texcoord[i].y = ty / (rect ? 
1 : s->tex->params.h); - } - } - - va[4] = va[2]; - va[5] = va[1]; - - return gl_sc_dispatch_draw(p->sc, target.tex, va, 6); -} - -static void finish_pass_direct(struct gl_video *p, struct fbodst target, - const struct mp_rect *dst) -{ - pass_prepare_src_tex(p); - gl_sc_set_vertex_format(p->sc, vertex_vao, sizeof(struct vertex)); - pass_record(p, render_pass_quad(p, target, dst)); - debug_check_gl(p, "after rendering"); - memset(&p->pass_tex, 0, sizeof(p->pass_tex)); - p->pass_tex_num = 0; -} - -// dst_fbo: this will be used for rendering; possibly reallocating the whole -// FBO, if the required parameters have changed -// w, h: required FBO target dimension, and also defines the target rectangle -// used for rasterization -// flags: 0 or combination of FBOTEX_FUZZY_W/FBOTEX_FUZZY_H (setting the fuzzy -// flags allows the FBO to be larger than the w/h parameters) -static void finish_pass_fbo(struct gl_video *p, struct fbotex *dst_fbo, - int w, int h, int flags) -{ - fbotex_change(dst_fbo, p->ra, p->log, w, h, p->fbo_format, flags); - - if (p->pass_compute.active) { - if (!dst_fbo->tex) - return; - gl_sc_uniform_image2D_wo(p->sc, "out_image", dst_fbo->tex); - if (!p->pass_compute.directly_writes) - GLSL(imageStore(out_image, ivec2(gl_GlobalInvocationID), color);) - - dispatch_compute(p, w, h, p->pass_compute); - p->pass_compute = (struct compute_info){0}; - - debug_check_gl(p, "after dispatching compute shader"); - } else { - finish_pass_direct(p, dst_fbo->fbo, &(struct mp_rect){0, 0, w, h}); - } -} - -static const char *get_tex_swizzle(struct img_tex *img) -{ - if (!img->tex) - return "rgba"; - return img->tex->params.format->luminance_alpha ? "raaa" : "rgba"; -} - -// Copy a texture to the vec4 color, while increasing offset. Also applies -// the texture multiplier to the sampled color -static void copy_img_tex(struct gl_video *p, int *offset, struct img_tex img) -{ - int count = img.components; - assert(*offset + count <= 4); - - int id = pass_bind(p, img); - char src[5] = {0}; - char dst[5] = {0}; - const char *tex_fmt = get_tex_swizzle(&img); - const char *dst_fmt = "rgba"; - for (int i = 0; i < count; i++) { - src[i] = tex_fmt[i]; - dst[i] = dst_fmt[*offset + i]; - } - - if (img.tex && img.tex->params.format->ctype == RA_CTYPE_UINT) { - uint64_t tex_max = 1ull << p->ra_format.component_bits; - img.multiplier *= 1.0 / (tex_max - 1); - } - - GLSLF("color.%s = %f * vec4(texture(texture%d, texcoord%d)).%s;\n", - dst, img.multiplier, id, id, src); - - *offset += count; -} - -static void skip_unused(struct gl_video *p, int num_components) -{ - for (int i = num_components; i < 4; i++) - GLSLF("color.%c = %f;\n", "rgba"[i], i < 3 ? 
0.0 : 1.0); -} - -static void uninit_scaler(struct gl_video *p, struct scaler *scaler) -{ - fbotex_uninit(&scaler->sep_fbo); - ra_tex_free(p->ra, &scaler->lut); - scaler->kernel = NULL; - scaler->initialized = false; -} - -static void hook_prelude(struct gl_video *p, const char *name, int id, - struct img_tex tex) -{ - GLSLHF("#define %s_raw texture%d\n", name, id); - GLSLHF("#define %s_pos texcoord%d\n", name, id); - GLSLHF("#define %s_size texture_size%d\n", name, id); - GLSLHF("#define %s_rot texture_rot%d\n", name, id); - GLSLHF("#define %s_pt pixel_size%d\n", name, id); - GLSLHF("#define %s_map texmap%d\n", name, id); - GLSLHF("#define %s_mul %f\n", name, tex.multiplier); - - // Set up the sampling functions - GLSLHF("#define %s_tex(pos) (%s_mul * vec4(texture(%s_raw, pos)).%s)\n", - name, name, name, get_tex_swizzle(&tex)); - - // Since the extra matrix multiplication impacts performance, - // skip it unless the texture was actually rotated - if (gl_transform_eq(tex.transform, identity_trans)) { - GLSLHF("#define %s_texOff(off) %s_tex(%s_pos + %s_pt * vec2(off))\n", - name, name, name, name); - } else { - GLSLHF("#define %s_texOff(off) " - "%s_tex(%s_pos + %s_rot * vec2(off)/%s_size)\n", - name, name, name, name, name); - } -} - -static bool saved_tex_find(struct gl_video *p, const char *name, - struct img_tex *out) -{ - if (!name || !out) - return false; - - for (int i = 0; i < p->saved_tex_num; i++) { - if (strcmp(p->saved_tex[i].name, name) == 0) { - *out = p->saved_tex[i].tex; - return true; - } - } - - return false; -} - -static void saved_tex_store(struct gl_video *p, const char *name, - struct img_tex tex) -{ - assert(name); - - for (int i = 0; i < p->saved_tex_num; i++) { - if (strcmp(p->saved_tex[i].name, name) == 0) { - p->saved_tex[i].tex = tex; - return; - } - } - - assert(p->saved_tex_num < SHADER_MAX_SAVED); - p->saved_tex[p->saved_tex_num++] = (struct saved_tex) { - .name = name, - .tex = tex - }; -} - -static bool pass_hook_setup_binds(struct gl_video *p, const char *name, - struct img_tex tex, struct tex_hook *hook) -{ - for (int t = 0; t < TEXUNIT_VIDEO_NUM; t++) { - char *bind_name = (char *)hook->bind_tex[t]; - - if (!bind_name) - continue; - - // This is a special name that means "currently hooked texture" - if (strcmp(bind_name, "HOOKED") == 0) { - int id = pass_bind(p, tex); - hook_prelude(p, "HOOKED", id, tex); - hook_prelude(p, name, id, tex); - continue; - } - - // BIND can also be used to load user-defined textures, in which - // case we will directly load them as a uniform instead of - // generating the hook_prelude boilerplate - for (int u = 0; u < p->user_tex_num; u++) { - struct gl_user_shader_tex *utex = &p->user_textures[u]; - if (bstr_equals0(utex->name, bind_name)) { - gl_sc_uniform_texture(p->sc, bind_name, utex->tex); - goto next_bind; - } - } - - struct img_tex bind_tex; - if (!saved_tex_find(p, bind_name, &bind_tex)) { - // Clean up texture bindings and move on to the next hook - MP_DBG(p, "Skipping hook on %s due to no texture named %s.\n", - name, bind_name); - p->pass_tex_num -= t; - return false; - } - - hook_prelude(p, bind_name, pass_bind(p, bind_tex), bind_tex); - -next_bind: ; - } - - return true; -} - -// Process hooks for a plane, saving the result and returning a new img_tex -// If 'trans' is NULL, the shader is forbidden from transforming tex -static struct img_tex pass_hook(struct gl_video *p, const char *name, - struct img_tex tex, struct gl_transform *trans) -{ - if (!name) - return tex; - - saved_tex_store(p, name, tex); - - 
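/* To make the defines above concrete: for a hook texture named "LUMA" bound
 * to texture unit 3, with multiplier 1.0, an "rgba" swizzle and an unrotated
 * transform (hypothetical values), hook_prelude() emits roughly this GLSL:
 *
 *   #define LUMA_raw texture3
 *   #define LUMA_pos texcoord3
 *   #define LUMA_size texture_size3
 *   #define LUMA_rot texture_rot3
 *   #define LUMA_pt pixel_size3
 *   #define LUMA_map texmap3
 *   #define LUMA_mul 1.000000
 *   #define LUMA_tex(pos) (LUMA_mul * vec4(texture(LUMA_raw, pos)).rgba)
 *   #define LUMA_texOff(off) LUMA_tex(LUMA_pos + LUMA_pt * vec2(off))
 *
 * These LUMA_tex()/LUMA_texOff() macros are what hooked passes and user
 * shaders sample through. */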
MP_DBG(p, "Running hooks for %s\n", name); - for (int i = 0; i < p->tex_hook_num; i++) { - struct tex_hook *hook = &p->tex_hooks[i]; - - // Figure out if this pass hooks this texture - for (int h = 0; h < SHADER_MAX_HOOKS; h++) { - if (hook->hook_tex[h] && strcmp(hook->hook_tex[h], name) == 0) - goto found; - } - - continue; - -found: - // Check the hook's condition - if (hook->cond && !hook->cond(p, tex, hook->priv)) { - MP_DBG(p, "Skipping hook on %s due to condition.\n", name); - continue; - } - - if (!pass_hook_setup_binds(p, name, tex, hook)) - continue; - - // Run the actual hook. This generates a series of GLSL shader - // instructions sufficient for drawing the hook's output - struct gl_transform hook_off = identity_trans; - hook->hook(p, tex, &hook_off, hook->priv); - - int comps = hook->components ? hook->components : tex.components; - skip_unused(p, comps); - - // Compute the updated FBO dimensions and store the result - struct mp_rect_f sz = {0, 0, tex.w, tex.h}; - gl_transform_rect(hook_off, &sz); - int w = lroundf(fabs(sz.x1 - sz.x0)); - int h = lroundf(fabs(sz.y1 - sz.y0)); - - assert(p->hook_fbo_num < SHADER_MAX_SAVED); - struct fbotex *fbo = &p->hook_fbos[p->hook_fbo_num++]; - finish_pass_fbo(p, fbo, w, h, 0); - - const char *store_name = hook->save_tex ? hook->save_tex : name; - struct img_tex saved_tex = img_tex_fbo(fbo, tex.type, comps); - - // If the texture we're saving overwrites the "current" texture, also - // update the tex parameter so that the future loop cycles will use the - // updated values, and export the offset - if (strcmp(store_name, name) == 0) { - if (!trans && !gl_transform_eq(hook_off, identity_trans)) { - MP_ERR(p, "Hook tried changing size of unscalable texture %s!\n", - name); - return tex; - } - - tex = saved_tex; - if (trans) - gl_transform_trans(hook_off, trans); - } - - saved_tex_store(p, store_name, saved_tex); - } - - return tex; -} - -// This can be used at any time in the middle of rendering to specify an -// optional hook point, which if triggered will render out to a new FBO and -// load the result back into vec4 color. 
Offsets applied by the hooks are -// accumulated in tex_trans, and the FBO is dimensioned according -// to p->texture_w/h -static void pass_opt_hook_point(struct gl_video *p, const char *name, - struct gl_transform *tex_trans) -{ - if (!name) - return; - - for (int i = 0; i < p->tex_hook_num; i++) { - struct tex_hook *hook = &p->tex_hooks[i]; - - for (int h = 0; h < SHADER_MAX_HOOKS; h++) { - if (hook->hook_tex[h] && strcmp(hook->hook_tex[h], name) == 0) - goto found; - } - - for (int b = 0; b < TEXUNIT_VIDEO_NUM; b++) { - if (hook->bind_tex[b] && strcmp(hook->bind_tex[b], name) == 0) - goto found; - } - } - - // Nothing uses this texture, don't bother storing it - return; - -found: - assert(p->hook_fbo_num < SHADER_MAX_SAVED); - struct fbotex *fbo = &p->hook_fbos[p->hook_fbo_num++]; - finish_pass_fbo(p, fbo, p->texture_w, p->texture_h, 0); - - struct img_tex img = img_tex_fbo(fbo, PLANE_RGB, p->components); - img = pass_hook(p, name, img, tex_trans); - copy_img_tex(p, &(int){0}, img); - p->texture_w = img.w; - p->texture_h = img.h; - p->components = img.components; - pass_describe(p, "(remainder pass)"); -} - -static void load_shader(struct gl_video *p, struct bstr body) -{ - gl_sc_hadd_bstr(p->sc, body); - gl_sc_uniform_f(p->sc, "random", (double)av_lfg_get(&p->lfg) / UINT32_MAX); - gl_sc_uniform_i(p->sc, "frame", p->frames_uploaded); - gl_sc_uniform_vec2(p->sc, "input_size", - (float[]){(p->src_rect.x1 - p->src_rect.x0) * - p->texture_offset.m[0][0], - (p->src_rect.y1 - p->src_rect.y0) * - p->texture_offset.m[1][1]}); - gl_sc_uniform_vec2(p->sc, "target_size", - (float[]){p->dst_rect.x1 - p->dst_rect.x0, - p->dst_rect.y1 - p->dst_rect.y0}); - gl_sc_uniform_vec2(p->sc, "tex_offset", - (float[]){p->src_rect.x0 * p->texture_offset.m[0][0] + - p->texture_offset.t[0], - p->src_rect.y0 * p->texture_offset.m[1][1] + - p->texture_offset.t[1]}); -} - -// Semantic equality -static bool double_seq(double a, double b) -{ - return (isnan(a) && isnan(b)) || a == b; -} - -static bool scaler_fun_eq(struct scaler_fun a, struct scaler_fun b) -{ - if ((a.name && !b.name) || (b.name && !a.name)) - return false; - - return ((!a.name && !b.name) || strcmp(a.name, b.name) == 0) && - double_seq(a.params[0], b.params[0]) && - double_seq(a.params[1], b.params[1]) && - a.blur == b.blur && - a.taper == b.taper; -} - -static bool scaler_conf_eq(struct scaler_config a, struct scaler_config b) -{ - // Note: antiring isn't compared because it doesn't affect LUT - // generation - return scaler_fun_eq(a.kernel, b.kernel) && - scaler_fun_eq(a.window, b.window) && - a.radius == b.radius && - a.clamp == b.clamp; -} - -static void reinit_scaler(struct gl_video *p, struct scaler *scaler, - const struct scaler_config *conf, - double scale_factor, - int sizes[]) -{ - if (scaler_conf_eq(scaler->conf, *conf) && - scaler->scale_factor == scale_factor && - scaler->initialized) - return; - - uninit_scaler(p, scaler); - - scaler->conf = *conf; - bool is_tscale = scaler->index == SCALER_TSCALE; - scaler->conf.kernel.name = (char *)handle_scaler_opt(conf->kernel.name, is_tscale); - scaler->conf.window.name = (char *)handle_scaler_opt(conf->window.name, is_tscale); - scaler->scale_factor = scale_factor; - scaler->insufficient = false; - scaler->initialized = true; - - const struct filter_kernel *t_kernel = mp_find_filter_kernel(conf->kernel.name); - if (!t_kernel) - return; - - scaler->kernel_storage = *t_kernel; - scaler->kernel = &scaler->kernel_storage; - - const char *win = conf->window.name; - if (!win || !win[0]) - win = 
t_kernel->window; // fall back to the scaler's default window - const struct filter_window *t_window = mp_find_filter_window(win); - if (t_window) - scaler->kernel->w = *t_window; - - for (int n = 0; n < 2; n++) { - if (!isnan(conf->kernel.params[n])) - scaler->kernel->f.params[n] = conf->kernel.params[n]; - if (!isnan(conf->window.params[n])) - scaler->kernel->w.params[n] = conf->window.params[n]; - } - - if (conf->kernel.blur > 0.0) - scaler->kernel->f.blur = conf->kernel.blur; - if (conf->window.blur > 0.0) - scaler->kernel->w.blur = conf->window.blur; - - if (conf->kernel.taper > 0.0) - scaler->kernel->f.taper = conf->kernel.taper; - if (conf->window.taper > 0.0) - scaler->kernel->w.taper = conf->window.taper; - - if (scaler->kernel->f.resizable && conf->radius > 0.0) - scaler->kernel->f.radius = conf->radius; - - scaler->kernel->clamp = conf->clamp; - scaler->kernel->value_cutoff = conf->cutoff; - - scaler->insufficient = !mp_init_filter(scaler->kernel, sizes, scale_factor); - - int size = scaler->kernel->size; - int num_components = size > 2 ? 4 : size; - const struct ra_format *fmt = ra_find_float16_format(p->ra, num_components); - assert(fmt); - - int width = (size + num_components - 1) / num_components; // round up - int stride = width * num_components; - assert(size <= stride); - - scaler->lut_size = 1 << p->opts.scaler_lut_size; - - float *weights = talloc_array(NULL, float, scaler->lut_size * stride); - mp_compute_lut(scaler->kernel, scaler->lut_size, stride, weights); - - bool use_1d = scaler->kernel->polar && (p->ra->caps & RA_CAP_TEX_1D); - - struct ra_tex_params lut_params = { - .dimensions = use_1d ? 1 : 2, - .w = use_1d ? scaler->lut_size : width, - .h = use_1d ? 1 : scaler->lut_size, - .d = 1, - .format = fmt, - .render_src = true, - .src_linear = true, - .initial_data = weights, - }; - scaler->lut = ra_tex_create(p->ra, &lut_params); - - talloc_free(weights); - - debug_check_gl(p, "after initializing scaler"); -} - -// Special helper for sampling from two separated stages -static void pass_sample_separated(struct gl_video *p, struct img_tex src, - struct scaler *scaler, int w, int h) -{ - // Separate the transformation into x and y components, per pass - struct gl_transform t_x = { - .m = {{src.transform.m[0][0], 0.0}, {src.transform.m[1][0], 1.0}}, - .t = {src.transform.t[0], 0.0}, - }; - struct gl_transform t_y = { - .m = {{1.0, src.transform.m[0][1]}, {0.0, src.transform.m[1][1]}}, - .t = {0.0, src.transform.t[1]}, - }; - - // First pass (scale only in the y dir) - src.transform = t_y; - sampler_prelude(p->sc, pass_bind(p, src)); - GLSLF("// first pass\n"); - pass_sample_separated_gen(p->sc, scaler, 0, 1); - GLSLF("color *= %f;\n", src.multiplier); - finish_pass_fbo(p, &scaler->sep_fbo, src.w, h, FBOTEX_FUZZY_H); - - // Second pass (scale only in the x dir) - src = img_tex_fbo(&scaler->sep_fbo, src.type, src.components); - src.transform = t_x; - pass_describe(p, "%s second pass", scaler->conf.kernel.name); - sampler_prelude(p->sc, pass_bind(p, src)); - pass_sample_separated_gen(p->sc, scaler, 1, 0); -} - -// Picks either the compute shader version or the regular sampler version -// depending on hardware support -static void pass_dispatch_sample_polar(struct gl_video *p, struct scaler *scaler, - struct img_tex tex, int w, int h) -{ - uint64_t reqs = RA_CAP_COMPUTE | RA_CAP_NESTED_ARRAY; - if ((p->ra->caps & reqs) != reqs) - goto fallback; - - int bound = ceil(scaler->kernel->radius_cutoff); - int offset = bound - 1; // padding top/left - int padding = offset + bound; 
// total padding - - float ratiox = (float)w / tex.w, - ratioy = (float)h / tex.h; - - // For performance we want to load at least as many pixels - // horizontally as there are threads in a warp (32 for nvidia), as - // well as enough to take advantage of shmem parallelism - const int warp_size = 32, threads = 256; - int bw = warp_size; - int bh = threads / bw; - - // We need to sample everything from base_min to base_max, so make sure - // we have enough room in shmem - int iw = (int)ceil(bw / ratiox) + padding + 1, - ih = (int)ceil(bh / ratioy) + padding + 1; - - int shmem_req = iw * ih * tex.components * sizeof(float); - if (shmem_req > p->ra->max_shmem) - goto fallback; - - pass_is_compute(p, bw, bh); - pass_compute_polar(p->sc, scaler, tex.components, bw, bh, iw, ih); - return; - -fallback: - // Fall back to regular polar shader when compute shaders are unsupported - // or the kernel is too big for shmem - pass_sample_polar(p->sc, scaler, tex.components, p->ra->glsl_version); -} - -// Sample from img_tex, with the src rectangle given by it. -// The dst rectangle is implicit by what the caller will do next, but w and h -// must still be what is going to be used (to dimension FBOs correctly). -// This will write the scaled contents to the vec4 "color". -// The scaler unit is initialized by this function; in order to avoid cache -// thrashing, the scaler unit should usually use the same parameters. -static void pass_sample(struct gl_video *p, struct img_tex tex, - struct scaler *scaler, const struct scaler_config *conf, - double scale_factor, int w, int h) -{ - reinit_scaler(p, scaler, conf, scale_factor, filter_sizes); - - // Describe scaler - const char *scaler_opt[] = { - [SCALER_SCALE] = "scale", - [SCALER_DSCALE] = "dscale", - [SCALER_CSCALE] = "cscale", - [SCALER_TSCALE] = "tscale", - }; - - pass_describe(p, "%s=%s (%s)", scaler_opt[scaler->index], - scaler->conf.kernel.name, plane_names[tex.type]); - - bool is_separated = scaler->kernel && !scaler->kernel->polar; - - // Set up the transformation+prelude and bind the texture, for everything - // other than separated scaling (which does this in the subfunction) - if (!is_separated) - sampler_prelude(p->sc, pass_bind(p, tex)); - - // Dispatch the scaler. They're all wildly different. - const char *name = scaler->conf.kernel.name; - if (strcmp(name, "bilinear") == 0) { - GLSL(color = texture(tex, pos);) - } else if (strcmp(name, "bicubic_fast") == 0) { - pass_sample_bicubic_fast(p->sc); - } else if (strcmp(name, "oversample") == 0) { - pass_sample_oversample(p->sc, scaler, w, h); - } else if (scaler->kernel && scaler->kernel->polar) { - pass_dispatch_sample_polar(p, scaler, tex, w, h); - } else if (scaler->kernel) { - pass_sample_separated(p, tex, scaler, w, h); - } else { - // Should never happen - abort(); - } - - // Apply any required multipliers. 
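/* Rough numbers for the shared-memory check in pass_dispatch_sample_polar()
 * above (illustrative, assuming a polar kernel with radius_cutoff == 3 and
 * no scaling, i.e. ratiox == ratioy == 1): bound = 3, padding = 5, bw = 32,
 * bh = 256 / 32 = 8, hence iw = 32 + 5 + 1 = 38 and ih = 8 + 5 + 1 = 14.
 * With 4 components that is 38 * 14 * 4 * 4 = 8512 bytes of shmem, well
 * below the 32 KiB minimum that GL 4.3 guarantees, so such kernels normally
 * take the compute path instead of the fallback. */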
Separated scaling already does this in - // its first stage - if (!is_separated) - GLSLF("color *= %f;\n", tex.multiplier); - - // Micro-optimization: Avoid scaling unneeded channels - skip_unused(p, tex.components); -} - -// Returns true if two img_texs are semantically equivalent (same metadata) -static bool img_tex_equiv(struct img_tex a, struct img_tex b) -{ - return a.type == b.type && - a.components == b.components && - a.multiplier == b.multiplier && - a.tex->params.format == b.tex->params.format && - a.tex->params.w == b.tex->params.w && - a.tex->params.h == b.tex->params.h && - a.w == b.w && - a.h == b.h && - gl_transform_eq(a.transform, b.transform); -} - -static bool add_hook(struct gl_video *p, struct tex_hook hook) -{ - if (p->tex_hook_num < SHADER_MAX_PASSES) { - p->tex_hooks[p->tex_hook_num++] = hook; - return true; - } else { - MP_ERR(p, "Too many passes! Limit is %d.\n", SHADER_MAX_PASSES); - talloc_free(hook.priv); - return false; - } -} - -static void deband_hook(struct gl_video *p, struct img_tex tex, - struct gl_transform *trans, void *priv) -{ - pass_describe(p, "debanding (%s)", plane_names[tex.type]); - pass_sample_deband(p->sc, p->opts.deband_opts, &p->lfg, - p->image_params.color.gamma); -} - -static void unsharp_hook(struct gl_video *p, struct img_tex tex, - struct gl_transform *trans, void *priv) -{ - pass_describe(p, "unsharp masking"); - pass_sample_unsharp(p->sc, p->opts.unsharp); -} - -struct szexp_ctx { - struct gl_video *p; - struct img_tex tex; -}; - -static bool szexp_lookup(void *priv, struct bstr var, float size[2]) -{ - struct szexp_ctx *ctx = priv; - struct gl_video *p = ctx->p; - - if (bstr_equals0(var, "NATIVE_CROPPED")) { - size[0] = (p->src_rect.x1 - p->src_rect.x0) * p->texture_offset.m[0][0]; - size[1] = (p->src_rect.y1 - p->src_rect.y0) * p->texture_offset.m[1][1]; - return true; - } - - // The size of OUTPUT is determined. It could be useful for certain - // user shaders to skip passes. 
- if (bstr_equals0(var, "OUTPUT")) { - size[0] = p->dst_rect.x1 - p->dst_rect.x0; - size[1] = p->dst_rect.y1 - p->dst_rect.y0; - return true; - } - - // HOOKED is a special case - if (bstr_equals0(var, "HOOKED")) { - size[0] = ctx->tex.w; - size[1] = ctx->tex.h; - return true; - } - - for (int o = 0; o < p->saved_tex_num; o++) { - if (bstr_equals0(var, p->saved_tex[o].name)) { - size[0] = p->saved_tex[o].tex.w; - size[1] = p->saved_tex[o].tex.h; - return true; - } - } - - return false; -} - -static bool user_hook_cond(struct gl_video *p, struct img_tex tex, void *priv) -{ - struct gl_user_shader_hook *shader = priv; - assert(shader); - - float res = false; - eval_szexpr(p->log, &(struct szexp_ctx){p, tex}, szexp_lookup, shader->cond, &res); - return res; -} - -static void user_hook(struct gl_video *p, struct img_tex tex, - struct gl_transform *trans, void *priv) -{ - struct gl_user_shader_hook *shader = priv; - assert(shader); - load_shader(p, shader->pass_body); - - pass_describe(p, "user shader: %.*s (%s)", BSTR_P(shader->pass_desc), - plane_names[tex.type]); - - if (shader->compute.active) { - p->pass_compute = shader->compute; - GLSLF("hook();\n"); - } else { - GLSLF("color = hook();\n"); - } - - // Make sure we at least create a legal FBO on failure, since it's better - // to do this and display an error message than just crash OpenGL - float w = 1.0, h = 1.0; - - eval_szexpr(p->log, &(struct szexp_ctx){p, tex}, szexp_lookup, shader->width, &w); - eval_szexpr(p->log, &(struct szexp_ctx){p, tex}, szexp_lookup, shader->height, &h); - - *trans = (struct gl_transform){{{w / tex.w, 0}, {0, h / tex.h}}}; - gl_transform_trans(shader->offset, trans); -} - -static bool add_user_hook(void *priv, struct gl_user_shader_hook hook) -{ - struct gl_video *p = priv; - struct gl_user_shader_hook *copy = talloc_ptrtype(p, copy); - *copy = hook; - - struct tex_hook texhook = { - .save_tex = bstrdup0(copy, hook.save_tex), - .components = hook.components, - .hook = user_hook, - .cond = user_hook_cond, - .priv = copy, - }; - - for (int h = 0; h < SHADER_MAX_HOOKS; h++) - texhook.hook_tex[h] = bstrdup0(copy, hook.hook_tex[h]); - for (int h = 0; h < SHADER_MAX_BINDS; h++) - texhook.bind_tex[h] = bstrdup0(copy, hook.bind_tex[h]); - - return add_hook(p, texhook); -} - -static bool add_user_tex(void *priv, struct gl_user_shader_tex tex) -{ - struct gl_video *p = priv; - - if (p->user_tex_num == SHADER_MAX_PASSES) { - MP_ERR(p, "Too many textures! 
Limit is %d.\n", SHADER_MAX_PASSES); - goto err; - } - - tex.tex = ra_tex_create(p->ra, &tex.params); - TA_FREEP(&tex.params.initial_data); - - p->user_textures[p->user_tex_num++] = tex; - return true; - -err: - talloc_free(tex.params.initial_data); - return false; -} - -static void load_user_shaders(struct gl_video *p, char **shaders) -{ - if (!shaders) - return; - - for (int n = 0; shaders[n] != NULL; n++) { - struct bstr file = load_cached_file(p, shaders[n]); - parse_user_shader(p->log, p->ra, file, p, add_user_hook, add_user_tex); - } -} - -static void gl_video_setup_hooks(struct gl_video *p) -{ - gl_video_reset_hooks(p); - - if (p->opts.deband) { - add_hook(p, (struct tex_hook) { - .hook_tex = {"LUMA", "CHROMA", "RGB", "XYZ"}, - .bind_tex = {"HOOKED"}, - .hook = deband_hook, - }); - } - - if (p->opts.unsharp != 0.0) { - add_hook(p, (struct tex_hook) { - .hook_tex = {"MAIN"}, - .bind_tex = {"HOOKED"}, - .hook = unsharp_hook, - }); - } - - load_user_shaders(p, p->opts.user_shaders); -} - -// sample from video textures, set "color" variable to yuv value -static void pass_read_video(struct gl_video *p) -{ - struct img_tex tex[4]; - struct gl_transform offsets[4]; - pass_get_img_tex(p, &p->image, tex, offsets); - - // To keep the code as simple as possibly, we currently run all shader - // stages even if they would be unnecessary (e.g. no hooks for a texture). - // In the future, deferred img_tex should optimize this away. - - // Merge semantically identical textures. This loop is done from back - // to front so that merged textures end up in the right order while - // simultaneously allowing us to skip unnecessary merges - for (int n = 3; n >= 0; n--) { - if (tex[n].type == PLANE_NONE) - continue; - - int first = n; - int num = 0; - - for (int i = 0; i < n; i++) { - if (img_tex_equiv(tex[n], tex[i]) && - gl_transform_eq(offsets[n], offsets[i])) - { - GLSLF("// merging plane %d ...\n", i); - copy_img_tex(p, &num, tex[i]); - first = MPMIN(first, i); - tex[i] = (struct img_tex){0}; - } - } - - if (num > 0) { - GLSLF("// merging plane %d ... into %d\n", n, first); - copy_img_tex(p, &num, tex[n]); - pass_describe(p, "merging planes"); - finish_pass_fbo(p, &p->merge_fbo[n], tex[n].w, tex[n].h, 0); - tex[first] = img_tex_fbo(&p->merge_fbo[n], tex[n].type, num); - tex[n] = (struct img_tex){0}; - } - } - - // If any textures are still in integer format by this point, we need - // to introduce an explicit conversion pass to avoid breaking hooks/scaling - for (int n = 0; n < 4; n++) { - if (tex[n].tex && tex[n].tex->params.format->ctype == RA_CTYPE_UINT) { - GLSLF("// use_integer fix for plane %d\n", n); - copy_img_tex(p, &(int){0}, tex[n]); - pass_describe(p, "use_integer fix"); - finish_pass_fbo(p, &p->integer_fbo[n], tex[n].w, tex[n].h, 0); - tex[n] = img_tex_fbo(&p->integer_fbo[n], tex[n].type, - tex[n].components); - } - } - - // Dispatch the hooks for all of these textures, saving and perhaps - // modifying them in the process - for (int n = 0; n < 4; n++) { - const char *name; - switch (tex[n].type) { - case PLANE_RGB: name = "RGB"; break; - case PLANE_LUMA: name = "LUMA"; break; - case PLANE_CHROMA: name = "CHROMA"; break; - case PLANE_ALPHA: name = "ALPHA"; break; - case PLANE_XYZ: name = "XYZ"; break; - default: continue; - } - - tex[n] = pass_hook(p, name, tex[n], &offsets[n]); - } - - // At this point all planes are finalized but they may not be at the - // required size yet. Furthermore, they may have texture offsets that - // require realignment. 
For lack of something better to do, we assume - // the rgb/luma texture is the "reference" and scale everything else - // to match. - for (int n = 0; n < 4; n++) { - switch (tex[n].type) { - case PLANE_RGB: - case PLANE_XYZ: - case PLANE_LUMA: break; - default: continue; - } - - p->texture_w = tex[n].w; - p->texture_h = tex[n].h; - p->texture_offset = offsets[n]; - break; - } - - // Compute the reference rect - struct mp_rect_f src = {0.0, 0.0, p->image_params.w, p->image_params.h}; - struct mp_rect_f ref = src; - gl_transform_rect(p->texture_offset, &ref); - MP_DBG(p, "ref rect: {%f %f} {%f %f}\n", ref.x0, ref.y0, ref.x1, ref.y1); - - // Explicitly scale all of the textures that don't match - for (int n = 0; n < 4; n++) { - if (tex[n].type == PLANE_NONE) - continue; - - // If the planes are aligned identically, we will end up with the - // exact same source rectangle. - struct mp_rect_f rect = src; - gl_transform_rect(offsets[n], &rect); - MP_DBG(p, "rect[%d]: {%f %f} {%f %f}\n", n, - rect.x0, rect.y0, rect.x1, rect.y1); - - if (mp_rect_f_seq(ref, rect)) - continue; - - // If the rectangles differ, then our planes have a different - // alignment and/or size. First of all, we have to compute the - // corrections required to meet the target rectangle - struct gl_transform fix = { - .m = {{(ref.x1 - ref.x0) / (rect.x1 - rect.x0), 0.0}, - {0.0, (ref.y1 - ref.y0) / (rect.y1 - rect.y0)}}, - .t = {ref.x0, ref.y0}, - }; - MP_DBG(p, "-> fix[%d] = {%f %f} + off {%f %f}\n", n, - fix.m[0][0], fix.m[1][1], fix.t[0], fix.t[1]); - - // Since the scale in texture space is different from the scale in - // absolute terms, we have to scale the coefficients down to be - // relative to the texture's physical dimensions and local offset - struct gl_transform scale = { - .m = {{(float)tex[n].w / p->texture_w, 0.0}, - {0.0, (float)tex[n].h / p->texture_h}}, - .t = {-rect.x0, -rect.y0}, - }; - if (p->image_params.rotate % 180 == 90) - MPSWAP(double, scale.m[0][0], scale.m[1][1]); - - gl_transform_trans(scale, &fix); - MP_DBG(p, "-> scaled[%d] = {%f %f} + off {%f %f}\n", n, - fix.m[0][0], fix.m[1][1], fix.t[0], fix.t[1]); - - // Since the texture transform is a function of the texture coordinates - // to texture space, rather than the other way around, we have to - // actually apply the *inverse* of this. Fortunately, calculating - // the inverse is relatively easy here. 
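A minimal editor sketch of that inversion, not code from this file (the struct and function names below are invented for illustration): per axis the transform is the diagonal affine map y = m*x + t, so its inverse is x = (1/m)*y + (1/m)*(-t), which is exactly what the next four assignments compute in place.

struct axis_map { float m, t; };                     /* y = m*x + t */

static struct axis_map axis_map_invert(struct axis_map f)
{
    float inv_m = 1.0f / f.m;                        /* diagonal entry of the inverse */
    return (struct axis_map){ inv_m, inv_m * -f.t }; /* x = y/m - t/m */
}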
- fix.m[0][0] = 1.0 / fix.m[0][0]; - fix.m[1][1] = 1.0 / fix.m[1][1]; - fix.t[0] = fix.m[0][0] * -fix.t[0]; - fix.t[1] = fix.m[1][1] * -fix.t[1]; - gl_transform_trans(fix, &tex[n].transform); - - int scaler_id = -1; - const char *name = NULL; - switch (tex[n].type) { - case PLANE_RGB: - case PLANE_LUMA: - case PLANE_XYZ: - scaler_id = SCALER_SCALE; - // these aren't worth hooking, fringe hypothetical cases only - break; - case PLANE_CHROMA: - scaler_id = SCALER_CSCALE; - name = "CHROMA_SCALED"; - break; - case PLANE_ALPHA: - // alpha always uses bilinear - name = "ALPHA_SCALED"; - } - - if (scaler_id < 0) - continue; - - const struct scaler_config *conf = &p->opts.scaler[scaler_id]; - struct scaler *scaler = &p->scaler[scaler_id]; - - // bilinear scaling is a free no-op thanks to GPU sampling - if (strcmp(conf->kernel.name, "bilinear") != 0) { - GLSLF("// upscaling plane %d\n", n); - pass_sample(p, tex[n], scaler, conf, 1.0, p->texture_w, p->texture_h); - finish_pass_fbo(p, &p->scale_fbo[n], p->texture_w, p->texture_h, 0); - tex[n] = img_tex_fbo(&p->scale_fbo[n], tex[n].type, tex[n].components); - } - - // Run any post-scaling hooks - tex[n] = pass_hook(p, name, tex[n], NULL); - } - - // All planes are of the same size and properly aligned at this point - GLSLF("// combining planes\n"); - int coord = 0; - for (int i = 0; i < 4; i++) { - if (tex[i].type != PLANE_NONE) - copy_img_tex(p, &coord, tex[i]); - } - p->components = coord; -} - -// Utility function that simply binds an FBO and reads from it, without any -// transformations. -static void pass_read_fbo(struct gl_video *p, struct fbotex *fbo) -{ - struct img_tex tex = img_tex_fbo(fbo, PLANE_RGB, p->components); - copy_img_tex(p, &(int){0}, tex); -} - -// yuv conversion, and any other conversions before main up/down-scaling -static void pass_convert_yuv(struct gl_video *p) -{ - struct gl_shader_cache *sc = p->sc; - - struct mp_csp_params cparams = MP_CSP_PARAMS_DEFAULTS; - cparams.gray = p->is_gray; - mp_csp_set_image_params(&cparams, &p->image_params); - mp_csp_equalizer_state_get(p->video_eq, &cparams); - p->user_gamma = 1.0 / (cparams.gamma * p->opts.gamma); - - pass_describe(p, "color conversion"); - - if (p->color_swizzle[0]) - GLSLF("color = color.%s;\n", p->color_swizzle); - - // Pre-colormatrix input gamma correction - if (cparams.color.space == MP_CSP_XYZ) - GLSL(color.rgb = pow(color.rgb, vec3(2.6));) // linear light - - // We always explicitly normalize the range in pass_read_video - cparams.input_bits = cparams.texture_bits = 0; - - // Conversion to RGB. For RGB itself, this still applies e.g. brightness - // and contrast controls, or expansion of e.g. LSB-packed 10 bit data. - struct mp_cmat m = {{{0}}}; - mp_get_csp_matrix(&cparams, &m); - gl_sc_uniform_mat3(sc, "colormatrix", true, &m.m[0][0]); - gl_sc_uniform_vec3(sc, "colormatrix_c", m.c); - - GLSL(color.rgb = mat3(colormatrix) * color.rgb + colormatrix_c;) - - if (p->image_params.color.space == MP_CSP_BT_2020_C) { - // Conversion for C'rcY'cC'bc via the BT.2020 CL system: - // C'bc = (B'-Y'c) / 1.9404 | C'bc <= 0 - // = (B'-Y'c) / 1.5816 | C'bc > 0 - // - // C'rc = (R'-Y'c) / 1.7184 | C'rc <= 0 - // = (R'-Y'c) / 0.9936 | C'rc > 0 - // - // as per the BT.2020 specification, table 4. This is a non-linear - // transformation because (constant) luminance receives non-equal - // contributions from the three different channels. 
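As a scalar cross-check of the GLSL that follows, here is an editor-added sketch (the helper name and signature are illustrative, not from this file): it inverts the C'bc/C'rc definitions quoted above by picking the multiplier from the sign of the transmitted chroma sample and adding Y'c back.

#include <stdbool.h>

/* Recover B' (is_b == true) or R' (is_b == false) from BT.2020 CL chroma + Y'c. */
static float bt2020cl_expand(float chroma, float luma_c, bool is_b)
{
    float scale = is_b ? (chroma <= 0 ? 1.9404f : 1.5816f)
                       : (chroma <= 0 ? 1.7184f : 0.9936f);
    return chroma * scale + luma_c;
}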
- GLSLF("// constant luminance conversion\n"); - GLSL(color.br = color.br * mix(vec2(1.5816, 0.9936), - vec2(1.9404, 1.7184), - lessThanEqual(color.br, vec2(0))) - + color.gg;) - // Expand channels to camera-linear light. This shader currently just - // assumes everything uses the BT.2020 12-bit gamma function, since the - // difference between 10 and 12-bit is negligible for anything other - // than 12-bit content. - GLSL(color.rgb = mix(color.rgb * vec3(1.0/4.5), - pow((color.rgb + vec3(0.0993))*vec3(1.0/1.0993), - vec3(1.0/0.45)), - lessThanEqual(vec3(0.08145), color.rgb));) - // Calculate the green channel from the expanded RYcB - // The BT.2020 specification says Yc = 0.2627*R + 0.6780*G + 0.0593*B - GLSL(color.g = (color.g - 0.2627*color.r - 0.0593*color.b)*1.0/0.6780;) - // Recompress to receive the R'G'B' result, same as other systems - GLSL(color.rgb = mix(color.rgb * vec3(4.5), - vec3(1.0993) * pow(color.rgb, vec3(0.45)) - vec3(0.0993), - lessThanEqual(vec3(0.0181), color.rgb));) - } - - p->components = 3; - if (!p->has_alpha || p->opts.alpha_mode == ALPHA_NO) { - GLSL(color.a = 1.0;) - } else { // alpha present in image - p->components = 4; - GLSL(color = vec4(color.rgb * color.a, color.a);) - } -} - -static void get_scale_factors(struct gl_video *p, bool transpose_rot, double xy[2]) -{ - double target_w = p->src_rect.x1 - p->src_rect.x0; - double target_h = p->src_rect.y1 - p->src_rect.y0; - if (transpose_rot && p->image_params.rotate % 180 == 90) - MPSWAP(double, target_w, target_h); - xy[0] = (p->dst_rect.x1 - p->dst_rect.x0) / target_w; - xy[1] = (p->dst_rect.y1 - p->dst_rect.y0) / target_h; -} - -// Cropping. -static void compute_src_transform(struct gl_video *p, struct gl_transform *tr) -{ - float sx = (p->src_rect.x1 - p->src_rect.x0) / (float)p->texture_w, - sy = (p->src_rect.y1 - p->src_rect.y0) / (float)p->texture_h, - ox = p->src_rect.x0, - oy = p->src_rect.y0; - struct gl_transform transform = {{{sx, 0}, {0, sy}}, {ox, oy}}; - - gl_transform_trans(p->texture_offset, &transform); - - *tr = transform; -} - -// Takes care of the main scaling and pre/post-conversions -static void pass_scale_main(struct gl_video *p) -{ - // Figure out the main scaler. - double xy[2]; - get_scale_factors(p, true, xy); - - // actual scale factor should be divided by the scale factor of prescaling. 
- xy[0] /= p->texture_offset.m[0][0]; - xy[1] /= p->texture_offset.m[1][1]; - - bool downscaling = xy[0] < 1.0 || xy[1] < 1.0; - bool upscaling = !downscaling && (xy[0] > 1.0 || xy[1] > 1.0); - double scale_factor = 1.0; - - struct scaler *scaler = &p->scaler[SCALER_SCALE]; - struct scaler_config scaler_conf = p->opts.scaler[SCALER_SCALE]; - if (p->opts.scaler_resizes_only && !downscaling && !upscaling) { - scaler_conf.kernel.name = "bilinear"; - // For scaler-resizes-only, we round the texture offset to - // the nearest round value in order to prevent ugly blurriness - // (in exchange for slightly shifting the image by up to half a - // subpixel) - p->texture_offset.t[0] = roundf(p->texture_offset.t[0]); - p->texture_offset.t[1] = roundf(p->texture_offset.t[1]); - } - if (downscaling && p->opts.scaler[SCALER_DSCALE].kernel.name) { - scaler_conf = p->opts.scaler[SCALER_DSCALE]; - scaler = &p->scaler[SCALER_DSCALE]; - } - - // When requesting correct-downscaling and the clip is anamorphic, and - // because only a single scale factor is used for both axes, enable it only - // when both axes are downscaled, and use the milder of the factors to not - // end up with too much blur on one axis (even if we end up with sub-optimal - // scale factor on the other axis). This is better than not respecting - // correct scaling at all for anamorphic clips. - double f = MPMAX(xy[0], xy[1]); - if (p->opts.correct_downscaling && f < 1.0) - scale_factor = 1.0 / f; - - // Pre-conversion, like linear light/sigmoidization - GLSLF("// scaler pre-conversion\n"); - bool use_linear = p->opts.linear_scaling || p->opts.sigmoid_upscaling; - - // Linear light downscaling results in nasty artifacts for HDR curves due - // to the potentially extreme brightness differences severely compounding - // any ringing. So just scale in gamma light instead. - if (mp_trc_is_hdr(p->image_params.color.gamma) && downscaling) - use_linear = false; - - if (use_linear) { - p->use_linear = true; - pass_linearize(p->sc, p->image_params.color.gamma); - pass_opt_hook_point(p, "LINEAR", NULL); - } - - bool use_sigmoid = use_linear && p->opts.sigmoid_upscaling && upscaling; - float sig_center, sig_slope, sig_offset, sig_scale; - if (use_sigmoid) { - // Coefficients for the sigmoidal transform are taken from the - // formula here: http://www.imagemagick.org/Usage/color_mods/#sigmoidal - sig_center = p->opts.sigmoid_center; - sig_slope = p->opts.sigmoid_slope; - // This function needs to go through (0,0) and (1,1) so we compute the - // values at 1 and 0, and then scale/shift them, respectively. - sig_offset = 1.0/(1+expf(sig_slope * sig_center)); - sig_scale = 1.0/(1+expf(sig_slope * (sig_center-1))) - sig_offset; - GLSLF("color.rgb = %f - log(1.0/(color.rgb * %f + %f) - 1.0) * 1.0/%f;\n", - sig_center, sig_scale, sig_offset, sig_slope); - pass_opt_hook_point(p, "SIGMOID", NULL); - } - - pass_opt_hook_point(p, "PREKERNEL", NULL); - - int vp_w = p->dst_rect.x1 - p->dst_rect.x0; - int vp_h = p->dst_rect.y1 - p->dst_rect.y0; - struct gl_transform transform; - compute_src_transform(p, &transform); - - GLSLF("// main scaling\n"); - finish_pass_fbo(p, &p->indirect_fbo, p->texture_w, p->texture_h, 0); - struct img_tex src = img_tex_fbo(&p->indirect_fbo, PLANE_RGB, p->components); - gl_transform_trans(transform, &src.transform); - pass_sample(p, src, scaler, &scaler_conf, scale_factor, vp_w, vp_h); - - // Changes the texture size to display size after main scaler. 
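An editor aside on the sigmoidal pre/post-conversion used earlier in this function (the standalone helper below is illustrative, not from this file): sig_offset and sig_scale are chosen so that the forward curve passes exactly through (0,0) and (1,1), which is easy to verify numerically.

#include <math.h>

static float sigmoidize(float x, float center, float slope)
{
    float offset = 1.0f / (1.0f + expf(slope * center));
    float scale  = 1.0f / (1.0f + expf(slope * (center - 1.0f))) - offset;
    return center - logf(1.0f / (x * scale + offset) - 1.0f) / slope;
}
/* sigmoidize(0.0f, c, s) == 0 and sigmoidize(1.0f, c, s) == 1 up to rounding;
 * the "scaler post-conversion" a few lines below applies the exact inverse. */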
- p->texture_w = vp_w; - p->texture_h = vp_h; - - pass_opt_hook_point(p, "POSTKERNEL", NULL); - - GLSLF("// scaler post-conversion\n"); - if (use_sigmoid) { - // Inverse of the transformation above - GLSLF("color.rgb = (1.0/(1.0 + exp(%f * (%f - color.rgb))) - %f) * 1.0/%f;\n", - sig_slope, sig_center, sig_offset, sig_scale); - } -} - -// Adapts the colors to the right output color space. (Final pass during -// rendering) -// If OSD is true, ignore any changes that may have been made to the video -// by previous passes (i.e. linear scaling) -static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool osd) -{ - struct ra *ra = p->ra; - - // Figure out the target color space from the options, or auto-guess if - // none were set - struct mp_colorspace dst = { - .gamma = p->opts.target_trc, - .primaries = p->opts.target_prim, - .light = MP_CSP_LIGHT_DISPLAY, - }; - - if (p->use_lut_3d) { - // The 3DLUT is always generated against the video's original source - // space, *not* the reference space. (To avoid having to regenerate - // the 3DLUT for the OSD on every frame) - enum mp_csp_prim prim_orig = p->image_params.color.primaries; - enum mp_csp_trc trc_orig = p->image_params.color.gamma; - - // One exception: HDR is not implemented by LittleCMS for technical - // limitation reasons, so we use a gamma 2.2 input curve here instead. - // We could pick any value we want here, the difference is just coding - // efficiency. - if (mp_trc_is_hdr(trc_orig)) - trc_orig = MP_CSP_TRC_GAMMA22; - - if (gl_video_get_lut3d(p, prim_orig, trc_orig)) { - dst.primaries = prim_orig; - dst.gamma = trc_orig; - } - } - - if (dst.primaries == MP_CSP_PRIM_AUTO) { - // The vast majority of people are on sRGB or BT.709 displays, so pick - // this as the default output color space. - dst.primaries = MP_CSP_PRIM_BT_709; - - if (src.primaries == MP_CSP_PRIM_BT_601_525 || - src.primaries == MP_CSP_PRIM_BT_601_625) - { - // Since we auto-pick BT.601 and BT.709 based on the dimensions, - // combined with the fact that they're very similar to begin with, - // and to avoid confusing the average user, just don't adapt BT.601 - // content automatically at all. - dst.primaries = src.primaries; - } - } - - if (dst.gamma == MP_CSP_TRC_AUTO) { - // Most people seem to complain when the image is darker or brighter - // than what they're "used to", so just avoid changing the gamma - // altogether by default. The only exceptions to this rule apply to - // very unusual TRCs, which even hardcode technoluddites would probably - // not enjoy viewing unaltered. - dst.gamma = src.gamma; - - // Avoid outputting linear light or HDR content "by default". 
For these - // just pick gamma 2.2 as a default, since it's a good estimate for - // the response of typical displays - if (dst.gamma == MP_CSP_TRC_LINEAR || mp_trc_is_hdr(dst.gamma)) - dst.gamma = MP_CSP_TRC_GAMMA22; - } - - bool detect_peak = p->opts.compute_hdr_peak && mp_trc_is_hdr(src.gamma); - if (detect_peak && !p->hdr_peak_ssbo) { - struct { - unsigned int sig_peak_raw; - unsigned int index; - unsigned int frame_max[PEAK_DETECT_FRAMES+1]; - } peak_ssbo = {0}; - - // Prefill with safe values - int safe = MP_REF_WHITE * mp_trc_nom_peak(p->image_params.color.gamma); - peak_ssbo.sig_peak_raw = PEAK_DETECT_FRAMES * safe; - for (int i = 0; i < PEAK_DETECT_FRAMES+1; i++) - peak_ssbo.frame_max[i] = safe; - - struct ra_buf_params params = { - .type = RA_BUF_TYPE_SHADER_STORAGE, - .size = sizeof(peak_ssbo), - .initial_data = &peak_ssbo, - }; - - p->hdr_peak_ssbo = ra_buf_create(ra, ¶ms); - if (!p->hdr_peak_ssbo) { - MP_WARN(p, "Failed to create HDR peak detection SSBO, disabling.\n"); - detect_peak = (p->opts.compute_hdr_peak = false); - } - } - - if (detect_peak) { - pass_describe(p, "detect HDR peak"); - pass_is_compute(p, 8, 8); // 8x8 is good for performance - gl_sc_ssbo(p->sc, "PeakDetect", p->hdr_peak_ssbo, - "uint sig_peak_raw;" - "uint index;" - "uint frame_max[%d];", PEAK_DETECT_FRAMES + 1 - ); - } - - // Adapt from src to dst as necessary - pass_color_map(p->sc, src, dst, p->opts.tone_mapping, - p->opts.tone_mapping_param, p->opts.tone_mapping_desat, - detect_peak, p->opts.gamut_warning, p->use_linear && !osd); - - if (p->use_lut_3d) { - gl_sc_uniform_texture(p->sc, "lut_3d", p->lut_3d_texture); - GLSL(vec3 cpos;) - for (int i = 0; i < 3; i++) - GLSLF("cpos[%d] = LUT_POS(color[%d], %d.0);\n", i, i, p->lut_3d_size[i]); - GLSL(color.rgb = tex3D(lut_3d, cpos).rgb;) - } -} - -void gl_video_set_fb_depth(struct gl_video *p, int fb_depth) -{ - p->fb_depth = fb_depth; -} - -static void pass_dither(struct gl_video *p) -{ - // Assume 8 bits per component if unknown. - int dst_depth = p->fb_depth > 0 ? p->fb_depth : 8; - if (p->opts.dither_depth > 0) - dst_depth = p->opts.dither_depth; - - if (p->opts.dither_depth < 0 || p->opts.dither_algo == DITHER_NONE) - return; - - if (!p->dither_texture) { - MP_VERBOSE(p, "Dither to %d.\n", dst_depth); - - int tex_size = 0; - void *tex_data = NULL; - const struct ra_format *fmt = NULL; - void *temp = NULL; - - if (p->opts.dither_algo == DITHER_FRUIT) { - int sizeb = p->opts.dither_size; - int size = 1 << sizeb; - - if (p->last_dither_matrix_size != size) { - p->last_dither_matrix = talloc_realloc(p, p->last_dither_matrix, - float, size * size); - mp_make_fruit_dither_matrix(p->last_dither_matrix, sizeb); - p->last_dither_matrix_size = size; - } - - // Prefer R16 texture since they provide higher precision. - fmt = ra_find_unorm_format(p->ra, 2, 1); - if (!fmt) - fmt = ra_find_float16_format(p->ra, 1); - if (fmt) { - tex_size = size; - tex_data = p->last_dither_matrix; - if (fmt->ctype == RA_CTYPE_UNORM) { - uint16_t *t = temp = talloc_array(NULL, uint16_t, size * size); - for (int n = 0; n < size * size; n++) - t[n] = p->last_dither_matrix[n] * UINT16_MAX; - tex_data = t; - } - } else { - MP_VERBOSE(p, "GL too old. 
Falling back to ordered dither.\n"); - p->opts.dither_algo = DITHER_ORDERED; - } - } - - if (p->opts.dither_algo == DITHER_ORDERED) { - temp = talloc_array(NULL, char, 8 * 8); - mp_make_ordered_dither_matrix(temp, 8); - - fmt = ra_find_unorm_format(p->ra, 1, 1); - tex_size = 8; - tex_data = temp; - } - - struct ra_tex_params params = { - .dimensions = 2, - .w = tex_size, - .h = tex_size, - .d = 1, - .format = fmt, - .render_src = true, - .src_repeat = true, - .initial_data = tex_data, - }; - p->dither_texture = ra_tex_create(p->ra, ¶ms); - - debug_check_gl(p, "dither setup"); - - talloc_free(temp); - } - - GLSLF("// dithering\n"); - - // This defines how many bits are considered significant for output on - // screen. The superfluous bits will be used for rounding according to the - // dither matrix. The precision of the source implicitly decides how many - // dither patterns can be visible. - int dither_quantization = (1 << dst_depth) - 1; - int dither_size = p->dither_texture->params.w; - - gl_sc_uniform_texture(p->sc, "dither", p->dither_texture); - - GLSLF("vec2 dither_pos = gl_FragCoord.xy * 1.0/%d.0;\n", dither_size); - - if (p->opts.temporal_dither) { - int phase = (p->frames_rendered / p->opts.temporal_dither_period) % 8u; - float r = phase * (M_PI / 2); // rotate - float m = phase < 4 ? 1 : -1; // mirror - - float matrix[2][2] = {{cos(r), -sin(r) }, - {sin(r) * m, cos(r) * m}}; - gl_sc_uniform_mat2(p->sc, "dither_trafo", true, &matrix[0][0]); - - GLSL(dither_pos = dither_trafo * dither_pos;) - } - - GLSL(float dither_value = texture(dither, dither_pos).r;) - GLSLF("color = floor(color * %d.0 + dither_value + 0.5 / %d.0) * 1.0/%d.0;\n", - dither_quantization, dither_size * dither_size, dither_quantization); -} - -// Draws the OSD, in scene-referred colors.. If cms is true, subtitles are -// instead adapted to the display's gamut. -static void pass_draw_osd(struct gl_video *p, int draw_flags, double pts, - struct mp_osd_res rect, struct fbodst target, bool cms) -{ - mpgl_osd_generate(p->osd, rect, pts, p->image_params.stereo_out, draw_flags); - - timer_pool_start(p->osd_timer); - for (int n = 0; n < MAX_OSD_PARTS; n++) { - // (This returns false if this part is empty with nothing to draw.) - if (!mpgl_osd_draw_prepare(p->osd, n, p->sc)) - continue; - // When subtitles need to be color managed, assume they're in sRGB - // (for lack of anything saner to do) - if (cms) { - static const struct mp_colorspace csp_srgb = { - .primaries = MP_CSP_PRIM_BT_709, - .gamma = MP_CSP_TRC_SRGB, - .light = MP_CSP_LIGHT_DISPLAY, - }; - - pass_colormanage(p, csp_srgb, true); - } - mpgl_osd_draw_finish(p->osd, n, p->sc, target); - } - - timer_pool_stop(p->osd_timer); - pass_describe(p, "drawing osd"); - pass_record(p, timer_pool_measure(p->osd_timer)); -} - -static float chroma_realign(int size, int pixel) -{ - return size / (float)chroma_upsize(size, pixel); -} - -// Minimal rendering code path, for GLES or OpenGL 2.1 without proper FBOs. -static void pass_render_frame_dumb(struct gl_video *p) -{ - struct img_tex tex[4]; - struct gl_transform off[4]; - pass_get_img_tex(p, &p->image, tex, off); - - struct gl_transform transform; - compute_src_transform(p, &transform); - - int index = 0; - for (int i = 0; i < p->plane_count; i++) { - int cw = tex[i].type == PLANE_CHROMA ? p->ra_format.chroma_w : 1; - int ch = tex[i].type == PLANE_CHROMA ? 
p->ra_format.chroma_h : 1; - if (p->image_params.rotate % 180 == 90) - MPSWAP(int, cw, ch); - - struct gl_transform t = transform; - t.m[0][0] *= chroma_realign(p->texture_w, cw); - t.m[1][1] *= chroma_realign(p->texture_h, ch); - - t.t[0] /= cw; - t.t[1] /= ch; - - t.t[0] += off[i].t[0]; - t.t[1] += off[i].t[1]; - - gl_transform_trans(tex[i].transform, &t); - tex[i].transform = t; - - copy_img_tex(p, &index, tex[i]); - } - - pass_convert_yuv(p); -} - -// The main rendering function, takes care of everything up to and including -// upscaling. p->image is rendered. -static bool pass_render_frame(struct gl_video *p, struct mp_image *mpi, uint64_t id) -{ - // initialize the texture parameters and temporary variables - p->texture_w = p->image_params.w; - p->texture_h = p->image_params.h; - p->texture_offset = identity_trans; - p->components = 0; - p->saved_tex_num = 0; - p->hook_fbo_num = 0; - p->use_linear = false; - - // try uploading the frame - if (!pass_upload_image(p, mpi, id)) - return false; - - if (p->image_params.rotate % 180 == 90) - MPSWAP(int, p->texture_w, p->texture_h); - - if (p->dumb_mode) - return true; - - pass_read_video(p); - pass_opt_hook_point(p, "NATIVE", &p->texture_offset); - pass_convert_yuv(p); - pass_opt_hook_point(p, "MAINPRESUB", &p->texture_offset); - - // For subtitles - double vpts = p->image.mpi->pts; - if (vpts == MP_NOPTS_VALUE) - vpts = p->osd_pts; - - if (p->osd && p->opts.blend_subs == BLEND_SUBS_VIDEO) { - double scale[2]; - get_scale_factors(p, false, scale); - struct mp_osd_res rect = { - .w = p->texture_w, .h = p->texture_h, - .display_par = scale[1] / scale[0], // counter compensate scaling - }; - finish_pass_fbo(p, &p->blend_subs_fbo, rect.w, rect.h, 0); - pass_draw_osd(p, OSD_DRAW_SUB_ONLY, vpts, rect, - p->blend_subs_fbo.fbo, false); - pass_read_fbo(p, &p->blend_subs_fbo); - pass_describe(p, "blend subs video"); - } - pass_opt_hook_point(p, "MAIN", &p->texture_offset); - - pass_scale_main(p); - - int vp_w = p->dst_rect.x1 - p->dst_rect.x0, - vp_h = p->dst_rect.y1 - p->dst_rect.y0; - if (p->osd && p->opts.blend_subs == BLEND_SUBS_YES) { - // Recreate the real video size from the src/dst rects - struct mp_osd_res rect = { - .w = vp_w, .h = vp_h, - .ml = -p->src_rect.x0, .mr = p->src_rect.x1 - p->image_params.w, - .mt = -p->src_rect.y0, .mb = p->src_rect.y1 - p->image_params.h, - .display_par = 1.0, - }; - // Adjust margins for scale - double scale[2]; - get_scale_factors(p, true, scale); - rect.ml *= scale[0]; rect.mr *= scale[0]; - rect.mt *= scale[1]; rect.mb *= scale[1]; - // We should always blend subtitles in non-linear light - if (p->use_linear) { - pass_delinearize(p->sc, p->image_params.color.gamma); - p->use_linear = false; - } - finish_pass_fbo(p, &p->blend_subs_fbo, p->texture_w, p->texture_h, 0); - pass_draw_osd(p, OSD_DRAW_SUB_ONLY, vpts, rect, - p->blend_subs_fbo.fbo, false); - pass_read_fbo(p, &p->blend_subs_fbo); - pass_describe(p, "blend subs"); - } - - pass_opt_hook_point(p, "SCALED", NULL); - - return true; -} - -static void pass_draw_to_screen(struct gl_video *p, struct fbodst fbo) -{ - if (p->dumb_mode) - pass_render_frame_dumb(p); - - // Adjust the overall gamma before drawing to screen - if (p->user_gamma != 1) { - gl_sc_uniform_f(p->sc, "user_gamma", p->user_gamma); - GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);) - GLSL(color.rgb = pow(color.rgb, vec3(user_gamma));) - } - - pass_colormanage(p, p->image_params.color, false); - - // Since finish_pass_direct doesn't work with compute shaders, and neither - // does the 
checkerboard/dither code, we may need an indirection via - // p->screen_fbo here. - if (p->pass_compute.active) { - int o_w = p->dst_rect.x1 - p->dst_rect.x0, - o_h = p->dst_rect.y1 - p->dst_rect.y0; - finish_pass_fbo(p, &p->screen_fbo, o_w, o_h, FBOTEX_FUZZY); - struct img_tex tmp = img_tex_fbo(&p->screen_fbo, PLANE_RGB, p->components); - copy_img_tex(p, &(int){0}, tmp); - } - - if (p->has_alpha){ - if (p->opts.alpha_mode == ALPHA_BLEND_TILES) { - // Draw checkerboard pattern to indicate transparency - GLSLF("// transparency checkerboard\n"); - GLSL(bvec2 tile = lessThan(fract(gl_FragCoord.xy * 1.0/32.0), vec2(0.5));) - GLSL(vec3 background = vec3(tile.x == tile.y ? 1.0 : 0.75);) - GLSL(color.rgb = mix(background, color.rgb, color.a);) - } else if (p->opts.alpha_mode == ALPHA_BLEND) { - // Blend into background color (usually black) - struct m_color c = p->opts.background; - GLSLF("vec4 background = vec4(%f, %f, %f, %f);\n", - c.r / 255.0, c.g / 255.0, c.b / 255.0, c.a / 255.0); - GLSL(color = mix(background, vec4(color.rgb, 1.0), color.a);) - } - } - - pass_opt_hook_point(p, "OUTPUT", NULL); - - pass_dither(p); - pass_describe(p, "output to screen"); - finish_pass_direct(p, fbo, &p->dst_rect); -} - -static bool update_fbosurface(struct gl_video *p, struct mp_image *mpi, - uint64_t id, struct fbosurface *surf) -{ - int vp_w = p->dst_rect.x1 - p->dst_rect.x0, - vp_h = p->dst_rect.y1 - p->dst_rect.y0; - - pass_info_reset(p, false); - if (!pass_render_frame(p, mpi, id)) - return false; - - // Frame blending should always be done in linear light to preserve the - // overall brightness, otherwise this will result in flashing dark frames - // because mixing in compressed light artificially darkens the results - if (!p->use_linear) { - p->use_linear = true; - pass_linearize(p->sc, p->image_params.color.gamma); - } - - finish_pass_fbo(p, &surf->fbotex, vp_w, vp_h, FBOTEX_FUZZY); - surf->id = id; - surf->pts = mpi->pts; - return true; -} - -// Draws an interpolate frame to fbo, based on the frame timing in t -static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t, - struct fbodst fbo) -{ - bool is_new = false; - - // Reset the queue completely if this is a still image, to avoid any - // interpolation artifacts from surrounding frames when unpausing or - // framestepping - if (t->still) - gl_video_reset_surfaces(p); - - // First of all, figure out if we have a frame available at all, and draw - // it manually + reset the queue if not - if (p->surfaces[p->surface_now].id == 0) { - struct fbosurface *now = &p->surfaces[p->surface_now]; - if (!update_fbosurface(p, t->current, t->frame_id, now)) - return; - p->surface_idx = p->surface_now; - is_new = true; - } - - // Find the right frame for this instant - if (t->current) { - int next = fbosurface_wrap(p->surface_now + 1); - while (p->surfaces[next].id && - p->surfaces[next].id > p->surfaces[p->surface_now].id && - p->surfaces[p->surface_now].id < t->frame_id) - { - p->surface_now = next; - next = fbosurface_wrap(next + 1); - } - } - - // Figure out the queue size. For illustration, a filter radius of 2 would - // look like this: _ A [B] C D _ - // A is surface_bse, B is surface_now, C is surface_now+1 and D is - // surface_end. 
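To make the ring indexing concrete, an editor sketch follows (the pool size and helper name are invented, not quoted from this file): surface indices wrap modulo the pool size, and for size 4 (radius 2) the window spans A..D around surface_now exactly as in the picture above.

#define POOL_SIZE 10                     /* illustrative number of surfaces */

static int wrap_idx(int i)
{
    i %= POOL_SIZE;
    return i < 0 ? i + POOL_SIZE : i;    /* keep negative indices in range */
}
/* With size == 4 (radius 2) and surface_now == 3:
 *   surface_bse = wrap_idx(3 - 1) == 2      (A)
 *   surface_end = wrap_idx(3 + 2) == 5      (D)
 * and wrap_idx(surface_bse + 3) == surface_end, matching the assert below. */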
- struct scaler *tscale = &p->scaler[SCALER_TSCALE]; - reinit_scaler(p, tscale, &p->opts.scaler[SCALER_TSCALE], 1, tscale_sizes); - bool oversample = strcmp(tscale->conf.kernel.name, "oversample") == 0; - bool linear = strcmp(tscale->conf.kernel.name, "linear") == 0; - int size; - - if (oversample || linear) { - size = 2; - } else { - assert(tscale->kernel && !tscale->kernel->polar); - size = ceil(tscale->kernel->size); - assert(size <= TEXUNIT_VIDEO_NUM); - } - - int radius = size/2; - int surface_now = p->surface_now; - int surface_bse = fbosurface_wrap(surface_now - (radius-1)); - int surface_end = fbosurface_wrap(surface_now + radius); - assert(fbosurface_wrap(surface_bse + size-1) == surface_end); - - // Render new frames while there's room in the queue. Note that technically, - // this should be done before the step where we find the right frame, but - // it only barely matters at the very beginning of playback, and this way - // makes the code much more linear. - int surface_dst = fbosurface_wrap(p->surface_idx + 1); - for (int i = 0; i < t->num_frames; i++) { - // Avoid overwriting data we might still need - if (surface_dst == surface_bse - 1) - break; - - struct mp_image *f = t->frames[i]; - uint64_t f_id = t->frame_id + i; - if (!mp_image_params_equal(&f->params, &p->real_image_params)) - continue; - - if (f_id > p->surfaces[p->surface_idx].id) { - struct fbosurface *dst = &p->surfaces[surface_dst]; - if (!update_fbosurface(p, f, f_id, dst)) - return; - p->surface_idx = surface_dst; - surface_dst = fbosurface_wrap(surface_dst + 1); - is_new = true; - } - } - - // Figure out whether the queue is "valid". A queue is invalid if the - // frames' PTS is not monotonically increasing. Anything else is invalid, - // so avoid blending incorrect data and just draw the latest frame as-is. - // Possible causes for failure of this condition include seeks, pausing, - // end of playback or start of playback. - bool valid = true; - for (int i = surface_bse, ii; valid && i != surface_end; i = ii) { - ii = fbosurface_wrap(i + 1); - if (p->surfaces[i].id == 0 || p->surfaces[ii].id == 0) { - valid = false; - } else if (p->surfaces[ii].id < p->surfaces[i].id) { - valid = false; - MP_DBG(p, "interpolation queue underrun\n"); - } - } - - // Update OSD PTS to synchronize subtitles with the displayed frame - p->osd_pts = p->surfaces[surface_now].pts; - - // Finally, draw the right mix of frames to the screen. - if (!is_new) - pass_info_reset(p, true); - pass_describe(p, "interpolation"); - if (!valid || t->still) { - // surface_now is guaranteed to be valid, so we can safely use it. - pass_read_fbo(p, &p->surfaces[surface_now].fbotex); - p->is_interpolated = false; - } else { - double mix = t->vsync_offset / t->ideal_frame_duration; - // The scaler code always wants the fcoord to be between 0 and 1, - // so we try to adjust by using the previous set of N frames instead - // (which requires some extra checking to make sure it's valid) - if (mix < 0.0) { - int prev = fbosurface_wrap(surface_bse - 1); - if (p->surfaces[prev].id != 0 && - p->surfaces[prev].id < p->surfaces[surface_bse].id) - { - mix += 1.0; - surface_bse = prev; - } else { - mix = 0.0; // at least don't blow up, this should only - // ever happen at the start of playback - } - } - - if (oversample) { - // Oversample uses the frame area as mix ratio, not the the vsync - // position itself - double vsync_dist = t->vsync_interval / t->ideal_frame_duration, - threshold = tscale->conf.kernel.params[0]; - threshold = isnan(threshold) ? 
0.0 : threshold; - mix = (1 - mix) / vsync_dist; - mix = mix <= 0 + threshold ? 0 : mix; - mix = mix >= 1 - threshold ? 1 : mix; - mix = 1 - mix; - } - - // Blend the frames together - if (oversample || linear) { - gl_sc_uniform_f(p->sc, "inter_coeff", mix); - GLSL(color = mix(texture(texture0, texcoord0), - texture(texture1, texcoord1), - inter_coeff);) - } else { - gl_sc_uniform_f(p->sc, "fcoord", mix); - pass_sample_separated_gen(p->sc, tscale, 0, 0); - } - - // Load all the required frames - for (int i = 0; i < size; i++) { - struct img_tex img = - img_tex_fbo(&p->surfaces[fbosurface_wrap(surface_bse+i)].fbotex, - PLANE_RGB, p->components); - // Since the code in pass_sample_separated currently assumes - // the textures are bound in-order and starting at 0, we just - // assert to make sure this is the case (which it should always be) - int id = pass_bind(p, img); - assert(id == i); - } - - MP_DBG(p, "inter frame dur: %f vsync: %f, mix: %f\n", - t->ideal_frame_duration, t->vsync_interval, mix); - p->is_interpolated = true; - } - pass_draw_to_screen(p, fbo); - - p->frames_drawn += 1; -} - -void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, - struct fbodst target) -{ - struct mp_rect target_rc = {0, 0, target.tex->params.w, target.tex->params.h}; - - p->broken_frame = false; - - bool has_frame = !!frame->current; - - if (!has_frame || !mp_rect_equals(&p->dst_rect, &target_rc)) { - struct m_color c = p->clear_color; - float color[4] = {c.r / 255.0, c.g / 255.0, c.b / 255.0, c.a / 255.0}; - p->ra->fns->clear(p->ra, target.tex, color, &target_rc); - } - - if (p->hwdec_active && p->hwdec->driver->overlay_frame) { - if (has_frame) { - float *color = p->hwdec->overlay_colorkey; - p->ra->fns->clear(p->ra, target.tex, color, &p->dst_rect); - } - - p->hwdec->driver->overlay_frame(p->hwdec, frame->current, - &p->src_rect, &p->dst_rect, - frame->frame_id != p->image.id); - - if (frame->current) - p->osd_pts = frame->current->pts; - - // Disable GL rendering - has_frame = false; - } - - if (has_frame) { - bool interpolate = p->opts.interpolation && frame->display_synced && - (p->frames_drawn || !frame->still); - if (interpolate) { - double ratio = frame->ideal_frame_duration / frame->vsync_interval; - if (fabs(ratio - 1.0) < p->opts.interpolation_threshold) - interpolate = false; - } - - if (interpolate) { - gl_video_interpolate_frame(p, frame, target); - } else { - bool is_new = frame->frame_id != p->image.id; - - // Redrawing a frame might update subtitles. 
- if (frame->still && p->opts.blend_subs) - is_new = true; - - if (is_new || !p->output_fbo_valid) { - p->output_fbo_valid = false; - - pass_info_reset(p, !is_new); - if (!pass_render_frame(p, frame->current, frame->frame_id)) - goto done; - - // For the non-interpolation case, we draw to a single "cache" - // FBO to speed up subsequent re-draws (if any exist) - struct fbodst dest_fbo = target; - if (frame->num_vsyncs > 1 && frame->display_synced && - !p->dumb_mode && (p->ra->caps & RA_CAP_BLIT)) - { - fbotex_change(&p->output_fbo, p->ra, p->log, - target.tex->params.w, target.tex->params.h, - p->fbo_format, FBOTEX_FUZZY); - dest_fbo = p->output_fbo.fbo; - p->output_fbo_valid = true; - } - pass_draw_to_screen(p, dest_fbo); - } - - // "output fbo valid" and "output fbo needed" are equivalent - if (p->output_fbo_valid) { - pass_info_reset(p, true); - pass_describe(p, "redraw cached frame"); - struct mp_rect src = p->dst_rect; - struct mp_rect dst = src; - if (target.flip) { - dst.y0 = target.tex->params.h - src.y0; - dst.y1 = target.tex->params.h - src.y1; - } - timer_pool_start(p->blit_timer); - p->ra->fns->blit(p->ra, target.tex, p->output_fbo.tex, - &dst, &src); - timer_pool_stop(p->blit_timer); - pass_record(p, timer_pool_measure(p->blit_timer)); - } - } - } - -done: - - unmap_current_image(p); - - debug_check_gl(p, "after video rendering"); - - if (p->osd) { - // If we haven't actually drawn anything so far, then we technically - // need to consider this the start of a new pass. Let's call it a - // redraw just because, since it's basically a blank frame anyway - if (!has_frame) - pass_info_reset(p, true); - - pass_draw_osd(p, p->opts.blend_subs ? OSD_DRAW_OSD_ONLY : 0, - p->osd_pts, p->osd_rect, target, true); - debug_check_gl(p, "after OSD rendering"); - } - - if (gl_sc_error_state(p->sc) || p->broken_frame) { - // Make the screen solid blue to make it visually clear that an - // error has occurred - float color[4] = {0.0, 0.05, 0.5, 1.0}; - p->ra->fns->clear(p->ra, target.tex, color, &target_rc); - } - - // The playloop calls this last before waiting some time until it decides - // to call flip_page(). Tell OpenGL to start execution of the GPU commands - // while we sleep (this happens asynchronously). - if ((p->opts.early_flush == -1 && !frame->display_synced) || - p->opts.early_flush == 1) - { - if (p->ra->fns->flush) - p->ra->fns->flush(p->ra); - } - - p->frames_rendered++; - pass_report_performance(p); -} - -// Use this color instead of the global option. -void gl_video_set_clear_color(struct gl_video *p, struct m_color c) -{ - p->force_clear_color = true; - p->clear_color = c; -} - -void gl_video_set_osd_pts(struct gl_video *p, double pts) -{ - p->osd_pts = pts; -} - -bool gl_video_check_osd_change(struct gl_video *p, struct mp_osd_res *res, - double pts) -{ - return p->osd ? 
mpgl_osd_check_change(p->osd, res, pts) : false; -} - -void gl_video_resize(struct gl_video *p, - struct mp_rect *src, struct mp_rect *dst, - struct mp_osd_res *osd) -{ - if (mp_rect_equals(&p->src_rect, src) && - mp_rect_equals(&p->dst_rect, dst) && - osd_res_equals(p->osd_rect, *osd)) - return; - - p->src_rect = *src; - p->dst_rect = *dst; - p->osd_rect = *osd; - - gl_video_reset_surfaces(p); - - if (p->osd) - mpgl_osd_resize(p->osd, p->osd_rect, p->image_params.stereo_out); -} - -static void frame_perf_data(struct pass_info pass[], struct mp_frame_perf *out) -{ - for (int i = 0; i < PASS_INFO_MAX; i++) { - if (!pass[i].desc.len) - break; - out->perf[out->count] = pass[i].perf; - out->desc[out->count] = pass[i].desc.start; - out->count++; - } -} - -void gl_video_perfdata(struct gl_video *p, struct voctrl_performance_data *out) -{ - *out = (struct voctrl_performance_data){0}; - frame_perf_data(p->pass_fresh, &out->fresh); - frame_perf_data(p->pass_redraw, &out->redraw); -} - -// This assumes nv12, with textures set to GL_NEAREST filtering. -static void reinterleave_vdpau(struct gl_video *p, - struct ra_tex *input[4], struct ra_tex *output[2]) -{ - for (int n = 0; n < 2; n++) { - struct fbotex *fbo = &p->vdpau_deinterleave_fbo[n]; - // This is an array of the 2 to-merge planes. - struct ra_tex **src = &input[n * 2]; - int w = src[0]->params.w; - int h = src[0]->params.h; - int ids[2]; - for (int t = 0; t < 2; t++) { - ids[t] = pass_bind(p, (struct img_tex){ - .tex = src[t], - .multiplier = 1.0, - .transform = identity_trans, - .w = w, - .h = h, - }); - } - - GLSLF("color = fract(gl_FragCoord.y * 0.5) < 0.5\n"); - GLSLF(" ? texture(texture%d, texcoord%d)\n", ids[0], ids[0]); - GLSLF(" : texture(texture%d, texcoord%d);", ids[1], ids[1]); - - const struct ra_format *fmt = - ra_find_unorm_format(p->ra, 1, n == 0 ? 1 : 2); - fbotex_change(fbo, p->ra, p->log, w, h * 2, fmt, 0); - - pass_describe(p, "vdpau reinterleaving"); - finish_pass_direct(p, fbo->fbo, &(struct mp_rect){0, 0, w, h * 2}); - - output[n] = fbo->tex; - } -} - -// Returns false on failure. 
-static bool pass_upload_image(struct gl_video *p, struct mp_image *mpi, uint64_t id) -{ - struct video_image *vimg = &p->image; - - if (vimg->id == id) - return true; - - unref_current_image(p); - - mpi = mp_image_new_ref(mpi); - if (!mpi) - goto error; - - vimg->mpi = mpi; - vimg->id = id; - p->osd_pts = mpi->pts; - p->frames_uploaded++; - - if (p->hwdec_active) { - // Hardware decoding - - if (!p->hwdec_mapper) - goto error; - - pass_describe(p, "map frame (hwdec)"); - timer_pool_start(p->upload_timer); - bool ok = ra_hwdec_mapper_map(p->hwdec_mapper, vimg->mpi) >= 0; - timer_pool_stop(p->upload_timer); - pass_record(p, timer_pool_measure(p->upload_timer)); - - vimg->hwdec_mapped = true; - if (ok) { - struct mp_image layout = {0}; - mp_image_set_params(&layout, &p->image_params); - struct ra_tex **tex = p->hwdec_mapper->tex; - struct ra_tex *tmp[4] = {0}; - if (p->hwdec_mapper->vdpau_fields) { - reinterleave_vdpau(p, tex, tmp); - tex = tmp; - } - for (int n = 0; n < p->plane_count; n++) { - vimg->planes[n] = (struct texplane){ - .w = mp_image_plane_w(&layout, n), - .h = mp_image_plane_h(&layout, n), - .tex = tex[n], - }; - } - } else { - MP_FATAL(p, "Mapping hardware decoded surface failed.\n"); - goto error; - } - return true; - } - - // Software decoding - assert(mpi->num_planes == p->plane_count); - - timer_pool_start(p->upload_timer); - for (int n = 0; n < p->plane_count; n++) { - struct texplane *plane = &vimg->planes[n]; - - plane->flipped = mpi->stride[0] < 0; - - struct ra_tex_upload_params params = { - .tex = plane->tex, - .src = mpi->planes[n], - .invalidate = true, - .stride = mpi->stride[n], - }; - - struct dr_buffer *mapped = gl_find_dr_buffer(p, mpi->planes[n]); - if (mapped) { - params.buf = mapped->buf; - params.buf_offset = (uintptr_t)params.src - - (uintptr_t)mapped->buf->data; - params.src = NULL; - } - - if (p->using_dr_path != !!mapped) { - p->using_dr_path = !!mapped; - MP_VERBOSE(p, "DR enabled: %s\n", p->using_dr_path ? "yes" : "no"); - } - - if (!p->ra->fns->tex_upload(p->ra, ¶ms)) { - timer_pool_stop(p->upload_timer); - goto error; - } - - if (mapped && !mapped->mpi) - mapped->mpi = mp_image_new_ref(mpi); - } - timer_pool_stop(p->upload_timer); - - bool using_pbo = p->ra->use_pbo || !(p->ra->caps & RA_CAP_DIRECT_UPLOAD); - const char *mode = p->using_dr_path ? "DR" : using_pbo ? "PBO" : "naive"; - pass_describe(p, "upload frame (%s)", mode); - pass_record(p, timer_pool_measure(p->upload_timer)); - - return true; - -error: - unref_current_image(p); - p->broken_frame = true; - return false; -} - -static bool test_fbo(struct gl_video *p, const struct ra_format *fmt) -{ - MP_VERBOSE(p, "Testing FBO format %s\n", fmt->name); - struct fbotex fbo = {0}; - bool success = fbotex_change(&fbo, p->ra, p->log, 16, 16, fmt, 0); - fbotex_uninit(&fbo); - return success; -} - -// Return whether dumb-mode can be used without disabling any features. -// Essentially, vo_opengl with mostly default settings will return true. 
-static bool check_dumb_mode(struct gl_video *p) -{ - struct gl_video_opts *o = &p->opts; - if (p->use_integer_conversion) - return false; - if (o->dumb_mode > 0) // requested by user - return true; - if (o->dumb_mode < 0) // disabled by user - return false; - - // otherwise, use auto-detection - if (o->target_prim || o->target_trc || o->linear_scaling || - o->correct_downscaling || o->sigmoid_upscaling || o->interpolation || - o->blend_subs || o->deband || o->unsharp) - return false; - // check remaining scalers (tscale is already implicitly excluded above) - for (int i = 0; i < SCALER_COUNT; i++) { - if (i != SCALER_TSCALE) { - const char *name = o->scaler[i].kernel.name; - if (name && strcmp(name, "bilinear") != 0) - return false; - } - } - if (o->user_shaders && o->user_shaders[0]) - return false; - if (p->use_lut_3d) - return false; - return true; -} - -// Disable features that are not supported with the current OpenGL version. -static void check_gl_features(struct gl_video *p) -{ - struct ra *ra = p->ra; - bool have_float_tex = !!ra_find_float16_format(ra, 1); - bool have_mglsl = ra->glsl_version >= 130; // modern GLSL - const struct ra_format *rg_tex = ra_find_unorm_format(p->ra, 1, 2); - bool have_texrg = rg_tex && !rg_tex->luminance_alpha; - bool have_compute = ra->caps & RA_CAP_COMPUTE; - bool have_ssbo = ra->caps & RA_CAP_BUF_RW; - - const char *auto_fbo_fmts[] = {"rgba16", "rgba16f", "rgb10_a2", "rgba8", 0}; - const char *user_fbo_fmts[] = {p->opts.fbo_format, 0}; - const char **fbo_fmts = user_fbo_fmts[0] && strcmp(user_fbo_fmts[0], "auto") - ? user_fbo_fmts : auto_fbo_fmts; - bool have_fbo = false; - p->fbo_format = NULL; - for (int n = 0; fbo_fmts[n]; n++) { - const char *fmt = fbo_fmts[n]; - const struct ra_format *f = ra_find_named_format(p->ra, fmt); - if (!f && fbo_fmts == user_fbo_fmts) - MP_WARN(p, "FBO format '%s' not found!\n", fmt); - if (f && f->renderable && f->linear_filter && test_fbo(p, f)) { - MP_VERBOSE(p, "Using FBO format %s.\n", f->name); - have_fbo = true; - p->fbo_format = f; - break; - } - } - - p->forced_dumb_mode = p->opts.dumb_mode > 0 || !have_fbo || !have_texrg; - bool voluntarily_dumb = check_dumb_mode(p); - if (p->forced_dumb_mode || voluntarily_dumb) { - if (voluntarily_dumb) { - MP_VERBOSE(p, "No advanced processing required. Enabling dumb mode.\n"); - } else if (p->opts.dumb_mode <= 0) { - MP_WARN(p, "High bit depth FBOs unsupported. Enabling dumb mode.\n" - "Most extended features will be disabled.\n"); - } - p->dumb_mode = true; - p->use_lut_3d = false; - // Most things don't work, so whitelist all options that still work. 
- p->opts = (struct gl_video_opts){ - .gamma = p->opts.gamma, - .gamma_auto = p->opts.gamma_auto, - .pbo = p->opts.pbo, - .fbo_format = p->opts.fbo_format, - .alpha_mode = p->opts.alpha_mode, - .use_rectangle = p->opts.use_rectangle, - .background = p->opts.background, - .dither_algo = p->opts.dither_algo, - .dither_depth = p->opts.dither_depth, - .dither_size = p->opts.dither_size, - .temporal_dither = p->opts.temporal_dither, - .temporal_dither_period = p->opts.temporal_dither_period, - .tex_pad_x = p->opts.tex_pad_x, - .tex_pad_y = p->opts.tex_pad_y, - .tone_mapping = p->opts.tone_mapping, - .tone_mapping_param = p->opts.tone_mapping_param, - .tone_mapping_desat = p->opts.tone_mapping_desat, - .early_flush = p->opts.early_flush, - }; - for (int n = 0; n < SCALER_COUNT; n++) - p->opts.scaler[n] = gl_video_opts_def.scaler[n]; - return; - } - p->dumb_mode = false; - - // Normally, we want to disable them by default if FBOs are unavailable, - // because they will be slow (not critically slow, but still slower). - // Without FP textures, we must always disable them. - // I don't know if luminance alpha float textures exist, so disregard them. - for (int n = 0; n < SCALER_COUNT; n++) { - const struct filter_kernel *kernel = - mp_find_filter_kernel(p->opts.scaler[n].kernel.name); - if (kernel) { - char *reason = NULL; - if (!have_float_tex) - reason = "(float tex. missing)"; - if (!have_mglsl) - reason = "(GLSL version too old)"; - if (reason) { - MP_WARN(p, "Disabling scaler #%d %s %s.\n", n, - p->opts.scaler[n].kernel.name, reason); - // p->opts is a copy => we can just mess with it. - p->opts.scaler[n].kernel.name = "bilinear"; - if (n == SCALER_TSCALE) - p->opts.interpolation = 0; - } - } - } - - int use_cms = p->opts.target_prim != MP_CSP_PRIM_AUTO || - p->opts.target_trc != MP_CSP_TRC_AUTO || p->use_lut_3d; - - // mix() is needed for some gamma functions - if (!have_mglsl && (p->opts.linear_scaling || p->opts.sigmoid_upscaling)) { - p->opts.linear_scaling = false; - p->opts.sigmoid_upscaling = false; - MP_WARN(p, "Disabling linear/sigmoid scaling (GLSL version too old).\n"); - } - if (!have_mglsl && use_cms) { - p->opts.target_prim = MP_CSP_PRIM_AUTO; - p->opts.target_trc = MP_CSP_TRC_AUTO; - p->use_lut_3d = false; - MP_WARN(p, "Disabling color management (GLSL version too old).\n"); - } - if (!have_mglsl && p->opts.deband) { - p->opts.deband = 0; - MP_WARN(p, "Disabling debanding (GLSL version too old).\n"); - } - if ((!have_compute || !have_ssbo) && p->opts.compute_hdr_peak) { - p->opts.compute_hdr_peak = 0; - MP_WARN(p, "Disabling HDR peak computation (no compute shaders).\n"); - } -} - -static void init_gl(struct gl_video *p) -{ - debug_check_gl(p, "before init_gl"); - - p->upload_timer = timer_pool_create(p->ra); - p->blit_timer = timer_pool_create(p->ra); - p->osd_timer = timer_pool_create(p->ra); - - debug_check_gl(p, "after init_gl"); - - ra_dump_tex_formats(p->ra, MSGL_DEBUG); - ra_dump_img_formats(p->ra, MSGL_DEBUG); -} - -void gl_video_uninit(struct gl_video *p) -{ - if (!p) - return; - - uninit_video(p); - - gl_sc_destroy(p->sc); - - ra_tex_free(p->ra, &p->lut_3d_texture); - ra_buf_free(p->ra, &p->hdr_peak_ssbo); - - timer_pool_destroy(p->upload_timer); - timer_pool_destroy(p->blit_timer); - timer_pool_destroy(p->osd_timer); - - for (int i = 0; i < PASS_INFO_MAX; i++) { - talloc_free(p->pass_fresh[i].desc.start); - talloc_free(p->pass_redraw[i].desc.start); - } - - mpgl_osd_destroy(p->osd); - - // Forcibly destroy possibly remaining image references. 
This should also - // cause gl_video_dr_free_buffer() to be called for the remaining buffers. - gc_pending_dr_fences(p, true); - - // Should all have been unreffed already. - assert(!p->num_dr_buffers); - - talloc_free(p); -} - -void gl_video_reset(struct gl_video *p) -{ - gl_video_reset_surfaces(p); -} - -bool gl_video_showing_interpolated_frame(struct gl_video *p) -{ - return p->is_interpolated; -} - -static bool is_imgfmt_desc_supported(struct gl_video *p, - const struct ra_imgfmt_desc *desc) -{ - if (!desc->num_planes) - return false; - - if (desc->planes[0]->ctype == RA_CTYPE_UINT && p->forced_dumb_mode) - return false; - - return true; -} - -bool gl_video_check_format(struct gl_video *p, int mp_format) -{ - struct ra_imgfmt_desc desc; - if (ra_get_imgfmt_desc(p->ra, mp_format, &desc) && - is_imgfmt_desc_supported(p, &desc)) - return true; - if (p->hwdec && ra_hwdec_test_format(p->hwdec, mp_format)) - return true; - return false; -} - -void gl_video_config(struct gl_video *p, struct mp_image_params *params) -{ - unmap_overlay(p); - unref_current_image(p); - - if (!mp_image_params_equal(&p->real_image_params, params)) { - uninit_video(p); - p->real_image_params = *params; - p->image_params = *params; - if (params->imgfmt) - init_video(p); - } - - gl_video_reset_surfaces(p); -} - -void gl_video_set_osd_source(struct gl_video *p, struct osd_state *osd) -{ - mpgl_osd_destroy(p->osd); - p->osd = NULL; - p->osd_state = osd; - reinit_osd(p); -} - -struct gl_video *gl_video_init(struct ra *ra, struct mp_log *log, - struct mpv_global *g) -{ - struct gl_video *p = talloc_ptrtype(NULL, p); - *p = (struct gl_video) { - .ra = ra, - .global = g, - .log = log, - .sc = gl_sc_create(ra, g, log), - .video_eq = mp_csp_equalizer_create(p, g), - .opts_cache = m_config_cache_alloc(p, g, &gl_video_conf), - }; - // make sure this variable is initialized to *something* - p->pass = p->pass_fresh; - struct gl_video_opts *opts = p->opts_cache->opts; - p->cms = gl_lcms_init(p, log, g, opts->icc_opts), - p->opts = *opts; - for (int n = 0; n < SCALER_COUNT; n++) - p->scaler[n] = (struct scaler){.index = n}; - init_gl(p); - reinit_from_options(p); - return p; -} - -// Get static string for scaler shader. If "tscale" is set to true, the -// scaler must be a separable convolution filter. -static const char *handle_scaler_opt(const char *name, bool tscale) -{ - if (name && name[0]) { - const struct filter_kernel *kernel = mp_find_filter_kernel(name); - if (kernel && (!tscale || !kernel->polar)) - return kernel->f.name; - - for (const char *const *filter = tscale ? fixed_tscale_filters - : fixed_scale_filters; - *filter; filter++) { - if (strcmp(*filter, name) == 0) - return *filter; - } - } - return NULL; -} - -void gl_video_update_options(struct gl_video *p) -{ - if (m_config_cache_update(p->opts_cache)) { - gl_lcms_update_options(p->cms); - reinit_from_options(p); - } -} - -static void reinit_from_options(struct gl_video *p) -{ - p->use_lut_3d = gl_lcms_has_profile(p->cms); - - // Copy the option fields, so that check_gl_features() can mutate them. - // This works only for the fields themselves of course, not for any memory - // referenced by them. 
- p->opts = *(struct gl_video_opts *)p->opts_cache->opts; - - if (!p->force_clear_color) - p->clear_color = p->opts.background; - - check_gl_features(p); - uninit_rendering(p); - gl_sc_set_cache_dir(p->sc, p->opts.shader_cache_dir); - p->ra->use_pbo = p->opts.pbo; - gl_video_setup_hooks(p); - reinit_osd(p); - - if (p->opts.interpolation && !p->global->opts->video_sync && !p->dsi_warned) { - MP_WARN(p, "Interpolation now requires enabling display-sync mode.\n" - "E.g.: --video-sync=display-resample\n"); - p->dsi_warned = true; - } -} - -void gl_video_configure_queue(struct gl_video *p, struct vo *vo) -{ - int queue_size = 1; - - // Figure out an adequate size for the interpolation queue. The larger - // the radius, the earlier we need to queue frames. - if (p->opts.interpolation) { - const struct filter_kernel *kernel = - mp_find_filter_kernel(p->opts.scaler[SCALER_TSCALE].kernel.name); - if (kernel) { - // filter_scale wouldn't be correctly initialized were we to use it here. - // This is fine since we're always upsampling, but beware if downsampling - // is added! - double radius = kernel->f.radius; - radius = radius > 0 ? radius : p->opts.scaler[SCALER_TSCALE].radius; - queue_size += 1 + ceil(radius); - } else { - // Oversample/linear case - queue_size += 2; - } - } - - vo_set_queue_params(vo, 0, queue_size); -} - -static int validate_scaler_opt(struct mp_log *log, const m_option_t *opt, - struct bstr name, struct bstr param) -{ - char s[20] = {0}; - int r = 1; - bool tscale = bstr_equals0(name, "tscale"); - if (bstr_equals0(param, "help")) { - r = M_OPT_EXIT; - } else { - snprintf(s, sizeof(s), "%.*s", BSTR_P(param)); - if (!handle_scaler_opt(s, tscale)) - r = M_OPT_INVALID; - } - if (r < 1) { - mp_info(log, "Available scalers:\n"); - for (const char *const *filter = tscale ? 
fixed_tscale_filters - : fixed_scale_filters; - *filter; filter++) { - mp_info(log, " %s\n", *filter); - } - for (int n = 0; mp_filter_kernels[n].f.name; n++) { - if (!tscale || !mp_filter_kernels[n].polar) - mp_info(log, " %s\n", mp_filter_kernels[n].f.name); - } - if (s[0]) - mp_fatal(log, "No scaler named '%s' found!\n", s); - } - return r; -} - -static int validate_window_opt(struct mp_log *log, const m_option_t *opt, - struct bstr name, struct bstr param) -{ - char s[20] = {0}; - int r = 1; - if (bstr_equals0(param, "help")) { - r = M_OPT_EXIT; - } else { - snprintf(s, sizeof(s), "%.*s", BSTR_P(param)); - const struct filter_window *window = mp_find_filter_window(s); - if (!window) - r = M_OPT_INVALID; - } - if (r < 1) { - mp_info(log, "Available windows:\n"); - for (int n = 0; mp_filter_windows[n].name; n++) - mp_info(log, " %s\n", mp_filter_windows[n].name); - if (s[0]) - mp_fatal(log, "No window named '%s' found!\n", s); - } - return r; -} - -float gl_video_scale_ambient_lux(float lmin, float lmax, - float rmin, float rmax, float lux) -{ - assert(lmax > lmin); - - float num = (rmax - rmin) * (log10(lux) - log10(lmin)); - float den = log10(lmax) - log10(lmin); - float result = num / den + rmin; - - // clamp the result - float max = MPMAX(rmax, rmin); - float min = MPMIN(rmax, rmin); - return MPMAX(MPMIN(result, max), min); -} - -void gl_video_set_ambient_lux(struct gl_video *p, int lux) -{ - if (p->opts.gamma_auto) { - float gamma = gl_video_scale_ambient_lux(16.0, 64.0, 2.40, 1.961, lux); - MP_VERBOSE(p, "ambient light changed: %dlux (gamma: %f)\n", lux, gamma); - p->opts.gamma = MPMIN(1.0, 1.961 / gamma); - } -} - -void gl_video_set_hwdec(struct gl_video *p, struct ra_hwdec *hwdec) -{ - unref_current_image(p); - ra_hwdec_mapper_free(&p->hwdec_mapper); - p->hwdec = hwdec; -} - -static void *gl_video_dr_alloc_buffer(struct gl_video *p, size_t size) -{ - struct ra_buf_params params = { - .type = RA_BUF_TYPE_TEX_UPLOAD, - .host_mapped = true, - .size = size, - }; - - struct ra_buf *buf = ra_buf_create(p->ra, ¶ms); - if (!buf) - return NULL; - - MP_TARRAY_GROW(p, p->dr_buffers, p->num_dr_buffers); - p->dr_buffers[p->num_dr_buffers++] = (struct dr_buffer){ .buf = buf }; - - return buf->data; -}; - -static void gl_video_dr_free_buffer(void *opaque, uint8_t *data) -{ - struct gl_video *p = opaque; - - for (int n = 0; n < p->num_dr_buffers; n++) { - struct dr_buffer *buffer = &p->dr_buffers[n]; - if (buffer->buf->data == data) { - assert(!buffer->mpi); // can't be freed while it has a ref - ra_buf_free(p->ra, &buffer->buf); - MP_TARRAY_REMOVE_AT(p->dr_buffers, p->num_dr_buffers, n); - return; - } - } - // not found - must not happen - assert(0); -} - -struct mp_image *gl_video_get_image(struct gl_video *p, int imgfmt, int w, int h, - int stride_align) -{ - int size = mp_image_get_alloc_size(imgfmt, w, h, stride_align); - if (size < 0) - return NULL; - - int alloc_size = size + stride_align; - void *ptr = gl_video_dr_alloc_buffer(p, alloc_size); - if (!ptr) - return NULL; - - // (we expect vo.c to proxy the free callback, so it happens in the same - // thread it was allocated in, removing the need for synchronization) - struct mp_image *res = mp_image_from_buffer(imgfmt, w, h, stride_align, - ptr, alloc_size, p, - gl_video_dr_free_buffer); - if (!res) - gl_video_dr_free_buffer(p, ptr); - return res; -} diff --git a/video/out/opengl/video.h b/video/out/opengl/video.h deleted file mode 100644 index d163bc8405..0000000000 --- a/video/out/opengl/video.h +++ /dev/null @@ -1,195 +0,0 @@ -/* - * 
This file is part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see . - */ - -#ifndef MP_GL_VIDEO_H -#define MP_GL_VIDEO_H - -#include - -#include "options/m_option.h" -#include "sub/osd.h" -#include "utils.h" -#include "lcms.h" -#include "shader_cache.h" -#include "video/csputils.h" -#include "video/out/filter_kernels.h" -#include "video/out/vo.h" - -// Assume we have this many texture units for sourcing additional passes. -// The actual texture unit assignment is dynamic. -#define TEXUNIT_VIDEO_NUM 6 - -struct scaler_fun { - char *name; - float params[2]; - float blur; - float taper; -}; - -struct scaler_config { - struct scaler_fun kernel; - struct scaler_fun window; - float radius; - float antiring; - float cutoff; - float clamp; -}; - -struct scaler { - int index; - struct scaler_config conf; - double scale_factor; - bool initialized; - struct filter_kernel *kernel; - struct ra_tex *lut; - struct fbotex sep_fbo; - bool insufficient; - int lut_size; - - // kernel points here - struct filter_kernel kernel_storage; -}; - -enum scaler_unit { - SCALER_SCALE, // luma/video - SCALER_DSCALE, // luma-video downscaling - SCALER_CSCALE, // chroma upscaling - SCALER_TSCALE, // temporal scaling (interpolation) - SCALER_COUNT -}; - -enum dither_algo { - DITHER_NONE = 0, - DITHER_FRUIT, - DITHER_ORDERED, -}; - -enum alpha_mode { - ALPHA_NO = 0, - ALPHA_YES, - ALPHA_BLEND, - ALPHA_BLEND_TILES, -}; - -enum blend_subs_mode { - BLEND_SUBS_NO = 0, - BLEND_SUBS_YES, - BLEND_SUBS_VIDEO, -}; - -enum tone_mapping { - TONE_MAPPING_CLIP, - TONE_MAPPING_MOBIUS, - TONE_MAPPING_REINHARD, - TONE_MAPPING_HABLE, - TONE_MAPPING_GAMMA, - TONE_MAPPING_LINEAR, -}; - -// How many frames to average over for HDR peak detection -#define PEAK_DETECT_FRAMES 100 - -struct gl_video_opts { - int dumb_mode; - struct scaler_config scaler[4]; - int scaler_lut_size; - float gamma; - int gamma_auto; - int target_prim; - int target_trc; - int target_brightness; - int tone_mapping; - int compute_hdr_peak; - float tone_mapping_param; - float tone_mapping_desat; - int gamut_warning; - int linear_scaling; - int correct_downscaling; - int sigmoid_upscaling; - float sigmoid_center; - float sigmoid_slope; - int scaler_resizes_only; - int pbo; - int dither_depth; - int dither_algo; - int dither_size; - int temporal_dither; - int temporal_dither_period; - char *fbo_format; - int alpha_mode; - int use_rectangle; - struct m_color background; - int interpolation; - float interpolation_threshold; - int blend_subs; - char **user_shaders; - int deband; - struct deband_opts *deband_opts; - float unsharp; - int tex_pad_x, tex_pad_y; - struct mp_icc_opts *icc_opts; - int early_flush; - char *shader_cache_dir; -}; - -extern const struct m_sub_options gl_video_conf; - -struct gl_video; -struct vo_frame; - -struct gl_video *gl_video_init(struct ra *ra, struct mp_log *log, - struct mpv_global *g); -void gl_video_uninit(struct gl_video *p); -void gl_video_set_osd_source(struct gl_video 
*p, struct osd_state *osd); -void gl_video_update_options(struct gl_video *p); -bool gl_video_check_format(struct gl_video *p, int mp_format); -void gl_video_config(struct gl_video *p, struct mp_image_params *params); -void gl_video_set_output_depth(struct gl_video *p, int r, int g, int b); -void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, - struct fbodst target); -void gl_video_resize(struct gl_video *p, - struct mp_rect *src, struct mp_rect *dst, - struct mp_osd_res *osd); -void gl_video_set_fb_depth(struct gl_video *p, int fb_depth); -void gl_video_perfdata(struct gl_video *p, struct voctrl_performance_data *out); -void gl_video_set_clear_color(struct gl_video *p, struct m_color color); -void gl_video_set_osd_pts(struct gl_video *p, double pts); -bool gl_video_check_osd_change(struct gl_video *p, struct mp_osd_res *osd, - double pts); - -float gl_video_scale_ambient_lux(float lmin, float lmax, - float rmin, float rmax, float lux); -void gl_video_set_ambient_lux(struct gl_video *p, int lux); -void gl_video_set_icc_profile(struct gl_video *p, bstr icc_data); -bool gl_video_icc_auto_enabled(struct gl_video *p); -bool gl_video_gamma_auto_enabled(struct gl_video *p); -struct mp_colorspace gl_video_get_output_colorspace(struct gl_video *p); - -void gl_video_reset(struct gl_video *p); -bool gl_video_showing_interpolated_frame(struct gl_video *p); - -struct ra_hwdec; -void gl_video_set_hwdec(struct gl_video *p, struct ra_hwdec *hwdec); - -struct vo; -void gl_video_configure_queue(struct gl_video *p, struct vo *vo); - -struct mp_image *gl_video_get_image(struct gl_video *p, int imgfmt, int w, int h, - int stride_align); - - -#endif diff --git a/video/out/opengl/video_shaders.c b/video/out/opengl/video_shaders.c deleted file mode 100644 index 60c5ce82ac..0000000000 --- a/video/out/opengl/video_shaders.c +++ /dev/null @@ -1,872 +0,0 @@ -/* - * This file is part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see . - */ - -#include - -#include "video_shaders.h" -#include "video.h" - -#define GLSL(x) gl_sc_add(sc, #x "\n"); -#define GLSLF(...) gl_sc_addf(sc, __VA_ARGS__) -#define GLSLH(x) gl_sc_hadd(sc, #x "\n"); -#define GLSLHF(...) 
gl_sc_haddf(sc, __VA_ARGS__) - -// Set up shared/commonly used variables and macros -void sampler_prelude(struct gl_shader_cache *sc, int tex_num) -{ - GLSLF("#undef tex\n"); - GLSLF("#undef texmap\n"); - GLSLF("#define tex texture%d\n", tex_num); - GLSLF("#define texmap texmap%d\n", tex_num); - GLSLF("vec2 pos = texcoord%d;\n", tex_num); - GLSLF("vec2 size = texture_size%d;\n", tex_num); - GLSLF("vec2 pt = pixel_size%d;\n", tex_num); -} - -static void pass_sample_separated_get_weights(struct gl_shader_cache *sc, - struct scaler *scaler) -{ - gl_sc_uniform_texture(sc, "lut", scaler->lut); - GLSLF("float ypos = LUT_POS(fcoord, %d.0);\n", scaler->lut_size); - - int N = scaler->kernel->size; - int width = (N + 3) / 4; // round up - - GLSLF("float weights[%d];\n", N); - for (int i = 0; i < N; i++) { - if (i % 4 == 0) - GLSLF("c = texture(lut, vec2(%f, ypos));\n", (i / 4 + 0.5) / width); - GLSLF("weights[%d] = c[%d];\n", i, i % 4); - } -} - -// Handle a single pass (either vertical or horizontal). The direction is given -// by the vector (d_x, d_y). If the vector is 0, then planar interpolation is -// used instead (samples from texture0 through textureN) -void pass_sample_separated_gen(struct gl_shader_cache *sc, struct scaler *scaler, - int d_x, int d_y) -{ - int N = scaler->kernel->size; - bool use_ar = scaler->conf.antiring > 0; - bool planar = d_x == 0 && d_y == 0; - GLSL(color = vec4(0.0);) - GLSLF("{\n"); - if (!planar) { - GLSLF("vec2 dir = vec2(%d.0, %d.0);\n", d_x, d_y); - GLSL(pt *= dir;) - GLSL(float fcoord = dot(fract(pos * size - vec2(0.5)), dir);) - GLSLF("vec2 base = pos - fcoord * pt - pt * vec2(%d.0);\n", N / 2 - 1); - } - GLSL(vec4 c;) - if (use_ar) { - GLSL(vec4 hi = vec4(0.0);) - GLSL(vec4 lo = vec4(1.0);) - } - pass_sample_separated_get_weights(sc, scaler); - GLSLF("// scaler samples\n"); - for (int n = 0; n < N; n++) { - if (planar) { - GLSLF("c = texture(texture%d, texcoord%d);\n", n, n); - } else { - GLSLF("c = texture(tex, base + pt * vec2(%d.0));\n", n); - } - GLSLF("color += vec4(weights[%d]) * c;\n", n); - if (use_ar && (n == N/2-1 || n == N/2)) { - GLSL(lo = min(lo, c);) - GLSL(hi = max(hi, c);) - } - } - if (use_ar) - GLSLF("color = mix(color, clamp(color, lo, hi), %f);\n", - scaler->conf.antiring); - GLSLF("}\n"); -} - -// Subroutine for computing and adding an individual texel contribution -// If subtexel < 0 and offset < 0, samples directly. -// If subtexel >= 0, takes the texel from cN[subtexel] -// If offset >= 0, takes the texel from inN[rel.y+y+offset][rel.x+x+offset] -static void polar_sample(struct gl_shader_cache *sc, struct scaler *scaler, - int x, int y, int subtexel, int offset, int components) -{ - double radius = scaler->kernel->f.radius * scaler->kernel->filter_scale; - double radius_cutoff = scaler->kernel->radius_cutoff; - - // Since we can't know the subpixel position in advance, assume a - // worst case scenario - int yy = y > 0 ? y-1 : y; - int xx = x > 0 ? 
x-1 : x; - double dmax = sqrt(xx*xx + yy*yy); - // Skip samples definitely outside the radius - if (dmax >= radius_cutoff) - return; - GLSLF("d = length(vec2(%d.0, %d.0) - fcoord);\n", x, y); - // Check for samples that might be skippable - bool maybe_skippable = dmax >= radius_cutoff - M_SQRT2; - if (maybe_skippable) - GLSLF("if (d < %f) {\n", radius_cutoff); - - // get the weight for this pixel - if (scaler->lut->params.dimensions == 1) { - GLSLF("w = tex1D(lut, LUT_POS(d * 1.0/%f, %d.0)).r;\n", - radius, scaler->lut_size); - } else { - GLSLF("w = texture(lut, vec2(0.5, LUT_POS(d * 1.0/%f, %d.0))).r;\n", - radius, scaler->lut_size); - } - GLSL(wsum += w;) - - if (subtexel < 0 && offset < 0) { - GLSLF("c0 = texture(tex, base + pt * vec2(%d.0, %d.0));\n", x, y); - GLSL(color += vec4(w) * c0;) - } else if (subtexel >= 0) { - for (int n = 0; n < components; n++) - GLSLF("color[%d] += w * c%d[%d];\n", n, n, subtexel); - } else if (offset >= 0) { - for (int n = 0; n lut); - - GLSLF("// scaler samples\n"); - int bound = ceil(scaler->kernel->radius_cutoff); - for (int y = 1-bound; y <= bound; y += 2) { - for (int x = 1-bound; x <= bound; x += 2) { - // First we figure out whether it's more efficient to use direct - // sampling or gathering. The problem is that gathering 4 texels - // only to discard some of them is very wasteful, so only do it if - // we suspect it will be a win rather than a loss. This is the case - // exactly when all four texels are within bounds - bool use_gather = sqrt(x*x + y*y) < scaler->kernel->radius_cutoff; - - // textureGather is only supported in GLSL 400+ - if (glsl_version < 400) - use_gather = false; - - if (use_gather) { - // Gather the four surrounding texels simultaneously - for (int n = 0; n < components; n++) { - GLSLF("c%d = textureGatherOffset(tex, base, ivec2(%d, %d), %d);\n", - n, x, y, n); - } - - // Mix in all of the points with their weights - for (int p = 0; p < 4; p++) { - // The four texels are gathered counterclockwise starting - // from the bottom left - static const int xo[4] = {0, 1, 1, 0}; - static const int yo[4] = {1, 1, 0, 0}; - if (x+xo[p] > bound || y+yo[p] > bound) - continue; - polar_sample(sc, scaler, x+xo[p], y+yo[p], p, -1, components); - } - } else { - // switch to direct sampling instead, for efficiency/compatibility - for (int yy = y; yy <= bound && yy <= y+1; yy++) { - for (int xx = x; xx <= bound && xx <= x+1; xx++) - polar_sample(sc, scaler, xx, yy, -1, -1, components); - } - } - } - } - - GLSL(color = color / vec4(wsum);) - GLSLF("}\n"); -} - -// bw/bh: block size -// iw/ih: input size (pre-calculated to fit all required texels) -void pass_compute_polar(struct gl_shader_cache *sc, struct scaler *scaler, - int components, int bw, int bh, int iw, int ih) -{ - int bound = ceil(scaler->kernel->radius_cutoff); - int offset = bound - 1; // padding top/left - - GLSL(color = vec4(0.0);) - GLSLF("{\n"); - GLSL(vec2 wpos = texmap(gl_WorkGroupID * gl_WorkGroupSize);) - GLSL(vec2 wbase = wpos - pt * fract(wpos * size - vec2(0.5));) - GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));) - GLSL(vec2 base = pos - pt * fcoord;) - GLSL(ivec2 rel = ivec2(round((base - wbase) * size));) - GLSLF("float w, d, wsum = 0.0;\n"); - gl_sc_uniform_texture(sc, "lut", scaler->lut); - - // Load all relevant texels into shmem - gl_sc_enable_extension(sc, "GL_ARB_arrays_of_arrays"); - for (int c = 0; c < components; c++) - GLSLHF("shared float in%d[%d][%d];\n", c, ih, iw); - - GLSL(vec4 c;) - GLSLF("for (int y = int(gl_LocalInvocationID.y); y < %d; y += %d) 
{\n", ih, bh); - GLSLF("for (int x = int(gl_LocalInvocationID.x); x < %d; x += %d) {\n", iw, bw); - GLSLF("c = texture(tex, wbase + pt * vec2(x - %d, y - %d));\n", offset, offset); - for (int c = 0; c < components; c++) - GLSLF("in%d[y][x] = c[%d];\n", c, c); - GLSLF("}}\n"); - GLSL(groupMemoryBarrier();) - GLSL(barrier();) - - // Dispatch the actual samples - GLSLF("// scaler samples\n"); - for (int y = 1-bound; y <= bound; y++) { - for (int x = 1-bound; x <= bound; x++) - polar_sample(sc, scaler, x, y, -1, offset, components); - } - - GLSL(color = color / vec4(wsum);) - GLSLF("}\n"); -} - -static void bicubic_calcweights(struct gl_shader_cache *sc, const char *t, const char *s) -{ - // Explanation of how bicubic scaling with only 4 texel fetches is done: - // http://www.mate.tue.nl/mate/pdfs/10318.pdf - // 'Efficient GPU-Based Texture Interpolation using Uniform B-Splines' - // Explanation why this algorithm normally always blurs, even with unit - // scaling: - // http://bigwww.epfl.ch/preprints/ruijters1001p.pdf - // 'GPU Prefilter for Accurate Cubic B-spline Interpolation' - GLSLF("vec4 %s = vec4(-0.5, 0.1666, 0.3333, -0.3333) * %s" - " + vec4(1, 0, -0.5, 0.5);\n", t, s); - GLSLF("%s = %s * %s + vec4(0, 0, -0.5, 0.5);\n", t, t, s); - GLSLF("%s = %s * %s + vec4(-0.6666, 0, 0.8333, 0.1666);\n", t, t, s); - GLSLF("%s.xy *= vec2(1, 1) / vec2(%s.z, %s.w);\n", t, t, t); - GLSLF("%s.xy += vec2(1.0 + %s, 1.0 - %s);\n", t, s, s); -} - -void pass_sample_bicubic_fast(struct gl_shader_cache *sc) -{ - GLSLF("{\n"); - GLSL(vec2 fcoord = fract(pos * size + vec2(0.5, 0.5));) - bicubic_calcweights(sc, "parmx", "fcoord.x"); - bicubic_calcweights(sc, "parmy", "fcoord.y"); - GLSL(vec4 cdelta;) - GLSL(cdelta.xz = parmx.rg * vec2(-pt.x, pt.x);) - GLSL(cdelta.yw = parmy.rg * vec2(-pt.y, pt.y);) - // first y-interpolation - GLSL(vec4 ar = texture(tex, pos + cdelta.xy);) - GLSL(vec4 ag = texture(tex, pos + cdelta.xw);) - GLSL(vec4 ab = mix(ag, ar, parmy.b);) - // second y-interpolation - GLSL(vec4 br = texture(tex, pos + cdelta.zy);) - GLSL(vec4 bg = texture(tex, pos + cdelta.zw);) - GLSL(vec4 aa = mix(bg, br, parmy.b);) - // x-interpolation - GLSL(color = mix(aa, ab, parmx.b);) - GLSLF("}\n"); -} - -void pass_sample_oversample(struct gl_shader_cache *sc, struct scaler *scaler, - int w, int h) -{ - GLSLF("{\n"); - GLSL(vec2 pos = pos - vec2(0.5) * pt;) // round to nearest - GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));) - // Determine the mixing coefficient vector - gl_sc_uniform_vec2(sc, "output_size", (float[2]){w, h}); - GLSL(vec2 coeff = fcoord * output_size/size;) - float threshold = scaler->conf.kernel.params[0]; - threshold = isnan(threshold) ? 
0.0 : threshold; - GLSLF("coeff = (coeff - %f) * 1.0/%f;\n", threshold, 1.0 - 2 * threshold); - GLSL(coeff = clamp(coeff, 0.0, 1.0);) - // Compute the right blend of colors - GLSL(color = texture(tex, pos + pt * (coeff - fcoord));) - GLSLF("}\n"); -} - -// Common constants for SMPTE ST.2084 (HDR) -static const float PQ_M1 = 2610./4096 * 1./4, - PQ_M2 = 2523./4096 * 128, - PQ_C1 = 3424./4096, - PQ_C2 = 2413./4096 * 32, - PQ_C3 = 2392./4096 * 32; - -// Common constants for ARIB STD-B67 (HLG) -static const float HLG_A = 0.17883277, - HLG_B = 0.28466892, - HLG_C = 0.55991073; - -// Common constants for Panasonic V-Log -static const float VLOG_B = 0.00873, - VLOG_C = 0.241514, - VLOG_D = 0.598206; - -// Common constants for Sony S-Log -static const float SLOG_A = 0.432699, - SLOG_B = 0.037584, - SLOG_C = 0.616596 + 0.03, - SLOG_P = 3.538813, - SLOG_Q = 0.030001, - SLOG_K2 = 155.0 / 219.0; - -// Linearize (expand), given a TRC as input. In essence, this is the ITU-R -// EOTF, calculated on an idealized (reference) monitor with a white point of -// MP_REF_WHITE and infinite contrast. -void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc) -{ - if (trc == MP_CSP_TRC_LINEAR) - return; - - GLSLF("// linearize\n"); - - // Note that this clamp may technically violate the definition of - // ITU-R BT.2100, which allows for sub-blacks and super-whites to be - // displayed on the display where such would be possible. That said, the - // problem is that not all gamma curves are well-defined on the values - // outside this range, so we ignore it and just clip anyway for sanity. - GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);) - - switch (trc) { - case MP_CSP_TRC_SRGB: - GLSL(color.rgb = mix(color.rgb * vec3(1.0/12.92), - pow((color.rgb + vec3(0.055))/vec3(1.055), vec3(2.4)), - lessThan(vec3(0.04045), color.rgb));) - break; - case MP_CSP_TRC_BT_1886: - GLSL(color.rgb = pow(color.rgb, vec3(2.4));) - break; - case MP_CSP_TRC_GAMMA18: - GLSL(color.rgb = pow(color.rgb, vec3(1.8));) - break; - case MP_CSP_TRC_GAMMA22: - GLSL(color.rgb = pow(color.rgb, vec3(2.2));) - break; - case MP_CSP_TRC_GAMMA28: - GLSL(color.rgb = pow(color.rgb, vec3(2.8));) - break; - case MP_CSP_TRC_PRO_PHOTO: - GLSL(color.rgb = mix(color.rgb * vec3(1.0/16.0), - pow(color.rgb, vec3(1.8)), - lessThan(vec3(0.03125), color.rgb));) - break; - case MP_CSP_TRC_PQ: - GLSLF("color.rgb = pow(color.rgb, vec3(1.0/%f));\n", PQ_M2); - GLSLF("color.rgb = max(color.rgb - vec3(%f), vec3(0.0)) \n" - " / (vec3(%f) - vec3(%f) * color.rgb);\n", - PQ_C1, PQ_C2, PQ_C3); - GLSLF("color.rgb = pow(color.rgb, vec3(1.0/%f));\n", PQ_M1); - // PQ's output range is 0-10000, but we need it to be relative to to - // MP_REF_WHITE instead, so rescale - GLSLF("color.rgb *= vec3(%f);\n", 10000 / MP_REF_WHITE); - break; - case MP_CSP_TRC_HLG: - GLSLF("color.rgb = mix(vec3(4.0) * color.rgb * color.rgb,\n" - " exp((color.rgb - vec3(%f)) * vec3(1.0/%f)) + vec3(%f),\n" - " lessThan(vec3(0.5), color.rgb));\n", - HLG_C, HLG_A, HLG_B); - break; - case MP_CSP_TRC_V_LOG: - GLSLF("color.rgb = mix((color.rgb - vec3(0.125)) * vec3(1.0/5.6), \n" - " pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n" - " - vec3(%f), \n" - " lessThanEqual(vec3(0.181), color.rgb)); \n", - VLOG_D, VLOG_C, VLOG_B); - break; - case MP_CSP_TRC_S_LOG1: - GLSLF("color.rgb = pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f))\n" - " - vec3(%f);\n", - SLOG_C, SLOG_A, SLOG_B); - break; - case MP_CSP_TRC_S_LOG2: - GLSLF("color.rgb = mix((color.rgb - vec3(%f)) * vec3(1.0/%f), \n" - " 
(pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n" - " - vec3(%f)) * vec3(1.0/%f), \n" - " lessThanEqual(vec3(%f), color.rgb)); \n", - SLOG_Q, SLOG_P, SLOG_C, SLOG_A, SLOG_B, SLOG_K2, SLOG_Q); - break; - default: - abort(); - } - - // Rescale to prevent clipping on non-float textures - GLSLF("color.rgb *= vec3(1.0/%f);\n", mp_trc_nom_peak(trc)); -} - -// Delinearize (compress), given a TRC as output. This corresponds to the -// inverse EOTF (not the OETF) in ITU-R terminology, again assuming a -// reference monitor. -void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc) -{ - if (trc == MP_CSP_TRC_LINEAR) - return; - - GLSLF("// delinearize\n"); - GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);) - GLSLF("color.rgb *= vec3(%f);\n", mp_trc_nom_peak(trc)); - - switch (trc) { - case MP_CSP_TRC_SRGB: - GLSL(color.rgb = mix(color.rgb * vec3(12.92), - vec3(1.055) * pow(color.rgb, vec3(1.0/2.4)) - - vec3(0.055), - lessThanEqual(vec3(0.0031308), color.rgb));) - break; - case MP_CSP_TRC_BT_1886: - GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.4));) - break; - case MP_CSP_TRC_GAMMA18: - GLSL(color.rgb = pow(color.rgb, vec3(1.0/1.8));) - break; - case MP_CSP_TRC_GAMMA22: - GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.2));) - break; - case MP_CSP_TRC_GAMMA28: - GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.8));) - break; - case MP_CSP_TRC_PRO_PHOTO: - GLSL(color.rgb = mix(color.rgb * vec3(16.0), - pow(color.rgb, vec3(1.0/1.8)), - lessThanEqual(vec3(0.001953), color.rgb));) - break; - case MP_CSP_TRC_PQ: - GLSLF("color.rgb *= vec3(1.0/%f);\n", 10000 / MP_REF_WHITE); - GLSLF("color.rgb = pow(color.rgb, vec3(%f));\n", PQ_M1); - GLSLF("color.rgb = (vec3(%f) + vec3(%f) * color.rgb) \n" - " / (vec3(1.0) + vec3(%f) * color.rgb);\n", - PQ_C1, PQ_C2, PQ_C3); - GLSLF("color.rgb = pow(color.rgb, vec3(%f));\n", PQ_M2); - break; - case MP_CSP_TRC_HLG: - GLSLF("color.rgb = mix(vec3(0.5) * sqrt(color.rgb),\n" - " vec3(%f) * log(color.rgb - vec3(%f)) + vec3(%f),\n" - " lessThan(vec3(1.0), color.rgb));\n", - HLG_A, HLG_B, HLG_C); - break; - case MP_CSP_TRC_V_LOG: - GLSLF("color.rgb = mix(vec3(5.6) * color.rgb + vec3(0.125), \n" - " vec3(%f) * log(color.rgb + vec3(%f)) \n" - " + vec3(%f), \n" - " lessThanEqual(vec3(0.01), color.rgb)); \n", - VLOG_C / M_LN10, VLOG_B, VLOG_D); - break; - case MP_CSP_TRC_S_LOG1: - GLSLF("color.rgb = vec3(%f) * log(color.rgb + vec3(%f)) + vec3(%f);\n", - SLOG_A / M_LN10, SLOG_B, SLOG_C); - break; - case MP_CSP_TRC_S_LOG2: - GLSLF("color.rgb = mix(vec3(%f) * color.rgb + vec3(%f), \n" - " vec3(%f) * log(vec3(%f) * color.rgb + vec3(%f)) \n" - " + vec3(%f), \n" - " lessThanEqual(vec3(0.0), color.rgb)); \n", - SLOG_P, SLOG_Q, SLOG_A / M_LN10, SLOG_K2, SLOG_B, SLOG_C); - break; - default: - abort(); - } -} - -// Apply the OOTF mapping from a given light type to display-referred light. 
-// The extra peak parameter is used to scale the values before and after -// the OOTF, and can be inferred using mp_trc_nom_peak -void pass_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, float peak) -{ - if (light == MP_CSP_LIGHT_DISPLAY) - return; - - GLSLF("// apply ootf\n"); - GLSLF("color.rgb *= vec3(%f);\n", peak); - - switch (light) - { - case MP_CSP_LIGHT_SCENE_HLG: - // HLG OOTF from BT.2100, assuming a reference display with a - // peak of 1000 cd/m² -> gamma = 1.2 - GLSLF("color.rgb *= vec3(%f * pow(dot(src_luma, color.rgb), 0.2));\n", - (1000 / MP_REF_WHITE) / pow(12, 1.2)); - break; - case MP_CSP_LIGHT_SCENE_709_1886: - // This OOTF is defined by encoding the result as 709 and then decoding - // it as 1886; although this is called 709_1886 we actually use the - // more precise (by one decimal) values from BT.2020 instead - GLSL(color.rgb = mix(color.rgb * vec3(4.5), - vec3(1.0993) * pow(color.rgb, vec3(0.45)) - vec3(0.0993), - lessThan(vec3(0.0181), color.rgb));) - GLSL(color.rgb = pow(color.rgb, vec3(2.4));) - break; - case MP_CSP_LIGHT_SCENE_1_2: - GLSL(color.rgb = pow(color.rgb, vec3(1.2));) - break; - default: - abort(); - } - - GLSLF("color.rgb *= vec3(1.0/%f);\n", peak); -} - -// Inverse of the function pass_ootf, for completeness' sake. -void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, float peak) -{ - if (light == MP_CSP_LIGHT_DISPLAY) - return; - - GLSLF("// apply inverse ootf\n"); - GLSLF("color.rgb *= vec3(%f);\n", peak); - - switch (light) - { - case MP_CSP_LIGHT_SCENE_HLG: - GLSLF("color.rgb *= vec3(1.0/%f);\n", (1000 / MP_REF_WHITE) / pow(12, 1.2)); - GLSL(color.rgb /= vec3(max(1e-6, pow(dot(src_luma, color.rgb), 0.2/1.2)));) - break; - case MP_CSP_LIGHT_SCENE_709_1886: - GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.4));) - GLSL(color.rgb = mix(color.rgb * vec3(1.0/4.5), - pow((color.rgb + vec3(0.0993)) * vec3(1.0/1.0993), - vec3(1/0.45)), - lessThan(vec3(0.08145), color.rgb));) - break; - case MP_CSP_LIGHT_SCENE_1_2: - GLSL(color.rgb = pow(color.rgb, vec3(1.0/1.2));) - break; - default: - abort(); - } - - GLSLF("color.rgb *= vec3(1.0/%f);\n", peak); -} - -// Tone map from a known peak brightness to the range [0,1]. If ref_peak -// is 0, we will use peak detection instead -static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak, - enum tone_mapping algo, float param, float desat) -{ - GLSLF("// HDR tone mapping\n"); - - // Desaturate the color using a coefficient dependent on the luminance - GLSL(float luma = dot(dst_luma, color.rgb);) - if (desat > 0) { - GLSLF("float overbright = max(luma - %f, 1e-6) / max(luma, 1e-6);\n", desat); - GLSL(color.rgb = mix(color.rgb, vec3(luma), overbright);) - } - - // To prevent discoloration due to out-of-bounds clipping, we need to make - // sure to reduce the value range as far as necessary to keep the entire - // signal in range, so tone map based on the brightest component. - GLSL(float sig = max(max(color.r, color.g), color.b);) - GLSL(float sig_orig = sig;) - - if (!ref_peak) { - // For performance, we want to do as few atomic operations on global - // memory as possible, so use an atomic in shmem for the work group. 
- // We also want slightly more stable values, so use the group average - // instead of the group max - GLSLHF("shared uint group_sum = 0;\n"); - GLSLF("atomicAdd(group_sum, uint(sig * %f));\n", MP_REF_WHITE); - - // Have one thread in each work group update the frame maximum - GLSL(memoryBarrierBuffer();) - GLSL(barrier();) - GLSL(if (gl_LocalInvocationIndex == 0)) - GLSL(atomicMax(frame_max[index], group_sum / - (gl_WorkGroupSize.x * gl_WorkGroupSize.y));) - - // Finally, have one thread per invocation update the total maximum - // and advance the index - GLSL(memoryBarrierBuffer();) - GLSL(barrier();) - GLSL(if (gl_GlobalInvocationID == ivec3(0)) {) // do this once per invocation - GLSLF("uint next = (index + 1) %% %d;\n", PEAK_DETECT_FRAMES+1); - GLSLF("sig_peak_raw = sig_peak_raw + frame_max[index] - frame_max[next];\n"); - GLSLF("frame_max[next] = %d;\n", (int)MP_REF_WHITE); - GLSL(index = next;) - GLSL(}) - - GLSL(memoryBarrierBuffer();) - GLSL(barrier();) - GLSLF("float sig_peak = 1.0/%f * float(sig_peak_raw);\n", - MP_REF_WHITE * PEAK_DETECT_FRAMES); - } else { - GLSLHF("const float sig_peak = %f;\n", ref_peak); - } - - switch (algo) { - case TONE_MAPPING_CLIP: - GLSLF("sig = %f * sig;\n", isnan(param) ? 1.0 : param); - break; - - case TONE_MAPPING_MOBIUS: - GLSLF("const float j = %f;\n", isnan(param) ? 0.3 : param); - // solve for M(j) = j; M(sig_peak) = 1.0; M'(j) = 1.0 - // where M(x) = scale * (x+a)/(x+b) - GLSLF("float a = -j*j * (sig_peak - 1.0) / (j*j - 2.0*j + sig_peak);\n"); - GLSLF("float b = (j*j - 2.0*j*sig_peak + sig_peak) / " - "max(1e-6, sig_peak - 1.0);\n"); - GLSLF("float scale = (b*b + 2.0*b*j + j*j) / (b-a);\n"); - GLSL(sig = mix(sig, scale * (sig + a) / (sig + b), sig > j);) - break; - - case TONE_MAPPING_REINHARD: { - float contrast = isnan(param) ? 0.5 : param, - offset = (1.0 - contrast) / contrast; - GLSLF("sig = sig / (sig + %f);\n", offset); - GLSLF("float scale = (sig_peak + %f) / sig_peak;\n", offset); - GLSL(sig *= scale;) - break; - } - - case TONE_MAPPING_HABLE: { - float A = 0.15, B = 0.50, C = 0.10, D = 0.20, E = 0.02, F = 0.30; - GLSLHF("float hable(float x) {\n"); - GLSLHF("return ((x * (%f*x + %f)+%f)/(x * (%f*x + %f) + %f)) - %f;\n", - A, C*B, D*E, A, B, D*F, E/F); - GLSLHF("}\n"); - GLSL(sig = hable(sig) / hable(sig_peak);) - break; - } - - case TONE_MAPPING_GAMMA: { - float gamma = isnan(param) ? 1.8 : param; - GLSLF("const float cutoff = 0.05, gamma = %f;\n", 1.0/gamma); - GLSL(float scale = pow(cutoff / sig_peak, gamma) / cutoff;) - GLSL(sig = sig > cutoff ? pow(sig / sig_peak, gamma) : scale * sig;) - break; - } - - case TONE_MAPPING_LINEAR: { - float coeff = isnan(param) ? 1.0 : param; - GLSLF("sig = %f / sig_peak * sig;\n", coeff); - break; - } - - default: - abort(); - } - - // Apply the computed scale factor to the color, linearly to prevent - // discoloration - GLSL(color.rgb *= sig / sig_orig;) -} - -// Map colors from one source space to another. These source spaces must be -// known (i.e. not MP_CSP_*_AUTO), as this function won't perform any -// auto-guessing. If is_linear is true, we assume the input has already been -// linearized (e.g. for linear-scaling). If `detect_peak` is true, we will -// detect the peak instead of relying on metadata. 
Note that this requires -// the caller to have already bound the appropriate SSBO and set up the -// compute shader metadata -void pass_color_map(struct gl_shader_cache *sc, - struct mp_colorspace src, struct mp_colorspace dst, - enum tone_mapping algo, float tone_mapping_param, - float tone_mapping_desat, bool detect_peak, - bool gamut_warning, bool is_linear) -{ - GLSLF("// color mapping\n"); - - // Compute the highest encodable level - float src_range = mp_trc_nom_peak(src.gamma), - dst_range = mp_trc_nom_peak(dst.gamma); - float ref_peak = src.sig_peak / dst_range; - - // Some operations need access to the video's luma coefficients, so make - // them available - float rgb2xyz[3][3]; - mp_get_rgb2xyz_matrix(mp_get_csp_primaries(src.primaries), rgb2xyz); - gl_sc_uniform_vec3(sc, "src_luma", rgb2xyz[1]); - mp_get_rgb2xyz_matrix(mp_get_csp_primaries(dst.primaries), rgb2xyz); - gl_sc_uniform_vec3(sc, "dst_luma", rgb2xyz[1]); - - // All operations from here on require linear light as a starting point, - // so we linearize even if src.gamma == dst.gamma when one of the other - // operations needs it - bool need_gamma = src.gamma != dst.gamma || - src.primaries != dst.primaries || - src_range != dst_range || - src.sig_peak > dst_range || - src.light != dst.light; - - if (need_gamma && !is_linear) { - pass_linearize(sc, src.gamma); - is_linear= true; - } - - if (src.light != dst.light) - pass_ootf(sc, src.light, mp_trc_nom_peak(src.gamma)); - - // Rescale the signal to compensate for differences in the encoding range - // and reference white level. This is necessary because of how mpv encodes - // brightness in textures. - if (src_range != dst_range) { - GLSLF("// rescale value range;\n"); - GLSLF("color.rgb *= vec3(%f);\n", src_range / dst_range); - } - - // Adapt to the right colorspace if necessary - if (src.primaries != dst.primaries) { - struct mp_csp_primaries csp_src = mp_get_csp_primaries(src.primaries), - csp_dst = mp_get_csp_primaries(dst.primaries); - float m[3][3] = {{0}}; - mp_get_cms_matrix(csp_src, csp_dst, MP_INTENT_RELATIVE_COLORIMETRIC, m); - gl_sc_uniform_mat3(sc, "cms_matrix", true, &m[0][0]); - GLSL(color.rgb = cms_matrix * color.rgb;) - // Since this can reduce the gamut, figure out by how much - for (int c = 0; c < 3; c++) - ref_peak = MPMAX(ref_peak, m[c][c]); - } - - // Tone map to prevent clipping when the source signal peak exceeds the - // encodable range or we've reduced the gamut - if (ref_peak > 1) { - pass_tone_map(sc, detect_peak ? 0 : ref_peak, algo, - tone_mapping_param, tone_mapping_desat); - } - - if (src.light != dst.light) - pass_inverse_ootf(sc, dst.light, mp_trc_nom_peak(dst.gamma)); - - // Warn for remaining out-of-gamut colors is enabled - if (gamut_warning) { - GLSL(if (any(greaterThan(color.rgb, vec3(1.01))))) - GLSL(color.rgb = vec3(1.0) - color.rgb;) // invert - } - - if (is_linear) - pass_delinearize(sc, dst.gamma); -} - -// Wide usage friendly PRNG, shamelessly stolen from a GLSL tricks forum post. -// Obtain random numbers by calling rand(h), followed by h = permute(h) to -// update the state. Assumes the texture was hooked. 
-static void prng_init(struct gl_shader_cache *sc, AVLFG *lfg) -{ - GLSLH(float mod289(float x) { return x - floor(x * 1.0/289.0) * 289.0; }) - GLSLH(float permute(float x) { return mod289((34.0*x + 1.0) * x); }) - GLSLH(float rand(float x) { return fract(x * 1.0/41.0); }) - - // Initialize the PRNG by hashing the position + a random uniform - GLSL(vec3 _m = vec3(HOOKED_pos, random) + vec3(1.0);) - GLSL(float h = permute(permute(permute(_m.x)+_m.y)+_m.z);) - gl_sc_uniform_f(sc, "random", (double)av_lfg_get(lfg) / UINT32_MAX); -} - -struct deband_opts { - int enabled; - int iterations; - float threshold; - float range; - float grain; -}; - -const struct deband_opts deband_opts_def = { - .iterations = 1, - .threshold = 64.0, - .range = 16.0, - .grain = 48.0, -}; - -#define OPT_BASE_STRUCT struct deband_opts -const struct m_sub_options deband_conf = { - .opts = (const m_option_t[]) { - OPT_INTRANGE("iterations", iterations, 0, 1, 16), - OPT_FLOATRANGE("threshold", threshold, 0, 0.0, 4096.0), - OPT_FLOATRANGE("range", range, 0, 1.0, 64.0), - OPT_FLOATRANGE("grain", grain, 0, 0.0, 4096.0), - {0} - }, - .size = sizeof(struct deband_opts), - .defaults = &deband_opts_def, -}; - -// Stochastically sample a debanded result from a hooked texture. -void pass_sample_deband(struct gl_shader_cache *sc, struct deband_opts *opts, - AVLFG *lfg, enum mp_csp_trc trc) -{ - // Initialize the PRNG - GLSLF("{\n"); - prng_init(sc, lfg); - - // Helper: Compute a stochastic approximation of the avg color around a - // pixel - GLSLHF("vec4 average(float range, inout float h) {\n"); - // Compute a random rangle and distance - GLSLH(float dist = rand(h) * range; h = permute(h);) - GLSLH(float dir = rand(h) * 6.2831853; h = permute(h);) - GLSLH(vec2 o = dist * vec2(cos(dir), sin(dir));) - - // Sample at quarter-turn intervals around the source pixel - GLSLH(vec4 ref[4];) - GLSLH(ref[0] = HOOKED_texOff(vec2( o.x, o.y));) - GLSLH(ref[1] = HOOKED_texOff(vec2(-o.y, o.x));) - GLSLH(ref[2] = HOOKED_texOff(vec2(-o.x, -o.y));) - GLSLH(ref[3] = HOOKED_texOff(vec2( o.y, -o.x));) - - // Return the (normalized) average - GLSLH(return (ref[0] + ref[1] + ref[2] + ref[3])*0.25;) - GLSLHF("}\n"); - - // Sample the source pixel - GLSL(color = HOOKED_tex(HOOKED_pos);) - GLSLF("vec4 avg, diff;\n"); - for (int i = 1; i <= opts->iterations; i++) { - // Sample the average pixel and use it instead of the original if - // the difference is below the given threshold - GLSLF("avg = average(%f, h);\n", i * opts->range); - GLSL(diff = abs(color - avg);) - GLSLF("color = mix(avg, color, greaterThan(diff, vec4(%f)));\n", - opts->threshold / (i * 16384.0)); - } - - // Add some random noise to smooth out residual differences - GLSL(vec3 noise;) - GLSL(noise.x = rand(h); h = permute(h);) - GLSL(noise.y = rand(h); h = permute(h);) - GLSL(noise.z = rand(h); h = permute(h);) - - // Noise is scaled to the signal level to prevent extreme noise for HDR - float gain = opts->grain/8192.0 / mp_trc_nom_peak(trc); - GLSLF("color.xyz += %f * (noise - vec3(0.5));\n", gain); - GLSLF("}\n"); -} - -// Assumes the texture was hooked -void pass_sample_unsharp(struct gl_shader_cache *sc, float param) { - GLSLF("{\n"); - GLSL(float st1 = 1.2;) - GLSL(vec4 p = HOOKED_tex(HOOKED_pos);) - GLSL(vec4 sum1 = HOOKED_texOff(st1 * vec2(+1, +1)) - + HOOKED_texOff(st1 * vec2(+1, -1)) - + HOOKED_texOff(st1 * vec2(-1, +1)) - + HOOKED_texOff(st1 * vec2(-1, -1));) - GLSL(float st2 = 1.5;) - GLSL(vec4 sum2 = HOOKED_texOff(st2 * vec2(+1, 0)) - + HOOKED_texOff(st2 * vec2( 0, +1)) - + 
HOOKED_texOff(st2 * vec2(-1, 0)) - + HOOKED_texOff(st2 * vec2( 0, -1));) - GLSL(vec4 t = p * 0.859375 + sum2 * -0.1171875 + sum1 * -0.09765625;) - GLSLF("color = p + t * %f;\n", param); - GLSLF("}\n"); -} diff --git a/video/out/opengl/video_shaders.h b/video/out/opengl/video_shaders.h deleted file mode 100644 index 8345e4c598..0000000000 --- a/video/out/opengl/video_shaders.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * This file is part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see . - */ - -#ifndef MP_GL_VIDEO_SHADERS_H -#define MP_GL_VIDEO_SHADERS_H - -#include - -#include "utils.h" -#include "video.h" - -extern const struct deband_opts deband_opts_def; -extern const struct m_sub_options deband_conf; - -void sampler_prelude(struct gl_shader_cache *sc, int tex_num); -void pass_sample_separated_gen(struct gl_shader_cache *sc, struct scaler *scaler, - int d_x, int d_y); -void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler, - int components, int glsl_version); -void pass_compute_polar(struct gl_shader_cache *sc, struct scaler *scaler, - int components, int bw, int bh, int iw, int ih); -void pass_sample_bicubic_fast(struct gl_shader_cache *sc); -void pass_sample_oversample(struct gl_shader_cache *sc, struct scaler *scaler, - int w, int h); - -void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc); -void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc); -void pass_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, float peak); -void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, float peak); - -void pass_color_map(struct gl_shader_cache *sc, - struct mp_colorspace src, struct mp_colorspace dst, - enum tone_mapping algo, float tone_mapping_param, - float tone_mapping_desat, bool use_detected_peak, - bool gamut_warning, bool is_linear); - -void pass_sample_deband(struct gl_shader_cache *sc, struct deband_opts *opts, - AVLFG *lfg, enum mp_csp_trc trc); - -void pass_sample_unsharp(struct gl_shader_cache *sc, float param); - -#endif diff --git a/video/out/vo.c b/video/out/vo.c index f9c5d04e24..a40360b188 100644 --- a/video/out/vo.c +++ b/video/out/vo.c @@ -50,6 +50,7 @@ extern const struct vo_driver video_out_x11; extern const struct vo_driver video_out_vdpau; extern const struct vo_driver video_out_xv; +extern const struct vo_driver video_out_gpu; extern const struct vo_driver video_out_opengl; extern const struct vo_driver video_out_opengl_cb; extern const struct vo_driver video_out_null; @@ -69,8 +70,8 @@ const struct vo_driver *const video_out_drivers[] = #if HAVE_RPI &video_out_rpi, #endif -#if HAVE_GL - &video_out_opengl, +#if HAVE_GPU + &video_out_gpu, #endif #if HAVE_VDPAU &video_out_vdpau, @@ -107,6 +108,7 @@ const struct vo_driver *const video_out_drivers[] = &video_out_lavc, #endif #if HAVE_GL + &video_out_opengl, &video_out_opengl_cb, #endif NULL diff --git a/video/out/vo_gpu.c b/video/out/vo_gpu.c new file mode 100644 index 
0000000000..5df9e06f47 --- /dev/null +++ b/video/out/vo_gpu.c @@ -0,0 +1,385 @@ +/* + * Based on vo_gl.c by Reimar Doeffinger. + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see . + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include "config.h" + +#include "mpv_talloc.h" +#include "common/common.h" +#include "misc/bstr.h" +#include "common/msg.h" +#include "common/global.h" +#include "options/m_config.h" +#include "vo.h" +#include "video/mp_image.h" +#include "sub/osd.h" + +#include "gpu/context.h" +#include "gpu/hwdec.h" +#include "gpu/video.h" + +struct gpu_priv { + struct vo *vo; + struct mp_log *log; + struct ra_ctx *ctx; + + char *context_name; + char *context_type; + struct ra_ctx_opts opts; + struct gl_video *renderer; + struct ra_hwdec *hwdec; + + int events; +}; + +static void resize(struct gpu_priv *p) +{ + struct vo *vo = p->vo; + + MP_VERBOSE(vo, "Resize: %dx%d\n", vo->dwidth, vo->dheight); + + struct mp_rect src, dst; + struct mp_osd_res osd; + vo_get_src_dst_rects(vo, &src, &dst, &osd); + + gl_video_resize(p->renderer, &src, &dst, &osd); + + vo->want_redraw = true; +} + +static void draw_frame(struct vo *vo, struct vo_frame *frame) +{ + struct gpu_priv *p = vo->priv; + struct ra_swapchain *sw = p->ctx->swapchain; + + struct ra_tex *tex = sw->fns->start_frame(sw); + if (!tex) { + MP_ERR(vo, "Failed starting frame!\n"); + return; + } + + struct fbodst dst = { + .tex = tex, + .flip = sw->flip_v, + }; + + gl_video_render_frame(p->renderer, frame, dst); + if (!sw->fns->submit_frame(sw, frame)) { + MP_ERR(vo, "Failed presenting frame!\n"); + return; + } +} + +static void flip_page(struct vo *vo) +{ + struct gpu_priv *p = vo->priv; + struct ra_swapchain *sw = p->ctx->swapchain; + sw->fns->swap_buffers(sw); +} + +static int query_format(struct vo *vo, int format) +{ + struct gpu_priv *p = vo->priv; + if (!gl_video_check_format(p->renderer, format)) + return 0; + return 1; +} + +static int reconfig(struct vo *vo, struct mp_image_params *params) +{ + struct gpu_priv *p = vo->priv; + + if (!p->ctx->fns->reconfig(p->ctx)) + return -1; + + resize(p); + gl_video_config(p->renderer, params); + + return 0; +} + +static void request_hwdec_api(struct vo *vo, void *api) +{ + struct gpu_priv *p = vo->priv; + + if (p->hwdec) + return; + + p->hwdec = ra_hwdec_load_api(p->vo->log, p->ctx->ra, p->vo->global, + vo->hwdec_devs, (intptr_t)api); + gl_video_set_hwdec(p->renderer, p->hwdec); +} + +static void call_request_hwdec_api(void *ctx, enum hwdec_type type) +{ + // Roundabout way to run hwdec loading on the VO thread. + // Redirects to request_hwdec_api(). 
+ vo_control(ctx, VOCTRL_LOAD_HWDEC_API, (void *)(intptr_t)type); +} + +static void get_and_update_icc_profile(struct gpu_priv *p) +{ + if (gl_video_icc_auto_enabled(p->renderer)) { + MP_VERBOSE(p, "Querying ICC profile...\n"); + bstr icc = bstr0(NULL); + int r = p->ctx->fns->control(p->ctx, &p->events, VOCTRL_GET_ICC_PROFILE, &icc); + + if (r != VO_NOTAVAIL) { + if (r == VO_FALSE) { + MP_WARN(p, "Could not retrieve an ICC profile.\n"); + } else if (r == VO_NOTIMPL) { + MP_ERR(p, "icc-profile-auto not implemented on this platform.\n"); + } + + gl_video_set_icc_profile(p->renderer, icc); + } + } +} + +static void get_and_update_ambient_lighting(struct gpu_priv *p) +{ + int lux; + int r = p->ctx->fns->control(p->ctx, &p->events, VOCTRL_GET_AMBIENT_LUX, &lux); + if (r == VO_TRUE) { + gl_video_set_ambient_lux(p->renderer, lux); + } + if (r != VO_TRUE && gl_video_gamma_auto_enabled(p->renderer)) { + MP_ERR(p, "gamma_auto option provided, but querying for ambient" + " lighting is not supported on this platform\n"); + } +} + +static int control(struct vo *vo, uint32_t request, void *data) +{ + struct gpu_priv *p = vo->priv; + struct ra_swapchain *sw = p->ctx->swapchain; + + switch (request) { + case VOCTRL_SET_PANSCAN: + resize(p); + return VO_TRUE; + case VOCTRL_SET_EQUALIZER: + vo->want_redraw = true; + return VO_TRUE; + case VOCTRL_SCREENSHOT_WIN: { + struct mp_image *screen = NULL; + if (sw->fns->screenshot) + screen = sw->fns->screenshot(sw); + if (!screen) + break; // redirect to backend + // set image parameters according to the display, if possible + screen->params.color = gl_video_get_output_colorspace(p->renderer); + *(struct mp_image **)data = screen; + return true; + } + case VOCTRL_LOAD_HWDEC_API: + request_hwdec_api(vo, data); + return true; + case VOCTRL_UPDATE_RENDER_OPTS: { + gl_video_update_options(p->renderer); + get_and_update_icc_profile(p); + gl_video_configure_queue(p->renderer, p->vo); + p->vo->want_redraw = true; + return true; + } + case VOCTRL_RESET: + gl_video_reset(p->renderer); + return true; + case VOCTRL_PAUSE: + if (gl_video_showing_interpolated_frame(p->renderer)) + vo->want_redraw = true; + return true; + case VOCTRL_PERFORMANCE_DATA: + gl_video_perfdata(p->renderer, (struct voctrl_performance_data *)data); + return true; + } + + int events = 0; + int r = p->ctx->fns->control(p->ctx, &events, request, data); + if (events & VO_EVENT_ICC_PROFILE_CHANGED) { + get_and_update_icc_profile(p); + vo->want_redraw = true; + } + if (events & VO_EVENT_AMBIENT_LIGHTING_CHANGED) { + get_and_update_ambient_lighting(p); + vo->want_redraw = true; + } + events |= p->events; + p->events = 0; + if (events & VO_EVENT_RESIZE) + resize(p); + if (events & VO_EVENT_EXPOSE) + vo->want_redraw = true; + vo_event(vo, events); + + return r; +} + +static void wakeup(struct vo *vo) +{ + struct gpu_priv *p = vo->priv; + if (p->ctx && p->ctx->fns->wakeup) + p->ctx->fns->wakeup(p->ctx); +} + +static void wait_events(struct vo *vo, int64_t until_time_us) +{ + struct gpu_priv *p = vo->priv; + if (p->ctx && p->ctx->fns->wait_events) { + p->ctx->fns->wait_events(p->ctx, until_time_us); + } else { + vo_wait_default(vo, until_time_us); + } +} + +static struct mp_image *get_image(struct vo *vo, int imgfmt, int w, int h, + int stride_align) +{ + struct gpu_priv *p = vo->priv; + + return gl_video_get_image(p->renderer, imgfmt, w, h, stride_align); +} + +static void uninit(struct vo *vo) +{ + struct gpu_priv *p = vo->priv; + + gl_video_uninit(p->renderer); + ra_hwdec_uninit(p->hwdec); + if (vo->hwdec_devs) 
{ + hwdec_devices_set_loader(vo->hwdec_devs, NULL, NULL); + hwdec_devices_destroy(vo->hwdec_devs); + } + ra_ctx_destroy(&p->ctx); +} + +static int preinit(struct vo *vo) +{ + struct gpu_priv *p = vo->priv; + p->vo = vo; + p->log = vo->log; + + int alpha_mode; + mp_read_option_raw(vo->global, "alpha", &m_option_type_choice, &alpha_mode); + + struct ra_ctx_opts opts = p->opts; + opts.want_alpha = alpha_mode == 1; + + p->ctx = ra_ctx_create(vo, p->context_type, p->context_name, opts); + if (!p->ctx) + goto err_out; + assert(p->ctx->ra); + assert(p->ctx->swapchain); + struct ra_swapchain *sw = p->ctx->swapchain; + + p->renderer = gl_video_init(p->ctx->ra, vo->log, vo->global); + gl_video_set_osd_source(p->renderer, vo->osd); + gl_video_configure_queue(p->renderer, vo); + + get_and_update_icc_profile(p); + + vo->hwdec_devs = hwdec_devices_create(); + + hwdec_devices_set_loader(vo->hwdec_devs, call_request_hwdec_api, vo); + + p->hwdec = ra_hwdec_load(p->vo->log, p->ctx->ra, vo->global, + vo->hwdec_devs, vo->opts->gl_hwdec_interop); + gl_video_set_hwdec(p->renderer, p->hwdec); + + int fb_depth = sw->fns->color_depth ? sw->fns->color_depth(sw) : 0; + if (fb_depth) + MP_VERBOSE(p, "Reported display depth: %d\n", fb_depth); + gl_video_set_fb_depth(p->renderer, fb_depth); + + return 0; + +err_out: + uninit(vo); + return -1; +} + +#define OPT_BASE_STRUCT struct gpu_priv +static const m_option_t options[] = { + OPT_STRING_VALIDATE("gpu-context", context_name, 0, ra_ctx_validate_context), + OPT_STRING_VALIDATE("gpu-api", context_type, 0, ra_ctx_validate_api), + OPT_FLAG("gpu-debug", opts.debug, 0), + OPT_FLAG("gpu-sw", opts.allow_sw, 0), + OPT_INTRANGE("swapchain-depth", opts.swapchain_depth, 0, 1, 8), + {0} +}; + +static const struct gpu_priv defaults = { .opts = { + .swapchain_depth = 3, +}}; + +const struct vo_driver video_out_gpu = { + .description = "Shader-based GPU Renderer", + .name = "gpu", + .caps = VO_CAP_ROTATE90, + .preinit = preinit, + .query_format = query_format, + .reconfig = reconfig, + .control = control, + .get_image = get_image, + .draw_frame = draw_frame, + .flip_page = flip_page, + .wait_events = wait_events, + .wakeup = wakeup, + .uninit = uninit, + .priv_size = sizeof(struct gpu_priv), + .priv_defaults = &defaults, + .options = options, +}; + +static int preinit_opengl(struct vo *vo) +{ + MP_WARN(vo, "--vo=opengl was replaced by --vo=gpu --gpu-api=opengl, and will" + " be removed in the future!\n"); + + struct gpu_priv *p = vo->priv; + p->context_type = "opengl"; + return preinit(vo); +} + +const struct vo_driver video_out_opengl = { + .description = "Shader-based GPU Renderer", + .name = "opengl", + .caps = VO_CAP_ROTATE90, + .preinit = preinit_opengl, + .query_format = query_format, + .reconfig = reconfig, + .control = control, + .get_image = get_image, + .draw_frame = draw_frame, + .flip_page = flip_page, + .wait_events = wait_events, + .wakeup = wakeup, + .uninit = uninit, + .priv_size = sizeof(struct gpu_priv), + .priv_defaults = &defaults, + .options = options, +}; diff --git a/video/out/vo_opengl.c b/video/out/vo_opengl.c deleted file mode 100644 index 72691e56c2..0000000000 --- a/video/out/vo_opengl.c +++ /dev/null @@ -1,470 +0,0 @@ -/* - * Based on vo_gl.c by Reimar Doeffinger. - * - * This file is part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see . - */ - -#include -#include -#include -#include -#include -#include - -#include - -#include "config.h" - -#include "mpv_talloc.h" -#include "common/common.h" -#include "misc/bstr.h" -#include "common/msg.h" -#include "common/global.h" -#include "options/m_config.h" -#include "vo.h" -#include "video/mp_image.h" -#include "sub/osd.h" - -#include "opengl/context.h" -#include "opengl/utils.h" -#include "opengl/hwdec.h" -#include "opengl/osd.h" -#include "filter_kernels.h" -#include "video/hwdec.h" -#include "opengl/video.h" -#include "opengl/ra_gl.h" - -#define NUM_VSYNC_FENCES 10 - -struct vo_opengl_opts { - int use_glFinish; - int waitvsync; - int use_gl_debug; - int allow_sw; - int swap_interval; - int vsync_fences; - char *backend; - int es; - int pattern[2]; -}; - -struct gl_priv { - struct vo *vo; - struct mp_log *log; - MPGLContext *glctx; - GL *gl; - struct ra *ra; - - struct vo_opengl_opts opts; - - struct gl_video *renderer; - - struct ra_hwdec *hwdec; - - int events; - - int frames_rendered; - unsigned int prev_sgi_sync_count; - - // check-pattern sub-option; for testing/debugging - int last_pattern; - int matches, mismatches; - - GLsync vsync_fences[NUM_VSYNC_FENCES]; - int num_vsync_fences; -}; - -static void resize(struct gl_priv *p) -{ - struct vo *vo = p->vo; - - MP_VERBOSE(vo, "Resize: %dx%d\n", vo->dwidth, vo->dheight); - - struct mp_rect src, dst; - struct mp_osd_res osd; - vo_get_src_dst_rects(vo, &src, &dst, &osd); - - gl_video_resize(p->renderer, &src, &dst, &osd); - - vo->want_redraw = true; -} - -static void check_pattern(struct vo *vo, int item) -{ - struct gl_priv *p = vo->priv; - int expected = p->opts.pattern[p->last_pattern]; - if (item == expected) { - p->last_pattern++; - if (p->last_pattern >= 2) - p->last_pattern = 0; - p->matches++; - } else { - p->mismatches++; - MP_WARN(vo, "wrong pattern, expected %d got %d (hit: %d, mis: %d)\n", - expected, item, p->matches, p->mismatches); - } -} - -static void draw_frame(struct vo *vo, struct vo_frame *frame) -{ - struct gl_priv *p = vo->priv; - GL *gl = p->gl; - - mpgl_start_frame(p->glctx); - - if (gl->FenceSync && p->num_vsync_fences < p->opts.vsync_fences) { - GLsync fence = gl->FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);; - if (fence) - p->vsync_fences[p->num_vsync_fences++] = fence; - } - - struct fbodst target = { - .tex = ra_create_wrapped_fb(p->ra, p->glctx->main_fb, - vo->dwidth, vo->dheight), - .flip = !p->glctx->flip_v, - }; - gl_video_render_frame(p->renderer, frame, target); - ra_tex_free(p->ra, &target.tex); - - if (p->opts.use_glFinish) - gl->Finish(); -} - -static void flip_page(struct vo *vo) -{ - struct gl_priv *p = vo->priv; - GL *gl = p->gl; - - mpgl_swap_buffers(p->glctx); - - p->frames_rendered++; - if (p->frames_rendered > 5 && !p->opts.use_gl_debug) - ra_gl_set_debug(p->ra, false); - - if (p->opts.use_glFinish) - gl->Finish(); - - if (p->opts.waitvsync || p->opts.pattern[0]) { - if (gl->GetVideoSync) { - unsigned int n1 = 0, n2 = 0; - gl->GetVideoSync(&n1); - if (p->opts.waitvsync) - gl->WaitVideoSync(2, (n1 + 1) % 2, &n2); - int step = n1 - p->prev_sgi_sync_count; - p->prev_sgi_sync_count = n1; - MP_DBG(vo, "Flip counts: %u->%u, 
step=%d\n", n1, n2, step); - if (p->opts.pattern[0]) - check_pattern(vo, step); - } else { - MP_WARN(vo, "GLX_SGI_video_sync not available, disabling.\n"); - p->opts.waitvsync = 0; - p->opts.pattern[0] = 0; - } - } - while (p->opts.vsync_fences > 0 && p->num_vsync_fences >= p->opts.vsync_fences) { - gl->ClientWaitSync(p->vsync_fences[0], GL_SYNC_FLUSH_COMMANDS_BIT, 1e9); - gl->DeleteSync(p->vsync_fences[0]); - MP_TARRAY_REMOVE_AT(p->vsync_fences, p->num_vsync_fences, 0); - } -} - -static int query_format(struct vo *vo, int format) -{ - struct gl_priv *p = vo->priv; - if (!gl_video_check_format(p->renderer, format)) - return 0; - return 1; -} - -static int reconfig(struct vo *vo, struct mp_image_params *params) -{ - struct gl_priv *p = vo->priv; - - if (mpgl_reconfig_window(p->glctx) < 0) - return -1; - - resize(p); - - gl_video_config(p->renderer, params); - - return 0; -} - -static void request_hwdec_api(struct vo *vo, void *api) -{ - struct gl_priv *p = vo->priv; - - if (p->hwdec) - return; - - p->hwdec = ra_hwdec_load_api(p->vo->log, p->ra, p->vo->global, - vo->hwdec_devs, (intptr_t)api); - gl_video_set_hwdec(p->renderer, p->hwdec); -} - -static void call_request_hwdec_api(void *ctx, enum hwdec_type type) -{ - // Roundabout way to run hwdec loading on the VO thread. - // Redirects to request_hwdec_api(). - vo_control(ctx, VOCTRL_LOAD_HWDEC_API, (void *)(intptr_t)type); -} - -static void get_and_update_icc_profile(struct gl_priv *p) -{ - if (gl_video_icc_auto_enabled(p->renderer)) { - MP_VERBOSE(p, "Querying ICC profile...\n"); - bstr icc = bstr0(NULL); - int r = mpgl_control(p->glctx, &p->events, VOCTRL_GET_ICC_PROFILE, &icc); - - if (r != VO_NOTAVAIL) { - if (r == VO_FALSE) { - MP_WARN(p, "Could not retrieve an ICC profile.\n"); - } else if (r == VO_NOTIMPL) { - MP_ERR(p, "icc-profile-auto not implemented on this platform.\n"); - } - - gl_video_set_icc_profile(p->renderer, icc); - } - } -} - -static void get_and_update_ambient_lighting(struct gl_priv *p) -{ - int lux; - int r = mpgl_control(p->glctx, &p->events, VOCTRL_GET_AMBIENT_LUX, &lux); - if (r == VO_TRUE) { - gl_video_set_ambient_lux(p->renderer, lux); - } - if (r != VO_TRUE && gl_video_gamma_auto_enabled(p->renderer)) { - MP_ERR(p, "gamma_auto option provided, but querying for ambient" - " lighting is not supported on this platform\n"); - } -} - -static int control(struct vo *vo, uint32_t request, void *data) -{ - struct gl_priv *p = vo->priv; - - switch (request) { - case VOCTRL_SET_PANSCAN: - resize(p); - return VO_TRUE; - case VOCTRL_SET_EQUALIZER: - vo->want_redraw = true; - return VO_TRUE; - case VOCTRL_SCREENSHOT_WIN: { - struct mp_image *screen = gl_read_fbo_contents(p->gl, p->glctx->main_fb, - vo->dwidth, vo->dheight); - if (!screen) - break; // redirect to backend - // set image parameters according to the display, if possible - screen->params.color = gl_video_get_output_colorspace(p->renderer); - if (p->glctx->flip_v) - mp_image_vflip(screen); - *(struct mp_image **)data = screen; - return true; - } - case VOCTRL_LOAD_HWDEC_API: - request_hwdec_api(vo, data); - return true; - case VOCTRL_UPDATE_RENDER_OPTS: { - gl_video_update_options(p->renderer); - get_and_update_icc_profile(p); - gl_video_configure_queue(p->renderer, p->vo); - p->vo->want_redraw = true; - return true; - } - case VOCTRL_RESET: - gl_video_reset(p->renderer); - return true; - case VOCTRL_PAUSE: - if (gl_video_showing_interpolated_frame(p->renderer)) - vo->want_redraw = true; - return true; - case VOCTRL_PERFORMANCE_DATA: - 
-        gl_video_perfdata(p->renderer, (struct voctrl_performance_data *)data);
-        return true;
-    }
-
-    int events = 0;
-    int r = mpgl_control(p->glctx, &events, request, data);
-    if (events & VO_EVENT_ICC_PROFILE_CHANGED) {
-        get_and_update_icc_profile(p);
-        vo->want_redraw = true;
-    }
-    if (events & VO_EVENT_AMBIENT_LIGHTING_CHANGED) {
-        get_and_update_ambient_lighting(p);
-        vo->want_redraw = true;
-    }
-    events |= p->events;
-    p->events = 0;
-    if (events & VO_EVENT_RESIZE)
-        resize(p);
-    if (events & VO_EVENT_EXPOSE)
-        vo->want_redraw = true;
-    vo_event(vo, events);
-
-    return r;
-}
-
-static void wakeup(struct vo *vo)
-{
-    struct gl_priv *p = vo->priv;
-    if (p->glctx && p->glctx->driver->wakeup)
-        p->glctx->driver->wakeup(p->glctx);
-}
-
-static void wait_events(struct vo *vo, int64_t until_time_us)
-{
-    struct gl_priv *p = vo->priv;
-    if (p->glctx->driver->wait_events) {
-        p->glctx->driver->wait_events(p->glctx, until_time_us);
-    } else {
-        vo_wait_default(vo, until_time_us);
-    }
-}
-
-static struct mp_image *get_image(struct vo *vo, int imgfmt, int w, int h,
-                                  int stride_align)
-{
-    struct gl_priv *p = vo->priv;
-
-    return gl_video_get_image(p->renderer, imgfmt, w, h, stride_align);
-}
-
-static void uninit(struct vo *vo)
-{
-    struct gl_priv *p = vo->priv;
-
-    gl_video_uninit(p->renderer);
-    ra_hwdec_uninit(p->hwdec);
-    if (vo->hwdec_devs) {
-        hwdec_devices_set_loader(vo->hwdec_devs, NULL, NULL);
-        hwdec_devices_destroy(vo->hwdec_devs);
-    }
-    ra_free(&p->ra);
-    mpgl_uninit(p->glctx);
-}
-
-static int preinit(struct vo *vo)
-{
-    struct gl_priv *p = vo->priv;
-    p->vo = vo;
-    p->log = vo->log;
-
-    int vo_flags = 0;
-
-    int alpha_mode;
-    mp_read_option_raw(vo->global, "alpha", &m_option_type_choice, &alpha_mode);
-
-    if (alpha_mode == 1)
-        vo_flags |= VOFLAG_ALPHA;
-
-    if (p->opts.use_gl_debug)
-        vo_flags |= VOFLAG_GL_DEBUG;
-
-    if (p->opts.es == 1)
-        vo_flags |= VOFLAG_GLES;
-    if (p->opts.es == 2)
-        vo_flags |= VOFLAG_GLES | VOFLAG_GLES2;
-    if (p->opts.es == -1)
-        vo_flags |= VOFLAG_NO_GLES;
-
-    if (p->opts.allow_sw)
-        vo_flags |= VOFLAG_SW;
-
-    p->glctx = mpgl_init(vo, p->opts.backend, vo_flags);
-    if (!p->glctx)
-        goto err_out;
-    p->gl = p->glctx->gl;
-
-    if (p->gl->SwapInterval) {
-        p->gl->SwapInterval(p->opts.swap_interval);
-    } else {
-        MP_VERBOSE(vo, "swap_control extension missing.\n");
-    }
-
-    p->ra = ra_create_gl(p->gl, vo->log);
-    if (!p->ra)
-        goto err_out;
-
-    p->renderer = gl_video_init(p->ra, vo->log, vo->global);
-    gl_video_set_osd_source(p->renderer, vo->osd);
-    gl_video_configure_queue(p->renderer, vo);
-
-    get_and_update_icc_profile(p);
-
-    vo->hwdec_devs = hwdec_devices_create();
-
-    hwdec_devices_set_loader(vo->hwdec_devs, call_request_hwdec_api, vo);
-
-    p->hwdec = ra_hwdec_load(p->vo->log, p->ra, vo->global,
-                             vo->hwdec_devs, vo->opts->gl_hwdec_interop);
-    gl_video_set_hwdec(p->renderer, p->hwdec);
-
-    gl_check_error(p->gl, p->log, "before retrieving framebuffer depth");
-    int fb_depth = gl_get_fb_depth(p->gl, p->glctx->main_fb);
-    gl_check_error(p->gl, p->log, "retrieving framebuffer depth");
-    if (fb_depth)
-        MP_VERBOSE(p, "Reported display depth: %d\n", fb_depth);
-    gl_video_set_fb_depth(p->renderer, fb_depth);
-
-    return 0;
-
-err_out:
-    uninit(vo);
-    return -1;
-}
-
-#define OPT_BASE_STRUCT struct gl_priv
-
-const struct vo_driver video_out_opengl = {
-    .description = "Extended OpenGL Renderer",
-    .name = "opengl",
-    .caps = VO_CAP_ROTATE90,
-    .preinit = preinit,
-    .query_format = query_format,
-    .reconfig = reconfig,
-    .control = control,
-    .get_image = get_image,
-    .draw_frame = draw_frame,
-    .flip_page = flip_page,
-    .wait_events = wait_events,
-    .wakeup = wakeup,
-    .uninit = uninit,
-    .priv_size = sizeof(struct gl_priv),
-    .options = (const m_option_t[]) {
-        OPT_FLAG("opengl-glfinish", opts.use_glFinish, 0),
-        OPT_FLAG("opengl-waitvsync", opts.waitvsync, 0),
-        OPT_INT("opengl-swapinterval", opts.swap_interval, 0),
-        OPT_FLAG("opengl-debug", opts.use_gl_debug, 0),
-        OPT_STRING_VALIDATE("opengl-backend", opts.backend, 0,
-                            mpgl_validate_backend_opt),
-        OPT_FLAG("opengl-sw", opts.allow_sw, 0),
-        OPT_CHOICE("opengl-es", opts.es, 0, ({"no", -1}, {"auto", 0},
-                                             {"yes", 1}, {"force2", 2})),
-        OPT_INTPAIR("opengl-check-pattern", opts.pattern, 0),
-        OPT_INTRANGE("opengl-vsync-fences", opts.vsync_fences, 0,
-                     0, NUM_VSYNC_FENCES),
-
-        {0}
-    },
-    .priv_defaults = &(const struct gl_priv){
-        .opts = {
-            .swap_interval = 1,
-        },
-    },
-};
diff --git a/video/out/vo_opengl_cb.c b/video/out/vo_opengl_cb.c
index ea6aaa9048..7e95e8bd31 100644
--- a/video/out/vo_opengl_cb.c
+++ b/video/out/vo_opengl_cb.c
@@ -24,9 +24,10 @@
 #include "common/global.h"
 #include "player/client.h"
 
+#include "gpu/video.h"
+#include "gpu/hwdec.h"
 #include "opengl/common.h"
-#include "opengl/video.h"
-#include "opengl/hwdec.h"
+#include "opengl/context.h"
 #include "opengl/ra_gl.h"
 
 #include "libmpv/opengl_cb.h"
@@ -86,7 +87,7 @@ struct mpv_opengl_cb_context {
     // application's OpenGL context is current - i.e. only while the
    // host application is calling certain mpv_opengl_cb_* APIs.
     GL *gl;
-    struct ra *ra;
+    struct ra_ctx *ra_ctx;
     struct gl_video *renderer;
     struct ra_hwdec *hwdec;
     struct m_config_cache *vo_opts_cache;
@@ -171,16 +172,36 @@ int mpv_opengl_cb_init_gl(struct mpv_opengl_cb_context *ctx, const char *exts,
         return MPV_ERROR_UNSUPPORTED;
     }
 
-    ctx->ra = ra_create_gl(ctx->gl, ctx->log);
-    if (!ctx->ra)
+    // initialize a blank ra_ctx to reuse ra_gl_ctx
+    ctx->ra_ctx = talloc_zero(ctx, struct ra_ctx);
+    ctx->ra_ctx->log = ctx->log;
+    ctx->ra_ctx->global = ctx->global;
+    ctx->ra_ctx->opts = (struct ra_ctx_opts) {
+        .probing = false,
+        .allow_sw = true,
+    };
+
+    static const struct ra_swapchain_fns empty_swapchain_fns = {0};
+    struct ra_gl_ctx_params gl_params = {
+        // vo_opengl_cb is essentially like a gigantic external swapchain where
+        // the user is in charge of presentation / swapping etc. But we don't
But we don't + // actually need to provide any of these functions, since we can just + // not call them to begin with - so just set it to an empty object to + // signal to ra_gl_ctx that we don't care about its latency emulation + // functionality + .external_swapchain = &empty_swapchain_fns + }; + + ctx->gl->SwapInterval = NULL; // we shouldn't randomly change this, so lock it + if (!ra_gl_ctx_init(ctx->ra_ctx, ctx->gl, gl_params)) return MPV_ERROR_UNSUPPORTED; - ctx->renderer = gl_video_init(ctx->ra, ctx->log, ctx->global); + ctx->renderer = gl_video_init(ctx->ra_ctx->ra, ctx->log, ctx->global); m_config_cache_update(ctx->vo_opts_cache); ctx->hwdec_devs = hwdec_devices_create(); - ctx->hwdec = ra_hwdec_load(ctx->log, ctx->ra, ctx->global, + ctx->hwdec = ra_hwdec_load(ctx->log, ctx->ra_ctx->ra, ctx->global, ctx->hwdec_devs, ctx->vo_opts->gl_hwdec_interop); gl_video_set_hwdec(ctx->renderer, ctx->hwdec); @@ -221,7 +242,7 @@ int mpv_opengl_cb_uninit_gl(struct mpv_opengl_cb_context *ctx) ctx->hwdec = NULL; hwdec_devices_destroy(ctx->hwdec_devs); ctx->hwdec_devs = NULL; - ra_free(&ctx->ra); + ra_ctx_destroy(&ctx->ra_ctx); talloc_free(ctx->gl); ctx->gl = NULL; return 0; @@ -236,11 +257,6 @@ int mpv_opengl_cb_draw(mpv_opengl_cb_context *ctx, int fbo, int vp_w, int vp_h) return MPV_ERROR_UNSUPPORTED; } - struct fbodst target = { - .tex = ra_create_wrapped_fb(ctx->ra, fbo, vp_w, abs(vp_h)), - .flip = vp_h < 0, - }; - reset_gl_state(ctx->gl); pthread_mutex_lock(&ctx->lock); @@ -280,7 +296,7 @@ int mpv_opengl_cb_draw(mpv_opengl_cb_context *ctx, int fbo, int vp_w, int vp_h) mp_read_option_raw(ctx->global, "opengl-debug", &m_option_type_flag, &debug); ctx->gl->debug_context = debug; - ra_gl_set_debug(ctx->ra, debug); + ra_gl_set_debug(ctx->ra_ctx->ra, debug); if (gl_video_icc_auto_enabled(ctx->renderer)) MP_ERR(ctx, "icc-profile-auto is not available with opengl-cb\n"); } @@ -316,7 +332,14 @@ int mpv_opengl_cb_draw(mpv_opengl_cb_context *ctx, int fbo, int vp_w, int vp_h) pthread_mutex_unlock(&ctx->lock); MP_STATS(ctx, "glcb-render"); + struct ra_swapchain *sw = ctx->ra_ctx->swapchain; + ra_gl_ctx_resize(sw, vp_w, abs(vp_h), fbo); + struct fbodst target = { + .tex = ra_gl_ctx_start_frame(sw), + .flip = vp_h < 0, + }; gl_video_render_frame(ctx->renderer, frame, target); + ra_gl_ctx_submit_frame(sw, frame); reset_gl_state(ctx->gl); @@ -328,8 +351,6 @@ int mpv_opengl_cb_draw(mpv_opengl_cb_context *ctx, int fbo, int vp_w, int vp_h) pthread_cond_wait(&ctx->wakeup, &ctx->lock); pthread_mutex_unlock(&ctx->lock); - ra_tex_free(ctx->ra, &target.tex); - return 0; } diff --git a/video/out/vo_rpi.c b/video/out/vo_rpi.c index 5b5d62c78f..8b819af163 100644 --- a/video/out/vo_rpi.c +++ b/video/out/vo_rpi.c @@ -44,7 +44,7 @@ #include "sub/osd.h" #include "opengl/ra_gl.h" -#include "opengl/video.h" +#include "gpu/video.h" struct mp_egl_rpi { struct mp_log *log; -- cgit v1.2.3
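
Side note (not part of the patch): the option tables above imply a simple command-line mapping; a rough usage sketch follows, with the file name and the context name purely illustrative:

    # before this commit:
    mpv --vo=opengl --opengl-debug --opengl-sw video.mkv
    # after this commit (--vo=opengl alone still works, but prints the deprecation warning):
    mpv --vo=gpu --gpu-api=opengl --gpu-debug --gpu-sw video.mkv
    # the old --opengl-backend window/context selection moves to --gpu-context, e.g.:
    mpv --vo=gpu --gpu-api=opengl --gpu-context=x11egl video.mkv

The new --swapchain-depth option accepts values 1-8 and defaults to 3, per the options[] table and defaults struct in vo_gpu.c above.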