diff options
Diffstat (limited to 'video')
-rw-r--r-- | video/out/opengl/common.c | 15 | ||||
-rw-r--r-- | video/out/opengl/common.h | 4 | ||||
-rw-r--r-- | video/out/opengl/nnedi3.c | 217 | ||||
-rw-r--r-- | video/out/opengl/nnedi3.h | 47 | ||||
-rw-r--r-- | video/out/opengl/nnedi3_weights.bin | bin | 0 -> 161280 bytes | |||
-rw-r--r-- | video/out/opengl/utils.c | 29 | ||||
-rw-r--r-- | video/out/opengl/utils.h | 2 | ||||
-rw-r--r-- | video/out/opengl/video.c | 58 | ||||
-rw-r--r-- | video/out/opengl/video.h | 1 |
9 files changed, 368 insertions, 5 deletions
diff --git a/video/out/opengl/common.c b/video/out/opengl/common.c index fae969ee2a..194c5d59a1 100644 --- a/video/out/opengl/common.c +++ b/video/out/opengl/common.c @@ -104,6 +104,7 @@ static const struct gl_functions gl_functions[] = { DEF_FN(AttachShader), DEF_FN(BindAttribLocation), DEF_FN(BindBuffer), + DEF_FN(BindBufferBase), DEF_FN(BindTexture), DEF_FN(BlendFuncSeparate), DEF_FN(BufferData), @@ -315,6 +316,16 @@ static const struct gl_functions gl_functions[] = { {0} }, }, + // uniform buffer object extensions, requires OpenGL 3.1. + { + .ver_core = 310, + .extension = "ARB_uniform_buffer_object", + .functions = (const struct gl_function[]) { + DEF_FN(GetUniformBlockIndex), + DEF_FN(UniformBlockBinding), + {0} + }, + }, }; #undef FN_OFFS @@ -466,10 +477,10 @@ void mpgl_load_functions2(GL *gl, void *(*get_fn)(void *ctx, const char *n), gl->glsl_version = 120; if (gl->version >= 300) gl->glsl_version = 130; - // Specifically needed for OSX (normally we request 3.0 contexts only, but - // OSX always creates 3.2 contexts when requesting a core context). if (gl->version >= 320) gl->glsl_version = 150; + if (gl->version >= 330) + gl->glsl_version = 330; } if (is_software_gl(gl)) { diff --git a/video/out/opengl/common.h b/video/out/opengl/common.h index 3f5ab0ebde..d87be595ba 100644 --- a/video/out/opengl/common.h +++ b/video/out/opengl/common.h @@ -192,6 +192,7 @@ struct GL { void (GLAPIENTRY *GenBuffers)(GLsizei, GLuint *); void (GLAPIENTRY *DeleteBuffers)(GLsizei, const GLuint *); void (GLAPIENTRY *BindBuffer)(GLenum, GLuint); + void (GLAPIENTRY *BindBufferBase)(GLenum, GLuint, GLuint); GLvoid * (GLAPIENTRY * MapBuffer)(GLenum, GLenum); GLboolean (GLAPIENTRY *UnmapBuffer)(GLenum); void (GLAPIENTRY *BufferData)(GLenum, intptr_t, const GLvoid *, GLenum); @@ -260,6 +261,9 @@ struct GL { GLint (GLAPIENTRY *GetVideoSync)(GLuint *); GLint (GLAPIENTRY *WaitVideoSync)(GLint, GLint, unsigned int *); + GLuint (GLAPIENTRY *GetUniformBlockIndex)(GLuint, const GLchar *); + void (GLAPIENTRY *UniformBlockBinding)(GLuint, GLuint, GLuint); + void (GLAPIENTRY *DebugMessageCallback)(MP_GLDEBUGPROC callback, const void *userParam); diff --git a/video/out/opengl/nnedi3.c b/video/out/opengl/nnedi3.c new file mode 100644 index 0000000000..fb704d2ab6 --- /dev/null +++ b/video/out/opengl/nnedi3.c @@ -0,0 +1,217 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with mpv. If not, see <http://www.gnu.org/licenses/>. + * + * You can alternatively redistribute this file and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + */ + +#include "nnedi3.h" + +#include <assert.h> +#include <stdint.h> +#include <float.h> + +#include <libavutil/bswap.h> + +#include "video.h" + +#define GLSL(x) gl_sc_add(sc, #x "\n"); +#define GLSLF(...) gl_sc_addf(sc, __VA_ARGS__) +#define GLSLH(x) gl_sc_hadd(sc, #x "\n"); +#define GLSLHF(...) gl_sc_haddf(sc, __VA_ARGS__) + +const struct nnedi3_opts nnedi3_opts_def = { + .neurons = 1, + .window = 0, + .upload = NNEDI3_UPLOAD_UBO, +}; + +#define OPT_BASE_STRUCT struct nnedi3_opts +const struct m_sub_options nnedi3_conf = { + .opts = (const m_option_t[]) { + OPT_CHOICE("neurons", neurons, 0, + ({"16", 0}, + {"32", 1}, + {"64", 2}, + {"128", 3})), + OPT_CHOICE("window", window, 0, + ({"8x4", 0}, + {"8x6", 1})), + OPT_CHOICE("upload", upload, 0, + ({"ubo", NNEDI3_UPLOAD_UBO}, + {"shader", NNEDI3_UPLOAD_SHADER})), + {0} + }, + .size = sizeof(struct nnedi3_opts), + .defaults = &nnedi3_opts_def, +}; + +const static char nnedi3_weights[40320 * 4 + 1] = +#include "video/out/opengl/nnedi3_weights.inc" +; + +const int nnedi3_weight_offsets[9] = + {0, 1088, 3264, 7616, 16320, 17920, 21120, 27520, 40320}; + +const int nnedi3_neurons[4] = {16, 32, 64, 128}; +const int nnedi3_window_width[2] = {8, 8}; +const int nnedi3_window_height[2] = {4, 6}; + +const float* get_nnedi3_weights(const struct nnedi3_opts *conf, int *size) +{ + int idx = conf->window * 4 + conf->neurons; + const int offset = nnedi3_weight_offsets[idx]; + *size = (nnedi3_weight_offsets[idx + 1] - offset) * 4; + return (const float*)(nnedi3_weights + offset * 4); +} + +void pass_nnedi3(struct gl_shader_cache *sc, int planes, int tex_num, + int step, const struct nnedi3_opts *conf, + struct gl_transform *transform) +{ + assert(0 <= step && step < 2); + + if (!conf) + conf = &nnedi3_opts_def; + + const int neurons = nnedi3_neurons[conf->neurons]; + const int width = nnedi3_window_width[conf->window]; + const int height = nnedi3_window_height[conf->window]; + + const int offset = nnedi3_weight_offsets[conf->window * 4 + conf->neurons]; + const uint32_t *weights = (const int*)(nnedi3_weights + offset * 4); + + GLSLF("// nnedi3 (tex %d, step %d, neurons %d, window %dx%d, mode %d)\n", + tex_num, step + 1, neurons, width, height, conf->upload); + + // This is required since each row will be encoded into vec4s + assert(width % 4 == 0); + const int sample_count = width * height / 4; + + if (conf->upload == NNEDI3_UPLOAD_UBO) { + char buf[32]; + snprintf(buf, sizeof(buf), "vec4 weights[%d];", + neurons * (sample_count * 2 + 1)); + gl_sc_uniform_buffer(sc, "NNEDI3_WEIGHTS", buf, 0); + } else if (conf->upload == NNEDI3_UPLOAD_SHADER) { + // Somehow necessary for hard coding approach. + GLSLH(#pragma optionNV(fastprecision on)) + } + + GLSLHF("float nnedi3(sampler2D tex, vec2 pos, vec2 tex_size, int plane) {\n"); + + if (step == 0) { + *transform = (struct gl_transform){{{1.0,0.0}, {0.0,2.0}}, {0.0,-0.5}}; + + GLSLH(if (fract(pos.y * tex_size.y) < 0.5) + return texture(tex, pos + vec2(0, 0.25) / tex_size)[plane];) + GLSLHF("#define GET(i, j) " + "(texture(tex, pos+vec2((i)-(%f),(j)-(%f)+0.25)/tex_size)[plane])\n", + width / 2.0 - 1, (height - 1) / 2.0); + } else { + *transform = (struct gl_transform){{{2.0,0.0}, {0.0,1.0}}, {-0.5,0.0}}; + + GLSLH(if (fract(pos.x * tex_size.x) < 0.5) + return texture(tex, pos + vec2(0.25, 0) / tex_size)[plane];) + GLSLHF("#define GET(i, j) " + "(texture(tex, pos+vec2((j)-(%f)+0.25,(i)-(%f))/tex_size)[plane])\n", + (height - 1) / 2.0, width / 2.0 - 1); + } + + GLSLHF("vec4 samples[%d];\n", sample_count); + + for (int y = 0; y < height; y++) + for (int x = 0; x < width; x += 4) { + GLSLHF("samples[%d] = vec4(GET(%d, %d), GET(%d, %d)," + "GET(%d, %d), GET(%d, %d));\n", + (y * width + x) / 4, x, y, x+1, y, x+2, y, x+3, y); + } + + GLSLHF("float sum = 0, sumsq = 0;" + "for (int i = 0; i < %d; i++) {" + "sum += dot(samples[i], vec4(1.0));" + "sumsq += dot(samples[i], samples[i]);" + "}\n", sample_count); + + GLSLHF("float mstd0 = sum / %d.0;\n" + "float mstd1 = sumsq / %d.0 - mstd0 * mstd0;\n" + "float mstd2 = mix(0, inversesqrt(mstd1), mstd1 >= %.12e);\n" + "mstd1 *= mstd2;\n", + width * height, width * height, FLT_EPSILON); + + GLSLHF("float vsum = 0, wsum = 0, sum1, sum2;\n"); + + if (conf->upload == NNEDI3_UPLOAD_SHADER) { + GLSLH(#define T(x) intBitsToFloat(x)) + GLSLH(#define W(i,w0,w1,w2,w3) dot(samples[i],vec4(T(w0),T(w1),T(w2),T(w3)))) + + GLSLHF("#define WS(w0,w1) " + "sum1 = exp(sum1 * mstd2 + T(w0));" + "sum2 = sum2 * mstd2 + T(w1);" + "wsum += sum1;" + "vsum += sum1*(sum2/(1+abs(sum2)));\n"); + + for (int n = 0; n < neurons; n++) { + const uint32_t *weights_ptr = weights + (sample_count * 2 + 1) * 4 * n; + for (int s = 0; s < 2; s++) { + GLSLHF("sum%d", s + 1); + for (int i = 0; i < sample_count; i++) { + GLSLHF("%cW(%d,%d,%d,%d,%d)", i == 0 ? '=' : '+', i, + (int)av_le2ne32(weights_ptr[0]), + (int)av_le2ne32(weights_ptr[1]), + (int)av_le2ne32(weights_ptr[2]), + (int)av_le2ne32(weights_ptr[3])); + weights_ptr += 4; + } + GLSLHF(";"); + } + GLSLHF("WS(%d,%d);\n", (int)av_le2ne32(weights_ptr[0]), + (int)av_le2ne32(weights_ptr[1])); + } + } else if (conf->upload == NNEDI3_UPLOAD_UBO) { + GLSLH(int idx = 0;) + + GLSLHF("for (int n = 0; n < %d; n++) {\n", neurons); + + for (int s = 0; s < 2; s++) { + GLSLHF("sum%d = 0;\n" + "for (int i = 0; i < %d; i++) {" + "sum%d += dot(samples[i], weights[idx++]);" + "}\n", + s + 1, sample_count, s + 1); + } + + GLSLH(sum1 = exp(sum1 * mstd2 + weights[idx][0]); + sum2 = sum2 * mstd2 + weights[idx++][1]; + wsum += sum1; + vsum += sum1*(sum2/(1+abs(sum2)));) + + GLSLHF("}\n"); + } + + GLSLH(return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0, 1);) + + GLSLHF("}\n"); // nnedi3 + + GLSL(vec4 color = vec4(1.0);) + + for (int i = 0; i < planes; i++) { + GLSLF("color[%d] = nnedi3(texture%d, texcoord%d, texture_size%d, %d);\n", + i, tex_num, tex_num, tex_num, i); + } +} diff --git a/video/out/opengl/nnedi3.h b/video/out/opengl/nnedi3.h new file mode 100644 index 0000000000..ae0104ef04 --- /dev/null +++ b/video/out/opengl/nnedi3.h @@ -0,0 +1,47 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with mpv. If not, see <http://www.gnu.org/licenses/>. + * + * You can alternatively redistribute this file and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + */ + +#ifndef MP_GL_NNEDI3_H +#define MP_GL_NNEDI3_H + +#include "common.h" +#include "utils.h" + +#define NNEDI3_UPLOAD_UBO 0 +#define NNEDI3_UPLOAD_SHADER 1 + +struct nnedi3_opts { + int neurons; + int window; + int upload; +}; + +extern const struct nnedi3_opts nnedi3_opts_def; +extern const struct m_sub_options nnedi3_conf; + +const float* get_nnedi3_weights(const struct nnedi3_opts *conf, int *size); + +void pass_nnedi3(struct gl_shader_cache *sc, int planes, int tex_num, + int step, const struct nnedi3_opts *conf, + struct gl_transform *transform); + +#endif diff --git a/video/out/opengl/nnedi3_weights.bin b/video/out/opengl/nnedi3_weights.bin Binary files differnew file mode 100644 index 0000000000..e1659d848c --- /dev/null +++ b/video/out/opengl/nnedi3_weights.bin diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c index 0026090df9..6ddb333e47 100644 --- a/video/out/opengl/utils.c +++ b/video/out/opengl/utils.c @@ -482,6 +482,7 @@ enum uniform_type { UT_i, UT_f, UT_m, + UT_buffer, }; struct sc_uniform { @@ -493,6 +494,10 @@ struct sc_uniform { union { GLfloat f[9]; GLint i[4]; + struct { + char* text; + GLint binding; + } buffer; } v; }; @@ -535,8 +540,11 @@ void gl_sc_reset(struct gl_shader_cache *sc) { sc->text[0] = '\0'; sc->header_text[0] = '\0'; - for (int n = 0; n < sc->num_uniforms; n++) + for (int n = 0; n < sc->num_uniforms; n++) { talloc_free(sc->uniforms[n].name); + if (sc->uniforms[n].type == UT_buffer) + talloc_free(sc->uniforms[n].v.buffer.text); + } sc->num_uniforms = 0; } @@ -697,6 +705,15 @@ void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name, transpose3x3(&u->v.f[0]); } +void gl_sc_uniform_buffer(struct gl_shader_cache *sc, char *name, + const char *text, int binding) +{ + struct sc_uniform *u = find_uniform(sc, name); + u->type = UT_buffer; + u->v.buffer.text = talloc_strdup(sc, text); + u->v.buffer.binding = binding; +} + // This will call glBindAttribLocation() on the shader before it's linked // (OpenGL requires this to happen before linking). Basically, it associates // the input variable names with the fields in the vao. @@ -723,6 +740,11 @@ static const char *vao_glsl_type(const struct gl_vao_entry *e) // Assumes program is current (gl->UseProgram(program)). static void update_uniform(GL *gl, GLuint program, struct sc_uniform *u) { + if (u->type == UT_buffer) { + GLuint idx = gl->GetUniformBlockIndex(program, u->name); + gl->UniformBlockBinding(program, idx, u->v.buffer.binding); + return; + } GLint loc = gl->GetUniformLocation(program, u->name); if (loc < 0) return; @@ -885,7 +907,10 @@ void gl_sc_gen_shader_and_reset(struct gl_shader_cache *sc) ADD(frag, "%s", frag_vaos); for (int n = 0; n < sc->num_uniforms; n++) { struct sc_uniform *u = &sc->uniforms[n]; - ADD(frag, "uniform %s %s;\n", u->glsl_type, u->name); + if (u->type == UT_buffer) + ADD(frag, "uniform %s { %s };\n", u->name, u->v.buffer.text); + else + ADD(frag, "uniform %s %s;\n", u->glsl_type, u->name); } // custom shader header if (sc->header_text[0]) { diff --git a/video/out/opengl/utils.h b/video/out/opengl/utils.h index 8682366a24..fa76ee85a8 100644 --- a/video/out/opengl/utils.h +++ b/video/out/opengl/utils.h @@ -139,6 +139,8 @@ void gl_sc_uniform_mat2(struct gl_shader_cache *sc, char *name, bool transpose, GLfloat *v); void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name, bool transpose, GLfloat *v); +void gl_sc_uniform_buffer(struct gl_shader_cache *sc, char *name, + const char *text, int binding); void gl_sc_set_vao(struct gl_shader_cache *sc, struct gl_vao *vao); void gl_sc_gen_shader_and_reset(struct gl_shader_cache *sc); void gl_sc_reset(struct gl_shader_cache *sc); diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c index b69330d1a9..cd638ccd5e 100644 --- a/video/out/opengl/video.c +++ b/video/out/opengl/video.c @@ -39,6 +39,7 @@ #include "osd.h" #include "stream/stream.h" #include "superxbr.h" +#include "nnedi3.h" #include "video_shaders.h" #include "video/out/filter_kernels.h" #include "video/out/aspect.h" @@ -156,6 +157,8 @@ struct gl_video { GLuint dither_texture; int dither_size; + GLuint nnedi3_weights_buffer; + struct mp_image_params real_image_params; // configured format struct mp_image_params image_params; // texture format (mind hwdec case) struct mp_imgfmt_desc image_desc; @@ -444,12 +447,16 @@ const struct m_sub_options gl_video_conf = { OPT_FLAG("deband", deband, 0), OPT_SUBSTRUCT("deband", deband_opts, deband_conf, 0), OPT_FLOAT("sharpen", unsharp, 0), - OPT_CHOICE("prescale", prescale, 0, ({"none", 0}, {"superxbr", 1})), + OPT_CHOICE("prescale", prescale, 0, + ({"none", 0}, + {"superxbr", 1}, + {"nnedi3", 2})), OPT_INTRANGE("prescale-passes", prescale_passes, 0, 1, MAX_PRESCALE_PASSES), OPT_FLOATRANGE("prescale-downscaling-threshold", prescale_downscaling_threshold, 0, 0.0, 32.0), OPT_SUBSTRUCT("superxbr", superxbr_opts, superxbr_conf, 0), + OPT_SUBSTRUCT("nnedi3", nnedi3_opts, nnedi3_conf, 0), OPT_REMOVED("approx-gamma", "this is always enabled now"), OPT_REMOVED("cscale-down", "chroma is never downscaled"), @@ -597,6 +604,8 @@ static void uninit_rendering(struct gl_video *p) gl->DeleteTextures(1, &p->dither_texture); p->dither_texture = 0; + gl->DeleteBuffers(1, &p->nnedi3_weights_buffer); + fbotex_uninit(&p->chroma_merge_fbo); fbotex_uninit(&p->chroma_deband_fbo); fbotex_uninit(&p->indirect_fbo); @@ -1202,6 +1211,10 @@ static void pass_prescale(struct gl_video *p, int src_tex_num, int dst_tex_num, pass_superxbr(p->sc, planes, tex_num, step, p->opts.superxbr_opts, &transform); break; + case 2: + pass_nnedi3(p->sc, planes, tex_num, step, + p->opts.nnedi3_opts, &transform); + break; default: abort(); } @@ -1230,6 +1243,27 @@ static bool pass_prescale_luma(struct gl_video *p, float tex_mul, struct src_tex *prescaled_tex, int *prescaled_planes) { + if (p->opts.prescale == 2 && + p->opts.nnedi3_opts->upload == NNEDI3_UPLOAD_UBO) + { + // nnedi3 are configured to use uniform buffer objects. + if (!p->nnedi3_weights_buffer) { + p->gl->GenBuffers(1, &p->nnedi3_weights_buffer); + p->gl->BindBufferBase(GL_UNIFORM_BUFFER, 0, + p->nnedi3_weights_buffer); + int weights_size; + const float *weights = + get_nnedi3_weights(p->opts.nnedi3_opts, &weights_size); + + MP_VERBOSE(p, "Uploading NNEDI3 weights via uniform buffer (size=%d)\n", + weights_size); + + // We don't know the endianness of GPU, just assume it's little + // endian. + p->gl->BufferData(GL_UNIFORM_BUFFER, weights_size, weights, + GL_STATIC_DRAW); + } + } // number of passes to apply prescaler, can be zero. int prescale_passes = get_prescale_passes(p); @@ -2384,6 +2418,22 @@ static void check_gl_features(struct gl_video *p) p->opts.deband = 0; MP_WARN(p, "Disabling debanding (GLSL version too old).\n"); } + + if (p->opts.prescale == 2) { + if (p->opts.nnedi3_opts->upload == NNEDI3_UPLOAD_UBO) { + // Check features for uniform buffer objects. + if (!p->gl->GetUniformBlockIndex || !p->gl->UniformBlockBinding) { + MP_WARN(p, "Disabling NNEDI3 (OpenGL 3.1 required).\n"); + p->opts.prescale = 0; + } + } else if (p->opts.nnedi3_opts->upload == NNEDI3_UPLOAD_SHADER) { + // Check features for hard coding approach. + if (p->gl->glsl_version < 330) { + MP_WARN(p, "Disabling NNEDI3 (OpenGL 3.3 required).\n"); + p->opts.prescale = 0; + } + } + } } static void init_gl(struct gl_video *p) @@ -2708,6 +2758,7 @@ static void assign_options(struct gl_video_opts *dst, struct gl_video_opts *src) talloc_free(dst->post_shaders); talloc_free(dst->deband_opts); talloc_free(dst->superxbr_opts); + talloc_free(dst->nnedi3_opts); *dst = *src; @@ -2719,6 +2770,11 @@ static void assign_options(struct gl_video_opts *dst, struct gl_video_opts *src) src->superxbr_opts); } + if (src->nnedi3_opts) { + dst->nnedi3_opts = m_sub_options_copy(NULL, &nnedi3_conf, + src->nnedi3_opts); + } + for (int n = 0; n < 4; n++) { dst->scaler[n].kernel.name = (char *)handle_scaler_opt(dst->scaler[n].kernel.name, n == 3); diff --git a/video/out/opengl/video.h b/video/out/opengl/video.h index 0ed7b7cb41..df55ede27d 100644 --- a/video/out/opengl/video.h +++ b/video/out/opengl/video.h @@ -102,6 +102,7 @@ struct gl_video_opts { int prescale_passes; float prescale_downscaling_threshold; struct superxbr_opts *superxbr_opts; + struct nnedi3_opts *nnedi3_opts; }; extern const struct m_sub_options gl_video_conf; |