diff options
author | Niklas Haas <git@nand.wakku.to> | 2015-09-05 14:03:00 +0200 |
---|---|---|
committer | wm4 <wm4@nowhere> | 2015-09-09 18:17:44 +0200 |
commit | eb56807b414229a00cd2bee5e74beae5a01e8fb1 (patch) | |
tree | 7d2c22371b50208e989c35932b511e5271a83946 /video/out/opengl | |
parent | 7929e36e9329c842790193389000e968f5a57426 (diff) |
vo_opengl: move self-contained shader routines to a separate file
This is mostly to cut down on the amount of code bloat in video.c by
moving out helper functions (including scaler kernels and the color
linearization/delinearization routines) to a separate file.
It would certainly be possible to move out more functions (e.g. dithering
or CMS code) with some extra effort/refactoring, but this is a start.
Signed-off-by: wm4 <wm4@nowhere>
Diffstat (limited to 'video/out/opengl')
-rw-r--r-- | video/out/opengl/video.c | 365 | ||||
-rw-r--r-- | video/out/opengl/video.h | 25 | ||||
-rw-r--r-- | video/out/opengl/video_shaders.c | 339 | ||||
-rw-r--r-- | video/out/opengl/video_shaders.h | 43 |
4 files changed, 424 insertions, 348 deletions
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c index 3633329d19..7d6102ff82 100644 --- a/video/out/opengl/video.c +++ b/video/out/opengl/video.c @@ -36,6 +36,7 @@ #include "utils.h" #include "hwdec.h" #include "osd.h" +#include "video_shaders.h" #include "video/out/filter_kernels.h" #include "video/out/aspect.h" #include "video/out/bitmap_packer.h" @@ -45,14 +46,6 @@ // Pixel width of 1D lookup textures. #define LOOKUP_TEXTURE_SIZE 256 -// Texture units 0-5 are used by the video, and for free use by the passes -#define TEXUNIT_VIDEO_NUM 6 - -// Other texture units are reserved for specific purposes -#define TEXUNIT_SCALERS TEXUNIT_VIDEO_NUM -#define TEXUNIT_3DLUT (TEXUNIT_SCALERS+4) -#define TEXUNIT_DITHER (TEXUNIT_3DLUT+1) - // scale/cscale arguments that map directly to shader filter routines. // Note that the convolution filters are not included in this list. static const char *const fixed_scale_filters[] = { @@ -110,21 +103,6 @@ struct video_image { struct mp_image *mpi; // original input image }; -struct scaler { - int index; - struct scaler_config conf; - double scale_factor; - bool initialized; - struct filter_kernel *kernel; - GLuint gl_lut; - GLenum gl_target; - struct fbotex sep_fbo; - bool insufficient; - - // kernel points here - struct filter_kernel kernel_storage; -}; - struct fbosurface { struct fbotex fbotex; double pts; @@ -1030,84 +1008,7 @@ static void reinit_scaler(struct gl_video *p, struct scaler *scaler, debug_check_gl(p, "after initializing scaler"); } -// Set up shared/commonly used variables -static void sampler_prelude(struct gl_video *p, int tex_num) -{ - GLSLF("#define tex texture%d\n", tex_num); - GLSLF("vec2 pos = texcoord%d;\n", tex_num); - GLSLF("vec2 size = texture_size%d;\n", tex_num); - GLSLF("vec2 pt = vec2(1.0) / size;\n"); -} - -static void pass_sample_separated_get_weights(struct gl_video *p, - struct scaler *scaler) -{ - gl_sc_uniform_sampler(p->sc, "lut", scaler->gl_target, - TEXUNIT_SCALERS + 
scaler->index); - - int N = scaler->kernel->size; - if (N == 2) { - GLSL(vec2 c1 = texture(lut, vec2(0.5, fcoord)).RG;) - GLSL(float weights[2] = float[](c1.r, c1.g);) - } else if (N == 6) { - GLSL(vec4 c1 = texture(lut, vec2(0.25, fcoord));) - GLSL(vec4 c2 = texture(lut, vec2(0.75, fcoord));) - GLSL(float weights[6] = float[](c1.r, c1.g, c1.b, c2.r, c2.g, c2.b);) - } else { - GLSLF("float weights[%d];\n", N); - for (int n = 0; n < N / 4; n++) { - GLSLF("c = texture(lut, vec2(1.0 / %d + %d / float(%d), fcoord));\n", - N / 2, n, N / 4); - GLSLF("weights[%d] = c.r;\n", n * 4 + 0); - GLSLF("weights[%d] = c.g;\n", n * 4 + 1); - GLSLF("weights[%d] = c.b;\n", n * 4 + 2); - GLSLF("weights[%d] = c.a;\n", n * 4 + 3); - } - } -} - -// Handle a single pass (either vertical or horizontal). The direction is given -// by the vector (d_x, d_y). If the vector is 0, then planar interpolation is -// used instead (samples from texture0 through textureN) -static void pass_sample_separated_gen(struct gl_video *p, struct scaler *scaler, - int d_x, int d_y) -{ - int N = scaler->kernel->size; - bool use_ar = scaler->conf.antiring > 0; - bool planar = d_x == 0 && d_y == 0; - GLSL(vec4 color = vec4(0.0);) - GLSLF("{\n"); - if (!planar) { - GLSLF("vec2 dir = vec2(%d, %d);\n", d_x, d_y); - GLSL(pt *= dir;) - GLSL(float fcoord = dot(fract(pos * size - vec2(0.5)), dir);) - GLSLF("vec2 base = pos - fcoord * pt - pt * vec2(%d);\n", N / 2 - 1); - } - GLSL(vec4 c;) - if (use_ar) { - GLSL(vec4 hi = vec4(0.0);) - GLSL(vec4 lo = vec4(1.0);) - } - pass_sample_separated_get_weights(p, scaler); - GLSLF("// scaler samples\n"); - for (int n = 0; n < N; n++) { - if (planar) { - GLSLF("c = texture(texture%d, texcoord%d);\n", n, n); - } else { - GLSLF("c = texture(tex, base + pt * vec2(%d));\n", n); - } - GLSLF("color += vec4(weights[%d]) * c;\n", n); - if (use_ar && (n == N/2-1 || n == N/2)) { - GLSL(lo = min(lo, c);) - GLSL(hi = max(hi, c);) - } - } - if (use_ar) - GLSLF("color = mix(color, clamp(color, lo, 
hi), %f);\n", - scaler->conf.antiring); - GLSLF("}\n"); -} - +// Special helper for sampling from two separated stages static void pass_sample_separated(struct gl_video *p, int src_tex, struct scaler *scaler, int w, int h, struct gl_transform transform) @@ -1118,179 +1019,15 @@ static void pass_sample_separated(struct gl_video *p, int src_tex, GLSLF("// pass 1\n"); p->pass_tex[src_tex].src.y0 = src_new.y0; p->pass_tex[src_tex].src.y1 = src_new.y1; - pass_sample_separated_gen(p, scaler, 0, 1); + pass_sample_separated_gen(p->sc, scaler, 0, 1); int src_w = p->pass_tex[src_tex].src.x1 - p->pass_tex[src_tex].src.x0; finish_pass_fbo(p, &scaler->sep_fbo, src_w, h, src_tex, FBOTEX_FUZZY_H); // Restore the sample source for the second pass - sampler_prelude(p, src_tex); + sampler_prelude(p->sc, src_tex); GLSLF("// pass 2\n"); p->pass_tex[src_tex].src.x0 = src_new.x0; p->pass_tex[src_tex].src.x1 = src_new.x1; - pass_sample_separated_gen(p, scaler, 1, 0); -} - -static void pass_sample_polar(struct gl_video *p, struct scaler *scaler) -{ - double radius = scaler->kernel->f.radius; - int bound = (int)ceil(radius); - bool use_ar = scaler->conf.antiring > 0; - GLSL(vec4 color = vec4(0.0);) - GLSLF("{\n"); - GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));) - GLSL(vec2 base = pos - fcoord * pt;) - GLSL(vec4 c;) - GLSLF("float w, d, wsum = 0.0;\n"); - if (use_ar) { - GLSL(vec4 lo = vec4(1.0);) - GLSL(vec4 hi = vec4(0.0);) - } - gl_sc_uniform_sampler(p->sc, "lut", scaler->gl_target, - TEXUNIT_SCALERS + scaler->index); - GLSLF("// scaler samples\n"); - for (int y = 1-bound; y <= bound; y++) { - for (int x = 1-bound; x <= bound; x++) { - // Since we can't know the subpixel position in advance, assume a - // worst case scenario - int yy = y > 0 ? y-1 : y; - int xx = x > 0 ? 
x-1 : x; - double dmax = sqrt(xx*xx + yy*yy); - // Skip samples definitely outside the radius - if (dmax >= radius) - continue; - GLSLF("d = length(vec2(%d, %d) - fcoord)/%f;\n", x, y, radius); - // Check for samples that might be skippable - if (dmax >= radius - 1) - GLSLF("if (d < 1.0) {\n"); - GLSL(w = texture1D(lut, d).r;) - GLSL(wsum += w;) - GLSLF("c = texture(tex, base + pt * vec2(%d, %d));\n", x, y); - GLSL(color += vec4(w) * c;) - if (use_ar && x >= 0 && y >= 0 && x <= 1 && y <= 1) { - GLSL(lo = min(lo, c);) - GLSL(hi = max(hi, c);) - } - if (dmax >= radius -1) - GLSLF("}\n"); - } - } - GLSL(color = color / vec4(wsum);) - if (use_ar) - GLSLF("color = mix(color, clamp(color, lo, hi), %f);\n", - scaler->conf.antiring); - GLSLF("}\n"); -} - -static void bicubic_calcweights(struct gl_video *p, const char *t, const char *s) -{ - // Explanation of how bicubic scaling with only 4 texel fetches is done: - // http://www.mate.tue.nl/mate/pdfs/10318.pdf - // 'Efficient GPU-Based Texture Interpolation using Uniform B-Splines' - // Explanation why this algorithm normally always blurs, even with unit - // scaling: - // http://bigwww.epfl.ch/preprints/ruijters1001p.pdf - // 'GPU Prefilter for Accurate Cubic B-spline Interpolation' - GLSLF("vec4 %s = vec4(-0.5, 0.1666, 0.3333, -0.3333) * %s" - " + vec4(1, 0, -0.5, 0.5);\n", t, s); - GLSLF("%s = %s * %s + vec4(0, 0, -0.5, 0.5);\n", t, t, s); - GLSLF("%s = %s * %s + vec4(-0.6666, 0, 0.8333, 0.1666);\n", t, t, s); - GLSLF("%s.xy *= vec2(1, 1) / vec2(%s.z, %s.w);\n", t, t, t); - GLSLF("%s.xy += vec2(1 + %s, 1 - %s);\n", t, s, s); -} - -static void pass_sample_bicubic_fast(struct gl_video *p) -{ - GLSL(vec4 color;) - GLSLF("{\n"); - GLSL(vec2 fcoord = fract(pos * size + vec2(0.5, 0.5));) - bicubic_calcweights(p, "parmx", "fcoord.x"); - bicubic_calcweights(p, "parmy", "fcoord.y"); - GLSL(vec4 cdelta;) - GLSL(cdelta.xz = parmx.RG * vec2(-pt.x, pt.x);) - GLSL(cdelta.yw = parmy.RG * vec2(-pt.y, pt.y);) - // first y-interpolation - 
GLSL(vec4 ar = texture(tex, pos + cdelta.xy);) - GLSL(vec4 ag = texture(tex, pos + cdelta.xw);) - GLSL(vec4 ab = mix(ag, ar, parmy.b);) - // second y-interpolation - GLSL(vec4 br = texture(tex, pos + cdelta.zy);) - GLSL(vec4 bg = texture(tex, pos + cdelta.zw);) - GLSL(vec4 aa = mix(bg, br, parmy.b);) - // x-interpolation - GLSL(color = mix(aa, ab, parmx.b);) - GLSLF("}\n"); -} - -static void pass_sample_sharpen3(struct gl_video *p, struct scaler *scaler) -{ - GLSL(vec4 color;) - GLSLF("{\n"); - GLSL(vec2 st = pt * 0.5;) - GLSL(vec4 p = texture(tex, pos);) - GLSL(vec4 sum = texture(tex, pos + st * vec2(+1, +1)) - + texture(tex, pos + st * vec2(+1, -1)) - + texture(tex, pos + st * vec2(-1, +1)) - + texture(tex, pos + st * vec2(-1, -1));) - float param = scaler->conf.kernel.params[0]; - param = isnan(param) ? 0.5 : param; - GLSLF("color = p + (p - 0.25 * sum) * %f;\n", param); - GLSLF("}\n"); -} - -static void pass_sample_sharpen5(struct gl_video *p, struct scaler *scaler) -{ - GLSL(vec4 color;) - GLSLF("{\n"); - GLSL(vec2 st1 = pt * 1.2;) - GLSL(vec4 p = texture(tex, pos);) - GLSL(vec4 sum1 = texture(tex, pos + st1 * vec2(+1, +1)) - + texture(tex, pos + st1 * vec2(+1, -1)) - + texture(tex, pos + st1 * vec2(-1, +1)) - + texture(tex, pos + st1 * vec2(-1, -1));) - GLSL(vec2 st2 = pt * 1.5;) - GLSL(vec4 sum2 = texture(tex, pos + st2 * vec2(+1, 0)) - + texture(tex, pos + st2 * vec2( 0, +1)) - + texture(tex, pos + st2 * vec2(-1, 0)) - + texture(tex, pos + st2 * vec2( 0, -1));) - GLSL(vec4 t = p * 0.859375 + sum2 * -0.1171875 + sum1 * -0.09765625;) - float param = scaler->conf.kernel.params[0]; - param = isnan(param) ? 
0.5 : param; - GLSLF("color = p + t * %f;\n", param); - GLSLF("}\n"); -} - -static void pass_sample_oversample(struct gl_video *p, struct scaler *scaler, - int w, int h) -{ - GLSL(vec4 color;) - GLSLF("{\n"); - GLSL(vec2 pos = pos + vec2(0.5) * pt;) // round to nearest - GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));) - // We only need to sample from the four corner pixels since we're using - // nearest neighbour and can compute the exact transition point - GLSL(vec2 baseNW = pos - fcoord * pt;) - GLSL(vec2 baseNE = baseNW + vec2(pt.x, 0.0);) - GLSL(vec2 baseSW = baseNW + vec2(0.0, pt.y);) - GLSL(vec2 baseSE = baseNW + pt;) - // Determine the mixing coefficient vector - gl_sc_uniform_vec2(p->sc, "output_size", (float[2]){w, h}); - GLSL(vec2 coeff = vec2((baseSE - pos) * output_size);) - GLSL(coeff = clamp(coeff, 0.0, 1.0);) - float threshold = scaler->conf.kernel.params[0]; - if (threshold > 0) { // also rules out NAN - GLSLF("coeff = mix(coeff, vec2(0.0), " - "lessThanEqual(coeff, vec2(%f)));\n", threshold); - GLSLF("coeff = mix(coeff, vec2(1.0), " - "greaterThanEqual(coeff, vec2(%f)));\n", 1.0 - threshold); - } - // Compute the right blend of colors - GLSL(vec4 left = mix(texture(tex, baseSW), - texture(tex, baseNW), - coeff.y);) - GLSL(vec4 right = mix(texture(tex, baseSE), - texture(tex, baseNE), - coeff.y);) - GLSL(color = mix(right, left, coeff.x);) - GLSLF("}\n"); + pass_sample_separated_gen(p->sc, scaler, 1, 0); } // Sample. This samples from the texture ID given by src_tex. 
It's hardcoded to @@ -1307,7 +1044,7 @@ static void pass_sample(struct gl_video *p, int src_tex, struct scaler *scaler, int w, int h, struct gl_transform transform) { reinit_scaler(p, scaler, conf, scale_factor, filter_sizes); - sampler_prelude(p, src_tex); + sampler_prelude(p->sc, src_tex); // Set up the transformation for everything other than separated scaling if (!scaler->kernel || scaler->kernel->polar) @@ -1318,13 +1055,13 @@ static void pass_sample(struct gl_video *p, int src_tex, struct scaler *scaler, if (strcmp(name, "bilinear") == 0) { GLSL(vec4 color = texture(tex, pos);) } else if (strcmp(name, "bicubic_fast") == 0) { - pass_sample_bicubic_fast(p); + pass_sample_bicubic_fast(p->sc); } else if (strcmp(name, "sharpen3") == 0) { - pass_sample_sharpen3(p, scaler); + pass_sample_sharpen3(p->sc, scaler); } else if (strcmp(name, "sharpen5") == 0) { - pass_sample_sharpen5(p, scaler); + pass_sample_sharpen5(p->sc, scaler); } else if (strcmp(name, "oversample") == 0) { - pass_sample_oversample(p, scaler, w, h); + pass_sample_oversample(p->sc, scaler, w, h); } else if (strcmp(name, "custom") == 0) { const char *body = gl_sc_loadfile(p->sc, p->opts.scale_shader); if (body) { @@ -1335,7 +1072,7 @@ static void pass_sample(struct gl_video *p, int src_tex, struct scaler *scaler, p->opts.scale_shader = NULL; } } else if (scaler->kernel && scaler->kernel->polar) { - pass_sample_polar(p, scaler); + pass_sample_polar(p->sc, scaler); } else if (scaler->kernel) { pass_sample_separated(p, src_tex, scaler, w, h, transform); } else { @@ -1546,74 +1283,6 @@ static void get_scale_factors(struct gl_video *p, double xy[2]) (double)(p->src_rect.y1 - p->src_rect.y0); } -// Linearize (expand), given a TRC as input -static void pass_linearize(struct gl_video *p, enum mp_csp_trc trc) -{ - if (trc == MP_CSP_TRC_LINEAR) - return; - - GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);) - switch (trc) { - case MP_CSP_TRC_SRGB: - GLSL(color.rgb = mix(color.rgb / vec3(12.92), - pow((color.rgb + 
vec3(0.055))/vec3(1.055), - vec3(2.4)), - lessThan(vec3(0.04045), color.rgb));) - break; - case MP_CSP_TRC_BT_1886: - GLSL(color.rgb = pow(color.rgb, vec3(1.961));) - break; - case MP_CSP_TRC_GAMMA18: - GLSL(color.rgb = pow(color.rgb, vec3(1.8));) - break; - case MP_CSP_TRC_GAMMA22: - GLSL(color.rgb = pow(color.rgb, vec3(2.2));) - break; - case MP_CSP_TRC_GAMMA28: - GLSL(color.rgb = pow(color.rgb, vec3(2.8));) - break; - case MP_CSP_TRC_PRO_PHOTO: - GLSL(color.rgb = mix(color.rgb / vec3(16.0), - pow(color.rgb, vec3(1.8)), - lessThan(vec3(0.03125), color.rgb));) - break; - } -} - -// Delinearize (compress), given a TRC as output -static void pass_delinearize(struct gl_video *p, enum mp_csp_trc trc) -{ - if (trc == MP_CSP_TRC_LINEAR) - return; - - GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);) - switch (trc) { - case MP_CSP_TRC_SRGB: - GLSL(color.rgb = mix(color.rgb * vec3(12.92), - vec3(1.055) * pow(color.rgb, vec3(1.0/2.4)) - - vec3(0.055), - lessThanEqual(vec3(0.0031308), color.rgb));) - break; - case MP_CSP_TRC_BT_1886: - GLSL(color.rgb = pow(color.rgb, vec3(1.0/1.961));) - break; - case MP_CSP_TRC_GAMMA18: - GLSL(color.rgb = pow(color.rgb, vec3(1.0/1.8));) - break; - case MP_CSP_TRC_GAMMA22: - GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.2));) - break; - case MP_CSP_TRC_GAMMA28: - GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.8));) - break; - case MP_CSP_TRC_PRO_PHOTO: - GLSL(color.rgb = mix(color.rgb * vec3(16.0), - pow(color.rgb, vec3(1.0/1.8)), - lessThanEqual(vec3(0.001953), color.rgb));) - break; - } -} - // Compute the cropped and rotated transformation of the video source rectangle. // vp_w and vp_h are set to the _destination_ video size. 
static void compute_src_transform(struct gl_video *p, struct gl_transform *tr, @@ -1669,7 +1338,7 @@ static void pass_scale_main(struct gl_video *p) // Pre-conversion, like linear light/sigmoidization GLSLF("// scaler pre-conversion\n"); if (p->use_linear) - pass_linearize(p, p->image_params.gamma); + pass_linearize(p->sc, p->image_params.gamma); bool use_sigmoid = p->use_linear && p->opts.sigmoid_upscaling && upscaling; float sig_center, sig_slope, sig_offset, sig_scale; @@ -1733,7 +1402,7 @@ static void pass_colormanage(struct gl_video *p, enum mp_csp_prim prim_src, bool need_cms = prim_src != prim_dst || p->use_lut_3d; bool need_gamma = trc_src != trc_dst || need_cms; if (need_gamma) - pass_linearize(p, trc_src); + pass_linearize(p->sc, trc_src); // Adapt to the right colorspace if necessary if (prim_src != prim_dst) { struct mp_csp_primaries csp_src = mp_get_csp_primaries(prim_src), @@ -1752,7 +1421,7 @@ static void pass_colormanage(struct gl_video *p, enum mp_csp_prim prim_src, GLSL(color.rgb = texture3D(lut_3d, color.rgb).rgb;) } if (need_gamma) - pass_delinearize(p, trc_dst); + pass_delinearize(p->sc, trc_dst); } static void pass_dither(struct gl_video *p) @@ -1982,13 +1651,13 @@ static void pass_render_frame(struct gl_video *p) rect.mt *= scale[1]; rect.mb *= scale[1]; // We should always blend subtitles in non-linear light if (p->use_linear) - pass_delinearize(p, p->image_params.gamma); + pass_delinearize(p->sc, p->image_params.gamma); finish_pass_fbo(p, &p->blend_subs_fbo, vp_w, vp_h, 0, FBOTEX_FUZZY); pass_draw_osd(p, OSD_DRAW_SUB_ONLY, vpts, rect, vp_w, vp_h, p->blend_subs_fbo.fbo, false); GLSL(vec4 color = texture(texture0, texcoord0);) if (p->use_linear) - pass_linearize(p, p->image_params.gamma); + pass_linearize(p->sc, p->image_params.gamma); } apply_shaders(p, p->opts.post_shaders, &p->post_fbo[0], 0, vp_w, vp_h); @@ -2164,7 +1833,7 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t, inter_coeff);) } else { 
gl_sc_uniform_f(p->sc, "fcoord", mix); - pass_sample_separated_gen(p, tscale, 0, 0); + pass_sample_separated_gen(p->sc, tscale, 0, 0); } // Load all the required frames diff --git a/video/out/opengl/video.h b/video/out/opengl/video.h index a52f35c424..65fdb6c3ab 100644 --- a/video/out/opengl/video.h +++ b/video/out/opengl/video.h @@ -22,6 +22,16 @@ #include "options/m_option.h" #include "sub/osd.h" #include "common.h" +#include "utils.h" +#include "video/out/filter_kernels.h" + +// Texture units 0-5 are used by the video, and for free use by the passes +#define TEXUNIT_VIDEO_NUM 6 + +// Other texture units are reserved for specific purposes +#define TEXUNIT_SCALERS TEXUNIT_VIDEO_NUM +#define TEXUNIT_3DLUT (TEXUNIT_SCALERS+4) +#define TEXUNIT_DITHER (TEXUNIT_3DLUT+1) struct lut3d { uint16_t *data; @@ -42,6 +52,21 @@ struct scaler_config { int clamp; }; +struct scaler { + int index; + struct scaler_config conf; + double scale_factor; + bool initialized; + struct filter_kernel *kernel; + GLuint gl_lut; + GLenum gl_target; + struct fbotex sep_fbo; + bool insufficient; + + // kernel points here + struct filter_kernel kernel_storage; +}; + struct gl_video_opts { int dumb_mode; struct scaler_config scaler[4]; diff --git a/video/out/opengl/video_shaders.c b/video/out/opengl/video_shaders.c new file mode 100644 index 0000000000..26d5636184 --- /dev/null +++ b/video/out/opengl/video_shaders.c @@ -0,0 +1,339 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with mpv. If not, see <http://www.gnu.org/licenses/>. + * + * You can alternatively redistribute this file and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + */ + +#include <math.h> + +#include "video_shaders.h" +#include "video.h" + +#define GLSL(x) gl_sc_add(sc, #x "\n"); +#define GLSLF(...) gl_sc_addf(sc, __VA_ARGS__) + +// Set up shared/commonly used variables +void sampler_prelude(struct gl_shader_cache *sc, int tex_num) +{ + GLSLF("#define tex texture%d\n", tex_num); + GLSLF("vec2 pos = texcoord%d;\n", tex_num); + GLSLF("vec2 size = texture_size%d;\n", tex_num); + GLSLF("vec2 pt = vec2(1.0) / size;\n"); +} + +static void pass_sample_separated_get_weights(struct gl_shader_cache *sc, + struct scaler *scaler) +{ + gl_sc_uniform_sampler(sc, "lut", scaler->gl_target, + TEXUNIT_SCALERS + scaler->index); + + int N = scaler->kernel->size; + if (N == 2) { + GLSL(vec2 c1 = texture(lut, vec2(0.5, fcoord)).RG;) + GLSL(float weights[2] = float[](c1.r, c1.g);) + } else if (N == 6) { + GLSL(vec4 c1 = texture(lut, vec2(0.25, fcoord));) + GLSL(vec4 c2 = texture(lut, vec2(0.75, fcoord));) + GLSL(float weights[6] = float[](c1.r, c1.g, c1.b, c2.r, c2.g, c2.b);) + } else { + GLSLF("float weights[%d];\n", N); + for (int n = 0; n < N / 4; n++) { + GLSLF("c = texture(lut, vec2(1.0 / %d + %d / float(%d), fcoord));\n", + N / 2, n, N / 4); + GLSLF("weights[%d] = c.r;\n", n * 4 + 0); + GLSLF("weights[%d] = c.g;\n", n * 4 + 1); + GLSLF("weights[%d] = c.b;\n", n * 4 + 2); + GLSLF("weights[%d] = c.a;\n", n * 4 + 3); + } + } +} + +// Handle a single pass (either vertical or horizontal). The direction is given +// by the vector (d_x, d_y). 
If the vector is 0, then planar interpolation is +// used instead (samples from texture0 through textureN) +void pass_sample_separated_gen(struct gl_shader_cache *sc, struct scaler *scaler, + int d_x, int d_y) +{ + int N = scaler->kernel->size; + bool use_ar = scaler->conf.antiring > 0; + bool planar = d_x == 0 && d_y == 0; + GLSL(vec4 color = vec4(0.0);) + GLSLF("{\n"); + if (!planar) { + GLSLF("vec2 dir = vec2(%d, %d);\n", d_x, d_y); + GLSL(pt *= dir;) + GLSL(float fcoord = dot(fract(pos * size - vec2(0.5)), dir);) + GLSLF("vec2 base = pos - fcoord * pt - pt * vec2(%d);\n", N / 2 - 1); + } + GLSL(vec4 c;) + if (use_ar) { + GLSL(vec4 hi = vec4(0.0);) + GLSL(vec4 lo = vec4(1.0);) + } + pass_sample_separated_get_weights(sc, scaler); + GLSLF("// scaler samples\n"); + for (int n = 0; n < N; n++) { + if (planar) { + GLSLF("c = texture(texture%d, texcoord%d);\n", n, n); + } else { + GLSLF("c = texture(tex, base + pt * vec2(%d));\n", n); + } + GLSLF("color += vec4(weights[%d]) * c;\n", n); + if (use_ar && (n == N/2-1 || n == N/2)) { + GLSL(lo = min(lo, c);) + GLSL(hi = max(hi, c);) + } + } + if (use_ar) + GLSLF("color = mix(color, clamp(color, lo, hi), %f);\n", + scaler->conf.antiring); + GLSLF("}\n"); +} + +void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler) +{ + double radius = scaler->kernel->f.radius; + int bound = (int)ceil(radius); + bool use_ar = scaler->conf.antiring > 0; + GLSL(vec4 color = vec4(0.0);) + GLSLF("{\n"); + GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));) + GLSL(vec2 base = pos - fcoord * pt;) + GLSL(vec4 c;) + GLSLF("float w, d, wsum = 0.0;\n"); + if (use_ar) { + GLSL(vec4 lo = vec4(1.0);) + GLSL(vec4 hi = vec4(0.0);) + } + gl_sc_uniform_sampler(sc, "lut", scaler->gl_target, + TEXUNIT_SCALERS + scaler->index); + GLSLF("// scaler samples\n"); + for (int y = 1-bound; y <= bound; y++) { + for (int x = 1-bound; x <= bound; x++) { + // Since we can't know the subpixel position in advance, assume a + // worst case scenario + int 
yy = y > 0 ? y-1 : y; + int xx = x > 0 ? x-1 : x; + double dmax = sqrt(xx*xx + yy*yy); + // Skip samples definitely outside the radius + if (dmax >= radius) + continue; + GLSLF("d = length(vec2(%d, %d) - fcoord)/%f;\n", x, y, radius); + // Check for samples that might be skippable + if (dmax >= radius - 1) + GLSLF("if (d < 1.0) {\n"); + GLSL(w = texture1D(lut, d).r;) + GLSL(wsum += w;) + GLSLF("c = texture(tex, base + pt * vec2(%d, %d));\n", x, y); + GLSL(color += vec4(w) * c;) + if (use_ar && x >= 0 && y >= 0 && x <= 1 && y <= 1) { + GLSL(lo = min(lo, c);) + GLSL(hi = max(hi, c);) + } + if (dmax >= radius -1) + GLSLF("}\n"); + } + } + GLSL(color = color / vec4(wsum);) + if (use_ar) + GLSLF("color = mix(color, clamp(color, lo, hi), %f);\n", + scaler->conf.antiring); + GLSLF("}\n"); +} + +static void bicubic_calcweights(struct gl_shader_cache *sc, const char *t, const char *s) +{ + // Explanation of how bicubic scaling with only 4 texel fetches is done: + // http://www.mate.tue.nl/mate/pdfs/10318.pdf + // 'Efficient GPU-Based Texture Interpolation using Uniform B-Splines' + // Explanation why this algorithm normally always blurs, even with unit + // scaling: + // http://bigwww.epfl.ch/preprints/ruijters1001p.pdf + // 'GPU Prefilter for Accurate Cubic B-spline Interpolation' + GLSLF("vec4 %s = vec4(-0.5, 0.1666, 0.3333, -0.3333) * %s" + " + vec4(1, 0, -0.5, 0.5);\n", t, s); + GLSLF("%s = %s * %s + vec4(0, 0, -0.5, 0.5);\n", t, t, s); + GLSLF("%s = %s * %s + vec4(-0.6666, 0, 0.8333, 0.1666);\n", t, t, s); + GLSLF("%s.xy *= vec2(1, 1) / vec2(%s.z, %s.w);\n", t, t, t); + GLSLF("%s.xy += vec2(1 + %s, 1 - %s);\n", t, s, s); +} + +void pass_sample_bicubic_fast(struct gl_shader_cache *sc) +{ + GLSL(vec4 color;) + GLSLF("{\n"); + GLSL(vec2 fcoord = fract(pos * size + vec2(0.5, 0.5));) + bicubic_calcweights(sc, "parmx", "fcoord.x"); + bicubic_calcweights(sc, "parmy", "fcoord.y"); + GLSL(vec4 cdelta;) + GLSL(cdelta.xz = parmx.RG * vec2(-pt.x, pt.x);) + GLSL(cdelta.yw = 
parmy.RG * vec2(-pt.y, pt.y);) + // first y-interpolation + GLSL(vec4 ar = texture(tex, pos + cdelta.xy);) + GLSL(vec4 ag = texture(tex, pos + cdelta.xw);) + GLSL(vec4 ab = mix(ag, ar, parmy.b);) + // second y-interpolation + GLSL(vec4 br = texture(tex, pos + cdelta.zy);) + GLSL(vec4 bg = texture(tex, pos + cdelta.zw);) + GLSL(vec4 aa = mix(bg, br, parmy.b);) + // x-interpolation + GLSL(color = mix(aa, ab, parmx.b);) + GLSLF("}\n"); +} + +void pass_sample_sharpen3(struct gl_shader_cache *sc, struct scaler *scaler) +{ + GLSL(vec4 color;) + GLSLF("{\n"); + GLSL(vec2 st = pt * 0.5;) + GLSL(vec4 p = texture(tex, pos);) + GLSL(vec4 sum = texture(tex, pos + st * vec2(+1, +1)) + + texture(tex, pos + st * vec2(+1, -1)) + + texture(tex, pos + st * vec2(-1, +1)) + + texture(tex, pos + st * vec2(-1, -1));) + float param = scaler->conf.kernel.params[0]; + param = isnan(param) ? 0.5 : param; + GLSLF("color = p + (p - 0.25 * sum) * %f;\n", param); + GLSLF("}\n"); +} + +void pass_sample_sharpen5(struct gl_shader_cache *sc, struct scaler *scaler) +{ + GLSL(vec4 color;) + GLSLF("{\n"); + GLSL(vec2 st1 = pt * 1.2;) + GLSL(vec4 p = texture(tex, pos);) + GLSL(vec4 sum1 = texture(tex, pos + st1 * vec2(+1, +1)) + + texture(tex, pos + st1 * vec2(+1, -1)) + + texture(tex, pos + st1 * vec2(-1, +1)) + + texture(tex, pos + st1 * vec2(-1, -1));) + GLSL(vec2 st2 = pt * 1.5;) + GLSL(vec4 sum2 = texture(tex, pos + st2 * vec2(+1, 0)) + + texture(tex, pos + st2 * vec2( 0, +1)) + + texture(tex, pos + st2 * vec2(-1, 0)) + + texture(tex, pos + st2 * vec2( 0, -1));) + GLSL(vec4 t = p * 0.859375 + sum2 * -0.1171875 + sum1 * -0.09765625;) + float param = scaler->conf.kernel.params[0]; + param = isnan(param) ? 
0.5 : param; + GLSLF("color = p + t * %f;\n", param); + GLSLF("}\n"); +} + +void pass_sample_oversample(struct gl_shader_cache *sc, struct scaler *scaler, + int w, int h) +{ + GLSL(vec4 color;) + GLSLF("{\n"); + GLSL(vec2 pos = pos + vec2(0.5) * pt;) // round to nearest + GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));) + // We only need to sample from the four corner pixels since we're using + // nearest neighbour and can compute the exact transition point + GLSL(vec2 baseNW = pos - fcoord * pt;) + GLSL(vec2 baseNE = baseNW + vec2(pt.x, 0.0);) + GLSL(vec2 baseSW = baseNW + vec2(0.0, pt.y);) + GLSL(vec2 baseSE = baseNW + pt;) + // Determine the mixing coefficient vector + gl_sc_uniform_vec2(sc, "output_size", (float[2]){w, h}); + GLSL(vec2 coeff = vec2((baseSE - pos) * output_size);) + GLSL(coeff = clamp(coeff, 0.0, 1.0);) + float threshold = scaler->conf.kernel.params[0]; + if (threshold > 0) { // also rules out NAN + GLSLF("coeff = mix(coeff, vec2(0.0), " + "lessThanEqual(coeff, vec2(%f)));\n", threshold); + GLSLF("coeff = mix(coeff, vec2(1.0), " + "greaterThanEqual(coeff, vec2(%f)));\n", 1.0 - threshold); + } + // Compute the right blend of colors + GLSL(vec4 left = mix(texture(tex, baseSW), + texture(tex, baseNW), + coeff.y);) + GLSL(vec4 right = mix(texture(tex, baseSE), + texture(tex, baseNE), + coeff.y);) + GLSL(color = mix(right, left, coeff.x);) + GLSLF("}\n"); +} + +// Linearize (expand), given a TRC as input +void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc) +{ + if (trc == MP_CSP_TRC_LINEAR) + return; + + GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);) + switch (trc) { + case MP_CSP_TRC_SRGB: + GLSL(color.rgb = mix(color.rgb / vec3(12.92), + pow((color.rgb + vec3(0.055))/vec3(1.055), + vec3(2.4)), + lessThan(vec3(0.04045), color.rgb));) + break; + case MP_CSP_TRC_BT_1886: + GLSL(color.rgb = pow(color.rgb, vec3(1.961));) + break; + case MP_CSP_TRC_GAMMA18: + GLSL(color.rgb = pow(color.rgb, vec3(1.8));) + break; + case 
MP_CSP_TRC_GAMMA22: + GLSL(color.rgb = pow(color.rgb, vec3(2.2));) + break; + case MP_CSP_TRC_GAMMA28: + GLSL(color.rgb = pow(color.rgb, vec3(2.8));) + break; + case MP_CSP_TRC_PRO_PHOTO: + GLSL(color.rgb = mix(color.rgb / vec3(16.0), + pow(color.rgb, vec3(1.8)), + lessThan(vec3(0.03125), color.rgb));) + break; + } +} + +// Delinearize (compress), given a TRC as output +void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc) +{ + if (trc == MP_CSP_TRC_LINEAR) + return; + + GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);) + switch (trc) { + case MP_CSP_TRC_SRGB: + GLSL(color.rgb = mix(color.rgb * vec3(12.92), + vec3(1.055) * pow(color.rgb, vec3(1.0/2.4)) + - vec3(0.055), + lessThanEqual(vec3(0.0031308), color.rgb));) + break; + case MP_CSP_TRC_BT_1886: + GLSL(color.rgb = pow(color.rgb, vec3(1.0/1.961));) + break; + case MP_CSP_TRC_GAMMA18: + GLSL(color.rgb = pow(color.rgb, vec3(1.0/1.8));) + break; + case MP_CSP_TRC_GAMMA22: + GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.2));) + break; + case MP_CSP_TRC_GAMMA28: + GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.8));) + break; + case MP_CSP_TRC_PRO_PHOTO: + GLSL(color.rgb = mix(color.rgb * vec3(16.0), + pow(color.rgb, vec3(1.0/1.8)), + lessThanEqual(vec3(0.001953), color.rgb));) + break; + } +} diff --git a/video/out/opengl/video_shaders.h b/video/out/opengl/video_shaders.h new file mode 100644 index 0000000000..05d622750c --- /dev/null +++ b/video/out/opengl/video_shaders.h @@ -0,0 +1,43 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with mpv. If not, see <http://www.gnu.org/licenses/>. + * + * You can alternatively redistribute this file and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + */ + +#ifndef MP_GL_VIDEO_SHADERS_H +#define MP_GL_VIDEO_SHADERS_H + +#include "common.h" +#include "utils.h" +#include "video.h" + +void sampler_prelude(struct gl_shader_cache *sc, int tex_num); +void pass_sample_separated_gen(struct gl_shader_cache *sc, struct scaler *scaler, + int d_x, int d_y); +void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler); +void pass_sample_bicubic_fast(struct gl_shader_cache *sc); +void pass_sample_sharpen3(struct gl_shader_cache *sc, struct scaler *scaler); +void pass_sample_sharpen5(struct gl_shader_cache *sc, struct scaler *scaler); +void pass_sample_oversample(struct gl_shader_cache *sc, struct scaler *scaler, + int w, int h); + +void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc); +void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc); + +#endif |