aboutsummaryrefslogtreecommitdiffhomepage
path: root/video/out/opengl
diff options
context:
space:
mode:
authorGravatar Niklas Haas <git@nand.wakku.to>2015-09-05 14:03:00 +0200
committerGravatar wm4 <wm4@nowhere>2015-09-09 18:17:44 +0200
commiteb56807b414229a00cd2bee5e74beae5a01e8fb1 (patch)
tree7d2c22371b50208e989c35932b511e5271a83946 /video/out/opengl
parent7929e36e9329c842790193389000e968f5a57426 (diff)
vo_opengl: move self-contained shader routines to a separate file
This is mostly to cut down somewhat on the amount of code bloat in video.c by moving out helper functions (including scaler kernels and color management routines) to a separate file. It would certainly be possible to move out more functions (eg. dithering or CMS code) with some extra effort/refactoring, but this is a start. Signed-off-by: wm4 <wm4@nowhere>
Diffstat (limited to 'video/out/opengl')
-rw-r--r--video/out/opengl/video.c365
-rw-r--r--video/out/opengl/video.h25
-rw-r--r--video/out/opengl/video_shaders.c339
-rw-r--r--video/out/opengl/video_shaders.h43
4 files changed, 424 insertions, 348 deletions
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c
index 3633329d19..7d6102ff82 100644
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -36,6 +36,7 @@
#include "utils.h"
#include "hwdec.h"
#include "osd.h"
+#include "video_shaders.h"
#include "video/out/filter_kernels.h"
#include "video/out/aspect.h"
#include "video/out/bitmap_packer.h"
@@ -45,14 +46,6 @@
// Pixel width of 1D lookup textures.
#define LOOKUP_TEXTURE_SIZE 256
-// Texture units 0-5 are used by the video, and for free use by the passes
-#define TEXUNIT_VIDEO_NUM 6
-
-// Other texture units are reserved for specific purposes
-#define TEXUNIT_SCALERS TEXUNIT_VIDEO_NUM
-#define TEXUNIT_3DLUT (TEXUNIT_SCALERS+4)
-#define TEXUNIT_DITHER (TEXUNIT_3DLUT+1)
-
// scale/cscale arguments that map directly to shader filter routines.
// Note that the convolution filters are not included in this list.
static const char *const fixed_scale_filters[] = {
@@ -110,21 +103,6 @@ struct video_image {
struct mp_image *mpi; // original input image
};
-struct scaler {
- int index;
- struct scaler_config conf;
- double scale_factor;
- bool initialized;
- struct filter_kernel *kernel;
- GLuint gl_lut;
- GLenum gl_target;
- struct fbotex sep_fbo;
- bool insufficient;
-
- // kernel points here
- struct filter_kernel kernel_storage;
-};
-
struct fbosurface {
struct fbotex fbotex;
double pts;
@@ -1030,84 +1008,7 @@ static void reinit_scaler(struct gl_video *p, struct scaler *scaler,
debug_check_gl(p, "after initializing scaler");
}
-// Set up shared/commonly used variables
-static void sampler_prelude(struct gl_video *p, int tex_num)
-{
- GLSLF("#define tex texture%d\n", tex_num);
- GLSLF("vec2 pos = texcoord%d;\n", tex_num);
- GLSLF("vec2 size = texture_size%d;\n", tex_num);
- GLSLF("vec2 pt = vec2(1.0) / size;\n");
-}
-
-static void pass_sample_separated_get_weights(struct gl_video *p,
- struct scaler *scaler)
-{
- gl_sc_uniform_sampler(p->sc, "lut", scaler->gl_target,
- TEXUNIT_SCALERS + scaler->index);
-
- int N = scaler->kernel->size;
- if (N == 2) {
- GLSL(vec2 c1 = texture(lut, vec2(0.5, fcoord)).RG;)
- GLSL(float weights[2] = float[](c1.r, c1.g);)
- } else if (N == 6) {
- GLSL(vec4 c1 = texture(lut, vec2(0.25, fcoord));)
- GLSL(vec4 c2 = texture(lut, vec2(0.75, fcoord));)
- GLSL(float weights[6] = float[](c1.r, c1.g, c1.b, c2.r, c2.g, c2.b);)
- } else {
- GLSLF("float weights[%d];\n", N);
- for (int n = 0; n < N / 4; n++) {
- GLSLF("c = texture(lut, vec2(1.0 / %d + %d / float(%d), fcoord));\n",
- N / 2, n, N / 4);
- GLSLF("weights[%d] = c.r;\n", n * 4 + 0);
- GLSLF("weights[%d] = c.g;\n", n * 4 + 1);
- GLSLF("weights[%d] = c.b;\n", n * 4 + 2);
- GLSLF("weights[%d] = c.a;\n", n * 4 + 3);
- }
- }
-}
-
-// Handle a single pass (either vertical or horizontal). The direction is given
-// by the vector (d_x, d_y). If the vector is 0, then planar interpolation is
-// used instead (samples from texture0 through textureN)
-static void pass_sample_separated_gen(struct gl_video *p, struct scaler *scaler,
- int d_x, int d_y)
-{
- int N = scaler->kernel->size;
- bool use_ar = scaler->conf.antiring > 0;
- bool planar = d_x == 0 && d_y == 0;
- GLSL(vec4 color = vec4(0.0);)
- GLSLF("{\n");
- if (!planar) {
- GLSLF("vec2 dir = vec2(%d, %d);\n", d_x, d_y);
- GLSL(pt *= dir;)
- GLSL(float fcoord = dot(fract(pos * size - vec2(0.5)), dir);)
- GLSLF("vec2 base = pos - fcoord * pt - pt * vec2(%d);\n", N / 2 - 1);
- }
- GLSL(vec4 c;)
- if (use_ar) {
- GLSL(vec4 hi = vec4(0.0);)
- GLSL(vec4 lo = vec4(1.0);)
- }
- pass_sample_separated_get_weights(p, scaler);
- GLSLF("// scaler samples\n");
- for (int n = 0; n < N; n++) {
- if (planar) {
- GLSLF("c = texture(texture%d, texcoord%d);\n", n, n);
- } else {
- GLSLF("c = texture(tex, base + pt * vec2(%d));\n", n);
- }
- GLSLF("color += vec4(weights[%d]) * c;\n", n);
- if (use_ar && (n == N/2-1 || n == N/2)) {
- GLSL(lo = min(lo, c);)
- GLSL(hi = max(hi, c);)
- }
- }
- if (use_ar)
- GLSLF("color = mix(color, clamp(color, lo, hi), %f);\n",
- scaler->conf.antiring);
- GLSLF("}\n");
-}
-
+// Special helper for sampling from two separated stages
static void pass_sample_separated(struct gl_video *p, int src_tex,
struct scaler *scaler, int w, int h,
struct gl_transform transform)
@@ -1118,179 +1019,15 @@ static void pass_sample_separated(struct gl_video *p, int src_tex,
GLSLF("// pass 1\n");
p->pass_tex[src_tex].src.y0 = src_new.y0;
p->pass_tex[src_tex].src.y1 = src_new.y1;
- pass_sample_separated_gen(p, scaler, 0, 1);
+ pass_sample_separated_gen(p->sc, scaler, 0, 1);
int src_w = p->pass_tex[src_tex].src.x1 - p->pass_tex[src_tex].src.x0;
finish_pass_fbo(p, &scaler->sep_fbo, src_w, h, src_tex, FBOTEX_FUZZY_H);
// Restore the sample source for the second pass
- sampler_prelude(p, src_tex);
+ sampler_prelude(p->sc, src_tex);
GLSLF("// pass 2\n");
p->pass_tex[src_tex].src.x0 = src_new.x0;
p->pass_tex[src_tex].src.x1 = src_new.x1;
- pass_sample_separated_gen(p, scaler, 1, 0);
-}
-
-static void pass_sample_polar(struct gl_video *p, struct scaler *scaler)
-{
- double radius = scaler->kernel->f.radius;
- int bound = (int)ceil(radius);
- bool use_ar = scaler->conf.antiring > 0;
- GLSL(vec4 color = vec4(0.0);)
- GLSLF("{\n");
- GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));)
- GLSL(vec2 base = pos - fcoord * pt;)
- GLSL(vec4 c;)
- GLSLF("float w, d, wsum = 0.0;\n");
- if (use_ar) {
- GLSL(vec4 lo = vec4(1.0);)
- GLSL(vec4 hi = vec4(0.0);)
- }
- gl_sc_uniform_sampler(p->sc, "lut", scaler->gl_target,
- TEXUNIT_SCALERS + scaler->index);
- GLSLF("// scaler samples\n");
- for (int y = 1-bound; y <= bound; y++) {
- for (int x = 1-bound; x <= bound; x++) {
- // Since we can't know the subpixel position in advance, assume a
- // worst case scenario
- int yy = y > 0 ? y-1 : y;
- int xx = x > 0 ? x-1 : x;
- double dmax = sqrt(xx*xx + yy*yy);
- // Skip samples definitely outside the radius
- if (dmax >= radius)
- continue;
- GLSLF("d = length(vec2(%d, %d) - fcoord)/%f;\n", x, y, radius);
- // Check for samples that might be skippable
- if (dmax >= radius - 1)
- GLSLF("if (d < 1.0) {\n");
- GLSL(w = texture1D(lut, d).r;)
- GLSL(wsum += w;)
- GLSLF("c = texture(tex, base + pt * vec2(%d, %d));\n", x, y);
- GLSL(color += vec4(w) * c;)
- if (use_ar && x >= 0 && y >= 0 && x <= 1 && y <= 1) {
- GLSL(lo = min(lo, c);)
- GLSL(hi = max(hi, c);)
- }
- if (dmax >= radius -1)
- GLSLF("}\n");
- }
- }
- GLSL(color = color / vec4(wsum);)
- if (use_ar)
- GLSLF("color = mix(color, clamp(color, lo, hi), %f);\n",
- scaler->conf.antiring);
- GLSLF("}\n");
-}
-
-static void bicubic_calcweights(struct gl_video *p, const char *t, const char *s)
-{
- // Explanation of how bicubic scaling with only 4 texel fetches is done:
- // http://www.mate.tue.nl/mate/pdfs/10318.pdf
- // 'Efficient GPU-Based Texture Interpolation using Uniform B-Splines'
- // Explanation why this algorithm normally always blurs, even with unit
- // scaling:
- // http://bigwww.epfl.ch/preprints/ruijters1001p.pdf
- // 'GPU Prefilter for Accurate Cubic B-spline Interpolation'
- GLSLF("vec4 %s = vec4(-0.5, 0.1666, 0.3333, -0.3333) * %s"
- " + vec4(1, 0, -0.5, 0.5);\n", t, s);
- GLSLF("%s = %s * %s + vec4(0, 0, -0.5, 0.5);\n", t, t, s);
- GLSLF("%s = %s * %s + vec4(-0.6666, 0, 0.8333, 0.1666);\n", t, t, s);
- GLSLF("%s.xy *= vec2(1, 1) / vec2(%s.z, %s.w);\n", t, t, t);
- GLSLF("%s.xy += vec2(1 + %s, 1 - %s);\n", t, s, s);
-}
-
-static void pass_sample_bicubic_fast(struct gl_video *p)
-{
- GLSL(vec4 color;)
- GLSLF("{\n");
- GLSL(vec2 fcoord = fract(pos * size + vec2(0.5, 0.5));)
- bicubic_calcweights(p, "parmx", "fcoord.x");
- bicubic_calcweights(p, "parmy", "fcoord.y");
- GLSL(vec4 cdelta;)
- GLSL(cdelta.xz = parmx.RG * vec2(-pt.x, pt.x);)
- GLSL(cdelta.yw = parmy.RG * vec2(-pt.y, pt.y);)
- // first y-interpolation
- GLSL(vec4 ar = texture(tex, pos + cdelta.xy);)
- GLSL(vec4 ag = texture(tex, pos + cdelta.xw);)
- GLSL(vec4 ab = mix(ag, ar, parmy.b);)
- // second y-interpolation
- GLSL(vec4 br = texture(tex, pos + cdelta.zy);)
- GLSL(vec4 bg = texture(tex, pos + cdelta.zw);)
- GLSL(vec4 aa = mix(bg, br, parmy.b);)
- // x-interpolation
- GLSL(color = mix(aa, ab, parmx.b);)
- GLSLF("}\n");
-}
-
-static void pass_sample_sharpen3(struct gl_video *p, struct scaler *scaler)
-{
- GLSL(vec4 color;)
- GLSLF("{\n");
- GLSL(vec2 st = pt * 0.5;)
- GLSL(vec4 p = texture(tex, pos);)
- GLSL(vec4 sum = texture(tex, pos + st * vec2(+1, +1))
- + texture(tex, pos + st * vec2(+1, -1))
- + texture(tex, pos + st * vec2(-1, +1))
- + texture(tex, pos + st * vec2(-1, -1));)
- float param = scaler->conf.kernel.params[0];
- param = isnan(param) ? 0.5 : param;
- GLSLF("color = p + (p - 0.25 * sum) * %f;\n", param);
- GLSLF("}\n");
-}
-
-static void pass_sample_sharpen5(struct gl_video *p, struct scaler *scaler)
-{
- GLSL(vec4 color;)
- GLSLF("{\n");
- GLSL(vec2 st1 = pt * 1.2;)
- GLSL(vec4 p = texture(tex, pos);)
- GLSL(vec4 sum1 = texture(tex, pos + st1 * vec2(+1, +1))
- + texture(tex, pos + st1 * vec2(+1, -1))
- + texture(tex, pos + st1 * vec2(-1, +1))
- + texture(tex, pos + st1 * vec2(-1, -1));)
- GLSL(vec2 st2 = pt * 1.5;)
- GLSL(vec4 sum2 = texture(tex, pos + st2 * vec2(+1, 0))
- + texture(tex, pos + st2 * vec2( 0, +1))
- + texture(tex, pos + st2 * vec2(-1, 0))
- + texture(tex, pos + st2 * vec2( 0, -1));)
- GLSL(vec4 t = p * 0.859375 + sum2 * -0.1171875 + sum1 * -0.09765625;)
- float param = scaler->conf.kernel.params[0];
- param = isnan(param) ? 0.5 : param;
- GLSLF("color = p + t * %f;\n", param);
- GLSLF("}\n");
-}
-
-static void pass_sample_oversample(struct gl_video *p, struct scaler *scaler,
- int w, int h)
-{
- GLSL(vec4 color;)
- GLSLF("{\n");
- GLSL(vec2 pos = pos + vec2(0.5) * pt;) // round to nearest
- GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));)
- // We only need to sample from the four corner pixels since we're using
- // nearest neighbour and can compute the exact transition point
- GLSL(vec2 baseNW = pos - fcoord * pt;)
- GLSL(vec2 baseNE = baseNW + vec2(pt.x, 0.0);)
- GLSL(vec2 baseSW = baseNW + vec2(0.0, pt.y);)
- GLSL(vec2 baseSE = baseNW + pt;)
- // Determine the mixing coefficient vector
- gl_sc_uniform_vec2(p->sc, "output_size", (float[2]){w, h});
- GLSL(vec2 coeff = vec2((baseSE - pos) * output_size);)
- GLSL(coeff = clamp(coeff, 0.0, 1.0);)
- float threshold = scaler->conf.kernel.params[0];
- if (threshold > 0) { // also rules out NAN
- GLSLF("coeff = mix(coeff, vec2(0.0), "
- "lessThanEqual(coeff, vec2(%f)));\n", threshold);
- GLSLF("coeff = mix(coeff, vec2(1.0), "
- "greaterThanEqual(coeff, vec2(%f)));\n", 1.0 - threshold);
- }
- // Compute the right blend of colors
- GLSL(vec4 left = mix(texture(tex, baseSW),
- texture(tex, baseNW),
- coeff.y);)
- GLSL(vec4 right = mix(texture(tex, baseSE),
- texture(tex, baseNE),
- coeff.y);)
- GLSL(color = mix(right, left, coeff.x);)
- GLSLF("}\n");
+ pass_sample_separated_gen(p->sc, scaler, 1, 0);
}
// Sample. This samples from the texture ID given by src_tex. It's hardcoded to
@@ -1307,7 +1044,7 @@ static void pass_sample(struct gl_video *p, int src_tex, struct scaler *scaler,
int w, int h, struct gl_transform transform)
{
reinit_scaler(p, scaler, conf, scale_factor, filter_sizes);
- sampler_prelude(p, src_tex);
+ sampler_prelude(p->sc, src_tex);
// Set up the transformation for everything other than separated scaling
if (!scaler->kernel || scaler->kernel->polar)
@@ -1318,13 +1055,13 @@ static void pass_sample(struct gl_video *p, int src_tex, struct scaler *scaler,
if (strcmp(name, "bilinear") == 0) {
GLSL(vec4 color = texture(tex, pos);)
} else if (strcmp(name, "bicubic_fast") == 0) {
- pass_sample_bicubic_fast(p);
+ pass_sample_bicubic_fast(p->sc);
} else if (strcmp(name, "sharpen3") == 0) {
- pass_sample_sharpen3(p, scaler);
+ pass_sample_sharpen3(p->sc, scaler);
} else if (strcmp(name, "sharpen5") == 0) {
- pass_sample_sharpen5(p, scaler);
+ pass_sample_sharpen5(p->sc, scaler);
} else if (strcmp(name, "oversample") == 0) {
- pass_sample_oversample(p, scaler, w, h);
+ pass_sample_oversample(p->sc, scaler, w, h);
} else if (strcmp(name, "custom") == 0) {
const char *body = gl_sc_loadfile(p->sc, p->opts.scale_shader);
if (body) {
@@ -1335,7 +1072,7 @@ static void pass_sample(struct gl_video *p, int src_tex, struct scaler *scaler,
p->opts.scale_shader = NULL;
}
} else if (scaler->kernel && scaler->kernel->polar) {
- pass_sample_polar(p, scaler);
+ pass_sample_polar(p->sc, scaler);
} else if (scaler->kernel) {
pass_sample_separated(p, src_tex, scaler, w, h, transform);
} else {
@@ -1546,74 +1283,6 @@ static void get_scale_factors(struct gl_video *p, double xy[2])
(double)(p->src_rect.y1 - p->src_rect.y0);
}
-// Linearize (expand), given a TRC as input
-static void pass_linearize(struct gl_video *p, enum mp_csp_trc trc)
-{
- if (trc == MP_CSP_TRC_LINEAR)
- return;
-
- GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);)
- switch (trc) {
- case MP_CSP_TRC_SRGB:
- GLSL(color.rgb = mix(color.rgb / vec3(12.92),
- pow((color.rgb + vec3(0.055))/vec3(1.055),
- vec3(2.4)),
- lessThan(vec3(0.04045), color.rgb));)
- break;
- case MP_CSP_TRC_BT_1886:
- GLSL(color.rgb = pow(color.rgb, vec3(1.961));)
- break;
- case MP_CSP_TRC_GAMMA18:
- GLSL(color.rgb = pow(color.rgb, vec3(1.8));)
- break;
- case MP_CSP_TRC_GAMMA22:
- GLSL(color.rgb = pow(color.rgb, vec3(2.2));)
- break;
- case MP_CSP_TRC_GAMMA28:
- GLSL(color.rgb = pow(color.rgb, vec3(2.8));)
- break;
- case MP_CSP_TRC_PRO_PHOTO:
- GLSL(color.rgb = mix(color.rgb / vec3(16.0),
- pow(color.rgb, vec3(1.8)),
- lessThan(vec3(0.03125), color.rgb));)
- break;
- }
-}
-
-// Delinearize (compress), given a TRC as output
-static void pass_delinearize(struct gl_video *p, enum mp_csp_trc trc)
-{
- if (trc == MP_CSP_TRC_LINEAR)
- return;
-
- GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);)
- switch (trc) {
- case MP_CSP_TRC_SRGB:
- GLSL(color.rgb = mix(color.rgb * vec3(12.92),
- vec3(1.055) * pow(color.rgb, vec3(1.0/2.4))
- - vec3(0.055),
- lessThanEqual(vec3(0.0031308), color.rgb));)
- break;
- case MP_CSP_TRC_BT_1886:
- GLSL(color.rgb = pow(color.rgb, vec3(1.0/1.961));)
- break;
- case MP_CSP_TRC_GAMMA18:
- GLSL(color.rgb = pow(color.rgb, vec3(1.0/1.8));)
- break;
- case MP_CSP_TRC_GAMMA22:
- GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.2));)
- break;
- case MP_CSP_TRC_GAMMA28:
- GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.8));)
- break;
- case MP_CSP_TRC_PRO_PHOTO:
- GLSL(color.rgb = mix(color.rgb * vec3(16.0),
- pow(color.rgb, vec3(1.0/1.8)),
- lessThanEqual(vec3(0.001953), color.rgb));)
- break;
- }
-}
-
// Compute the cropped and rotated transformation of the video source rectangle.
// vp_w and vp_h are set to the _destination_ video size.
static void compute_src_transform(struct gl_video *p, struct gl_transform *tr,
@@ -1669,7 +1338,7 @@ static void pass_scale_main(struct gl_video *p)
// Pre-conversion, like linear light/sigmoidization
GLSLF("// scaler pre-conversion\n");
if (p->use_linear)
- pass_linearize(p, p->image_params.gamma);
+ pass_linearize(p->sc, p->image_params.gamma);
bool use_sigmoid = p->use_linear && p->opts.sigmoid_upscaling && upscaling;
float sig_center, sig_slope, sig_offset, sig_scale;
@@ -1733,7 +1402,7 @@ static void pass_colormanage(struct gl_video *p, enum mp_csp_prim prim_src,
bool need_cms = prim_src != prim_dst || p->use_lut_3d;
bool need_gamma = trc_src != trc_dst || need_cms;
if (need_gamma)
- pass_linearize(p, trc_src);
+ pass_linearize(p->sc, trc_src);
// Adapt to the right colorspace if necessary
if (prim_src != prim_dst) {
struct mp_csp_primaries csp_src = mp_get_csp_primaries(prim_src),
@@ -1752,7 +1421,7 @@ static void pass_colormanage(struct gl_video *p, enum mp_csp_prim prim_src,
GLSL(color.rgb = texture3D(lut_3d, color.rgb).rgb;)
}
if (need_gamma)
- pass_delinearize(p, trc_dst);
+ pass_delinearize(p->sc, trc_dst);
}
static void pass_dither(struct gl_video *p)
@@ -1982,13 +1651,13 @@ static void pass_render_frame(struct gl_video *p)
rect.mt *= scale[1]; rect.mb *= scale[1];
// We should always blend subtitles in non-linear light
if (p->use_linear)
- pass_delinearize(p, p->image_params.gamma);
+ pass_delinearize(p->sc, p->image_params.gamma);
finish_pass_fbo(p, &p->blend_subs_fbo, vp_w, vp_h, 0, FBOTEX_FUZZY);
pass_draw_osd(p, OSD_DRAW_SUB_ONLY, vpts, rect, vp_w, vp_h,
p->blend_subs_fbo.fbo, false);
GLSL(vec4 color = texture(texture0, texcoord0);)
if (p->use_linear)
- pass_linearize(p, p->image_params.gamma);
+ pass_linearize(p->sc, p->image_params.gamma);
}
apply_shaders(p, p->opts.post_shaders, &p->post_fbo[0], 0, vp_w, vp_h);
@@ -2164,7 +1833,7 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t,
inter_coeff);)
} else {
gl_sc_uniform_f(p->sc, "fcoord", mix);
- pass_sample_separated_gen(p, tscale, 0, 0);
+ pass_sample_separated_gen(p->sc, tscale, 0, 0);
}
// Load all the required frames
diff --git a/video/out/opengl/video.h b/video/out/opengl/video.h
index a52f35c424..65fdb6c3ab 100644
--- a/video/out/opengl/video.h
+++ b/video/out/opengl/video.h
@@ -22,6 +22,16 @@
#include "options/m_option.h"
#include "sub/osd.h"
#include "common.h"
+#include "utils.h"
+#include "video/out/filter_kernels.h"
+
+// Texture units 0-5 are used by the video, and for free use by the passes
+#define TEXUNIT_VIDEO_NUM 6
+
+// Other texture units are reserved for specific purposes
+#define TEXUNIT_SCALERS TEXUNIT_VIDEO_NUM
+#define TEXUNIT_3DLUT (TEXUNIT_SCALERS+4)
+#define TEXUNIT_DITHER (TEXUNIT_3DLUT+1)
struct lut3d {
uint16_t *data;
@@ -42,6 +52,21 @@ struct scaler_config {
int clamp;
};
+struct scaler {
+ int index;
+ struct scaler_config conf;
+ double scale_factor;
+ bool initialized;
+ struct filter_kernel *kernel;
+ GLuint gl_lut;
+ GLenum gl_target;
+ struct fbotex sep_fbo;
+ bool insufficient;
+
+ // kernel points here
+ struct filter_kernel kernel_storage;
+};
+
struct gl_video_opts {
int dumb_mode;
struct scaler_config scaler[4];
diff --git a/video/out/opengl/video_shaders.c b/video/out/opengl/video_shaders.c
new file mode 100644
index 0000000000..26d5636184
--- /dev/null
+++ b/video/out/opengl/video_shaders.c
@@ -0,0 +1,339 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with mpv. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * You can alternatively redistribute this file and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ */
+
+#include <math.h>
+
+#include "video_shaders.h"
+#include "video.h"
+
+#define GLSL(x) gl_sc_add(sc, #x "\n");
+#define GLSLF(...) gl_sc_addf(sc, __VA_ARGS__)
+
+// Set up shared/commonly used variables
+void sampler_prelude(struct gl_shader_cache *sc, int tex_num)
+{
+ GLSLF("#define tex texture%d\n", tex_num);
+ GLSLF("vec2 pos = texcoord%d;\n", tex_num);
+ GLSLF("vec2 size = texture_size%d;\n", tex_num);
+ GLSLF("vec2 pt = vec2(1.0) / size;\n");
+}
+
+static void pass_sample_separated_get_weights(struct gl_shader_cache *sc,
+ struct scaler *scaler)
+{
+ gl_sc_uniform_sampler(sc, "lut", scaler->gl_target,
+ TEXUNIT_SCALERS + scaler->index);
+
+ int N = scaler->kernel->size;
+ if (N == 2) {
+ GLSL(vec2 c1 = texture(lut, vec2(0.5, fcoord)).RG;)
+ GLSL(float weights[2] = float[](c1.r, c1.g);)
+ } else if (N == 6) {
+ GLSL(vec4 c1 = texture(lut, vec2(0.25, fcoord));)
+ GLSL(vec4 c2 = texture(lut, vec2(0.75, fcoord));)
+ GLSL(float weights[6] = float[](c1.r, c1.g, c1.b, c2.r, c2.g, c2.b);)
+ } else {
+ GLSLF("float weights[%d];\n", N);
+ for (int n = 0; n < N / 4; n++) {
+ GLSLF("c = texture(lut, vec2(1.0 / %d + %d / float(%d), fcoord));\n",
+ N / 2, n, N / 4);
+ GLSLF("weights[%d] = c.r;\n", n * 4 + 0);
+ GLSLF("weights[%d] = c.g;\n", n * 4 + 1);
+ GLSLF("weights[%d] = c.b;\n", n * 4 + 2);
+ GLSLF("weights[%d] = c.a;\n", n * 4 + 3);
+ }
+ }
+}
+
+// Handle a single pass (either vertical or horizontal). The direction is given
+// by the vector (d_x, d_y). If the vector is 0, then planar interpolation is
+// used instead (samples from texture0 through textureN)
+void pass_sample_separated_gen(struct gl_shader_cache *sc, struct scaler *scaler,
+ int d_x, int d_y)
+{
+ int N = scaler->kernel->size;
+ bool use_ar = scaler->conf.antiring > 0;
+ bool planar = d_x == 0 && d_y == 0;
+ GLSL(vec4 color = vec4(0.0);)
+ GLSLF("{\n");
+ if (!planar) {
+ GLSLF("vec2 dir = vec2(%d, %d);\n", d_x, d_y);
+ GLSL(pt *= dir;)
+ GLSL(float fcoord = dot(fract(pos * size - vec2(0.5)), dir);)
+ GLSLF("vec2 base = pos - fcoord * pt - pt * vec2(%d);\n", N / 2 - 1);
+ }
+ GLSL(vec4 c;)
+ if (use_ar) {
+ GLSL(vec4 hi = vec4(0.0);)
+ GLSL(vec4 lo = vec4(1.0);)
+ }
+ pass_sample_separated_get_weights(sc, scaler);
+ GLSLF("// scaler samples\n");
+ for (int n = 0; n < N; n++) {
+ if (planar) {
+ GLSLF("c = texture(texture%d, texcoord%d);\n", n, n);
+ } else {
+ GLSLF("c = texture(tex, base + pt * vec2(%d));\n", n);
+ }
+ GLSLF("color += vec4(weights[%d]) * c;\n", n);
+ if (use_ar && (n == N/2-1 || n == N/2)) {
+ GLSL(lo = min(lo, c);)
+ GLSL(hi = max(hi, c);)
+ }
+ }
+ if (use_ar)
+ GLSLF("color = mix(color, clamp(color, lo, hi), %f);\n",
+ scaler->conf.antiring);
+ GLSLF("}\n");
+}
+
+void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler)
+{
+ double radius = scaler->kernel->f.radius;
+ int bound = (int)ceil(radius);
+ bool use_ar = scaler->conf.antiring > 0;
+ GLSL(vec4 color = vec4(0.0);)
+ GLSLF("{\n");
+ GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));)
+ GLSL(vec2 base = pos - fcoord * pt;)
+ GLSL(vec4 c;)
+ GLSLF("float w, d, wsum = 0.0;\n");
+ if (use_ar) {
+ GLSL(vec4 lo = vec4(1.0);)
+ GLSL(vec4 hi = vec4(0.0);)
+ }
+ gl_sc_uniform_sampler(sc, "lut", scaler->gl_target,
+ TEXUNIT_SCALERS + scaler->index);
+ GLSLF("// scaler samples\n");
+ for (int y = 1-bound; y <= bound; y++) {
+ for (int x = 1-bound; x <= bound; x++) {
+ // Since we can't know the subpixel position in advance, assume a
+ // worst case scenario
+ int yy = y > 0 ? y-1 : y;
+ int xx = x > 0 ? x-1 : x;
+ double dmax = sqrt(xx*xx + yy*yy);
+ // Skip samples definitely outside the radius
+ if (dmax >= radius)
+ continue;
+ GLSLF("d = length(vec2(%d, %d) - fcoord)/%f;\n", x, y, radius);
+ // Check for samples that might be skippable
+ if (dmax >= radius - 1)
+ GLSLF("if (d < 1.0) {\n");
+ GLSL(w = texture1D(lut, d).r;)
+ GLSL(wsum += w;)
+ GLSLF("c = texture(tex, base + pt * vec2(%d, %d));\n", x, y);
+ GLSL(color += vec4(w) * c;)
+ if (use_ar && x >= 0 && y >= 0 && x <= 1 && y <= 1) {
+ GLSL(lo = min(lo, c);)
+ GLSL(hi = max(hi, c);)
+ }
+ if (dmax >= radius -1)
+ GLSLF("}\n");
+ }
+ }
+ GLSL(color = color / vec4(wsum);)
+ if (use_ar)
+ GLSLF("color = mix(color, clamp(color, lo, hi), %f);\n",
+ scaler->conf.antiring);
+ GLSLF("}\n");
+}
+
+static void bicubic_calcweights(struct gl_shader_cache *sc, const char *t, const char *s)
+{
+ // Explanation of how bicubic scaling with only 4 texel fetches is done:
+ // http://www.mate.tue.nl/mate/pdfs/10318.pdf
+ // 'Efficient GPU-Based Texture Interpolation using Uniform B-Splines'
+ // Explanation why this algorithm normally always blurs, even with unit
+ // scaling:
+ // http://bigwww.epfl.ch/preprints/ruijters1001p.pdf
+ // 'GPU Prefilter for Accurate Cubic B-spline Interpolation'
+ GLSLF("vec4 %s = vec4(-0.5, 0.1666, 0.3333, -0.3333) * %s"
+ " + vec4(1, 0, -0.5, 0.5);\n", t, s);
+ GLSLF("%s = %s * %s + vec4(0, 0, -0.5, 0.5);\n", t, t, s);
+ GLSLF("%s = %s * %s + vec4(-0.6666, 0, 0.8333, 0.1666);\n", t, t, s);
+ GLSLF("%s.xy *= vec2(1, 1) / vec2(%s.z, %s.w);\n", t, t, t);
+ GLSLF("%s.xy += vec2(1 + %s, 1 - %s);\n", t, s, s);
+}
+
+void pass_sample_bicubic_fast(struct gl_shader_cache *sc)
+{
+ GLSL(vec4 color;)
+ GLSLF("{\n");
+ GLSL(vec2 fcoord = fract(pos * size + vec2(0.5, 0.5));)
+ bicubic_calcweights(sc, "parmx", "fcoord.x");
+ bicubic_calcweights(sc, "parmy", "fcoord.y");
+ GLSL(vec4 cdelta;)
+ GLSL(cdelta.xz = parmx.RG * vec2(-pt.x, pt.x);)
+ GLSL(cdelta.yw = parmy.RG * vec2(-pt.y, pt.y);)
+ // first y-interpolation
+ GLSL(vec4 ar = texture(tex, pos + cdelta.xy);)
+ GLSL(vec4 ag = texture(tex, pos + cdelta.xw);)
+ GLSL(vec4 ab = mix(ag, ar, parmy.b);)
+ // second y-interpolation
+ GLSL(vec4 br = texture(tex, pos + cdelta.zy);)
+ GLSL(vec4 bg = texture(tex, pos + cdelta.zw);)
+ GLSL(vec4 aa = mix(bg, br, parmy.b);)
+ // x-interpolation
+ GLSL(color = mix(aa, ab, parmx.b);)
+ GLSLF("}\n");
+}
+
+void pass_sample_sharpen3(struct gl_shader_cache *sc, struct scaler *scaler)
+{
+ GLSL(vec4 color;)
+ GLSLF("{\n");
+ GLSL(vec2 st = pt * 0.5;)
+ GLSL(vec4 p = texture(tex, pos);)
+ GLSL(vec4 sum = texture(tex, pos + st * vec2(+1, +1))
+ + texture(tex, pos + st * vec2(+1, -1))
+ + texture(tex, pos + st * vec2(-1, +1))
+ + texture(tex, pos + st * vec2(-1, -1));)
+ float param = scaler->conf.kernel.params[0];
+ param = isnan(param) ? 0.5 : param;
+ GLSLF("color = p + (p - 0.25 * sum) * %f;\n", param);
+ GLSLF("}\n");
+}
+
+void pass_sample_sharpen5(struct gl_shader_cache *sc, struct scaler *scaler)
+{
+ GLSL(vec4 color;)
+ GLSLF("{\n");
+ GLSL(vec2 st1 = pt * 1.2;)
+ GLSL(vec4 p = texture(tex, pos);)
+ GLSL(vec4 sum1 = texture(tex, pos + st1 * vec2(+1, +1))
+ + texture(tex, pos + st1 * vec2(+1, -1))
+ + texture(tex, pos + st1 * vec2(-1, +1))
+ + texture(tex, pos + st1 * vec2(-1, -1));)
+ GLSL(vec2 st2 = pt * 1.5;)
+ GLSL(vec4 sum2 = texture(tex, pos + st2 * vec2(+1, 0))
+ + texture(tex, pos + st2 * vec2( 0, +1))
+ + texture(tex, pos + st2 * vec2(-1, 0))
+ + texture(tex, pos + st2 * vec2( 0, -1));)
+ GLSL(vec4 t = p * 0.859375 + sum2 * -0.1171875 + sum1 * -0.09765625;)
+ float param = scaler->conf.kernel.params[0];
+ param = isnan(param) ? 0.5 : param;
+ GLSLF("color = p + t * %f;\n", param);
+ GLSLF("}\n");
+}
+
+void pass_sample_oversample(struct gl_shader_cache *sc, struct scaler *scaler,
+ int w, int h)
+{
+ GLSL(vec4 color;)
+ GLSLF("{\n");
+ GLSL(vec2 pos = pos + vec2(0.5) * pt;) // round to nearest
+ GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));)
+ // We only need to sample from the four corner pixels since we're using
+ // nearest neighbour and can compute the exact transition point
+ GLSL(vec2 baseNW = pos - fcoord * pt;)
+ GLSL(vec2 baseNE = baseNW + vec2(pt.x, 0.0);)
+ GLSL(vec2 baseSW = baseNW + vec2(0.0, pt.y);)
+ GLSL(vec2 baseSE = baseNW + pt;)
+ // Determine the mixing coefficient vector
+ gl_sc_uniform_vec2(sc, "output_size", (float[2]){w, h});
+ GLSL(vec2 coeff = vec2((baseSE - pos) * output_size);)
+ GLSL(coeff = clamp(coeff, 0.0, 1.0);)
+ float threshold = scaler->conf.kernel.params[0];
+ if (threshold > 0) { // also rules out NAN
+ GLSLF("coeff = mix(coeff, vec2(0.0), "
+ "lessThanEqual(coeff, vec2(%f)));\n", threshold);
+ GLSLF("coeff = mix(coeff, vec2(1.0), "
+ "greaterThanEqual(coeff, vec2(%f)));\n", 1.0 - threshold);
+ }
+ // Compute the right blend of colors
+ GLSL(vec4 left = mix(texture(tex, baseSW),
+ texture(tex, baseNW),
+ coeff.y);)
+ GLSL(vec4 right = mix(texture(tex, baseSE),
+ texture(tex, baseNE),
+ coeff.y);)
+ GLSL(color = mix(right, left, coeff.x);)
+ GLSLF("}\n");
+}
+
+// Linearize (expand), given a TRC as input
+void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc)
+{
+ if (trc == MP_CSP_TRC_LINEAR)
+ return;
+
+ GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);)
+ switch (trc) {
+ case MP_CSP_TRC_SRGB:
+ GLSL(color.rgb = mix(color.rgb / vec3(12.92),
+ pow((color.rgb + vec3(0.055))/vec3(1.055),
+ vec3(2.4)),
+ lessThan(vec3(0.04045), color.rgb));)
+ break;
+ case MP_CSP_TRC_BT_1886:
+ GLSL(color.rgb = pow(color.rgb, vec3(1.961));)
+ break;
+ case MP_CSP_TRC_GAMMA18:
+ GLSL(color.rgb = pow(color.rgb, vec3(1.8));)
+ break;
+ case MP_CSP_TRC_GAMMA22:
+ GLSL(color.rgb = pow(color.rgb, vec3(2.2));)
+ break;
+ case MP_CSP_TRC_GAMMA28:
+ GLSL(color.rgb = pow(color.rgb, vec3(2.8));)
+ break;
+ case MP_CSP_TRC_PRO_PHOTO:
+ GLSL(color.rgb = mix(color.rgb / vec3(16.0),
+ pow(color.rgb, vec3(1.8)),
+ lessThan(vec3(0.03125), color.rgb));)
+ break;
+ }
+}
+
+// Delinearize (compress), given a TRC as output
+void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc)
+{
+ if (trc == MP_CSP_TRC_LINEAR)
+ return;
+
+ GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);)
+ switch (trc) {
+ case MP_CSP_TRC_SRGB:
+ GLSL(color.rgb = mix(color.rgb * vec3(12.92),
+ vec3(1.055) * pow(color.rgb, vec3(1.0/2.4))
+ - vec3(0.055),
+ lessThanEqual(vec3(0.0031308), color.rgb));)
+ break;
+ case MP_CSP_TRC_BT_1886:
+ GLSL(color.rgb = pow(color.rgb, vec3(1.0/1.961));)
+ break;
+ case MP_CSP_TRC_GAMMA18:
+ GLSL(color.rgb = pow(color.rgb, vec3(1.0/1.8));)
+ break;
+ case MP_CSP_TRC_GAMMA22:
+ GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.2));)
+ break;
+ case MP_CSP_TRC_GAMMA28:
+ GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.8));)
+ break;
+ case MP_CSP_TRC_PRO_PHOTO:
+ GLSL(color.rgb = mix(color.rgb * vec3(16.0),
+ pow(color.rgb, vec3(1.0/1.8)),
+ lessThanEqual(vec3(0.001953), color.rgb));)
+ break;
+ }
+}
diff --git a/video/out/opengl/video_shaders.h b/video/out/opengl/video_shaders.h
new file mode 100644
index 0000000000..05d622750c
--- /dev/null
+++ b/video/out/opengl/video_shaders.h
@@ -0,0 +1,43 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with mpv. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * You can alternatively redistribute this file and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ */
+
+#ifndef MP_GL_VIDEO_SHADERS_H
+#define MP_GL_VIDEO_SHADERS_H
+
+#include "common.h"
+#include "utils.h"
+#include "video.h"
+
+void sampler_prelude(struct gl_shader_cache *sc, int tex_num);
+void pass_sample_separated_gen(struct gl_shader_cache *sc, struct scaler *scaler,
+ int d_x, int d_y);
+void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler);
+void pass_sample_bicubic_fast(struct gl_shader_cache *sc);
+void pass_sample_sharpen3(struct gl_shader_cache *sc, struct scaler *scaler);
+void pass_sample_sharpen5(struct gl_shader_cache *sc, struct scaler *scaler);
+void pass_sample_oversample(struct gl_shader_cache *sc, struct scaler *scaler,
+ int w, int h);
+
+void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc);
+void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc);
+
+#endif