aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r-- DOCS/man/vo.rst | 2
-rw-r--r-- video/out/opengl/utils.c | 9
-rw-r--r-- video/out/opengl/utils.h | 1
-rw-r--r-- video/out/opengl/video.c | 120
4 files changed, 121 insertions, 11 deletions
diff --git a/DOCS/man/vo.rst b/DOCS/man/vo.rst
index 69c554fa16..0725cf19e6 100644
--- a/DOCS/man/vo.rst
+++ b/DOCS/man/vo.rst
@@ -804,7 +804,7 @@ Available video output drivers are:
angle
Direct3D11 through the OpenGL ES translation layer ANGLE. This
supports almost everything the ``win`` backend does, except ICC
- profiles, high bit depth video input, and the ``nnedi3`` prescaler.
+ profiles and the ``nnedi3`` prescaler.
dxinterop (experimental)
Win32, using WGL for rendering and Direct3D 9Ex for presentation.
Works on Nvidia and AMD only.
diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c
index 8247588341..588b6cddf8 100644
--- a/video/out/opengl/utils.c
+++ b/video/out/opengl/utils.c
@@ -670,6 +670,15 @@ void gl_sc_uniform_sampler(struct gl_shader_cache *sc, char *name, GLenum target
u->v.i[0] = unit;
}
+// Register (or update) the uniform `name` as an unsigned integer 2D sampler.
+// The uniform is declared in GLSL as "usampler2D" and carries `unit` as its
+// single integer value.
+void gl_sc_uniform_sampler_ui(struct gl_shader_cache *sc, char *name, int unit)
+{
+    struct sc_uniform *sampler = find_uniform(sc, name);
+    sampler->glsl_type = "usampler2D";
+    sampler->type = UT_i;
+    sampler->v.i[0] = unit;
+    sampler->size = 1;
+}
+
void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, GLfloat f)
{
struct sc_uniform *u = find_uniform(sc, name);
diff --git a/video/out/opengl/utils.h b/video/out/opengl/utils.h
index 280be0396d..3ec6077bf5 100644
--- a/video/out/opengl/utils.h
+++ b/video/out/opengl/utils.h
@@ -126,6 +126,7 @@ void gl_sc_hadd(struct gl_shader_cache *sc, const char *text);
void gl_sc_haddf(struct gl_shader_cache *sc, const char *textf, ...);
void gl_sc_uniform_sampler(struct gl_shader_cache *sc, char *name, GLenum target,
int unit);
+void gl_sc_uniform_sampler_ui(struct gl_shader_cache *sc, char *name, int unit);
void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, GLfloat f);
void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, GLint f);
void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, GLfloat f[2]);
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c
index 9a3b0c16f2..bbf01f2cbb 100644
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -93,6 +93,7 @@ struct texplane {
int w, h;
GLint gl_internal_format;
GLenum gl_target;
+ bool use_integer;
GLenum gl_format;
GLenum gl_type;
GLuint gl_texture;
@@ -115,6 +116,7 @@ struct fbosurface {
struct src_tex {
GLuint gl_tex;
GLenum gl_target;
+ bool use_integer;
int w, h;
struct mp_rect_f src;
};
@@ -160,10 +162,12 @@ struct gl_video {
bool is_yuv, is_packed_yuv;
bool has_alpha;
char color_swizzle[5];
+ bool use_integer_conversion;
struct video_image image;
bool dumb_mode;
+ bool forced_dumb_mode;
struct fbotex chroma_merge_fbo;
struct fbotex chroma_deband_fbo;
@@ -173,6 +177,7 @@ struct gl_video {
struct fbotex output_fbo;
struct fbotex deband_fbo;
struct fbosurface surfaces[FBOSURFACES_MAX];
+ struct fbotex integer_conv_fbo[4];
// these are duplicated so we can keep rendering back and forth between
// them to support an unlimited number of shader passes per step
@@ -260,6 +265,17 @@ static const struct fmt_entry gl_byte_formats_gles3[] = {
{0, 0, 0, 0}, // 4 x 16
};
+static const struct fmt_entry gl_ui_byte_formats_gles3[] = { // unsigned integer formats; index = n_channels - 1 + (bytes_per_comp - 1) * 4
+    {0, GL_R8UI,     GL_RED_INTEGER,  GL_UNSIGNED_BYTE},  // 1 channel  x 8 bit
+    {0, GL_RG8UI,    GL_RG_INTEGER,   GL_UNSIGNED_BYTE},  // 2 channels x 8 bit
+    {0, GL_RGB8UI,   GL_RGB_INTEGER,  GL_UNSIGNED_BYTE},  // 3 channels x 8 bit
+    {0, GL_RGBA8UI,  GL_RGBA_INTEGER, GL_UNSIGNED_BYTE},  // 4 channels x 8 bit
+    {0, GL_R16UI,    GL_RED_INTEGER,  GL_UNSIGNED_SHORT}, // 1 channel  x 16 bit
+    {0, GL_RG16UI,   GL_RG_INTEGER,   GL_UNSIGNED_SHORT}, // 2 channels x 16 bit
+    {0, GL_RGB16UI,  GL_RGB_INTEGER,  GL_UNSIGNED_SHORT}, // 3 channels x 16 bit
+    {0, GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // 4 channels x 16 bit
+};
+
static const struct fmt_entry gl_byte_formats_gles2[] = {
{0, GL_LUMINANCE, GL_LUMINANCE, GL_UNSIGNED_BYTE}, // 1 x 8
{0, GL_LUMINANCE_ALPHA, GL_LUMINANCE_ALPHA, GL_UNSIGNED_BYTE}, // 2 x 8
@@ -507,6 +523,7 @@ static void get_scale_factors(struct gl_video *p, double xy[2]);
#define GLSL(x) gl_sc_add(p->sc, #x "\n");
#define GLSLF(...) gl_sc_addf(p->sc, __VA_ARGS__)
+// Return a fixed point texture format with given characteristics.
static const struct fmt_entry *find_tex_format(GL *gl, int bytes_per_comp,
int n_channels)
{
@@ -523,6 +540,19 @@ static const struct fmt_entry *find_tex_format(GL *gl, int bytes_per_comp,
return &fmts[n_channels - 1 + (bytes_per_comp - 1) * 4];
}
+// Report whether `fmt` is an integer (non-fixed-point) texture format.
+// Only the *_INTEGER pixel formats that this file actually declares in its
+// format tables are recognized.
+static bool is_integer_format(const struct fmt_entry *fmt)
+{
+    return fmt->format == GL_RED_INTEGER ||
+           fmt->format == GL_RG_INTEGER ||
+           fmt->format == GL_RGB_INTEGER ||
+           fmt->format == GL_RGBA_INTEGER;
+}
+
static const char *load_cached_file(struct gl_video *p, const char *path)
{
if (!path || !path[0])
@@ -624,6 +654,9 @@ static void uninit_rendering(struct gl_video *p)
fbotex_uninit(&p->unsharp_fbo);
fbotex_uninit(&p->deband_fbo);
+ for (int n = 0; n < 4; n++)
+ fbotex_uninit(&p->integer_conv_fbo[n]);
+
for (int n = 0; n < 2; n++) {
fbotex_uninit(&p->pre_fbo[n]);
fbotex_uninit(&p->post_fbo[n]);
@@ -722,8 +755,9 @@ static void pass_set_image_textures(struct gl_video *p, struct video_image *vimg
for (int n = 0; n < p->plane_count; n++) {
struct texplane *t = &vimg->planes[n];
p->pass_tex[n] = (struct src_tex){
- .gl_tex = vimg->planes[n].gl_texture,
+ .gl_tex = t->gl_texture,
.gl_target = t->gl_target,
+ .use_integer = t->use_integer,
.w = t->w,
.h = t->h,
.src = {0, 0, t->w, t->h},
@@ -735,11 +769,11 @@ static void init_video(struct gl_video *p)
{
GL *gl = p->gl;
- check_gl_features(p);
-
init_format(p->image_params.imgfmt, p);
p->gl_target = p->opts.use_rectangle ? GL_TEXTURE_RECTANGLE : GL_TEXTURE_2D;
+ check_gl_features(p);
+
if (p->hwdec_active) {
if (p->hwdec->driver->reinit(p->hwdec, &p->image_params) < 0)
MP_ERR(p, "Initializing texture for hardware decoding failed.\n");
@@ -783,8 +817,9 @@ static void init_video(struct gl_video *p)
plane->w, plane->h, 0,
plane->gl_format, plane->gl_type, NULL);
- gl->TexParameteri(p->gl_target, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
- gl->TexParameteri(p->gl_target, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+ int filter = plane->use_integer ? GL_NEAREST : GL_LINEAR;
+ gl->TexParameteri(p->gl_target, GL_TEXTURE_MIN_FILTER, filter);
+ gl->TexParameteri(p->gl_target, GL_TEXTURE_MAG_FILTER, filter);
gl->TexParameteri(p->gl_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
gl->TexParameteri(p->gl_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
}
@@ -838,7 +873,11 @@ static void pass_prepare_src_tex(struct gl_video *p)
snprintf(texture_name, sizeof(texture_name), "texture%d", n);
snprintf(texture_size, sizeof(texture_size), "texture_size%d", n);
- gl_sc_uniform_sampler(sc, texture_name, s->gl_target, n);
+ if (s->use_integer) {
+ gl_sc_uniform_sampler_ui(sc, texture_name, n);
+ } else {
+ gl_sc_uniform_sampler(sc, texture_name, s->gl_target, n);
+ }
float f[2] = {1, 1};
if (s->gl_target != GL_TEXTURE_RECTANGLE) {
f[0] = s->w;
@@ -1334,6 +1373,37 @@ static bool pass_prescale_luma(struct gl_video *p, float tex_mul,
return true;
}
+// The input textures are in an integer (non-fixed-point) format, like R16UI.
+// Convert each plane to float in an extra pass rendered to integer_conv_fbo[n].
+static void pass_integer_conversion(struct gl_video *p)
+{
+    double tex_mul = 1 / mp_get_csp_mul(p->image_params.colorspace,
+                                        p->image_desc.component_bits,
+                                        p->image_desc.component_full_bits);
+    uint64_t tex_max = 1ull << p->image_desc.component_full_bits;
+    tex_mul *= 1.0 / (tex_max - 1); // also divide by the largest value that fits in component_full_bits
+
+    struct src_tex pass_tex[TEXUNIT_VIDEO_NUM]; // local snapshot of p->pass_tex
+    assert(sizeof(pass_tex) == sizeof(p->pass_tex));
+    memcpy(pass_tex, p->pass_tex, sizeof(pass_tex));
+
+    for (int n = 0; n < TEXUNIT_VIDEO_NUM; n++) {
+        if (!p->pass_tex[n].gl_tex) // nothing bound in this slot
+            continue;
+        GLSLF("// integer conversion plane %d\n", n);
+        GLSLF("uvec4 icolor = texture(texture%d, texcoord%d);\n", n, n);
+        GLSLF("vec4 color = vec4(icolor) * tex_mul;\n");
+        gl_sc_uniform_f(p->sc, "tex_mul", tex_mul);
+        int c_w = p->pass_tex[n].src.x1 - p->pass_tex[n].src.x0; // source rect width, truncated to int
+        int c_h = p->pass_tex[n].src.y1 - p->pass_tex[n].src.y0; // source rect height, truncated to int
+        finish_pass_fbo(p, &p->integer_conv_fbo[n], c_w, c_h, n, 0); // NOTE(review): integer_conv_fbo has 4 entries - confirm n < 4 whenever gl_tex is set
+        pass_tex[n] = p->pass_tex[n]; // presumably finish_pass_fbo left the converted texture in slot n - TODO confirm
+        memcpy(p->pass_tex, pass_tex, sizeof(p->pass_tex)); // write the snapshot (with updated slot n) back
+    }
+
+    p->use_normalized_range = true; // scaling was applied here; pass_read_video then uses tex_mul = 1.0
+}
+
// sample from video textures, set "color" variable to yuv value
static void pass_read_video(struct gl_video *p)
{
@@ -1342,9 +1412,14 @@ static void pass_read_video(struct gl_video *p)
struct gl_transform chromafix;
pass_set_image_textures(p, &p->image, &chromafix);
+ if (p->use_integer_conversion)
+ pass_integer_conversion(p);
+
float tex_mul = 1 / mp_get_csp_mul(p->image_params.colorspace,
p->image_desc.component_bits,
p->image_desc.component_full_bits);
+ if (p->use_normalized_range)
+ tex_mul = 1.0;
struct src_tex prescaled_tex;
struct gl_transform offset = {{{0}}};
@@ -2389,6 +2464,8 @@ static bool test_fbo(struct gl_video *p)
static bool check_dumb_mode(struct gl_video *p)
{
struct gl_video_opts *o = &p->opts;
+ if (p->use_integer_conversion)
+ return false;
if (o->dumb_mode)
return true;
if (o->target_prim || o->target_trc || o->linear_scaling ||
@@ -2434,8 +2511,9 @@ static void check_gl_features(struct gl_video *p)
MP_WARN(p, "Disabling PBOs (GLES unsupported).\n");
}
+ p->forced_dumb_mode = p->opts.dumb_mode || !have_fbo || !have_texrg;
bool voluntarily_dumb = check_dumb_mode(p);
- if (p->opts.dumb_mode || !have_fbo || !have_texrg || voluntarily_dumb) {
+ if (p->forced_dumb_mode || voluntarily_dumb) {
if (voluntarily_dumb) {
MP_VERBOSE(p, "No advanced processing required. Enabling dumb mode.\n");
} else if (!p->opts.dumb_mode) {
@@ -2638,6 +2716,17 @@ static void packed_fmt_swizzle(char w[5], const struct fmt_entry *texfmt,
w[4] = '\0';
}
+// Like find_tex_format() (same bytes-per-component and channel-count
+// arguments), but if no fixed point format is available and gl->es >= 300,
+// return the matching unsigned integer format instead. In every other case
+// the find_tex_format() result is returned unchanged, even if it is an entry
+// with no format set.
+static const struct fmt_entry *find_plane_format(GL *gl, int bytes_per_comp,
+                                                 int n_channels)
+{
+    const struct fmt_entry *fixed = find_tex_format(gl, bytes_per_comp, n_channels);
+    if (!fixed->format && gl->es >= 300) {
+        int idx = (bytes_per_comp - 1) * 4 + (n_channels - 1);
+        return &gl_ui_byte_formats_gles3[idx];
+    }
+    return fixed;
+}
+
static bool init_format(int fmt, struct gl_video *init)
{
struct GL *gl = init->gl;
@@ -2665,7 +2754,7 @@ static bool init_format(int fmt, struct gl_video *init)
int bits = desc.component_bits;
if ((desc.flags & MP_IMGFLAG_NE) && bits >= 8 && bits <= 16) {
init->has_alpha = desc.num_planes > 3;
- plane_format[0] = find_tex_format(gl, (bits + 7) / 8, 1);
+ plane_format[0] = find_plane_format(gl, (bits + 7) / 8, 1);
for (int p = 1; p < desc.num_planes; p++)
plane_format[p] = plane_format[0];
// RGB/planar
@@ -2679,8 +2768,8 @@ static bool init_format(int fmt, struct gl_video *init)
if (desc.flags & MP_IMGFLAG_YUV_NV) {
int bits = desc.component_bits;
if ((desc.flags & MP_IMGFLAG_NE) && bits >= 8 && bits <= 16) {
- plane_format[0] = find_tex_format(gl, (bits + 7) / 8, 1);
- plane_format[1] = find_tex_format(gl, (bits + 7) / 8, 2);
+ plane_format[0] = find_plane_format(gl, (bits + 7) / 8, 1);
+ plane_format[1] = find_plane_format(gl, (bits + 7) / 8, 2);
if (desc.flags & MP_IMGFLAG_YUV_NV_SWAP)
snprintf(init->color_swizzle, sizeof(init->color_swizzle), "rbga");
goto supported;
@@ -2735,10 +2824,20 @@ supported:
return false;
}
+ int use_integer = -1;
for (int p = 0; p < desc.num_planes; p++) {
if (!plane_format[p]->format)
return false;
+ int use_int_plane = !!is_integer_format(plane_format[p]);
+ if (use_integer < 0)
+ use_integer = use_int_plane;
+ if (use_integer != use_int_plane)
+ return false; // mixed planes not supported
}
+ init->use_integer_conversion = use_integer;
+
+ if (init->use_integer_conversion && init->forced_dumb_mode)
+ return false;
for (int p = 0; p < desc.num_planes; p++) {
struct texplane *plane = &init->image.planes[p];
@@ -2747,6 +2846,7 @@ supported:
plane->gl_format = format->format;
plane->gl_internal_format = format->internal_format;
plane->gl_type = format->type;
+ plane->use_integer = init->use_integer_conversion;
}
init->is_yuv = desc.flags & MP_IMGFLAG_YUV;