4 files changed, 121 insertions, 11 deletions
diff --git a/DOCS/man/vo.rst b/DOCS/man/vo.rst
index 69c554fa16..0725cf19e6 100644
--- a/DOCS/man/vo.rst
+++ b/DOCS/man/vo.rst
@@ -804,7 +804,7 @@ Available video output drivers are:
         angle
             Direct3D11 through the OpenGL ES translation layer ANGLE. This
             supports almost everything the ``win`` backend does, except ICC
-            profiles, high bit depth video input, and the ``nnedi3`` prescaler.
+            profiles and the ``nnedi3`` prescaler.
         dxinterop (experimental)
             Win32, using WGL for rendering and Direct3D 9Ex for presentation.
             Works on Nvidia and AMD only.
diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c
index 8247588341..588b6cddf8 100644
--- a/video/out/opengl/utils.c
+++ b/video/out/opengl/utils.c
@@ -670,6 +670,15 @@ void gl_sc_uniform_sampler(struct gl_shader_cache *sc, char *name, GLenum target
     u->v.i[0] = unit;
 }
 
+void gl_sc_uniform_sampler_ui(struct gl_shader_cache *sc, char *name, int unit)
+{
+    struct sc_uniform *uniform = find_uniform(sc, name);
+    uniform->glsl_type = "usampler2D"; // unsigned integer 2D sampler in GLSL
+    uniform->type = UT_i;
+    uniform->size = 1;
+    uniform->v.i[0] = unit; // the uniform's value is the unit number passed in
+}
+
 void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, GLfloat f)
 {
     struct sc_uniform *u = find_uniform(sc, name);
diff --git a/video/out/opengl/utils.h b/video/out/opengl/utils.h
index 280be0396d..3ec6077bf5 100644
--- a/video/out/opengl/utils.h
+++ b/video/out/opengl/utils.h
@@ -126,6 +126,7 @@ void gl_sc_hadd(struct gl_shader_cache *sc, const char *text);
 void gl_sc_haddf(struct gl_shader_cache *sc, const char *textf, ...);
 void gl_sc_uniform_sampler(struct gl_shader_cache *sc, char *name, GLenum target,
                            int unit);
+void gl_sc_uniform_sampler_ui(struct gl_shader_cache *sc, char *name, int unit);
 void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, GLfloat f);
 void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, GLint f);
 void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, GLfloat f[2]);
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c
index 9a3b0c16f2..bbf01f2cbb 100644
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -93,6 +93,7 @@ struct texplane {
     int w, h;
     GLint gl_internal_format;
     GLenum gl_target;
+    bool use_integer;
     GLenum gl_format;
     GLenum gl_type;
     GLuint gl_texture;
@@ -115,6 +116,7 @@ struct fbosurface {
 struct src_tex {
     GLuint gl_tex;
     GLenum gl_target;
+    bool use_integer; // if set, the texture is bound as a usampler2D uniform
     int w, h;
     struct mp_rect_f src;
 };
@@ -160,10 +162,12 @@ struct gl_video {
     bool is_yuv, is_packed_yuv;
     bool has_alpha;
     char color_swizzle[5];
+    bool use_integer_conversion;
 
     struct video_image image;
 
     bool dumb_mode;
+    bool forced_dumb_mode;
 
     struct fbotex chroma_merge_fbo;
     struct fbotex chroma_deband_fbo;
@@ -173,6 +177,7 @@ struct gl_video {
     struct fbotex output_fbo;
     struct fbotex deband_fbo;
     struct fbosurface surfaces[FBOSURFACES_MAX];
+    struct fbotex integer_conv_fbo[4];
 
     // these are duplicated so we can keep rendering back and forth between
     // them to support an unlimited number of shader passes per step
@@ -260,6 +265,17 @@ static const struct fmt_entry gl_byte_formats_gles3[] = {
     {0, 0,           0,         0},                     // 4 x 16
 };
 
+static const struct fmt_entry gl_ui_byte_formats_gles3[] = { // row = (ch-1) + (bytes-1)*4
+    {0, GL_R8UI,      GL_RED_INTEGER,   GL_UNSIGNED_BYTE},  // 1 channel  x 8 bit
+    {0, GL_RG8UI,     GL_RG_INTEGER,    GL_UNSIGNED_BYTE},  // 2 channels x 8 bit
+    {0, GL_RGB8UI,    GL_RGB_INTEGER,   GL_UNSIGNED_BYTE},  // 3 channels x 8 bit
+    {0, GL_RGBA8UI,   GL_RGBA_INTEGER,  GL_UNSIGNED_BYTE},  // 4 channels x 8 bit
+    {0, GL_R16UI,     GL_RED_INTEGER,   GL_UNSIGNED_SHORT}, // 1 channel  x 16 bit
+    {0, GL_RG16UI,    GL_RG_INTEGER,    GL_UNSIGNED_SHORT}, // 2 channels x 16 bit
+    {0, GL_RGB16UI,   GL_RGB_INTEGER,   GL_UNSIGNED_SHORT}, // 3 channels x 16 bit
+    {0, GL_RGBA16UI,  GL_RGBA_INTEGER,  GL_UNSIGNED_SHORT}, // 4 channels x 16 bit
+};
+
 static const struct fmt_entry gl_byte_formats_gles2[] = {
     {0, GL_LUMINANCE,           GL_LUMINANCE,       GL_UNSIGNED_BYTE}, // 1 x 8
     {0, GL_LUMINANCE_ALPHA,     GL_LUMINANCE_ALPHA, GL_UNSIGNED_BYTE}, // 2 x 8
@@ -507,6 +523,7 @@ static void get_scale_factors(struct gl_video *p, double xy[2]);
 #define GLSL(x) gl_sc_add(p->sc, #x "\n");
 #define GLSLF(...) gl_sc_addf(p->sc, __VA_ARGS__)
 
+// Return a fixed point texture format with given characteristics.
 static const struct fmt_entry *find_tex_format(GL *gl, int bytes_per_comp,
                                                int n_channels)
 {
@@ -523,6 +540,19 @@ static const struct fmt_entry *find_tex_format(GL *gl, int bytes_per_comp,
     return &fmts[n_channels - 1 + (bytes_per_comp - 1) * 4];
 }
 
+// Return true if fmt->format is one of the GL_*_INTEGER formats.
+// Only the integer formats which we actually declare somewhere are tested.
+static bool is_integer_format(const struct fmt_entry *fmt)
+{
+    if (fmt->format == GL_RED_INTEGER)
+        return true;
+    if (fmt->format == GL_RG_INTEGER)
+        return true;
+    if (fmt->format == GL_RGB_INTEGER)
+        return true;
+    return fmt->format == GL_RGBA_INTEGER;
+}
+
 static const char *load_cached_file(struct gl_video *p, const char *path)
 {
     if (!path || !path[0])
@@ -624,6 +654,9 @@ static void uninit_rendering(struct gl_video *p)
     fbotex_uninit(&p->unsharp_fbo);
     fbotex_uninit(&p->deband_fbo);
 
+    for (int n = 0; n < 4; n++)
+        fbotex_uninit(&p->integer_conv_fbo[n]);
+
     for (int n = 0; n < 2; n++) {
         fbotex_uninit(&p->pre_fbo[n]);
         fbotex_uninit(&p->post_fbo[n]);
@@ -722,8 +755,9 @@ static void pass_set_image_textures(struct gl_video *p, struct video_image *vimg
     for (int n = 0; n < p->plane_count; n++) {
         struct texplane *t = &vimg->planes[n];
         p->pass_tex[n] = (struct src_tex){
-            .gl_tex = vimg->planes[n].gl_texture,
+            .gl_tex = t->gl_texture,
             .gl_target = t->gl_target,
+            .use_integer = t->use_integer,
             .w = t->w,
             .h = t->h,
             .src = {0, 0, t->w, t->h},
@@ -735,11 +769,11 @@ static void init_video(struct gl_video *p)
 {
     GL *gl = p->gl;
 
-    check_gl_features(p);
-
     init_format(p->image_params.imgfmt, p);
     p->gl_target = p->opts.use_rectangle ? GL_TEXTURE_RECTANGLE : GL_TEXTURE_2D;
 
+    check_gl_features(p);
+
     if (p->hwdec_active) {
         if (p->hwdec->driver->reinit(p->hwdec, &p->image_params) < 0)
             MP_ERR(p, "Initializing texture for hardware decoding failed.\n");
@@ -783,8 +817,9 @@ static void init_video(struct gl_video *p)
                            plane->w, plane->h, 0,
                            plane->gl_format, plane->gl_type, NULL);
 
-            gl->TexParameteri(p->gl_target, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
-            gl->TexParameteri(p->gl_target, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+            int filter = plane->use_integer ? GL_NEAREST : GL_LINEAR;
+            gl->TexParameteri(p->gl_target, GL_TEXTURE_MIN_FILTER, filter);
+            gl->TexParameteri(p->gl_target, GL_TEXTURE_MAG_FILTER, filter);
             gl->TexParameteri(p->gl_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
             gl->TexParameteri(p->gl_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
         }
@@ -838,7 +873,11 @@ static void pass_prepare_src_tex(struct gl_video *p)
         snprintf(texture_name, sizeof(texture_name), "texture%d", n);
         snprintf(texture_size, sizeof(texture_size), "texture_size%d", n);
 
-        gl_sc_uniform_sampler(sc, texture_name, s->gl_target, n);
+        if (s->use_integer) {
+            gl_sc_uniform_sampler_ui(sc, texture_name, n);
+        } else {
+            gl_sc_uniform_sampler(sc, texture_name, s->gl_target, n);
+        }
         float f[2] = {1, 1};
         if (s->gl_target != GL_TEXTURE_RECTANGLE) {
             f[0] = s->w;
@@ -1334,6 +1373,37 @@ static bool pass_prescale_luma(struct gl_video *p, float tex_mul,
     return true;
 }
 
+// Convert integer-format (non-fixed-point, e.g. R16UI) input planes to float,
+// one extra pass per plane. NOTE(review): confirm TEXUNIT_VIDEO_NUM <= 4 (FBOs).
+static void pass_integer_conversion(struct gl_video *p)
+{
+    double tex_mul = 1 / mp_get_csp_mul(p->image_params.colorspace,
+                                        p->image_desc.component_bits,
+                                        p->image_desc.component_full_bits);
+    uint64_t tex_max = 1ull << p->image_desc.component_full_bits;
+    tex_mul *= 1.0 / (tex_max - 1); // also map [0, 2^full_bits - 1] to [0, 1]
+
+    struct src_tex pass_tex[TEXUNIT_VIDEO_NUM]; // saved copy of p->pass_tex
+    assert(sizeof(pass_tex) == sizeof(p->pass_tex));
+    memcpy(pass_tex, p->pass_tex, sizeof(pass_tex));
+
+    for (int n = 0; n < TEXUNIT_VIDEO_NUM; n++) {
+        if (!p->pass_tex[n].gl_tex)
+            continue; // no texture set in this slot
+        GLSLF("// integer conversion plane %d\n", n);
+        GLSLF("uvec4 icolor = texture(texture%d, texcoord%d);\n", n, n);
+        GLSLF("vec4 color = vec4(icolor) * tex_mul;\n");
+        gl_sc_uniform_f(p->sc, "tex_mul", tex_mul);
+        int c_w = p->pass_tex[n].src.x1 - p->pass_tex[n].src.x0;
+        int c_h = p->pass_tex[n].src.y1 - p->pass_tex[n].src.y0;
+        finish_pass_fbo(p, &p->integer_conv_fbo[n], c_w, c_h, n, 0);
+        pass_tex[n] = p->pass_tex[n]; // presumably the FBO output in slot n - verify
+        memcpy(p->pass_tex, pass_tex, sizeof(p->pass_tex)); // restore other slots
+    }
+
+    p->use_normalized_range = true; // pass_read_video() then uses tex_mul = 1.0
+}
+
 // sample from video textures, set "color" variable to yuv value
 static void pass_read_video(struct gl_video *p)
 {
@@ -1342,9 +1412,14 @@ static void pass_read_video(struct gl_video *p)
     struct gl_transform chromafix;
     pass_set_image_textures(p, &p->image, &chromafix);
 
+    if (p->use_integer_conversion)
+        pass_integer_conversion(p);
+
     float tex_mul = 1 / mp_get_csp_mul(p->image_params.colorspace,
                                        p->image_desc.component_bits,
                                        p->image_desc.component_full_bits);
+    if (p->use_normalized_range)
+        tex_mul = 1.0;
 
     struct src_tex prescaled_tex;
     struct gl_transform offset = {{{0}}};
@@ -2389,6 +2464,8 @@ static bool test_fbo(struct gl_video *p)
 static bool check_dumb_mode(struct gl_video *p)
 {
     struct gl_video_opts *o = &p->opts;
+    if (p->use_integer_conversion)
+        return false;
     if (o->dumb_mode)
         return true;
     if (o->target_prim || o->target_trc || o->linear_scaling ||
@@ -2434,8 +2511,9 @@ static void check_gl_features(struct gl_video *p)
         MP_WARN(p, "Disabling PBOs (GLES unsupported).\n");
     }
 
+    p->forced_dumb_mode = p->opts.dumb_mode || !have_fbo || !have_texrg;
     bool voluntarily_dumb = check_dumb_mode(p);
-    if (p->opts.dumb_mode || !have_fbo || !have_texrg || voluntarily_dumb) {
+    if (p->forced_dumb_mode || voluntarily_dumb) {
         if (voluntarily_dumb) {
             MP_VERBOSE(p, "No advanced processing required. Enabling dumb mode.\n");
         } else if (!p->opts.dumb_mode) {
@@ -2638,6 +2716,17 @@ static void packed_fmt_swizzle(char w[5], const struct fmt_entry *texfmt,
     w[4] = '\0';
 }
 
+// Like find_tex_format() (also taking bytes per component, not bits), but if
+// no fixed point format is found and gl->es >= 300, return an unsigned integer one.
+static const struct fmt_entry *find_plane_format(GL *gl, int bytes_per_comp,
+                                                 int n_channels)
+{
+    const struct fmt_entry *fixed = find_tex_format(gl, bytes_per_comp, n_channels);
+    if (!fixed->format && gl->es >= 300)
+        return &gl_ui_byte_formats_gles3[(bytes_per_comp - 1) * 4 + n_channels - 1];
+    return fixed;
+}
+
 static bool init_format(int fmt, struct gl_video *init)
 {
     struct GL *gl = init->gl;
@@ -2665,7 +2754,7 @@ static bool init_format(int fmt, struct gl_video *init)
         int bits = desc.component_bits;
         if ((desc.flags & MP_IMGFLAG_NE) && bits >= 8 && bits <= 16) {
             init->has_alpha = desc.num_planes > 3;
-            plane_format[0] = find_tex_format(gl, (bits + 7) / 8, 1);
+            plane_format[0] = find_plane_format(gl, (bits + 7) / 8, 1);
             for (int p = 1; p < desc.num_planes; p++)
                 plane_format[p] = plane_format[0];
             // RGB/planar
@@ -2679,8 +2768,8 @@ static bool init_format(int fmt, struct gl_video *init)
     if (desc.flags & MP_IMGFLAG_YUV_NV) {
         int bits = desc.component_bits;
         if ((desc.flags & MP_IMGFLAG_NE) && bits >= 8 && bits <= 16) {
-            plane_format[0] = find_tex_format(gl, (bits + 7) / 8, 1);
-            plane_format[1] = find_tex_format(gl, (bits + 7) / 8, 2);
+            plane_format[0] = find_plane_format(gl, (bits + 7) / 8, 1);
+            plane_format[1] = find_plane_format(gl, (bits + 7) / 8, 2);
             if (desc.flags & MP_IMGFLAG_YUV_NV_SWAP)
                 snprintf(init->color_swizzle, sizeof(init->color_swizzle), "rbga");
             goto supported;
@@ -2735,10 +2824,20 @@ supported:
             return false;
     }
 
+    int use_integer = -1;
     for (int p = 0; p < desc.num_planes; p++) {
         if (!plane_format[p]->format)
             return false;
+        int use_int_plane = !!is_integer_format(plane_format[p]);
+        if (use_integer < 0)
+            use_integer = use_int_plane;
+        if (use_integer != use_int_plane)
+            return false; // mixed planes not supported
     }
+    init->use_integer_conversion = use_integer;
+
+    if (init->use_integer_conversion && init->forced_dumb_mode)
+        return false;
 
     for (int p = 0; p < desc.num_planes; p++) {
         struct texplane *plane = &init->image.planes[p];
@@ -2747,6 +2846,7 @@ supported:
         plane->gl_format = format->format;
         plane->gl_internal_format = format->internal_format;
         plane->gl_type = format->type;
+        plane->use_integer = init->use_integer_conversion;
     }
 
     init->is_yuv = desc.flags & MP_IMGFLAG_YUV;