diff options
Diffstat (limited to 'video/out/gpu')
-rw-r--r-- | video/out/gpu/video.c | 47 | ||||
-rw-r--r-- | video/out/gpu/video.h | 2 | ||||
-rw-r--r-- | video/out/gpu/video_shaders.c | 128 |
3 files changed, 107 insertions, 70 deletions
diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c index 24b14c537b..9bf7baeb77 100644 --- a/video/out/gpu/video.c +++ b/video/out/gpu/video.c @@ -313,9 +313,9 @@ static const struct gl_video_opts gl_video_opts_def = { .alpha_mode = ALPHA_BLEND_TILES, .background = {0, 0, 0, 255}, .gamma = 1.0f, - .tone_mapping = TONE_MAPPING_MOBIUS, + .tone_mapping = TONE_MAPPING_HABLE, .tone_mapping_param = NAN, - .tone_mapping_desat = 1.0, + .tone_mapping_desat = 0.5, .early_flush = -1, .hwdec_interop = "auto", }; @@ -358,7 +358,10 @@ const struct m_sub_options gl_video_conf = { {"hable", TONE_MAPPING_HABLE}, {"gamma", TONE_MAPPING_GAMMA}, {"linear", TONE_MAPPING_LINEAR})), - OPT_FLAG("hdr-compute-peak", compute_hdr_peak, 0), + OPT_CHOICE("hdr-compute-peak", compute_hdr_peak, 0, + ({"auto", 0}, + {"yes", 1}, + {"no", -1})), OPT_FLOAT("tone-mapping-param", tone_mapping_param, 0), OPT_FLOAT("tone-mapping-desaturate", tone_mapping_desat, 0), OPT_FLAG("gamut-warning", gamut_warning, 0), @@ -2442,20 +2445,18 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool dst.gamma = MP_CSP_TRC_GAMMA22; } - bool detect_peak = p->opts.compute_hdr_peak && mp_trc_is_hdr(src.gamma); + bool detect_peak = p->opts.compute_hdr_peak >= 0 && mp_trc_is_hdr(src.gamma); if (detect_peak && !p->hdr_peak_ssbo) { struct { - unsigned int sig_peak_raw; - unsigned int index; + unsigned int counter; + unsigned int frame_idx; + unsigned int frame_num; unsigned int frame_max[PEAK_DETECT_FRAMES+1]; + unsigned int frame_sum[PEAK_DETECT_FRAMES+1]; + unsigned int total_max; + unsigned int total_sum; } peak_ssbo = {0}; - // Prefill with safe values - int safe = MP_REF_WHITE * mp_trc_nom_peak(p->image_params.color.gamma); - peak_ssbo.sig_peak_raw = PEAK_DETECT_FRAMES * safe; - for (int i = 0; i < PEAK_DETECT_FRAMES+1; i++) - peak_ssbo.frame_max[i] = safe; - struct ra_buf_params params = { .type = RA_BUF_TYPE_SHADER_STORAGE, .size = sizeof(peak_ssbo), @@ -2465,7 +2466,8 @@ static void 
pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool p->hdr_peak_ssbo = ra_buf_create(ra, &params); if (!p->hdr_peak_ssbo) { MP_WARN(p, "Failed to create HDR peak detection SSBO, disabling.\n"); - detect_peak = (p->opts.compute_hdr_peak = false); + detect_peak = false; + p->opts.compute_hdr_peak = -1; } } @@ -2473,9 +2475,15 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool pass_describe(p, "detect HDR peak"); pass_is_compute(p, 8, 8); // 8x8 is good for performance gl_sc_ssbo(p->sc, "PeakDetect", p->hdr_peak_ssbo, - "uint sig_peak_raw;" - "uint index;" - "uint frame_max[%d];", PEAK_DETECT_FRAMES + 1 + "uint counter;" + "uint frame_idx;" + "uint frame_num;" + "uint frame_max[%d];" + "uint frame_sum[%d];" + "uint total_max;" + "uint total_sum;", + PEAK_DETECT_FRAMES + 1, + PEAK_DETECT_FRAMES + 1 ); } @@ -3504,9 +3512,10 @@ static void check_gl_features(struct gl_video *p) p->opts.deband = 0; MP_WARN(p, "Disabling debanding (GLSL version too old).\n"); } - if ((!have_compute || !have_ssbo) && p->opts.compute_hdr_peak) { - p->opts.compute_hdr_peak = 0; - MP_WARN(p, "Disabling HDR peak computation (no compute shaders).\n"); + if ((!have_compute || !have_ssbo) && p->opts.compute_hdr_peak >= 0) { + int msgl = p->opts.compute_hdr_peak == 1 ?
MSGL_WARN : MSGL_V; + MP_MSG(p, msgl, "Disabling HDR peak computation (no compute shaders).\n"); + p->opts.compute_hdr_peak = -1; } } diff --git a/video/out/gpu/video.h b/video/out/gpu/video.h index 78f8828f99..71666059f9 100644 --- a/video/out/gpu/video.h +++ b/video/out/gpu/video.h @@ -96,7 +96,7 @@ enum tone_mapping { }; // How many frames to average over for HDR peak detection -#define PEAK_DETECT_FRAMES 100 +#define PEAK_DETECT_FRAMES 20 struct gl_video_opts { int dumb_mode; diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c index 3e71c31369..8e33255390 100644 --- a/video/out/gpu/video_shaders.c +++ b/video/out/gpu/video_shaders.c @@ -553,13 +553,63 @@ void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, floa default: abort(); } +} - GLSLF("color.rgb *= vec3(1.0/%f);\n", peak); +// Average light level for SDR signals. This is equal to a signal level of 0.5 +// under a typical presentation gamma of about 2.0. +static const float sdr_avg = 0.25; + +static void hdr_update_peak(struct gl_shader_cache *sc) +{ + // For performance, we want to do as few atomic operations on global + // memory as possible, so use an atomic in shmem for the work group. + GLSLH(shared uint wg_sum;); + GLSL(wg_sum = 0;) + + // Have each thread update the work group sum with the local value + GLSL(barrier();) + GLSLF("atomicAdd(wg_sum, uint(sig * %f));\n", MP_REF_WHITE); + + // Have one thread per work group update the global atomics. We use the + // work group average even for the global sum, to make the values slightly + // more stable and smooth out tiny super-highlights. 
+ GLSL(memoryBarrierShared();) + GLSL(barrier();) + GLSL(if (gl_LocalInvocationIndex == 0) {) + GLSL( uint wg_avg = wg_sum / (gl_WorkGroupSize.x * gl_WorkGroupSize.y);) + GLSL( atomicMax(frame_max[frame_idx], wg_avg);) + GLSL( atomicAdd(frame_sum[frame_idx], wg_avg);) + GLSL(}) + + // Update the sig_peak/sig_avg from the old SSBO state + GLSL(uint num_wg = gl_NumWorkGroups.x * gl_NumWorkGroups.y;) + GLSL(if (frame_num > 0) {) + GLSLF(" float peak = float(total_max) / (%f * float(frame_num));\n", MP_REF_WHITE); + GLSLF(" float avg = float(total_sum) / (%f * float(frame_num * num_wg));\n", MP_REF_WHITE); + GLSLF(" sig_peak = max(1.0, peak);\n"); + GLSLF(" sig_avg = max(%f, avg);\n", sdr_avg); + GLSL(}); + + // Finally, to update the global state, we increment a counter per dispatch + GLSL(memoryBarrierBuffer();) + GLSL(barrier();) + GLSL(if (gl_LocalInvocationIndex == 0 && atomicAdd(counter, 1) == num_wg - 1) {) + GLSL( counter = 0;) + // Add the current frame, then subtract and reset the next frame + GLSLF(" uint next = (frame_idx + 1) %% %d;\n", PEAK_DETECT_FRAMES+1); + GLSL( total_max += frame_max[frame_idx] - frame_max[next];) + GLSL( total_sum += frame_sum[frame_idx] - frame_sum[next];) + GLSL( frame_max[next] = frame_sum[next] = 0;) + // Update the index and count + GLSL( frame_idx = next;) + GLSLF(" frame_num = min(frame_num + 1, %d);\n", PEAK_DETECT_FRAMES); + GLSL(}) } // Tone map from a known peak brightness to the range [0,1]. 
If ref_peak // is 0, we will use peak detection instead -static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak, +static void pass_tone_map(struct gl_shader_cache *sc, bool detect_peak, + float src_peak, float dst_range, enum tone_mapping algo, float param, float desat) { GLSLF("// HDR tone mapping\n"); @@ -568,6 +618,16 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak, // sure to reduce the value range as far as necessary to keep the entire // signal in range, so tone map based on the brightest component. GLSL(float sig = max(max(color.r, color.g), color.b);) + GLSLF("float sig_peak = %f;\n", src_peak); + GLSLF("float sig_avg = %f;\n", sdr_avg); + + // Rescale the variables in order to bring it into a representation where + // 1.0 represents the dst_peak. This is because all of the tone mapping + // algorithms are defined in such a way that they map to the range [0.0, 1.0]. + if (dst_range > 1.0) { + GLSLF("sig *= %f;\n", 1.0 / dst_range); + GLSLF("sig_peak *= %f;\n", 1.0 / dst_range); + } // Desaturate the color using a coefficient dependent on the signal if (desat > 0) { @@ -578,41 +638,14 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak, GLSL(sig = mix(sig, luma, coeff);) // also make sure to update `sig` } - if (!ref_peak) { - // For performance, we want to do as few atomic operations on global - // memory as possible, so use an atomic in shmem for the work group. 
- // We also want slightly more stable values, so use the group average - // instead of the group max - GLSLHF("shared uint group_sum = 0;\n"); - GLSLF("atomicAdd(group_sum, uint(sig * %f));\n", MP_REF_WHITE); - - // Have one thread in each work group update the frame maximum - GLSL(memoryBarrierBuffer();) - GLSL(barrier();) - GLSL(if (gl_LocalInvocationIndex == 0)) - GLSL(atomicMax(frame_max[index], group_sum / - (gl_WorkGroupSize.x * gl_WorkGroupSize.y));) - - // Finally, have one thread per invocation update the total maximum - // and advance the index - GLSL(memoryBarrierBuffer();) - GLSL(barrier();) - GLSL(if (gl_GlobalInvocationID == ivec3(0)) {) // do this once per invocation - GLSLF("uint next = (index + 1) %% %d;\n", PEAK_DETECT_FRAMES+1); - GLSLF("sig_peak_raw = sig_peak_raw + frame_max[index] - frame_max[next];\n"); - GLSLF("frame_max[next] = %d;\n", (int)MP_REF_WHITE); - GLSL(index = next;) - GLSL(}) - - GLSL(memoryBarrierBuffer();) - GLSL(barrier();) - GLSLF("float sig_peak = 1.0/%f * float(sig_peak_raw);\n", - MP_REF_WHITE * PEAK_DETECT_FRAMES); - } else { - GLSLHF("const float sig_peak = %f;\n", ref_peak); - } + if (detect_peak) + hdr_update_peak(sc); GLSL(float sig_orig = sig;) + GLSLF("float slope = min(1.0, %f / sig_avg);\n", sdr_avg); + GLSL(sig *= slope;) + GLSL(sig_peak *= slope;) + switch (algo) { case TONE_MAPPING_CLIP: GLSLF("sig = %f * sig;\n", isnan(param) ? 
1.0 : param); @@ -668,6 +701,7 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak, // Apply the computed scale factor to the color, linearly to prevent // discoloration + GLSL(sig = min(sig, 1.0);) GLSL(color.rgb *= sig / sig_orig;) } @@ -689,7 +723,6 @@ void pass_color_map(struct gl_shader_cache *sc, // Compute the highest encodable level float src_range = mp_trc_nom_peak(src.gamma), dst_range = mp_trc_nom_peak(dst.gamma); - float ref_peak = src.sig_peak / dst_range; // Some operations need access to the video's luma coefficients, so make // them available @@ -709,20 +742,13 @@ void pass_color_map(struct gl_shader_cache *sc, src.light != dst.light; if (need_gamma && !is_linear) { + // We also pull it up so that 1.0 is the reference white pass_linearize(sc, src.gamma); - is_linear= true; + is_linear = true; } if (src.light != dst.light) - pass_ootf(sc, src.light, mp_trc_nom_peak(src.gamma)); - - // Rescale the signal to compensate for differences in the encoding range - // and reference white level. This is necessary because of how mpv encodes - // brightness in textures. - if (src_range != dst_range) { - GLSLF("// rescale value range;\n"); - GLSLF("color.rgb *= vec3(%f);\n", src_range / dst_range); - } + pass_ootf(sc, src.light, src_range); // Adapt to the right colorspace if necessary if (src.primaries != dst.primaries) { @@ -734,18 +760,20 @@ void pass_color_map(struct gl_shader_cache *sc, GLSL(color.rgb = cms_matrix * color.rgb;) // Since this can reduce the gamut, figure out by how much for (int c = 0; c < 3; c++) - ref_peak = MPMAX(ref_peak, m[c][c]); + src.sig_peak = MPMAX(src.sig_peak, m[c][c]); } // Tone map to prevent clipping when the source signal peak exceeds the // encodable range or we've reduced the gamut - if (ref_peak > 1) { - pass_tone_map(sc, detect_peak ? 
0 : ref_peak, algo, + if (src.sig_peak > dst_range) { + GLSLF("color.rgb *= vec3(%f);\n", src_range); + pass_tone_map(sc, detect_peak, src.sig_peak, dst_range, algo, tone_mapping_param, tone_mapping_desat); + GLSLF("color.rgb *= vec3(%f);\n", 1.0 / dst_range); } if (src.light != dst.light) - pass_inverse_ootf(sc, dst.light, mp_trc_nom_peak(dst.gamma)); + pass_inverse_ootf(sc, dst.light, dst_range); // Warn for remaining out-of-gamut colors is enabled if (gamut_warning) { |