diff --git a/libavfilter/af_afftdn.c b/libavfilter/af_afftdn.c index ac2a499c7f..687a1691a1 100644 --- a/libavfilter/af_afftdn.c +++ b/libavfilter/af_afftdn.c @@ -92,11 +92,6 @@ typedef struct DeNoiseChannel { double noise_band_avi[NB_PROFILE_BANDS]; double noise_band_var[NB_PROFILE_BANDS]; - double sfm_threshold; - double sfm_alpha; - double sfm_results[3]; - int sfm_fail_flags[SFM_FLAGS_SIZE]; - int sfm_fail_total; double noise_reduction; double last_noise_reduction; double noise_floor; @@ -155,7 +150,6 @@ typedef struct AudioFFTDeNoiseContext { double window_weight; double floor; double sample_floor; - double auto_floor; int noise_band_edge[NB_PROFILE_BANDS + 2]; int noise_band_count; @@ -301,42 +295,6 @@ static double process_get_band_noise(AudioFFTDeNoiseContext *s, return sum; } -static void calculate_sfm(AudioFFTDeNoiseContext *s, - DeNoiseChannel *dnch, - int start, int end) -{ - double d1 = 0.0, d2 = 1.0; - int i = 0, j = 0; - - for (int k = start; k < end; k++) { - if (dnch->noisy_data[k] > s->sample_floor) { - j++; - d1 += dnch->noisy_data[k]; - d2 *= dnch->noisy_data[k]; - if (d2 > 1.0E100) { - d2 *= 1.0E-100; - i++; - } else if (d2 < 1.0E-100) { - d2 *= 1.0E100; - i--; - } - } - } - if (j > 1) { - d1 /= j; - dnch->sfm_results[0] = d1; - d2 = log(d2) + 230.2585 * i; - d2 /= j; - d1 = log(d1); - dnch->sfm_results[1] = d1; - dnch->sfm_results[2] = d1 - d2; - } else { - dnch->sfm_results[0] = s->auto_floor; - dnch->sfm_results[1] = dnch->sfm_threshold; - dnch->sfm_results[2] = dnch->sfm_threshold; - } -} - static double limit_gain(double a, double b) { if (a > 1.0) @@ -346,116 +304,88 @@ static double limit_gain(double a, double b) return 1.0; } +static void spectral_flatness(AudioFFTDeNoiseContext *s, const double *const spectral, + double floor, int len, double *rnum, double *rden) +{ + double num = 0., den = 0.; + int size = 0; + + for (int n = 0; n < len; n++) { + const double v = spectral[n]; + if (v > floor) { + num += log(v); + den += v; + size++; + } + } + + size = FFMAX(size, 1); + + num /= size; + den /= size; + + num = exp(num); + + *rnum = num; + *rden = den; +} + +static void set_parameters(AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch, int update_var, int update_auto_var); + +static double floor_offset(const double *S, int size, double mean) +{ + double offset = 0.0; + + for (int n = 0; n < size; n++) { + const double p = S[n] - mean; + + offset = fmax(offset, fabs(p)); + } + + return offset / mean; +} + static void process_frame(AVFilterContext *ctx, AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch, AVComplexFloat *fft_data, double *prior, double *prior_band_excit, int track_noise) { AVFilterLink *outlink = ctx->outputs[0]; - const double sample_floor = s->sample_floor; const double *abs_var = dnch->abs_var; const double ratio = outlink->frame_count_out ? s->ratio : 1.0; const double rratio = 1. - ratio; const int *bin2band = s->bin2band; + double *noisy_data = dnch->noisy_data; double *band_excit = dnch->band_excit; double *band_amt = dnch->band_amt; double *gain = dnch->gain; - int n = 0, i1; - for (int i = 0; i < s->fft_length2; i++) { - double sqr_new_gain, new_gain, mag, mag_abs_var, new_mag_abs_var; + for (int i = 0; i < s->bin_count; i++) { + double sqr_new_gain, new_gain, power, mag, mag_abs_var, new_mag_abs_var; - mag = fft_data[i].re * fft_data[i].re + fft_data[i].im * fft_data[i].im; - if (mag > sample_floor) - n = i; - - dnch->noisy_data[i] = mag; - mag_abs_var = mag / abs_var[i]; + noisy_data[i] = mag = hypot(fft_data[i].re, fft_data[i].im); + power = mag * mag; + mag_abs_var = power / abs_var[i]; new_mag_abs_var = ratio * prior[i] + rratio * fmax(mag_abs_var - 1.0, 0.0); new_gain = new_mag_abs_var / (1.0 + new_mag_abs_var); sqr_new_gain = new_gain * new_gain; prior[i] = mag_abs_var * sqr_new_gain; - dnch->clean_data[i] = mag * sqr_new_gain; + dnch->clean_data[i] = power * sqr_new_gain; gain[i] = new_gain; } - if (n > s->fft_length2 - 2) { - n = s->bin_count; - i1 = s->noise_band_count; - } else { - i1 = 0; - for (int i = 0; i <= s->noise_band_count; i++) { - if (n > 1.1 * s->noise_band_edge[i]) { - i1 = i; - } - } - } + if (track_noise) { + double flatness, num, den; - if (track_noise && (i1 > s->noise_band_count / 2)) { - int j = FFMIN(n, s->noise_band_edge[i1]); - int m = 3, k; + spectral_flatness(s, noisy_data, s->floor, s->bin_count, &num, &den); - for (k = i1 - 1; k >= 0; k--) { - int i = s->noise_band_edge[k]; - calculate_sfm(s, dnch, i, j); - dnch->noise_band_sample[k] = dnch->sfm_results[0]; - if (dnch->sfm_results[2] + 0.013 * m * fmax(0.0, dnch->sfm_results[1] - 20.53) >= dnch->sfm_threshold) { - break; - } - j = i; - m++; - } + flatness = num / den; + if (flatness > 0.8) { + const double offset = floor_offset(noisy_data, s->bin_count, den); + const double new_floor = av_clipd(10.0 * log10(den) - 100.0 + offset, -90., -20.); - if (k < i1 - 1) { - double sum = 0.0, min, max; - int i; - - for (i = i1 - 1; i > k; i--) { - min = log(dnch->noise_band_sample[i] / dnch->noise_band_auto_var[i]); - sum += min; - } - - i = i1 - k - 1; - if (i < 5) { - min = 3.0E-4 * i * i; - } else { - min = 3.0E-4 * (8 * i - 16); - } - if (i < 3) { - max = 2.0E-4 * i * i; - } else { - max = 2.0E-4 * (4 * i - 4); - } - - if (s->track_residual) { - if (dnch->last_noise_floor > dnch->last_residual_floor + 9) { - min *= 0.5; - max *= 0.75; - } else if (dnch->last_noise_floor > dnch->last_residual_floor + 6) { - min *= 0.4; - max *= 1.0; - } else if (dnch->last_noise_floor > dnch->last_residual_floor + 4) { - min *= 0.3; - max *= 1.3; - } else if (dnch->last_noise_floor > dnch->last_residual_floor + 2) { - min *= 0.2; - max *= 1.6; - } else if (dnch->last_noise_floor > dnch->last_residual_floor) { - min *= 0.1; - max *= 2.0; - } else { - min = 0.0; - max *= 2.5; - } - } - - sum = av_clipd(sum, -min, max); - sum = exp(sum); - for (int i = 0; i < NB_PROFILE_BANDS; i++) - dnch->noise_band_auto_var[i] *= sum; - } else if (dnch->sfm_results[2] >= dnch->sfm_threshold) { - dnch->sfm_fail_flags[s->block_count & SFM_FLAGS_MASK] = 1; - dnch->sfm_fail_total += 1; + dnch->noise_floor = 0.1 * new_floor + dnch->noise_floor * 0.9; + set_parameters(s, dnch, 1, 0); } } @@ -495,7 +425,7 @@ static void process_frame(AVFilterContext *ctx, } } - for (int i = 0; i < s->fft_length2; i++) { + for (int i = 0; i < s->bin_count; i++) { const double new_gain = gain[i]; fft_data[i].re *= new_gain; @@ -556,7 +486,6 @@ static void set_band_parameters(AudioFFTDeNoiseContext *s, d4 = (m - i) / d2; dnch->rel_var[m] = exp((d5 * d3 + band_noise * d4) * C); } - dnch->rel_var[s->fft_length2] = exp(band_noise * C); for (i = 0; i < NB_PROFILE_BANDS; i++) dnch->noise_band_auto_var[i] = dnch->max_var * exp((process_get_band_noise(s, dnch, i) - 2.0) * C); @@ -667,7 +596,7 @@ static int config_input(AVFilterLink *inlink) s->fft_length2 = 1 << (32 - ff_clz(s->window_length)); s->fft_length = s->fft_length2; s->buffer_length = s->fft_length * 2; - s->bin_count = s->fft_length2 + 1; + s->bin_count = s->fft_length2 / 2 + 1; s->band_centre[0] = 80; for (i = 1; i < NB_PROFILE_BANDS; i++) { @@ -712,7 +641,7 @@ static int config_input(AVFilterLink *inlink) for (i = 0; i < s->bin_count; i++) s->bin2band[i] = lrint(sdiv * freq2bark((0.5 * i * s->sample_rate) / s->fft_length2)); - s->number_of_bands = s->bin2band[s->fft_length2] + 1; + s->number_of_bands = s->bin2band[s->bin_count - 1] + 1; s->band_alpha = av_calloc(s->number_of_bands, sizeof(*s->band_alpha)); s->band_beta = av_calloc(s->number_of_bands, sizeof(*s->band_beta)); @@ -745,19 +674,6 @@ static int config_input(AVFilterLink *inlink) reduce_mean(dnch->band_noise); - dnch->sfm_threshold = 0.8; - dnch->sfm_alpha = 0.05; - for (i = 0; i < SFM_FLAGS_SIZE; i++) - dnch->sfm_fail_flags[i] = 0; - - dnch->sfm_fail_total = 0; - j = FFMAX((int)(10.0 * (1.3 - dnch->sfm_threshold)), 1); - - for (i = 0; i < SFM_FLAGS_SIZE; i += j) { - dnch->sfm_fail_flags[i] = 1; - dnch->sfm_fail_total += 1; - } - dnch->amt = av_calloc(s->bin_count, sizeof(*dnch->amt)); dnch->band_amt = av_calloc(s->number_of_bands, sizeof(*dnch->band_amt)); dnch->band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->band_excit)); @@ -884,7 +800,6 @@ static int config_input(AVFilterLink *inlink) s->window_weight = 0.5 * sum; s->floor = (1LL << 48) * exp(-23.025558369790467) * s->window_weight; s->sample_floor = s->floor * exp(4.144600506562284); - s->auto_floor = s->floor * exp(6.907667510937141); for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { DeNoiseChannel *dnch = &s->dnch[ch]; @@ -965,7 +880,6 @@ static void sample_noise_block(AudioFFTDeNoiseContext *s, mag2 = fmax(mag2, s->sample_floor); - dnch->noisy_data[i] = mag2; var += mag2; n++; } @@ -1034,7 +948,7 @@ static void set_noise_profile(AudioFFTDeNoiseContext *s, for (int m = 0; m < NB_PROFILE_BANDS; m++) temp[m] -= d1; - new_noise_floor = round(d1) + 2.5; + new_noise_floor = d1; if (new_profile) { av_log(s, AV_LOG_INFO, "bn="); @@ -1066,16 +980,6 @@ static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_job double *dst = dnch->out_samples; float *fft_in = dnch->fft_in; - if (s->track_noise) { - int i = s->block_count & SFM_FLAGS_MASK; - - if (dnch->sfm_fail_flags[i]) - dnch->sfm_fail_total--; - dnch->sfm_fail_flags[i] = 0; - dnch->sfm_threshold *= 1.0 - dnch->sfm_alpha; - dnch->sfm_threshold += dnch->sfm_alpha * ((1.0 / SFM_FLAGS_SIZE) * dnch->sfm_fail_total); - } - for (int m = 0; m < window_length; m++) fft_in[m] = window[m] * src[m] * (1LL << 23); @@ -1098,25 +1002,6 @@ static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_job return 0; } -static void get_auto_noise_levels(AudioFFTDeNoiseContext *s, - DeNoiseChannel *dnch, - double *levels) -{ - if (s->noise_band_count > 0) { - for (int i = 0; i < s->noise_band_count; i++) { - levels[i] = 10.0 * log10(dnch->noise_band_auto_var[i] / s->floor) - 100.0; - } - if (s->noise_band_count < NB_PROFILE_BANDS) { - for (int i = s->noise_band_count; i < NB_PROFILE_BANDS; i++) - levels[i] = levels[i - 1]; - } - } else { - for (int i = 0; i < NB_PROFILE_BANDS; i++) { - levels[i] = -100.0; - } - } -} - static int output_frame(AVFilterLink *inlink, AVFrame *in) { AVFilterContext *ctx = inlink->dst; @@ -1139,10 +1024,7 @@ static int output_frame(AVFilterLink *inlink, AVFrame *in) for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { DeNoiseChannel *dnch = &s->dnch[ch]; - double levels[NB_PROFILE_BANDS]; - get_auto_noise_levels(s, dnch, levels); - set_noise_profile(s, dnch, levels, 0); average += dnch->noise_floor; max = fmax(max, dnch->noise_floor); min = fmin(min, dnch->noise_floor);