From 86e719150d3fdb27f70c0d9d0951019575fda72a Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Sun, 13 Aug 2023 16:31:40 +0300 Subject: [PATCH] remove unneeded stuff --- src/transcription-filter.cpp | 14 +++----------- src/whisper-processing.cpp | 16 ++++++++++------ src/whisper-processing.h | 2 +- 3 files changed, 14 insertions(+), 18 deletions(-) diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp index dfaed85..c7eac16 100644 --- a/src/transcription-filter.cpp +++ b/src/transcription-filter.cpp @@ -6,7 +6,6 @@ #include "whisper-processing.h" #include "whisper-language.h" - inline enum speaker_layout convert_speaker_layout(uint8_t channels) { switch (channels) { @@ -167,9 +166,7 @@ void transcription_filter_update(void *data, obs_data_t *s) struct transcription_filter_data *gf = static_cast(data); - gf->filler_p_threshold = (float)obs_data_get_double(s, "filler_p_threshold"); gf->log_level = (int)obs_data_get_int(s, "log_level"); - gf->do_silence = obs_data_get_bool(s, "do_silence"); gf->vad_enabled = obs_data_get_bool(s, "vad_enabled"); gf->log_words = obs_data_get_bool(s, "log_words"); @@ -381,8 +378,6 @@ void transcription_filter_deactivate(void *data) void transcription_filter_defaults(obs_data_t *s) { - obs_data_set_default_double(s, "filler_p_threshold", 0.75); - obs_data_set_default_bool(s, "do_silence", true); obs_data_set_default_bool(s, "vad_enabled", true); obs_data_set_default_int(s, "log_level", LOG_DEBUG); obs_data_set_default_bool(s, "log_words", true); @@ -419,16 +414,13 @@ obs_properties_t *transcription_filter_properties(void *data) { obs_properties_t *ppts = obs_properties_create(); - obs_properties_add_float_slider(ppts, "filler_p_threshold", "filler_p_threshold", 0.0f, - 1.0f, 0.05f); - obs_properties_add_bool(ppts, "do_silence", "do_silence"); - obs_properties_add_bool(ppts, "vad_enabled", "vad_enabled"); - obs_property_t *list = obs_properties_add_list(ppts, "log_level", "log_level", + obs_properties_add_bool(ppts, "vad_enabled", "VAD Enabled"); + obs_property_t *list = obs_properties_add_list(ppts, "log_level", "Log level", OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_INT); obs_property_list_add_int(list, "DEBUG", LOG_DEBUG); obs_property_list_add_int(list, "INFO", LOG_INFO); obs_property_list_add_int(list, "WARNING", LOG_WARNING); - obs_properties_add_bool(ppts, "log_words", "log_words"); + obs_properties_add_bool(ppts, "log_words", "Log output words"); obs_property_t *sources = obs_properties_add_list(ppts, "subtitle_sources", "subtitle_sources", OBS_COMBO_TYPE_LIST, diff --git a/src/whisper-processing.cpp b/src/whisper-processing.cpp index dec729a..c692f8c 100644 --- a/src/whisper-processing.cpp +++ b/src/whisper-processing.cpp @@ -4,6 +4,7 @@ #include "plugin-support.h" #include "transcription-filter-data.h" +#include "whisper-processing.h" #include #include @@ -286,12 +287,15 @@ void process_audio_from_buffer(struct transcription_filter_data *gf) "audio processing took too long (%d ms), reducing overlap to %lu ms", (int)duration, gf->overlap_ms); } else if (!skipped_inference) { - // try to increase overlap up to 75% of the segment - gf->overlap_ms = std::min((uint64_t)gf->overlap_ms + 10, - (uint64_t)((float)new_frames_from_infos_ms * 0.75f)); - gf->overlap_frames = gf->overlap_ms * gf->sample_rate / 1000; - obs_log(gf->log_level, "audio processing took %d ms, increasing overlap to %lu ms", - (int)duration, gf->overlap_ms); + if (gf->overlap_ms < OVERLAP_SIZE_MSEC) { + // try to increase overlap up to OVERLAP_SIZE_MSEC + gf->overlap_ms = std::min((uint64_t)gf->overlap_ms + 10, + (uint64_t)OVERLAP_SIZE_MSEC); + gf->overlap_frames = gf->overlap_ms * gf->sample_rate / 1000; + obs_log(gf->log_level, + "audio processing took %d ms, increasing overlap to %lu ms", + (int)duration, gf->overlap_ms); + } } } diff --git a/src/whisper-processing.h b/src/whisper-processing.h index a132a12..5c5b28f 100644 --- a/src/whisper-processing.h +++ b/src/whisper-processing.h @@ -6,7 +6,7 @@ // at 16Khz, 3000 msec is 48000 samples #define WHISPER_FRAME_SIZE 48000 // overlap in msec -#define OVERLAP_SIZE_MSEC 340 +#define OVERLAP_SIZE_MSEC 200 void whisper_loop(void *data); struct whisper_context *init_whisper_context(const std::string &model_path);