remove unneeded stuff

2024-11-07 18:57:14 +00:00 · 2023-08-13 16:31:40 +03:00 · 2023-08-13 16:31:40 +03:00 · 86e719150d
commit 86e719150d
parent e246e80448
3 changed files with 14 additions and 18 deletions
--- a/src/transcription-filter.cpp
+++ b/src/transcription-filter.cpp
@@ -6,7 +6,6 @@
 #include "whisper-processing.h"
 #include "whisper-language.h"
 inline enum speaker_layout convert_speaker_layout(uint8_t channels)
 {
 	switch (channels) {
@@ -167,9 +166,7 @@ void transcription_filter_update(void *data, obs_data_t *s)
 	struct transcription_filter_data *gf =
 		static_cast<struct transcription_filter_data *>(data);
 	gf->filler_p_threshold = (float)obs_data_get_double(s, "filler_p_threshold");
 	gf->log_level = (int)obs_data_get_int(s, "log_level");
 	gf->do_silence = obs_data_get_bool(s, "do_silence");
 	gf->vad_enabled = obs_data_get_bool(s, "vad_enabled");
 	gf->log_words = obs_data_get_bool(s, "log_words");
@@ -381,8 +378,6 @@ void transcription_filter_deactivate(void *data)
 void transcription_filter_defaults(obs_data_t *s)
 {
 	obs_data_set_default_double(s, "filler_p_threshold", 0.75);
 	obs_data_set_default_bool(s, "do_silence", true);
 	obs_data_set_default_bool(s, "vad_enabled", true);
 	obs_data_set_default_int(s, "log_level", LOG_DEBUG);
 	obs_data_set_default_bool(s, "log_words", true);
@@ -419,16 +414,13 @@ obs_properties_t *transcription_filter_properties(void *data)
 {
 	obs_properties_t *ppts = obs_properties_create();
-	obs_properties_add_float_slider(ppts, "filler_p_threshold", "filler_p_threshold", 0.0f,
+	obs_properties_add_bool(ppts, "vad_enabled", "VAD Enabled");
-					1.0f, 0.05f);
+	obs_property_t *list = obs_properties_add_list(ppts, "log_level", "Log level",
 	obs_properties_add_bool(ppts, "do_silence", "do_silence");
 	obs_properties_add_bool(ppts, "vad_enabled", "vad_enabled");
 	obs_property_t *list = obs_properties_add_list(ppts, "log_level", "log_level",
 						       OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_INT);
 	obs_property_list_add_int(list, "DEBUG", LOG_DEBUG);
 	obs_property_list_add_int(list, "INFO", LOG_INFO);
 	obs_property_list_add_int(list, "WARNING", LOG_WARNING);
-	obs_properties_add_bool(ppts, "log_words", "log_words");
+	obs_properties_add_bool(ppts, "log_words", "Log output words");
 	obs_property_t *sources = obs_properties_add_list(ppts, "subtitle_sources",
 							  "subtitle_sources", OBS_COMBO_TYPE_LIST,
--- a/src/whisper-processing.cpp
+++ b/src/whisper-processing.cpp
@@ -4,6 +4,7 @@
 #include "plugin-support.h"
 #include "transcription-filter-data.h"
 #include "whisper-processing.h"
 #include <algorithm>
 #include <cctype>
@@ -286,12 +287,15 @@ void process_audio_from_buffer(struct transcription_filter_data *gf)
 			"audio processing took too long (%d ms), reducing overlap to %lu ms",
 			(int)duration, gf->overlap_ms);
 	} else if (!skipped_inference) {
-		// try to increase overlap up to 75% of the segment
+		if (gf->overlap_ms < OVERLAP_SIZE_MSEC) {
-		gf->overlap_ms = std::min((uint64_t)gf->overlap_ms + 10,
+			// try to increase overlap up to OVERLAP_SIZE_MSEC
-					  (uint64_t)((float)new_frames_from_infos_ms * 0.75f));
+			gf->overlap_ms = std::min((uint64_t)gf->overlap_ms + 10,
-		gf->overlap_frames = gf->overlap_ms * gf->sample_rate / 1000;
+						  (uint64_t)OVERLAP_SIZE_MSEC);
-		obs_log(gf->log_level, "audio processing took %d ms, increasing overlap to %lu ms",
+			gf->overlap_frames = gf->overlap_ms * gf->sample_rate / 1000;
-			(int)duration, gf->overlap_ms);
+			obs_log(gf->log_level,
 				"audio processing took %d ms, increasing overlap to %lu ms",
 				(int)duration, gf->overlap_ms);
 		}
 	}
 }
--- a/src/whisper-processing.h
+++ b/src/whisper-processing.h
@@ -6,7 +6,7 @@
 // at 16Khz, 3000 msec is 48000 samples
 #define WHISPER_FRAME_SIZE 48000
 // overlap in msec
-#define OVERLAP_SIZE_MSEC 340
+#define OVERLAP_SIZE_MSEC 200
 void whisper_loop(void *data);
 struct whisper_context *init_whisper_context(const std::string &model_path);