From 86e719150d3fdb27f70c0d9d0951019575fda72a Mon Sep 17 00:00:00 2001
From: Roy Shilkrot <roy.shil@gmail.com>
Date: Sun, 13 Aug 2023 16:31:40 +0300
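Subject: [PATCH] Remove unneeded filter settings and cap overlap growth

Drop the filler_p_threshold and do_silence settings from the transcription
filter (update, defaults and properties), and give the vad_enabled,
log_level and log_words properties human-readable labels.

When audio processing is not too slow and inference was not skipped, grow
the overlap by 10 ms only up to OVERLAP_SIZE_MSEC instead of up to 75% of
the segment, and lower OVERLAP_SIZE_MSEC from 340 to 200.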
---
 src/transcription-filter.cpp | 14 +++-----------
 src/whisper-processing.cpp   | 16 ++++++++++------
 src/whisper-processing.h     |  2 +-
 3 files changed, 14 insertions(+), 18 deletions(-)
diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp
index dfaed85..c7eac16 100644
--- a/src/transcription-filter.cpp
+++ b/src/transcription-filter.cpp
@@ -6,7 +6,6 @@
 #include "whisper-processing.h"
 #include "whisper-language.h"
 
-
 inline enum speaker_layout convert_speaker_layout(uint8_t channels)
 {
 	switch (channels) {
@@ -167,9 +166,7 @@ void transcription_filter_update(void *data, obs_data_t *s)
 	struct transcription_filter_data *gf =
 		static_cast<struct transcription_filter_data *>(data);
 
-	gf->filler_p_threshold = (float)obs_data_get_double(s, "filler_p_threshold");
 	gf->log_level = (int)obs_data_get_int(s, "log_level");
-	gf->do_silence = obs_data_get_bool(s, "do_silence");
 	gf->vad_enabled = obs_data_get_bool(s, "vad_enabled");
 	gf->log_words = obs_data_get_bool(s, "log_words");
 
@@ -381,8 +378,6 @@ void transcription_filter_deactivate(void *data)
 
 void transcription_filter_defaults(obs_data_t *s)
 {
-	obs_data_set_default_double(s, "filler_p_threshold", 0.75);
-	obs_data_set_default_bool(s, "do_silence", true);
 	obs_data_set_default_bool(s, "vad_enabled", true);
 	obs_data_set_default_int(s, "log_level", LOG_DEBUG);
 	obs_data_set_default_bool(s, "log_words", true);
@@ -419,16 +414,13 @@ obs_properties_t *transcription_filter_properties(void *data)
 {
 	obs_properties_t *ppts = obs_properties_create();
 
-	obs_properties_add_float_slider(ppts, "filler_p_threshold", "filler_p_threshold", 0.0f,
-					1.0f, 0.05f);
-	obs_properties_add_bool(ppts, "do_silence", "do_silence");
-	obs_properties_add_bool(ppts, "vad_enabled", "vad_enabled");
-	obs_property_t *list = obs_properties_add_list(ppts, "log_level", "log_level",
+	obs_properties_add_bool(ppts, "vad_enabled", "VAD Enabled");
+	obs_property_t *list = obs_properties_add_list(ppts, "log_level", "Log level",
 						       OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_INT);
 	obs_property_list_add_int(list, "DEBUG", LOG_DEBUG);
 	obs_property_list_add_int(list, "INFO", LOG_INFO);
 	obs_property_list_add_int(list, "WARNING", LOG_WARNING);
-	obs_properties_add_bool(ppts, "log_words", "log_words");
+	obs_properties_add_bool(ppts, "log_words", "Log output words");
 
 	obs_property_t *sources = obs_properties_add_list(ppts, "subtitle_sources",
 							  "subtitle_sources", OBS_COMBO_TYPE_LIST,
diff --git a/src/whisper-processing.cpp b/src/whisper-processing.cpp
index dec729a..c692f8c 100644
--- a/src/whisper-processing.cpp
+++ b/src/whisper-processing.cpp
@@ -4,6 +4,7 @@
 
 #include "plugin-support.h"
 #include "transcription-filter-data.h"
+#include "whisper-processing.h"
 
 #include <algorithm>
 #include <cctype>
@@ -286,12 +287,15 @@ void process_audio_from_buffer(struct transcription_filter_data *gf)
 			"audio processing took too long (%d ms), reducing overlap to %lu ms",
 			(int)duration, gf->overlap_ms);
 	} else if (!skipped_inference) {
-		// try to increase overlap up to 75% of the segment
-		gf->overlap_ms = std::min((uint64_t)gf->overlap_ms + 10,
-					  (uint64_t)((float)new_frames_from_infos_ms * 0.75f));
-		gf->overlap_frames = gf->overlap_ms * gf->sample_rate / 1000;
-		obs_log(gf->log_level, "audio processing took %d ms, increasing overlap to %lu ms",
-			(int)duration, gf->overlap_ms);
+		if (gf->overlap_ms < OVERLAP_SIZE_MSEC) {
+			// try to increase overlap up to OVERLAP_SIZE_MSEC
+			gf->overlap_ms = std::min((uint64_t)gf->overlap_ms + 10,
+						  (uint64_t)OVERLAP_SIZE_MSEC);
+			gf->overlap_frames = gf->overlap_ms * gf->sample_rate / 1000;
+			obs_log(gf->log_level,
+				"audio processing took %d ms, increasing overlap to %lu ms",
+				(int)duration, gf->overlap_ms);
+		}
 	}
 }
 
diff --git a/src/whisper-processing.h b/src/whisper-processing.h
index a132a12..5c5b28f 100644
--- a/src/whisper-processing.h
+++ b/src/whisper-processing.h
@@ -6,7 +6,7 @@
 // at 16Khz, 3000 msec is 48000 samples
 #define WHISPER_FRAME_SIZE 48000
 // overlap in msec
-#define OVERLAP_SIZE_MSEC 340
+#define OVERLAP_SIZE_MSEC 200
 
 void whisper_loop(void *data);
 struct whisper_context *init_whisper_context(const std::string &model_path);