From b14ba3e93fbafe5d9cdb1e81502ae624d6018162 Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Sun, 20 Aug 2023 01:25:19 +0300 Subject: [PATCH 01/14] attempt fix --- data/locale/en-US.ini | 2 +- src/transcription-filter.cpp | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/data/locale/en-US.ini b/data/locale/en-US.ini index 04f4f44..2e1af16 100644 --- a/data/locale/en-US.ini +++ b/data/locale/en-US.ini @@ -1 +1 @@ -transcription_filterAudioFilter=LocalVocal Transcription +transcription_filterAudioFilter="LocalVocal Transcription" diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp index b36855c..15079eb 100644 --- a/src/transcription-filter.cpp +++ b/src/transcription-filter.cpp @@ -328,6 +328,8 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter) gf->wshiper_thread_cv = std::unique_ptr(new std::condition_variable()); gf->text_source_mutex = std::unique_ptr(new std::mutex()); + gf->text_source = nullptr; + gf->text_source_name = nullptr; // set the callback to set the text in the output text source (subtitles) gf->setTextCallback = [gf](const std::string &str) { @@ -426,9 +428,12 @@ obs_properties_t *transcription_filter_properties(void *data) obs_property_list_add_int(list, "WARNING", LOG_WARNING); obs_properties_add_bool(ppts, "log_words", "Log output words"); - obs_property_t *sources = obs_properties_add_list(ppts, "subtitle_sources", - "subtitle_sources", OBS_COMBO_TYPE_LIST, - OBS_COMBO_FORMAT_STRING); + obs_property_t *sources = + obs_properties_add_list(ppts, "subtitle_sources", "Subtitles Text Source", + OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING); + // Add "none" option + obs_property_list_add_string(sources, "None / No output", "none"); + // Add text sources obs_enum_sources(add_sources_to_list, sources); // Add a list of available whisper models to download From f84e48fb6c9fd05db5a94743e2ba6df93146acd3 Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Sun, 20 Aug 2023 07:13:37 +0300 Subject: [PATCH 02/14] minor fixes --- src/transcription-filter-data.h | 18 +++++++++--------- src/transcription-filter.cpp | 19 +++++++++---------- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/src/transcription-filter-data.h b/src/transcription-filter-data.h index 4457368..00a8c31 100644 --- a/src/transcription-filter-data.h +++ b/src/transcription-filter-data.h @@ -37,11 +37,11 @@ struct transcription_filter_data { struct circlebuf input_buffers[MAX_PREPROC_CHANNELS]; /* Resampler */ - audio_resampler_t *resampler; + audio_resampler_t *resampler = nullptr; /* whisper */ std::string whisper_model_path = "models/ggml-tiny.en.bin"; - struct whisper_context *whisper_context; + struct whisper_context *whisper_context = nullptr; whisper_full_params whisper_params; float filler_p_threshold; @@ -50,21 +50,21 @@ struct transcription_filter_data { bool vad_enabled; int log_level; bool log_words; - bool active; + bool active = false; // Text source to output the subtitles - obs_weak_source_t *text_source; - char *text_source_name; - std::unique_ptr text_source_mutex; + obs_weak_source_t *text_source = nullptr; + char *text_source_name = nullptr; + std::unique_ptr text_source_mutex = nullptr; // Callback to set the text in the output text source (subtitles) std::function setTextCallback; // Use std for thread and mutex std::thread whisper_thread; - std::unique_ptr whisper_buf_mutex; - std::unique_ptr whisper_ctx_mutex; - std::unique_ptr wshiper_thread_cv; + std::unique_ptr whisper_buf_mutex = nullptr; + std::unique_ptr whisper_ctx_mutex = nullptr; + std::unique_ptr wshiper_thread_cv = nullptr; }; // Audio packet info diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp index 15079eb..fd69441 100644 --- a/src/transcription-filter.cpp +++ b/src/transcription-filter.cpp @@ -206,8 +206,9 @@ void transcription_filter_update(void *data, obs_data_t *s) obs_weak_source_release(old_weak_text_source); } - const char *new_model_path = obs_data_get_string(s, "whisper_model_path"); - if (strcmp(new_model_path, gf->whisper_model_path.c_str()) != 0) { + std::string new_model_path = obs_data_get_string(s, "whisper_model_path"); + + if (new_model_path != gf->whisper_model_path) { // model path changed, reload the model obs_log(LOG_INFO, "model path changed, reloading model"); if (gf->whisper_context != nullptr) { @@ -220,7 +221,7 @@ void transcription_filter_update(void *data, obs_data_t *s) if (gf->whisper_thread.joinable()) { gf->whisper_thread.join(); } - gf->whisper_model_path = bstrdup(new_model_path); + gf->whisper_model_path = new_model_path; // check if the model exists, if not, download it if (!check_if_model_exists(gf->whisper_model_path)) { @@ -229,8 +230,7 @@ void transcription_filter_update(void *data, obs_data_t *s) gf->whisper_model_path, [gf](int download_status) { if (download_status == 0) { obs_log(LOG_INFO, "Model download complete"); - gf->whisper_context = init_whisper_context( - gf->whisper_model_path); + gf->whisper_context = init_whisper_context(gf->whisper_model_path); gf->whisper_thread = std::thread(whisper_loop, gf); } else { obs_log(LOG_ERROR, "Model download failed"); @@ -321,8 +321,6 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter) gf->resampler = audio_resampler_create(&dst, &src); - gf->active = true; - gf->whisper_buf_mutex = std::unique_ptr(new std::mutex()); gf->whisper_ctx_mutex = std::unique_ptr(new std::mutex()); gf->wshiper_thread_cv = @@ -340,12 +338,11 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter) std::lock_guard lock(*gf->text_source_mutex); - obs_weak_source_t *text_source = gf->text_source; - if (!text_source) { + if (!gf->text_source) { obs_log(LOG_ERROR, "text_source is null"); return; } - auto target = obs_weak_source_get_source(text_source); + auto target = obs_weak_source_get_source(gf->text_source); if (!target) { obs_log(LOG_ERROR, "text_source target is null"); return; @@ -362,6 +359,8 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter) // start the thread gf->whisper_thread = std::thread(whisper_loop, gf); + gf->active = true; + return gf; } From c27049694ba79d88fcab116efdc02ec53cb7944a Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Sun, 20 Aug 2023 07:14:40 +0300 Subject: [PATCH 03/14] lint --- src/transcription-filter.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp index fd69441..5595df4 100644 --- a/src/transcription-filter.cpp +++ b/src/transcription-filter.cpp @@ -230,7 +230,8 @@ void transcription_filter_update(void *data, obs_data_t *s) gf->whisper_model_path, [gf](int download_status) { if (download_status == 0) { obs_log(LOG_INFO, "Model download complete"); - gf->whisper_context = init_whisper_context(gf->whisper_model_path); + gf->whisper_context = init_whisper_context( + gf->whisper_model_path); gf->whisper_thread = std::thread(whisper_loop, gf); } else { obs_log(LOG_ERROR, "Model download failed"); From 882fb3b7f8ac0a6abf8d50bd8960daf1db5d8f7f Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Mon, 21 Aug 2023 22:55:44 +0300 Subject: [PATCH 04/14] add logs --- src/transcription-filter.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp index 5595df4..c5b5c83 100644 --- a/src/transcription-filter.cpp +++ b/src/transcription-filter.cpp @@ -169,10 +169,12 @@ void transcription_filter_update(void *data, obs_data_t *s) struct transcription_filter_data *gf = static_cast(data); + obs_log(LOG_INFO, "transcription_filter_update"); gf->log_level = (int)obs_data_get_int(s, "log_level"); gf->vad_enabled = obs_data_get_bool(s, "vad_enabled"); gf->log_words = obs_data_get_bool(s, "log_words"); + obs_log(LOG_INFO, "transcription_filter: update text source"); // update the text source const char *text_source_name = obs_data_get_string(s, "subtitle_sources"); obs_weak_source_t *old_weak_text_source = NULL; @@ -203,9 +205,12 @@ void transcription_filter_update(void *data, obs_data_t *s) } if (old_weak_text_source) { + obs_log(LOG_INFO, "releasing old text source"); obs_weak_source_release(old_weak_text_source); } + obs_log(LOG_INFO, "transcription_filter: update whisper model"); + // update the whisper model path std::string new_model_path = obs_data_get_string(s, "whisper_model_path"); if (new_model_path != gf->whisper_model_path) { @@ -244,6 +249,7 @@ void transcription_filter_update(void *data, obs_data_t *s) } } + obs_log(LOG_INFO, "transcription_filter: update whisper params"); std::lock_guard lock(*gf->whisper_ctx_mutex); gf->whisper_params = whisper_full_default_params( @@ -308,9 +314,10 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter) gf->overlap_ms = OVERLAP_SIZE_MSEC; gf->overlap_frames = (size_t)((float)gf->sample_rate / (1000.0f / (float)gf->overlap_ms)); - obs_log(LOG_INFO, "transcription_filter filter: channels %d, frames %d, sample_rate %d", + obs_log(LOG_INFO, "transcription_filter: channels %d, frames %d, sample_rate %d", (int)gf->channels, (int)gf->frames, gf->sample_rate); + obs_log(LOG_INFO, "transcription_filter: setup audio resampler"); struct resample_info src, dst; src.samples_per_sec = gf->sample_rate; src.format = AUDIO_FORMAT_FLOAT_PLANAR; @@ -322,6 +329,7 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter) gf->resampler = audio_resampler_create(&dst, &src); + obs_log(LOG_INFO, "transcription_filter: setup mutexes and condition variables"); gf->whisper_buf_mutex = std::unique_ptr(new std::mutex()); gf->whisper_ctx_mutex = std::unique_ptr(new std::mutex()); gf->wshiper_thread_cv = @@ -330,6 +338,7 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter) gf->text_source = nullptr; gf->text_source_name = nullptr; + obs_log(LOG_INFO, "transcription_filter: setup callback"); // set the callback to set the text in the output text source (subtitles) gf->setTextCallback = [gf](const std::string &str) { if (!gf->text_source) { @@ -354,9 +363,11 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter) obs_source_release(target); }; + obs_log(LOG_INFO, "transcription_filter: run update"); // get the settings updated on the filter data struct transcription_filter_update(gf, settings); + obs_log(LOG_INFO, "transcription_filter: start whisper thread"); // start the thread gf->whisper_thread = std::thread(whisper_loop, gf); From 343bebd6ff688d7b49a84f75058c863a38686893 Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Tue, 22 Aug 2023 09:53:04 +0300 Subject: [PATCH 05/14] mutex reset --- src/transcription-filter.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp index c5b5c83..bb015f8 100644 --- a/src/transcription-filter.cpp +++ b/src/transcription-filter.cpp @@ -330,11 +330,10 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter) gf->resampler = audio_resampler_create(&dst, &src); obs_log(LOG_INFO, "transcription_filter: setup mutexes and condition variables"); - gf->whisper_buf_mutex = std::unique_ptr(new std::mutex()); - gf->whisper_ctx_mutex = std::unique_ptr(new std::mutex()); - gf->wshiper_thread_cv = - std::unique_ptr(new std::condition_variable()); - gf->text_source_mutex = std::unique_ptr(new std::mutex()); + gf->whisper_buf_mutex.reset(new std::mutex()); + gf->whisper_ctx_mutex.reset(new std::mutex()); + gf->wshiper_thread_cv.reset(new std::condition_variable()); + gf->text_source_mutex.reset(new std::mutex()); gf->text_source = nullptr; gf->text_source_name = nullptr; From 759a79f51e161a49884c98f4f28a62ddce705783 Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Tue, 22 Aug 2023 17:08:32 +0300 Subject: [PATCH 06/14] no struct null init --- src/transcription-filter-data.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/transcription-filter-data.h b/src/transcription-filter-data.h index 00a8c31..92c24a3 100644 --- a/src/transcription-filter-data.h +++ b/src/transcription-filter-data.h @@ -53,18 +53,18 @@ struct transcription_filter_data { bool active = false; // Text source to output the subtitles - obs_weak_source_t *text_source = nullptr; - char *text_source_name = nullptr; - std::unique_ptr text_source_mutex = nullptr; + obs_weak_source_t *text_source; + char *text_source_name; + std::unique_ptr text_source_mutex; // Callback to set the text in the output text source (subtitles) std::function setTextCallback; // Use std for thread and mutex std::thread whisper_thread; - std::unique_ptr whisper_buf_mutex = nullptr; - std::unique_ptr whisper_ctx_mutex = nullptr; - std::unique_ptr wshiper_thread_cv = nullptr; + std::unique_ptr whisper_buf_mutex; + std::unique_ptr whisper_ctx_mutex; + std::unique_ptr wshiper_thread_cv; }; // Audio packet info From f1a464a64d5e3e24ef74a92a8bd496fe5272a093 Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Tue, 22 Aug 2023 18:30:17 +0300 Subject: [PATCH 07/14] remove unique ptr --- src/transcription-filter-data.h | 8 ++++---- src/transcription-filter.cpp | 13 +++++++++---- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/transcription-filter-data.h b/src/transcription-filter-data.h index 92c24a3..cb83d96 100644 --- a/src/transcription-filter-data.h +++ b/src/transcription-filter-data.h @@ -55,16 +55,16 @@ struct transcription_filter_data { // Text source to output the subtitles obs_weak_source_t *text_source; char *text_source_name; - std::unique_ptr text_source_mutex; + std::mutex *text_source_mutex; // Callback to set the text in the output text source (subtitles) std::function setTextCallback; // Use std for thread and mutex std::thread whisper_thread; - std::unique_ptr whisper_buf_mutex; - std::unique_ptr whisper_ctx_mutex; - std::unique_ptr wshiper_thread_cv; + std::mutex *whisper_buf_mutex; + std::mutex *whisper_ctx_mutex; + std::condition_variable *wshiper_thread_cv; }; // Audio packet info diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp index bb015f8..11f3324 100644 --- a/src/transcription-filter.cpp +++ b/src/transcription-filter.cpp @@ -138,6 +138,11 @@ void transcription_filter_destroy(void *data) } circlebuf_free(&gf->info_buffer); + delete gf->whisper_buf_mutex; + delete gf->whisper_ctx_mutex; + delete gf->wshiper_thread_cv; + delete gf->text_source_mutex; + bfree(gf); } @@ -330,10 +335,10 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter) gf->resampler = audio_resampler_create(&dst, &src); obs_log(LOG_INFO, "transcription_filter: setup mutexes and condition variables"); - gf->whisper_buf_mutex.reset(new std::mutex()); - gf->whisper_ctx_mutex.reset(new std::mutex()); - gf->wshiper_thread_cv.reset(new std::condition_variable()); - gf->text_source_mutex.reset(new std::mutex()); + gf->whisper_buf_mutex = new std::mutex(); + gf->whisper_ctx_mutex = new std::mutex(); + gf->wshiper_thread_cv = new std::condition_variable(); + gf->text_source_mutex = new std::mutex(); gf->text_source = nullptr; gf->text_source_name = nullptr; From 60a3cf0364855d3860da2bbd5acffc3527a673ba Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Tue, 22 Aug 2023 19:08:47 +0300 Subject: [PATCH 08/14] guards on pointers --- src/transcription-filter-data.h | 12 ++++++------ src/transcription-filter.cpp | 27 +++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/src/transcription-filter-data.h b/src/transcription-filter-data.h index cb83d96..d238d6a 100644 --- a/src/transcription-filter-data.h +++ b/src/transcription-filter-data.h @@ -53,18 +53,18 @@ struct transcription_filter_data { bool active = false; // Text source to output the subtitles - obs_weak_source_t *text_source; - char *text_source_name; - std::mutex *text_source_mutex; + obs_weak_source_t *text_source = nullptr; + char *text_source_name = nullptr; + std::mutex *text_source_mutex = nullptr; // Callback to set the text in the output text source (subtitles) std::function setTextCallback; // Use std for thread and mutex std::thread whisper_thread; - std::mutex *whisper_buf_mutex; - std::mutex *whisper_ctx_mutex; - std::condition_variable *wshiper_thread_cv; + std::mutex *whisper_buf_mutex = nullptr; + std::mutex *whisper_ctx_mutex = nullptr; + std::condition_variable *wshiper_thread_cv = nullptr; }; // Audio packet info diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp index 11f3324..7a63a4e 100644 --- a/src/transcription-filter.cpp +++ b/src/transcription-filter.cpp @@ -68,6 +68,11 @@ struct obs_audio_data *transcription_filter_filter_audio(void *data, struct obs_ return audio; } + if (!gf->whisper_buf_mutex || !gf->whisper_ctx_mutex) { + obs_log(LOG_ERROR, "whisper mutexes are null"); + return audio; + } + { std::lock_guard lock(*gf->whisper_buf_mutex); // scoped lock obs_log(gf->log_level, @@ -187,6 +192,10 @@ void transcription_filter_update(void *data, obs_data_t *s) if (strcmp(text_source_name, "none") == 0 || strcmp(text_source_name, "(null)") == 0) { // new selected text source is not valid, release the old one if (gf->text_source) { + if (!gf->text_source_mutex) { + obs_log(LOG_ERROR, "text_source_mutex is null"); + return; + } std::lock_guard lock(*gf->text_source_mutex); old_weak_text_source = gf->text_source; gf->text_source = nullptr; @@ -201,6 +210,10 @@ void transcription_filter_update(void *data, obs_data_t *s) strcmp(text_source_name, gf->text_source_name) != 0) { // new text source is different from the old one, release the old one if (gf->text_source) { + if (!gf->text_source_mutex) { + obs_log(LOG_ERROR, "text_source_mutex is null"); + return; + } std::lock_guard lock(*gf->text_source_mutex); old_weak_text_source = gf->text_source; gf->text_source = nullptr; @@ -223,6 +236,10 @@ void transcription_filter_update(void *data, obs_data_t *s) obs_log(LOG_INFO, "model path changed, reloading model"); if (gf->whisper_context != nullptr) { // acquire the mutex before freeing the context + if (!gf->whisper_ctx_mutex || !gf->wshiper_thread_cv) { + obs_log(LOG_ERROR, "whisper_ctx_mutex is null"); + return; + } std::lock_guard lock(*gf->whisper_ctx_mutex); whisper_free(gf->whisper_context); gf->whisper_context = nullptr; @@ -254,6 +271,11 @@ void transcription_filter_update(void *data, obs_data_t *s) } } + if (!gf->whisper_ctx_mutex) { + obs_log(LOG_ERROR, "whisper_ctx_mutex is null"); + return; + } + obs_log(LOG_INFO, "transcription_filter: update whisper params"); std::lock_guard lock(*gf->whisper_ctx_mutex); @@ -345,6 +367,11 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter) obs_log(LOG_INFO, "transcription_filter: setup callback"); // set the callback to set the text in the output text source (subtitles) gf->setTextCallback = [gf](const std::string &str) { + if (!gf->text_source_mutex) { + obs_log(LOG_ERROR, "text_source_mutex is null"); + return; + } + if (!gf->text_source) { // attempt to acquire a weak ref to the text source if it's yet available acquire_weak_text_source_ref(gf); From de346a2897aceddbd23045f65be28fa9ba6fbecc Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Wed, 23 Aug 2023 08:34:34 +0300 Subject: [PATCH 09/14] remove lambda callback --- src/transcription-filter-data.h | 2 ++ src/transcription-filter.cpp | 59 ++++++++++++++++----------------- src/whisper-processing.cpp | 6 ++-- 3 files changed, 34 insertions(+), 33 deletions(-) diff --git a/src/transcription-filter-data.h b/src/transcription-filter-data.h index d238d6a..3df3e94 100644 --- a/src/transcription-filter-data.h +++ b/src/transcription-filter-data.h @@ -73,4 +73,6 @@ struct transcription_filter_audio_info { uint64_t timestamp; }; +void set_text_callback(struct transcription_filter_data *gf, const std::string &str); + #endif /* TRANSCRIPTION_FILTER_DATA_H */ diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp index 7a63a4e..5770352 100644 --- a/src/transcription-filter.cpp +++ b/src/transcription-filter.cpp @@ -174,6 +174,35 @@ void acquire_weak_text_source_ref(struct transcription_filter_data *gf) } } +void set_text_callback(struct transcription_filter_data *gf, const std::string &str) +{ + if (!gf->text_source_mutex) { + obs_log(LOG_ERROR, "text_source_mutex is null"); + return; + } + + if (!gf->text_source) { + // attempt to acquire a weak ref to the text source if it's yet available + acquire_weak_text_source_ref(gf); + } + + std::lock_guard lock(*gf->text_source_mutex); + + if (!gf->text_source) { + obs_log(LOG_ERROR, "text_source is null"); + return; + } + auto target = obs_weak_source_get_source(gf->text_source); + if (!target) { + obs_log(LOG_ERROR, "text_source target is null"); + return; + } + auto text_settings = obs_source_get_settings(target); + obs_data_set_string(text_settings, "text", str.c_str()); + obs_source_update(target, text_settings); + obs_source_release(target); +}; + void transcription_filter_update(void *data, obs_data_t *s) { struct transcription_filter_data *gf = @@ -364,36 +393,6 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter) gf->text_source = nullptr; gf->text_source_name = nullptr; - obs_log(LOG_INFO, "transcription_filter: setup callback"); - // set the callback to set the text in the output text source (subtitles) - gf->setTextCallback = [gf](const std::string &str) { - if (!gf->text_source_mutex) { - obs_log(LOG_ERROR, "text_source_mutex is null"); - return; - } - - if (!gf->text_source) { - // attempt to acquire a weak ref to the text source if it's yet available - acquire_weak_text_source_ref(gf); - } - - std::lock_guard lock(*gf->text_source_mutex); - - if (!gf->text_source) { - obs_log(LOG_ERROR, "text_source is null"); - return; - } - auto target = obs_weak_source_get_source(gf->text_source); - if (!target) { - obs_log(LOG_ERROR, "text_source target is null"); - return; - } - auto text_settings = obs_source_get_settings(target); - obs_data_set_string(text_settings, "text", str.c_str()); - obs_source_update(target, text_settings); - obs_source_release(target); - }; - obs_log(LOG_INFO, "transcription_filter: run update"); // get the settings updated on the filter data struct transcription_filter_update(gf, settings); diff --git a/src/whisper-processing.cpp b/src/whisper-processing.cpp index bc3cda5..69bbfb8 100644 --- a/src/whisper-processing.cpp +++ b/src/whisper-processing.cpp @@ -254,16 +254,16 @@ void process_audio_from_buffer(struct transcription_filter_data *gf) if (inference_result.result == DETECTION_RESULT_SPEECH) { // output inference result to a text source - gf->setTextCallback(inference_result.text); + set_text_callback(gf, inference_result.text); } else if (inference_result.result == DETECTION_RESULT_SILENCE) { // output inference result to a text source - gf->setTextCallback("[silence]"); + set_text_callback(gf, "[silence]"); } } else { if (gf->log_words) { obs_log(LOG_INFO, "skipping inference"); } - gf->setTextCallback(""); + set_text_callback(gf, ""); } // end of timer From 6ccff434f7e8b0811107a0e688d309110aa9e866 Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Wed, 23 Aug 2023 14:26:07 +0300 Subject: [PATCH 10/14] swap thread --- src/transcription-filter.cpp | 4 +++- src/whisper-processing.cpp | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp index 5770352..ccf7079 100644 --- a/src/transcription-filter.cpp +++ b/src/transcription-filter.cpp @@ -399,10 +399,12 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter) obs_log(LOG_INFO, "transcription_filter: start whisper thread"); // start the thread - gf->whisper_thread = std::thread(whisper_loop, gf); + std::thread new_whisper_thread(whisper_loop, gf); + gf->whisper_thread.swap(new_whisper_thread); gf->active = true; + obs_log(LOG_INFO, "transcription_filter: filter created."); return gf; } diff --git a/src/whisper-processing.cpp b/src/whisper-processing.cpp index 69bbfb8..be21906 100644 --- a/src/whisper-processing.cpp +++ b/src/whisper-processing.cpp @@ -297,6 +297,11 @@ void process_audio_from_buffer(struct transcription_filter_data *gf) void whisper_loop(void *data) { + if (data == nullptr) { + obs_log(LOG_ERROR, "whisper_loop: data is null"); + return; + } + struct transcription_filter_data *gf = static_cast(data); const size_t segment_size = gf->frames * sizeof(float); From eca1afafa3aeeea848e7d233b6855abd19277130 Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Fri, 25 Aug 2023 12:17:09 +0300 Subject: [PATCH 11/14] text source name attempt --- src/transcription-filter.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp index ccf7079..0d35d08 100644 --- a/src/transcription-filter.cpp +++ b/src/transcription-filter.cpp @@ -215,10 +215,11 @@ void transcription_filter_update(void *data, obs_data_t *s) obs_log(LOG_INFO, "transcription_filter: update text source"); // update the text source - const char *text_source_name = obs_data_get_string(s, "subtitle_sources"); + const char *new_text_source_name = obs_data_get_string(s, "subtitle_sources"); obs_weak_source_t *old_weak_text_source = NULL; - if (strcmp(text_source_name, "none") == 0 || strcmp(text_source_name, "(null)") == 0) { + if (strcmp(new_text_source_name, "none") == 0 || + strcmp(new_text_source_name, "(null)") == 0 || strcmp(new_text_source_name, "") == 0) { // new selected text source is not valid, release the old one if (gf->text_source) { if (!gf->text_source_mutex) { @@ -236,7 +237,7 @@ void transcription_filter_update(void *data, obs_data_t *s) } else { // new selected text source is valid, check if it's different from the old one if (gf->text_source_name == nullptr || - strcmp(text_source_name, gf->text_source_name) != 0) { + strcmp(new_text_source_name, gf->text_source_name) != 0) { // new text source is different from the old one, release the old one if (gf->text_source) { if (!gf->text_source_mutex) { @@ -247,7 +248,7 @@ void transcription_filter_update(void *data, obs_data_t *s) old_weak_text_source = gf->text_source; gf->text_source = nullptr; } - gf->text_source_name = bstrdup(text_source_name); + gf->text_source_name = bstrdup(new_text_source_name); } } @@ -288,7 +289,8 @@ void transcription_filter_update(void *data, obs_data_t *s) obs_log(LOG_INFO, "Model download complete"); gf->whisper_context = init_whisper_context( gf->whisper_model_path); - gf->whisper_thread = std::thread(whisper_loop, gf); + std::thread new_whisper_thread(whisper_loop, gf); + gf->whisper_thread.swap(new_whisper_thread); } else { obs_log(LOG_ERROR, "Model download failed"); } @@ -296,7 +298,8 @@ void transcription_filter_update(void *data, obs_data_t *s) } else { // Model exists, just load it gf->whisper_context = init_whisper_context(gf->whisper_model_path); - gf->whisper_thread = std::thread(whisper_loop, gf); + std::thread new_whisper_thread(whisper_loop, gf); + gf->whisper_thread.swap(new_whisper_thread); } } @@ -391,7 +394,7 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter) gf->wshiper_thread_cv = new std::condition_variable(); gf->text_source_mutex = new std::mutex(); gf->text_source = nullptr; - gf->text_source_name = nullptr; + gf->text_source_name = bstrdup(obs_data_get_string(settings, "subtitle_sources")); obs_log(LOG_INFO, "transcription_filter: run update"); // get the settings updated on the filter data struct From 5c19aca621aadb9edd97ea75283beef6a55e2174 Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Wed, 6 Sep 2023 22:22:03 -0400 Subject: [PATCH 12/14] bzalloc --- src/transcription-filter.cpp | 13 ++++++++++--- src/whisper-processing.cpp | 1 + 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp index 0d35d08..43959e3 100644 --- a/src/transcription-filter.cpp +++ b/src/transcription-filter.cpp @@ -101,10 +101,11 @@ const char *transcription_filter_name(void *unused) void transcription_filter_destroy(void *data) { + obs_log(LOG_INFO, "transcription_filter_destroy"); + struct transcription_filter_data *gf = static_cast(data); - obs_log(LOG_INFO, "transcription_filter_destroy"); { std::lock_guard lock(*gf->whisper_ctx_mutex); if (gf->whisper_context != nullptr) { @@ -205,10 +206,16 @@ void set_text_callback(struct transcription_filter_data *gf, const std::string & void transcription_filter_update(void *data, obs_data_t *s) { + obs_log(LOG_INFO, "transcription_filter_update"); + + if (!data) { + obs_log(LOG_ERROR, "transcription_filter_update: data is null"); + return; + } + struct transcription_filter_data *gf = static_cast(data); - obs_log(LOG_INFO, "transcription_filter_update"); gf->log_level = (int)obs_data_get_int(s, "log_level"); gf->vad_enabled = obs_data_get_bool(s, "vad_enabled"); gf->log_words = obs_data_get_bool(s, "log_words"); @@ -343,7 +350,7 @@ void transcription_filter_update(void *data, obs_data_t *s) void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter) { struct transcription_filter_data *gf = static_cast( - bmalloc(sizeof(struct transcription_filter_data))); + bzalloc(sizeof(struct transcription_filter_data))); // Get the number of channels for the input source gf->channels = audio_output_get_channels(obs_get_audio()); diff --git a/src/whisper-processing.cpp b/src/whisper-processing.cpp index be21906..7465a5a 100644 --- a/src/whisper-processing.cpp +++ b/src/whisper-processing.cpp @@ -72,6 +72,7 @@ bool vad_simple(float *pcmf32, size_t pcm32f_size, uint32_t sample_rate, float v struct whisper_context *init_whisper_context(const std::string &model_path) { + obs_log(LOG_INFO, "Loading whisper model from %s", model_path.c_str()); struct whisper_context *ctx = whisper_init_from_file(obs_module_file(model_path.c_str())); if (ctx == nullptr) { obs_log(LOG_ERROR, "Failed to load whisper model"); From 5dd8901dc5ca3a5cd3de3d9753be059c1cc6654e Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Wed, 6 Sep 2023 22:23:42 -0400 Subject: [PATCH 13/14] lint --- src/transcription-filter.cpp | 8 ++++---- src/whisper-processing.cpp | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp index 43959e3..02b1963 100644 --- a/src/transcription-filter.cpp +++ b/src/transcription-filter.cpp @@ -208,10 +208,10 @@ void transcription_filter_update(void *data, obs_data_t *s) { obs_log(LOG_INFO, "transcription_filter_update"); - if (!data) { - obs_log(LOG_ERROR, "transcription_filter_update: data is null"); - return; - } + if (!data) { + obs_log(LOG_ERROR, "transcription_filter_update: data is null"); + return; + } struct transcription_filter_data *gf = static_cast(data); diff --git a/src/whisper-processing.cpp b/src/whisper-processing.cpp index 7465a5a..c05525e 100644 --- a/src/whisper-processing.cpp +++ b/src/whisper-processing.cpp @@ -72,7 +72,7 @@ bool vad_simple(float *pcmf32, size_t pcm32f_size, uint32_t sample_rate, float v struct whisper_context *init_whisper_context(const std::string &model_path) { - obs_log(LOG_INFO, "Loading whisper model from %s", model_path.c_str()); + obs_log(LOG_INFO, "Loading whisper model from %s", model_path.c_str()); struct whisper_context *ctx = whisper_init_from_file(obs_module_file(model_path.c_str())); if (ctx == nullptr) { obs_log(LOG_ERROR, "Failed to load whisper model"); From 45c1db7dad8572b263cadc694035aa3bf40f0a19 Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Fri, 8 Sep 2023 11:15:54 -0400 Subject: [PATCH 14/14] logging level --- src/transcription-filter.cpp | 37 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp index 02b1963..9b0446d 100644 --- a/src/transcription-filter.cpp +++ b/src/transcription-filter.cpp @@ -101,11 +101,10 @@ const char *transcription_filter_name(void *unused) void transcription_filter_destroy(void *data) { - obs_log(LOG_INFO, "transcription_filter_destroy"); - struct transcription_filter_data *gf = static_cast(data); + obs_log(gf->log_level, "transcription_filter_destroy"); { std::lock_guard lock(*gf->whisper_ctx_mutex); if (gf->whisper_context != nullptr) { @@ -206,21 +205,15 @@ void set_text_callback(struct transcription_filter_data *gf, const std::string & void transcription_filter_update(void *data, obs_data_t *s) { - obs_log(LOG_INFO, "transcription_filter_update"); - - if (!data) { - obs_log(LOG_ERROR, "transcription_filter_update: data is null"); - return; - } - struct transcription_filter_data *gf = static_cast(data); + obs_log(gf->log_level, "transcription_filter_update"); gf->log_level = (int)obs_data_get_int(s, "log_level"); gf->vad_enabled = obs_data_get_bool(s, "vad_enabled"); gf->log_words = obs_data_get_bool(s, "log_words"); - obs_log(LOG_INFO, "transcription_filter: update text source"); + obs_log(gf->log_level, "transcription_filter: update text source"); // update the text source const char *new_text_source_name = obs_data_get_string(s, "subtitle_sources"); obs_weak_source_t *old_weak_text_source = NULL; @@ -260,11 +253,11 @@ void transcription_filter_update(void *data, obs_data_t *s) } if (old_weak_text_source) { - obs_log(LOG_INFO, "releasing old text source"); + obs_log(gf->log_level, "releasing old text source"); obs_weak_source_release(old_weak_text_source); } - obs_log(LOG_INFO, "transcription_filter: update whisper model"); + obs_log(gf->log_level, "transcription_filter: update whisper model"); // update the whisper model path std::string new_model_path = obs_data_get_string(s, "whisper_model_path"); @@ -315,7 +308,7 @@ void transcription_filter_update(void *data, obs_data_t *s) return; } - obs_log(LOG_INFO, "transcription_filter: update whisper params"); + obs_log(gf->log_level, "transcription_filter: update whisper params"); std::lock_guard lock(*gf->whisper_ctx_mutex); gf->whisper_params = whisper_full_default_params( @@ -350,7 +343,7 @@ void transcription_filter_update(void *data, obs_data_t *s) void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter) { struct transcription_filter_data *gf = static_cast( - bzalloc(sizeof(struct transcription_filter_data))); + bmalloc(sizeof(struct transcription_filter_data))); // Get the number of channels for the input source gf->channels = audio_output_get_channels(obs_get_audio()); @@ -380,10 +373,10 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter) gf->overlap_ms = OVERLAP_SIZE_MSEC; gf->overlap_frames = (size_t)((float)gf->sample_rate / (1000.0f / (float)gf->overlap_ms)); - obs_log(LOG_INFO, "transcription_filter: channels %d, frames %d, sample_rate %d", + obs_log(gf->log_level, "transcription_filter: channels %d, frames %d, sample_rate %d", (int)gf->channels, (int)gf->frames, gf->sample_rate); - obs_log(LOG_INFO, "transcription_filter: setup audio resampler"); + obs_log(gf->log_level, "transcription_filter: setup audio resampler"); struct resample_info src, dst; src.samples_per_sec = gf->sample_rate; src.format = AUDIO_FORMAT_FLOAT_PLANAR; @@ -395,7 +388,7 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter) gf->resampler = audio_resampler_create(&dst, &src); - obs_log(LOG_INFO, "transcription_filter: setup mutexes and condition variables"); + obs_log(gf->log_level, "transcription_filter: setup mutexes and condition variables"); gf->whisper_buf_mutex = new std::mutex(); gf->whisper_ctx_mutex = new std::mutex(); gf->wshiper_thread_cv = new std::condition_variable(); @@ -403,18 +396,18 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter) gf->text_source = nullptr; gf->text_source_name = bstrdup(obs_data_get_string(settings, "subtitle_sources")); - obs_log(LOG_INFO, "transcription_filter: run update"); + obs_log(gf->log_level, "transcription_filter: run update"); // get the settings updated on the filter data struct transcription_filter_update(gf, settings); - obs_log(LOG_INFO, "transcription_filter: start whisper thread"); + obs_log(gf->log_level, "transcription_filter: start whisper thread"); // start the thread std::thread new_whisper_thread(whisper_loop, gf); gf->whisper_thread.swap(new_whisper_thread); gf->active = true; - obs_log(LOG_INFO, "transcription_filter: filter created."); + obs_log(gf->log_level, "transcription_filter: filter created."); return gf; } @@ -422,7 +415,7 @@ void transcription_filter_activate(void *data) { struct transcription_filter_data *gf = static_cast(data); - obs_log(LOG_INFO, "transcription_filter filter activated"); + obs_log(gf->log_level, "transcription_filter filter activated"); gf->active = true; } @@ -430,7 +423,7 @@ void transcription_filter_deactivate(void *data) { struct transcription_filter_data *gf = static_cast(data); - obs_log(LOG_INFO, "transcription_filter filter deactivated"); + obs_log(gf->log_level, "transcription_filter filter deactivated"); gf->active = false; }