From 11a51e7512e4566af93143eb7f659bee8a91c2a4 Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Fri, 8 Sep 2023 12:25:10 -0400 Subject: [PATCH 1/7] add text file output --- src/transcription-filter-data.h | 2 + src/transcription-filter.cpp | 86 ++++++++++++++++++++++----------- 2 files changed, 60 insertions(+), 28 deletions(-) diff --git a/src/transcription-filter-data.h b/src/transcription-filter-data.h index 3df3e94..e61990c 100644 --- a/src/transcription-filter-data.h +++ b/src/transcription-filter-data.h @@ -58,6 +58,8 @@ struct transcription_filter_data { std::mutex *text_source_mutex = nullptr; // Callback to set the text in the output text source (subtitles) std::function setTextCallback; + // Output file path to write the subtitles + std::string output_file_path; // Use std for thread and mutex std::thread whisper_thread; diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp index 9b0446d..4f10b0c 100644 --- a/src/transcription-filter.cpp +++ b/src/transcription-filter.cpp @@ -8,6 +8,7 @@ #include "model-utils/model-downloader.h" #include +#include inline enum speaker_layout convert_speaker_layout(uint8_t channels) { @@ -176,31 +177,38 @@ void acquire_weak_text_source_ref(struct transcription_filter_data *gf) void set_text_callback(struct transcription_filter_data *gf, const std::string &str) { - if (!gf->text_source_mutex) { - obs_log(LOG_ERROR, "text_source_mutex is null"); - return; - } + if (gf->output_file_path != "") { + // Write to file, do not append + std::ofstream output_file(gf->output_file_path, std::ios::out | std::ios::trunc); + output_file << str; + output_file.close(); + } else { + if (!gf->text_source_mutex) { + obs_log(LOG_ERROR, "text_source_mutex is null"); + return; + } - if (!gf->text_source) { - // attempt to acquire a weak ref to the text source if it's yet available - acquire_weak_text_source_ref(gf); - } + if (!gf->text_source) { + // attempt to acquire a weak ref to the text source if it's yet available + acquire_weak_text_source_ref(gf); + } - std::lock_guard lock(*gf->text_source_mutex); + std::lock_guard lock(*gf->text_source_mutex); - if (!gf->text_source) { - obs_log(LOG_ERROR, "text_source is null"); - return; - } - auto target = obs_weak_source_get_source(gf->text_source); - if (!target) { - obs_log(LOG_ERROR, "text_source target is null"); - return; - } - auto text_settings = obs_source_get_settings(target); - obs_data_set_string(text_settings, "text", str.c_str()); - obs_source_update(target, text_settings); - obs_source_release(target); + if (!gf->text_source) { + obs_log(LOG_ERROR, "text_source is null"); + return; + } + auto target = obs_weak_source_get_source(gf->text_source); + if (!target) { + obs_log(LOG_ERROR, "text_source target is null"); + return; + } + auto text_settings = obs_source_get_settings(target); + obs_data_set_string(text_settings, "text", str.c_str()); + obs_source_update(target, text_settings); + obs_source_release(target); + } }; void transcription_filter_update(void *data, obs_data_t *s) @@ -219,7 +227,9 @@ void transcription_filter_update(void *data, obs_data_t *s) obs_weak_source_t *old_weak_text_source = NULL; if (strcmp(new_text_source_name, "none") == 0 || - strcmp(new_text_source_name, "(null)") == 0 || strcmp(new_text_source_name, "") == 0) { + strcmp(new_text_source_name, "(null)") == 0 || + strcmp(new_text_source_name, "text_file") == 0 || + strlen(new_text_source_name) == 0) { // new selected text source is not valid, release the old one if (gf->text_source) { if (!gf->text_source_mutex) { @@ -234,6 +244,11 @@ void transcription_filter_update(void *data, obs_data_t *s) bfree(gf->text_source_name); gf->text_source_name = nullptr; } + if (strcmp(new_text_source_name, "text_file") == 0) { + gf->output_file_path = obs_data_get_string(s, "subtitle_output_filename"); + } else { + gf->output_file_path = ""; + } } else { // new selected text source is valid, check if it's different from the old one if (gf->text_source_name == nullptr || @@ -430,7 +445,7 @@ void transcription_filter_deactivate(void *data) void transcription_filter_defaults(obs_data_t *s) { obs_data_set_default_bool(s, "vad_enabled", true); - obs_data_set_default_int(s, "log_level", LOG_DEBUG); + obs_data_set_default_int(s, "log_level", LOG_INFO); obs_data_set_default_bool(s, "log_words", true); obs_data_set_default_string(s, "whisper_model_path", "models/ggml-tiny.en.bin"); obs_data_set_default_string(s, "whisper_language_select", "en"); @@ -474,13 +489,28 @@ obs_properties_t *transcription_filter_properties(void *data) obs_property_list_add_int(list, "WARNING", LOG_WARNING); obs_properties_add_bool(ppts, "log_words", "Log output words"); - obs_property_t *sources = - obs_properties_add_list(ppts, "subtitle_sources", "Subtitles Text Source", + obs_property_t *subs_output = + obs_properties_add_list(ppts, "subtitle_sources", "Subtitles Output", OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING); // Add "none" option - obs_property_list_add_string(sources, "None / No output", "none"); + obs_property_list_add_string(subs_output, "None / No output", "none"); + obs_property_list_add_string(subs_output, "Text File output", "text_file"); // Add text sources - obs_enum_sources(add_sources_to_list, sources); + obs_enum_sources(add_sources_to_list, subs_output); + + obs_properties_add_path(ppts, "subtitle_output_filename", "Output filename", OBS_PATH_FILE_SAVE, "Text (*.txt)", NULL); + + obs_property_set_modified_callback(subs_output, [](obs_properties_t *props, obs_property_t *property, obs_data_t *settings) { + const char *new_output = obs_data_get_string(settings, "subtitle_sources"); + if (strcmp(new_output, "text_file") == 0) { + // Show the output filename selection input + obs_property_set_visible(obs_properties_get(props, "subtitle_output_filename"), true); + } else { + // Hide the output filename selection input + obs_property_set_visible(obs_properties_get(props, "subtitle_output_filename"), false); + } + return true; + }); // Add a list of available whisper models to download obs_property_t *whisper_models_list = From 0fb43fd42ad8be43f382c0f30e97ad83f4f2bce3 Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Fri, 8 Sep 2023 12:26:05 -0400 Subject: [PATCH 2/7] lint --- src/transcription-filter-data.h | 4 +- src/transcription-filter.cpp | 100 +++++++++++++++++--------------- 2 files changed, 54 insertions(+), 50 deletions(-) diff --git a/src/transcription-filter-data.h b/src/transcription-filter-data.h index e61990c..e8cedb2 100644 --- a/src/transcription-filter-data.h +++ b/src/transcription-filter-data.h @@ -58,8 +58,8 @@ struct transcription_filter_data { std::mutex *text_source_mutex = nullptr; // Callback to set the text in the output text source (subtitles) std::function setTextCallback; - // Output file path to write the subtitles - std::string output_file_path; + // Output file path to write the subtitles + std::string output_file_path; // Use std for thread and mutex std::thread whisper_thread; diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp index 4f10b0c..35d3a1b 100644 --- a/src/transcription-filter.cpp +++ b/src/transcription-filter.cpp @@ -177,38 +177,38 @@ void acquire_weak_text_source_ref(struct transcription_filter_data *gf) void set_text_callback(struct transcription_filter_data *gf, const std::string &str) { - if (gf->output_file_path != "") { - // Write to file, do not append - std::ofstream output_file(gf->output_file_path, std::ios::out | std::ios::trunc); - output_file << str; - output_file.close(); - } else { - if (!gf->text_source_mutex) { - obs_log(LOG_ERROR, "text_source_mutex is null"); - return; - } + if (gf->output_file_path != "") { + // Write to file, do not append + std::ofstream output_file(gf->output_file_path, std::ios::out | std::ios::trunc); + output_file << str; + output_file.close(); + } else { + if (!gf->text_source_mutex) { + obs_log(LOG_ERROR, "text_source_mutex is null"); + return; + } - if (!gf->text_source) { - // attempt to acquire a weak ref to the text source if it's yet available - acquire_weak_text_source_ref(gf); - } + if (!gf->text_source) { + // attempt to acquire a weak ref to the text source if it's yet available + acquire_weak_text_source_ref(gf); + } - std::lock_guard lock(*gf->text_source_mutex); + std::lock_guard lock(*gf->text_source_mutex); - if (!gf->text_source) { - obs_log(LOG_ERROR, "text_source is null"); - return; - } - auto target = obs_weak_source_get_source(gf->text_source); - if (!target) { - obs_log(LOG_ERROR, "text_source target is null"); - return; - } - auto text_settings = obs_source_get_settings(target); - obs_data_set_string(text_settings, "text", str.c_str()); - obs_source_update(target, text_settings); - obs_source_release(target); - } + if (!gf->text_source) { + obs_log(LOG_ERROR, "text_source is null"); + return; + } + auto target = obs_weak_source_get_source(gf->text_source); + if (!target) { + obs_log(LOG_ERROR, "text_source target is null"); + return; + } + auto text_settings = obs_source_get_settings(target); + obs_data_set_string(text_settings, "text", str.c_str()); + obs_source_update(target, text_settings); + obs_source_release(target); + } }; void transcription_filter_update(void *data, obs_data_t *s) @@ -228,8 +228,7 @@ void transcription_filter_update(void *data, obs_data_t *s) if (strcmp(new_text_source_name, "none") == 0 || strcmp(new_text_source_name, "(null)") == 0 || - strcmp(new_text_source_name, "text_file") == 0 || - strlen(new_text_source_name) == 0) { + strcmp(new_text_source_name, "text_file") == 0 || strlen(new_text_source_name) == 0) { // new selected text source is not valid, release the old one if (gf->text_source) { if (!gf->text_source_mutex) { @@ -244,11 +243,11 @@ void transcription_filter_update(void *data, obs_data_t *s) bfree(gf->text_source_name); gf->text_source_name = nullptr; } - if (strcmp(new_text_source_name, "text_file") == 0) { - gf->output_file_path = obs_data_get_string(s, "subtitle_output_filename"); - } else { - gf->output_file_path = ""; - } + if (strcmp(new_text_source_name, "text_file") == 0) { + gf->output_file_path = obs_data_get_string(s, "subtitle_output_filename"); + } else { + gf->output_file_path = ""; + } } else { // new selected text source is valid, check if it's different from the old one if (gf->text_source_name == nullptr || @@ -498,19 +497,24 @@ obs_properties_t *transcription_filter_properties(void *data) // Add text sources obs_enum_sources(add_sources_to_list, subs_output); - obs_properties_add_path(ppts, "subtitle_output_filename", "Output filename", OBS_PATH_FILE_SAVE, "Text (*.txt)", NULL); + obs_properties_add_path(ppts, "subtitle_output_filename", "Output filename", + OBS_PATH_FILE_SAVE, "Text (*.txt)", NULL); - obs_property_set_modified_callback(subs_output, [](obs_properties_t *props, obs_property_t *property, obs_data_t *settings) { - const char *new_output = obs_data_get_string(settings, "subtitle_sources"); - if (strcmp(new_output, "text_file") == 0) { - // Show the output filename selection input - obs_property_set_visible(obs_properties_get(props, "subtitle_output_filename"), true); - } else { - // Hide the output filename selection input - obs_property_set_visible(obs_properties_get(props, "subtitle_output_filename"), false); - } - return true; - }); + obs_property_set_modified_callback(subs_output, [](obs_properties_t *props, + obs_property_t *property, + obs_data_t *settings) { + const char *new_output = obs_data_get_string(settings, "subtitle_sources"); + if (strcmp(new_output, "text_file") == 0) { + // Show the output filename selection input + obs_property_set_visible( + obs_properties_get(props, "subtitle_output_filename"), true); + } else { + // Hide the output filename selection input + obs_property_set_visible( + obs_properties_get(props, "subtitle_output_filename"), false); + } + return true; + }); // Add a list of available whisper models to download obs_property_t *whisper_models_list = From 889e757b17f36d7b88cac5d884294977ac6d748a Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Fri, 8 Sep 2023 12:39:26 -0400 Subject: [PATCH 3/7] fix unused param --- src/transcription-filter.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp index 35d3a1b..a2692e2 100644 --- a/src/transcription-filter.cpp +++ b/src/transcription-filter.cpp @@ -503,6 +503,7 @@ obs_properties_t *transcription_filter_properties(void *data) obs_property_set_modified_callback(subs_output, [](obs_properties_t *props, obs_property_t *property, obs_data_t *settings) { + UNUSED_PARAMETER(property); const char *new_output = obs_data_get_string(settings, "subtitle_sources"); if (strcmp(new_output, "text_file") == 0) { // Show the output filename selection input From 6f157c8f78b234c73533a0951797153a637071d7 Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Mon, 11 Sep 2023 01:23:27 -0400 Subject: [PATCH 4/7] add guard on file name null --- src/transcription-filter.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp index a2692e2..df98c6e 100644 --- a/src/transcription-filter.cpp +++ b/src/transcription-filter.cpp @@ -243,10 +243,14 @@ void transcription_filter_update(void *data, obs_data_t *s) bfree(gf->text_source_name); gf->text_source_name = nullptr; } + gf->output_file_path = ""; if (strcmp(new_text_source_name, "text_file") == 0) { - gf->output_file_path = obs_data_get_string(s, "subtitle_output_filename"); - } else { - gf->output_file_path = ""; + // set the output file path + const char *output_file_path = + obs_data_get_string(s, "subtitle_output_filename"); + if (output_file_path != nullptr && strlen(output_file_path) > 0) { + gf->output_file_path = output_file_path; + } } } else { // new selected text source is valid, check if it's different from the old one From e22fb28f96980099e5ffd4715acfa55bb0735a4b Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Mon, 11 Sep 2023 01:49:39 -0400 Subject: [PATCH 5/7] safeguards on file name --- src/transcription-filter.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp index df98c6e..0b16f64 100644 --- a/src/transcription-filter.cpp +++ b/src/transcription-filter.cpp @@ -177,7 +177,7 @@ void acquire_weak_text_source_ref(struct transcription_filter_data *gf) void set_text_callback(struct transcription_filter_data *gf, const std::string &str) { - if (gf->output_file_path != "") { + if (gf->output_file_path != "" && !gf->text_source_name) { // Write to file, do not append std::ofstream output_file(gf->output_file_path, std::ios::out | std::ios::trunc); output_file << str; @@ -226,7 +226,8 @@ void transcription_filter_update(void *data, obs_data_t *s) const char *new_text_source_name = obs_data_get_string(s, "subtitle_sources"); obs_weak_source_t *old_weak_text_source = NULL; - if (strcmp(new_text_source_name, "none") == 0 || + if (new_text_source_name == nullptr || + strcmp(new_text_source_name, "none") == 0 || strcmp(new_text_source_name, "(null)") == 0 || strcmp(new_text_source_name, "text_file") == 0 || strlen(new_text_source_name) == 0) { // new selected text source is not valid, release the old one @@ -266,6 +267,11 @@ void transcription_filter_update(void *data, obs_data_t *s) old_weak_text_source = gf->text_source; gf->text_source = nullptr; } + if (gf->text_source_name) { + // free the old text source name + bfree(gf->text_source_name); + gf->text_source_name = nullptr; + } gf->text_source_name = bstrdup(new_text_source_name); } } @@ -413,6 +419,7 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter) gf->text_source_mutex = new std::mutex(); gf->text_source = nullptr; gf->text_source_name = bstrdup(obs_data_get_string(settings, "subtitle_sources")); + gf->output_file_path = std::string(""); obs_log(gf->log_level, "transcription_filter: run update"); // get the settings updated on the filter data struct From 63db685e0d4caf1f5c496f70186680585f6898d7 Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Mon, 11 Sep 2023 01:50:42 -0400 Subject: [PATCH 6/7] lint --- src/transcription-filter.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp index 0b16f64..3c41a6b 100644 --- a/src/transcription-filter.cpp +++ b/src/transcription-filter.cpp @@ -226,8 +226,7 @@ void transcription_filter_update(void *data, obs_data_t *s) const char *new_text_source_name = obs_data_get_string(s, "subtitle_sources"); obs_weak_source_t *old_weak_text_source = NULL; - if (new_text_source_name == nullptr || - strcmp(new_text_source_name, "none") == 0 || + if (new_text_source_name == nullptr || strcmp(new_text_source_name, "none") == 0 || strcmp(new_text_source_name, "(null)") == 0 || strcmp(new_text_source_name, "text_file") == 0 || strlen(new_text_source_name) == 0) { // new selected text source is not valid, release the old one @@ -267,11 +266,11 @@ void transcription_filter_update(void *data, obs_data_t *s) old_weak_text_source = gf->text_source; gf->text_source = nullptr; } - if (gf->text_source_name) { - // free the old text source name - bfree(gf->text_source_name); - gf->text_source_name = nullptr; - } + if (gf->text_source_name) { + // free the old text source name + bfree(gf->text_source_name); + gf->text_source_name = nullptr; + } gf->text_source_name = bstrdup(new_text_source_name); } } @@ -419,7 +418,7 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter) gf->text_source_mutex = new std::mutex(); gf->text_source = nullptr; gf->text_source_name = bstrdup(obs_data_get_string(settings, "subtitle_sources")); - gf->output_file_path = std::string(""); + gf->output_file_path = std::string(""); obs_log(gf->log_level, "transcription_filter: run update"); // get the settings updated on the filter data struct From 251e7931754c9b2d65a876e15363a97c04261fc7 Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Mon, 11 Sep 2023 02:21:59 -0400 Subject: [PATCH 7/7] try fix crash on string assign --- src/transcription-filter.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp index 3c41a6b..8c69fb1 100644 --- a/src/transcription-filter.cpp +++ b/src/transcription-filter.cpp @@ -366,7 +366,7 @@ void transcription_filter_update(void *data, obs_data_t *s) void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter) { struct transcription_filter_data *gf = static_cast( - bmalloc(sizeof(struct transcription_filter_data))); + bzalloc(sizeof(struct transcription_filter_data))); // Get the number of channels for the input source gf->channels = audio_output_get_channels(obs_get_audio()); @@ -387,7 +387,7 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter) } gf->context = filter; - gf->whisper_model_path = obs_data_get_string(settings, "whisper_model_path"); + gf->whisper_model_path = std::string(obs_data_get_string(settings, "whisper_model_path")); gf->whisper_context = init_whisper_context(gf->whisper_model_path); if (gf->whisper_context == nullptr) { obs_log(LOG_ERROR, "Failed to load whisper model");