diff --git a/src/model-utils/model-downloader-ui.cpp b/src/model-utils/model-downloader-ui.cpp index d25fe03..2dac46e 100644 --- a/src/model-utils/model-downloader-ui.cpp +++ b/src/model-utils/model-downloader-ui.cpp @@ -125,11 +125,23 @@ std::string get_filename_from_url(const std::string &url) void ModelDownloadWorker::download_model() { - char *config_folder = obs_module_get_config_path(obs_current_module(), "models"); - const std::filesystem::path module_config_models_folder = - std::filesystem::absolute(config_folder); + char *config_folder = obs_module_config_path("models"); +#ifdef _WIN32 + // convert mbstring to wstring + int count = MultiByteToWideChar(CP_UTF8, 0, config_folder, strlen(config_folder), NULL, 0); + std::wstring config_folder_str(count, 0); + MultiByteToWideChar(CP_UTF8, 0, config_folder, strlen(config_folder), &config_folder_str[0], + count); + obs_log(LOG_INFO, "Download: Config models folder: %S", config_folder_str.c_str()); +#else + std::string config_folder_str = config_folder; + obs_log(LOG_INFO, "Download: Config models folder: %s", config_folder_str.c_str()); +#endif bfree(config_folder); + const std::filesystem::path module_config_models_folder = + std::filesystem::absolute(config_folder_str); + // Check if the config folder exists if (!std::filesystem::exists(module_config_models_folder)) { obs_log(LOG_WARNING, "Config folder does not exist: %s", diff --git a/src/model-utils/model-downloader.cpp b/src/model-utils/model-downloader.cpp index 8f69e47..67bfafd 100644 --- a/src/model-utils/model-downloader.cpp +++ b/src/model-utils/model-downloader.cpp @@ -27,9 +27,25 @@ std::string find_model_folder(const ModelInfo &model_info) } // Check if model exists in the config folder - char *config_folder = obs_module_get_config_path(obs_current_module(), "models"); + char *config_folder = obs_module_config_path("models"); + if (!config_folder) { + obs_log(LOG_INFO, "Config folder not set."); + return ""; + } +#ifdef _WIN32 + // convert mbstring to wstring + int count = MultiByteToWideChar(CP_UTF8, 0, config_folder, strlen(config_folder), NULL, 0); + std::wstring config_folder_str(count, 0); + MultiByteToWideChar(CP_UTF8, 0, config_folder, strlen(config_folder), &config_folder_str[0], + count); + obs_log(LOG_INFO, "Config models folder: %S", config_folder_str.c_str()); +#else + std::string config_folder_str = config_folder; + obs_log(LOG_INFO, "Config models folder: %s", config_folder_str.c_str()); +#endif + const std::filesystem::path module_config_models_folder = - std::filesystem::absolute(config_folder); + std::filesystem::absolute(config_folder_str); bfree(config_folder); obs_log(LOG_INFO, "Checking if model '%s' exists in config...", @@ -38,9 +54,9 @@ std::string find_model_folder(const ModelInfo &model_info) const std::string model_local_config_path = (module_config_models_folder / model_info.local_folder_name).string(); - obs_log(LOG_INFO, "Model path in config: %s", model_local_config_path.c_str()); + obs_log(LOG_INFO, "Lookig for model in config: %s", model_local_config_path.c_str()); if (std::filesystem::exists(model_local_config_path)) { - obs_log(LOG_INFO, "Model exists in config folder: %s", + obs_log(LOG_INFO, "Model folder exists in config folder: %s", model_local_config_path.c_str()); return model_local_config_path; } diff --git a/src/transcription-filter-callbacks.cpp b/src/transcription-filter-callbacks.cpp index 294d3e1..19843db 100644 --- a/src/transcription-filter-callbacks.cpp +++ b/src/transcription-filter-callbacks.cpp @@ -98,6 +98,11 @@ void send_sentence_to_file(struct transcription_filter_data *gf, output_file << str_copy << std::endl; output_file.close(); } else { + if (result.start_timestamp_ms == 0 && result.end_timestamp_ms == 0) { + // No timestamps, do not save the sentence to srt + return; + } + obs_log(gf->log_level, "Saving sentence to file %s, sentence #%d", gf->output_file_path.c_str(), gf->sentence_number); // Append sentence to file in .srt format @@ -147,18 +152,10 @@ void set_text_callback(struct transcription_filter_data *gf, const DetectionResultWithText &resultIn) { DetectionResultWithText result = resultIn; - uint64_t now = now_ms(); - if (result.text.empty() || result.result != DETECTION_RESULT_SPEECH) { - // check if we should clear the current sub depending on the minimum subtitle duration - if ((now - gf->last_sub_render_time) > gf->min_sub_duration) { - // clear the current sub, run an empty sub - result.text = ""; - } else { - // nothing to do, the incoming sub is empty - return; - } + if (!result.text.empty() && result.result == DETECTION_RESULT_SPEECH) { + gf->last_sub_render_time = now_ms(); + gf->cleared_last_sub = false; } - gf->last_sub_render_time = now; std::string str_copy = result.text; @@ -186,10 +183,6 @@ void set_text_callback(struct transcription_filter_data *gf, obs_log(gf->log_level, "------ Suppressed text: '%s' -> '%s'", original_str_copy.c_str(), str_copy.c_str()); } - if (remove_leading_trailing_nonalpha(str_copy).empty()) { - // if the text is empty after suppression, return - return; - } } if (gf->buffered_output) { diff --git a/src/transcription-filter-data.h b/src/transcription-filter-data.h index b1fded4..5d61c8f 100644 --- a/src/transcription-filter-data.h +++ b/src/transcription-filter-data.h @@ -38,6 +38,7 @@ struct transcription_filter_data { size_t min_sub_duration; // Last time a subtitle was rendered uint64_t last_sub_render_time; + bool cleared_last_sub; /* PCM buffers */ float *copy_buffers[MAX_PREPROC_CHANNELS]; diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp index c74e720..22b36f0 100644 --- a/src/transcription-filter.cpp +++ b/src/transcription-filter.cpp @@ -186,7 +186,7 @@ void transcription_filter_update(void *data, obs_data_t *s) gf->sentence_number = 1; gf->process_while_muted = obs_data_get_bool(s, "process_while_muted"); gf->min_sub_duration = (int)obs_data_get_int(s, "min_sub_duration"); - gf->last_sub_render_time = 0; + gf->last_sub_render_time = now_ms(); bool new_buffered_output = obs_data_get_bool(s, "buffered_output"); int new_buffer_num_lines = (int)obs_data_get_int(s, "buffer_num_lines"); int new_buffer_num_chars_per_line = (int)obs_data_get_int(s, "buffer_num_chars_per_line"); @@ -428,7 +428,7 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter) gf->frames = (size_t)((float)gf->sample_rate / (1000.0f / MAX_MS_WORK_BUFFER)); gf->last_num_frames = 0; gf->min_sub_duration = (int)obs_data_get_int(settings, "min_sub_duration"); - gf->last_sub_render_time = 0; + gf->last_sub_render_time = now_ms(); gf->log_level = (int)obs_data_get_int(settings, "log_level"); gf->save_srt = obs_data_get_bool(settings, "subtitle_save_srt"); gf->truncate_output_file = obs_data_get_bool(settings, "truncate_output_file"); diff --git a/src/whisper-utils/whisper-processing.cpp b/src/whisper-utils/whisper-processing.cpp index b7f9bf2..f5b8ea1 100644 --- a/src/whisper-utils/whisper-processing.cpp +++ b/src/whisper-utils/whisper-processing.cpp @@ -504,7 +504,20 @@ void whisper_loop(void *data) current_vad_state = vad_based_segmentation(gf, current_vad_state); } - // Sleep for 10 ms using the condition variable wshiper_thread_cv + if (!gf->cleared_last_sub) { + // check if we should clear the current sub depending on the minimum subtitle duration + uint64_t now = now_ms(); + if ((now - gf->last_sub_render_time) > gf->min_sub_duration) { + // clear the current sub, call the callback with an empty string + obs_log(LOG_INFO, + "Clearing current subtitle. now: %lu ms, last: %lu ms", now, + gf->last_sub_render_time); + set_text_callback(gf, {DETECTION_RESULT_UNKNOWN, "", 0, 0, {}}); + gf->cleared_last_sub = true; + } + } + + // Sleep using the condition variable wshiper_thread_cv // This will wake up the thread if there is new data in the input buffer // or if the whisper context is null std::unique_lock lock(gf->whisper_ctx_mutex);