mirror of
https://github.com/occ-ai/obs-localvocal
synced 2024-11-07 18:57:14 +00:00
Merge branch 'master' into roy.add_openblas
This commit is contained in:
commit
e839afaef4
@ -58,6 +58,8 @@ struct transcription_filter_data {
|
|||||||
std::mutex *text_source_mutex = nullptr;
|
std::mutex *text_source_mutex = nullptr;
|
||||||
// Callback to set the text in the output text source (subtitles)
|
// Callback to set the text in the output text source (subtitles)
|
||||||
std::function<void(const std::string &str)> setTextCallback;
|
std::function<void(const std::string &str)> setTextCallback;
|
||||||
|
// Output file path to write the subtitles
|
||||||
|
std::string output_file_path;
|
||||||
|
|
||||||
// Use std for thread and mutex
|
// Use std for thread and mutex
|
||||||
std::thread whisper_thread;
|
std::thread whisper_thread;
|
||||||
|
@ -8,6 +8,7 @@
|
|||||||
#include "model-utils/model-downloader.h"
|
#include "model-utils/model-downloader.h"
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <fstream>
|
||||||
|
|
||||||
inline enum speaker_layout convert_speaker_layout(uint8_t channels)
|
inline enum speaker_layout convert_speaker_layout(uint8_t channels)
|
||||||
{
|
{
|
||||||
@ -176,31 +177,38 @@ void acquire_weak_text_source_ref(struct transcription_filter_data *gf)
|
|||||||
|
|
||||||
void set_text_callback(struct transcription_filter_data *gf, const std::string &str)
|
void set_text_callback(struct transcription_filter_data *gf, const std::string &str)
|
||||||
{
|
{
|
||||||
if (!gf->text_source_mutex) {
|
if (gf->output_file_path != "" && !gf->text_source_name) {
|
||||||
obs_log(LOG_ERROR, "text_source_mutex is null");
|
// Write to file, do not append
|
||||||
return;
|
std::ofstream output_file(gf->output_file_path, std::ios::out | std::ios::trunc);
|
||||||
}
|
output_file << str;
|
||||||
|
output_file.close();
|
||||||
|
} else {
|
||||||
|
if (!gf->text_source_mutex) {
|
||||||
|
obs_log(LOG_ERROR, "text_source_mutex is null");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (!gf->text_source) {
|
if (!gf->text_source) {
|
||||||
// attempt to acquire a weak ref to the text source if it's yet available
|
// attempt to acquire a weak ref to the text source if it's yet available
|
||||||
acquire_weak_text_source_ref(gf);
|
acquire_weak_text_source_ref(gf);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::lock_guard<std::mutex> lock(*gf->text_source_mutex);
|
std::lock_guard<std::mutex> lock(*gf->text_source_mutex);
|
||||||
|
|
||||||
if (!gf->text_source) {
|
if (!gf->text_source) {
|
||||||
obs_log(LOG_ERROR, "text_source is null");
|
obs_log(LOG_ERROR, "text_source is null");
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
|
auto target = obs_weak_source_get_source(gf->text_source);
|
||||||
|
if (!target) {
|
||||||
|
obs_log(LOG_ERROR, "text_source target is null");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
auto text_settings = obs_source_get_settings(target);
|
||||||
|
obs_data_set_string(text_settings, "text", str.c_str());
|
||||||
|
obs_source_update(target, text_settings);
|
||||||
|
obs_source_release(target);
|
||||||
}
|
}
|
||||||
auto target = obs_weak_source_get_source(gf->text_source);
|
|
||||||
if (!target) {
|
|
||||||
obs_log(LOG_ERROR, "text_source target is null");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
auto text_settings = obs_source_get_settings(target);
|
|
||||||
obs_data_set_string(text_settings, "text", str.c_str());
|
|
||||||
obs_source_update(target, text_settings);
|
|
||||||
obs_source_release(target);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
void transcription_filter_update(void *data, obs_data_t *s)
|
void transcription_filter_update(void *data, obs_data_t *s)
|
||||||
@ -218,8 +226,9 @@ void transcription_filter_update(void *data, obs_data_t *s)
|
|||||||
const char *new_text_source_name = obs_data_get_string(s, "subtitle_sources");
|
const char *new_text_source_name = obs_data_get_string(s, "subtitle_sources");
|
||||||
obs_weak_source_t *old_weak_text_source = NULL;
|
obs_weak_source_t *old_weak_text_source = NULL;
|
||||||
|
|
||||||
if (strcmp(new_text_source_name, "none") == 0 ||
|
if (new_text_source_name == nullptr || strcmp(new_text_source_name, "none") == 0 ||
|
||||||
strcmp(new_text_source_name, "(null)") == 0 || strcmp(new_text_source_name, "") == 0) {
|
strcmp(new_text_source_name, "(null)") == 0 ||
|
||||||
|
strcmp(new_text_source_name, "text_file") == 0 || strlen(new_text_source_name) == 0) {
|
||||||
// new selected text source is not valid, release the old one
|
// new selected text source is not valid, release the old one
|
||||||
if (gf->text_source) {
|
if (gf->text_source) {
|
||||||
if (!gf->text_source_mutex) {
|
if (!gf->text_source_mutex) {
|
||||||
@ -234,6 +243,15 @@ void transcription_filter_update(void *data, obs_data_t *s)
|
|||||||
bfree(gf->text_source_name);
|
bfree(gf->text_source_name);
|
||||||
gf->text_source_name = nullptr;
|
gf->text_source_name = nullptr;
|
||||||
}
|
}
|
||||||
|
gf->output_file_path = "";
|
||||||
|
if (strcmp(new_text_source_name, "text_file") == 0) {
|
||||||
|
// set the output file path
|
||||||
|
const char *output_file_path =
|
||||||
|
obs_data_get_string(s, "subtitle_output_filename");
|
||||||
|
if (output_file_path != nullptr && strlen(output_file_path) > 0) {
|
||||||
|
gf->output_file_path = output_file_path;
|
||||||
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// new selected text source is valid, check if it's different from the old one
|
// new selected text source is valid, check if it's different from the old one
|
||||||
if (gf->text_source_name == nullptr ||
|
if (gf->text_source_name == nullptr ||
|
||||||
@ -248,6 +266,11 @@ void transcription_filter_update(void *data, obs_data_t *s)
|
|||||||
old_weak_text_source = gf->text_source;
|
old_weak_text_source = gf->text_source;
|
||||||
gf->text_source = nullptr;
|
gf->text_source = nullptr;
|
||||||
}
|
}
|
||||||
|
if (gf->text_source_name) {
|
||||||
|
// free the old text source name
|
||||||
|
bfree(gf->text_source_name);
|
||||||
|
gf->text_source_name = nullptr;
|
||||||
|
}
|
||||||
gf->text_source_name = bstrdup(new_text_source_name);
|
gf->text_source_name = bstrdup(new_text_source_name);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -343,7 +366,7 @@ void transcription_filter_update(void *data, obs_data_t *s)
|
|||||||
void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter)
|
void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter)
|
||||||
{
|
{
|
||||||
struct transcription_filter_data *gf = static_cast<struct transcription_filter_data *>(
|
struct transcription_filter_data *gf = static_cast<struct transcription_filter_data *>(
|
||||||
bmalloc(sizeof(struct transcription_filter_data)));
|
bzalloc(sizeof(struct transcription_filter_data)));
|
||||||
|
|
||||||
// Get the number of channels for the input source
|
// Get the number of channels for the input source
|
||||||
gf->channels = audio_output_get_channels(obs_get_audio());
|
gf->channels = audio_output_get_channels(obs_get_audio());
|
||||||
@ -364,7 +387,7 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter)
|
|||||||
}
|
}
|
||||||
|
|
||||||
gf->context = filter;
|
gf->context = filter;
|
||||||
gf->whisper_model_path = obs_data_get_string(settings, "whisper_model_path");
|
gf->whisper_model_path = std::string(obs_data_get_string(settings, "whisper_model_path"));
|
||||||
gf->whisper_context = init_whisper_context(gf->whisper_model_path);
|
gf->whisper_context = init_whisper_context(gf->whisper_model_path);
|
||||||
if (gf->whisper_context == nullptr) {
|
if (gf->whisper_context == nullptr) {
|
||||||
obs_log(LOG_ERROR, "Failed to load whisper model");
|
obs_log(LOG_ERROR, "Failed to load whisper model");
|
||||||
@ -395,6 +418,7 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter)
|
|||||||
gf->text_source_mutex = new std::mutex();
|
gf->text_source_mutex = new std::mutex();
|
||||||
gf->text_source = nullptr;
|
gf->text_source = nullptr;
|
||||||
gf->text_source_name = bstrdup(obs_data_get_string(settings, "subtitle_sources"));
|
gf->text_source_name = bstrdup(obs_data_get_string(settings, "subtitle_sources"));
|
||||||
|
gf->output_file_path = std::string("");
|
||||||
|
|
||||||
obs_log(gf->log_level, "transcription_filter: run update");
|
obs_log(gf->log_level, "transcription_filter: run update");
|
||||||
// get the settings updated on the filter data struct
|
// get the settings updated on the filter data struct
|
||||||
@ -430,7 +454,7 @@ void transcription_filter_deactivate(void *data)
|
|||||||
void transcription_filter_defaults(obs_data_t *s)
|
void transcription_filter_defaults(obs_data_t *s)
|
||||||
{
|
{
|
||||||
obs_data_set_default_bool(s, "vad_enabled", true);
|
obs_data_set_default_bool(s, "vad_enabled", true);
|
||||||
obs_data_set_default_int(s, "log_level", LOG_DEBUG);
|
obs_data_set_default_int(s, "log_level", LOG_INFO);
|
||||||
obs_data_set_default_bool(s, "log_words", true);
|
obs_data_set_default_bool(s, "log_words", true);
|
||||||
obs_data_set_default_string(s, "whisper_model_path", "models/ggml-tiny.en.bin");
|
obs_data_set_default_string(s, "whisper_model_path", "models/ggml-tiny.en.bin");
|
||||||
obs_data_set_default_string(s, "whisper_language_select", "en");
|
obs_data_set_default_string(s, "whisper_language_select", "en");
|
||||||
@ -474,13 +498,34 @@ obs_properties_t *transcription_filter_properties(void *data)
|
|||||||
obs_property_list_add_int(list, "WARNING", LOG_WARNING);
|
obs_property_list_add_int(list, "WARNING", LOG_WARNING);
|
||||||
obs_properties_add_bool(ppts, "log_words", "Log output words");
|
obs_properties_add_bool(ppts, "log_words", "Log output words");
|
||||||
|
|
||||||
obs_property_t *sources =
|
obs_property_t *subs_output =
|
||||||
obs_properties_add_list(ppts, "subtitle_sources", "Subtitles Text Source",
|
obs_properties_add_list(ppts, "subtitle_sources", "Subtitles Output",
|
||||||
OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING);
|
OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING);
|
||||||
// Add "none" option
|
// Add "none" option
|
||||||
obs_property_list_add_string(sources, "None / No output", "none");
|
obs_property_list_add_string(subs_output, "None / No output", "none");
|
||||||
|
obs_property_list_add_string(subs_output, "Text File output", "text_file");
|
||||||
// Add text sources
|
// Add text sources
|
||||||
obs_enum_sources(add_sources_to_list, sources);
|
obs_enum_sources(add_sources_to_list, subs_output);
|
||||||
|
|
||||||
|
obs_properties_add_path(ppts, "subtitle_output_filename", "Output filename",
|
||||||
|
OBS_PATH_FILE_SAVE, "Text (*.txt)", NULL);
|
||||||
|
|
||||||
|
obs_property_set_modified_callback(subs_output, [](obs_properties_t *props,
|
||||||
|
obs_property_t *property,
|
||||||
|
obs_data_t *settings) {
|
||||||
|
UNUSED_PARAMETER(property);
|
||||||
|
const char *new_output = obs_data_get_string(settings, "subtitle_sources");
|
||||||
|
if (strcmp(new_output, "text_file") == 0) {
|
||||||
|
// Show the output filename selection input
|
||||||
|
obs_property_set_visible(
|
||||||
|
obs_properties_get(props, "subtitle_output_filename"), true);
|
||||||
|
} else {
|
||||||
|
// Hide the output filename selection input
|
||||||
|
obs_property_set_visible(
|
||||||
|
obs_properties_get(props, "subtitle_output_filename"), false);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
});
|
||||||
|
|
||||||
// Add a list of available whisper models to download
|
// Add a list of available whisper models to download
|
||||||
obs_property_t *whisper_models_list =
|
obs_property_t *whisper_models_list =
|
||||||
|
Loading…
Reference in New Issue
Block a user