mirror of
https://github.com/occ-ai/obs-localvocal
synced 2024-11-08 03:08:07 +00:00
Merge pull request #31 from obs-ai/roy.fix_russian_win32_and_model_load
fix model load, fix Russian utf8 on Windows
This commit is contained in:
commit
3c4c1c3ddd
@ -43,6 +43,7 @@ struct transcription_filter_data {
|
|||||||
|
|
||||||
/* whisper */
|
/* whisper */
|
||||||
char *whisper_model_path = nullptr;
|
char *whisper_model_path = nullptr;
|
||||||
|
std::string whisper_model_file_currently_loaded = "";
|
||||||
struct whisper_context *whisper_context = nullptr;
|
struct whisper_context *whisper_context = nullptr;
|
||||||
whisper_full_params whisper_params;
|
whisper_full_params whisper_params;
|
||||||
|
|
||||||
@ -50,7 +51,7 @@ struct transcription_filter_data {
|
|||||||
|
|
||||||
bool do_silence;
|
bool do_silence;
|
||||||
bool vad_enabled;
|
bool vad_enabled;
|
||||||
int log_level;
|
int log_level = LOG_DEBUG;
|
||||||
bool log_words;
|
bool log_words;
|
||||||
bool caption_to_stream;
|
bool caption_to_stream;
|
||||||
bool active = false;
|
bool active = false;
|
||||||
|
@ -10,6 +10,12 @@
|
|||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
|
#include <sstream>
|
||||||
|
#include <iomanip>
|
||||||
|
#include <bitset>
|
||||||
|
#ifdef _WIN32
|
||||||
|
#include <Windows.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
inline enum speaker_layout convert_speaker_layout(uint8_t channels)
|
inline enum speaker_layout convert_speaker_layout(uint8_t channels)
|
||||||
{
|
{
|
||||||
@ -175,6 +181,21 @@ void acquire_weak_text_source_ref(struct transcription_filter_data *gf)
|
|||||||
|
|
||||||
void set_text_callback(struct transcription_filter_data *gf, const std::string &str)
|
void set_text_callback(struct transcription_filter_data *gf, const std::string &str)
|
||||||
{
|
{
|
||||||
|
#ifdef _WIN32
|
||||||
|
// Russian UTF8 charset on Windows output has a bug, instead of 0xd? it outputs
|
||||||
|
// 0xf?, so we need to replace it. This doesn't affect any other charset, which
|
||||||
|
// outputs the correct UTF8 output. (Except maybe for Greek?)
|
||||||
|
std::string str_copy = str;
|
||||||
|
for (size_t i = 0; i < str_copy.size(); ++i) {
|
||||||
|
// if the char MSBs starts with 0xf replace the MSBs with 0xd
|
||||||
|
if ((str_copy.c_str()[i] & 0xf0) == 0xf0) {
|
||||||
|
str_copy[i] = (str_copy.c_str()[i] & 0x0f) | 0xd0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
std::string str_copy = str;
|
||||||
|
#endif
|
||||||
|
|
||||||
if (gf->caption_to_stream) {
|
if (gf->caption_to_stream) {
|
||||||
obs_output_t *streaming_output = obs_frontend_get_streaming_output();
|
obs_output_t *streaming_output = obs_frontend_get_streaming_output();
|
||||||
if (streaming_output) {
|
if (streaming_output) {
|
||||||
@ -210,7 +231,7 @@ void set_text_callback(struct transcription_filter_data *gf, const std::string &
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
auto text_settings = obs_source_get_settings(target);
|
auto text_settings = obs_source_get_settings(target);
|
||||||
obs_data_set_string(text_settings, "text", str.c_str());
|
obs_data_set_string(text_settings, "text", str_copy.c_str());
|
||||||
obs_source_update(target, text_settings);
|
obs_source_update(target, text_settings);
|
||||||
obs_source_release(target);
|
obs_source_release(target);
|
||||||
}
|
}
|
||||||
@ -218,6 +239,7 @@ void set_text_callback(struct transcription_filter_data *gf, const std::string &
|
|||||||
|
|
||||||
void shutdown_whisper_thread(struct transcription_filter_data *gf)
|
void shutdown_whisper_thread(struct transcription_filter_data *gf)
|
||||||
{
|
{
|
||||||
|
obs_log(gf->log_level, "shutdown_whisper_thread");
|
||||||
if (gf->whisper_context != nullptr) {
|
if (gf->whisper_context != nullptr) {
|
||||||
// acquire the mutex before freeing the context
|
// acquire the mutex before freeing the context
|
||||||
if (!gf->whisper_ctx_mutex || !gf->wshiper_thread_cv) {
|
if (!gf->whisper_ctx_mutex || !gf->wshiper_thread_cv) {
|
||||||
@ -232,6 +254,28 @@ void shutdown_whisper_thread(struct transcription_filter_data *gf)
|
|||||||
if (gf->whisper_thread.joinable()) {
|
if (gf->whisper_thread.joinable()) {
|
||||||
gf->whisper_thread.join();
|
gf->whisper_thread.join();
|
||||||
}
|
}
|
||||||
|
if (gf->whisper_model_path != nullptr) {
|
||||||
|
bfree(gf->whisper_model_path);
|
||||||
|
gf->whisper_model_path = nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void start_whisper_thread_with_path(struct transcription_filter_data *gf, const std::string &path)
|
||||||
|
{
|
||||||
|
obs_log(gf->log_level, "start_whisper_thread_with_path: %s", path.c_str());
|
||||||
|
if (!gf->whisper_ctx_mutex) {
|
||||||
|
obs_log(LOG_ERROR, "cannot init whisper: whisper_ctx_mutex is null");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
std::lock_guard<std::mutex> lock(*gf->whisper_ctx_mutex);
|
||||||
|
if (gf->whisper_context != nullptr) {
|
||||||
|
obs_log(LOG_ERROR, "cannot init whisper: whisper_context is not null");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
gf->whisper_context = init_whisper_context(path);
|
||||||
|
gf->whisper_model_file_currently_loaded = path;
|
||||||
|
std::thread new_whisper_thread(whisper_loop, gf);
|
||||||
|
gf->whisper_thread.swap(new_whisper_thread);
|
||||||
}
|
}
|
||||||
|
|
||||||
void transcription_filter_update(void *data, obs_data_t *s)
|
void transcription_filter_update(void *data, obs_data_t *s)
|
||||||
@ -239,8 +283,9 @@ void transcription_filter_update(void *data, obs_data_t *s)
|
|||||||
struct transcription_filter_data *gf =
|
struct transcription_filter_data *gf =
|
||||||
static_cast<struct transcription_filter_data *>(data);
|
static_cast<struct transcription_filter_data *>(data);
|
||||||
|
|
||||||
obs_log(gf->log_level, "transcription_filter_update");
|
|
||||||
gf->log_level = (int)obs_data_get_int(s, "log_level");
|
gf->log_level = (int)obs_data_get_int(s, "log_level");
|
||||||
|
obs_log(gf->log_level, "transcription_filter_update");
|
||||||
|
|
||||||
gf->vad_enabled = obs_data_get_bool(s, "vad_enabled");
|
gf->vad_enabled = obs_data_get_bool(s, "vad_enabled");
|
||||||
gf->log_words = obs_data_get_bool(s, "log_words");
|
gf->log_words = obs_data_get_bool(s, "log_words");
|
||||||
gf->caption_to_stream = obs_data_get_bool(s, "caption_to_stream");
|
gf->caption_to_stream = obs_data_get_bool(s, "caption_to_stream");
|
||||||
@ -310,20 +355,21 @@ void transcription_filter_update(void *data, obs_data_t *s)
|
|||||||
obs_log(gf->log_level, "transcription_filter: update whisper model");
|
obs_log(gf->log_level, "transcription_filter: update whisper model");
|
||||||
// update the whisper model path
|
// update the whisper model path
|
||||||
std::string new_model_path = obs_data_get_string(s, "whisper_model_path");
|
std::string new_model_path = obs_data_get_string(s, "whisper_model_path");
|
||||||
|
const bool is_external_model = new_model_path.find("!!!external!!!") != std::string::npos;
|
||||||
|
|
||||||
if (gf->whisper_model_path == nullptr ||
|
if (gf->whisper_model_path == nullptr ||
|
||||||
strcmp(new_model_path.c_str(), gf->whisper_model_path) != 0) {
|
strcmp(new_model_path.c_str(), gf->whisper_model_path) != 0 || is_external_model) {
|
||||||
// model path changed, reload the model
|
// model path changed, reload the model
|
||||||
obs_log(LOG_INFO, "model path changed, reloading model");
|
obs_log(gf->log_level, "model path changed from %s to %s", gf->whisper_model_path,
|
||||||
shutdown_whisper_thread(gf);
|
new_model_path.c_str());
|
||||||
if (gf->whisper_model_path != nullptr) {
|
|
||||||
bfree(gf->whisper_model_path);
|
|
||||||
}
|
|
||||||
gf->whisper_model_path = bstrdup(new_model_path.c_str());
|
|
||||||
|
|
||||||
// check if the new model is external file
|
// check if the new model is external file
|
||||||
if (new_model_path.find("!!!external!!!") == std::string::npos) {
|
if (!is_external_model) {
|
||||||
// new model is not external file
|
// new model is not external file
|
||||||
|
shutdown_whisper_thread(gf);
|
||||||
|
|
||||||
|
gf->whisper_model_path = bstrdup(new_model_path.c_str());
|
||||||
|
|
||||||
// check if the model exists, if not, download it
|
// check if the model exists, if not, download it
|
||||||
std::string model_file_found = find_model_file(gf->whisper_model_path);
|
std::string model_file_found = find_model_file(gf->whisper_model_path);
|
||||||
if (model_file_found == "") {
|
if (model_file_found == "") {
|
||||||
@ -334,29 +380,39 @@ void transcription_filter_update(void *data, obs_data_t *s)
|
|||||||
if (download_status == 0) {
|
if (download_status == 0) {
|
||||||
obs_log(LOG_INFO,
|
obs_log(LOG_INFO,
|
||||||
"Model download complete");
|
"Model download complete");
|
||||||
gf->whisper_context =
|
start_whisper_thread_with_path(gf, path);
|
||||||
init_whisper_context(path);
|
|
||||||
std::thread new_whisper_thread(whisper_loop,
|
|
||||||
gf);
|
|
||||||
gf->whisper_thread.swap(new_whisper_thread);
|
|
||||||
} else {
|
} else {
|
||||||
obs_log(LOG_ERROR, "Model download failed");
|
obs_log(LOG_ERROR, "Model download failed");
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
// Model exists, just load it
|
// Model exists, just load it
|
||||||
gf->whisper_context = init_whisper_context(model_file_found);
|
start_whisper_thread_with_path(gf, model_file_found);
|
||||||
std::thread new_whisper_thread(whisper_loop, gf);
|
|
||||||
gf->whisper_thread.swap(new_whisper_thread);
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// new model is local file, get file location from file property
|
// new model is external file, get file location from file property
|
||||||
std::string external_model_file_path =
|
std::string external_model_file_path =
|
||||||
obs_data_get_string(s, "whisper_model_path_external");
|
obs_data_get_string(s, "whisper_model_path_external");
|
||||||
gf->whisper_context = init_whisper_context(external_model_file_path);
|
if (external_model_file_path.empty()) {
|
||||||
std::thread new_whisper_thread(whisper_loop, gf);
|
obs_log(LOG_WARNING, "External model file path is empty");
|
||||||
gf->whisper_thread.swap(new_whisper_thread);
|
} else {
|
||||||
|
// check if the external model file is not currently loaded
|
||||||
|
if (gf->whisper_model_file_currently_loaded ==
|
||||||
|
external_model_file_path) {
|
||||||
|
obs_log(LOG_INFO, "External model file is already loaded");
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
shutdown_whisper_thread(gf);
|
||||||
|
gf->whisper_model_path = bstrdup(new_model_path.c_str());
|
||||||
|
start_whisper_thread_with_path(gf,
|
||||||
|
external_model_file_path);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
// model path did not change
|
||||||
|
obs_log(LOG_INFO, "model path did not change: %s == %s", gf->whisper_model_path,
|
||||||
|
new_model_path.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!gf->whisper_ctx_mutex) {
|
if (!gf->whisper_ctx_mutex) {
|
||||||
@ -409,6 +465,7 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter)
|
|||||||
gf->step_size_msec = step_by_step_processing
|
gf->step_size_msec = step_by_step_processing
|
||||||
? (int)obs_data_get_int(settings, "step_size_msec")
|
? (int)obs_data_get_int(settings, "step_size_msec")
|
||||||
: BUFFER_SIZE_MSEC;
|
: BUFFER_SIZE_MSEC;
|
||||||
|
gf->log_level = (int)obs_data_get_int(settings, "log_level");
|
||||||
|
|
||||||
for (size_t i = 0; i < MAX_AUDIO_CHANNELS; i++) {
|
for (size_t i = 0; i < MAX_AUDIO_CHANNELS; i++) {
|
||||||
circlebuf_init(&gf->input_buffers[i]);
|
circlebuf_init(&gf->input_buffers[i]);
|
||||||
@ -423,7 +480,6 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter)
|
|||||||
}
|
}
|
||||||
|
|
||||||
gf->context = filter;
|
gf->context = filter;
|
||||||
gf->whisper_model_path = nullptr; // The update function will set the model path
|
|
||||||
|
|
||||||
gf->overlap_ms = OVERLAP_SIZE_MSEC;
|
gf->overlap_ms = OVERLAP_SIZE_MSEC;
|
||||||
gf->overlap_frames = (size_t)((float)gf->sample_rate / (1000.0f / (float)gf->overlap_ms));
|
gf->overlap_frames = (size_t)((float)gf->sample_rate / (1000.0f / (float)gf->overlap_ms));
|
||||||
@ -450,6 +506,9 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter)
|
|||||||
gf->text_source = nullptr;
|
gf->text_source = nullptr;
|
||||||
gf->text_source_name = bstrdup(obs_data_get_string(settings, "subtitle_sources"));
|
gf->text_source_name = bstrdup(obs_data_get_string(settings, "subtitle_sources"));
|
||||||
gf->output_file_path = std::string("");
|
gf->output_file_path = std::string("");
|
||||||
|
gf->whisper_model_path = nullptr; // The update function will set the model path
|
||||||
|
gf->whisper_context = nullptr;
|
||||||
|
gf->whisper_model_file_currently_loaded = "";
|
||||||
|
|
||||||
obs_log(gf->log_level, "transcription_filter: run update");
|
obs_log(gf->log_level, "transcription_filter: run update");
|
||||||
// get the settings updated on the filter data struct
|
// get the settings updated on the filter data struct
|
||||||
@ -479,6 +538,8 @@ void transcription_filter_deactivate(void *data)
|
|||||||
|
|
||||||
void transcription_filter_defaults(obs_data_t *s)
|
void transcription_filter_defaults(obs_data_t *s)
|
||||||
{
|
{
|
||||||
|
obs_log(LOG_INFO, "transcription_filter_defaults");
|
||||||
|
|
||||||
obs_data_set_default_bool(s, "vad_enabled", true);
|
obs_data_set_default_bool(s, "vad_enabled", true);
|
||||||
obs_data_set_default_int(s, "log_level", LOG_DEBUG);
|
obs_data_set_default_int(s, "log_level", LOG_DEBUG);
|
||||||
obs_data_set_default_bool(s, "log_words", true);
|
obs_data_set_default_bool(s, "log_words", true);
|
||||||
@ -505,7 +566,7 @@ void transcription_filter_defaults(obs_data_t *s)
|
|||||||
obs_data_set_default_double(s, "thold_pt", 0.01);
|
obs_data_set_default_double(s, "thold_pt", 0.01);
|
||||||
obs_data_set_default_double(s, "thold_ptsum", 0.01);
|
obs_data_set_default_double(s, "thold_ptsum", 0.01);
|
||||||
obs_data_set_default_int(s, "max_len", 0);
|
obs_data_set_default_int(s, "max_len", 0);
|
||||||
obs_data_set_default_bool(s, "split_on_word", false);
|
obs_data_set_default_bool(s, "split_on_word", true);
|
||||||
obs_data_set_default_int(s, "max_tokens", 32);
|
obs_data_set_default_int(s, "max_tokens", 32);
|
||||||
obs_data_set_default_bool(s, "speed_up", false);
|
obs_data_set_default_bool(s, "speed_up", false);
|
||||||
obs_data_set_default_bool(s, "suppress_blank", false);
|
obs_data_set_default_bool(s, "suppress_blank", false);
|
||||||
@ -517,6 +578,8 @@ void transcription_filter_defaults(obs_data_t *s)
|
|||||||
|
|
||||||
obs_properties_t *transcription_filter_properties(void *data)
|
obs_properties_t *transcription_filter_properties(void *data)
|
||||||
{
|
{
|
||||||
|
obs_log(LOG_INFO, "transcription_filter_properties");
|
||||||
|
|
||||||
struct transcription_filter_data *gf =
|
struct transcription_filter_data *gf =
|
||||||
static_cast<struct transcription_filter_data *>(data);
|
static_cast<struct transcription_filter_data *>(data);
|
||||||
|
|
||||||
@ -603,16 +666,12 @@ obs_properties_t *transcription_filter_properties(void *data)
|
|||||||
whisper_model_path_external,
|
whisper_model_path_external,
|
||||||
[](void *data_, obs_properties_t *props, obs_property_t *property,
|
[](void *data_, obs_properties_t *props, obs_property_t *property,
|
||||||
obs_data_t *settings) {
|
obs_data_t *settings) {
|
||||||
|
obs_log(LOG_INFO, "whisper_model_path_external modified");
|
||||||
UNUSED_PARAMETER(property);
|
UNUSED_PARAMETER(property);
|
||||||
UNUSED_PARAMETER(props);
|
UNUSED_PARAMETER(props);
|
||||||
struct transcription_filter_data *gf_ =
|
struct transcription_filter_data *gf_ =
|
||||||
static_cast<struct transcription_filter_data *>(data_);
|
static_cast<struct transcription_filter_data *>(data_);
|
||||||
shutdown_whisper_thread(gf_);
|
transcription_filter_update(gf_, settings);
|
||||||
std::string external_model_file_path =
|
|
||||||
obs_data_get_string(settings, "whisper_model_path_external");
|
|
||||||
gf_->whisper_context = init_whisper_context(external_model_file_path);
|
|
||||||
std::thread new_whisper_thread(whisper_loop, gf_);
|
|
||||||
gf_->whisper_thread.swap(new_whisper_thread);
|
|
||||||
return true;
|
return true;
|
||||||
},
|
},
|
||||||
gf);
|
gf);
|
||||||
|
Loading…
Reference in New Issue
Block a user