Locale and translations

This commit is contained in:
Roy Shilkrot 2023-10-04 22:12:17 -04:00
parent bbf0284b8d
commit 1b6da3c0f9
9 changed files with 178 additions and 52 deletions

View File

@ -1 +1,39 @@
LocalVocalPlugin="LocalVocal Plugin"
transcription_filterAudioFilter="LocalVocal Transcription"
vad_enabled="VAD Enabled"
log_level="Log Level"
log_words="Log Output Words"
caption_to_stream="Stream Captions"
step_by_step_processing="Step-by-step processing (⚠️ processing will increase)"
step_size_msec="Step size (ms)"
subtitle_sources="Subtitles Output"
none_no_output="None / No output"
text_file_output="Text File output"
output_filename="Output filename"
whisper_model="Whisper Model"
external_model_file="External model file"
whisper_parameters="Whisper Parameters"
language="Language"
whisper_sampling_method="Whisper Sampling Method"
n_threads="Number of threads"
n_max_text_ctx="Max text context"
translate="Translate"
no_context="No context"
single_segment="Single segment"
print_special="Print special"
print_progress="Print progress"
print_realtime="Print realtime"
print_timestamps="Print timestamps"
token_timestamps="Token timestamps"
thold_pt="Token prob. threshold"
thold_ptsum="Token sum prob. threshold"
max_len="Max length in chars"
split_on_word="Split on word"
max_tokens="Max tokens"
speed_up="Speed up"
initial_prompt="Initial prompt"
suppress_blank="Suppress blank"
suppress_non_speech_tokens="Suppress non-speech tokens"
temperature="Temperature"
max_initial_ts="Max initial timestamps"
length_penalty="Length penalty"

40
data/locale/pt_BR.ini Normal file
View File

@ -0,0 +1,40 @@
# Portuguese (Brazil) translation
LocalVocalPlugin="Plugin LocalVocal"
transcription_filterAudioFilter="LocalVocal Transcrição"
vad_enabled="VAD Habilitado"
log_level="Nível de log"
log_words="Registrar palavras de saída no log"
caption_to_stream="Legendas de fluxo"
step_by_step_processing="Processamento passo a passo (⚠️ o processamento aumentará)"
step_size_msec="Tamanho do passo (ms)"
subtitle_sources="Legendas de saída"
none_no_output="Nenhum / Sem saída"
text_file_output="Saída do arquivo de texto"
output_filename="Nome do arquivo de saída"
whisper_model="Modelo Whisper"
external_model_file="Arquivo de modelo externo"
whisper_parameters="Parâmetros Whisper"
language="Idioma"
whisper_sampling_method="Método de amostragem Whisper"
n_threads="Número de threads"
n_max_text_ctx="Máximo de contexto de texto"
translate="Traduzir"
no_context="Sem contexto"
single_segment="Segmento único"
print_special="Imprimir especial"
print_progress="Imprimir progresso"
print_realtime="Imprimir em tempo real"
print_timestamps="Imprimir carimbos de data/hora"
token_timestamps="Carimbos de data/hora dos tokens"
thold_pt="Limiar de probabilidade do token"
thold_ptsum="Limiar da soma de probabilidades do token"
max_len="Comprimento máximo em caracteres"
split_on_word="Dividir na palavra"
max_tokens="Máximo de tokens"
speed_up="Acelerar"
initial_prompt="Prompt inicial"
suppress_blank="Suprimir em branco"
suppress_non_speech_tokens="Suprimir tokens que não são de fala"
temperature="Temperatura"
max_initial_ts="Tempo inicial máximo"
length_penalty="Penalidade de comprimento"

39
data/locale/ru_RU.ini Normal file
View File

@ -0,0 +1,39 @@
LocalVocalPlugin="Плагин LocalVocal"
transcription_filterAudioFilter="LocalVocal Транскрипция"
vad_enabled="VAD Включен"
log_level="Уровень логирования"
log_words="Логировать слова"
caption_to_stream="Потоковые субтитры"
step_by_step_processing="Пошаговая обработка (⚠️ обработка будет увеличена)"
step_size_msec="Размер шага (мс)"
subtitle_sources="Выходные субтитры"
none_no_output="Нет / Без вывода"
text_file_output="Выходной текстовый файл"
output_filename="Имя выходного файла"
whisper_model="Модель Whisper"
external_model_file="Внешний файл модели"
whisper_parameters="Параметры Whisper"
language="Язык"
whisper_sampling_method="Метод выборки Whisper"
n_threads="Количество потоков"
n_max_text_ctx="Максимальный текстовый контекст"
translate="Перевести"
no_context="Нет контекста"
single_segment="Одиночный сегмент"
print_special="Печать специальных"
print_progress="Печать прогресса"
print_realtime="Печать в реальном времени"
print_timestamps="Печать временных меток"
token_timestamps="Временные метки токенов"
thold_pt="Порог вероятности токена"
thold_ptsum="Порог суммы вероятностей токена"
max_len="Максимальная длина в символах"
split_on_word="Разделить по слову"
max_tokens="Максимальное количество токенов"
speed_up="Ускорить"
initial_prompt="Начальная подсказка"
suppress_blank="Подавить пустое"
suppress_non_speech_tokens="Подавить токены, не относящиеся к речи"
temperature="Температура"
max_initial_ts="Максимальное начальное время"
length_penalty="Штраф за длину"

View File

@ -1,6 +1,6 @@
/*
Plugin Name
Copyright (C) <Year> <Developer> <Email Address>
obs-localvocal
Copyright (C) 2023 Roy Shilkrot roy.shil@gmail.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -32,7 +32,7 @@ extern struct obs_source_info transcription_filter_info;
// Registers the transcription filter source, logs the plugin version at
// LOG_INFO, and returns true.
bool obs_module_load(void)
{
// transcription_filter_info is declared extern just above this function.
obs_register_source(&transcription_filter_info);
// NOTE(review): the two logging calls below appear to be the removed (blog)
// and added (obs_log) sides of a single diff hunk rather than two calls that
// coexist in the real file — confirm against the actual source.
blog(LOG_INFO, "plugin loaded successfully (version %s)", PLUGIN_VERSION);
obs_log(LOG_INFO, "plugin loaded successfully (version %s)", PLUGIN_VERSION);
return true;
}

View File

@ -1,6 +1,6 @@
/*
Plugin Name
Copyright (C) <Year> <Developer> <Email Address>
obs-localvocal
Copyright (C) 2023 Roy Shilkrot roy.shil@gmail.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View File

@ -1,6 +1,6 @@
/*
Plugin Name
Copyright (C) <Year> <Developer> <Email Address>
obs-localvocal
Copyright (C) 2023 Roy Shilkrot roy.shil@gmail.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View File

@ -43,7 +43,6 @@ struct transcription_filter_data {
/* whisper */
char *whisper_model_path = nullptr;
std::string whisper_model_file_currently_loaded = "";
struct whisper_context *whisper_context = nullptr;
whisper_full_params whisper_params;
@ -63,7 +62,8 @@ struct transcription_filter_data {
// Callback to set the text in the output text source (subtitles)
std::function<void(const std::string &str)> setTextCallback;
// Output file path to write the subtitles
std::string output_file_path;
std::string output_file_path = "";
std::string whisper_model_file_currently_loaded = "";
// Use std for thread and mutex
std::thread whisper_thread;

View File

@ -159,7 +159,7 @@ void transcription_filter_destroy(void *data)
void acquire_weak_text_source_ref(struct transcription_filter_data *gf)
{
if (!gf->text_source_name) {
obs_log(LOG_ERROR, "text_source_name is null");
obs_log(gf->log_level, "text_source_name is null");
return;
}
@ -222,12 +222,12 @@ void set_text_callback(struct transcription_filter_data *gf, const std::string &
std::lock_guard<std::mutex> lock(*gf->text_source_mutex);
if (!gf->text_source) {
obs_log(LOG_ERROR, "text_source is null");
obs_log(gf->log_level, "text_source is null");
return;
}
auto target = obs_weak_source_get_source(gf->text_source);
if (!target) {
obs_log(LOG_ERROR, "text_source target is null");
obs_log(gf->log_level, "text_source target is null");
return;
}
auto text_settings = obs_source_get_settings(target);
@ -454,6 +454,8 @@ void transcription_filter_update(void *data, obs_data_t *s)
void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter)
{
obs_log(LOG_INFO, "transcription filter create");
struct transcription_filter_data *gf = new transcription_filter_data;
// Get the number of channels for the input source
@ -503,12 +505,19 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter)
gf->whisper_ctx_mutex = new std::mutex();
gf->wshiper_thread_cv = new std::condition_variable();
gf->text_source_mutex = new std::mutex();
obs_log(gf->log_level, "transcription_filter: clear text source data");
gf->text_source = nullptr;
gf->text_source_name = bstrdup(obs_data_get_string(settings, "subtitle_sources"));
const char* subtitle_sources = obs_data_get_string(settings, "subtitle_sources");
if (subtitle_sources != nullptr) {
gf->text_source_name = bstrdup(subtitle_sources);
} else {
gf->text_source_name = nullptr;
}
obs_log(gf->log_level, "transcription_filter: clear paths and whisper context");
gf->whisper_model_file_currently_loaded = "";
gf->output_file_path = std::string("");
gf->whisper_model_path = nullptr; // The update function will set the model path
gf->whisper_context = nullptr;
gf->whisper_model_file_currently_loaded = "";
obs_log(gf->log_level, "transcription_filter: run update");
// get the settings updated on the filter data struct
@ -585,18 +594,18 @@ obs_properties_t *transcription_filter_properties(void *data)
obs_properties_t *ppts = obs_properties_create();
obs_properties_add_bool(ppts, "vad_enabled", "VAD Enabled");
obs_property_t *list = obs_properties_add_list(ppts, "log_level", "Log level",
obs_properties_add_bool(ppts, "vad_enabled", MT_("vad_enabled"));
obs_property_t *list = obs_properties_add_list(ppts, "log_level", MT_("log_level"),
OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_INT);
obs_property_list_add_int(list, "DEBUG", LOG_DEBUG);
obs_property_list_add_int(list, "INFO", LOG_INFO);
obs_property_list_add_int(list, "WARNING", LOG_WARNING);
obs_properties_add_bool(ppts, "log_words", "Log output words");
obs_properties_add_bool(ppts, "caption_to_stream", "Stream captions");
obs_properties_add_bool(ppts, "log_words", MT_("log_words"));
obs_properties_add_bool(ppts, "caption_to_stream", MT_("caption_to_stream"));
obs_property_t *step_by_step_processing = obs_properties_add_bool(
ppts, "step_by_step_processing",
"Step-by-step processing (⚠️ processing will increase)");
obs_properties_add_int_slider(ppts, "step_size_msec", "Step size (ms)", 1000,
MT_("step_by_step_processing"));
obs_properties_add_int_slider(ppts, "step_size_msec", MT_("step_size_msec"), 1000,
BUFFER_SIZE_MSEC, 50);
obs_property_set_modified_callback(step_by_step_processing, [](obs_properties_t *props,
@ -610,15 +619,15 @@ obs_properties_t *transcription_filter_properties(void *data)
});
obs_property_t *subs_output =
obs_properties_add_list(ppts, "subtitle_sources", "Subtitles Output",
obs_properties_add_list(ppts, "subtitle_sources", MT_("subtitle_sources"),
OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING);
// Add "none" option
obs_property_list_add_string(subs_output, "None / No output", "none");
obs_property_list_add_string(subs_output, "Text File output", "text_file");
obs_property_list_add_string(subs_output, MT_("none_no_output"), "none");
obs_property_list_add_string(subs_output, MT_("text_file_output"), "text_file");
// Add text sources
obs_enum_sources(add_sources_to_list, subs_output);
obs_properties_add_path(ppts, "subtitle_output_filename", "Output filename",
obs_properties_add_path(ppts, "subtitle_output_filename", MT_("output_filename"),
OBS_PATH_FILE_SAVE, "Text (*.txt)", NULL);
obs_property_set_modified_callback(subs_output, [](obs_properties_t *props,
@ -640,7 +649,7 @@ obs_properties_t *transcription_filter_properties(void *data)
// Add a list of available whisper models to download
obs_property_t *whisper_models_list =
obs_properties_add_list(ppts, "whisper_model_path", "Whisper Model",
obs_properties_add_list(ppts, "whisper_model_path", MT_("whisper_model"),
OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING);
obs_property_list_add_string(whisper_models_list, "Tiny (Eng) 75Mb",
@ -657,7 +666,7 @@ obs_properties_t *transcription_filter_properties(void *data)
// Add a file selection input to select an external model file
obs_property_t *whisper_model_path_external =
obs_properties_add_path(ppts, "whisper_model_path_external", "External model file",
obs_properties_add_path(ppts, "whisper_model_path_external", MT_("external_model_file"),
OBS_PATH_FILE, "Model (*.bin)", NULL);
// Hide the external model file selection input
obs_property_set_visible(obs_properties_get(ppts, "whisper_model_path_external"), false);
@ -695,12 +704,12 @@ obs_properties_t *transcription_filter_properties(void *data)
});
obs_properties_t *whisper_params_group = obs_properties_create();
obs_properties_add_group(ppts, "whisper_params_group", "Whisper Parameters",
obs_properties_add_group(ppts, "whisper_params_group", MT_("whisper_parameters"),
OBS_GROUP_NORMAL, whisper_params_group);
// Add language selector
obs_property_t *whisper_language_select_list =
obs_properties_add_list(whisper_params_group, "whisper_language_select", "Language",
obs_properties_add_list(whisper_params_group, "whisper_language_select", MT_("language"),
OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING);
// sort the languages by flipping the map
std::map<std::string, std::string> whisper_available_lang_flip;
@ -718,65 +727,65 @@ obs_properties_t *transcription_filter_properties(void *data)
}
obs_property_t *whisper_sampling_method_list = obs_properties_add_list(
whisper_params_group, "whisper_sampling_method", "whisper_sampling_method",
whisper_params_group, "whisper_sampling_method", MT_("whisper_sampling_method"),
OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_INT);
obs_property_list_add_int(whisper_sampling_method_list, "Beam search",
WHISPER_SAMPLING_BEAM_SEARCH);
obs_property_list_add_int(whisper_sampling_method_list, "Greedy", WHISPER_SAMPLING_GREEDY);
// int n_threads;
obs_properties_add_int_slider(whisper_params_group, "n_threads", "n_threads", 1, 8, 1);
obs_properties_add_int_slider(whisper_params_group, "n_threads", MT_("n_threads"), 1, 8, 1);
// int n_max_text_ctx; // max tokens to use from past text as prompt for the decoder
obs_properties_add_int_slider(whisper_params_group, "n_max_text_ctx", "n_max_text_ctx", 0,
obs_properties_add_int_slider(whisper_params_group, "n_max_text_ctx", MT_("n_max_text_ctx"), 0,
16384, 100);
// int offset_ms; // start offset in ms
// int duration_ms; // audio duration to process in ms
// bool translate;
obs_properties_add_bool(whisper_params_group, "translate", "translate");
obs_properties_add_bool(whisper_params_group, "translate", MT_("translate"));
// bool no_context; // do not use past transcription (if any) as initial prompt for the decoder
obs_properties_add_bool(whisper_params_group, "no_context", "no_context");
obs_properties_add_bool(whisper_params_group, "no_context", MT_("no_context"));
// bool single_segment; // force single segment output (useful for streaming)
obs_properties_add_bool(whisper_params_group, "single_segment", "single_segment");
obs_properties_add_bool(whisper_params_group, "single_segment", MT_("single_segment"));
// bool print_special; // print special tokens (e.g. <SOT>, <EOT>, <BEG>, etc.)
obs_properties_add_bool(whisper_params_group, "print_special", "print_special");
obs_properties_add_bool(whisper_params_group, "print_special", MT_("print_special"));
// bool print_progress; // print progress information
obs_properties_add_bool(whisper_params_group, "print_progress", "print_progress");
obs_properties_add_bool(whisper_params_group, "print_progress", MT_("print_progress"));
// bool print_realtime; // print results from within whisper.cpp (avoid it, use callback instead)
obs_properties_add_bool(whisper_params_group, "print_realtime", "print_realtime");
obs_properties_add_bool(whisper_params_group, "print_realtime", MT_("print_realtime"));
// bool print_timestamps; // print timestamps for each text segment when printing realtime
obs_properties_add_bool(whisper_params_group, "print_timestamps", "print_timestamps");
obs_properties_add_bool(whisper_params_group, "print_timestamps", MT_("print_timestamps"));
// bool token_timestamps; // enable token-level timestamps
obs_properties_add_bool(whisper_params_group, "token_timestamps", "token_timestamps");
obs_properties_add_bool(whisper_params_group, "token_timestamps", MT_("token_timestamps"));
// float thold_pt; // timestamp token probability threshold (~0.01)
obs_properties_add_float_slider(whisper_params_group, "thold_pt", "thold_pt", 0.0f, 1.0f,
obs_properties_add_float_slider(whisper_params_group, "thold_pt", MT_("thold_pt"), 0.0f, 1.0f,
0.05f);
// float thold_ptsum; // timestamp token sum probability threshold (~0.01)
obs_properties_add_float_slider(whisper_params_group, "thold_ptsum", "thold_ptsum", 0.0f,
obs_properties_add_float_slider(whisper_params_group, "thold_ptsum", MT_("thold_ptsum"), 0.0f,
1.0f, 0.05f);
// int max_len; // max segment length in characters
obs_properties_add_int_slider(whisper_params_group, "max_len", "max_len", 0, 100, 1);
obs_properties_add_int_slider(whisper_params_group, "max_len", MT_("max_len"), 0, 100, 1);
// bool split_on_word; // split on word rather than on token (when used with max_len)
obs_properties_add_bool(whisper_params_group, "split_on_word", "split_on_word");
obs_properties_add_bool(whisper_params_group, "split_on_word", MT_("split_on_word"));
// int max_tokens; // max tokens per segment (0 = no limit)
obs_properties_add_int_slider(whisper_params_group, "max_tokens", "max_tokens", 0, 100, 1);
obs_properties_add_int_slider(whisper_params_group, "max_tokens", MT_("max_tokens"), 0, 100, 1);
// bool speed_up; // speed-up the audio by 2x using Phase Vocoder
obs_properties_add_bool(whisper_params_group, "speed_up", "speed_up");
obs_properties_add_bool(whisper_params_group, "speed_up", MT_("speed_up"));
// const char * initial_prompt;
obs_properties_add_text(whisper_params_group, "initial_prompt", "initial_prompt",
obs_properties_add_text(whisper_params_group, "initial_prompt", MT_("initial_prompt"),
OBS_TEXT_DEFAULT);
// bool suppress_blank
obs_properties_add_bool(whisper_params_group, "suppress_blank", "suppress_blank");
obs_properties_add_bool(whisper_params_group, "suppress_blank", MT_("suppress_blank"));
// bool suppress_non_speech_tokens
obs_properties_add_bool(whisper_params_group, "suppress_non_speech_tokens",
"suppress_non_speech_tokens");
MT_("suppress_non_speech_tokens"));
// float temperature
obs_properties_add_float_slider(whisper_params_group, "temperature", "temperature", 0.0f,
obs_properties_add_float_slider(whisper_params_group, "temperature", MT_("temperature"), 0.0f,
1.0f, 0.05f);
// float max_initial_ts
obs_properties_add_float_slider(whisper_params_group, "max_initial_ts", "max_initial_ts",
obs_properties_add_float_slider(whisper_params_group, "max_initial_ts", MT_("max_initial_ts"),
0.0f, 1.0f, 0.05f);
// float length_penalty
obs_properties_add_float_slider(whisper_params_group, "length_penalty", "length_penalty",
obs_properties_add_float_slider(whisper_params_group, "length_penalty", MT_("length_penalty"),
-1.0f, 1.0f, 0.1f);
UNUSED_PARAMETER(data);

View File

@ -59,7 +59,7 @@ bool vad_simple(float *pcmf32, size_t pcm32f_size, uint32_t sample_rate, float v
energy_all /= (float)n_samples;
if (verbose) {
blog(LOG_INFO, "%s: energy_all: %f, vad_thold: %f, freq_thold: %f", __func__,
obs_log(LOG_INFO, "%s: energy_all: %f, vad_thold: %f, freq_thold: %f", __func__,
energy_all, vad_thold, freq_thold);
}