mirror of
https://github.com/occ-ai/obs-localvocal
synced 2024-11-08 03:08:07 +00:00
Merge remote-tracking branch 'origin/master' into roy.fix_win32_unicode_model_path
This commit is contained in:
commit
6441245b65
18
README.md
18
README.md
@ -2,10 +2,10 @@
|
||||
|
||||
<div align="center">
|
||||
|
||||
[![GitHub](https://img.shields.io/github/license/obs-ai/obs-localvocal)](https://github.com/obs-ai/obs-localvocal/blob/main/LICENSE)
|
||||
[![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/obs-ai/obs-localvocal/push.yaml)](https://github.com/obs-ai/obs-localvocal/actions/workflows/push.yaml)
|
||||
[![Total downloads](https://img.shields.io/github/downloads/obs-ai/obs-localvocal/total)](https://github.com/obs-ai/obs-localvocal/releases)
|
||||
[![GitHub release (latest by date)](https://img.shields.io/github/v/release/obs-ai/obs-localvocal)](https://github.com/obs-ai/obs-localvocal/releases)
|
||||
[![GitHub](https://img.shields.io/github/license/occ-ai/obs-localvocal)](https://github.com/occ-ai/obs-localvocal/blob/main/LICENSE)
|
||||
[![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/occ-ai/obs-localvocal/push.yaml)](https://github.com/occ-ai/obs-localvocal/actions/workflows/push.yaml)
|
||||
[![Total downloads](https://img.shields.io/github/downloads/occ-ai/obs-localvocal/total)](https://github.com/occ-ai/obs-localvocal/releases)
|
||||
[![GitHub release (latest by date)](https://img.shields.io/github/v/release/occ-ai/obs-localvocal)](https://github.com/occ-ai/obs-localvocal/releases)
|
||||
|
||||
</div>
|
||||
|
||||
@ -44,13 +44,13 @@ Internally the plugin is running a neural network ([OpenAI Whisper](https://gith
|
||||
It's using the [Whisper.cpp](https://github.com/ggerganov/whisper.cpp) project from [ggerganov](https://github.com/ggerganov) to run the Whisper network in a very efficient way on CPUs and GPUs.
|
||||
|
||||
Check out our other plugins:
|
||||
- [Background Removal](https://github.com/royshil/obs-backgroundremoval) removes background from webcam without a green screen.
|
||||
- 🚧 Experimental 🚧 [CleanStream](https://github.com/obs-ai/obs-cleanstream) for real-time filler word (uh,um) and profanity removal from live audio stream
|
||||
- [URL/API Source](https://github.com/obs-ai/obs-urlsource) that allows fetching live data from an API and displaying it in OBS.
|
||||
- [Polyglot](https://github.com/obs-ai/obs-polyglot) translation AI plugin for real-time, local translation to hundreds of languages
|
||||
- [Background Removal](https://github.com/occ-ai/obs-backgroundremoval) removes background from webcam without a green screen.
|
||||
- 🚧 Experimental 🚧 [CleanStream](https://github.com/occ-ai/obs-cleanstream) for real-time filler word (uh,um) and profanity removal from live audio stream
|
||||
- [URL/API Source](https://github.com/occ-ai/obs-urlsource) that allows fetching live data from an API and displaying it in OBS.
|
||||
- [Polyglot](https://github.com/occ-ai/obs-polyglot) translation AI plugin for real-time, local translation to hundreds of languages
|
||||
|
||||
## Download
|
||||
Check out the [latest releases](https://github.com/obs-ai/obs-localvocal/releases) for downloads and install instructions.
|
||||
Check out the [latest releases](https://github.com/occ-ai/obs-localvocal/releases) for downloads and install instructions.
|
||||
|
||||
## Building
|
||||
|
||||
|
@ -41,3 +41,4 @@ save_srt="Save in SRT format (no file truncation)"
|
||||
only_while_recording="Write output only while recording"
|
||||
process_while_muted="Process speech while source is muted"
|
||||
rename_file_to_match_recording="Rename file to match recording"
|
||||
min_sub_duration="Minimal subtitle duration (msec)"
|
||||
|
@ -42,3 +42,4 @@ save_srt="Salvar no formato SRT"
|
||||
only_while_recording="Escreva durante a gravação"
|
||||
process_while_muted="Processar enquanto está silenciada"
|
||||
rename_file_to_match_recording="Renomear arquivo para corresponder à gravação"
|
||||
min_sub_duration="Duração mínima da legenda (msec)"
|
||||
|
@ -41,3 +41,4 @@ save_srt="Сохранить в формате SRT"
|
||||
only_while_recording="Записывать вывод только во время записи"
|
||||
process_while_muted="Обрабатывать речь, пока источник отключен"
|
||||
rename_file_to_match_recording="Переименовать файл, чтобы соответствовать записи"
|
||||
min_sub_duration="Минимальная длительность субтитров (мс)"
|
||||
|
@ -49,6 +49,10 @@ struct transcription_filter_data {
|
||||
uint64_t start_timestamp_ms;
|
||||
// Sentence counter for srt
|
||||
size_t sentence_number;
|
||||
// Minimal subtitle duration in ms
|
||||
size_t min_sub_duration;
|
||||
// Last time a subtitle was rendered
|
||||
uint64_t last_sub_render_time;
|
||||
|
||||
/* PCM buffers */
|
||||
float *copy_buffers[MAX_PREPROC_CHANNELS];
|
||||
|
@ -227,8 +227,23 @@ inline bool is_valid_lead_byte(const uint8_t *c)
|
||||
return false;
|
||||
}
|
||||
|
||||
void set_text_callback(struct transcription_filter_data *gf, const DetectionResultWithText &result)
|
||||
void set_text_callback(struct transcription_filter_data *gf,
|
||||
const DetectionResultWithText &resultIn)
|
||||
{
|
||||
DetectionResultWithText result = resultIn;
|
||||
uint64_t now = now_ms();
|
||||
if (result.text.empty() || result.result != DETECTION_RESULT_SPEECH) {
|
||||
// check if we should clear the current sub depending on the minimum subtitle duration
|
||||
if ((now - gf->last_sub_render_time) > gf->min_sub_duration) {
|
||||
// clear the current sub, run an empty sub
|
||||
result.text = "";
|
||||
} else {
|
||||
// nothing to do, the incoming sub is empty
|
||||
return;
|
||||
}
|
||||
}
|
||||
gf->last_sub_render_time = now;
|
||||
|
||||
#ifdef _WIN32
|
||||
// Some UTF8 charsets on Windows output have a bug, instead of 0xd? it outputs
|
||||
// 0xf?, and 0xc? becomes 0xe?, so we need to fix it.
|
||||
@ -411,6 +426,8 @@ void transcription_filter_update(void *data, obs_data_t *s)
|
||||
gf->start_timestamp_ms = now_ms();
|
||||
gf->sentence_number = 1;
|
||||
gf->process_while_muted = obs_data_get_bool(s, "process_while_muted");
|
||||
gf->min_sub_duration = (int)obs_data_get_int(s, "min_sub_duration");
|
||||
gf->last_sub_render_time = 0;
|
||||
|
||||
obs_log(gf->log_level, "transcription_filter: update text source");
|
||||
// update the text source
|
||||
@ -530,7 +547,7 @@ void transcription_filter_update(void *data, obs_data_t *s)
|
||||
}
|
||||
} else {
|
||||
// model path did not change
|
||||
obs_log(LOG_INFO, "model path did not change: %s == %s", gf->whisper_model_path,
|
||||
obs_log(LOG_DEBUG, "model path did not change: %s == %s", gf->whisper_model_path,
|
||||
new_model_path.c_str());
|
||||
}
|
||||
|
||||
@ -586,6 +603,8 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter)
|
||||
gf->step_size_msec = step_by_step_processing
|
||||
? (int)obs_data_get_int(settings, "step_size_msec")
|
||||
: BUFFER_SIZE_MSEC;
|
||||
gf->min_sub_duration = (int)obs_data_get_int(settings, "min_sub_duration");
|
||||
gf->last_sub_render_time = 0;
|
||||
gf->log_level = (int)obs_data_get_int(settings, "log_level");
|
||||
gf->save_srt = obs_data_get_bool(settings, "subtitle_save_srt");
|
||||
gf->save_only_while_recording = obs_data_get_bool(settings, "only_while_recording");
|
||||
@ -728,6 +747,7 @@ void transcription_filter_defaults(obs_data_t *s)
|
||||
obs_data_set_default_bool(s, "only_while_recording", false);
|
||||
obs_data_set_default_bool(s, "rename_file_to_match_recording", true);
|
||||
obs_data_set_default_int(s, "step_size_msec", 1000);
|
||||
obs_data_set_default_int(s, "min_sub_duration", 3000);
|
||||
|
||||
// Whisper parameters
|
||||
obs_data_set_default_int(s, "whisper_sampling_method", WHISPER_SAMPLING_BEAM_SEARCH);
|
||||
@ -776,6 +796,8 @@ obs_properties_t *transcription_filter_properties(void *data)
|
||||
ppts, "step_by_step_processing", MT_("step_by_step_processing"));
|
||||
obs_properties_add_int_slider(ppts, "step_size_msec", MT_("step_size_msec"), 1000,
|
||||
BUFFER_SIZE_MSEC, 50);
|
||||
obs_properties_add_int_slider(ppts, "min_sub_duration", MT_("min_sub_duration"), 1000, 5000,
|
||||
50);
|
||||
|
||||
obs_property_set_modified_callback(step_by_step_processing, [](obs_properties_t *props,
|
||||
obs_property_t *property,
|
||||
|
Loading…
Reference in New Issue
Block a user