mirror of
https://github.com/occ-ai/obs-localvocal
synced 2024-11-07 18:57:14 +00:00
remove unneeded stuff
This commit is contained in:
parent
e246e80448
commit
86e719150d
@ -6,7 +6,6 @@
|
|||||||
#include "whisper-processing.h"
|
#include "whisper-processing.h"
|
||||||
#include "whisper-language.h"
|
#include "whisper-language.h"
|
||||||
|
|
||||||
|
|
||||||
inline enum speaker_layout convert_speaker_layout(uint8_t channels)
|
inline enum speaker_layout convert_speaker_layout(uint8_t channels)
|
||||||
{
|
{
|
||||||
switch (channels) {
|
switch (channels) {
|
||||||
@ -167,9 +166,7 @@ void transcription_filter_update(void *data, obs_data_t *s)
|
|||||||
struct transcription_filter_data *gf =
|
struct transcription_filter_data *gf =
|
||||||
static_cast<struct transcription_filter_data *>(data);
|
static_cast<struct transcription_filter_data *>(data);
|
||||||
|
|
||||||
gf->filler_p_threshold = (float)obs_data_get_double(s, "filler_p_threshold");
|
|
||||||
gf->log_level = (int)obs_data_get_int(s, "log_level");
|
gf->log_level = (int)obs_data_get_int(s, "log_level");
|
||||||
gf->do_silence = obs_data_get_bool(s, "do_silence");
|
|
||||||
gf->vad_enabled = obs_data_get_bool(s, "vad_enabled");
|
gf->vad_enabled = obs_data_get_bool(s, "vad_enabled");
|
||||||
gf->log_words = obs_data_get_bool(s, "log_words");
|
gf->log_words = obs_data_get_bool(s, "log_words");
|
||||||
|
|
||||||
@ -381,8 +378,6 @@ void transcription_filter_deactivate(void *data)
|
|||||||
|
|
||||||
void transcription_filter_defaults(obs_data_t *s)
|
void transcription_filter_defaults(obs_data_t *s)
|
||||||
{
|
{
|
||||||
obs_data_set_default_double(s, "filler_p_threshold", 0.75);
|
|
||||||
obs_data_set_default_bool(s, "do_silence", true);
|
|
||||||
obs_data_set_default_bool(s, "vad_enabled", true);
|
obs_data_set_default_bool(s, "vad_enabled", true);
|
||||||
obs_data_set_default_int(s, "log_level", LOG_DEBUG);
|
obs_data_set_default_int(s, "log_level", LOG_DEBUG);
|
||||||
obs_data_set_default_bool(s, "log_words", true);
|
obs_data_set_default_bool(s, "log_words", true);
|
||||||
@ -419,16 +414,13 @@ obs_properties_t *transcription_filter_properties(void *data)
|
|||||||
{
|
{
|
||||||
obs_properties_t *ppts = obs_properties_create();
|
obs_properties_t *ppts = obs_properties_create();
|
||||||
|
|
||||||
obs_properties_add_float_slider(ppts, "filler_p_threshold", "filler_p_threshold", 0.0f,
|
obs_properties_add_bool(ppts, "vad_enabled", "VAD Enabled");
|
||||||
1.0f, 0.05f);
|
obs_property_t *list = obs_properties_add_list(ppts, "log_level", "Log level",
|
||||||
obs_properties_add_bool(ppts, "do_silence", "do_silence");
|
|
||||||
obs_properties_add_bool(ppts, "vad_enabled", "vad_enabled");
|
|
||||||
obs_property_t *list = obs_properties_add_list(ppts, "log_level", "log_level",
|
|
||||||
OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_INT);
|
OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_INT);
|
||||||
obs_property_list_add_int(list, "DEBUG", LOG_DEBUG);
|
obs_property_list_add_int(list, "DEBUG", LOG_DEBUG);
|
||||||
obs_property_list_add_int(list, "INFO", LOG_INFO);
|
obs_property_list_add_int(list, "INFO", LOG_INFO);
|
||||||
obs_property_list_add_int(list, "WARNING", LOG_WARNING);
|
obs_property_list_add_int(list, "WARNING", LOG_WARNING);
|
||||||
obs_properties_add_bool(ppts, "log_words", "log_words");
|
obs_properties_add_bool(ppts, "log_words", "Log output words");
|
||||||
|
|
||||||
obs_property_t *sources = obs_properties_add_list(ppts, "subtitle_sources",
|
obs_property_t *sources = obs_properties_add_list(ppts, "subtitle_sources",
|
||||||
"subtitle_sources", OBS_COMBO_TYPE_LIST,
|
"subtitle_sources", OBS_COMBO_TYPE_LIST,
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
|
|
||||||
#include "plugin-support.h"
|
#include "plugin-support.h"
|
||||||
#include "transcription-filter-data.h"
|
#include "transcription-filter-data.h"
|
||||||
|
#include "whisper-processing.h"
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cctype>
|
#include <cctype>
|
||||||
@ -286,12 +287,15 @@ void process_audio_from_buffer(struct transcription_filter_data *gf)
|
|||||||
"audio processing took too long (%d ms), reducing overlap to %lu ms",
|
"audio processing took too long (%d ms), reducing overlap to %lu ms",
|
||||||
(int)duration, gf->overlap_ms);
|
(int)duration, gf->overlap_ms);
|
||||||
} else if (!skipped_inference) {
|
} else if (!skipped_inference) {
|
||||||
// try to increase overlap up to 75% of the segment
|
if (gf->overlap_ms < OVERLAP_SIZE_MSEC) {
|
||||||
gf->overlap_ms = std::min((uint64_t)gf->overlap_ms + 10,
|
// try to increase overlap up to OVERLAP_SIZE_MSEC
|
||||||
(uint64_t)((float)new_frames_from_infos_ms * 0.75f));
|
gf->overlap_ms = std::min((uint64_t)gf->overlap_ms + 10,
|
||||||
gf->overlap_frames = gf->overlap_ms * gf->sample_rate / 1000;
|
(uint64_t)OVERLAP_SIZE_MSEC);
|
||||||
obs_log(gf->log_level, "audio processing took %d ms, increasing overlap to %lu ms",
|
gf->overlap_frames = gf->overlap_ms * gf->sample_rate / 1000;
|
||||||
(int)duration, gf->overlap_ms);
|
obs_log(gf->log_level,
|
||||||
|
"audio processing took %d ms, increasing overlap to %lu ms",
|
||||||
|
(int)duration, gf->overlap_ms);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
// at 16Khz, 3000 msec is 48000 samples
|
// at 16Khz, 3000 msec is 48000 samples
|
||||||
#define WHISPER_FRAME_SIZE 48000
|
#define WHISPER_FRAME_SIZE 48000
|
||||||
// overlap in msec
|
// overlap in msec
|
||||||
#define OVERLAP_SIZE_MSEC 340
|
#define OVERLAP_SIZE_MSEC 200
|
||||||
|
|
||||||
void whisper_loop(void *data);
|
void whisper_loop(void *data);
|
||||||
struct whisper_context *init_whisper_context(const std::string &model_path);
|
struct whisper_context *init_whisper_context(const std::string &model_path);
|
||||||
|
Loading…
Reference in New Issue
Block a user