// Mirror of https://github.com/occ-ai/obs-localvocal
// (synced 2024-11-07 18:57:14 +00:00; 81 lines, 2.1 KiB, C++)
#ifndef TRANSCRIPTION_FILTER_DATA_H
|
|
#define TRANSCRIPTION_FILTER_DATA_H
|
|
|
|
#include <obs.h>
|
|
#include <util/circlebuf.h>
|
|
#include <util/darray.h>
|
|
#include <media-io/audio-resampler.h>
|
|
|
|
#include <whisper.h>
|
|
|
|
#include <thread>
|
|
#include <memory>
|
|
#include <mutex>
|
|
#include <condition_variable>
|
|
#include <functional>
|
|
#include <string>
|
|
|
|
// Number of per-channel slots in the fixed-size buffer arrays of
// transcription_filter_data (copy_buffers / input_buffers).
#define MAX_PREPROC_CHANNELS 2

// Shorthand alias for obs_module_text.
#define MT_ obs_module_text
|
|
|
|
struct transcription_filter_data {
|
|
obs_source_t *context; // obs input source
|
|
size_t channels; // number of channels
|
|
uint32_t sample_rate; // input sample rate
|
|
// How many input frames (in input sample rate) are needed for the next whisper frame
|
|
size_t frames;
|
|
// How many ms/frames are needed to overlap with the next whisper frame
|
|
size_t overlap_frames;
|
|
size_t overlap_ms;
|
|
// How many frames were processed in the last whisper frame (this is dynamic)
|
|
size_t last_num_frames;
|
|
|
|
/* PCM buffers */
|
|
float *copy_buffers[MAX_PREPROC_CHANNELS];
|
|
struct circlebuf info_buffer;
|
|
struct circlebuf input_buffers[MAX_PREPROC_CHANNELS];
|
|
|
|
/* Resampler */
|
|
audio_resampler_t *resampler = nullptr;
|
|
|
|
/* whisper */
|
|
std::string whisper_model_path = "models/ggml-tiny.en.bin";
|
|
struct whisper_context *whisper_context = nullptr;
|
|
whisper_full_params whisper_params;
|
|
|
|
float filler_p_threshold;
|
|
|
|
bool do_silence;
|
|
bool vad_enabled;
|
|
int log_level;
|
|
bool log_words;
|
|
bool active = false;
|
|
|
|
// Text source to output the subtitles
|
|
obs_weak_source_t *text_source = nullptr;
|
|
char *text_source_name = nullptr;
|
|
std::mutex *text_source_mutex = nullptr;
|
|
// Callback to set the text in the output text source (subtitles)
|
|
std::function<void(const std::string &str)> setTextCallback;
|
|
// Output file path to write the subtitles
|
|
std::string output_file_path;
|
|
|
|
// Use std for thread and mutex
|
|
std::thread whisper_thread;
|
|
|
|
std::mutex *whisper_buf_mutex = nullptr;
|
|
std::mutex *whisper_ctx_mutex = nullptr;
|
|
std::condition_variable *wshiper_thread_cv = nullptr;
|
|
};
|
|
|
|
/**
 * Audio packet info.
 *
 * Deliberately left as a plain aggregate without member initializers so
 * that brace-initialisation and raw byte copies keep working.
 */
struct transcription_filter_audio_info {
	// Frame count recorded for the packet.
	uint32_t frames;
	// Timestamp recorded for the packet.
	// NOTE(review): unit is not stated in this header - confirm against the producer.
	uint64_t timestamp;
};
|
|
|
|
// Declaration only; the definition is not part of this header.
// NOTE(review): presumably delivers `str` as subtitle text for the filter
// instance `gf` (cf. transcription_filter_data::setTextCallback) - confirm
// against the definition.
void set_text_callback(struct transcription_filter_data *gf, const std::string &str);
|
|
|
|
#endif /* TRANSCRIPTION_FILTER_DATA_H */
|