// obs-localvocal/src/transcription-filter-data.h
// Roy Shilkrot 0fb43fd42a lint
// 2023-09-08 12:26:05 -04:00
//
// 81 lines
// 2.1 KiB
// C++

#ifndef TRANSCRIPTION_FILTER_DATA_H
#define TRANSCRIPTION_FILTER_DATA_H
#include <obs.h>
#include <util/circlebuf.h>
#include <util/darray.h>
#include <media-io/audio-resampler.h>
#include <whisper.h>
#include <thread>
#include <memory>
#include <mutex>
#include <condition_variable>
#include <functional>
#include <string>
// Upper bound on per-channel buffers; sizes the copy_buffers and
// input_buffers arrays in transcription_filter_data.
#define MAX_PREPROC_CHANNELS 2
// Shorthand alias for obs_module_text.
#define MT_ obs_module_text
/**
 * State carried by one instance of the transcription filter.
 *
 * Every member has a default member initializer (zero / null / false / empty
 * unless a different value is spelled out), so a default-initialized instance
 * never exposes indeterminate values.
 *
 * NOTE(review): the std::mutex / std::condition_variable members are raw
 * pointers; who allocates and releases them is not visible in this header —
 * confirm against the code that creates and destroys the filter.
 */
struct transcription_filter_data {
	obs_source_t *context = nullptr; // obs input source
	size_t channels = 0;             // number of channels
	uint32_t sample_rate = 0;        // input sample rate
	// How many input frames (in input sample rate) are needed for the next whisper frame
	size_t frames = 0;
	// How many ms/frames are needed to overlap with the next whisper frame
	size_t overlap_frames = 0;
	size_t overlap_ms = 0;
	// How many frames were processed in the last whisper frame (this is dynamic)
	size_t last_num_frames = 0;

	/* PCM buffers */
	// One slot per channel, up to MAX_PREPROC_CHANNELS; all null until assigned.
	float *copy_buffers[MAX_PREPROC_CHANNELS] = {};
	// Zero-filled by default.
	// NOTE(review): presumably holds transcription_filter_audio_info records — confirm.
	struct circlebuf info_buffer = {};
	// One buffer per channel, zero-filled by default.
	struct circlebuf input_buffers[MAX_PREPROC_CHANNELS] = {};

	/* Resampler */
	audio_resampler_t *resampler = nullptr;

	/* whisper */
	std::string whisper_model_path = "models/ggml-tiny.en.bin";
	struct whisper_context *whisper_context = nullptr;
	// Value-initialized here; expected to be filled in before use.
	whisper_full_params whisper_params = {};
	float filler_p_threshold = 0.0f;
	bool do_silence = false;
	bool vad_enabled = false;
	int log_level = 0;
	bool log_words = false;
	bool active = false;

	// Text source to output the subtitles
	obs_weak_source_t *text_source = nullptr;
	char *text_source_name = nullptr;
	std::mutex *text_source_mutex = nullptr;
	// Callback to set the text in the output text source (subtitles).
	// Empty by default: invoking it before assignment throws std::bad_function_call.
	std::function<void(const std::string &str)> setTextCallback;
	// Output file path to write the subtitles
	std::string output_file_path;

	// Use std for thread and mutex
	// Default-constructed std::thread: not joinable until a thread is assigned.
	std::thread whisper_thread;
	std::mutex *whisper_buf_mutex = nullptr;
	std::mutex *whisper_ctx_mutex = nullptr;
	// The identifier is misspelled ("wshiper"); it is kept as-is because other
	// translation units refer to it by this name.
	std::condition_variable *wshiper_thread_cv = nullptr;
};
// Audio packet info: a frame count paired with a timestamp.
// Both fields default to zero so a default-initialized record never carries
// indeterminate values. The type remains an aggregate (C++14 and later),
// trivially copyable and standard-layout.
struct transcription_filter_audio_info {
	// Frame count for the packet.
	uint32_t frames = 0;
	// Timestamp for the packet.
	// NOTE(review): unit is not stated in this header — confirm against the producer.
	uint64_t timestamp = 0;
};
// Declaration only; the definition is not part of this header.
// NOTE(review): presumably the routine bound to
// transcription_filter_data::setTextCallback, delivering `str` to the output
// text source for the filter instance `gf` — confirm against the definition.
void set_text_callback(struct transcription_filter_data *gf, const std::string &str);
#endif /* TRANSCRIPTION_FILTER_DATA_H */