mirror of
https://github.com/myshell-ai/OpenVoice
synced 2024-11-22 15:09:21 +00:00
add audio hash to audio name to avoid confusion
This commit is contained in:
parent
1445029808
commit
9d4342cafd
@ -5,19 +5,21 @@ from glob import glob
|
||||
import numpy as np
|
||||
from pydub import AudioSegment
|
||||
from faster_whisper import WhisperModel
|
||||
import hashlib
|
||||
import base64
|
||||
import librosa
|
||||
from whisper_timestamped.transcribe import get_audio_tensor, get_vad_segments
|
||||
|
||||
model_size = "medium"
|
||||
# Run on GPU with FP16
|
||||
model = None
|
||||
def split_audio_whisper(audio_path, target_dir='processed'):
|
||||
def split_audio_whisper(audio_path, target_dir, audio_name):
|
||||
global model
|
||||
if model is None:
|
||||
model = WhisperModel(model_size, device="cuda", compute_type="float16")
|
||||
audio = AudioSegment.from_file(audio_path)
|
||||
max_len = len(audio)
|
||||
|
||||
audio_name = os.path.basename(audio_path).rsplit('.', 1)[0]
|
||||
target_folder = os.path.join(target_dir, audio_name)
|
||||
|
||||
segments, info = model.transcribe(audio_path, beam_size=5, word_timestamps=True)
|
||||
@ -69,7 +71,7 @@ def split_audio_whisper(audio_path, target_dir='processed'):
|
||||
return wavs_folder
|
||||
|
||||
|
||||
def split_audio_vad(audio_path, target_dir, split_seconds=10.0):
|
||||
def split_audio_vad(audio_path, target_dir, audio_name, split_seconds=10.0):
|
||||
SAMPLE_RATE = 16000
|
||||
audio_vad = get_audio_tensor(audio_path)
|
||||
segments = get_vad_segments(
|
||||
@ -90,7 +92,6 @@ def split_audio_vad(audio_path, target_dir, split_seconds=10.0):
|
||||
|
||||
audio_dur = audio_active.duration_seconds
|
||||
print(f'after vad: dur = {audio_dur}')
|
||||
audio_name = os.path.basename(audio_path).rsplit('.', 1)[0]
|
||||
target_folder = os.path.join(target_dir, audio_name)
|
||||
wavs_folder = os.path.join(target_folder, 'wavs')
|
||||
os.makedirs(wavs_folder, exist_ok=True)
|
||||
@ -112,13 +113,18 @@ def split_audio_vad(audio_path, target_dir, split_seconds=10.0):
|
||||
return wavs_folder
|
||||
|
||||
|
||||
|
||||
def hash_numpy_array(array):
    """Return a short textual fingerprint of an array's raw bytes.

    The bytes obtained from ``array.tobytes()`` are hashed with SHA-256,
    the binary digest is base64-encoded, and the first 16 characters of
    that encoding are returned with every ``/`` replaced by ``&``.

    Args:
        array: Object exposing ``tobytes()`` (e.g. a NumPy array).

    Returns:
        A 16-character ``str`` that contains no ``/``.
    """
    digest = hashlib.sha256(array.tobytes()).digest()
    encoded = base64.b64encode(digest).decode('utf-8')
    # NOTE(review): '/' is presumably swapped out because the result is
    # embedded in a directory name -- confirm against the caller.
    return encoded[:16].replace('/', '&')
|
||||
|
||||
|
||||
def get_se(audio_path, vc_model, target_dir='processed', vad=True):
|
||||
device = vc_model.device
|
||||
|
||||
audio_name = os.path.basename(audio_path).rsplit('.', 1)[0]
|
||||
audio_hash = hash_numpy_array(librosa.load(audio_path, mono=True)[0])
|
||||
audio_name = os.path.basename(audio_path).rsplit('.', 1)[0] + '_' + audio_hash
|
||||
se_path = os.path.join(target_dir, audio_name, 'se.pth')
|
||||
|
||||
if os.path.isfile(se_path):
|
||||
@ -127,9 +133,9 @@ def get_se(audio_path, vc_model, target_dir='processed', vad=True):
|
||||
if os.path.isdir(audio_path):
|
||||
wavs_folder = audio_path
|
||||
elif vad:
|
||||
wavs_folder = split_audio_vad(audio_path, target_dir)
|
||||
wavs_folder = split_audio_vad(audio_path, target_dir, audio_name)
|
||||
else:
|
||||
wavs_folder = split_audio_whisper(audio_path, target_dir)
|
||||
wavs_folder = split_audio_whisper(audio_path, target_dir, audio_name)
|
||||
|
||||
audio_segs = glob(f'{wavs_folder}/*.wav')
|
||||
if len(audio_segs) == 0:
|
||||
|
Loading…
Reference in New Issue
Block a user