|
|
import sys
import tempfile
from pathlib import Path
|
|
|
|
|
# Absolute path of the directory that contains this script.
APP_ROOT = Path(__file__).resolve().parent

# Put the script's directory at the front of the import path (once) so that
# the imports below search it first, regardless of the current working
# directory.
# NOTE(review): presumably this makes a TTS checkout bundled next to this
# script importable -- confirm against the deployment layout.
if str(APP_ROOT) not in sys.path:
    sys.path.insert(0, str(APP_ROOT))
|
|
|
|
|
# These imports must stay below the sys.path setup above: they are resolved
# after the script's directory has been placed on the import path.
import gradio as gr
import torch
from TTS.api import TTS

# Fail fast at startup if the monotonic_align extension module cannot be
# imported, instead of failing later in the middle of a synthesis request.
# The imported name is not used afterwards; the import itself is the check.
try:
    from TTS.tts.utils.monotonic_align import core as _monotonic_align_core
except ImportError as exc:
    raise RuntimeError(
        "monotonic_align extension is missing. Run `pip install -e .` during build to compile XTTS dependencies."
    ) from exc
|
|
|
|
|
|
|
|
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the XTTS v2 model once at import time and move it to the selected
# device; every call to voice_clone() reuses this single instance.
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
|
|
|
|
|
def voice_clone(text: str, speaker_wav: str, language: str) -> str:
    """Synthesize ``text`` using the voice from a reference recording.

    Args:
        text: The text to speak.
        speaker_wav: Path to the reference audio file whose voice is cloned.
        language: Language code handed to the model (for example ``"en"``).

    Returns:
        Path of the generated WAV file.

    Raises:
        gr.Error: If the text is blank, no reference audio was supplied, or
            no language was selected.
    """
    # Reject incomplete form submissions up front with a readable message
    # rather than letting them surface as an opaque error from the model.
    if not text or not text.strip():
        raise gr.Error("Please enter the text to synthesize.")
    if not speaker_wav:
        raise gr.Error("Please upload a reference audio file.")
    if not language:
        raise gr.Error("Please select a language.")

    print("Speaker wav:", speaker_wav)

    # Reserve a unique output path per request so overlapping requests can
    # never overwrite each other's result and nothing is written into the
    # current working directory. The handle is closed before synthesis so the
    # model can open the path itself.
    # NOTE: the file is intentionally not deleted here because the caller
    # still needs to read it after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        output_path = handle.name

    tts.tts_to_file(
        text=text,
        speaker_wav=speaker_wav,
        language=language,
        file_path=output_path,
    )
    return output_path
|
|
|
|
|
# Web UI: a text box, a reference-audio upload and a language selector feed
# voice_clone(); the returned file path is rendered as an audio player.
iface = gr.Interface(
    fn=voice_clone,
    theme="Nymbo/Nymbo_Theme",
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter the text...", label="Text"),
        gr.Audio(type="filepath", label="Upload audio file"),
        gr.Radio(
            ['ru', 'en', 'zh-cn', 'ja', 'de', 'fr', 'it', 'pt', 'pl', 'tr', 'ko', 'nl', 'cs', 'ar', 'es', 'hu'],
            label="language",
        ),
    ],
    outputs=gr.Audio(type="filepath", label="Generated audio file"),
    title="Voice Cloning",
)

# Start the web server with Gradio's default launch settings.
iface.launch()