Spaces:
Runtime error
Runtime error
Commit
·
39b6e8b
1
Parent(s):
d6c5415
Upload 4 files
Browse files- consts.py +3 -0
- utils_base.py +56 -0
- utils_label.py +99 -0
- utils_sambert.py +141 -0
consts.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Network port; presumably the one the web UI is served on — confirm at the launch site.
port = 6006
# ModelScope id of the base TTS model that is fine-tuned and used for inference.
base_model_id = 'damo/speech_personal_sambert-hifigan_nsf_tts_zh-cn_pretrain_16k'
# Revision of the base model requested when building the trainer.
base_model_version = 'v1.0.6'
|
utils_base.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 基础方法封装
|
| 2 |
+
import os
|
| 3 |
+
import shutil
|
| 4 |
+
import glob
|
| 5 |
+
|
| 6 |
+
# 绝对路径获取方法
|
| 7 |
+
curPath = os.path.dirname(os.path.abspath(__file__))


def getAbsPath(relativePath):
    """Resolve *relativePath* against the directory containing this module.

    Args:
        relativePath: Path expressed relative to this file's directory. An
            absolute path is returned normalized, because ``os.path.join``
            discards the base when the second argument is absolute.

    Returns:
        The normalized absolute path as a string.
    """
    # os.path.abspath already normalizes, so a separate normpath call is redundant.
    return os.path.abspath(os.path.join(curPath, relativePath))
|
| 13 |
+
|
| 14 |
+
# 数据集存放路径
|
| 15 |
+
# Directory that holds the datasets; created at import time when missing.
datasets_dir = getAbsPath('./datasets')
# exist_ok avoids the check-then-create race of a separate exists() test.
os.makedirs(datasets_dir, exist_ok=True)
|
| 18 |
+
|
| 19 |
+
# 获取数据集列表 ----------------------------------------------------
|
| 20 |
+
def get_dataset_list():
    """Return the names of the sub-directories of ``datasets_dir``.

    Returns:
        A list of directory names (not full paths), sorted so the result is
        stable across calls; ``os.listdir`` itself gives no ordering guarantee.
    """
    return sorted(
        entry
        for entry in os.listdir(datasets_dir)
        if os.path.isdir(os.path.join(datasets_dir, entry))
    )
|
| 28 |
+
|
| 29 |
+
# 小模型存放路径
|
| 30 |
+
# Directory that holds the fine-tuned models; created at import time when missing.
models_dir = getAbsPath('./models')
# exist_ok avoids the check-then-create race of a separate exists() test.
os.makedirs(models_dir, exist_ok=True)
|
| 33 |
+
|
| 34 |
+
# 获取模型列表 ----------------------------------------------------
|
| 35 |
+
def get_model_list():
    """Return the names of the sub-directories of ``models_dir``.

    Returns:
        A list of directory names (not full paths), sorted so the result is
        stable across calls; ``os.listdir`` itself gives no ordering guarantee.
    """
    return sorted(
        entry
        for entry in os.listdir(models_dir)
        if os.path.isdir(os.path.join(models_dir, entry))
    )
|
| 43 |
+
|
| 44 |
+
# 确保对应的空目录存在
|
| 45 |
+
def ensure_empty_dir(dirpath):
    """Make sure *dirpath* exists and is an empty directory.

    Whatever currently lives at *dirpath* is removed first: a directory tree
    is deleted recursively, a regular file or symlink is unlinked. Missing
    parent directories are created.

    Args:
        dirpath: Path of the directory to (re)create.

    Raises:
        OSError: If the existing entry cannot be removed or the directory
            cannot be created.
    """
    # Check for links first: rmtree refuses symlinks, and a dangling link
    # is invisible to os.path.exists but still blocks makedirs.
    if os.path.islink(dirpath) or os.path.isfile(dirpath):
        os.remove(dirpath)
    elif os.path.isdir(dirpath):
        shutil.rmtree(dirpath)
    os.makedirs(dirpath)
|
| 49 |
+
|
| 50 |
+
# 获取目录中的最后一个文件
|
| 51 |
+
def get_last_file(dirpath):
    """Return the entry of *dirpath* whose name sorts last, or ``False``.

    Names are compared in natural order: runs of digits are compared as
    integers, so ``checkpoint_200.pth`` ranks after ``checkpoint_80.pth``
    (a plain string sort would pick the latter).

    Args:
        dirpath: Directory to inspect. Entries starting with a dot are not
            matched by the ``*`` glob pattern and are therefore ignored.

    Returns:
        The full path of the last entry, or ``False`` when the directory has
        no matching entries.
    """
    import re

    def natural_key(path):
        name = os.path.basename(path)
        # re.split with a capturing group alternates text / digits, so odd
        # positions are always the digit runs; ints are never compared to str.
        tokens = re.split(r'(\d+)', name)
        numeric_aware = [
            int(token) if position % 2 else token
            for position, token in enumerate(tokens)
        ]
        # The raw name breaks ties such as 'a01' vs 'a1' deterministically.
        return numeric_aware, name

    files = glob.glob(os.path.join(dirpath, '*'))
    if not files:
        return False
    return max(files, key=natural_key)
|
utils_label.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 对 sambert 训练的数据标注处理
|
| 2 |
+
import os
|
| 3 |
+
import shutil
|
| 4 |
+
import uuid
|
| 5 |
+
import librosa
|
| 6 |
+
import gradio as gr
|
| 7 |
+
from scipy.io import wavfile
|
| 8 |
+
import numpy as np
|
| 9 |
+
import whisper
|
| 10 |
+
from modelscope.tools import run_auto_label
|
| 11 |
+
from utils_base import ensure_empty_dir, datasets_dir, get_dataset_list
|
| 12 |
+
|
| 13 |
+
# 绝对路径获取方法
|
| 14 |
+
curPath = os.path.dirname(os.path.abspath(__file__))


def getAbsPath(relativePath):
    """Resolve *relativePath* against the directory containing this module.

    Args:
        relativePath: Path expressed relative to this file's directory. An
            absolute path is returned normalized, because ``os.path.join``
            discards the base when the second argument is absolute.

    Returns:
        The normalized absolute path as a string.
    """
    # os.path.abspath already normalizes, so a separate normpath call is redundant.
    return os.path.abspath(os.path.join(curPath, relativePath))
|
| 20 |
+
|
| 21 |
+
# 初始化 whisper 模型的加载
|
| 22 |
+
# Load the Whisper model once at import time. A checkpoint file at model_path
# is preferred; otherwise the model name 'medium' is handed to whisper, which
# presumably fetches it on first use — confirm against the whisper docs.
model_path = getAbsPath('../../models/whisper/medium.pt')
whisper_model = whisper.load_model(
    model_path if os.path.exists(model_path) else 'medium'
)
|
| 28 |
+
|
| 29 |
+
# whisper 音频分割方法 ----------------------------------------------
|
| 30 |
+
def split_long_audio(model, filepaths, save_path, out_sr=44100):
    """Cut audio files into short WAV clips along Whisper segment boundaries.

    Each input file is transcribed with *model*; the audio is then loaded,
    silence-trimmed, resampled to *out_sr*, peak-normalized and sliced at the
    segment timestamps. Clips are written as 16-bit PCM files named
    ``"{file_idx}_{segment_idx}.wav"`` inside *save_path*.

    Args:
        model: Object with a ``transcribe`` method whose result contains a
            ``'segments'`` list of dicts with ``'start'`` and ``'end'`` keys
            (seconds); the module passes a loaded whisper model.
        filepaths: A single path or an iterable of paths.
        save_path: Directory receiving the clips; it must already exist.
        out_sr: Sample rate of the written clips.
    """
    # Accept a single path as well as a collection of paths.
    if isinstance(filepaths, str):
        filepaths = [filepaths]

    int16_max = np.iinfo(np.int16).max

    for file_idx, filepath in enumerate(filepaths):
        print(f"Transcribing file {file_idx}: '{filepath}' to segments...")
        result = model.transcribe(filepath, word_timestamps=True, task="transcribe", beam_size=5, best_of=5)
        segments = result['segments']

        wav, sr = librosa.load(filepath, sr=None, offset=0, duration=None, mono=True)
        wav, trim_index = librosa.effects.trim(wav, top_db=20)
        if wav.size == 0:
            # Nothing left after trimming; there is no audio to slice.
            continue

        # The timestamps were measured on the untrimmed file, so they must be
        # shifted by the amount of leading audio that trim() removed.
        lead_seconds = trim_index[0] / sr

        wav2 = librosa.resample(wav, orig_sr=sr, target_sr=out_sr)
        peak = np.abs(wav2).max()
        if peak > 0:
            # Scale to full range; skipped for pure silence to avoid 0/0.
            wav2 = wav2 / peak

        for i, seg in enumerate(segments):
            start_idx = max(int((seg['start'] - lead_seconds) * out_sr), 0)
            end_idx = max(int((seg['end'] - lead_seconds) * out_sr), 0)
            wav_seg = wav2[start_idx:end_idx]
            if wav_seg.size == 0:
                # Segment lies entirely inside trimmed silence or has zero length.
                continue
            out_fpath = os.path.join(save_path, f"{file_idx}_{i}.wav")
            wavfile.write(out_fpath, rate=out_sr, data=(wav_seg * int16_max).astype(np.int16))
|
| 58 |
+
|
| 59 |
+
# 自动标注与标注后的文件打包 --------------------------------------------
|
| 60 |
+
def auto_label(audio, name):
    """Split *audio* into clips and auto-label them as dataset *name*.

    The audio is segmented into a temporary directory, then
    ``run_auto_label`` writes its output to ``datasets_dir/<name>``. Any
    existing dataset of that name is replaced.

    Args:
        audio: Path, or collection of paths, of the audio to label.
        name: Name of the dataset directory to create. It must be a single
            path component.

    Returns:
        A ``(message, update)`` tuple: the status text and a ``gr.update``
        refreshing the dataset choices. The message is empty when the inputs
        are missing or *name* is not a plain directory name.
    """
    # `name` becomes a directory that is wiped below, so anything that could
    # point outside datasets_dir ('..', nested or absolute paths) is refused.
    name_is_safe = (
        isinstance(name, str)
        and name not in ('.', '..')
        and os.path.basename(name) == name
    )
    if not audio or not name or not name_is_safe:
        return '', gr.update(choices=get_dataset_list())

    # Scratch directory for the clips produced by the segmentation step.
    input_wav = getAbsPath(f'./temp/input-{ uuid.uuid4() }')
    ensure_empty_dir(input_wav)

    try:
        work_dir = os.path.join(datasets_dir, name)
        ensure_empty_dir(work_dir)

        # Segment the audio.
        split_long_audio(whisper_model, audio, input_wav)

        # Label the clips (per the original note, the first run downloads
        # the required model).
        run_auto_label(
            input_wav=input_wav,
            work_dir=work_dir,
            resource_revision='v1.0.7'
        )
    finally:
        # Remove the scratch directory even when labelling fails.
        shutil.rmtree(input_wav, ignore_errors=True)

    return '打标成功', gr.update(choices=get_dataset_list())
|
| 87 |
+
|
| 88 |
+
# 删除数据集 ----------------------------------------------------
|
| 89 |
+
# name - 删除的数据集名称
|
| 90 |
+
def delete_dataset(name):
    """Delete the dataset directory called *name*, best-effort.

    Args:
        name: Name of the dataset to delete. Only a single path component is
            accepted, so the deletion cannot reach outside ``datasets_dir``.

    Returns:
        A ``gr.update`` with the refreshed dataset choices. The selection is
        cleared unless *name* was empty.
    """
    if not name:
        return gr.update(choices=get_dataset_list())

    name_is_safe = (
        isinstance(name, str)
        and name not in ('.', '..')
        and os.path.basename(name) == name
    )
    if name_is_safe:
        try:
            shutil.rmtree(os.path.join(datasets_dir, name))
        except OSError as err:
            # Deletion stays best-effort, but the reason is no longer hidden.
            print(f"Failed to delete dataset '{name}': {err}")
    else:
        print(f"Refusing to delete dataset with unsafe name: {name!r}")

    return gr.update(choices=get_dataset_list(), value=None)
|
utils_sambert.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 训练部分实现
|
| 2 |
+
import os
|
| 3 |
+
import shutil
|
| 4 |
+
import uuid
|
| 5 |
+
import gradio as gr
|
| 6 |
+
from modelscope.models.audio.tts import SambertHifigan
|
| 7 |
+
from modelscope.pipelines import pipeline
|
| 8 |
+
from modelscope.utils.constant import Tasks
|
| 9 |
+
|
| 10 |
+
from modelscope.metainfo import Trainers
|
| 11 |
+
from modelscope.trainers import build_trainer
|
| 12 |
+
from modelscope.utils.audio.audio_utils import TtsTrainType
|
| 13 |
+
from modelscope.hub.utils.utils import get_cache_dir
|
| 14 |
+
|
| 15 |
+
from utils_base import ensure_empty_dir, get_last_file, models_dir, get_model_list
|
| 16 |
+
import consts
|
| 17 |
+
|
| 18 |
+
# 绝对路径获取方法
|
| 19 |
+
curPath = os.path.dirname(os.path.abspath(__file__))


def getAbsPath(relativePath):
    """Resolve *relativePath* against the directory containing this module.

    Args:
        relativePath: Path expressed relative to this file's directory. An
            absolute path is returned normalized, because ``os.path.join``
            discards the base when the second argument is absolute.

    Returns:
        The normalized absolute path as a string.
    """
    # os.path.abspath already normalizes, so a separate normpath call is redundant.
    return os.path.abspath(os.path.join(curPath, relativePath))
|
| 25 |
+
|
| 26 |
+
# 模型训练 ---------------------------------------------------------
|
| 27 |
+
# name - 训练结果(小模型)命名
|
| 28 |
+
# steps - 训练步数
|
| 29 |
+
# train_dataset_name - name of the dataset directory under ./datasets
|
| 30 |
+
def train(name, steps, train_dataset_name):
    """Fine-tune the base model on a dataset and store the result as *name*.

    Training runs in a temporary work directory. Afterwards the last
    checkpoint, ``config.yaml``, ``audio_config.yaml`` and ``se.npy`` are
    copied to ``models_dir/<name>``, replacing any model of that name. On
    success the training dataset is deleted, as in the original code.

    Args:
        name: Name of the resulting model directory.
        steps: Number of training steps; converted with ``int()``.
        train_dataset_name: Name of the dataset directory under ``./datasets``.

    Returns:
        A ``(message, update)`` tuple: the status text and a ``gr.update``
        refreshing the model choices.

    Raises:
        ValueError: If either name is empty or not a single path component.
        FileNotFoundError: If training produced no checkpoint.
    """
    # Both names end up in paths that get wiped or deleted below. An empty
    # string or '..' would otherwise target the whole models/datasets folder.
    for label, value in (('name', name), ('train_dataset_name', train_dataset_name)):
        if (
            not isinstance(value, str)
            or value in ('', '.', '..')
            or os.path.basename(value) != value
        ):
            raise ValueError(f'invalid {label}: {value!r}')

    # Temporary directory that receives the training output.
    work_dir = getAbsPath(f'./temp/work-{ uuid.uuid4() }')
    ensure_empty_dir(work_dir)

    # Dataset directory.
    train_dataset = getAbsPath(f'./datasets/{ train_dataset_name }')

    try:
        trainer = build_trainer(
            Trainers.speech_kantts_trainer,
            default_args=dict(
                # Model / revision to fine-tune.
                model=consts.base_model_id,
                model_revision=consts.base_model_version,
                work_dir=work_dir,
                train_dataset=train_dataset,
                train_type={
                    # Settings for training the acoustic model (sambert).
                    TtsTrainType.TRAIN_TYPE_SAMBERT: {
                        # int() guards against a float coming from a numeric widget.
                        'train_steps': int(steps) + 1,
                        'save_interval_steps': 20,  # checkpoint every N steps
                        'log_interval': 10,  # log every N steps
                    }
                },
            ),
        )
        trainer.train()

        # Look the checkpoint up before touching the target directory, so a
        # failed run does not destroy an existing model of the same name.
        last_ckpt = get_last_file(os.path.join(work_dir, 'tmp_am', 'ckpt'))
        if not last_ckpt:
            raise FileNotFoundError(
                f"no checkpoint found in {os.path.join(work_dir, 'tmp_am', 'ckpt')}"
            )

        # Copy only the files needed for inference into the result directory.
        target_dir = os.path.join(models_dir, name)
        ensure_empty_dir(target_dir)
        os.makedirs(os.path.join(target_dir, 'tmp_am', 'ckpt'))
        os.makedirs(os.path.join(target_dir, 'data', 'se'))

        shutil.copy(last_ckpt, os.path.join(target_dir, 'tmp_am', 'ckpt'))
        shutil.copy(
            os.path.join(work_dir, 'tmp_am', 'config.yaml'),
            os.path.join(target_dir, 'tmp_am'),
        )
        shutil.copy(
            os.path.join(work_dir, 'data', 'audio_config.yaml'),
            os.path.join(target_dir, 'data'),
        )
        shutil.copy(
            os.path.join(work_dir, 'data', 'se', 'se.npy'),
            os.path.join(target_dir, 'data', 'se'),
        )
    finally:
        # The work directory is removed whether or not training succeeded.
        shutil.rmtree(work_dir, ignore_errors=True)

    # The dataset is consumed by a successful run (original behavior).
    shutil.rmtree(train_dataset)

    return '训练完成', gr.update(choices=get_model_list())
|
| 90 |
+
|
| 91 |
+
# 模型推理 ---------------------------------------------------------
|
| 92 |
+
# name - 使用的小模型名称
|
| 93 |
+
# txt - 需要合成音频的文字
|
| 94 |
+
def infer(name, txt):
    """Synthesize *txt* with the fine-tuned model *name*.

    The acoustic-model files come from ``models_dir/<name>``; the vocoder
    files come from the base model inside the ModelScope cache directory.

    Args:
        name: Name of the fine-tuned model directory under ``models_dir``.
        txt: Text to synthesize.

    Returns:
        Path of the generated WAV file in the system temp directory, or
        ``False`` when anything fails (the traceback is printed).
    """
    import tempfile
    import traceback

    try:
        base_model_path = os.path.join(get_cache_dir(), consts.base_model_id)
        model_path = os.path.join(models_dir, name)
        custom_infer_abs = {
            'voice_name': 'F7',

            # Files from the fine-tuned model.
            'am_ckpt': os.path.join(model_path, 'tmp_am', 'ckpt'),
            'am_config': os.path.join(model_path, 'tmp_am', 'config.yaml'),
            'audio_config': os.path.join(model_path, 'data', 'audio_config.yaml'),
            'se_file': os.path.join(model_path, 'data', 'se', 'se.npy'),

            # Files from the base model.
            'voc_ckpt': os.path.join(
                base_model_path, 'basemodel_16k', 'hifigan', 'ckpt'
            ),
            'voc_config': os.path.join(
                base_model_path, 'basemodel_16k', 'hifigan', 'config.yaml'
            ),
        }

        model = SambertHifigan(base_model_path, custom_ckpt=custom_infer_abs)
        inference = pipeline(task=Tasks.text_to_speech, model=model)
        output = inference(input=txt)

        # gettempdir() resolves to /tmp on a default Linux setup but also
        # honors TMPDIR and works on other platforms.
        output_path = os.path.join(tempfile.gettempdir(), f'{ uuid.uuid4() }.wav')
        # Exclusive-create mode: never overwrite an existing file.
        with open(output_path, mode='xb') as f:
            f.write(output['output_wav'])
        return output_path
    except Exception:
        # Callers still receive False, but the cause is no longer discarded.
        traceback.print_exc()
        return False
|
| 129 |
+
|
| 130 |
+
# 删除模型 ---------------------------------------------------------
|
| 131 |
+
# name - 删除的小模型名称
|
| 132 |
+
def delete_model(name):
    """Delete the fine-tuned model directory called *name*, best-effort.

    Args:
        name: Name of the model to delete. Only a single path component is
            accepted, so the deletion cannot reach outside ``models_dir``.

    Returns:
        A ``gr.update`` with the refreshed model choices. The selection is
        cleared unless *name* was empty.
    """
    if not name:
        return gr.update(choices=get_model_list())

    name_is_safe = (
        isinstance(name, str)
        and name not in ('.', '..')
        and os.path.basename(name) == name
    )
    if name_is_safe:
        try:
            shutil.rmtree(os.path.join(models_dir, name))
        except OSError as err:
            # Deletion stays best-effort, but the reason is no longer hidden.
            print(f"Failed to delete model '{name}': {err}")
    else:
        print(f"Refusing to delete model with unsafe name: {name!r}")

    return gr.update(choices=get_model_list(), value=None)
|