Spaces:
Runtime error
Runtime error
| # 对 sambert 训练的数据标注处理 | |
| import os | |
| import shutil | |
| import uuid | |
| import librosa | |
| import gradio as gr | |
| from scipy.io import wavfile | |
| import numpy as np | |
| import whisper | |
| from modelscope.tools import run_auto_label | |
| from utils_base import ensure_empty_dir, datasets_dir, get_dataset_list | |
# Helper for resolving paths relative to the directory containing this file
curPath = os.path.dirname(os.path.abspath(__file__))


def getAbsPath(relativePath):
    """Resolve ``relativePath`` against ``curPath``.

    Returns the joined path made absolute and normalised (``..`` and ``.``
    components collapsed).
    """
    return os.path.normpath(os.path.abspath(os.path.join(curPath, relativePath)))
# Load the Whisper model once, at import time.
# A local checkpoint next to the project is preferred; when it is absent the
# model is requested by its name ('medium') instead.
model_path = getAbsPath('../../models/whisper/medium.pt')
# Use os.path directly rather than reaching it through shutil's internal
# `os` attribute, which is not part of shutil's documented API.
whisper_model = whisper.load_model(
    model_path if os.path.exists(model_path) else 'medium'
)
# Whisper-based audio splitting ----------------------------------------------
def split_long_audio(model, filepaths, save_path, out_sr=44100):
    """Cut recordings into per-segment WAV clips using transcription timestamps.

    Each input file is transcribed with ``model.transcribe``. The waveform is
    loaded as mono, trimmed of leading/trailing silence, resampled to
    ``out_sr`` and peak-normalised; every transcription segment is then sliced
    out and written as 16-bit PCM to ``save_path`` under the name
    ``{file_idx}_{segment_idx}.wav``.

    Args:
        model: Object exposing ``transcribe(path, ...)`` whose result has a
            ``'segments'`` list of items with ``'start'`` and ``'end'`` keys
            (used here as seconds).
        filepaths: A single path string or an iterable of paths.
        save_path: Output directory; created if it does not exist.
        out_sr: Sample rate of the written clips.

    Returns:
        None. The clips are produced as files in ``save_path``.
    """
    # Accept both a single path and a collection of paths.
    if isinstance(filepaths, str):
        filepaths = [filepaths]

    os.makedirs(save_path, exist_ok=True)
    int16_max = np.iinfo(np.int16).max

    for file_idx, filepath in enumerate(filepaths):
        print(f"Transcribing file {file_idx}: '{filepath}' to segments...")
        result = model.transcribe(filepath, word_timestamps=True, task="transcribe", beam_size=5, best_of=5)
        segments = result['segments']

        # Load with librosa, write with scipy.
        wav, sr = librosa.load(filepath, sr=None, offset=0, duration=None, mono=True)
        wav, trim_index = librosa.effects.trim(wav, top_db=20)
        if wav.size == 0:
            # Nothing but silence: there is no signal to cut segments from.
            print(f"Skipping file {file_idx}: '{filepath}' has no non-silent audio")
            continue

        # The transcription timestamps refer to the untrimmed file, while the
        # samples below come from the trimmed signal. Shift every timestamp by
        # the amount of leading audio that trimming removed.
        trim_offset = trim_index[0] / sr

        peak = np.abs(wav).max()
        if peak > 1.0:
            wav = 0.98 * wav / peak
        wav2 = librosa.resample(wav, orig_sr=sr, target_sr=out_sr)
        resampled_peak = np.abs(wav2).max() if wav2.size else 0.0
        if resampled_peak > 0:
            # Guarded so an all-zero signal does not divide by zero.
            wav2 = wav2 / resampled_peak

        # Slice the long recording into short clips in the target directory.
        for i, seg in enumerate(segments):
            # Clamp at 0: a negative index would wrap around to the array end.
            start_idx = max(int((seg['start'] - trim_offset) * out_sr), 0)
            end_idx = max(int((seg['end'] - trim_offset) * out_sr), 0)
            wav_seg = wav2[start_idx:end_idx]
            if wav_seg.size == 0:
                # Segment lies entirely in trimmed silence; skip the empty clip
                # but keep numbering aligned with the transcription segments.
                continue
            out_fpath = os.path.join(save_path, f"{file_idx}_{i}.wav")
            wavfile.write(out_fpath, rate=out_sr, data=(wav_seg * int16_max).astype(np.int16))
# Automatic labelling and packaging of the labelled files ----------------------
def auto_label(audio, name):
    """Split the uploaded audio and run automatic labelling into a dataset.

    Args:
        audio: Path (or paths) of the audio to label, forwarded to
            ``split_long_audio``.
        name: Name of the dataset; used as a directory name under
            ``datasets_dir``.

    Returns:
        A ``(message, update)`` pair: a status string (empty when the input is
        missing or the name is rejected) and a ``gr.update`` refreshing the
        dataset choices.
    """
    if not audio or not name:
        return '', gr.update(choices=get_dataset_list())

    # `name` comes from the UI and becomes part of a path that is handed to
    # ensure_empty_dir (presumably emptying it — confirm in utils_base).
    # Accept only a plain directory name so it cannot point outside
    # datasets_dir.
    if name in ('.', '..') or os.path.basename(name) != name:
        return '', gr.update(choices=get_dataset_list())

    # Temporary directory holding the split clips that are fed to the labeller.
    input_wav = getAbsPath(f'./temp/input-{uuid.uuid4()}')
    ensure_empty_dir(input_wav)
    try:
        work_dir = os.path.join(datasets_dir, name)
        ensure_empty_dir(work_dir)

        # Split the audio into clips.
        split_long_audio(whisper_model, audio, input_wav)

        # Label the clips automatically.
        run_auto_label(
            input_wav=input_wav,
            work_dir=work_dir,
            resource_revision='v1.0.7',
        )
    finally:
        # Always drop the temporary clips, including when splitting or
        # labelling fails; cleanup is best-effort so it never masks the
        # original error.
        shutil.rmtree(input_wav, ignore_errors=True)

    return '打标成功', gr.update(choices=get_dataset_list())
# Delete a dataset --------------------------------------------------------------
# name - name of the dataset to delete
def delete_dataset(name):
    """Remove the dataset directory ``name`` under ``datasets_dir``.

    Deletion is best-effort: a failure is reported on stdout and the handler
    still returns normally.

    Args:
        name: Name of the dataset directory to delete.

    Returns:
        A ``gr.update`` with the refreshed dataset choices. The selected value
        is cleared unless ``name`` was empty.
    """
    if not name:
        return gr.update(choices=get_dataset_list())

    try:
        # Only a plain directory name is accepted, so the recursive delete
        # cannot be pointed outside datasets_dir.
        if name in ('.', '..') or os.path.basename(name) != name:
            raise ValueError('invalid dataset name')
        shutil.rmtree(os.path.join(datasets_dir, name))
    except Exception as err:
        # UI handler boundary: stay best-effort, but make the failure visible
        # instead of swallowing it silently.
        print(f"Failed to delete dataset {name!r}: {err}")

    return gr.update(choices=get_dataset_list(), value=None)