Spaces:
Paused
Paused
| import os | |
| import json | |
| import random | |
| import argparse | |
| from joblib import Parallel, delayed | |
| from tqdm import tqdm | |
| from pathlib import Path | |
def GetMetaInfo(wav_path):
    '''
    Collect the paths of one wav file and of its pre-computed feature files.

    The feature files are located by taking the position of `wav_path` relative to
    the module-level `data_root` and mirroring it under the module-level `wavlm_dir`,
    `pitch_dir` and `ld_dir` (these globals are assigned by `main`), with the suffix
    replaced by `.pt` (wavlm) or `.npy` (pitch, loudness).

    Args:
        wav_path (Path): Path to a wav file located under `data_root`.
    Returns:
        list[str]: `[wav_path, wavlm_path, pitch_path, ld_path]` as strings.
    Raises:
        ValueError: If `wav_path` is not located under `data_root` (from `Path.relative_to`).
        FileNotFoundError: If any of the three feature files does not exist.
    '''
    relative_path = wav_path.relative_to(data_root)
    feature_paths = [
        (wavlm_dir / relative_path).with_suffix('.pt'),
        (pitch_dir / relative_path).with_suffix('.npy'),
        (ld_dir / relative_path).with_suffix('.npy'),
    ]
    for feature_path in feature_paths:
        # Raise explicitly instead of using `assert`: assertions are stripped when
        # Python runs with -O, which would let missing features slip into the metadata.
        if not feature_path.is_file():
            raise FileNotFoundError(f'{feature_path} does not exist.')
    return [str(wav_path)] + [str(feature_path) for feature_path in feature_paths]
def SplitDataset(
    wav_list: list[Path],
    train_valid_ratio=0.9,
    test_spk_list=('M26', 'M27', 'W46', 'W47'),
    seed=None,
):
    '''
    Split the dataset into train set, valid set, and test set.

    By default, it considers the OpenSinger dataset's 26th and 27th male singers (M26, M27) and
    46th and 47th female singers (W46, W47) as the test set.
    The remaining singers' audio files are shuffled and divided into the train set and the
    valid set in a 9:1 ratio.

    The singer ID of a file is built from the first character of the name of its grandparent
    directory followed by the part of the file stem before the first underscore
    (e.g. `ManRaw/26_song/26_song_0.wav` -> `M26`).

    Args:
        wav_list (list[Path]): List of Path objects representing the paths to the wav files.
        train_valid_ratio (float, optional): Fraction of the non-test files assigned to the
            train set; the rest goes to the valid set. Defaults to 0.9.
        test_spk_list (Iterable[str], optional): Singer IDs to be included in the test set.
            Defaults to ('M26', 'M27', 'W46', 'W47').
        seed (int, optional): Seed for the shuffle. When given, the split is reproducible for
            a given input order. Defaults to None, which uses the global `random` state.
    Returns:
        Tuple[list[Path], list[Path], list[Path]]: Tuple containing the train set, valid set,
        and test set as lists of Path objects.
    '''
    # Build the lookup once; membership is then O(1) per file.
    test_speakers = set(test_spk_list)

    remaining = []
    test_list = []
    for wav_file in wav_list:
        singer = wav_file.parent.parent.name[0] + wav_file.stem.split('_')[0]
        if singer in test_speakers:
            test_list.append(wav_file)
        else:
            remaining.append(wav_file)

    if seed is None:
        # Keep using the module-level generator so a caller's random.seed() still applies.
        random.shuffle(remaining)
    else:
        # A private generator gives a reproducible split without touching global state.
        random.Random(seed).shuffle(remaining)

    train_valid_split = int(len(remaining) * train_valid_ratio)
    train_list = remaining[:train_valid_split]
    valid_list = remaining[train_valid_split:]
    return train_list, valid_list, test_list
def GenMetadata(data_root, wav_list, mode, n_jobs=10):
    '''
    Generate the metadata file for one split of the dataset.

    Calls `GetMetaInfo` on every wav file in parallel and dumps the collected
    entries as a JSON list to `<data_root>/<mode>.json`.
    Note that `GetMetaInfo` reads the module-level directory globals assigned by
    `main`, not the `data_root` argument passed here.

    Args:
        data_root (Path): Directory in which the metadata file is written.
        wav_list (list[Path]): Wav files belonging to this split.
        mode (str): Name of the split; used as the stem of the JSON file.
        n_jobs (int, optional): Number of parallel joblib workers. Defaults to 10.
    '''
    results = Parallel(n_jobs=n_jobs)(
        delayed(GetMetaInfo)(wav_path) for wav_path in tqdm(wav_list)
    )
    # Explicit encoding so the written file does not depend on the platform default.
    with open(data_root / f'{mode}.json', 'w', encoding='utf-8') as f:
        json.dump(results, f)
def main(args):
    '''
    Build the train/valid/test metadata files for the dataset under `args.data_root`.

    Assigns the module-level `data_root`, `wavlm_dir`, `pitch_dir` and `ld_dir`
    (read by `GetMetaInfo`), collects every `*.wav` file below `data_root`, splits
    them with `SplitDataset`, and writes `train.json`, `valid.json` and `test.json`
    into `data_root` via `GenMetadata`.

    Args:
        args: Parsed command-line arguments with the attributes `data_root`,
            `wavlm_dir`, `pitch_dir` and `ld_dir`. The last three may be None, in
            which case `wavlm_features`, `pitch` and `loudness` under `data_root`
            are used.
    Raises:
        NotADirectoryError: If `args.data_root` is not an existing directory.
    '''
    global data_root, wavlm_dir, pitch_dir, ld_dir
    data_root = Path(args.data_root)
    # Fail early with a clear message instead of crashing later when the JSON file is opened.
    if not data_root.is_dir():
        raise NotADirectoryError(f'{data_root} is not an existing directory.')

    wavlm_dir = Path(args.wavlm_dir) if args.wavlm_dir is not None else data_root / 'wavlm_features'
    pitch_dir = Path(args.pitch_dir) if args.pitch_dir is not None else data_root / 'pitch'
    ld_dir = Path(args.ld_dir) if args.ld_dir is not None else data_root / 'loudness'

    # Directory traversal order depends on the filesystem; sorting gives the split a
    # stable starting order across machines.
    wav_list = sorted(data_root.rglob('*.wav'))

    train_list, valid_list, test_list = SplitDataset(wav_list)
    for mode, split in (('train', train_list), ('valid', valid_list), ('test', test_list)):
        GenMetadata(data_root, split, mode)
if __name__ == '__main__':
    # Command-line entry point: --data_root is mandatory, the three feature
    # directories are optional and are left as None when not supplied.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--data_root', required=True, type=str,
        help='Directory of audios for the dataset.',
    )
    optional_dirs = (
        ('--wavlm_dir', 'Directory of wavlm features for the dataset.'),
        ('--pitch_dir', 'Directory of pitch for the dataset.'),
        ('--ld_dir', 'Directory of loudness for the dataset.'),
    )
    for flag, help_text in optional_dirs:
        parser.add_argument(flag, type=str, help=help_text)

    args = parser.parse_args()
    main(args)