NeuCoSVC-demo

Paused

App Files Files Community

NeuCoSVC-demo / dataset /metadata.py

kevinwang676

Upload folder using huggingface_hub

cfdc687 almost 2 years ago

raw

history blame contribute delete

3.75 kB

	import os
	import json
	import random
	import argparse
	from joblib import Parallel, delayed
	from tqdm import tqdm
	from pathlib import Path


	def GetMetaInfo(wav_path):
	    '''
	    Collect the feature file paths that belong to one wav file.

	    The feature paths are built by taking the position of the wav file relative to
	    data_root, re-rooting it under wavlm_dir, pitch_dir and ld_dir, and swapping the
	    suffix for '.pt' (wavlm) or '.npy' (pitch and loudness).

	    This function reads the module-level globals data_root, wavlm_dir, pitch_dir and
	    ld_dir, so they must be assigned before it is called (main() does this).

	    Args:
	        wav_path (Path): Path to a wav file located under data_root.

	    Returns:
	        list[str]: [wav path, wavlm feature path, pitch path, loudness path].

	    Raises:
	        ValueError: If wav_path is not located under data_root (raised by Path.relative_to).
	        FileNotFoundError: If any of the three feature files does not exist.

	    '''
	    relative_path = wav_path.relative_to(data_root)
	    feature_paths = [
	        (wavlm_dir/relative_path).with_suffix('.pt'),
	        (pitch_dir/relative_path).with_suffix('.npy'),
	        (ld_dir/relative_path).with_suffix('.npy'),
	    ]

	    # Raise explicitly instead of using assert: assertions are removed under
	    # `python -O`, which would let metadata reference files that are missing.
	    for feature_path in feature_paths:
	        if not os.path.isfile(feature_path):
	            raise FileNotFoundError(f'{feature_path} does not exist.')

	    return [str(wav_path), *(str(feature_path) for feature_path in feature_paths)]


	def SplitDataset(wav_list:list[Path], train_valid_ratio=0.9, test_spk_list=('M26','M27','W46','W47')):
	    '''
	    Split the dataset into train set, valid set, and test set.
	    By default, it considers the OpenSinger dataset's 26th and 27th male singers (M26, M27) and
	    46th and 47th female singers (W46, W47) as the test set.
	    The remaining singers' audio files are shuffled and divided into the train set and the valid set
	    (9:1 by default).

	    The singer ID of a file is the first character of its grandparent directory name followed by
	    the part of the file stem before the first underscore, e.g. a path ending in
	    'ManRaw/26_song/26_song_0.wav' yields 'M26'.

	    Args:
	        wav_list (list[Path]): List of Path objects representing the paths to the wav files.
	        train_valid_ratio (float, optional): Fraction of the non-test files assigned to the train set;
	            the rest of the non-test files form the valid set. Defaults to 0.9.
	        test_spk_list (sequence of str, optional): Singer IDs to be included in the test set.
	            Defaults to ('M26', 'M27', 'W46', 'W47').

	    Returns:
	        Tuple[list[Path], list[Path], list[Path]]: Tuple containing the train set, valid set, and test set as lists of Path objects.
	        The test set keeps the order of wav_list; the train and valid sets are in shuffled order.

	    '''
	    train_valid_list = []
	    test_list = []

	    for wav_file in wav_list:
	        singer = wav_file.parent.parent.name[0] + wav_file.stem.split('_')[0]
	        if singer in test_spk_list:
	            test_list.append(wav_file)
	        else:
	            train_valid_list.append(wav_file)

	    # Shuffle before cutting so the train/valid assignment is random.
	    random.shuffle(train_valid_list)

	    train_valid_split = int(len(train_valid_list) * train_valid_ratio)

	    return train_valid_list[:train_valid_split], train_valid_list[train_valid_split:], test_list


	def GenMetadata(data_root, wav_list, mode):
	    '''
	    Write the metadata file of one dataset split.

	    GetMetaInfo is run over every wav file using 10 parallel joblib jobs (with a tqdm
	    progress bar over the inputs), and the list of results is dumped as JSON to
	    data_root/<mode>.json.

	    Args:
	        data_root (Path): Directory in which the metadata file is created.
	        wav_list (list[Path]): Wav files belonging to this split.
	        mode (str): Split name, used as the stem of the output file.
	    '''
	    pool = Parallel(n_jobs=10)
	    results = pool(delayed(GetMetaInfo)(path) for path in tqdm(wav_list))

	    metadata_file = data_root/f'{mode}.json'
	    with open(metadata_file, 'w') as out:
	        json.dump(results, out)


	def main(args):
	    '''
	    Resolve the dataset directories, split the wav files, and write one metadata file per split.

	    Args:
	        args: Parsed command-line arguments providing data_root, wavlm_dir, pitch_dir and ld_dir.
	            A feature directory left as None falls back to a sub-directory of data_root.
	    '''
	    # These are module globals because GetMetaInfo reads them directly.
	    global data_root, wavlm_dir, pitch_dir, ld_dir

	    data_root = Path(args.data_root)
	    wavlm_dir = data_root/'wavlm_features' if args.wavlm_dir is None else Path(args.wavlm_dir)
	    pitch_dir = data_root/'pitch' if args.pitch_dir is None else Path(args.pitch_dir)
	    ld_dir = data_root/'loudness' if args.ld_dir is None else Path(args.ld_dir)

	    # Every wav file found recursively under data_root takes part in the split.
	    wav_list = list(data_root.rglob('*.wav'))
	    subsets = SplitDataset(wav_list)

	    # SplitDataset returns the subsets in (train, valid, test) order.
	    for mode, subset in zip(('train', 'valid', 'test'), subsets):
	        GenMetadata(data_root, subset, mode)


	if __name__ == '__main__':
	    # Command-line entry point: only --data_root is mandatory, the three
	    # feature directories are optional and default to None.
	    parser = argparse.ArgumentParser()
	    parser.add_argument('--data_root', type=str, required=True, help='Directory of audios for the dataset.')
	    for _flag, _help in (
	        ('--wavlm_dir', 'Directory of wavlm features for the dataset.'),
	        ('--pitch_dir', 'Directory of pitch for the dataset.'),
	        ('--ld_dir', 'Directory of loudness for the dataset.'),
	    ):
	        parser.add_argument(_flag, type=str, help=_help)

	    args = parser.parse_args()
	    main(args)