from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals import torch.utils.data as data # from PIL import Image import soundfile as sf import PIL import os import os.path import pickle import random import numpy as np import pandas as pd from scipy import signal from miscc.config import cfg class TextDataset(data.Dataset): def __init__(self, data_dir, split='train',rirsize=4096): #, transform=None, target_transform=None): # self.transform = transform # self.target_transform = target_transform self.rirsize = rirsize self.data = [] self.data_dir = data_dir self.bbox = None split_dir = os.path.join(data_dir, split) self.filenames = self.load_filenames(split_dir) self.embeddings = self.load_embedding(split_dir) def get_RIR(self, RIR_path): wav,fs = sf.read(RIR_path) #Image.open(RIR_path).convert('RGB') length = wav.size # crop_length = int((16384*(80))/(64)) crop_length = 4096 #int(16384) if(length