import torch
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt


class AudioFeatureExtractor:
    """Load one audio file and derive spectrogram features from it.

    The audio is read once in ``__init__`` and kept in ``self.waveform`` as a
    ``torch.Tensor`` with an extra leading dimension (added with
    ``unsqueeze(0)``). Every feature method converts that tensor back to a
    NumPy array and hands it to librosa.

    Attributes:
        wavfile: The value passed to ``librosa.load`` (typically a file path).
        target_sr: Sample rate requested from ``librosa.load``.
        sr: Sample rate of ``self.waveform``; always equal to ``target_sr``.
        n_fft: FFT window size used for the STFT / mel spectrogram.
        hop_length: Hop between successive analysis frames, in samples.
        n_mels: Number of mel bands produced by ``get_melspectrogram``.
        waveform: The loaded samples as a ``torch.Tensor``.
    """

    def __init__(self, wavfile, sr=16000, n_fft=1024, hop_length=51, n_mels=256):
        """Read ``wavfile`` at sample rate ``sr`` and store the analysis settings.

        Args:
            wavfile: Audio source forwarded unchanged to ``librosa.load``.
            sr: Target sample rate in Hz.
            n_fft: FFT window size.
            hop_length: Hop length in samples.
            n_mels: Number of mel filter banks.
        """
        self.wavfile = wavfile
        self.target_sr = sr
        self.n_fft = n_fft
        self.hop_length = hop_length
        self.n_mels = n_mels

        # Load the audio with librosa; passing ``sr`` makes librosa resample
        # to the target rate, so the rate it returns is not needed.
        samples, _ = librosa.load(self.wavfile, sr=self.target_sr)
        self.waveform = torch.tensor(samples).unsqueeze(0)
        self.sr = self.target_sr

    def _samples(self) -> np.ndarray:
        """Return ``self.waveform`` without its leading dimension, as NumPy."""
        # Computed on every call (not cached) so that a caller who reassigns
        # ``self.waveform`` still gets features for the current tensor.
        return self.waveform.squeeze(0).numpy()

    def get_spectrogram(self, to_db: bool = True) -> np.ndarray:
        """Compute the power spectrogram (squared STFT magnitude).

        Args:
            to_db: When true, convert the power values to decibels relative
                to the spectrogram's own maximum (``ref=np.max``).

        Returns:
            The power spectrogram, in dB if ``to_db`` is true.
        """
        stft = librosa.stft(
            self._samples(),
            n_fft=self.n_fft,
            hop_length=self.hop_length,
        )
        spec = np.abs(stft) ** 2
        if to_db:
            spec = librosa.power_to_db(spec, ref=np.max)
        return spec

    def get_melspectrogram(self) -> np.ndarray:
        """Compute the mel power spectrogram in decibels.

        Returns:
            The mel spectrogram converted with ``librosa.power_to_db`` using
            ``ref=np.max``, i.e. expressed relative to its own peak.
        """
        mel_power = librosa.feature.melspectrogram(
            y=self._samples(),
            sr=self.sr,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            power=2.0,
        )
        return librosa.power_to_db(mel_power, ref=np.max)

    def normalize(self, spec):
        """Min-max scale ``spec`` into the range 0-1.

        A small epsilon (1e-6) is added to the denominator so a constant
        input yields zeros instead of dividing by zero; as a consequence the
        maximum maps to a value marginally below 1.
        """
        spec_min, spec_max = spec.min(), spec.max()
        return (spec - spec_min) / (spec_max - spec_min + 1e-6)

    def to_grayscale(self, spec):
        """Add a leading axis so ``spec`` becomes a 1-channel array."""
        return np.expand_dims(spec, axis=0)

    def get_normalized_melspec(self):
        """Return the dB mel spectrogram, min-max normalized, with a channel axis."""
        return self.to_grayscale(self.normalize(self.get_melspectrogram()))

    def _render_melspectrogram(self, **specshow_kwargs):
        """Draw the mel spectrogram on a new 10x4 inch, axis-free figure.

        Args:
            **specshow_kwargs: Extra keyword arguments forwarded to
                ``librosa.display.specshow`` (e.g. ``cmap``).

        Returns:
            The newly created matplotlib figure, left open and current.
        """
        mel_db = self.get_melspectrogram()
        fig = plt.figure(figsize=(10, 4))
        try:
            librosa.display.specshow(
                mel_db,
                sr=self.sr,
                hop_length=self.hop_length,
                **specshow_kwargs,
            )
            plt.axis("off")
            plt.tight_layout()
        except Exception:
            # Do not leave a half-drawn figure registered with pyplot.
            plt.close(fig)
            raise
        return fig

    def plot_melspectrogram(self, save_path=None):
        """Show the mel spectrogram, or write it to ``save_path`` if given.

        The image is drawn with the ``viridis`` colormap and without axes.

        Args:
            save_path: Destination passed to ``plt.savefig``. When falsy the
                figure is displayed with ``plt.show()`` instead and is left
                open afterwards.
        """
        fig = self._render_melspectrogram(cmap="viridis")
        if save_path:
            try:
                plt.savefig(save_path, bbox_inches="tight", pad_inches=0)
            finally:
                # Always release the figure, even when saving fails.
                plt.close(fig)
        else:
            plt.show()

    def save_melspectrogram(self, out_path="melspec.png"):
        """Write the mel spectrogram image to ``out_path`` and return the path.

        Unlike ``plot_melspectrogram`` no colormap is passed, so
        ``librosa.display.specshow`` applies its own default.

        Args:
            out_path: Destination passed to ``plt.savefig``.

        Returns:
            ``out_path`` unchanged.
        """
        fig = self._render_melspectrogram()
        try:
            plt.savefig(out_path, bbox_inches="tight", pad_inches=0)
        finally:
            # Always release the figure, even when saving fails.
            plt.close(fig)
        return out_path