# voice-pd-api / feature_extract.py
# phoner45's picture
# Upload 3 files
# 1886358 verified
import torch
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
class AudioFeatureExtractor:
    """Load an audio file and derive spectrogram features from it.

    The file is decoded with ``librosa.load`` when the object is created and
    resampled to the requested rate. The samples are stored on
    ``self.waveform`` as a ``torch`` tensor with an extra leading axis
    (added with ``unsqueeze(0)``); every feature method converts that tensor
    back to a NumPy array before handing it to librosa.

    Args:
        wavfile: Audio source passed straight to ``librosa.load``.
        sr: Sampling rate the audio is resampled to on load.
        n_fft: FFT size used by the STFT and the mel-spectrogram.
        hop_length: Hop length (in samples) used by the STFT, the
            mel-spectrogram and the plots.
        n_mels: Number of mel bands in the mel-spectrogram.
    """

    def __init__(self, wavfile, sr=16000, n_fft=1024, hop_length=51, n_mels=256):
        self.wavfile = wavfile
        self.target_sr = sr
        self.n_fft = n_fft
        self.hop_length = hop_length
        self.n_mels = n_mels

        # Load the audio with librosa, which resamples to the target rate
        # automatically; the rate it returns is therefore not needed.
        samples, _ = librosa.load(self.wavfile, sr=self.target_sr)
        self.waveform = torch.tensor(samples).unsqueeze(0)

        # After resampling, the effective rate is the requested one.
        self.sr = self.target_sr

    def _samples(self) -> np.ndarray:
        """Return the stored waveform as a NumPy array without its leading axis."""
        return self.waveform.squeeze(0).numpy()

    def get_spectrogram(self, to_db=True) -> np.ndarray:
        """Compute a plain (linear-frequency) power spectrogram.

        Args:
            to_db: When true, convert the power values to decibels relative
                to the maximum of the spectrogram.

        Returns:
            The squared STFT magnitude, or its dB-scaled version.
        """
        magnitude = np.abs(
            librosa.stft(
                self._samples(),
                n_fft=self.n_fft,
                hop_length=self.hop_length,
            )
        )
        spec = magnitude ** 2
        if to_db:
            spec = librosa.power_to_db(spec, ref=np.max)
        return spec

    def get_melspectrogram(self) -> np.ndarray:
        """Compute the mel-spectrogram in decibels.

        Returns:
            The power mel-spectrogram converted to dB relative to its maximum.
        """
        mel_power = librosa.feature.melspectrogram(
            y=self._samples(),
            sr=self.sr,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            power=2.0,
        )
        return librosa.power_to_db(mel_power, ref=np.max)

    def normalize(self, spec):
        """Min-max scale ``spec`` into roughly the 0-1 range.

        A small epsilon is added to the denominator so a constant input
        yields zeros instead of a division by zero.
        """
        lowest, highest = spec.min(), spec.max()
        return (spec - lowest) / (highest - lowest + 1e-6)

    def to_grayscale(self, spec):
        """Add a leading axis so the array has a single channel."""
        return np.expand_dims(spec, axis=0)

    def get_normalized_melspec(self):
        """Return the dB mel-spectrogram, min-max normalised, with a channel axis."""
        mel_db = self.get_melspectrogram()
        return self.to_grayscale(self.normalize(mel_db))

    def _draw_melspectrogram(self, **specshow_kwargs):
        """Draw the dB mel-spectrogram on a new axis-free 10x4 figure.

        Extra keyword arguments are forwarded to ``librosa.display.specshow``.
        They are forwarded only when given, so a caller that passes no
        ``cmap`` still gets whatever colormap librosa selects on its own.

        Returns:
            The newly created matplotlib figure (also the current figure).
        """
        mel_db = self.get_melspectrogram()
        fig = plt.figure(figsize=(10, 4))
        try:
            librosa.display.specshow(
                mel_db,
                sr=self.sr,
                hop_length=self.hop_length,
                **specshow_kwargs,
            )
            plt.axis("off")
            plt.tight_layout()
        except Exception:
            # Do not leave a half-drawn figure registered with pyplot.
            plt.close(fig)
            raise
        return fig

    @staticmethod
    def _save_and_close(fig, path):
        """Write ``fig`` to ``path`` with no padding and always close it."""
        try:
            fig.savefig(path, bbox_inches="tight", pad_inches=0)
        finally:
            # Close even when saving fails so figures do not accumulate.
            plt.close(fig)

    def plot_melspectrogram(self, save_path=None):
        """Render the mel-spectrogram with the ``viridis`` colormap.

        Args:
            save_path: When truthy, the image is written there and the figure
                is closed; otherwise the figure is shown with ``plt.show()``.
        """
        fig = self._draw_melspectrogram(cmap="viridis")
        if save_path:
            self._save_and_close(fig, save_path)
        else:
            plt.show()

    def save_melspectrogram(self, out_path="melspec.png"):
        """Render the mel-spectrogram and write it to ``out_path``.

        Unlike ``plot_melspectrogram`` no colormap is passed, so the choice
        is left to ``librosa.display.specshow``.

        Returns:
            ``out_path``, unchanged.
        """
        fig = self._draw_melspectrogram()
        self._save_and_close(fig, out_path)
        return out_path