import base64
import io

import librosa
import numpy as np
import pydub
import scipy
import tensorflow as tf
from pydub import AudioSegment
from scipy.io import wavfile


class EndpointHandler:
    """Callable handler that predicts emotion and depression from encoded audio.

    The handler decodes a base64 WebM payload, extracts MFCC features from a
    fixed window of the recording and feeds them to two Keras models loaded
    from ``{path}/models``.
    """

    def __init__(self, path):
        """Load the emotion and depression models found under ``path``.

        Args:
            path: Directory that contains ``models/best_model_emotion.h5`` and
                ``models/best_model_depression.h5``.
        """
        # Index i of this list is the label reported for class i of the
        # emotion model's output.
        self.emotion_labels = ['Angry', 'Calm', 'Fearful', 'Happy', 'Sad']
        self.emotion_model = tf.keras.models.load_model(f"{path}/models/best_model_emotion.h5")
        self.depression_model = tf.keras.models.load_model(f"{path}/models/best_model_depression.h5")

    def __call__(self, input_data):
        """Run the full pipeline on one request payload.

        Args:
            input_data: Mapping that carries the base64 audio under the
                ``"inputs"`` key. The key is removed from the mapping. When
                the key is absent, ``input_data`` itself is treated as the
                base64 payload.

        Returns:
            A dict with ``"emotion"`` (one of ``self.emotion_labels``) and
            ``"depression"`` (first row of the depression model's output).
        """
        audio_base64 = input_data.pop("inputs", input_data)
        audio_features = self.preprocess_audio_data(audio_base64)
        emotion_prediction, depression_prediction = self.perform_emotion_analysis(audio_features)
        return {
            "emotion": emotion_prediction,
            "depression": depression_prediction,
        }

    def get_mfcc_features(self, features, padding):
        """Force ``features`` to exactly ``padding`` columns and add a batch axis.

        Args:
            features: 2-D array; its second axis is the one being resized.
            padding: Target length of the second axis.

        Returns:
            Array of shape ``(1, features.shape[0], padding)``. Short inputs
            are right-padded with zeros; long inputs keep their first
            ``padding`` columns.
        """
        missing = padding - features.shape[1]
        if missing > 0:
            features = np.pad(features, [(0, 0), (0, missing)], mode='constant')
        elif missing < 0:
            # Keep the leading columns. The previous slice
            # ``features[:, missing:]`` kept only the trailing
            # ``width - padding`` columns, so the result did not have
            # ``padding`` columns at all.
            features = features[:, :padding]
        return np.expand_dims(features, axis=0)

    def preprocess_audio_data(self, base64_string, duration=2.5, desired_sr=22050*2, offset=0.5):
        """Decode base64 WebM audio and return MFCCs of a fixed window.

        Args:
            base64_string: Base64-encoded audio in WebM format.
            duration: Length in seconds of the analysed window.
            desired_sr: Sample rate in Hz the audio is resampled to.
            offset: Seconds skipped at the start of the recording.

        Returns:
            The array produced by ``librosa.feature.mfcc`` with
            ``n_mfcc=30`` for the normalised window.

        Raises:
            ValueError: If no samples remain after skipping ``offset`` seconds.
        """
        audio_bytes = base64.b64decode(base64_string)
        audio = AudioSegment.from_file(io.BytesIO(audio_bytes), format="webm")

        # Round-trip through an in-memory WAV so scipy can read raw samples.
        wav_buffer = io.BytesIO()
        audio.export(wav_buffer, format="wav")
        wav_buffer.seek(0)
        sample_rate, audio_array = wavfile.read(wav_buffer)

        audio_array = audio_array.astype(float)
        if audio_array.ndim > 1:
            # Multi-channel WAV data arrives as (n_samples, n_channels).
            # Average the channels so that resampling and the slice below
            # both operate along the time axis.
            audio_array = audio_array.mean(axis=1)

        audio_array = librosa.resample(audio_array, orig_sr=sample_rate, target_sr=desired_sr)
        start_sample = int(offset * desired_sr)
        end_sample = start_sample + int(duration * desired_sr)
        audio_array = audio_array[start_sample:end_sample]

        if audio_array.size == 0:
            # Fail with a clear message instead of passing an empty signal
            # to the normalisation / MFCC steps.
            raise ValueError(
                f"Audio is too short: no samples remain after skipping the first {offset} seconds."
            )

        normalized = librosa.util.normalize(audio_array)
        return librosa.feature.mfcc(y=normalized, sr=desired_sr, n_mfcc=30)

    def perform_emotion_analysis(self, features, emotion_padding=216, depression_padding=2584):
        """Run both models on the same MFCC matrix.

        Args:
            features: 2-D MFCC array as returned by ``preprocess_audio_data``.
            emotion_padding: Number of columns fed to the emotion model.
            depression_padding: Number of columns fed to the depression model.

        Returns:
            Tuple ``(emotion_label, depression_output)`` where
            ``emotion_label`` is the entry of ``self.emotion_labels`` with the
            highest score and ``depression_output`` is the first row of the
            depression model's prediction.
        """
        # NOTE(review): preprocess_audio_data limits the clip to `duration`
        # seconds, so the depression input is presumably mostly zero padding
        # at the default sizes -- confirm against the training pipeline.
        emotion_features = self.get_mfcc_features(features, emotion_padding)
        depression_features = self.get_mfcc_features(features, depression_padding)

        emotion_scores = self.emotion_model.predict(emotion_features)[0]
        emotion_prediction = self.emotion_labels[np.argmax(emotion_scores)]
        depression_prediction = self.depression_model.predict(depression_features)[0]

        return emotion_prediction, depression_prediction