Spaces:

GiftMark
/

AkanWhisperV.1

Sleeping

File size: 1,825 Bytes

765ce6e
 
 
8ce026d
2f6398c
765ce6e
2f6398c
765ce6e
233bd59
 
 
 
 
765ce6e
 
8ce026d
 
 
 
2f6398c
 
8ce026d
 
2f6398c
 
 
 
 
 
 
 
8ce026d
 
 
 
 
 
 
 
 
 
 
 
765ce6e

import gradio as gr
from transformers import WhisperProcessor, WhisperForConditionalGeneration
import torch
import numpy as np
from scipy.signal import resample

# app.py
# Load the fine-tuned Whisper checkpoint and its processor once, at import
# time, so every transcription request reuses the same objects.
# HF_TOKEN is read from the environment for private model repositories; when
# the variable is unset os.getenv returns None and token=None is passed.
import os

MODEL_ID = "GiftMark/akan-whisper-model"
hf_token = os.getenv("HF_TOKEN")
processor = WhisperProcessor.from_pretrained(MODEL_ID, token=hf_token)
model = WhisperForConditionalGeneration.from_pretrained(MODEL_ID, token=hf_token)


def _to_mono_float32(data):
    """Return *data* as a 1-D float32 waveform with amplitudes in [-1, 1]."""
    samples = np.asarray(data)
    if np.issubdtype(samples.dtype, np.integer):
        # Gradio's numpy audio is integer PCM (typically int16). Casting to
        # float without scaling leaves amplitudes in the +/-32768 range, while
        # the Whisper feature extractor is meant to receive floats around
        # +/-1, so divide by the integer type's full-scale value.
        info = np.iinfo(samples.dtype)
        full_scale = float(max(abs(info.min), info.max))
        samples = samples.astype(np.float32) / full_scale
    else:
        samples = samples.astype(np.float32)
    if samples.ndim > 1:
        # Average the channels instead of keeping only the first one, so
        # speech present in any channel is preserved.
        samples = samples.mean(axis=1)
    return samples


def _resample_to(samples, sampling_rate, target_sr=16000):
    """Resample a 1-D waveform from *sampling_rate* to *target_sr* Hz."""
    if sampling_rate == target_sr or samples.size == 0:
        return samples
    new_length = int(round(samples.shape[0] * target_sr / sampling_rate))
    if new_length == 0:
        # Clip is shorter than one output sample; report it as empty rather
        # than asking scipy to resample to zero points.
        return samples[:0]
    return resample(samples, new_length).astype(np.float32)


def transcribe(audio):
    """Transcribe a Gradio audio payload with the module-level Whisper model.

    Args:
        audio: ``None`` or a ``(sampling_rate, data)`` tuple as produced by
            ``gr.Audio(type="numpy")``. ``data`` may be mono ``(samples,)``
            or multi-channel ``(samples, channels)``, integer or float.

    Returns:
        The decoded transcription string, ``"No audio provided."`` when there
        is nothing to transcribe, or ``"Error: <message>"`` if anything fails
        while processing (errors are reported to the UI, never raised).
    """
    if audio is None:
        return "No audio provided."

    target_sr = 16000
    # Broad catch is deliberate: this is the UI boundary and the user should
    # see a message instead of a failed request.
    try:
        sampling_rate, data = audio
        waveform = _to_mono_float32(data)
        waveform = _resample_to(waveform, sampling_rate, target_sr)
        if waveform.size == 0:
            return "No audio provided."

        inputs = processor(
            waveform, sampling_rate=target_sr, return_tensors="pt"
        ).input_features

        # Inference only: skip gradient tracking.
        with torch.no_grad():
            predicted_ids = model.generate(inputs)
        return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
    except Exception as e:
        print("Error during transcription:", e)
        return f"Error: {e}"

# Gradio UI: a single audio input (microphone recording or file upload) is
# passed to transcribe() and the returned text is shown in a text box.
audio_input = gr.Audio(
    sources=["microphone", "upload"],
    type="numpy",
    label="Record or upload Akan audio",
)
transcription_output = gr.Textbox(label="Transcription")

demo = gr.Interface(
    fn=transcribe,
    inputs=audio_input,
    outputs=transcription_output,
    title="Akan Speech-to-Text Demo",
    description="Record or upload Akan audio to test the Whisper ASR model.",
)

# Start the app as soon as this module is executed.
demo.launch()