from transformers import WhisperProcessor, WhisperForConditionalGeneration
import torch
import librosa
import gradio as gr

# Fine-tuned Whisper large-v3 checkpoint for Qur'anic recitation (Tarteel).
model_name = "ijyad/whisper-large-v3-Tarteel"
processor = WhisperProcessor.from_pretrained(model_name)
model = WhisperForConditionalGeneration.from_pretrained(model_name)

# Run on GPU when one is available; otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.eval()  # inference-only: disable dropout / training-mode layers


def transcribe(audio_path):
    """Transcribe an uploaded Qur'an recitation to text.

    Parameters
    ----------
    audio_path : str | None
        Filesystem path to the uploaded audio clip. Gradio passes ``None``
        when the user submits without providing any audio.

    Returns
    -------
    str
        The decoded transcription, or a short prompt message when no
        audio was supplied.
    """
    # Guard: Gradio invokes the callback with None if nothing was uploaded;
    # librosa.load(None) would otherwise raise an opaque exception.
    if audio_path is None:
        return "Please upload an audio file."

    # Whisper expects 16 kHz mono input; librosa resamples on load.
    audio, sample_rate = librosa.load(audio_path, sr=16000)
    input_features = (
        processor(audio, sampling_rate=sample_rate, return_tensors="pt")
        .input_features
        .to(device)
    )

    # Generate token ids without tracking gradients (inference only).
    with torch.no_grad():
        predicted_ids = model.generate(input_features)

    # Decode the first (and only) sequence, dropping Whisper special tokens.
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
    return transcription


interface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath", label="Upload Qur'an Audio (.wav)"),
    outputs=gr.Textbox(label="Transcription"),
    title=" Quran Recitation Transcriber - ترتيل القران (صوت - كتابة)",
    description="Upload a Quranic recitation (in WAV format) and get the transcription.",
)

# Launch only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    interface.launch()