Spaces:

MiakOnline
/

RecToTextPro2

Sleeping

App Files Files Community

RecToTextPro2 / app.py

MiakOnline

Create app.py

644ed40 verified 11 days ago

raw

history blame contribute delete

6.99 kB

	import streamlit as st
	import whisper
	import tempfile
	import os
	import time
	import re
	from pydub import AudioSegment
	from openpyxl import Workbook
	from openpyxl.styles import Font
	from docx import Document
	from docx.shared import Pt
	from docx.enum.text import WD_ALIGN_PARAGRAPH
	from io import BytesIO
	from collections import Counter

	# ---------------------------------------------------
	# PAGE CONFIG
	# ---------------------------------------------------
	# Browser-tab title and icon for the app, with the wide page layout.
	st.set_page_config(
	    layout="wide",
	    page_icon="🎤",
	    page_title="RecToText Pro - AI Edition",
	)

	# ---------------------------------------------------
	# SIDEBAR
	# ---------------------------------------------------
	# Sidebar controls: Whisper model size, output flavour, and a session reset.
	sidebar = st.sidebar
	sidebar.title("⚙️ Settings")

	# Model size name that is later handed to load_model().
	model_option = sidebar.selectbox("Select Whisper Model", ["base", "small"])

	# "Roman Urdu" additionally runs the transcript through convert_to_roman_urdu().
	output_mode = sidebar.radio("Output Format", ["Roman Urdu", "English"])

	# Drop everything kept in session state, then restart the script run.
	if sidebar.button("🧹 Clear Session"):
	    st.session_state.clear()
	    st.rerun()

	# ---------------------------------------------------
	# HEADER
	# ---------------------------------------------------
	# Page header: centred title and tagline, followed by a divider.
	# unsafe_allow_html lets the inline-styled HTML through instead of escaping it.
	st.markdown(
	    "<h1 style='text-align:center;'>🎤 RecToText Pro - AI Enhanced</h1>",
	    unsafe_allow_html=True,
	)
	# The separators are plain "|": a backslash before the pipe is not a valid
	# Python string escape and would leave a stray backslash in the text.
	st.markdown(
	    "<p style='text-align:center;'>Auto Title | AI Summary | Smart Formatting</p>",
	    unsafe_allow_html=True,
	)
	st.divider()

	# ---------------------------------------------------
	# FUNCTIONS
	# ---------------------------------------------------

	@st.cache_resource
	def load_model(model_size):
	    """Load the Whisper model identified by *model_size*.

	    The function is wrapped in ``st.cache_resource``; the model object is
	    obtained from ``whisper.load_model`` and returned unchanged.
	    """
	    model = whisper.load_model(model_size)
	    return model

	def clean_text(text):
	    """Remove filler words from *text* and normalise its whitespace.

	    Filler words ("um", "hmm", "acha", "matlab", "uh", "huh") are removed
	    case-insensitively, but only when they stand as whole words, so
	    "human" or "summary" are left alone. Every run of whitespace is then
	    collapsed to a single space and the result is stripped.

	    Args:
	        text: The transcript text to clean.

	    Returns:
	        The cleaned string (empty if *text* is empty).
	    """
	    filler_words = ["um", "hmm", "acha", "matlab", "uh", "huh"]
	    # Join with a bare "|" so the regex treats the words as alternatives;
	    # an escaped pipe would only ever match a literal "|" character.
	    alternation = "|".join(re.escape(word) for word in filler_words)
	    pattern = r"\b(?:" + alternation + r")\b"
	    without_fillers = re.sub(pattern, "", text, flags=re.IGNORECASE)
	    return re.sub(r"\s+", " ", without_fillers).strip()

	def convert_to_roman_urdu(text):
	    """Replace a handful of common Urdu words in *text* with Roman Urdu.

	    This is a plain substring substitution over a small fixed table, not a
	    full transliteration; anything not in the table is left as it is.
	    """
	    # Applied in this exact order: "کیا" has to be handled before its
	    # prefix "کی", otherwise the longer word would be split up.
	    urdu_to_roman = (
	        ("ہے", "hai"),
	        ("میں", "main"),
	        ("اور", "aur"),
	        ("کیا", "kya"),
	        ("آپ", "aap"),
	        ("کی", "ki"),
	        ("کا", "ka"),
	    )
	    converted = text
	    for source, target in urdu_to_roman:
	        converted = converted.replace(source, target)
	    return converted

	# -----------------------------
	# AI Title Detection
	# -----------------------------
	def generate_title(text):
	    """Build a title from the most frequent words in *text*.

	    Only purely alphabetic ASCII words of four or more letters are
	    counted (after lower-casing). The three most common are capitalised
	    and appended to "Lecture on "; with no such words a fixed fallback
	    title is returned.
	    """
	    candidates = re.findall(r'\b[a-zA-Z]{4,}\b', text.lower())
	    if not candidates:
	        return "Lecture Transcription"

	    ranked = Counter(candidates).most_common(5)
	    keywords = [word.capitalize() for word, _count in ranked[:3]]
	    return "Lecture on " + " ".join(keywords)

	# -----------------------------
	# AI Summary Generator
	# -----------------------------
	def generate_summary(text):
	    """Return the first five sentences of *text*, joined by single spaces.

	    A sentence boundary is one or more spaces that follow ".", "!" or "?".
	    """
	    sentence_break = r'(?<=[.!?]) +'
	    # Only the first five pieces are used, so splitting can stop early.
	    leading = re.split(sentence_break, text, maxsplit=5)[:5]
	    return " ".join(leading)

	# -----------------------------
	# Smart Formatting
	# -----------------------------
	def smart_format(text):
	    """Reflow *text* so that short sentences stand out as headings.

	    The text is split after ".", "!" or "?" followed by spaces. A sentence
	    with fewer than eight whitespace-separated words is upper-cased and
	    placed on its own line after a blank line; longer sentences are run
	    together, each followed by a space. The result is stripped.
	    """
	    pieces = []
	    for sentence in re.split(r'(?<=[.!?]) +', text):
	        is_heading = len(sentence.split()) < 8
	        if is_heading:
	            pieces.append(f"\n\n{sentence.upper()}\n")
	        else:
	            pieces.append(sentence + " ")
	    return "".join(pieces).strip()

	# -----------------------------
	# Excel Export
	# -----------------------------
	def create_excel(segments):
	    """Write the transcription segments to an in-memory .xlsx workbook.

	    Args:
	        segments: Iterable of mappings that each provide "start", "end"
	            and "text" entries.

	    Returns:
	        A ``BytesIO`` holding the saved workbook, rewound to position 0.
	    """
	    workbook = Workbook()
	    sheet = workbook.active
	    sheet.title = "Transcription"

	    # Header row, with each header cell set in bold.
	    headers = ["Timestamp", "Transcribed Text", "Cleaned Output"]
	    sheet.append(headers)
	    for column, _header in enumerate(headers, start=1):
	        sheet.cell(row=1, column=column).font = Font(bold=True)

	    # One row per segment: rounded time span, raw text, cleaned text.
	    for segment in segments:
	        original = segment["text"]
	        span = f"{round(segment['start'],2)} - {round(segment['end'],2)}"
	        sheet.append([span, original, clean_text(original)])

	    output = BytesIO()
	    workbook.save(output)
	    output.seek(0)
	    return output

	# -----------------------------
	# Word Export
	# -----------------------------
	def create_word_document(title, summary, formatted_text):
	    """Assemble the lecture as an in-memory .docx document.

	    The document has three parts separated by page breaks: a centred
	    level-1 title, an "Executive Summary" section, and a "Full Lecture
	    Content" section with one paragraph per blank-line-separated chunk
	    of *formatted_text*.

	    Returns:
	        A ``BytesIO`` holding the saved document, rewound to position 0.
	    """
	    document = Document()

	    # Title page.
	    heading = document.add_heading(title, level=1)
	    heading.alignment = WD_ALIGN_PARAGRAPH.CENTER
	    document.add_page_break()

	    # Summary page.
	    document.add_heading("Executive Summary", level=2)
	    document.add_paragraph(summary)
	    document.add_page_break()

	    # Main content, with 12pt of space after every paragraph.
	    document.add_heading("Full Lecture Content", level=2)
	    for chunk in formatted_text.split("\n\n"):
	        paragraph = document.add_paragraph(chunk)
	        paragraph.paragraph_format.space_after = Pt(12)

	    output = BytesIO()
	    document.save(output)
	    output.seek(0)
	    return output

	# ---------------------------------------------------
	# FILE UPLOADER
	# ---------------------------------------------------
	# Upload widget for the lecture recording, limited to the listed extensions.
	accepted_types = ["mp3", "wav", "m4a", "aac"]
	uploaded_file = st.file_uploader(
	    "Upload Lecture Recording (.mp3, .wav, .m4a, .aac)",
	    type=accepted_types,
	)

	# Main flow: convert the upload to WAV, transcribe it, post-process the text,
	# show the results, and offer the Excel / Word downloads.
	# NOTE(review): this whole section runs again on every script rerun, so the
	# transcription appears to be repeated after each widget interaction (e.g. a
	# download click) - consider caching the result per uploaded file.
	if uploaded_file:
	    st.audio(uploaded_file)

	    # Reserve a temp path and close the handle straight away, so the WAV
	    # export below is not writing to a file that is still held open here.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
	        temp_audio_path = tmp.name

	    try:
	        # Rewind in case an earlier consumer moved the read position.
	        uploaded_file.seek(0)
	        ext = uploaded_file.name.split(".")[-1]
	        audio = AudioSegment.from_file(uploaded_file, format=ext)
	        # export() hands back the output file handle; close it explicitly.
	        audio.export(temp_audio_path, format="wav").close()

	        st.info("Loading Whisper model...")
	        model = load_model(model_option)

	        start_time = time.perf_counter()
	        with st.spinner("Transcribing..."):
	            result = model.transcribe(temp_audio_path)
	        end_time = time.perf_counter()
	    finally:
	        # Remove the temp WAV even when conversion or transcription fails.
	        if os.path.exists(temp_audio_path):
	            os.remove(temp_audio_path)

	    full_text = result["text"]
	    segments = result["segments"]
	    detected_lang = result.get("language", "Unknown")

	    cleaned_text = clean_text(full_text)

	    if output_mode == "Roman Urdu":
	        cleaned_text = convert_to_roman_urdu(cleaned_text)

	    title = generate_title(cleaned_text)
	    summary = generate_summary(cleaned_text)
	    formatted_text = smart_format(cleaned_text)

	    word_count = len(cleaned_text.split())
	    processing_time = round(end_time - start_time, 2)

	    col1, col2 = st.columns(2)

	    # Each text area gets a real (but hidden) label: the subheader already
	    # names the box, and distinct labels keep the two widgets apart.
	    with col1:
	        st.subheader("📜 Raw Transcription")
	        st.text_area(
	            "Raw Transcription",
	            full_text,
	            height=350,
	            label_visibility="collapsed",
	        )

	    with col2:
	        st.subheader("✨ AI Formatted Version")
	        st.text_area(
	            "AI Formatted Version",
	            formatted_text,
	            height=350,
	            label_visibility="collapsed",
	        )

	    st.divider()

	    st.write(f"Auto Detected Title: {title}")
	    st.write(f"Detected Language: {detected_lang}")
	    st.write(f"Word Count: {word_count}")
	    st.write(f"Processing Time: {processing_time} sec")

	    excel_file = create_excel(segments)
	    word_file = create_word_document(title, summary, formatted_text)

	    colA, colB = st.columns(2)

	    with colA:
	        st.download_button(
	            "📥 Download Excel (.xlsx)",
	            data=excel_file,
	            file_name="RecToText_Transcription.xlsx",
	            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
	        )

	    with colB:
	        st.download_button(
	            "📄 Download Word (.docx)",
	            data=word_file,
	            file_name="RecToText_AI_Lecture.docx",
	            mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
	        )

	# Page footer: a divider and a small centred credit line.
	# The separators are plain "|": a backslash before the pipe is not a valid
	# Python string escape and would leave a stray backslash in the text.
	st.divider()
	st.markdown(
	    "<p style='text-align:center;font-size:12px;'>RecToText Pro AI Edition | Auto Title | Smart Summary | AI Formatting</p>",
	    unsafe_allow_html=True,
	)