Update app.py
app.py CHANGED
@@ -11,7 +11,7 @@ from langchain_core.prompts import ChatPromptTemplate
 HF_TOKEN = os.environ.get("HF_TOKEN", "")
 
 if not HF_TOKEN:
-    print("⚠️ Warning: …
+    print("⚠️ Warning: HF_TOKEN not set. The app may not work properly.")
 
 # Use InferenceClient directly instead of LangChain wrapper
 client = InferenceClient(token=HF_TOKEN)
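The change above only warns when `HF_TOKEN` is absent but still constructs the client. A fail-fast variant could verify the token at startup; a minimal sketch, assuming the stock `huggingface_hub` API (`HfApi.whoami` raises on a bad token) — the `validate_hf_token` helper is hypothetical and not part of this commit:

```python
import os
from huggingface_hub import HfApi, InferenceClient

def validate_hf_token(token: str) -> bool:
    """Hypothetical helper: True if the token authenticates against the Hub."""
    if not token:
        return False
    try:
        HfApi().whoami(token=token)  # raises if the token is invalid
        return True
    except Exception:
        return False

HF_TOKEN = os.environ.get("HF_TOKEN", "")
if not validate_hf_token(HF_TOKEN):
    print("⚠️ Warning: HF_TOKEN missing or invalid. Set it in Space secrets.")
client = InferenceClient(token=HF_TOKEN)
```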
@@ -32,7 +32,7 @@ def generate_question_paper(
         return "❌ Error: Maximum 5 PDF files allowed."
 
     if not HF_TOKEN:
-        return "❌ Error: …
+        return "❌ Error: HF_TOKEN not configured. Please add your Hugging Face token in Space Settings > Repository secrets."
 
     total_questions = mcq_count + short_count + long_count
     if total_questions == 0:
@@ -40,12 +40,13 @@ def generate_question_paper(
 
     try:
         # A. Load all PDFs
-        progress(0, desc=f"📄 …
+        progress(0, desc=f"📄 PDF file(s) uploaded, accessing {len(pdf_files)} file(s)...")
         all_pages = []
 
         for idx, pdf_file in enumerate(pdf_files):
-            …
-            …
+            current_progress = 0.05 + (idx * 0.1 / len(pdf_files))
+            progress(current_progress,
+                     desc=f"📄 Accessing PDF {idx + 1}/{len(pdf_files)}: {pdf_file.name.split('/')[-1][:30]}...")
             loader = PyPDFLoader(pdf_file.name)
             pages = loader.load()
 
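The new per-file formula maps file `idx` (0-based) into the 5–15% band of the bar: `0.05 + idx * 0.1 / len(pdf_files)`. A quick standalone check of the arithmetic:

```python
def file_load_progress(idx: int, n_files: int) -> float:
    # File idx (0-based) of n_files lands in the 0.05-0.15 slice of the bar.
    return 0.05 + (idx * 0.1 / n_files)

print([round(file_load_progress(i, 4), 3) for i in range(4)])
# [0.05, 0.075, 0.1, 0.125]; the 0.15 mark is reported after the loop
```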
@@ -54,34 +55,37 @@ def generate_question_paper(
 
         all_pages.extend(pages)
 
-        progress(0.15, desc=f"✅ …
+        progress(0.15, desc=f"✅ PDF loaded successfully! Extracted {len(all_pages)} pages from {len(pdf_files)} file(s)")
 
         # B. Split Text
+        progress(0.20, desc="📝 Extracting text content from PDFs...")
         text_splitter = RecursiveCharacterTextSplitter(
             chunk_size=1000,
             chunk_overlap=100
         )
         chunks = text_splitter.split_documents(all_pages)
-        progress(0.…
+        progress(0.30, desc=f"✅ Text extracted successfully! Created {len(chunks)} text chunks, preparing embeddings...")
 
         # C. Vector Store (FAISS)
+        progress(0.35, desc="🧠 Generating embeddings for content understanding...")
         embeddings = FastEmbedEmbeddings()
-        progress(0.…
+        progress(0.40, desc="🧠 Creating knowledge base from embeddings...")
         vector_store = FAISS.from_documents(chunks, embeddings)
-        progress(0.…
+        progress(0.50, desc="✅ Knowledge base created successfully! Analyzing content for key concepts...")
 
         # D. Retrieve Context (more chunks for multiple PDFs)
+        progress(0.55, desc="🔍 Identifying key concepts and topics from content...")
         retriever = vector_store.as_retriever(search_kwargs={"k": min(10, len(chunks))})
         context_docs = retriever.invoke("Key concepts, definitions, and important topics")
         context_text = "\n\n".join([doc.page_content for doc in context_docs])
-        progress(0.…
+        progress(0.60, desc=f"✅ Analysis complete! Found {len(context_docs)} key sections. Activating AI model...")
 
         # E. Generate all sets
         all_outputs = []
 
         for set_num in range(1, num_sets + 1):
-            progress(0.…
-                     desc=f"…
+            progress(0.65 + (set_num - 1) * 0.30 / num_sets,
+                     desc=f"🤖 AI Model activated! Preparing to generate Set {set_num}/{num_sets}...")
 
             # Create Prompt for this set
             sections = []
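This hunk instruments the split → embed → index → retrieve pipeline. For orientation, the same pipeline can be run standalone; a minimal sketch assuming current LangChain packaging (import paths vary across versions) and a placeholder PDF path:

```python
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter

pages = PyPDFLoader("sample.pdf").load()  # placeholder path
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
chunks = splitter.split_documents(pages)

# FastEmbed computes embeddings locally (ONNX), so indexing needs no API token.
store = FAISS.from_documents(chunks, FastEmbedEmbeddings())
retriever = store.as_retriever(search_kwargs={"k": min(10, len(chunks))})
docs = retriever.invoke("Key concepts, definitions, and important topics")
print(f"retrieved {len(docs)} chunks")
```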
@@ -126,6 +130,9 @@ FORMAT REQUIREMENTS:
 
 Do not output conversational text. Output ONLY the exam paper in a well-formatted structure."""
 
+            progress(0.70 + (set_num - 1) * 0.30 / num_sets,
+                     desc=f"✍️ Generating Question Paper Set {set_num}/{num_sets}... 0%")
+
             # F. Generate using chat completion
             messages = [{"role": "user", "content": prompt}]
 
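The following hunk consumes the completion as a token stream. A standalone sketch of that pattern with `InferenceClient.chat_completion(stream=True)`; the model id is an assumption inferred from the "Llama 3.2 3B" label added to the UI below:

```python
from huggingface_hub import InferenceClient

client = InferenceClient(token="hf_...")  # placeholder token
messages = [{"role": "user", "content": "Write one MCQ about FAISS."}]

response = ""
for chunk in client.chat_completion(
    messages=messages,
    model="meta-llama/Llama-3.2-3B-Instruct",  # assumed model id
    max_tokens=256,
    stream=True,
):
    # Each streamed chunk carries an incremental delta; content may be None.
    response += chunk.choices[0].delta.content or ""
print(response)
```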
@@ -144,15 +151,23 @@ Do not output conversational text. Output ONLY the exam paper in a well-formatted structure."""
                 if hasattr(message.choices[0], 'delta') and hasattr(message.choices[0].delta, 'content'):
                     response += message.choices[0].delta.content or ""
                     token_count += 1
-                    # …
-                    …
-                    …
-                    …
-                    …
+                    # Calculate progress within this set (70-95% range divided by number of sets)
+                    set_start = 0.70 + (set_num - 1) * 0.30 / num_sets
+                    set_range = 0.25 / num_sets  # 25% of total progress for generation
+                    generation_progress = min((token_count / max_tokens), 1.0)
+                    current_progress = set_start + (generation_progress * set_range)
+                    percentage = int(generation_progress * 100)
+
+                    # Update with dynamic percentage
+                    progress(current_progress,
+                             desc=f"✍️ Generating Question Paper Set {set_num}/{num_sets}... {percentage}%")
+
+            progress(0.70 + set_num * 0.30 / num_sets,
+                     desc=f"✅ Set {set_num}/{num_sets} generated successfully!")
 
             all_outputs.append(response)
 
-        progress(1.0, desc="✅ All …
+        progress(1.0, desc=f"✅ All {num_sets} Question Paper(s) Generated Successfully! 🎉")
 
         # Combine all sets
         final_output = "\n\n" + "="*80 + "\n\n".join(all_outputs)
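Sanity-checking the new progress math: each set's streaming phase owns a `0.25 / num_sets` slice starting at `0.70 + (set_num - 1) * 0.30 / num_sets`, and the completion tick at `0.70 + set_num * 0.30 / num_sets` closes the remaining gap:

```python
def generation_progress_bar(set_num, num_sets, token_count, max_tokens):
    # Mirrors the math added in this hunk.
    set_start = 0.70 + (set_num - 1) * 0.30 / num_sets
    set_range = 0.25 / num_sets
    frac = min(token_count / max_tokens, 1.0)
    return set_start + frac * set_range

# Two sets, max_tokens=2000:
print(generation_progress_bar(1, 2, 0, 2000))     # 0.7    - set 1 begins
print(generation_progress_bar(1, 2, 2000, 2000))  # 0.825  - set 1 fully streamed
print(generation_progress_bar(2, 2, 1000, 2000))  # 0.9125 - set 2 halfway
```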
@@ -162,9 +177,9 @@ Do not output conversational text. Output ONLY the exam paper in a well-formatted structure."""
         return f"❌ Error: {str(e)}\n\nPlease check:\n1. PDFs are valid and contain text\n2. HF_TOKEN is correctly set in Space secrets\n3. Try again or contact support"
 
 # --- 3. The UI ---
-with gr.Blocks(title="AI Question Paper Generator") as demo:
+with gr.Blocks(title="AI Question Paper Generator", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 📝 AI Question Paper Generator Pro")
-    gr.Markdown("Powered by **…
+    gr.Markdown("Powered by **Llama 3.2 3B** via Hugging Face Inference API")
     gr.Markdown("⚡ Fast • 🎯 Accurate • 📚 Multi-PDF Support • 🎲 Multiple Sets")
 
     with gr.Row():
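The `progress(...)` calls throughout this diff rely on Gradio injecting a live tracker wherever a `gr.Progress()` default argument is declared on the event handler. A minimal, self-contained sketch of that mechanism (the demo function and labels are illustrative):

```python
import time
import gradio as gr

def slow_task(n_steps, progress=gr.Progress()):
    # Gradio replaces the gr.Progress() default with a live tracker;
    # calling it with a float in [0, 1] plus desc updates the bar.
    n = int(n_steps)
    for i in range(n):
        progress(i / n, desc=f"Working... step {i + 1}/{n}")
        time.sleep(0.1)
    progress(1.0, desc="Done!")
    return f"Finished {n} steps"

with gr.Blocks(title="Progress demo", theme=gr.themes.Soft()) as demo:
    steps = gr.Slider(1, 20, value=5, step=1, label="Steps")
    out = gr.Textbox(label="Result")
    gr.Button("Run").click(slow_task, inputs=steps, outputs=out)

demo.launch()
```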
@@ -248,6 +263,10 @@ with gr.Blocks(title="AI Question Paper Generator") as demo:
 
     gr.Markdown("""
     ---
+    **Setup Instructions:**
+    - Set `HF_TOKEN` in your Space's Settings → Repository secrets
+    - Get your token from https://huggingface.co/settings/tokens
+
     **Features:**
     - ✅ Multiple PDF support (up to 5 files)
     - ✅ Separate difficulty control for each question type