retvq committed on
Commit
2c7451e
·
verified Β·
1 Parent(s): 42f51b7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -20
app.py CHANGED
@@ -11,7 +11,7 @@ from langchain_core.prompts import ChatPromptTemplate
11
  HF_TOKEN = os.environ.get("HF_TOKEN", "")
12
 
13
  if not HF_TOKEN:
14
- print("⚠️ Warning: TOKEN not set. The app may not work properly.")
15
 
16
  # Use InferenceClient directly instead of LangChain wrapper
17
  client = InferenceClient(token=HF_TOKEN)
@@ -32,7 +32,7 @@ def generate_question_paper(
32
  return "❌ Error: Maximum 5 PDF files allowed."
33
 
34
  if not HF_TOKEN:
35
- return "❌ Error: TOKEN not configured. Please add your Hugging Face token in Space Settings > Repository secrets."
36
 
37
  total_questions = mcq_count + short_count + long_count
38
  if total_questions == 0:
@@ -40,12 +40,13 @@ def generate_question_paper(
40
 
41
  try:
42
  # A. Load all PDFs
43
- progress(0, desc=f"πŸ“„ Loading {len(pdf_files)} PDF file(s)...")
44
  all_pages = []
45
 
46
  for idx, pdf_file in enumerate(pdf_files):
47
- progress(0.05 + (idx * 0.1 / len(pdf_files)),
48
- desc=f"πŸ“„ Loading PDF {idx + 1}/{len(pdf_files)}...")
 
49
  loader = PyPDFLoader(pdf_file.name)
50
  pages = loader.load()
51
 
@@ -54,34 +55,37 @@ def generate_question_paper(
54
 
55
  all_pages.extend(pages)
56
 
57
- progress(0.15, desc=f"βœ… Loaded {len(pdf_files)} PDF(s) successfully, extracting text...")
58
 
59
  # B. Split Text
 
60
  text_splitter = RecursiveCharacterTextSplitter(
61
  chunk_size=1000,
62
  chunk_overlap=100
63
  )
64
  chunks = text_splitter.split_documents(all_pages)
65
- progress(0.3, desc="πŸ“ Text extracted, preparing embeddings...")
66
 
67
  # C. Vector Store (FAISS)
 
68
  embeddings = FastEmbedEmbeddings()
69
- progress(0.4, desc="🧠 Creating knowledge base...")
70
  vector_store = FAISS.from_documents(chunks, embeddings)
71
- progress(0.5, desc="βœ… Knowledge base ready, analyzing content...")
72
 
73
  # D. Retrieve Context (more chunks for multiple PDFs)
 
74
  retriever = vector_store.as_retriever(search_kwargs={"k": min(10, len(chunks))})
75
  context_docs = retriever.invoke("Key concepts, definitions, and important topics")
76
  context_text = "\n\n".join([doc.page_content for doc in context_docs])
77
- progress(0.6, desc="🎯 Key concepts identified, activating AI model...")
78
 
79
  # E. Generate all sets
80
  all_outputs = []
81
 
82
  for set_num in range(1, num_sets + 1):
83
- progress(0.6 + (set_num - 1) * 0.3 / num_sets,
84
- desc=f"πŸ“ Generating Set {set_num}/{num_sets}...")
85
 
86
  # Create Prompt for this set
87
  sections = []
@@ -126,6 +130,9 @@ FORMAT REQUIREMENTS:
126
 
127
  Do not output conversational text. Output ONLY the exam paper in a well-formatted structure."""
128
 
 
 
 
129
  # F. Generate using chat completion
130
  messages = [{"role": "user", "content": prompt}]
131
 
@@ -144,15 +151,23 @@ Do not output conversational text. Output ONLY the exam paper in a well-formatte
144
  if hasattr(message.choices[0], 'delta') and hasattr(message.choices[0].delta, 'content'):
145
  response += message.choices[0].delta.content or ""
146
  token_count += 1
147
- # Update progress
148
- set_progress = 0.6 + (set_num - 1) * 0.3 / num_sets
149
- generation_progress = min((token_count / max_tokens) * (0.3 / num_sets), 0.3 / num_sets)
150
- progress(set_progress + generation_progress,
151
- desc=f"✍️ Generating Set {set_num}/{num_sets}... {int(generation_progress / (0.3 / num_sets) * 100)}%")
 
 
 
 
 
 
 
 
152
 
153
  all_outputs.append(response)
154
 
155
- progress(1.0, desc="βœ… All question papers generated successfully!")
156
 
157
  # Combine all sets
158
  final_output = "\n\n" + "="*80 + "\n\n".join(all_outputs)
@@ -162,9 +177,9 @@ Do not output conversational text. Output ONLY the exam paper in a well-formatte
162
  return f"❌ Error: {str(e)}\n\nPlease check:\n1. PDFs are valid and contain text\n2. HF_TOKEN is correctly set in Space secrets\n3. Try again or contact support"
163
 
164
  # --- 3. The UI ---
165
- with gr.Blocks(title="AI Question Paper Generator") as demo:
166
  gr.Markdown("# πŸ“„ AI Question Paper Generator Pro")
167
- gr.Markdown("Powered by **Fine-Tuned Llama 3.2 3B**")
168
  gr.Markdown("⚑ Fast β€’ 🎯 Accurate β€’ πŸ“š Multi-PDF Support β€’ 🎲 Multiple Sets")
169
 
170
  with gr.Row():
@@ -248,6 +263,10 @@ with gr.Blocks(title="AI Question Paper Generator") as demo:
248
 
249
  gr.Markdown("""
250
  ---
 
 
 
 
251
  **Features:**
252
  - βœ… Multiple PDF support (up to 5 files)
253
  - βœ… Separate difficulty control for each question type
 
11
  HF_TOKEN = os.environ.get("HF_TOKEN", "")
12
 
13
  if not HF_TOKEN:
14
+ print("⚠️ Warning: HF_TOKEN not set. The app may not work properly.")
15
 
16
  # Use InferenceClient directly instead of LangChain wrapper
17
  client = InferenceClient(token=HF_TOKEN)
 
32
  return "❌ Error: Maximum 5 PDF files allowed."
33
 
34
  if not HF_TOKEN:
35
+ return "❌ Error: HF_TOKEN not configured. Please add your Hugging Face token in Space Settings > Repository secrets."
36
 
37
  total_questions = mcq_count + short_count + long_count
38
  if total_questions == 0:
 
40
 
41
  try:
42
  # A. Load all PDFs
43
+ progress(0, desc=f"πŸ“„ PDF file(s) uploaded, accessing {len(pdf_files)} file(s)...")
44
  all_pages = []
45
 
46
  for idx, pdf_file in enumerate(pdf_files):
47
+ current_progress = 0.05 + (idx * 0.1 / len(pdf_files))
48
+ progress(current_progress,
49
+ desc=f"πŸ“‚ Accessing PDF {idx + 1}/{len(pdf_files)}: {pdf_file.name.split('/')[-1][:30]}...")
50
  loader = PyPDFLoader(pdf_file.name)
51
  pages = loader.load()
52
 
 
55
 
56
  all_pages.extend(pages)
57
 
58
+ progress(0.15, desc=f"βœ… PDF loaded successfully! Extracted {len(all_pages)} pages from {len(pdf_files)} file(s)")
59
 
60
  # B. Split Text
61
+ progress(0.20, desc="πŸ“ Extracting text content from PDFs...")
62
  text_splitter = RecursiveCharacterTextSplitter(
63
  chunk_size=1000,
64
  chunk_overlap=100
65
  )
66
  chunks = text_splitter.split_documents(all_pages)
67
+ progress(0.30, desc=f"βœ… Text extracted successfully! Created {len(chunks)} text chunks, preparing embeddings...")
68
 
69
  # C. Vector Store (FAISS)
70
+ progress(0.35, desc="🧠 Generating embeddings for content understanding...")
71
  embeddings = FastEmbedEmbeddings()
72
+ progress(0.40, desc="🧠 Creating knowledge base from embeddings...")
73
  vector_store = FAISS.from_documents(chunks, embeddings)
74
+ progress(0.50, desc="βœ… Knowledge base created successfully! Analyzing content for key concepts...")
75
 
76
  # D. Retrieve Context (more chunks for multiple PDFs)
77
+ progress(0.55, desc="πŸ” Identifying key concepts and topics from content...")
78
  retriever = vector_store.as_retriever(search_kwargs={"k": min(10, len(chunks))})
79
  context_docs = retriever.invoke("Key concepts, definitions, and important topics")
80
  context_text = "\n\n".join([doc.page_content for doc in context_docs])
81
+ progress(0.60, desc=f"βœ… Analysis complete! Found {len(context_docs)} key sections. Activating AI model...")
82
 
83
  # E. Generate all sets
84
  all_outputs = []
85
 
86
  for set_num in range(1, num_sets + 1):
87
+ progress(0.65 + (set_num - 1) * 0.30 / num_sets,
88
+ desc=f"πŸ€– AI Model activated! Preparing to generate Set {set_num}/{num_sets}...")
89
 
90
  # Create Prompt for this set
91
  sections = []
 
130
 
131
  Do not output conversational text. Output ONLY the exam paper in a well-formatted structure."""
132
 
133
+ progress(0.70 + (set_num - 1) * 0.30 / num_sets,
134
+ desc=f"✍️ Generating Question Paper Set {set_num}/{num_sets}... 0%")
135
+
136
  # F. Generate using chat completion
137
  messages = [{"role": "user", "content": prompt}]
138
 
 
151
  if hasattr(message.choices[0], 'delta') and hasattr(message.choices[0].delta, 'content'):
152
  response += message.choices[0].delta.content or ""
153
  token_count += 1
154
+ # Calculate progress within this set (70-95% range divided by number of sets)
155
+ set_start = 0.70 + (set_num - 1) * 0.30 / num_sets
156
+ set_range = 0.25 / num_sets # 25% of total progress for generation
157
+ generation_progress = min((token_count / max_tokens), 1.0)
158
+ current_progress = set_start + (generation_progress * set_range)
159
+ percentage = int(generation_progress * 100)
160
+
161
+ # Update with dynamic percentage
162
+ progress(current_progress,
163
+ desc=f"✍️ Generating Question Paper Set {set_num}/{num_sets}... {percentage}%")
164
+
165
+ progress(0.70 + set_num * 0.30 / num_sets,
166
+ desc=f"βœ… Set {set_num}/{num_sets} generated successfully!")
167
 
168
  all_outputs.append(response)
169
 
170
+ progress(1.0, desc=f"βœ… All {num_sets} Question Paper(s) Generated Successfully! πŸŽ‰")
171
 
172
  # Combine all sets
173
  final_output = "\n\n" + "="*80 + "\n\n".join(all_outputs)
 
177
  return f"❌ Error: {str(e)}\n\nPlease check:\n1. PDFs are valid and contain text\n2. HF_TOKEN is correctly set in Space secrets\n3. Try again or contact support"
178
 
179
  # --- 3. The UI ---
180
+ with gr.Blocks(title="AI Question Paper Generator", theme=gr.themes.Soft()) as demo:
181
  gr.Markdown("# πŸ“„ AI Question Paper Generator Pro")
182
+ gr.Markdown("Powered by **Llama 3.2 3B** via Hugging Face Inference API")
183
  gr.Markdown("⚑ Fast β€’ 🎯 Accurate β€’ πŸ“š Multi-PDF Support β€’ 🎲 Multiple Sets")
184
 
185
  with gr.Row():
 
263
 
264
  gr.Markdown("""
265
  ---
266
+ **Setup Instructions:**
267
+ - Set `HF_TOKEN` in your Space's Settings β†’ Repository secrets
268
+ - Get your token from https://huggingface.co/settings/tokens
269
+
270
  **Features:**
271
  - βœ… Multiple PDF support (up to 5 files)
272
  - βœ… Separate difficulty control for each question type