Update app.py
app.py CHANGED
@@ -11,7 +11,7 @@ from langchain_core.prompts import ChatPromptTemplate
 HF_TOKEN = os.environ.get("HF_TOKEN", "")
 
 if not HF_TOKEN:
-    print("⚠️ Warning: …
+    print("⚠️ Warning: HF_TOKEN not set. The app may not work properly.")
 
 # Use InferenceClient directly instead of LangChain wrapper
 client = InferenceClient(token=HF_TOKEN)
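The change above only warns when `HF_TOKEN` is absent but still constructs the client. A fail-fast variant could verify the token at startup; a minimal sketch, assuming the stock `huggingface_hub` API (`HfApi.whoami` raises on a bad token) — the `validate_hf_token` helper is hypothetical and not part of this commit:

```python
import os
from huggingface_hub import HfApi, InferenceClient

def validate_hf_token(token: str) -> bool:
    """Hypothetical helper: True if the token authenticates against the Hub."""
    if not token:
        return False
    try:
        HfApi().whoami(token=token)  # raises if the token is invalid
        return True
    except Exception:
        return False

HF_TOKEN = os.environ.get("HF_TOKEN", "")
if not validate_hf_token(HF_TOKEN):
    print("⚠️ Warning: HF_TOKEN missing or invalid. Set it in Space secrets.")
client = InferenceClient(token=HF_TOKEN)
```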
@@ -32,7 +32,7 @@ def generate_question_paper(
         return "❌ Error: Maximum 5 PDF files allowed."
 
     if not HF_TOKEN:
-        return "❌ Error: …
+        return "❌ Error: HF_TOKEN not configured. Please add your Hugging Face token in Space Settings > Repository secrets."
 
     total_questions = mcq_count + short_count + long_count
     if total_questions == 0:
@@ -40,12 +40,13 @@ def generate_question_paper(
 
     try:
         # A. Load all PDFs
-        progress(0, desc=f"📄 …
+        progress(0, desc=f"📄 PDF file(s) uploaded, accessing {len(pdf_files)} file(s)...")
         all_pages = []
 
         for idx, pdf_file in enumerate(pdf_files):
-            …
-            …
+            current_progress = 0.05 + (idx * 0.1 / len(pdf_files))
+            progress(current_progress,
+                     desc=f"📄 Accessing PDF {idx + 1}/{len(pdf_files)}: {pdf_file.name.split('/')[-1][:30]}...")
             loader = PyPDFLoader(pdf_file.name)
             pages = loader.load()
 
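The new per-file formula maps file `idx` (0-based) into the 5–15% band of the bar: `0.05 + idx * 0.1 / len(pdf_files)`. A quick standalone check of the arithmetic:

```python
def file_load_progress(idx: int, n_files: int) -> float:
    # File idx (0-based) of n_files lands in the 0.05-0.15 slice of the bar.
    return 0.05 + (idx * 0.1 / n_files)

print([round(file_load_progress(i, 4), 3) for i in range(4)])
# [0.05, 0.075, 0.1, 0.125]; the 0.15 mark is reported after the loop
```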
@@ -54,34 +55,37 @@ def generate_question_paper(
 
         all_pages.extend(pages)
 
-        progress(0.15, desc=f"✅ …
+        progress(0.15, desc=f"✅ PDF loaded successfully! Extracted {len(all_pages)} pages from {len(pdf_files)} file(s)")
 
         # B. Split Text
+        progress(0.20, desc="📝 Extracting text content from PDFs...")
         text_splitter = RecursiveCharacterTextSplitter(
             chunk_size=1000,
             chunk_overlap=100
         )
         chunks = text_splitter.split_documents(all_pages)
-        progress(0.…
+        progress(0.30, desc=f"✅ Text extracted successfully! Created {len(chunks)} text chunks, preparing embeddings...")
 
         # C. Vector Store (FAISS)
+        progress(0.35, desc="🧠 Generating embeddings for content understanding...")
         embeddings = FastEmbedEmbeddings()
-        progress(0.…
+        progress(0.40, desc="🧠 Creating knowledge base from embeddings...")
         vector_store = FAISS.from_documents(chunks, embeddings)
-        progress(0.…
+        progress(0.50, desc="✅ Knowledge base created successfully! Analyzing content for key concepts...")
 
         # D. Retrieve Context (more chunks for multiple PDFs)
+        progress(0.55, desc="🔍 Identifying key concepts and topics from content...")
         retriever = vector_store.as_retriever(search_kwargs={"k": min(10, len(chunks))})
         context_docs = retriever.invoke("Key concepts, definitions, and important topics")
         context_text = "\n\n".join([doc.page_content for doc in context_docs])
-        progress(0.…
+        progress(0.60, desc=f"✅ Analysis complete! Found {len(context_docs)} key sections. Activating AI model...")
 
         # E. Generate all sets
         all_outputs = []
 
         for set_num in range(1, num_sets + 1):
-            progress(0.…
-                     desc=f"…
+            progress(0.65 + (set_num - 1) * 0.30 / num_sets,
+                     desc=f"🤖 AI Model activated! Preparing to generate Set {set_num}/{num_sets}...")
 
             # Create Prompt for this set
             sections = []
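This hunk instruments the split → embed → index → retrieve pipeline. For orientation, the same pipeline can be run standalone; a minimal sketch assuming current LangChain packaging (import paths vary across versions) and a placeholder PDF path:

```python
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter

pages = PyPDFLoader("sample.pdf").load()  # placeholder path
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
chunks = splitter.split_documents(pages)

# FastEmbed computes embeddings locally (ONNX), so indexing needs no API token.
store = FAISS.from_documents(chunks, FastEmbedEmbeddings())
retriever = store.as_retriever(search_kwargs={"k": min(10, len(chunks))})
docs = retriever.invoke("Key concepts, definitions, and important topics")
print(f"retrieved {len(docs)} chunks")
```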
@@ -126,6 +130,9 @@ FORMAT REQUIREMENTS:
 
 Do not output conversational text. Output ONLY the exam paper in a well-formatted structure."""
 
+            progress(0.70 + (set_num - 1) * 0.30 / num_sets,
+                     desc=f"✍️ Generating Question Paper Set {set_num}/{num_sets}... 0%")
+
             # F. Generate using chat completion
             messages = [{"role": "user", "content": prompt}]
 
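The following hunk consumes the completion as a token stream. A standalone sketch of that pattern with `InferenceClient.chat_completion(stream=True)`; the model id is an assumption inferred from the "Llama 3.2 3B" label added to the UI below:

```python
from huggingface_hub import InferenceClient

client = InferenceClient(token="hf_...")  # placeholder token
messages = [{"role": "user", "content": "Write one MCQ about FAISS."}]

response = ""
for chunk in client.chat_completion(
    messages=messages,
    model="meta-llama/Llama-3.2-3B-Instruct",  # assumed model id
    max_tokens=256,
    stream=True,
):
    # Each streamed chunk carries an incremental delta; content may be None.
    response += chunk.choices[0].delta.content or ""
print(response)
```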
@@ -144,15 +151,23 @@ Do not output conversational text. Output ONLY the exam paper in a well-formatted structure."""
                 if hasattr(message.choices[0], 'delta') and hasattr(message.choices[0].delta, 'content'):
                     response += message.choices[0].delta.content or ""
                     token_count += 1
-                    # …
-                    …
-                    …
-                    …
-                    …
+                    # Calculate progress within this set (70-95% range divided by number of sets)
+                    set_start = 0.70 + (set_num - 1) * 0.30 / num_sets
+                    set_range = 0.25 / num_sets  # 25% of total progress for generation
+                    generation_progress = min((token_count / max_tokens), 1.0)
+                    current_progress = set_start + (generation_progress * set_range)
+                    percentage = int(generation_progress * 100)
+
+                    # Update with dynamic percentage
+                    progress(current_progress,
+                             desc=f"✍️ Generating Question Paper Set {set_num}/{num_sets}... {percentage}%")
+
+            progress(0.70 + set_num * 0.30 / num_sets,
+                     desc=f"✅ Set {set_num}/{num_sets} generated successfully!")
 
             all_outputs.append(response)
 
-        progress(1.0, desc="✅ All …
+        progress(1.0, desc=f"✅ All {num_sets} Question Paper(s) Generated Successfully! 🎉")
 
         # Combine all sets
         final_output = "\n\n" + "="*80 + "\n\n".join(all_outputs)
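Sanity-checking the new progress math: each set's streaming phase owns a `0.25 / num_sets` slice starting at `0.70 + (set_num - 1) * 0.30 / num_sets`, and the completion tick at `0.70 + set_num * 0.30 / num_sets` closes the remaining gap:

```python
def generation_progress_bar(set_num, num_sets, token_count, max_tokens):
    # Mirrors the math added in this hunk.
    set_start = 0.70 + (set_num - 1) * 0.30 / num_sets
    set_range = 0.25 / num_sets
    frac = min(token_count / max_tokens, 1.0)
    return set_start + frac * set_range

# Two sets, max_tokens=2000:
print(generation_progress_bar(1, 2, 0, 2000))     # 0.7    - set 1 begins
print(generation_progress_bar(1, 2, 2000, 2000))  # 0.825  - set 1 fully streamed
print(generation_progress_bar(2, 2, 1000, 2000))  # 0.9125 - set 2 halfway
```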
@@ -162,9 +177,9 @@ Do not output conversational text. Output ONLY the exam paper in a well-formatted structure."""
         return f"❌ Error: {str(e)}\n\nPlease check:\n1. PDFs are valid and contain text\n2. HF_TOKEN is correctly set in Space secrets\n3. Try again or contact support"
 
 # --- 3. The UI ---
-with gr.Blocks(title="AI Question Paper Generator") as demo:
+with gr.Blocks(title="AI Question Paper Generator", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 📝 AI Question Paper Generator Pro")
-    gr.Markdown("Powered by **…
+    gr.Markdown("Powered by **Llama 3.2 3B** via Hugging Face Inference API")
     gr.Markdown("⚡ Fast • 🎯 Accurate • 📚 Multi-PDF Support • 🎲 Multiple Sets")
 
     with gr.Row():
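The `progress(...)` calls throughout this diff rely on Gradio injecting a live tracker wherever a `gr.Progress()` default argument is declared on the event handler. A minimal, self-contained sketch of that mechanism (the demo function and labels are illustrative):

```python
import time
import gradio as gr

def slow_task(n_steps, progress=gr.Progress()):
    # Gradio replaces the gr.Progress() default with a live tracker;
    # calling it with a float in [0, 1] plus desc updates the bar.
    n = int(n_steps)
    for i in range(n):
        progress(i / n, desc=f"Working... step {i + 1}/{n}")
        time.sleep(0.1)
    progress(1.0, desc="Done!")
    return f"Finished {n} steps"

with gr.Blocks(title="Progress demo", theme=gr.themes.Soft()) as demo:
    steps = gr.Slider(1, 20, value=5, step=1, label="Steps")
    out = gr.Textbox(label="Result")
    gr.Button("Run").click(slow_task, inputs=steps, outputs=out)

demo.launch()
```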
@@ -248,6 +263,10 @@ with gr.Blocks(title="AI Question Paper Generator") as demo:
 
     gr.Markdown("""
     ---
+    **Setup Instructions:**
+    - Set `HF_TOKEN` in your Space's Settings → Repository secrets
+    - Get your token from https://huggingface.co/settings/tokens
+
     **Features:**
     - ✅ Multiple PDF support (up to 5 files)
     - ✅ Separate difficulty control for each question type