Update app.py
Browse files
app.py
CHANGED
|
@@ -7,10 +7,12 @@ from PIL import Image, ImageDraw, ImageFont
|
|
| 7 |
from huggingface_hub import login
|
| 8 |
import requests
|
| 9 |
import json
|
| 10 |
-
import base64
|
| 11 |
-
import re
|
| 12 |
import time
|
| 13 |
-
import pandas as pd
|
|
|
|
|
|
|
| 14 |
# Attempt to login using environment token
|
| 15 |
try:
|
| 16 |
HF_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
|
|
@@ -155,38 +157,21 @@ def get_gaia_api_questions():
|
|
| 155 |
return None, f"An unexpected error occurred: {e}"
|
| 156 |
|
| 157 |
def get_gaia_file_data_for_task(task_id_for_file_fetch, associated_file_metadata_list):
|
| 158 |
-
"""
|
| 159 |
-
Fetches the content of the primary file associated with a task_id from the GAIA API.
|
| 160 |
-
Returns raw_bytes, detected_mime_type, and file_name.
|
| 161 |
-
associated_file_metadata_list is the 'files' list from the question data.
|
| 162 |
-
"""
|
| 163 |
-
# If no metadata, assume no file to fetch for this specialized getter.
|
| 164 |
-
# Or, if the API always serves THE file for task_id, then metadata is just for info.
|
| 165 |
-
# Let's assume the API /files/{task_id} always gives the relevant file if one exists for the task.
|
| 166 |
-
|
| 167 |
file_url = f"{GAIA_API_BASE_URL}/files/{task_id_for_file_fetch}"
|
| 168 |
print(f"Attempting to fetch file for task {task_id_for_file_fetch} from {file_url}")
|
| 169 |
-
|
| 170 |
try:
|
| 171 |
response = requests.get(file_url, timeout=30)
|
| 172 |
-
response.raise_for_status()
|
| 173 |
-
|
| 174 |
raw_bytes = response.content
|
| 175 |
detected_mime_type = response.headers.get('Content-Type', '').split(';')[0].strip()
|
| 176 |
-
|
| 177 |
-
# Try to get a filename from metadata if available, otherwise default
|
| 178 |
-
file_name = "attached_file"
|
| 179 |
if associated_file_metadata_list and isinstance(associated_file_metadata_list, list) and len(associated_file_metadata_list) > 0:
|
| 180 |
-
# Assuming the first file in metadata is the one fetched, or provides its name
|
| 181 |
first_file_meta = associated_file_metadata_list[0]
|
| 182 |
if isinstance(first_file_meta, dict) and 'file_name' in first_file_meta:
|
| 183 |
file_name = first_file_meta['file_name']
|
| 184 |
-
|
| 185 |
print(f"File fetched for task {task_id_for_file_fetch}. Mime-type: {detected_mime_type}, Name: {file_name}, Size: {len(raw_bytes)} bytes")
|
| 186 |
return raw_bytes, detected_mime_type, file_name
|
| 187 |
-
|
| 188 |
except requests.exceptions.HTTPError as http_err:
|
| 189 |
-
# Specifically handle 404 for "no file" vs other errors
|
| 190 |
if http_err.response.status_code == 404:
|
| 191 |
print(f"No file found (404) for task {task_id_for_file_fetch} at {file_url}.")
|
| 192 |
else:
|
|
@@ -199,136 +184,225 @@ def get_gaia_file_data_for_task(task_id_for_file_fetch, associated_file_metadata
|
|
| 199 |
print(f"Unexpected error fetching file for task {task_id_for_file_fetch}: {e_gen}")
|
| 200 |
return None, None, None
|
| 201 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 202 |
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
else:
|
| 212 |
-
|
|
|
|
| 213 |
|
| 214 |
-
# Step 2: Remove wrapping quotes if any
|
| 215 |
-
if (answer.startswith('"') and answer.endswith('"')) or (answer.startswith("'") and answer.endswith("'")):
|
| 216 |
-
answer = answer[1:-1].strip()
|
| 217 |
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
|
|
|
| 221 |
|
| 222 |
-
|
| 223 |
-
answer = re.sub(r'\s*,\s*', ',', answer)
|
| 224 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
return answer
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
print(f"Agent (GAIA-Grounded Gemini) processing Task ID: {task_id}, Question: {question}")
|
| 232 |
-
if files_metadata: # This is the list of file metadata dicts
|
| 233 |
-
print(f"File metadata associated with this task: {files_metadata}")
|
| 234 |
|
| 235 |
gemini_api_key = os.environ.get("GEMINI_API_KEY")
|
| 236 |
if not gemini_api_key:
|
| 237 |
-
print("Error: GEMINI_API_KEY not found in environment variables. Please set it in Space Secrets.")
|
| 238 |
return f"ERROR_GEMINI_KEY_MISSING_FOR_TASK_{task_id}"
|
| 239 |
|
| 240 |
-
# --- GAIA-specific System Prompt ---
|
| 241 |
-
# Adapted from Figure 2 of GAIA Paper [cite: 103, 104, 105, 106, 107, 108]
|
| 242 |
system_prompt_lines = [
|
| 243 |
"You are a general AI assistant. I will ask you a question.",
|
| 244 |
-
"
|
| 245 |
-
"
|
| 246 |
-
"
|
| 247 |
-
"If
|
|
|
|
|
|
|
| 248 |
"If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.",
|
| 249 |
"If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.",
|
| 250 |
-
"
|
| 251 |
-
"If external files are mentioned or provided, use their content if relevant and accessible to answer the question.",
|
| 252 |
]
|
| 253 |
-
# We won't send this as a separate "system" message in Gemini's typical API structure,
|
| 254 |
-
# but rather prepend it to the user question for a single turn.
|
| 255 |
|
| 256 |
-
|
| 257 |
gemini_parts = []
|
| 258 |
-
|
| 259 |
-
# Prepend system prompt guidelines to the main question text part
|
| 260 |
-
user_question_text = "\n".join(system_prompt_lines) + f"\n\nGAIA Question: {question}"
|
| 261 |
|
| 262 |
-
# --- File Handling ---
|
|
|
|
| 263 |
file_content_bytes, detected_mime_type, file_name = None, None, None
|
| 264 |
-
|
|
|
|
| 265 |
file_content_bytes, detected_mime_type, file_name = get_gaia_file_data_for_task(task_id, files_metadata)
|
| 266 |
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
"
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 311 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 312 |
|
| 313 |
-
###########################################################################################################################
|
| 314 |
-
else: # No file content fetched or no files associated
|
| 315 |
-
gemini_parts.append({"text": user_question_text})
|
| 316 |
|
| 317 |
payload = {
|
| 318 |
"contents": [{"role": "user", "parts": gemini_parts}],
|
| 319 |
-
"generationConfig": {
|
| 320 |
-
"temperature": 0.2, # Lower temperature for more factual/deterministic GAIA answers
|
| 321 |
-
"maxOutputTokens": 300, # Increased slightly for potentially more complex answers
|
| 322 |
-
}
|
| 323 |
}
|
| 324 |
-
|
| 325 |
api_url_with_key = f"{GEMINI_API_URL_BASE}?key={gemini_api_key}"
|
| 326 |
agent_computed_answer = f"ERROR_CALLING_GEMINI_FOR_TASK_{task_id}"
|
| 327 |
|
| 328 |
try:
|
| 329 |
headers = {"Content-Type": "application/json"}
|
| 330 |
-
print(f"Calling Gemini API for task {task_id}
|
| 331 |
-
response = requests.post(api_url_with_key, headers=headers, json=payload, timeout=
|
| 332 |
response.raise_for_status()
|
| 333 |
result = response.json()
|
| 334 |
|
|
@@ -336,34 +410,23 @@ def my_agent_logic(task_id: str, question: str, files_metadata: list = None): #
|
|
| 336 |
result["candidates"][0].get("content") and
|
| 337 |
result["candidates"][0]["content"].get("parts") and
|
| 338 |
result["candidates"][0]["content"]["parts"][0].get("text")):
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
print(f"Raw Gemini output: {raw_answer}") # Debugging log
|
| 342 |
-
agent_computed_answer = clean_final_answer(raw_answer)
|
| 343 |
-
|
| 344 |
else:
|
| 345 |
-
print(f"Warning: Unexpected response structure from Gemini API for task {task_id}: {result}")
|
| 346 |
if result.get("promptFeedback") and result["promptFeedback"].get("blockReason"):
|
| 347 |
block_reason = result["promptFeedback"]["blockReason"]
|
| 348 |
-
print(f"Gemini API blocked the prompt for task {task_id}. Reason: {block_reason}")
|
| 349 |
agent_computed_answer = f"ERROR_GEMINI_PROMPT_BLOCKED_{block_reason}_FOR_TASK_{task_id}"
|
| 350 |
else:
|
| 351 |
agent_computed_answer = f"ERROR_PARSING_GEMINI_RESPONSE_FOR_TASK_{task_id}"
|
| 352 |
except requests.exceptions.Timeout:
|
| 353 |
-
print(f"Timeout error calling Gemini API for task {task_id}.")
|
| 354 |
agent_computed_answer = f"ERROR_GEMINI_TIMEOUT_FOR_TASK_{task_id}"
|
| 355 |
except requests.exceptions.RequestException as e:
|
| 356 |
-
print(f"
|
| 357 |
-
if e.response is not None:
|
| 358 |
-
print(f"Gemini API Error Response Status: {e.response.status_code}")
|
| 359 |
-
try: print(f"Gemini API Error Response Body: {e.response.json()}")
|
| 360 |
-
except json.JSONDecodeError: print(f"Gemini API Error Response Body (text): {e.response.text}")
|
| 361 |
agent_computed_answer = f"ERROR_GEMINI_REQUEST_FAILED_FOR_TASK_{task_id}"
|
| 362 |
except Exception as e:
|
| 363 |
-
print(f"An unexpected error occurred in my_agent_logic for task {task_id}: {e}")
|
| 364 |
agent_computed_answer = f"ERROR_UNEXPECTED_IN_AGENT_LOGIC_FOR_TASK_{task_id}"
|
| 365 |
-
|
| 366 |
-
print(f"Agent (GAIA-Grounded Gemini) computed answer for Task ID {task_id}: {agent_computed_answer}")
|
| 367 |
return agent_computed_answer
|
| 368 |
|
| 369 |
def run_agent_on_gaia(profile: gr.OAuthProfile, run_all_questions: bool = True):
|
|
@@ -389,20 +452,25 @@ def run_agent_on_gaia(profile: gr.OAuthProfile, run_all_questions: bool = True):
|
|
| 389 |
log_messages.append(f"Processing 1 random question based on user choice.")
|
| 390 |
elif run_all_questions:
|
| 391 |
log_messages.append(f"Processing all {len(tasks_to_process)} questions.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 392 |
for task in tasks_to_process:
|
| 393 |
task_id = task.get("task_id")
|
| 394 |
question = task.get("question")
|
| 395 |
-
associated_files_metadata = task.get("files", [])
|
| 396 |
if task_id and question:
|
| 397 |
log_messages.append(f"\nProcessing Task ID: {task_id}")
|
| 398 |
log_messages.append(f"Question: {question}")
|
| 399 |
if associated_files_metadata:
|
| 400 |
log_messages.append(f"Associated files metadata: {associated_files_metadata}")
|
| 401 |
-
# Pass the files_metadata to the agent logic
|
| 402 |
submitted_answer = my_agent_logic(task_id, question, associated_files_metadata)
|
| 403 |
log_messages.append(f"Agent's Answer: {submitted_answer}")
|
| 404 |
answers_to_submit.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
| 405 |
-
|
|
|
|
| 406 |
else:
|
| 407 |
log_messages.append(f"Skipping malformed task: {task}")
|
| 408 |
if not answers_to_submit:
|
|
@@ -461,7 +529,7 @@ def submit_agent_answers(profile: gr.OAuthProfile, answers_for_submission_state)
|
|
| 461 |
submission_log_messages.append(f"An unexpected error occurred during submission: {e}")
|
| 462 |
return "\n".join(submission_log_messages)
|
| 463 |
|
| 464 |
-
# --- Gradio Interface (largely unchanged
|
| 465 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 466 |
gr.Markdown("# 🎓 Agents Course - Unit 4 Final Project")
|
| 467 |
gr.Markdown("⚠️ **Note**: Due to high demand, you might experience occasional bugs. If something doesn't work, please try again after a moment!")
|
|
@@ -474,7 +542,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 474 |
with gr.Tabs():
|
| 475 |
with gr.TabItem("🤖 Run Agent on GAIA Benchmark"):
|
| 476 |
gr.Markdown("## Step 1: Run Your Agent & Generate Answers")
|
| 477 |
-
gr.Markdown("This agent uses the Gemini API
|
| 478 |
run_all_questions_checkbox = gr.Checkbox(label="Process all questions (unchecked processes 1 random question for testing)", value=True)
|
| 479 |
run_agent_button = gr.Button("🔎 Fetch Questions & Run My Agent")
|
| 480 |
gr.Markdown("### Agent Run Log & Generated Answers:")
|
|
|
|
| 7 |
from huggingface_hub import login
|
| 8 |
import requests
|
| 9 |
import json
|
| 10 |
+
import base64
|
| 11 |
+
import re # For advanced string cleaning
|
| 12 |
import time
|
| 13 |
+
import pandas as pd # For spreadsheet handling
|
| 14 |
+
from io import StringIO # For capturing print output from exec
|
| 15 |
+
|
| 16 |
# Attempt to login using environment token
|
| 17 |
try:
|
| 18 |
HF_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
|
|
|
|
| 157 |
return None, f"An unexpected error occurred: {e}"
|
| 158 |
|
| 159 |
def get_gaia_file_data_for_task(task_id_for_file_fetch, associated_file_metadata_list):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
file_url = f"{GAIA_API_BASE_URL}/files/{task_id_for_file_fetch}"
|
| 161 |
print(f"Attempting to fetch file for task {task_id_for_file_fetch} from {file_url}")
|
|
|
|
| 162 |
try:
|
| 163 |
response = requests.get(file_url, timeout=30)
|
| 164 |
+
response.raise_for_status()
|
|
|
|
| 165 |
raw_bytes = response.content
|
| 166 |
detected_mime_type = response.headers.get('Content-Type', '').split(';')[0].strip()
|
| 167 |
+
file_name = "attached_file" # Default
|
|
|
|
|
|
|
| 168 |
if associated_file_metadata_list and isinstance(associated_file_metadata_list, list) and len(associated_file_metadata_list) > 0:
|
|
|
|
| 169 |
first_file_meta = associated_file_metadata_list[0]
|
| 170 |
if isinstance(first_file_meta, dict) and 'file_name' in first_file_meta:
|
| 171 |
file_name = first_file_meta['file_name']
|
|
|
|
| 172 |
print(f"File fetched for task {task_id_for_file_fetch}. Mime-type: {detected_mime_type}, Name: {file_name}, Size: {len(raw_bytes)} bytes")
|
| 173 |
return raw_bytes, detected_mime_type, file_name
|
|
|
|
| 174 |
except requests.exceptions.HTTPError as http_err:
|
|
|
|
| 175 |
if http_err.response.status_code == 404:
|
| 176 |
print(f"No file found (404) for task {task_id_for_file_fetch} at {file_url}.")
|
| 177 |
else:
|
|
|
|
| 184 |
print(f"Unexpected error fetching file for task {task_id_for_file_fetch}: {e_gen}")
|
| 185 |
return None, None, None
|
| 186 |
|
| 187 |
+
def execute_python_code(code_string: str):
    """Execute a string of Python code and capture what it prints.

    The code is run as a script: it gets a single fresh namespace whose
    ``__name__`` is ``"__main__"``, so top-level functions can call one
    another and ``if __name__ == "__main__":`` guards are honoured.

    SECURITY NOTE(review): this is NOT a sandbox. ``exec`` runs the code with
    the full set of builtins (including ``__import__`` and ``open``) inside
    this process. Only call it on code you are prepared to run with the
    application's own privileges.

    Args:
        code_string: Python source code to execute.

    Returns:
        A ``(output, error)`` tuple, chosen in this order of priority:

        1. ``(str(final_answer), None)`` if the code ran to completion and
           left a truthy-when-stringified ``final_answer`` variable behind.
        2. ``(printed_text, None)`` if the code wrote anything to stdout
           (this also applies to partial output printed before a failure).
        3. ``(None, "Execution Error: <Type>: <message>")`` if the code
           raised and printed nothing.
        4. ``(placeholder_message, None)`` if the code ran silently.
    """
    # Imported here so the function does not depend on some other function
    # having imported these modules into the module globals first.
    import contextlib
    import io

    print(f"Attempting to execute Python code:\n{code_string[:500]}...")  # Log first 500 chars

    execution_result = None
    error_message = None
    captured_output = io.StringIO()

    # One dict serves as both globals and locals. With two separate dicts,
    # exec() treats the code like a class body and functions defined at the
    # top level of the script cannot see each other.
    namespace = {"__builtins__": __builtins__, "__name__": "__main__"}

    try:
        # redirect_stdout restores sys.stdout on exit, even on exceptions.
        with contextlib.redirect_stdout(captured_output):
            exec(code_string, namespace)
        # Some scripts expose their result through a 'final_answer' variable.
        if "final_answer" in namespace:
            execution_result = str(namespace["final_answer"])
    except Exception as e:
        # Logged only after stdout has been restored, so this diagnostic is
        # never mistaken for output produced by the executed script.
        print(f"Error executing Python code: {e}")
        error_message = f"Execution Error: {type(e).__name__}: {e}"

    printed_output = captured_output.getvalue().strip()

    if execution_result:
        # A 'final_answer' variable takes priority over anything printed.
        return execution_result, None
    if printed_output:
        # Nothing in 'final_answer', but the script printed something.
        return printed_output, None
    if error_message:
        # The script failed without producing any output.
        return None, error_message
    # No 'final_answer', nothing printed, no error (e.g. only definitions).
    return "Python code executed without explicit output or 'final_answer' variable.", None
|
| 233 |
|
|
|
|
|
|
|
|
|
|
| 234 |
|
| 235 |
+
# Conversational lead-ins that are removed from the start of an answer.
_ANSWER_PREFIXES = (
    "The answer is", "The final answer is", "So, the answer is", "Therefore, the answer is",
    "Based on the information, the answer is", "The correct answer is", "My answer is",
    "Okay, the answer is", "Sure, the answer is", "Here is the answer:", "The solution is",
    "Answer:", "Result:",
)
# Everything following a "FINAL ANSWER:" marker (any casing, may span lines).
_FINAL_ANSWER_RE = re.compile(r"FINAL ANSWER:\s*(.*)", re.IGNORECASE | re.DOTALL)
# A dot sandwiched between alphanumerics, e.g. "file.txt" or "U.S.A".
_INNER_DOT_RE = re.compile(r"[a-zA-Z0-9]\.[a-zA-Z0-9]")
# A plain number, optionally signed, with optional thousands commas/decimals.
_NUMBER_RE = re.compile(r"[-+]?\d[\d,]*(?:\.\d+)?")


def clean_final_answer(raw_text: str) -> str:
    """Reduce raw LLM output to the bare answer string.

    Steps, applied in order:

    1. Strip surrounding whitespace.
    2. If a "FINAL ANSWER:" marker is present, keep only what follows it.
    3. Drop the first matching conversational prefix (case-insensitive) and
       a single ':' or '.' left directly behind it.
    4. Remove one pair of matching wrapping quotes.
    5. Collapse whitespace around commas (``"a , b"`` -> ``"a,b"``).
    6. Drop a stray trailing period from numbers and from short answers of
       at most three words; longer sentences and dotted abbreviations such
       as ``"U.S.A."`` keep theirs.

    Args:
        raw_text: Text returned by the model.

    Returns:
        The cleaned answer, or ``""`` when ``raw_text`` is not a string.
    """
    if not isinstance(raw_text, str):
        return ""  # Should not happen, but good to be safe

    answer = raw_text.strip()

    marker_match = _FINAL_ANSWER_RE.search(answer)
    if marker_match:
        answer = marker_match.group(1).strip()

    lowered = answer.lower()
    for prefix in _ANSWER_PREFIXES:
        if lowered.startswith(prefix.lower()):
            answer = answer[len(prefix):].strip()
            # Remove a colon or period left behind by the prefix.
            if answer.startswith((":", ".")):
                answer = answer[1:].strip()
            break  # Only the first matching prefix is removed

    # Units such as '$' or '%' are deliberately left alone: some questions
    # require them, and the prompt already tells the model when to omit them.
    if len(answer) >= 2 and answer[0] == answer[-1] and answer[0] in ('"', "'"):
        answer = answer[1:-1].strip()

    answer = re.sub(r'\s*,\s*', ',', answer)

    if len(answer) > 1 and answer.endswith("."):
        body = answer[:-1].strip()
        if _NUMBER_RE.fullmatch(body):
            # "3.14." -> "3.14": the inner-dot guard below must not shield a
            # decimal number from losing its stray trailing period.
            answer = body
        elif not _INNER_DOT_RE.search(answer) and len(body.split()) <= 3:
            # Short phrase or single token; more than three words is most
            # likely a full sentence, which keeps its period.
            answer = body

    return answer
|
| 289 |
+
|
| 290 |
+
def my_agent_logic(task_id: str, question: str, files_metadata: list = None):
|
| 291 |
+
print(f"Agent (Enhanced Tools + Gemini) processing Task ID: {task_id}, Question: {question}")
|
| 292 |
+
if files_metadata:
|
| 293 |
+
print(f"File metadata associated: {files_metadata}")
|
|
|
|
|
|
|
|
|
|
| 294 |
|
| 295 |
gemini_api_key = os.environ.get("GEMINI_API_KEY")
|
| 296 |
if not gemini_api_key:
|
|
|
|
| 297 |
return f"ERROR_GEMINI_KEY_MISSING_FOR_TASK_{task_id}"
|
| 298 |
|
|
|
|
|
|
|
| 299 |
system_prompt_lines = [
|
| 300 |
"You are a general AI assistant. I will ask you a question.",
|
| 301 |
+
"Your primary goal is to provide the single, exact, concise, and factual answer to the question.",
|
| 302 |
+
"Do not include any conversational fluff, disclaimers, explanations, or any introductory phrases like 'The answer is:'. Your response should be ONLY the answer itself.",
|
| 303 |
+
"Do not use markdown formatting unless the question explicitly asks for it.",
|
| 304 |
+
"If the question implies a specific format (e.g., a number, a date, a comma-separated list), provide the answer in that format.",
|
| 305 |
+
"Do NOT include the phrase 'FINAL ANSWER:' in your response to me.",
|
| 306 |
+
"If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise by the question.",
|
| 307 |
"If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.",
|
| 308 |
"If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.",
|
| 309 |
+
"If external files or tool outputs are provided below, use their content if relevant and accessible to answer the question.",
|
|
|
|
| 310 |
]
|
|
|
|
|
|
|
| 311 |
|
| 312 |
+
user_question_text_for_gemini = "\n".join(system_prompt_lines) + f"\n\nGAIA Question: {question}"
|
| 313 |
gemini_parts = []
|
|
|
|
|
|
|
|
|
|
| 314 |
|
| 315 |
+
# --- File & Tool Handling ---
|
| 316 |
+
tool_output_description = ""
|
| 317 |
file_content_bytes, detected_mime_type, file_name = None, None, None
|
| 318 |
+
|
| 319 |
+
if files_metadata:
|
| 320 |
file_content_bytes, detected_mime_type, file_name = get_gaia_file_data_for_task(task_id, files_metadata)
|
| 321 |
|
| 322 |
+
if file_content_bytes:
|
| 323 |
+
if file_name and file_name.lower().endswith(".py") and detected_mime_type in ["text/x-python", "application/x-python-code", "text/plain"]:
|
| 324 |
+
print(f"Detected Python file: {file_name}")
|
| 325 |
+
try:
|
| 326 |
+
python_code = file_content_bytes.decode('utf-8')
|
| 327 |
+
execution_result, exec_error = execute_python_code(python_code)
|
| 328 |
+
if exec_error:
|
| 329 |
+
tool_output_description += f"\n\nExecution of Python file '{file_name}' failed: {exec_error}"
|
| 330 |
+
elif execution_result:
|
| 331 |
+
tool_output_description += f"\n\nOutput from executing Python file '{file_name}':\n{execution_result}"
|
| 332 |
+
else:
|
| 333 |
+
tool_output_description += f"\n\nPython file '{file_name}' executed without specific return or error."
|
| 334 |
+
except Exception as e_py_decode:
|
| 335 |
+
tool_output_description += f"\n\nError decoding Python file '{file_name}': {e_py_decode}"
|
| 336 |
+
|
| 337 |
+
elif detected_mime_type and detected_mime_type.startswith("image/"):
|
| 338 |
+
try:
|
| 339 |
+
base64_image = base64.b64encode(file_content_bytes).decode('utf-8')
|
| 340 |
+
gemini_parts.append({"inline_data": {"mime_type": detected_mime_type, "data": base64_image}})
|
| 341 |
+
tool_output_description += f"\n\nAn image file '{file_name}' ({detected_mime_type}) is provided. Refer to it if relevant."
|
| 342 |
+
print(f"Added image {file_name} to Gemini parts for task {task_id}.")
|
| 343 |
+
except Exception as e_img:
|
| 344 |
+
tool_output_description += f"\n\n[Agent note: Error processing image file '{file_name}': {e_img}]"
|
| 345 |
+
|
| 346 |
+
elif detected_mime_type and detected_mime_type.startswith("audio/"): # mp3, m4a, wav, etc.
|
| 347 |
+
try:
|
| 348 |
+
base64_audio = base64.b64encode(file_content_bytes).decode('utf-8')
|
| 349 |
+
gemini_parts.append({"inline_data": {"mime_type": detected_mime_type, "data": base64_audio}})
|
| 350 |
+
tool_output_description += f"\n\nAn audio file '{file_name}' ({detected_mime_type}) is provided. Transcribe or analyze it if relevant to the question."
|
| 351 |
+
print(f"Added audio {file_name} to Gemini parts for task {task_id}.")
|
| 352 |
+
except Exception as e_audio:
|
| 353 |
+
tool_output_description += f"\n\n[Agent note: Error processing audio file '{file_name}': {e_audio}]"
|
| 354 |
+
|
| 355 |
+
elif detected_mime_type in ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "application/vnd.ms-excel", "text/csv"]:
|
| 356 |
+
try:
|
| 357 |
+
if "csv" in detected_mime_type: df = pd.read_csv(io.BytesIO(file_content_bytes))
|
| 358 |
+
else: df = pd.read_excel(io.BytesIO(file_content_bytes))
|
| 359 |
+
|
| 360 |
+
# Provide a more comprehensive preview
|
| 361 |
+
preview_rows = min(10, len(df))
|
| 362 |
+
preview_cols = min(5, len(df.columns))
|
| 363 |
+
preview_df = df.iloc[:preview_rows, :preview_cols]
|
| 364 |
+
df_description = f"First {preview_rows} rows and first {preview_cols} columns (if available):\n{preview_df.to_string(index=True)}\nTotal rows: {len(df)}, Total columns: {len(df.columns)}."
|
| 365 |
+
if len(df.columns) > preview_cols:
|
| 366 |
+
df_description += f"\nOther columns include: {list(df.columns[preview_cols:])}"
|
| 367 |
+
|
| 368 |
+
tool_output_description += f"\n\nData from spreadsheet '{file_name}':\n{df_description}"
|
| 369 |
+
print(f"Added spreadsheet preview for {file_name} to tool output description.")
|
| 370 |
+
except Exception as e_xls:
|
| 371 |
+
tool_output_description += f"\n\n[Agent note: Unable to parse spreadsheet '{file_name}': {e_xls}]"
|
| 372 |
+
|
| 373 |
+
elif detected_mime_type == "text/plain":
|
| 374 |
+
try:
|
| 375 |
+
text_content = file_content_bytes.decode('utf-8')
|
| 376 |
+
tool_output_description += f"\n\nContent of attached text file '{file_name}':\n{text_content[:2000]}" # Limit length
|
| 377 |
+
print(f"Added text file content '{file_name}' to tool output description.")
|
| 378 |
+
except Exception as e_txt:
|
| 379 |
+
tool_output_description += f"\n\n[Agent note: A text file '{file_name}' was associated but could not be decoded: {e_txt}]"
|
| 380 |
+
else:
|
| 381 |
+
tool_output_description += f"\n\nNote: A file named '{file_name}' (type: {detected_mime_type or 'unknown'}) is associated. Its content could not be directly processed by current tools."
|
| 382 |
+
elif files_metadata : # File metadata exists but no bytes fetched (e.g. 404)
|
| 383 |
+
tool_output_description += f"\n\nNote: File(s) {files_metadata} were listed for this task, but could not be fetched or processed."
|
| 384 |
+
|
| 385 |
|
| 386 |
+
# Append the main question and any tool/file processing notes as a single text part if no multimodal data was added yet,
|
| 387 |
+
# or as the first text part if multimodal data (image/audio) is present.
|
| 388 |
+
final_user_text_for_gemini = user_question_text_for_gemini + tool_output_description
|
| 389 |
+
if not any(p.get("inline_data") for p in gemini_parts): # If no image/audio was added
|
| 390 |
+
gemini_parts.append({"text": final_user_text_for_gemini})
|
| 391 |
+
else: # If image/audio was added, insert text part at the beginning
|
| 392 |
+
gemini_parts.insert(0, {"text": final_user_text_for_gemini})
|
| 393 |
|
|
|
|
|
|
|
|
|
|
| 394 |
|
| 395 |
payload = {
|
| 396 |
"contents": [{"role": "user", "parts": gemini_parts}],
|
| 397 |
+
"generationConfig": {"temperature": 0.1, "maxOutputTokens": 350} # Very low temp for GAIA
|
|
|
|
|
|
|
|
|
|
| 398 |
}
|
|
|
|
| 399 |
api_url_with_key = f"{GEMINI_API_URL_BASE}?key={gemini_api_key}"
|
| 400 |
agent_computed_answer = f"ERROR_CALLING_GEMINI_FOR_TASK_{task_id}"
|
| 401 |
|
| 402 |
try:
|
| 403 |
headers = {"Content-Type": "application/json"}
|
| 404 |
+
print(f"Calling Gemini API for task {task_id} with payload structure: {[(k, type(v)) for p in payload['contents'] for part in p['parts'] for k,v in part.items()]}")
|
| 405 |
+
response = requests.post(api_url_with_key, headers=headers, json=payload, timeout=90) # Increased timeout slightly
|
| 406 |
response.raise_for_status()
|
| 407 |
result = response.json()
|
| 408 |
|
|
|
|
| 410 |
result["candidates"][0].get("content") and
|
| 411 |
result["candidates"][0]["content"].get("parts") and
|
| 412 |
result["candidates"][0]["content"]["parts"][0].get("text")):
|
| 413 |
+
raw_answer_from_gemini = result["candidates"][0]["content"]["parts"][0]["text"].strip()
|
| 414 |
+
agent_computed_answer = clean_final_answer(raw_answer_from_gemini)
|
|
|
|
|
|
|
|
|
|
| 415 |
else:
|
| 416 |
+
print(f"Warning: Unexpected response structure from Gemini API for task {task_id}: {json.dumps(result, indent=2)}")
|
| 417 |
if result.get("promptFeedback") and result["promptFeedback"].get("blockReason"):
|
| 418 |
block_reason = result["promptFeedback"]["blockReason"]
|
|
|
|
| 419 |
agent_computed_answer = f"ERROR_GEMINI_PROMPT_BLOCKED_{block_reason}_FOR_TASK_{task_id}"
|
| 420 |
else:
|
| 421 |
agent_computed_answer = f"ERROR_PARSING_GEMINI_RESPONSE_FOR_TASK_{task_id}"
|
| 422 |
except requests.exceptions.Timeout:
|
|
|
|
| 423 |
agent_computed_answer = f"ERROR_GEMINI_TIMEOUT_FOR_TASK_{task_id}"
|
| 424 |
except requests.exceptions.RequestException as e:
|
| 425 |
+
if e.response is not None: print(f"Gemini API Error Response Status: {e.response.status_code}, Body: {e.response.text}")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 426 |
agent_computed_answer = f"ERROR_GEMINI_REQUEST_FAILED_FOR_TASK_{task_id}"
|
| 427 |
except Exception as e:
|
|
|
|
| 428 |
agent_computed_answer = f"ERROR_UNEXPECTED_IN_AGENT_LOGIC_FOR_TASK_{task_id}"
|
| 429 |
+
print(f"Agent (Enhanced Tools + Gemini) computed answer for Task ID {task_id}: {agent_computed_answer}")
|
|
|
|
| 430 |
return agent_computed_answer
|
| 431 |
|
| 432 |
def run_agent_on_gaia(profile: gr.OAuthProfile, run_all_questions: bool = True):
|
|
|
|
| 452 |
log_messages.append(f"Processing 1 random question based on user choice.")
|
| 453 |
elif run_all_questions:
|
| 454 |
log_messages.append(f"Processing all {len(tasks_to_process)} questions.")
|
| 455 |
+
|
| 456 |
+
# Need to import sys for execute_python_code's stdout capture
|
| 457 |
+
global sys
|
| 458 |
+
import sys
|
| 459 |
+
|
| 460 |
for task in tasks_to_process:
|
| 461 |
task_id = task.get("task_id")
|
| 462 |
question = task.get("question")
|
| 463 |
+
associated_files_metadata = task.get("files", [])
|
| 464 |
if task_id and question:
|
| 465 |
log_messages.append(f"\nProcessing Task ID: {task_id}")
|
| 466 |
log_messages.append(f"Question: {question}")
|
| 467 |
if associated_files_metadata:
|
| 468 |
log_messages.append(f"Associated files metadata: {associated_files_metadata}")
|
|
|
|
| 469 |
submitted_answer = my_agent_logic(task_id, question, associated_files_metadata)
|
| 470 |
log_messages.append(f"Agent's Answer: {submitted_answer}")
|
| 471 |
answers_to_submit.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
| 472 |
+
if run_all_questions: # Add a small delay if processing all questions to be kind to APIs
|
| 473 |
+
time.sleep(1) # 1-second delay between processing each question
|
| 474 |
else:
|
| 475 |
log_messages.append(f"Skipping malformed task: {task}")
|
| 476 |
if not answers_to_submit:
|
|
|
|
| 529 |
submission_log_messages.append(f"An unexpected error occurred during submission: {e}")
|
| 530 |
return "\n".join(submission_log_messages)
|
| 531 |
|
| 532 |
+
# --- Gradio Interface (largely unchanged) ---
|
| 533 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 534 |
gr.Markdown("# 🎓 Agents Course - Unit 4 Final Project")
|
| 535 |
gr.Markdown("⚠️ **Note**: Due to high demand, you might experience occasional bugs. If something doesn't work, please try again after a moment!")
|
|
|
|
| 542 |
with gr.Tabs():
|
| 543 |
with gr.TabItem("🤖 Run Agent on GAIA Benchmark"):
|
| 544 |
gr.Markdown("## Step 1: Run Your Agent & Generate Answers")
|
| 545 |
+
gr.Markdown("This agent uses the Gemini API with enhanced tool handling (Python execution, audio, spreadsheets) to generate answers.")
|
| 546 |
run_all_questions_checkbox = gr.Checkbox(label="Process all questions (unchecked processes 1 random question for testing)", value=True)
|
| 547 |
run_agent_button = gr.Button("🔎 Fetch Questions & Run My Agent")
|
| 548 |
gr.Markdown("### Agent Run Log & Generated Answers:")
|