"""Gradio app that runs a LangChain ReAct agent over the scoring API's questions.

The app fetches the question list from ``DEFAULT_API_URL``, asks the agent to
answer each one, and submits all answers in a single POST request.
"""

import os
import time

import gradio as gr
import requests
import pandas as pd
from dotenv import load_dotenv
from langchain import hub
from langchain_openai import ChatOpenAI
from langchain.agents import create_react_agent, AgentExecutor
from langchain_core.prompts import PromptTemplate
from langchain_core.messages import AIMessage, HumanMessage

from tools.web_searcher import web_search_tool
from tools.calculator import calculator_tool
from tools.file_reader import read_file_tool
from tools.code_review import code_reviewer
from tools.web_scraper import web_scraper_tool

load_dotenv()

DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# === Tools ===
tools = [
    web_search_tool,
    calculator_tool,
    read_file_tool,
    code_reviewer,
    web_scraper_tool,
]

# === Model ===
model = ChatOpenAI(model="gpt-4o", api_key=os.getenv("OPENAI_API_KEY"))

# === ReAct Prompt ===
# The format section appears exactly once: an earlier revision carried a
# second, truncated copy of it (without the "Final Answer" step) right before
# the full one.
# NOTE(review): rule 1 refers to tools as "file_reader", "calculator" and
# "web_scraper"; confirm these match the registered tool names that
# {tool_names} renders.
template = '''Answer the following questions as best you can. You have access to the following tools:

{tools}

Strict rules to follow:
1️⃣ Use tools when needed: web_search_tool for external information, file_reader for file data, calculator for math, code_reviewer for code questions, web_scraper for web content.
2️⃣ Combine tools logically for multi-step problems.
3️⃣ Format answers exactly as requested: single name, city, code, or number—no extra text.
4️⃣ If a link is provided, use web_search_tool or web_scraper to extract the information.
5️⃣ Do not guess; if information is unavailable, say: 'No answer found.'
6️⃣ Be precise, factual, and avoid hallucination. Verify using tools.

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: {input}
Thought:{agent_scratchpad}'''

react_prompt = PromptTemplate.from_template(template)

# === ReAct Agent ===
agent = create_react_agent(model, tools, react_prompt)
# handle_parsing_errors=True feeds malformed LLM output back to the model
# instead of raising, so one badly formatted step does not fail the question.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,
    handle_parsing_errors=True,
)


# === Gradio Interface ===
def _answer_question(question_text):
    """Run the agent on one question and return the answer to submit.

    Any exception raised while invoking the agent or reading its result is
    converted into an ``"[ERROR: ...]"`` string so that a single failing
    question does not abort the whole evaluation run.
    """
    try:
        result = agent_executor.invoke({"input": question_text})
        return result["output"] if isinstance(result, dict) else result
    except Exception as e:  # boundary: tools and the LLM can raise anything
        return f"[ERROR: {e}]"


def process_question(profile: gr.OAuthProfile | None) -> str:
    """Fetch all questions, answer them with the agent, and submit the answers.

    Args:
        profile: The logged-in user's OAuth profile, or ``None`` when nobody
            is logged in.

    Returns:
        A human-readable status message: the submission summary on success,
        or a description of what went wrong.
    """
    if profile is None:
        return "Please log in first."

    username = profile.username
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"
    # If SPACE_ID is unset, os.getenv returns None and the link contains "None".
    agent_code = f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main"

    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}"

    if not questions_data:
        return "Fetched questions list is empty."

    answers_payload = []
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or not question_text:
            continue  # skip entries without an id or question text
        answers_payload.append(
            {
                "task_id": task_id,
                "submitted_answer": _answer_question(question_text),
            }
        )
        time.sleep(1)  # slight delay to avoid rate limits

    if not answers_payload:
        # Nothing to score; avoid posting an empty submission.
        return "Agent did not produce any answers to submit."

    submission_data = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        return final_status
    except Exception as e:
        return f"Submission Failed: {e}"


with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent Evaluation")
    gr.Markdown("Login, run evaluation, and submit your answers below.")
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(
        label="Run Status / Submission Result", lines=5, interactive=False
    )
    # No explicit inputs: process_question only takes the OAuth profile.
    run_button.click(fn=process_question, outputs=[status_output])

if __name__ == "__main__":
    print("Launching GAIA Agent ReAct Evaluation App")
    demo.launch()