Spaces:

ana-35
/

agents-final-assessment

Sleeping

App Files Files Community

ana-35 committed on May 27

Commit

a2709ae

1 Parent(s): 3e45b4c

Change to ReAct Agent

Browse files

Files changed (2) hide show

app.py +94 -73
tools/web_scraper.py +31 -0

app.py CHANGED Viewed

@@ -1,162 +1,183 @@
 import os
-import time
 import gradio as gr
 import requests
 import pandas as pd
-import re
-import ast
-import operator as op
-from io import BytesIO
 from dotenv import load_dotenv
 from openai import OpenAI
-from langchain.tools import Tool
 from tools.web_searcher import web_search_tool
 from tools.calculator import calculator_tool
 from tools.file_reader import read_file_tool
 from tools.code_review import code_reviewer
 load_dotenv()
-# --- ToolUsingAgent ---
 class ToolUsingAgent:
     def __init__(self):
         self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
         self.model = "gpt-4o"
         self.temperature = 0.0
-        self.max_tokens = 1500  # Limit to prevent overload
-        self.web_search = web_search_tool
-        self.calculator = calculator_tool
-        self.file_reader = read_file_tool
-        self.code_reviewer = code_reviewer
     def answer(self, question: str, task_id: str = None) -> str:
-        tools_context = ""
         try:
-            # Use file_reader if task_id provided
             if task_id:
-                tools_context += f"\nFile content: {self.file_reader.run(task_id)}"
-            # Use code reviewer if it's a code-related question
-            if any(kw in question.lower() for kw in ["python", "code", "function", "bug"]):
-                tools_context += f"\nCode Review: {self.code_reviewer.run(question)}"
-            # Use web search for specific keywords
-            if any(kw in question.lower() for kw in ["wikipedia", "search", "google", "find", "ioc", "lookup"]):
-                tools_context += f"\nWeb search result: {self.web_search.run(question)}"
-            # Use calculator if math-related keywords found
             if any(kw in question.lower() for kw in ["calculate", "math", "sum", "total", "add", "subtract", "multiply", "divide"]):
-                tools_context += f"\nCalculation result: {self.calculator.run(question)}"
             prompt = f"""
-You are an expert AI agent solving complex questions. Follow these strict rules:
-1. Use tools (web_search, file_reader, calculator, code_reviewer) when needed.
-2. Combine tools for multi-step questions.
-3. Return only the requested format (name, number, city, code). No explanations.
-4. If you cannot confidently answer a question based on your existing knowledge, do not guess—use tools like web_search_tool to search for information online.
-5. If the question includes a link (e.g., a YouTube video or website), make sure to use tools like web_search_tool to access the link, review the content, and provide an answer based on the information found in the linked resource.
-6. After finding the answer, ensure that you format your response exactly as requested in the question—provide only what is explicitly required, without extra text or explanation.
 Question: {question}
-{tools_context}
 Answer:"""
-            return self.query_llm_with_backoff(prompt, question)
         except Exception as e:
             return f"[AGENT ERROR: {e}]"
-    def query_llm_with_backoff(self, prompt: str, question: str, retries=3) -> str:
-        delay = 2
-        for attempt in range(retries):
             try:
-                prompt = prompt[:self.max_tokens]
                 completion = self.client.chat.completions.create(
                     model=self.model,
                     messages=[{"role": "user", "content": prompt}],
                     temperature=self.temperature,
-                    max_tokens=500
                 )
-                response = completion.choices[0].message.content.strip()
-                return self.clean_output(response, self.expected_format_detected_from_question(question))
             except Exception as e:
-                if "rate limit" in str(e).lower() or "429" in str(e):
-                    time.sleep(delay)
-                    delay *= 2
                 else:
-                    return f"[LLM Error: {e}]"
-        return "[LLM Error: Exceeded retry attempts]"
-    def expected_format_detected_from_question(self, question: str) -> str:
         q = question.lower()
-        if "ioc" in q or "olympics" in q or "3-letter code" in q:
             return "ioc"
         elif "city" in q:
             return "city"
         elif "name" in q or "first name" in q or "last name" in q:
             return "name"
-        elif any(word in q for word in ["number", "amount", "how many", "total", "sum", "price", "usd"]):
             return "number"
         else:
             return "text"
-    def clean_output(self, output: str, expected_format: str) -> str:
         output = output.strip().strip(' "\'')
-        if expected_format == "ioc":
             match = re.search(r'\b[A-Z]{3}\b', output)
             return match.group(0) if match else "No answer found."
-        elif expected_format == "city":
             match = re.search(r'\b[A-Z][a-z]+(?: [A-Z][a-z]+)*\b', output)
             return match.group(0) if match else "No answer found."
-        elif expected_format == "name":
             match = re.search(r'\b[A-Z][a-z]+\b', output)
             return match.group(0) if match else "No answer found."
-        elif expected_format == "number":
             match = re.search(r'\d+(\.\d+)?', output)
             return match.group(0) if match else "No answer found."
         else:
             return output
-# --- Gradio App ---
-DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     space_id = os.getenv("SPACE_ID")
-    if profile:
-        username = profile.username
-    else:
-        return "Please log in.", None
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
-    agent = ToolUsingAgent()
-    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     try:
-        questions = requests.get(questions_url, timeout=15).json()
     except Exception as e:
         return f"Error fetching questions: {e}", None
-    results_log = []
-    answers_payload = []
-    for item in questions:
         task_id = item.get("task_id")
-        question = item.get("question")
-        if not task_id or not question:
             continue
-        answer = agent.answer(question, task_id)
         answers_payload.append({"task_id": task_id, "submitted_answer": answer})
-        results_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": answer})
     try:
-        response = requests.post(submit_url, json={"username": username, "agent_code": agent_code, "answers": answers_payload}, timeout=60)
-        data = response.json()
-        final_status = f"Submission Successful!\nUser: {data.get('username')}\nOverall Score: {data.get('score')}% ({data.get('correct_count')}/{data.get('total_attempted')})\nMessage: {data.get('message')}"
         return final_status, pd.DataFrame(results_log)
     except Exception as e:
         return f"Submission Failed: {e}", pd.DataFrame(results_log)

 import os
 import gradio as gr
 import requests
 import pandas as pd
 from dotenv import load_dotenv
 from openai import OpenAI
 from tools.web_searcher import web_search_tool
 from tools.calculator import calculator_tool
 from tools.file_reader import read_file_tool
 from tools.code_review import code_reviewer
+from tools.web_scraper import web_scraper_tool
+from langchain.tools import Tool
+import time
 load_dotenv()
+DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 class ToolUsingAgent:
     def __init__(self):
         self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
         self.model = "gpt-4o"
         self.temperature = 0.0
+        self.max_tokens = 1000
+        self.tools = {
+            "web_search": web_search_tool,
+            "calculator": calculator_tool,
+            "file_reader": read_file_tool,
+            "code_reviewer": code_reviewer,
+            "web_scraper": web_scraper_tool
+        }
     def answer(self, question: str, task_id: str = None) -> str:
         try:
+            context = ""
+            results = {}
+            # File first if task_id
             if task_id:
+                results["file"] = self.tools["file_reader"].run(task_id)
+            # Web search if keywords
+            if any(kw in question.lower() for kw in ["search", "find", "lookup", "wikipedia", "ioc", "youtube", "link", "video"]):
+                results["web_search"] = self.tools["web_search"].run(question)
+            # Web scraper if question implies scraping
+            if any(kw in question.lower() for kw in ["scrape", "web page", "html", "site content"]):
+                results["web_scraper"] = self.tools["web_scraper"].run(question)
+            # Calculator if math
             if any(kw in question.lower() for kw in ["calculate", "math", "sum", "total", "add", "subtract", "multiply", "divide"]):
+                results["calculator"] = self.tools["calculator"].run(question)
+            # Code review if code question
+            if any(kw in question.lower() for kw in ["python", "code", "function", "class", "debug"]):
+                results["code_reviewer"] = self.tools["code_reviewer"].run(question)
+            # Compose tool outputs into context
+            for tool_name, result in results.items():
+                if result:
+                    context += f"\n[{tool_name.upper()} RESULT]: {result}"
             prompt = f"""
+You are an advanced ReAct-style AI agent that solves complex questions by using tools when needed.
+Strictly follow these rules:
+1. Think step by step. Use tools like web_search_tool, file_reader_tool, calculator_tool, code_reviewer_tool, and web_scraper_tool as necessary.
+2. If a question includes a link (e.g., YouTube, Wikipedia), use web_search_tool or web_scraper_tool to fetch relevant information.
+3. If a file is attached, use file_reader_tool.
+4. If it's a code question, use code_reviewer_tool to review and understand it.
+5. If it's a math question, use calculator_tool.
+6. If you cannot find the answer, say: 'No answer found.'
+7. Do not guess. Do not hallucinate.
+8. Once you have the answer, follow the exact format the question requires (e.g., a single name, a number, a city). Do not add extra text or explanations.
 Question: {question}
+{context}
 Answer:"""
+            final_answer = self.query_llm(prompt)
+            format_type = self.detect_format(question)
+            return self.clean_output(final_answer, format_type)
         except Exception as e:
             return f"[AGENT ERROR: {e}]"
+    def query_llm(self, prompt: str) -> str:
+        retries = 3
+        for i in range(retries):
             try:
                 completion = self.client.chat.completions.create(
                     model=self.model,
                     messages=[{"role": "user", "content": prompt}],
                     temperature=self.temperature,
+                    max_tokens=self.max_tokens
                 )
+                return completion.choices[0].message.content.strip()
             except Exception as e:
+                if "rate_limit" in str(e).lower():
+                    time.sleep(2 ** i)  # Exponential backoff
                 else:
+                    raise
+        return "No answer found."
+    def detect_format(self, question: str) -> str:
         q = question.lower()
+        if "ioc" in q or "olympics" in q:
             return "ioc"
         elif "city" in q:
             return "city"
         elif "name" in q or "first name" in q or "last name" in q:
             return "name"
+        elif any(w in q for w in ["number", "amount", "total", "sum", "price", "usd"]):
             return "number"
         else:
             return "text"
+    def clean_output(self, output: str, expected: str) -> str:
+        import re
         output = output.strip().strip(' "\'')
+        if expected == "ioc":
             match = re.search(r'\b[A-Z]{3}\b', output)
             return match.group(0) if match else "No answer found."
+        elif expected == "city":
             match = re.search(r'\b[A-Z][a-z]+(?: [A-Z][a-z]+)*\b', output)
             return match.group(0) if match else "No answer found."
+        elif expected == "name":
             match = re.search(r'\b[A-Z][a-z]+\b', output)
             return match.group(0) if match else "No answer found."
+        elif expected == "number":
             match = re.search(r'\d+(\.\d+)?', output)
             return match.group(0) if match else "No answer found."
         else:
             return output
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     space_id = os.getenv("SPACE_ID")
+    if not profile:
+        return "Please log in with the button above.", None
+    username = profile.username
+    agent = ToolUsingAgent()
+    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
     try:
+        response = requests.get(questions_url, timeout=15)
+        response.raise_for_status()
+        questions_data = response.json()
     except Exception as e:
         return f"Error fetching questions: {e}", None
+    results_log, answers_payload = [], []
+    for item in questions_data:
         task_id = item.get("task_id")
+        question_text = item.get("question")
+        if not task_id or not question_text:
             continue
+        answer = agent.answer(question_text, task_id)
         answers_payload.append({"task_id": task_id, "submitted_answer": answer})
+        results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": answer})
+    submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload}
     try:
+        response = requests.post(submit_url, json=submission_data, timeout=60)
+        response.raise_for_status()
+        result_data = response.json()
+        final_status = (
+            f"Submission Successful!\n"
+            f"User: {result_data.get('username')}\n"
+            f"Overall Score: {result_data.get('score', 'N/A')}% "
+            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
+            f"Message: {result_data.get('message', 'No message received.')}"
+        )
         return final_status, pd.DataFrame(results_log)
     except Exception as e:
         return f"Submission Failed: {e}", pd.DataFrame(results_log)

tools/web_scraper.py ADDED Viewed

	@@ -0,0 +1,31 @@

+import requests
+from bs4 import BeautifulSoup
+from langchain.tools import Tool
def scrape_webpage(url: str) -> str:
    """
    Fetches the textual content of a webpage.

    ``url`` may be a bare URL or free text that contains one (the agent may
    pass a whole question); the first http(s) URL found is the one fetched.

    Returns the page's text with <script>/<style> content removed, truncated
    to 3000 characters. Never raises: any failure, including input with no
    http(s) URL, is reported as a "[WebScraper error: ...]" string.
    """
    import re

    # Pull the URL out of the input up front so free text never reaches
    # requests.get, and so an empty/None input gets a clear message.
    match = re.search(r"https?://[^\s<>\"')\]]+", url or "")
    if not match:
        return "[WebScraper error: no http(s) URL found in input]"
    # Sentence punctuation right after a link is not part of the URL.
    target = match.group(0).rstrip(".,;:!?")
    try:
        headers = {
            "User-Agent": "Mozilla/5.0 (compatible; AI-Agent/1.0)"
        }
        response = requests.get(target, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")
        # Extract main text content (without scripts, styles)
        for tag in soup(["script", "style"]):
            tag.decompose()
        text = " ".join(soup.stripped_strings)
        return text[:3000]  # Limit the output length to avoid overloading the LLM
    except Exception as e:
        return f"[WebScraper error: {e}]"
# Expose scrape_webpage to agents as a LangChain Tool named "web_scraper".
web_scraper_tool = Tool.from_function(
    func=scrape_webpage,
    name="web_scraper",
    description="Fetches and extracts main text content from a webpage using its URL.",
)