ana-35 committed on
Commit
a2709ae
·
1 Parent(s): 3e45b4c

Change to ReAct Agent

Browse files
Files changed (2) hide show
  1. app.py +94 -73
  2. tools/web_scraper.py +31 -0
app.py CHANGED
@@ -1,162 +1,183 @@
1
  import os
2
- import time
3
  import gradio as gr
4
  import requests
5
  import pandas as pd
6
- import re
7
- import ast
8
- import operator as op
9
- from io import BytesIO
10
  from dotenv import load_dotenv
11
  from openai import OpenAI
12
- from langchain.tools import Tool
13
  from tools.web_searcher import web_search_tool
14
  from tools.calculator import calculator_tool
15
  from tools.file_reader import read_file_tool
16
  from tools.code_review import code_reviewer
 
 
 
17
 
18
  load_dotenv()
19
 
 
20
 
21
- # --- ToolUsingAgent ---
22
  class ToolUsingAgent:
23
  def __init__(self):
24
  self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
25
  self.model = "gpt-4o"
26
  self.temperature = 0.0
27
- self.max_tokens = 1500 # Limit to prevent overload
28
- self.web_search = web_search_tool
29
- self.calculator = calculator_tool
30
- self.file_reader = read_file_tool
31
- self.code_reviewer = code_reviewer
 
 
 
32
 
33
  def answer(self, question: str, task_id: str = None) -> str:
34
- tools_context = ""
35
  try:
36
- # Use file_reader if task_id provided
 
 
 
37
  if task_id:
38
- tools_context += f"\nFile content: {self.file_reader.run(task_id)}"
39
 
40
- # Use code reviewer if it's a code-related question
41
- if any(kw in question.lower() for kw in ["python", "code", "function", "bug"]):
42
- tools_context += f"\nCode Review: {self.code_reviewer.run(question)}"
43
 
44
- # Use web search for specific keywords
45
- if any(kw in question.lower() for kw in ["wikipedia", "search", "google", "find", "ioc", "lookup"]):
46
- tools_context += f"\nWeb search result: {self.web_search.run(question)}"
47
 
48
- # Use calculator if math-related keywords found
49
  if any(kw in question.lower() for kw in ["calculate", "math", "sum", "total", "add", "subtract", "multiply", "divide"]):
50
- tools_context += f"\nCalculation result: {self.calculator.run(question)}"
 
 
 
 
 
 
 
 
 
51
 
52
  prompt = f"""
53
- You are an expert AI agent solving complex questions. Follow these strict rules:
54
- 1. Use tools (web_search, file_reader, calculator, code_reviewer) when needed.
55
- 2. Combine tools for multi-step questions.
56
- 3. Return only the requested format (name, number, city, code). No explanations.
57
- 4. If you cannot confidently answer a question based on your existing knowledge, do not guess—use tools like web_search_tool to search for information online.
58
- 5. If the question includes a link (e.g., a YouTube video or website), make sure to use tools like web_search_tool to access the link, review the content, and provide an answer based on the information found in the linked resource.
59
- 6. After finding the answer, ensure that you format your response exactly as requested in the question—provide only what is explicitly required, without extra text or explanation.
 
 
 
60
 
61
  Question: {question}
62
- {tools_context}
63
 
64
  Answer:"""
65
 
66
- return self.query_llm_with_backoff(prompt, question)
 
 
67
 
68
  except Exception as e:
69
  return f"[AGENT ERROR: {e}]"
70
 
71
- def query_llm_with_backoff(self, prompt: str, question: str, retries=3) -> str:
72
- delay = 2
73
- for attempt in range(retries):
74
  try:
75
- prompt = prompt[:self.max_tokens]
76
  completion = self.client.chat.completions.create(
77
  model=self.model,
78
  messages=[{"role": "user", "content": prompt}],
79
  temperature=self.temperature,
80
- max_tokens=500
81
  )
82
- response = completion.choices[0].message.content.strip()
83
- return self.clean_output(response, self.expected_format_detected_from_question(question))
84
  except Exception as e:
85
- if "rate limit" in str(e).lower() or "429" in str(e):
86
- time.sleep(delay)
87
- delay *= 2
88
  else:
89
- return f"[LLM Error: {e}]"
90
- return "[LLM Error: Exceeded retry attempts]"
91
 
92
- def expected_format_detected_from_question(self, question: str) -> str:
93
  q = question.lower()
94
- if "ioc" in q or "olympics" in q or "3-letter code" in q:
95
  return "ioc"
96
  elif "city" in q:
97
  return "city"
98
  elif "name" in q or "first name" in q or "last name" in q:
99
  return "name"
100
- elif any(word in q for word in ["number", "amount", "how many", "total", "sum", "price", "usd"]):
101
  return "number"
102
  else:
103
  return "text"
104
 
105
- def clean_output(self, output: str, expected_format: str) -> str:
 
106
  output = output.strip().strip(' "\'')
107
- if expected_format == "ioc":
108
  match = re.search(r'\b[A-Z]{3}\b', output)
109
  return match.group(0) if match else "No answer found."
110
- elif expected_format == "city":
111
  match = re.search(r'\b[A-Z][a-z]+(?: [A-Z][a-z]+)*\b', output)
112
  return match.group(0) if match else "No answer found."
113
- elif expected_format == "name":
114
  match = re.search(r'\b[A-Z][a-z]+\b', output)
115
  return match.group(0) if match else "No answer found."
116
- elif expected_format == "number":
117
  match = re.search(r'\d+(\.\d+)?', output)
118
  return match.group(0) if match else "No answer found."
119
  else:
120
  return output
121
 
122
- # --- Gradio App ---
123
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
124
-
125
  def run_and_submit_all(profile: gr.OAuthProfile | None):
126
  space_id = os.getenv("SPACE_ID")
127
- if profile:
128
- username = profile.username
129
- else:
130
- return "Please log in.", None
131
 
 
 
 
132
  api_url = DEFAULT_API_URL
 
133
  questions_url = f"{api_url}/questions"
134
  submit_url = f"{api_url}/submit"
135
 
136
- agent = ToolUsingAgent()
137
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
138
-
139
  try:
140
- questions = requests.get(questions_url, timeout=15).json()
 
 
141
  except Exception as e:
142
  return f"Error fetching questions: {e}", None
143
 
144
- results_log = []
145
- answers_payload = []
146
 
147
- for item in questions:
148
  task_id = item.get("task_id")
149
- question = item.get("question")
150
- if not task_id or not question:
151
  continue
152
- answer = agent.answer(question, task_id)
 
153
  answers_payload.append({"task_id": task_id, "submitted_answer": answer})
154
- results_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": answer})
 
 
155
 
156
  try:
157
- response = requests.post(submit_url, json={"username": username, "agent_code": agent_code, "answers": answers_payload}, timeout=60)
158
- data = response.json()
159
- final_status = f"Submission Successful!\nUser: {data.get('username')}\nOverall Score: {data.get('score')}% ({data.get('correct_count')}/{data.get('total_attempted')})\nMessage: {data.get('message')}"
 
 
 
 
 
 
 
160
  return final_status, pd.DataFrame(results_log)
161
  except Exception as e:
162
  return f"Submission Failed: {e}", pd.DataFrame(results_log)
 
1
  import os
 
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
 
 
 
 
5
  from dotenv import load_dotenv
6
  from openai import OpenAI
 
7
  from tools.web_searcher import web_search_tool
8
  from tools.calculator import calculator_tool
9
  from tools.file_reader import read_file_tool
10
  from tools.code_review import code_reviewer
11
+ from tools.web_scraper import web_scraper_tool
12
+ from langchain.tools import Tool
13
+ import time
14
 
15
  load_dotenv()
16
 
17
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
18
 
 
19
class ToolUsingAgent:
    """ReAct-style agent that routes a question through keyword-selected tools
    and asks an OpenAI chat model to compose the final, strictly formatted answer.
    """

    def __init__(self):
        self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        self.model = "gpt-4o"
        self.temperature = 0.0  # deterministic output for benchmark scoring
        self.max_tokens = 1000
        # Registry of available tools; keys double as labels in the prompt context.
        self.tools = {
            "web_search": web_search_tool,
            "calculator": calculator_tool,
            "file_reader": read_file_tool,
            "code_reviewer": code_reviewer,
            "web_scraper": web_scraper_tool
        }

    def answer(self, question: str, task_id: str = None) -> str:
        """Answer *question*, optionally reading the file attached to *task_id*.

        Tool selection is keyword-based; every non-empty tool output is folded
        into the prompt context before the model is called. Returns the cleaned
        model answer, or an "[AGENT ERROR: ...]" string on any failure.
        """
        try:
            context = ""
            results = {}

            # File first if a task id (and therefore possibly a file) is given.
            if task_id:
                results["file"] = self.tools["file_reader"].run(task_id)

            # Web search for lookup-style questions.
            if any(kw in question.lower() for kw in ["search", "find", "lookup", "wikipedia", "ioc", "youtube", "link", "video"]):
                results["web_search"] = self.tools["web_search"].run(question)

            # Web scraper when the question implies fetching a page.
            if any(kw in question.lower() for kw in ["scrape", "web page", "html", "site content"]):
                results["web_scraper"] = self.tools["web_scraper"].run(question)

            # Calculator for math wording.
            if any(kw in question.lower() for kw in ["calculate", "math", "sum", "total", "add", "subtract", "multiply", "divide"]):
                results["calculator"] = self.tools["calculator"].run(question)

            # Code review for programming questions.
            if any(kw in question.lower() for kw in ["python", "code", "function", "class", "debug"]):
                results["code_reviewer"] = self.tools["code_reviewer"].run(question)

            # Compose tool outputs into context
            for tool_name, result in results.items():
                if result:
                    context += f"\n[{tool_name.upper()} RESULT]: {result}"

            prompt = f"""
You are an advanced ReAct-style AI agent that solves complex questions by using tools when needed.
Strictly follow these rules:
1. Think step by step. Use tools like web_search_tool, file_reader_tool, calculator_tool, code_reviewer_tool, and web_scraper_tool as necessary.
2. If a question includes a link (e.g., YouTube, Wikipedia), use web_search_tool or web_scraper_tool to fetch relevant information.
3. If a file is attached, use file_reader_tool.
4. If it's a code question, use code_reviewer_tool to review and understand it.
5. If it's a math question, use calculator_tool.
6. If you cannot find the answer, say: 'No answer found.'
7. Do not guess. Do not hallucinate.
8. Once you have the answer, follow the exact format the question requires (e.g., a single name, a number, a city). Do not add extra text or explanations.

Question: {question}
{context}

Answer:"""

            final_answer = self.query_llm(prompt)
            format_type = self.detect_format(question)
            return self.clean_output(final_answer, format_type)

        except Exception as e:
            return f"[AGENT ERROR: {e}]"

    def query_llm(self, prompt: str, retries: int = 3) -> str:
        """Call the chat model, retrying with exponential backoff on rate limits.

        Raises non-rate-limit errors (caught by answer()); returns
        "No answer found." when retries are exhausted or content is empty.
        """
        for i in range(retries):
            try:
                completion = self.client.chat.completions.create(
                    model=self.model,
                    messages=[{"role": "user", "content": prompt}],
                    temperature=self.temperature,
                    max_tokens=self.max_tokens
                )
                # content may be None (e.g. refusal/empty choice); avoid .strip() on None.
                content = completion.choices[0].message.content
                return content.strip() if content else "No answer found."
            except Exception as e:
                msg = str(e).lower()
                # BUGFIX: also match "rate limit" (with a space) and HTTP 429 —
                # the phrasing OpenAI rate-limit errors actually use; matching
                # only "rate_limit" made most throttling errors raise instead
                # of backing off.
                if "rate_limit" in msg or "rate limit" in msg or "429" in msg:
                    time.sleep(2 ** i)  # Exponential backoff: 1s, 2s, 4s, ...
                else:
                    raise
        return "No answer found."

    def detect_format(self, question: str) -> str:
        """Guess the expected answer format ("ioc", "city", "name", "number",
        or "text") from keywords in the question."""
        q = question.lower()
        if "ioc" in q or "olympics" in q:
            return "ioc"
        elif "city" in q:
            return "city"
        elif "name" in q or "first name" in q or "last name" in q:
            return "name"
        elif any(w in q for w in ["number", "amount", "total", "sum", "price", "usd"]):
            return "number"
        else:
            return "text"

    def clean_output(self, output: str, expected: str) -> str:
        """Extract just the value matching *expected* from the raw model output.

        Falls back to "No answer found." when no match exists; "text" returns
        the stripped output unchanged.
        """
        import re
        output = output.strip().strip(' "\'')
        if expected == "ioc":
            # IOC country codes are exactly three uppercase letters.
            match = re.search(r'\b[A-Z]{3}\b', output)
            return match.group(0) if match else "No answer found."
        elif expected == "city":
            # First capitalized word run (e.g. "New York"); NOTE(review): this
            # also matches a leading capitalized non-city word — verify upstream.
            match = re.search(r'\b[A-Z][a-z]+(?: [A-Z][a-z]+)*\b', output)
            return match.group(0) if match else "No answer found."
        elif expected == "name":
            match = re.search(r'\b[A-Z][a-z]+\b', output)
            return match.group(0) if match else "No answer found."
        elif expected == "number":
            match = re.search(r'\d+(\.\d+)?', output)
            return match.group(0) if match else "No answer found."
        else:
            return output
135
 
 
 
 
136
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the benchmark questions, answer each with ToolUsingAgent, and
    POST all answers to the scoring endpoint.

    Returns a (status message, pandas DataFrame) pair for the Gradio UI;
    the DataFrame may be None when login or question fetching fails.
    """
    space_id = os.getenv("SPACE_ID")
    if not profile:
        # The scoring API requires a username, so login is mandatory.
        return "Please log in with the button above.", None

    username = profile.username
    agent = ToolUsingAgent()
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    base_url = DEFAULT_API_URL

    # Pull the question set from the scoring service.
    try:
        fetch_resp = requests.get(f"{base_url}/questions", timeout=15)
        fetch_resp.raise_for_status()
        question_items = fetch_resp.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    answers_payload = []
    results_log = []

    # Answer every well-formed item; skip entries missing an id or text.
    for entry in question_items:
        tid = entry.get("task_id")
        qtext = entry.get("question")
        if not tid or not qtext:
            continue

        reply = agent.answer(qtext, tid)
        answers_payload.append({"task_id": tid, "submitted_answer": reply})
        results_log.append({"Task ID": tid, "Question": qtext, "Submitted Answer": reply})

    submission = {"username": username, "agent_code": agent_code, "answers": answers_payload}

    # Submit and format the score summary for display.
    try:
        submit_resp = requests.post(f"{base_url}/submit", json=submission, timeout=60)
        submit_resp.raise_for_status()
        outcome = submit_resp.json()
        status = (
            f"Submission Successful!\n"
            f"User: {outcome.get('username')}\n"
            f"Overall Score: {outcome.get('score', 'N/A')}% "
            f"({outcome.get('correct_count', '?')}/{outcome.get('total_attempted', '?')} correct)\n"
            f"Message: {outcome.get('message', 'No message received.')}"
        )
        return status, pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission Failed: {e}", pd.DataFrame(results_log)
tools/web_scraper.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import requests
from bs4 import BeautifulSoup
from langchain.tools import Tool


def scrape_webpage(url: str) -> str:
    """
    Fetches the textual content of a webpage.

    Returns the page's visible text capped at 3000 characters, or an
    "[WebScraper error: ...]" string on any failure.
    """
    try:
        resp = requests.get(
            url,
            headers={"User-Agent": "Mozilla/5.0 (compatible; AI-Agent/1.0)"},
            timeout=10,
        )
        resp.raise_for_status()

        soup = BeautifulSoup(resp.content, "html.parser")
        # Drop non-visible nodes (scripts, styles) before extracting text.
        for node in soup(["script", "style"]):
            node.decompose()
        page_text = " ".join(soup.stripped_strings)
        # Cap the output so it does not overload the LLM context window.
        return page_text[:3000]

    except Exception as e:
        return f"[WebScraper error: {e}]"


# Register as a LangChain Tool
web_scraper_tool = Tool.from_function(
    name="web_scraper",
    description="Fetches and extracts main text content from a webpage using its URL.",
    func=scrape_webpage
)