ana-35 committed on
Commit
d21c232
·
1 Parent(s): ed0039a

improving web search and file reader

Browse files
Files changed (3) hide show
  1. app.py +5 -4
  2. tools/file_reader.py +10 -22
  3. tools/web_searcher.py +12 -12
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
  import gradio as gr
3
  import requests
 
4
  import pandas as pd
5
  from io import BytesIO
6
  from dotenv import load_dotenv
@@ -48,6 +49,8 @@ You are an assistant answering benchmark questions. Return ONLY the final answer
48
  - For a city name: Just the city name, properly capitalized, no extra words.
49
  - For lists: Only a comma-separated list, no explanations, no 'and', no extra text.
50
  - For numbers: Just the number, 2 decimal places if applicable, no units unless explicitly asked.
 
 
51
 
52
  Here is the question:
53
  {question}
@@ -69,10 +72,8 @@ Here is the question:
69
 
70
  def clean_output(self, output: str) -> str:
71
  output = output.strip()
72
- if output.lower().startswith("the answer is"):
73
- output = output[len("the answer is"):].strip()
74
- if output.startswith('"') and output.endswith('"'):
75
- output = output[1:-1].strip()
76
  return output
77
 
78
  # --- Gradio + Submission Logic ---
 
1
  import os
2
  import gradio as gr
3
  import requests
4
+ import re
5
  import pandas as pd
6
  from io import BytesIO
7
  from dotenv import load_dotenv
 
49
  - For a city name: Just the city name, properly capitalized, no extra words.
50
  - For lists: Only a comma-separated list, no explanations, no 'and', no extra text.
51
  - For numbers: Just the number, 2 decimal places if applicable, no units unless explicitly asked.
52
+ - For logical tasks (like reversing words): Think step by step, then provide only the final answer, no explanation.
53
+ - If unsure, do not guess.
54
 
55
  Here is the question:
56
  {question}
 
72
 
73
  def clean_output(self, output: str) -> str:
74
  output = output.strip()
75
+ output = re.sub(r'^(The answer is|Answer:)\s*', '', output, flags=re.IGNORECASE)
76
+ output = output.strip(' "\'')
 
 
77
  return output
78
 
79
  # --- Gradio + Submission Logic ---
tools/file_reader.py CHANGED
@@ -4,31 +4,19 @@ from io import BytesIO
4
  from langchain.tools import Tool
5
 
6
  def read_file(task_id: str) -> str:
7
- api_base = "https://agents-course-unit4-scoring.hf.space"
8
- file_url = f"{api_base}/files/{task_id}"
 
9
 
10
- try:
11
- response = requests.get(file_url)
12
- response.raise_for_status()
13
 
14
- content_type = response.headers.get("Content-Type", "")
15
- if "spreadsheet" in content_type or "excel" in content_type:
16
- df = pd.read_excel(BytesIO(response.content))
17
- elif "csv" in content_type:
18
- df = pd.read_csv(BytesIO(response.content))
19
- else:
20
- return "[Error: Unsupported file type]"
21
 
22
- # Filter: Keep only food columns (exclude drinks)
23
- # Adjust the logic below based on the actual file columns
24
- food_columns = [col for col in df.columns if "soda" not in col.lower() and "drink" not in col.lower()]
25
- food_sales = df[food_columns].sum().sum()
26
-
27
- # Format: Number with 2 decimals, no commas, no $
28
- return f"{food_sales:.2f}"
29
-
30
- except Exception as e:
31
- return f"[FileReader Error: {e}]"
32
 
33
  read_file_tool = Tool.from_function(
34
  name="read_file",
 
4
  from langchain.tools import Tool
5
 
6
  def read_file(task_id: str) -> str:
7
+ url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
8
+ response = requests.get(url)
9
+ response.raise_for_status()
10
 
11
+ content_type = response.headers.get("Content-Type", "")
 
 
12
 
13
+ if "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" in content_type:
14
+ df = pd.read_excel(BytesIO(response.content))
15
+ elif "text/csv" in content_type or task_id.endswith(".csv"):
16
+ df = pd.read_csv(BytesIO(response.content))
17
+ else:
18
+ return "[Error: Unsupported file type]"
 
19
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  read_file_tool = Tool.from_function(
22
  name="read_file",
tools/web_searcher.py CHANGED
@@ -1,5 +1,6 @@
1
  import requests
2
  import os
 
3
  from langchain.tools import Tool
4
  from dotenv import load_dotenv
5
 
@@ -23,24 +24,22 @@ def web_search(query: str) -> str:
23
  response.raise_for_status()
24
  data = response.json()
25
 
26
- if data.get("answerBox") and data["answerBox"].get("answer"):
27
- answer = data["answerBox"]["answer"]
28
- elif data.get("answerBox") and data["answerBox"].get("snippet"):
29
- answer = data["answerBox"]["snippet"]
30
  elif data.get("organic"):
31
- answer = data["organic"][0].get("snippet", "").strip()
32
- if not answer:
33
- answer = data["organic"][0].get("title", "").strip()
34
  else:
35
  return "No results found"
36
 
 
37
  answer = answer.strip()
38
- if answer.lower().startswith("the answer is"):
39
- answer = answer[len("the answer is"):].strip()
40
- if answer.startswith('"') and answer.endswith('"'):
41
- answer = answer[1:-1].strip()
42
 
43
- return answer if answer else "No results found"
 
 
44
 
45
  except Exception as e:
46
  return f"[Web search error: {e}]"
@@ -51,3 +50,4 @@ web_search_tool = Tool.from_function(
51
  func=web_search
52
  )
53
 
 
 
1
  import requests
2
  import os
3
+ import re
4
  from langchain.tools import Tool
5
  from dotenv import load_dotenv
6
 
 
24
  response.raise_for_status()
25
  data = response.json()
26
 
27
+ # Extract concise answers
28
+ if data.get("answerBox"):
29
+ answer = data["answerBox"].get("answer") or data["answerBox"].get("snippet")
 
30
  elif data.get("organic"):
31
+ answer = data["organic"][0].get("snippet") or data["organic"][0].get("title")
 
 
32
  else:
33
  return "No results found"
34
 
35
+ # Clean output: remove extra text and punctuation
36
  answer = answer.strip()
37
+ answer = re.sub(r'^(The answer is|Answer:)\s*', '', answer, flags=re.IGNORECASE)
38
+ answer = answer.strip(' "\'')
 
 
39
 
40
+ # Optional: Extract only the first sentence or number
41
+ match = re.match(r'^[^.,;:!?]+', answer)
42
+ return match.group(0).strip() if match else answer
43
 
44
  except Exception as e:
45
  return f"[Web search error: {e}]"
 
50
  func=web_search
51
  )
52
 
53
+