ana-35 committed on
Commit
d21c232
·
1 Parent(s): ed0039a

improving web search and file reader

Browse files
Files changed (3) hide show
  1. app.py +5 -4
  2. tools/file_reader.py +10 -22
  3. tools/web_searcher.py +12 -12
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
  import gradio as gr
3
  import requests
 
4
  import pandas as pd
5
  from io import BytesIO
6
  from dotenv import load_dotenv
@@ -48,6 +49,8 @@ You are an assistant answering benchmark questions. Return ONLY the final answer
48
  - For a city name: Just the city name, properly capitalized, no extra words.
49
  - For lists: Only a comma-separated list, no explanations, no 'and', no extra text.
50
  - For numbers: Just the number, 2 decimal places if applicable, no units unless explicitly asked.
 
 
51
 
52
  Here is the question:
53
  {question}
@@ -69,10 +72,8 @@ Here is the question:
69
 
70
  def clean_output(self, output: str) -> str:
71
  output = output.strip()
72
- if output.lower().startswith("the answer is"):
73
- output = output[len("the answer is"):].strip()
74
- if output.startswith('"') and output.endswith('"'):
75
- output = output[1:-1].strip()
76
  return output
77
 
78
  # --- Gradio + Submission Logic ---
 
1
  import os
2
  import gradio as gr
3
  import requests
4
+ import re
5
  import pandas as pd
6
  from io import BytesIO
7
  from dotenv import load_dotenv
 
49
  - For a city name: Just the city name, properly capitalized, no extra words.
50
  - For lists: Only a comma-separated list, no explanations, no 'and', no extra text.
51
  - For numbers: Just the number, 2 decimal places if applicable, no units unless explicitly asked.
52
+ - For logical tasks (like reversing words): Think step by step, then provide only the final answer, no explanation.
53
+ - If unsure, do not guess.
54
 
55
  Here is the question:
56
  {question}
 
72
 
73
  def clean_output(self, output: str) -> str:
74
  output = output.strip()
75
+ output = re.sub(r'^(The answer is|Answer:)\s*', '', output, flags=re.IGNORECASE)
76
+ output = output.strip(' "\'')
 
 
77
  return output
78
 
79
  # --- Gradio + Submission Logic ---
tools/file_reader.py CHANGED
@@ -4,31 +4,19 @@ from io import BytesIO
4
  from langchain.tools import Tool
5
 
6
  def read_file(task_id: str) -> str:
7
- api_base = "https://agents-course-unit4-scoring.hf.space"
8
- file_url = f"{api_base}/files/{task_id}"
 
9
 
10
- try:
11
- response = requests.get(file_url)
12
- response.raise_for_status()
13
 
14
- content_type = response.headers.get("Content-Type", "")
15
- if "spreadsheet" in content_type or "excel" in content_type:
16
- df = pd.read_excel(BytesIO(response.content))
17
- elif "csv" in content_type:
18
- df = pd.read_csv(BytesIO(response.content))
19
- else:
20
- return "[Error: Unsupported file type]"
21
 
22
- # Filter: Keep only food columns (exclude drinks)
23
- # Adjust the logic below based on the actual file columns
24
- food_columns = [col for col in df.columns if "soda" not in col.lower() and "drink" not in col.lower()]
25
- food_sales = df[food_columns].sum().sum()
26
-
27
- # Format: Number with 2 decimals, no commas, no $
28
- return f"{food_sales:.2f}"
29
-
30
- except Exception as e:
31
- return f"[FileReader Error: {e}]"
32
 
33
  read_file_tool = Tool.from_function(
34
  name="read_file",
 
4
  from langchain.tools import Tool
5
 
6
  def read_file(task_id: str) -> str:
7
+ url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
8
+ response = requests.get(url)
9
+ response.raise_for_status()
10
 
11
+ content_type = response.headers.get("Content-Type", "")
 
 
12
 
13
+ if "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" in content_type:
14
+ df = pd.read_excel(BytesIO(response.content))
15
+ elif "text/csv" in content_type or task_id.endswith(".csv"):
16
+ df = pd.read_csv(BytesIO(response.content))
17
+ else:
18
+ return "[Error: Unsupported file type]"
 
19
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  read_file_tool = Tool.from_function(
22
  name="read_file",
tools/web_searcher.py CHANGED
@@ -1,5 +1,6 @@
1
  import requests
2
  import os
 
3
  from langchain.tools import Tool
4
  from dotenv import load_dotenv
5
 
@@ -23,24 +24,22 @@ def web_search(query: str) -> str:
23
  response.raise_for_status()
24
  data = response.json()
25
 
26
- if data.get("answerBox") and data["answerBox"].get("answer"):
27
- answer = data["answerBox"]["answer"]
28
- elif data.get("answerBox") and data["answerBox"].get("snippet"):
29
- answer = data["answerBox"]["snippet"]
30
  elif data.get("organic"):
31
- answer = data["organic"][0].get("snippet", "").strip()
32
- if not answer:
33
- answer = data["organic"][0].get("title", "").strip()
34
  else:
35
  return "No results found"
36
 
 
37
  answer = answer.strip()
38
- if answer.lower().startswith("the answer is"):
39
- answer = answer[len("the answer is"):].strip()
40
- if answer.startswith('"') and answer.endswith('"'):
41
- answer = answer[1:-1].strip()
42
 
43
- return answer if answer else "No results found"
 
 
44
 
45
  except Exception as e:
46
  return f"[Web search error: {e}]"
@@ -51,3 +50,4 @@ web_search_tool = Tool.from_function(
51
  func=web_search
52
  )
53
 
 
 
1
  import requests
2
  import os
3
+ import re
4
  from langchain.tools import Tool
5
  from dotenv import load_dotenv
6
 
 
24
  response.raise_for_status()
25
  data = response.json()
26
 
27
+ # Extract concise answers
28
+ if data.get("answerBox"):
29
+ answer = data["answerBox"].get("answer") or data["answerBox"].get("snippet")
 
30
  elif data.get("organic"):
31
+ answer = data["organic"][0].get("snippet") or data["organic"][0].get("title")
 
 
32
  else:
33
  return "No results found"
34
 
35
+ # Clean output: remove extra text and punctuation
36
  answer = answer.strip()
37
+ answer = re.sub(r'^(The answer is|Answer:)\s*', '', answer, flags=re.IGNORECASE)
38
+ answer = answer.strip(' "\'')
 
 
39
 
40
+ # Optional: Extract only the first sentence or number
41
+ match = re.match(r'^[^.,;:!?]+', answer)
42
+ return match.group(0).strip() if match else answer
43
 
44
  except Exception as e:
45
  return f"[Web search error: {e}]"
 
50
  func=web_search
51
  )
52
 
53
+