# ana-35's picture
# improved tools
# 64188e4
import requests
import pandas as pd
import io
import PyPDF2
from langchain.tools import Tool
def _summarize_dataframe(df) -> str:
    """Turn a parsed table into the string returned by the tool.

    When the table has at least one numeric column, the result is the grand
    total of every numeric cell, formatted with two decimals. Otherwise the
    whole table is rendered as text (header kept, index dropped).
    """
    numeric_cols = df.select_dtypes(include="number").columns
    if not numeric_cols.empty:
        # First sum() collapses rows per column, second collapses the columns.
        total = df[numeric_cols].sum().sum()
        return f"{total:.2f}"
    return df.to_string(index=False, header=True)


def read_file(task_id: str) -> str:
    """Download the file attached to ``task_id`` and return its content as text.

    The file is fetched from the evaluation server and interpreted according
    to the response ``Content-Type`` header or, failing that, the extension
    of ``task_id``:

    * Excel / CSV: parsed with pandas; returns the sum of all numeric cells
      (two decimals) if any numeric column exists, else the table as text.
    * PDF: the extracted text of all pages joined by newlines, or a fixed
      message when no text could be extracted.
    * Anything else (txt, py, md, json, unknown): the body decoded as UTF-8
      with undecodable bytes dropped.

    Args:
        task_id: Identifier appended to the server's ``/files/`` endpoint.

    Returns:
        The extracted content, or ``"[File reader error: ...]"`` if the
        download or parsing fails. This function never raises.
    """
    try:
        url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
        response = requests.get(url, timeout=15)
        response.raise_for_status()

        content_type = response.headers.get("Content-Type", "").lower()
        # Compare extensions case-insensitively so ".XLSX" etc. also match.
        lowered_id = task_id.lower()

        # The official .xlsx MIME type is
        # "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        # which does not contain "excel"; only the legacy .xls type does.
        is_excel = (
            "excel" in content_type
            or "spreadsheetml" in content_type
            or lowered_id.endswith((".xlsx", ".xls"))
        )
        if is_excel:
            return _summarize_dataframe(pd.read_excel(io.BytesIO(response.content)))

        if "csv" in content_type or lowered_id.endswith(".csv"):
            return _summarize_dataframe(pd.read_csv(io.BytesIO(response.content)))

        if "pdf" in content_type or lowered_id.endswith(".pdf"):
            pdf = PyPDF2.PdfReader(io.BytesIO(response.content))
            # Extract each page exactly once; extraction is the costly step.
            page_texts = (page.extract_text() for page in pdf.pages)
            text = "\n".join(chunk for chunk in page_texts if chunk)
            return text.strip() if text else "No text extracted from PDF."

        # Plain-text formats and unknown types share the same fallback.
        return response.content.decode("utf-8", errors="ignore").strip()
    except Exception as e:
        # Tool boundary: report failures as text so the calling agent can
        # read the error instead of crashing.
        return f"[File reader error: {e}]"
# Expose read_file to the agent as a LangChain tool named "read_file".
read_file_tool = Tool.from_function(
    func=read_file,
    name="read_file",
    description="Reads the content of a file based on task_id (Excel, CSV, PDF, text, code, etc.)",
)