Spaces:
Sleeping
Sleeping
File size: 2,132 Bytes
00dd226 f8a38a7 64188e4 30c59be 10a7732 00dd226 64188e4 30c59be 64188e4 30c59be f8a38a7 30c59be 64188e4 5c082e6 64188e4 5c082e6 64188e4 f8a38a7 30c59be 5c082e6 64188e4 02a45be 30c59be 5c082e6 30c59be 5c082e6 00dd226 64188e4 00dd226 30c59be 64188e4 30c59be 00dd226 10a7732 64188e4 10a7732 5c082e6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
import requests
import pandas as pd
import io
import PyPDF2
from langchain.tools import Tool
def _served_filename(headers) -> str:
    """Return the lower-cased file name from Content-Disposition, or ""."""
    disposition = headers.get("Content-Disposition", "")
    for part in disposition.split(";"):
        key, _, value = part.strip().partition("=")
        # Accept both `filename=` and the extended `filename*=` parameter;
        # only the trailing extension matters to the caller.
        if key.strip().lower().rstrip("*") == "filename":
            return value.strip().strip('"').lower()
    return ""


def _has_extension(names, extensions) -> bool:
    """Return True if any candidate name ends with one of *extensions*."""
    return any(name.endswith(extensions) for name in names)


def _summarize_frame(df) -> str:
    """Render a DataFrame the way the tool reports tabular files.

    If the frame has numeric columns, only their grand total is returned,
    formatted with two decimals; otherwise the whole table is rendered as
    text without the index.
    """
    numeric_cols = df.select_dtypes(include="number").columns
    if not numeric_cols.empty:
        total = df[numeric_cols].sum().sum()
        return f"{total:.2f}"
    return df.to_string(index=False, header=True)


def read_file(task_id: str) -> str:
    """Download the file attached to *task_id* and return it as text.

    The file type is detected from the response's Content-Type, from the
    extension of ``task_id`` and from the file name announced in the
    Content-Disposition header (all compared case-insensitively):

    * Excel / CSV: parsed with pandas; see ``_summarize_frame`` for the
      returned representation (grand total of numeric columns, or the
      table as text when there are none).
    * PDF: text of all pages joined by newlines, or a fixed message when
      nothing could be extracted.
    * Anything else: the body decoded as UTF-8, undecodable bytes dropped.

    Args:
        task_id: Identifier appended to the evaluation server's ``/files/``
            endpoint.

    Returns:
        The extracted text, or ``"[File reader error: ...]"`` if any step
        fails. The function never raises for ordinary errors, so the
        calling agent always receives a string.
    """
    try:
        url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
        response = requests.get(url, timeout=15)
        response.raise_for_status()

        content_type = response.headers.get("Content-Type", "").lower()
        # The extension may only be present in the served file name, not in
        # task_id itself, so both are consulted.
        names = (task_id.lower(), _served_filename(response.headers))
        file_bytes = io.BytesIO(response.content)

        # "spreadsheetml" covers the standard .xlsx MIME type, which does
        # not contain the word "excel" (only the legacy .xls type does).
        is_excel = (
            "excel" in content_type
            or "spreadsheetml" in content_type
            or _has_extension(names, (".xlsx", ".xls"))
        )
        if is_excel:
            return _summarize_frame(pd.read_excel(file_bytes))

        if "csv" in content_type or _has_extension(names, (".csv",)):
            return _summarize_frame(pd.read_csv(file_bytes))

        if "pdf" in content_type or _has_extension(names, (".pdf",)):
            pdf = PyPDF2.PdfReader(file_bytes)
            # Extract each page once; skip pages that yield no text.
            page_texts = (page.extract_text() for page in pdf.pages)
            text = "\n".join(chunk for chunk in page_texts if chunk).strip()
            return text or "No text extracted from PDF."

        # Text, code, JSON, Markdown and unknown types are all returned as
        # decoded text.
        return response.content.decode("utf-8", errors="ignore").strip()
    except Exception as e:
        # Deliberate catch-all at the tool boundary: report the failure to
        # the caller as text instead of raising.
        return f"[File reader error: {e}]"
# LangChain tool wrapper around read_file, registered under the name "read_file".
read_file_tool = Tool.from_function(
    func=read_file,
    name="read_file",
    description=(
        "Reads the content of a file based on task_id (Excel, CSV, PDF, text, code, etc.)"
    ),
)
|