import io

import pandas as pd
import PyPDF2
import requests
from langchain.tools import Tool


def _served_filename(content_disposition: str) -> str:
    """Return the lower-cased filename from a Content-Disposition value, or ""."""
    # Fold the extended form (filename*=utf-8''name.ext) into the plain one;
    # callers only look at the trailing extension, so the charset prefix that
    # stays at the front of the value is harmless.
    normalized = content_disposition.lower().replace("filename*=", "filename=")
    _, marker, value = normalized.partition("filename=")
    if not marker:
        return ""
    return value.split(";", 1)[0].strip().strip("\"'")


def _summarize_table(df: pd.DataFrame) -> str:
    """Return the grand total of all numeric cells, or the table as text.

    When the frame has at least one numeric column, the result is the sum over
    every numeric column formatted with two decimals. Otherwise the whole
    frame is rendered with ``DataFrame.to_string`` (no index, with header).
    """
    numeric_cols = df.select_dtypes(include="number").columns
    if not numeric_cols.empty:
        total = df[numeric_cols].sum().sum()
        return f"{total:.2f}"
    return df.to_string(index=False, header=True)


def _extract_pdf_text(file_bytes: io.BytesIO) -> str:
    """Return the text of all PDF pages joined by newlines, or a fallback message."""
    pdf = PyPDF2.PdfReader(file_bytes)
    # Extract each page exactly once; pages yielding no text are skipped.
    page_texts = (page.extract_text() for page in pdf.pages)
    text = "\n".join(chunk for chunk in page_texts if chunk).strip()
    return text or "No text extracted from PDF."


def read_file(task_id: str) -> str:
    """Download the file attached to *task_id* and return it as text.

    The file is fetched from the evaluation server's ``/files/{task_id}``
    endpoint. Its format is inferred from the ``Content-Type`` header and from
    the file extension found on ``task_id`` or on the filename in the
    ``Content-Disposition`` header (case-insensitive).

    Args:
        task_id: Identifier of the task whose attachment should be read.

    Returns:
        * Excel / CSV: the sum of all numeric cells formatted as ``"%.2f"``
          when the table has numeric columns, otherwise the table as text.
        * PDF: the extracted page text, or ``"No text extracted from PDF."``.
        * Anything else: the body decoded as UTF-8 (undecodable bytes are
          dropped) and stripped.
        * On any failure: ``"[File reader error: <exception>]"``. This
          function never raises, so it can be used directly as a tool.
    """
    try:
        url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
        response = requests.get(url, timeout=15)
        response.raise_for_status()

        content_type = response.headers.get("Content-Type", "").lower()
        served_name = _served_filename(
            response.headers.get("Content-Disposition", "")
        )
        candidate_names = (task_id.lower(), served_name)

        def has_extension(*extensions: str) -> bool:
            return any(name.endswith(extensions) for name in candidate_names)

        file_bytes = io.BytesIO(response.content)

        # "excel" covers application/vnd.ms-excel (.xls); "spreadsheetml"
        # covers the OpenXML MIME type used for .xlsx.
        if (
            "excel" in content_type
            or "spreadsheetml" in content_type
            or has_extension(".xlsx", ".xls")
        ):
            return _summarize_table(pd.read_excel(file_bytes))

        if "csv" in content_type or has_extension(".csv"):
            return _summarize_table(pd.read_csv(file_bytes))

        if "pdf" in content_type or has_extension(".pdf"):
            return _extract_pdf_text(file_bytes)

        # Text, code, JSON, Markdown and unknown types are all returned as
        # decoded text.
        return response.content.decode("utf-8", errors="ignore").strip()

    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"[File reader error: {e}]"


read_file_tool = Tool.from_function(
    name="read_file",
    description="Reads the content of a file based on task_id (Excel, CSV, PDF, text, code, etc.)",
    func=read_file
)