File size: 2,132 Bytes
00dd226
f8a38a7
64188e4
30c59be
10a7732
00dd226
 
64188e4
 
 
 
30c59be
 
64188e4
30c59be
f8a38a7
30c59be
64188e4
5c082e6
64188e4
5c082e6
64188e4
 
 
 
 
f8a38a7
30c59be
5c082e6
64188e4
 
 
 
 
02a45be
30c59be
5c082e6
30c59be
5c082e6
00dd226
64188e4
 
00dd226
30c59be
64188e4
30c59be
 
 
00dd226
10a7732
 
64188e4
10a7732
5c082e6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import requests
import pandas as pd
import io
import PyPDF2
from langchain.tools import Tool

def _summarize_table(df: pd.DataFrame) -> str:
    """Return the grand total of all numeric cells, or the table as text.

    When the frame has at least one numeric column, the result is the sum of
    every value in those columns formatted with two decimals. Otherwise the
    whole frame is rendered with ``DataFrame.to_string`` (no index column).
    """
    numeric_cols = df.select_dtypes(include="number").columns
    if not numeric_cols.empty:
        total = df[numeric_cols].sum().sum()
        return f"{total:.2f}"
    return df.to_string(index=False, header=True)


def read_file(task_id: str) -> str:
    """
    Download the file attached to ``task_id`` from the evaluation server and
    return a textual result for it.

    The file kind is picked from the response ``Content-Type`` header or, as a
    fallback, from a file extension at the end of ``task_id``
    (case-insensitive):

    * Excel / CSV: the sum of all numeric cells formatted to two decimals, or
      the table rendered as text when there are no numeric columns.
    * PDF: the extracted text of all pages joined by newlines, or
      ``"No text extracted from PDF."`` when nothing could be extracted.
    * Anything else (TXT, JSON, Python, Markdown, ...): the body decoded as
      UTF-8 with undecodable bytes dropped, stripped of surrounding whitespace.

    Args:
        task_id: Identifier appended to the evaluation server's ``/files/`` URL.

    Returns:
        The text described above. This function never raises: any failure
        (network, HTTP status, parsing) is reported as
        ``"[File reader error: <message>]"``.
    """
    try:
        url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
        response = requests.get(url, timeout=15)
        response.raise_for_status()

        content_type = response.headers.get("Content-Type", "").lower()
        name = task_id.lower()

        # Legacy .xls is served as "application/vnd.ms-excel", but .xlsx uses
        # "...officedocument.spreadsheetml.sheet", which has no "excel" in it.
        if (
            "excel" in content_type
            or "spreadsheetml" in content_type
            or name.endswith((".xlsx", ".xls"))
        ):
            return _summarize_table(pd.read_excel(io.BytesIO(response.content)))

        if "csv" in content_type or name.endswith(".csv"):
            return _summarize_table(pd.read_csv(io.BytesIO(response.content)))

        if "pdf" in content_type or name.endswith(".pdf"):
            pdf = PyPDF2.PdfReader(io.BytesIO(response.content))
            # Extract each page once; skip pages that yield no text.
            page_texts = (page.extract_text() for page in pdf.pages)
            text = "\n".join(t for t in page_texts if t).strip()
            return text or "No text extracted from PDF."

        # Plain text, source code, JSON, Markdown and unknown types alike.
        return response.content.decode("utf-8", errors="ignore").strip()

    except Exception as e:
        # Deliberate catch-all: the caller receives the failure as text
        # rather than as an exception.
        return f"[File reader error: {e}]"

# Wrap read_file as a LangChain Tool registered under the name "read_file".
read_file_tool = Tool.from_function(
    func=read_file,
    name="read_file",
    description="Reads the content of a file based on task_id (Excel, CSV, PDF, text, code, etc.)",
)