import requests
import pandas as pd
import io
import PyPDF2
from langchain.tools import Tool

def _summarize_table(df: pd.DataFrame) -> str:
    """Return the grand total of all numeric cells, or the table as text.

    If the frame has at least one numeric column, the result is the sum over
    every numeric column, formatted with two decimals. Otherwise the whole
    frame is rendered with ``DataFrame.to_string`` (no index, with header).
    """
    numeric_cols = df.select_dtypes(include="number").columns
    if not numeric_cols.empty:
        total = df[numeric_cols].sum().sum()
        return f"{total:.2f}"
    return df.to_string(index=False, header=True)


def read_file(task_id: str) -> str:
    """
    Download the file attached to ``task_id`` from the evaluation server and
    return its content as text.

    The format is chosen from the response ``Content-Type`` header, falling
    back to the (case-insensitive) extension of ``task_id``:

    * Excel / CSV: the sum of all numeric cells formatted as ``"%.2f"``, or
      the full table as text when there are no numeric columns.
    * PDF: the extracted text of all pages joined by newlines, or
      ``"No text extracted from PDF."`` when nothing could be extracted.
    * Anything else (TXT, JSON, Python, Markdown, ...): the body decoded as
      UTF-8 with undecodable bytes dropped, stripped of surrounding whitespace.

    Args:
        task_id: Identifier appended to the server's ``/files/`` endpoint.

    Returns:
        The file content (or numeric total) as a string. On any failure the
        exception is not raised; a ``"[File reader error: ...]"`` string is
        returned instead.
    """
    try:
        url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
        response = requests.get(url, timeout=15)
        response.raise_for_status()

        content_type = response.headers.get("Content-Type", "").lower()
        name = task_id.lower()
        file_bytes = io.BytesIO(response.content)

        # "excel" only matches the legacy application/vnd.ms-excel type;
        # .xlsx files are served as
        # application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.
        if (
            "excel" in content_type
            or "spreadsheetml" in content_type
            or name.endswith((".xlsx", ".xls"))
        ):
            return _summarize_table(pd.read_excel(file_bytes))

        if "csv" in content_type or name.endswith(".csv"):
            return _summarize_table(pd.read_csv(file_bytes))

        if "pdf" in content_type or name.endswith(".pdf"):
            pdf = PyPDF2.PdfReader(file_bytes)
            # Extract each page once; pages yielding no text are skipped.
            page_texts = (page.extract_text() for page in pdf.pages)
            text = "\n".join(t for t in page_texts if t).strip()
            # Strip before the emptiness check so whitespace-only output is
            # reported as "no text" rather than returned as an empty string.
            return text or "No text extracted from PDF."

        # Plain-text formats and unknown types are both decoded as UTF-8.
        return response.content.decode("utf-8", errors="ignore").strip()

    except Exception as e:
        # Tool boundary: report the failure as text instead of raising so the
        # caller always receives a string.
        return f"[File reader error: {e}]"

# Expose ``read_file`` as a LangChain tool under the name "read_file".
read_file_tool = Tool.from_function(
    func=read_file,
    name="read_file",
    description="Reads the content of a file based on task_id (Excel, CSV, PDF, text, code, etc.)",
)