Spaces:

ana-35
/

agents-final-assessment

Sleeping

App Files Files Community

agents-final-assessment / tools /file_reader.py

ana-35

improved tools

64188e4 7 months ago

raw

history blame contribute delete

2.13 kB

	import requests
	import pandas as pd
	import io
	import PyPDF2
	from langchain.tools import Tool

	def read_file(task_id: str) -> str:
	"""
	Downloads and reads the content of a file by task_id from the evaluation server.
	Handles Excel, CSV, PDF, TXT, JSON, Python, Markdown, and generic files.
	"""
	try:
	url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
	response = requests.get(url, timeout=15)
	response.raise_for_status()

	content_type = response.headers.get("Content-Type", "").lower()
	file_bytes = io.BytesIO(response.content)

	if "excel" in content_type or task_id.endswith((".xlsx", ".xls")):
	df = pd.read_excel(file_bytes)
	numeric_cols = df.select_dtypes(include='number').columns
	if not numeric_cols.empty:
	total = df[numeric_cols].sum().sum()
	return f"{total:.2f}"
	return df.to_string(index=False, header=True)

	elif "csv" in content_type or task_id.endswith(".csv"):
	df = pd.read_csv(file_bytes)
	numeric_cols = df.select_dtypes(include='number').columns
	if not numeric_cols.empty:
	total = df[numeric_cols].sum().sum()
	return f"{total:.2f}"
	return df.to_string(index=False, header=True)

	elif "pdf" in content_type or task_id.endswith(".pdf"):
	pdf = PyPDF2.PdfReader(file_bytes)
	text = "\n".join(page.extract_text() for page in pdf.pages if page.extract_text())
	return text.strip() if text else "No text extracted from PDF."

	elif any(task_id.endswith(ext) for ext in [".txt", ".py", ".md", ".json"]):
	return response.content.decode('utf-8', errors='ignore').strip()

	else:
	return response.content.decode('utf-8', errors='ignore').strip()

	except Exception as e:
	return f"[File reader error: {e}]"

	read_file_tool = Tool.from_function(
	name="read_file",
	description="Reads the content of a file based on task_id (Excel, CSV, PDF, text, code, etc.)",
	func=read_file
	)