Spaces:

ana-35
/

agents-final-assessment

Sleeping

agents-final-assessment / tools / web_scraper.py

Change to ReAct Agent

a2709ae 7 months ago

1 kB

	import requests
	from bs4 import BeautifulSoup
	from langchain.tools import Tool

	def scrape_webpage(url: str, *, max_chars: int = 3000) -> str:
	    """Fetch a webpage and return its visible text, truncated for LLM use.

	    Args:
	        url: Address of the page to fetch. Leading/trailing whitespace and
	            quote characters are removed before the request is made.
	        max_chars: Maximum number of characters of extracted text to
	            return. Defaults to 3000.

	    Returns:
	        The page text with ``<script>`` and ``<style>`` elements removed and
	        the remaining text fragments joined by single spaces, cut to
	        ``max_chars`` characters. On any failure a string of the form
	        ``"[WebScraper error: <details>]"`` is returned instead of raising.
	    """
	    # NOTE(review): assumes tool input may arrive with stray whitespace or
	    # wrapped in quotes (e.g. when produced by an LLM) -- confirm with callers.
	    if isinstance(url, str):
	        cleaned_url = url.strip().strip("'\"").strip()
	    else:
	        cleaned_url = ""
	    if not cleaned_url:
	        # Fail fast with a clear message instead of a library-specific one.
	        return "[WebScraper error: no URL provided]"

	    try:
	        headers = {
	            "User-Agent": "Mozilla/5.0 (compatible; AI-Agent/1.0)",
	        }
	        response = requests.get(cleaned_url, headers=headers, timeout=10)
	        response.raise_for_status()

	        soup = BeautifulSoup(response.content, "html.parser")
	        # Drop elements whose text is code or styling, not page content.
	        for tag in soup(["script", "style"]):
	            tag.decompose()
	        text = " ".join(soup.stripped_strings)
	        # Limit the output length to avoid overloading the LLM.
	        return text[:max_chars]
	    except Exception as e:
	        # Deliberately broad: every failure is reported as text to the caller
	        # rather than raised.
	        return f"[WebScraper error: {e}]"

	# Expose scrape_webpage as a LangChain Tool under the name "web_scraper".
	web_scraper_tool = Tool.from_function(
	    func=scrape_webpage,
	    name="web_scraper",
	    description="Fetches and extracts main text content from a webpage using its URL.",
	)