# ana-35's picture
# Change to ReAct Agent
# a2709ae
import requests
from bs4 import BeautifulSoup
from langchain.tools import Tool
def scrape_webpage(url: str, *, max_chars: int = 3000) -> str:
    """Fetch a webpage and return its text content, truncated for LLM use.

    Args:
        url: Address of the page to fetch. Surrounding whitespace and quote
            characters are removed before the request is made.
        max_chars: Maximum number of characters of extracted text to return.
            Negative values are treated as 0.

    Returns:
        The page text with ``<script>`` and ``<style>`` elements removed and
        the remaining text fragments joined by single spaces, cut to at most
        ``max_chars`` characters. On any failure a string of the form
        ``"[WebScraper error: ...]"`` is returned instead of raising.
    """
    # Normalise the input up front: a blank, padded, quoted or non-string URL
    # is reported with a clear message instead of an opaque library error.
    cleaned_url = url.strip().strip("\"'").strip() if isinstance(url, str) else ""
    if not cleaned_url:
        return "[WebScraper error: no URL provided]"

    try:
        headers = {"User-Agent": "Mozilla/5.0 (compatible; AI-Agent/1.0)"}
        response = requests.get(cleaned_url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")
        # Drop script and style elements so only the remaining text is kept.
        for tag in soup(["script", "style"]):
            tag.decompose()
        text = " ".join(soup.stripped_strings)
        # Limit the output length to avoid overloading the LLM.
        return text[: max(max_chars, 0)]
    except Exception as e:
        # Deliberately broad: every failure is reported back as text in the
        # same "[WebScraper error: ...]" format rather than raised.
        return f"[WebScraper error: {e}]"
# Expose scrape_webpage as a LangChain Tool named "web_scraper".
web_scraper_tool = Tool.from_function(
    func=scrape_webpage,
    name="web_scraper",
    description="Fetches and extracts main text content from a webpage using its URL.",
)