"""LangChain tool that downloads a web page and returns its visible text."""

import requests
from bs4 import BeautifulSoup
from langchain.tools import Tool


def scrape_webpage(url: str) -> str:
    """Return the visible text of the page at *url*, capped at 3000 characters.

    The page is requested with a fixed User-Agent header and a 10-second
    timeout. ``<script>`` and ``<style>`` elements are dropped before the
    remaining text fragments are stripped and joined with single spaces.

    Args:
        url: Address of the page to download.

    Returns:
        At most the first 3000 characters of the extracted text. If anything
        raises along the way (request failure, HTTP error status, parsing
        problem), the string ``"[WebScraper error: <exception>]"`` is returned
        instead of propagating the exception.
    """
    try:
        page = requests.get(
            url,
            headers={"User-Agent": "Mozilla/5.0 (compatible; AI-Agent/1.0)"},
            timeout=10,
        )
        # Turn 4xx/5xx responses into exceptions so they hit the handler below.
        page.raise_for_status()

        document = BeautifulSoup(page.content, "html.parser")

        # Script and style bodies are not readable content; remove them
        # from the tree before collecting text.
        for tag in document.find_all(["script", "style"]):
            tag.decompose()

        visible_text = " ".join(document.stripped_strings)
        # Cap the output length to avoid overloading the LLM.
        result = visible_text[:3000]
    except Exception as e:
        # Deliberate catch-all: the failure is reported back as text rather
        # than raised to the caller.
        result = f"[WebScraper error: {e}]"
    return result


# Expose the scraper as a LangChain Tool.
web_scraper_tool = Tool.from_function(
    func=scrape_webpage,
    name="web_scraper",
    description="Fetches and extracts main text content from a webpage using its URL.",
)