Spaces:
Paused
Paused
| import arxiv | |
| from typing import List, Dict, Any | |
class ArxivRetrievalService:
    """Query arXiv through the ``arxiv`` package and return paper metadata as plain dicts."""

    def __init__(self):
        """Create the shared ``arxiv.Client`` used for every query.

        The client is configured with ``delay_seconds=3`` and ``num_retries=3``.
        """
        self.client = arxiv.Client(delay_seconds=3, num_retries=3)

    @staticmethod
    def _abs_url(entry_id: str) -> str:
        """Return the ``http://arxiv.org/abs/...`` page URL for *entry_id*.

        Old-style arXiv identifiers contain a slash (``hep-th/9901001v1``), so
        keeping only the last path segment would drop the archive prefix and
        yield a dead link. Everything after ``/abs/`` is kept instead.
        """
        _, marker, short_id = entry_id.partition("/abs/")
        if not marker:
            # No "/abs/" in the value: fall back to the last path segment.
            short_id = entry_id.rsplit("/", 1)[-1]
        return f"http://arxiv.org/abs/{short_id}"

    @classmethod
    def _to_metadata(cls, result) -> Dict[str, Any]:
        """Flatten one search result object into a dict of simple values."""
        return {
            "title": result.title,
            "authors": [author.name for author in result.authors],
            "published": result.published.isoformat(),
            "updated": result.updated.isoformat(),
            "pdf_url": result.pdf_url,
            "entry_id": result.entry_id,
            "summary": result.summary,
            "categories": result.categories,
            "primary_category": result.primary_category,
            "html_url": cls._abs_url(result.entry_id),
        }

    def fetch_metadata(self, query: str, max_results: int = 10) -> List[Dict[str, Any]]:
        """Search arXiv for *query* and return metadata for the matching papers.

        Args:
            query: Search expression passed unchanged to ``arxiv.Search``.
            max_results: Upper bound on the number of results requested.

        Returns:
            One dict per result, in the order the client yields them, with the
            keys ``title``, ``authors``, ``published``, ``updated``, ``pdf_url``,
            ``entry_id``, ``summary``, ``categories``, ``primary_category`` and
            ``html_url``. ``published`` and ``updated`` are ISO-8601 strings.
        """
        search = arxiv.Search(
            query=query,
            max_results=max_results,
            sort_by=arxiv.SortCriterion.SubmittedDate,
        )
        return [self._to_metadata(result) for result in self.client.results(search)]
# Usage:
# arxiv_service = ArxivRetrievalService()
# metadata = arxiv_service.fetch_metadata("quantum computing", max_results=5)