# Hugging Face Space status: Paused
| import gradio as gr | |
| from typing import List, Dict, Any | |
| from config import DATASET_NAME | |
| from arxiv_retrieval_service import ArxivRetrievalService | |
| from dataset_management_service import DatasetManagementService | |
| import logging | |
# Configure the root logger: INFO and above, each record prefixed with a
# timestamp and its level name.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-level service instances used by the callback functions in this file.
arxiv_service = ArxivRetrievalService()
dataset_service = DatasetManagementService(DATASET_NAME)
def handle_metadata_extraction(query: str, max_results: int) -> str:
    """Fetch metadata for ``query`` and push it into the dataset.

    Args:
        query: Search string forwarded to ``arxiv_service.fetch_metadata``.
            Leading/trailing whitespace is stripped; a blank or ``None``
            value is rejected without contacting the service.
        max_results: Maximum number of results to request. Coerced to
            ``int`` before use (a UI slider may hand over a float --
            NOTE(review): confirm against the Gradio version in use).

    Returns:
        Whatever ``dataset_service.update_dataset`` returns on success, or a
        human-readable message when the query is blank, no metadata was
        found, or any exception was raised along the way. Exceptions are
        never propagated to the caller.
    """
    # Guard against empty input up front so no request is made for it.
    query = (query or "").strip()
    if not query:
        return "Please enter a search query."

    try:
        # Inside the try so a non-numeric value is reported, not raised.
        max_results = int(max_results)
        logging.info(f"Fetching metadata for query: {query}, max_results: {max_results}")
        metadata_list = arxiv_service.fetch_metadata(query, max_results)
        if not metadata_list:
            return "No metadata found for the given query."
        result = dataset_service.update_dataset(metadata_list)
        logging.info(f"Dataset update result: {result}")
        return result
    except Exception as e:
        error_msg = f"An error occurred during metadata extraction: {str(e)}"
        # logging.exception records the traceback alongside the message.
        logging.exception(error_msg)
        return error_msg
def handle_dataset_view(page: int = 1, page_size: int = 10) -> Dict[str, Any]:
    """Return one page of dataset records together with paging information.

    Args:
        page: Page number to fetch; must be at least 1 after ``int`` coercion.
        page_size: Number of records per page; must be at least 1 after
            ``int`` coercion.

    Returns:
        On success, a dict with the keys ``total_records``, ``current_page``,
        ``page_size`` and ``records``. On invalid paging values or on any
        exception from the dataset service, a dict with the single key
        ``error`` holding a message. Exceptions are never propagated.
    """
    logging.info(f"handle_dataset_view called with page={page}, page_size={page_size}")

    # UI widgets can hand over None (cleared field) or floats; normalise
    # both values before they reach the dataset service.
    try:
        page = int(page)
        page_size = int(page_size)
    except (TypeError, ValueError, OverflowError):
        error_msg = f"Invalid pagination values: page={page!r}, page_size={page_size!r}"
        logging.error(error_msg)
        return {"error": error_msg}

    if page < 1 or page_size < 1:
        error_msg = "Page number and page size must be at least 1."
        logging.error(error_msg)
        return {"error": error_msg}

    try:
        total_records = dataset_service.get_dataset_size()
        logging.info(f"Total records: {total_records}")
        records = dataset_service.get_dataset_records(page, page_size)
        logging.info(f"Records type: {type(records)}")
        logging.info(f"Number of records returned: {len(records)}")
        result = {
            "total_records": total_records,
            "current_page": page,
            "page_size": page_size,
            "records": records,
        }
        logging.info(f"Returning result: {result}")
        return result
    except Exception as e:
        error_msg = f"Error loading dataset: {str(e)}"
        # logging.exception records the traceback alongside the message.
        logging.exception(error_msg)
        return {"error": error_msg}
# Assemble the UI: a heading followed by two tabs, one that triggers metadata
# extraction and one that pages through the stored dataset.
with gr.Blocks() as demo:
    # Heading text links to the dataset viewer page for DATASET_NAME.
    gr.Markdown(
        f"""# ArXiv Metadata Extraction and Dataset Management
This application extracts metadata from ArXiv papers and manages the dataset:
[{DATASET_NAME}](https://huggingface.co/datasets/{DATASET_NAME}/viewer)
"""
    )

    with gr.Tab("Extract Metadata"):
        query_input = gr.Textbox(label="ArXiv Query")
        max_results = gr.Slider(
            label="Max Results", minimum=1, maximum=100, step=1, value=10
        )
        submit_button = gr.Button("Extract Metadata")
        output = gr.Textbox(label="Result")

        # Clicking runs the extraction handler and shows its message.
        submit_button.click(
            handle_metadata_extraction,
            inputs=[query_input, max_results],
            outputs=output,
        )

    with gr.Tab("View Dataset"):
        page_number = gr.Number(label="Page Number", value=1, precision=0)
        page_size = gr.Slider(
            label="Page Size", minimum=5, maximum=50, step=5, value=10
        )
        refresh_button = gr.Button("Refresh Dataset View")
        dataset_info = gr.JSON(label="Dataset Info")

        # Clicking loads the requested page into the JSON viewer.
        refresh_button.click(
            handle_dataset_view,
            inputs=[page_number, page_size],
            outputs=dataset_info,
        )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()