""" Content management for the Tox21 leaderboard frontend. Contains all text, styling, and data formatting - separated from UI layout. """ import pandas as pd from typing import Dict, List from config.tasks import TOX21_TASKS, get_task_groups from config.settings import APP_TITLE, APP_DESCRIPTION class LeaderboardContent: """Content for the leaderboard tab""" title = "Tox21 Leaderboard 🧪" subtitle = "Measuring AI progress in Drug Discovery" @staticmethod def get_header_html() -> str: """Generate header HTML""" return f"""

        <div class="header">
            <h1>{LeaderboardContent.title}</h1>
            <h3>{LeaderboardContent.subtitle}</h3>
        </div>

""" @staticmethod def get_info_html() -> str: """Generate info section HTML""" return """
        <ul>
            <li><b>Avg. AUC:</b> Mean ROC-AUC across all 12 tasks</li>
            <li><b>Avg. ΔAUC-PR:</b> Mean ΔAUC-PR across all 12 tasks</li>
            <li><b>Rank:</b> Based on Avg. AUC</li>
            <li><b>Type:</b> 0️⃣ Zero-shot | 1️⃣ Few-shot | ⤵️ Pre-trained | 🔼 Trained from scratch</li>
        </ul>
""" class AboutContent: """Content for the about tab""" @staticmethod def get_markdown_content() -> str: """Generate about page markdown content""" return f""" # About the Tox21 Leaderboard {APP_DESCRIPTION} ## Overview The **Tox21 Leaderboard** provides a standardized and reproducible evaluation framework for molecular toxicity prediction models. It restores the original evaluation protocol of the 2014–2015 **Tox21 Data Challenge**, ensuring that modern models can be compared under identical conditions using the original test set of 647 compounds. This leaderboard addresses inconsistencies introduced by later benchmark integrations (e.g., MoleculeNet, TDC, OGB), where the dataset was altered through label imputation, resampling, and metric changes. By aligning evaluation with the original challenge data, the leaderboard enables a faithful assessment of genuine progress in bioactivity modeling over the past decade. ## How it works - **Hosted on Hugging Face Spaces:** The leaderboard Space stores the original Tox21 test set and orchestrates evaluation. - **Model submission:** Participants provide a public Hugging Face Space exposing a `FastAPI` endpoint that accepts SMILES strings and returns predicted probabilities for the 12 toxicity endpoints. - **Evaluation process:** The leaderboard sends the test compounds to the model’s API, receives predictions, computes per-endpoint AUC scores, and appends results to a public results dataset. - **Manual approval:** Each submission is verified by the maintainers for completeness, correctness, and reproducibility before publication. ## FastAPI template To simplify participation, we provide a minimal **FastAPI template** that defines a `/predict` endpoint. Developers only need to adapt the `predict_fn()` function to include their model’s preprocessing and inference logic. This interface ensures: - Compatibility with the leaderboard orchestrator, - Transparent, reproducible evaluation, and - External accessibility for research or industry partners. Example implementation: [`ml-jku/tox21_gin_classifier`](https://huggingface.co/spaces/ml-jku/tox21_gin_classifier). ## Evaluation protocol - **Task:** 12 binary toxicity classification endpoints (NR and SR assays). - **Input:** SMILES strings of the 647 original Tox21 test molecules. - **Output:** Probabilities in `[0, 1]` for each molecule–endpoint pair. - **Metric:** ROC-AUC per endpoint, averaged across all 12 tasks (macro AUC). - **Integrity:** The original split and label sparsity are preserved; no imputation or data alteration is applied. ## Baselines The leaderboard includes reference implementations of key model families: - Descriptor-based models (e.g., DeepTox, SNN) - Graph neural networks (e.g., GIN, Chemprop) - Classical machine learning (e.g., RF, XGBoost) - Foundation models (e.g., TabPFN, GPT-OSS) These baselines form the foundation for future community submissions and progress tracking. ## Responsible use This leaderboard is intended for **research benchmarking only**. Predictions are not suitable for clinical or regulatory decision-making without experimental validation. 
## Citation
If you use this leaderboard in your research, please cite:

```bibtex
-
```
"""


class SubmissionContent:
    """Content for the submission tab."""

    emoji = "🚀"
    title = "Submit Your Model"

    form_labels = {
        "model_name": "*Model Name",
        "hf_space_tag": "*HuggingFace Space Tag",
        "model_description": "*Model Description",
        "organization": "*Organization",
        "model_size": "*Model Size",
        "publication_title": "*Publication Title",
        "publication_link": "*Publication Link",
        "pretrained": "*Pretrained (y/n)",
        "zero_shot": "*Zero shot (y/n)",
        "n_shot": "*N-shot",
        "few_shot": "*Few-shot (y/n)",
        "pretraining_data": "*Pretraining Data",
    }

    form_placeholders = {
        "model_name": "e.g., AwesomeTox",
        "hf_space_tag": "e.g., username/model-name",
        "model_description": "Brief description of your model architecture and approach...",
        "organization": "e.g., University of Example",
        "model_size": "e.g., 150M",
        "publication_title": "Title of associated paper",
        "publication_link": "https://arxiv.org/abs/...",
        "pretrained": "Yes/No",
        "zero_shot": "Yes/No",
        "n_shot": "e.g. 5",
        "few_shot": "Yes/No",
        "pretraining_data": "e.g., ChEMBL 29, ZINC-15",
    }

    form_info = {
        "model_name": "A short, descriptive name for your model",
        "hf_space_tag": "Your HuggingFace space in format: username/space-name",
        "model_description": "Describe your model, methodology, and key features",
    }

    @staticmethod
    def get_instructions_html() -> str:
        """Generate submission instructions HTML."""
        return """

        <p>Submit your HuggingFace Space for evaluation on the Tox21 benchmark.</p>

        <p><b>Requirements:</b></p>
        <ul>
            <li>A public HuggingFace Space exposing a <code>FastAPI</code> <code>/predict</code> endpoint.</li>
            <li>The endpoint accepts SMILES strings and returns probabilities in [0, 1] for all 12 Tox21 endpoints.</li>
        </ul>

        <p>For a detailed explanation have a look at this
        <a href="https://huggingface.co/spaces/ml-jku/tox21_gin_classifier">Example Space</a>.</p>

        <p><b>Evaluation flow:</b></p>
        <ol>
            <li>The leaderboard sends the 647 original Tox21 test compounds to your Space's API.</li>
            <li>Per-endpoint AUC scores are computed from the returned predictions.</li>
            <li>Maintainers manually verify the submission before results are published.</li>
        </ol>

        <p>* Required fields</p>
        """