tox21_leaderboard

Running

App Files Files Community

antoniaebner committed on Oct 29, 2025

Commit

fe1a8cf

1 Parent(s): 98d346b

refactor leaderboard formatting

Browse files

Files changed (4) hide show

backend/data_loader.py +12 -54
config/leaderboard.py +38 -0
frontend/layout.py +84 -58
frontend/leaderboard.py +81 -22

backend/data_loader.py CHANGED Viewed

@@ -4,84 +4,42 @@ Handles loading and processing results from HuggingFace datasets.
 """
 import pandas as pd
-from datasets import load_dataset
 from config.settings import RESULTS_DATASET, TEST_DATASET, HF_TOKEN
 from config.tasks import TOX21_TASKS
-def load_leaderboard_data() -> pd.DataFrame:
     """
     Load leaderboard data from HuggingFace dataset.
     """
     print(f"Loading dataset: {RESULTS_DATASET}")
     # Load the dataset
     dataset = load_dataset(RESULTS_DATASET, token=HF_TOKEN)
     print(f"Dataset loaded successfully. Keys: {dataset.keys()}")
     # Look for test split (more appropriate for results)
-    if 'test' in dataset:
-        results_data = dataset['test']
         print(f"Test split has {len(results_data)} entries")
         if len(results_data) > 0:
             print(f"First entry keys: {results_data[0].keys()}")
             print(f"First entry: {results_data[0]}")
     else:
         raise ValueError("Dataset does not contain a 'test' split.")
-    # Convert to DataFrame (new schema only)
-    rows = []
-    for entry in results_data:
-        config = entry['config']
-        results = entry['results']
-        # Only include approved entries
-        if not config.get('approved', False):
-            continue
-        # Create a row with all the data
-        row = {
-            'Model': config['model_name'],
-            'Model Description': config['model_description'],
-            'Publication': config.get('publication_title', ''),
-            'Parameters': config.get('model_size', ''),
-            'Date Added': str(config.get('date_approved', config.get('date_submitted', ''))).split()[0],
-            'Overall Score': results['overall_score']['roc_auc']
-        }
-        # Add individual task scores
-        for task_key, task_result in results.items():
-            if task_key != 'overall_score':  # Skip overall score
-                row[task_key] = task_result['roc_auc']
-        rows.append(row)
-    df = pd.DataFrame(rows)
-    # Handle empty dataset case
-    if df.empty:
-        print("No approved submissions found. Creating empty DataFrame with proper columns.")
-        # Create empty DataFrame with expected columns
-        columns = [
-            'Model', 'Model Description', 'Publication', 'Parameters', 'Date Added', 'Overall Score',
-            'NR-AR', 'NR-AR-LBD', 'NR-AhR', 'NR-Aromatase', 'NR-ER', 'NR-ER-LBD',
-            'NR-PPAR-gamma', 'SR-ARE', 'SR-ATAD5', 'SR-HSE', 'SR-MMP', 'SR-p53'
-        ]
-        df = pd.DataFrame(columns=columns)
-    print(df)
-    print(f"Created DataFrame with shape: {df.shape}")
-    return df
 def load_test_dataset() -> tuple[list[str], dict[str, dict[str, float]]]:
-     # Get test smiles and labels
     dset = load_dataset(TEST_DATASET, token=HF_TOKEN, split="test")
-    tasks = [t.key for t in TOX21_TASKS]
     smiles = list(dset["smiles"])
     labels = {
-        sample["smiles"]: {task: sample[task] for task in tasks}
         for sample in list(dset)
     }
     print(f"Loaded test dataset")
-    return smiles, labels

 """
 import pandas as pd
+from datasets import load_dataset, Dataset
 from config.settings import RESULTS_DATASET, TEST_DATASET, HF_TOKEN
 from config.tasks import TOX21_TASKS
def load_leaderboard_data() -> Dataset:
    """
    Fetch the raw leaderboard results from the HuggingFace Hub.

    Loads ``RESULTS_DATASET`` (authenticating with ``HF_TOKEN``) and hands
    back its ``test`` split unchanged. Progress information is printed to
    stdout along the way.

    Returns:
        The ``test`` split of the results dataset.

    Raises:
        ValueError: If the loaded dataset has no ``test`` split.
    """
    print(f"Loading dataset: {RESULTS_DATASET}")
    hub_dataset = load_dataset(RESULTS_DATASET, token=HF_TOKEN)
    print(f"Dataset loaded successfully. Keys: {hub_dataset.keys()}")

    # Guard clause: results are only read from the "test" split.
    if "test" not in hub_dataset:
        raise ValueError("Dataset does not contain a 'test' split.")

    test_split = hub_dataset["test"]
    n_entries = len(test_split)
    print(f"Test split has {n_entries} entries")
    if n_entries > 0:
        # Show one sample entry to make schema problems easy to spot in logs.
        first_entry = test_split[0]
        print(f"First entry keys: {first_entry.keys()}")
        print(f"First entry: {first_entry}")
    return test_split
 def load_test_dataset() -> tuple[list[str], dict[str, dict[str, float]]]:
     """
     Load the test molecules and their per-task labels.

     Reads the ``test`` split of ``TEST_DATASET`` (authenticating with
     ``HF_TOKEN``).

     Returns:
         A ``(smiles, labels)`` tuple. ``smiles`` is the dataset's
         ``"smiles"`` column as a list. ``labels`` maps each sample's SMILES
         string to a dict of ``task key -> value`` for every task in
         ``TOX21_TASKS``; if a SMILES string occurs more than once, the last
         occurrence wins.
     """
     dset = load_dataset(TEST_DATASET, token=HF_TOKEN, split="test")
     tasks = [t.key for t in TOX21_TASKS]
     smiles = list(dset["smiles"])
     # Iterate the dataset directly; copying it into a list first only
     # costs memory.
     labels = {
         sample["smiles"]: {task: sample[task] for task in tasks}
         for sample in dset
     }
     print("Loaded test dataset")
     return smiles, labels

config/leaderboard.py ADDED Viewed

	@@ -0,0 +1,38 @@

+from .tasks import get_all_task_keys
# Column names.
# Task columns come from the task registry; metadata columns must list every
# non-task key that the leaderboard formatter puts into a row, otherwise the
# column check and the per-column width lookup fail for real data.
TASK_NAMES = get_all_task_keys()
METADATA_COLUMN_NAMES = [
    "Model",
    "Model Description",
    "Publication",
    "Parameters",
    "Pretrained (y/n)",
    "Date Added",
    "Overall Score",
]
COLUMN_NAMES = TASK_NAMES + METADATA_COLUMN_NAMES

# Column widths, keyed by column name.
METADATA_COLUMN_WIDTHS = {
    "Model": 200,
    "Model Description": 300,
    "Publication": 150,
    "Parameters": 100,
    "Pretrained (y/n)": 120,
    "Date Added": 120,
    "Overall Score": 100,
}
TASK_COLUMN_WIDTH = 80  # every task column shares one width
COLUMN_WIDTHS = METADATA_COLUMN_WIDTHS | {
    k: TASK_COLUMN_WIDTH for k in TASK_NAMES
}

# Import-time sanity check: every known column needs a width.
assert all(
    name in COLUMN_WIDTHS for name in COLUMN_NAMES
), "Some leaderboard columns have no assigned width! "

# Table styling: number of decimals shown for numeric values.
MAX_DECIMALS = 3

frontend/layout.py CHANGED Viewed

@@ -2,6 +2,7 @@
 UI Layout components for the Tox21 leaderboard.
 Pure Gradio structure - no content or business logic.
 """
 import gradio as gr
 from typing import Callable, Any
 from .content import LeaderboardContent, AboutContent, SubmissionContent
@@ -10,18 +11,21 @@ import os
 import pandas as pd
 from gradio_leaderboard import Leaderboard
 def create_leaderboard_tab(refresh_callback: Callable = None) -> gr.TabItem:
     """Create the leaderboard tab layout"""
     with gr.TabItem("🏅 Leaderboard", elem_id="leaderboard-tab", id=0) as tab:
         # Header section
         header_html = gr.HTML(LeaderboardContent.get_header_html())
         # Load initial data
-        result_data = refresh_leaderboard().reset_index(drop=True)
         result_data.columns = result_data.columns.map(str)
         # Leaderboard at full width
         leaderboard_table = Leaderboard(
@@ -34,23 +38,26 @@ def create_leaderboard_tab(refresh_callback: Callable = None) -> gr.TabItem:
             height=480,
             min_width=160,
             wrap=True,
-            column_widths=[200, 300, 150, 100, 120, 100] + [80] * 12,
         )
         # Refresh button below table
-        refresh_btn = gr.Button("🔄 Refresh Leaderboard", variant="secondary", size="sm")
         # Connect refresh button
         def refresh_data():
-            new_data = refresh_callback().reset_index(drop=True) if refresh_callback else refresh_leaderboard().reset_index(drop=True)
             new_data.columns = new_data.columns.map(str)
             return new_data
-        refresh_btn.click(
-            fn=refresh_data,
-            outputs=leaderboard_table
-        )
         # Info section
         info_html = gr.HTML(LeaderboardContent.get_info_html())
@@ -59,116 +66,135 @@ def create_leaderboard_tab(refresh_callback: Callable = None) -> gr.TabItem:
 def create_about_tab() -> gr.TabItem:
     """Create the about tab layout"""
     with gr.TabItem("📝 About", elem_id="about-tab", id=1) as tab:
         content_markdown = gr.Markdown(
-            AboutContent.get_markdown_content(),
-            elem_classes="markdown-text"
         )
     return tab
 def create_submission_tab(submit_callback: Callable = None) -> gr.TabItem:
     """Create the submission tab layout"""
     with gr.TabItem("🚀 Submit", elem_id="submission-tab", id=2) as tab:
         # Header
-        gr.HTML(f"<h2 style='text-align: center;'>{SubmissionContent.title}</h2>")
         # Instructions
         instructions_html = gr.HTML(SubmissionContent.get_instructions_html())
         # Submission form
         with gr.Group():
             # Required fields
             model_name = gr.Textbox(
                 label=SubmissionContent.form_labels["model_name"],
                 placeholder=SubmissionContent.form_placeholders["model_name"],
-                info=SubmissionContent.form_info["model_name"]
             )
             hf_space_tag = gr.Textbox(
                 label=SubmissionContent.form_labels["hf_space_tag"],
                 placeholder=SubmissionContent.form_placeholders["hf_space_tag"],
-                info=SubmissionContent.form_info["hf_space_tag"]
             )
             model_description = gr.Textbox(
                 label=SubmissionContent.form_labels["model_description"],
-                placeholder=SubmissionContent.form_placeholders["model_description"],
                 info=SubmissionContent.form_info["model_description"],
-                lines=3
             )
             # Optional fields in accordion
             with gr.Accordion("Additional Information (Optional)", open=False):
                 organization = gr.Textbox(
                     label=SubmissionContent.form_labels["organization"],
-                    placeholder=SubmissionContent.form_placeholders["organization"]
                 )
                 with gr.Row():
                     model_size = gr.Textbox(
                         label=SubmissionContent.form_labels["model_size"],
-                        placeholder=SubmissionContent.form_placeholders["model_size"]
                     )
                     pretraining = gr.Textbox(
                         label=SubmissionContent.form_labels["pretraining"],
-                        placeholder=SubmissionContent.form_placeholders["pretraining"]
                     )
                 publication_title = gr.Textbox(
                     label=SubmissionContent.form_labels["publication_title"],
-                    placeholder=SubmissionContent.form_placeholders["publication_title"]
                 )
                 publication_link = gr.Textbox(
                     label=SubmissionContent.form_labels["publication_link"],
-                    placeholder=SubmissionContent.form_placeholders["publication_link"]
                 )
             # Submit button and result
             submit_btn = gr.Button("Submit Model", variant="primary")
             result_msg = gr.HTML()
             if submit_callback:
                 submit_btn.click(
                     fn=submit_callback,
                     inputs=[
-                        model_name, hf_space_tag, model_description,
-                        organization, model_size, pretraining,
-                        publication_title, publication_link
                     ],
-                    outputs=result_msg
                 )
     return tab
 def create_main_interface(
-    refresh_callback: Callable = None,
-    submit_callback: Callable = None
 ) -> gr.Blocks:
     """Create the main application interface"""
     # Load CSS from file
-    css_path = os.path.join(os.path.dirname(__file__), 'styles.css')
-    with open(css_path, 'r') as f:
         css = f.read()
     with gr.Blocks(css=css, title="Tox21 Leaderboard") as app:
         # Main title
         gr.HTML("<h1>🧪 Tox21 Leaderboard</h1>")
         # Tab container
         with gr.Tabs(elem_classes="tab-nav") as tabs:
             # Create all tabs
-            leaderboard_tab, leaderboard_table = create_leaderboard_tab(refresh_callback)
             about_tab = create_about_tab()
             submission_tab = create_submission_tab(submit_callback)
-    return app, leaderboard_table

 UI Layout components for the Tox21 leaderboard.
 Pure Gradio structure - no content or business logic.
 """
 import gradio as gr
 from typing import Callable, Any
 from .content import LeaderboardContent, AboutContent, SubmissionContent
 import pandas as pd
 from gradio_leaderboard import Leaderboard
+from config.leaderboard import COLUMN_WIDTHS
 def create_leaderboard_tab(refresh_callback: Callable = None) -> gr.TabItem:
     """Create the leaderboard tab layout"""
     with gr.TabItem("🏅 Leaderboard", elem_id="leaderboard-tab", id=0) as tab:
         # Header section
         header_html = gr.HTML(LeaderboardContent.get_header_html())
         # Load initial data
+        result_data = refresh_leaderboard()
         result_data.columns = result_data.columns.map(str)
+        column_widths = [COLUMN_WIDTHS[c] for c in result_data.columns]
         # Leaderboard at full width
         leaderboard_table = Leaderboard(
             height=480,
             min_width=160,
             wrap=True,
+            column_widths=column_widths,
         )
         # Refresh button below table
+        refresh_btn = gr.Button(
+            "🔄 Refresh Leaderboard", variant="secondary", size="sm"
+        )
         # Connect refresh button
         def refresh_data():
+            new_data = (
+                refresh_callback().reset_index(drop=True)
+                if refresh_callback
+                else refresh_leaderboard().reset_index(drop=True)
+            )
             new_data.columns = new_data.columns.map(str)
             return new_data
+        refresh_btn.click(fn=refresh_data, outputs=leaderboard_table)
         # Info section
         info_html = gr.HTML(LeaderboardContent.get_info_html())
 def create_about_tab() -> gr.TabItem:
     """Build the "About" tab: a single Markdown component with the about text."""
     with gr.TabItem("📝 About", elem_id="about-tab", id=1) as tab:
         about_text = AboutContent.get_markdown_content()
         content_markdown = gr.Markdown(about_text, elem_classes="markdown-text")
     return tab
 def create_submission_tab(submit_callback: Callable = None) -> gr.TabItem:
     """
     Build the "Submit" tab containing the model submission form.

     All labels, placeholders and help texts come from ``SubmissionContent``.

     Args:
         submit_callback: Optional handler for the submit button. When given,
             it receives the form values in this order: model name, HF space
             tag, model description, organization, model size, pretraining,
             publication title, publication link. Its output is rendered in
             the result HTML component.

     Returns:
         The created tab item.
     """
     with gr.TabItem("🚀 Submit", elem_id="submission-tab", id=2) as tab:
         # Header and instructions
         gr.HTML(
             f"<h2 style='text-align: center;'>{SubmissionContent.title}</h2>"
         )
         instructions_html = gr.HTML(SubmissionContent.get_instructions_html())

         labels = SubmissionContent.form_labels
         placeholders = SubmissionContent.form_placeholders
         infos = SubmissionContent.form_info

         def _optional_box(key):
             # Optional fields carry only a label and a placeholder.
             return gr.Textbox(label=labels[key], placeholder=placeholders[key])

         with gr.Group():
             # Required fields (these also show an info text)
             model_name = gr.Textbox(
                 label=labels["model_name"],
                 placeholder=placeholders["model_name"],
                 info=infos["model_name"],
             )
             hf_space_tag = gr.Textbox(
                 label=labels["hf_space_tag"],
                 placeholder=placeholders["hf_space_tag"],
                 info=infos["hf_space_tag"],
             )
             model_description = gr.Textbox(
                 label=labels["model_description"],
                 placeholder=placeholders["model_description"],
                 info=infos["model_description"],
                 lines=3,
             )

             # Optional fields, collapsed by default
             with gr.Accordion("Additional Information (Optional)", open=False):
                 organization = _optional_box("organization")
                 with gr.Row():
                     model_size = _optional_box("model_size")
                     pretraining = _optional_box("pretraining")
                 publication_title = _optional_box("publication_title")
                 publication_link = _optional_box("publication_link")

             # Submit button and result area
             submit_btn = gr.Button("Submit Model", variant="primary")
             result_msg = gr.HTML()

             if submit_callback:
                 form_inputs = [
                     model_name,
                     hf_space_tag,
                     model_description,
                     organization,
                     model_size,
                     pretraining,
                     publication_title,
                     publication_link,
                 ]
                 submit_btn.click(
                     fn=submit_callback,
                     inputs=form_inputs,
                     outputs=result_msg,
                 )
     return tab
 def create_main_interface(
     refresh_callback: Callable = None, submit_callback: Callable = None
 ) -> tuple[gr.Blocks, Any]:
     """
     Create the main application interface.

     Reads ``styles.css`` from this module's directory, builds the Blocks
     app with the leaderboard, about and submission tabs, and returns the app
     together with the leaderboard table component.

     Args:
         refresh_callback: Optional callback forwarded to the leaderboard tab.
         submit_callback: Optional callback forwarded to the submission tab.

     Returns:
         ``(app, leaderboard_table)``: the Blocks app and the table component
         produced by ``create_leaderboard_tab``.
     """
     # Load CSS from file; decode explicitly so the result does not depend on
     # the platform's default encoding.
     css_path = os.path.join(os.path.dirname(__file__), "styles.css")
     with open(css_path, "r", encoding="utf-8") as f:
         css = f.read()
     with gr.Blocks(css=css, title="Tox21 Leaderboard") as app:
         # Main title
         gr.HTML("<h1>🧪 Tox21 Leaderboard</h1>")
         # Tab container
         with gr.Tabs(elem_classes="tab-nav") as tabs:
             # Create all tabs
             leaderboard_tab, leaderboard_table = create_leaderboard_tab(
                 refresh_callback
             )
             about_tab = create_about_tab()
             submission_tab = create_submission_tab(submit_callback)
     return app, leaderboard_table

frontend/leaderboard.py CHANGED Viewed

@@ -2,10 +2,12 @@
 Leaderboard-specific business logic.
 Handles data processing, backend communication, and state management.
 """
 import pandas as pd
 from typing import Optional
 from .content import LeaderboardContent
 def refresh_leaderboard() -> pd.DataFrame:
@@ -14,91 +16,148 @@ def refresh_leaderboard() -> pd.DataFrame:
     Currently returns sample data - will connect to backend later.
     """
     print("= Refreshing leaderboard data...")
     # Load data from backend
     from backend.data_loader import load_leaderboard_data
     results_data = load_leaderboard_data()
     return results_data
 def format_leaderboard_data(raw_data: dict) -> pd.DataFrame:
     """
     Format raw leaderboard data for display.
     Args:
         raw_data: Raw data from backend/datasets
     Returns:
         Formatted DataFrame for Gradio display
     """
     # TODO: Implement data formatting logic
     # This will process raw evaluation results into the display format
-    pass
 def calculate_average_score(task_scores: dict) -> float:
     """
     Calculate average ROC-AUC score across all tasks.
     Args:
         task_scores: Dictionary of task_name -> score
     Returns:
         Average score across all tasks
     """
     if not task_scores:
         return 0.0
-    valid_scores = [score for score in task_scores.values() if score is not None]
     if not valid_scores:
         return 0.0
     return sum(valid_scores) / len(valid_scores)
 def sort_by_performance(leaderboard_data: pd.DataFrame) -> pd.DataFrame:
     """
     Sort leaderboard by average performance score.
     Args:
         leaderboard_data: DataFrame with leaderboard data
     Returns:
         Sorted DataFrame with rank column updated
     """
     # Sort by average score (descending)
     sorted_data = leaderboard_data.sort_values(by="Average", ascending=False)
     # Update rank column
     sorted_data["Rank"] = range(1, len(sorted_data) + 1)
     return sorted_data
 def filter_leaderboard(
-    data: pd.DataFrame,
     min_score: Optional[float] = None,
     model_type: Optional[str] = None,
-    date_range: Optional[tuple] = None
 ) -> pd.DataFrame:
     """
     Filter leaderboard data based on criteria.
     Args:
         data: Original leaderboard data
         min_score: Minimum average score threshold
         model_type: Filter by model type
         date_range: Filter by submission date range
     Returns:
         Filtered DataFrame
     """
     filtered_data = data.copy()
     if min_score is not None:
         filtered_data = filtered_data[filtered_data["Average"] >= min_score]
     # TODO: Add more filtering logic as needed
-    return filtered_data

 Leaderboard-specific business logic.
 Handles data processing, backend communication, and state management.
 """
 import pandas as pd
 from typing import Optional
 from .content import LeaderboardContent
+from config.leaderboard import MAX_DECIMALS, COLUMN_NAMES
 def refresh_leaderboard() -> pd.DataFrame:
     Currently returns sample data - will connect to backend later.
     """
     print("= Refreshing leaderboard data...")
     # Load data from backend
     from backend.data_loader import load_leaderboard_data
     results_data = load_leaderboard_data()
+    results_data = format_leaderboard_data(results_data)
+    assert all(
+        [c in COLUMN_NAMES for c in results_data.columns]
+    ), "Some required columns not found in dataset!"
     return results_data
 def format_leaderboard_data(raw_data: dict) -> pd.DataFrame:
     """
     Format raw leaderboard data for display.

     Args:
         raw_data: Raw data from backend/datasets. It is iterated entry by
             entry; each entry is indexed with ``"config"`` (submission
             metadata) and ``"results"`` (per-task results plus
             ``"overall_score"``, each holding a ``"roc_auc"`` value).

     Returns:
         DataFrame with one row per approved entry, sorted by
         "Overall Score" (descending) and rounded to ``MAX_DECIMALS``. When
         there are no approved entries, an empty DataFrame with the
         ``COLUMN_NAMES`` columns is returned instead.
     """
     rows = []
     for entry in raw_data:
         config = entry["config"]
         results = entry["results"]
         # Only include approved entries
         if not config.get("approved", False):
             continue
         # Prefer the approval date and fall back to the submission date.
         # Using `or` also skips None/empty values, and the emptiness check
         # below avoids an IndexError when no date is available at all.
         date_value = (
             config.get("date_approved") or config.get("date_submitted") or ""
         )
         date_parts = str(date_value).split()
         row = {
             "Model": config["model_name"],
             "Model Description": config["model_description"],
             "Publication": config.get("publication_title", ""),
             "Parameters": config.get("model_size", ""),
             "Pretrained (y/n)": config.get("pretraining", ""),
             # Keep only the first whitespace-separated token of the date.
             "Date Added": date_parts[0] if date_parts else "",
             "Overall Score": results["overall_score"]["roc_auc"],
         }
         # Add individual task scores (the overall score already has a column)
         for task_key, task_result in results.items():
             if task_key != "overall_score":
                 row[task_key] = task_result["roc_auc"]
         rows.append(row)
     df = pd.DataFrame(rows)
     if df.empty:
         print(
             "No approved submissions found. Creating empty DataFrame with proper columns."
         )
         # Create empty DataFrame with expected columns
         df = pd.DataFrame(columns=COLUMN_NAMES)
     else:
         # Rank according to overall score
         df = df.sort_values(by="Overall Score", ascending=False).reset_index(
             drop=True
         )
     print(df)
     print(f"Created DataFrame with shape: {df.shape}")
     return df.round(decimals=MAX_DECIMALS)
 def calculate_average_score(task_scores: dict) -> float:
     """
     Average the ROC-AUC scores of all tasks that have a score.

     Args:
         task_scores: Dictionary of task_name -> score; ``None`` scores are
             ignored.

     Returns:
         Mean of the non-``None`` scores, or 0.0 when the mapping is empty
         or contains no usable score.
     """
     if not task_scores:
         return 0.0
     total = 0
     count = 0
     for value in task_scores.values():
         if value is None:
             continue
         total += value
         count += 1
     return total / count if count else 0.0
 def sort_by_performance(leaderboard_data: pd.DataFrame) -> pd.DataFrame:
     """
     Order the leaderboard by its "Average" column, best first.

     Args:
         leaderboard_data: DataFrame with an "Average" column.

     Returns:
         A new DataFrame sorted by "Average" (descending) whose "Rank"
         column runs from 1 to the number of rows. The original index
         labels are kept and the input frame is not modified.
     """
     n_rows = len(leaderboard_data)
     ranked = leaderboard_data.sort_values(by="Average", ascending=False)
     # assign() sets (or overwrites) the Rank column on the sorted copy.
     return ranked.assign(Rank=range(1, n_rows + 1))
 def filter_leaderboard(
     data: pd.DataFrame,
     min_score: Optional[float] = None,
     model_type: Optional[str] = None,
     date_range: Optional[tuple] = None,
 ) -> pd.DataFrame:
     """
     Return a filtered copy of the leaderboard.

     Args:
         data: Original leaderboard data (left unmodified)
         min_score: If given, keep only rows whose "Average" is at least
             this value
         model_type: Accepted but currently not applied
         date_range: Accepted but currently not applied

     Returns:
         Filtered DataFrame
     """
     result = data.copy()
     # Without a threshold there is nothing to filter yet.
     if min_score is None:
         return result
     keep_mask = result["Average"] >= min_score
     # TODO: Add more filtering logic as needed
     return result[keep_mask]