Spaces:
Running
Running
| """ | |
| Leaderboard-specific business logic. | |
| Handles data processing, backend communication, and state management. | |
| """ | |
import re
from datetime import datetime
from typing import Optional

import pandas as pd

from config.leaderboard import MAX_DECIMALS, COLUMN_NAMES
def parse_parameter_count(value):
    """Parse a parameter count from various formats to a raw integer.

    Accepts:
        - Raw numbers: 120000000, "120000000"
        - Human-readable strings: "120M", "0.12B", "154K"
        - Empty/None/NaN values

    Args:
        value: Parameter count in any supported format.

    Returns:
        int: Raw parameter count, or None for empty/invalid values.
    """
    if pd.isna(value) or value == "" or value is None:
        return None
    # Already numeric: just truncate to int.
    if isinstance(value, (int, float)):
        return int(value)
    value_str = str(value).strip().upper()
    if not value_str:
        return None
    # Numeric part followed by an optional magnitude suffix.
    match = re.match(r'^([0-9.]+)\s*([KMBT]?)$', value_str)
    if not match:
        return None
    try:
        num_part = float(match.group(1))
    except ValueError:
        # The character class admits malformed numbers such as "1.2.3" or
        # "." — treat them as invalid instead of raising.
        return None
    suffix = match.group(2)
    # Apply multiplier based on suffix.
    multipliers = {'K': 1e3, 'M': 1e6, 'B': 1e9, 'T': 1e12, '': 1}
    return int(num_part * multipliers[suffix])
def format_parameter_count(value):
    """Format a raw parameter count into a human-readable string (B, M, K).

    Args:
        value: Raw parameter count (int/float), or NaN/"" for missing.

    Returns:
        str: e.g. '40.1M', '1.9M', '154K', '2B', '500'; empty string for
        NaN/empty input.
    """
    if pd.isna(value) or value == "":
        return ""
    value = float(value)
    # One rule per magnitude, checked largest first; this replaces three
    # copy-pasted branches with identical rounding logic.
    for threshold, suffix in ((1e9, "B"), (1e6, "M"), (1e3, "K")):
        if value >= threshold:
            scaled = round(value / threshold, 1)
            # Drop the trailing ".0" when the rounded value is whole.
            if scaled == int(scaled):
                return f"{int(scaled)}{suffix}"
            return f"{scaled}{suffix}"
    return str(int(value))
def refresh_leaderboard() -> pd.DataFrame:
    """Fetch the latest leaderboard data and format it for display.

    Currently backed by sample data; a live backend connection will
    replace it later.

    Returns:
        pd.DataFrame: Display-ready leaderboard table.
    """
    print("= Refreshing leaderboard data...")
    # Imported lazily so module import doesn't pull in the backend.
    from backend.data_loader import load_leaderboard_data

    raw = load_leaderboard_data()
    return format_leaderboard_data(raw)
def _model_type_emoji(config: dict) -> str:
    """Map a submission's training-regime flags to its display emoji."""
    if config.get("zero_shot", "") == "Yes":
        return "0️⃣"  # Zero-shot
    if config.get("few_shot", "") == "Yes":
        return "1️⃣"  # Few-shot
    if config.get("pretrained", "") == "Yes":
        return "⤵️"  # Pre-training
    return "🔼"  # Standard (trained on Tox21 only)


def _format_submission_date(date_raw) -> str:
    """Normalize an ISO-like timestamp (e.g. '2025-09-11T12:51:33') to 'YYYY-MM-DD'."""
    try:
        # Strip a trailing 'Z' if present; fromisoformat (pre-3.11) rejects it.
        return datetime.fromisoformat(str(date_raw).replace("Z", "")).strftime("%Y-%m-%d")
    except ValueError:
        # Fallback: take the date-looking prefix. Guard the empty case —
        # "".split() is [] and indexing it would raise IndexError.
        parts = str(date_raw).split("T")[0].split()
        return parts[0] if parts else ""


def format_leaderboard_data(raw_data: dict) -> pd.DataFrame:
    """
    Format raw leaderboard data for display.

    Args:
        raw_data: Iterable of submission entries, each with "config" and
            "results" dicts (raw data from backend/datasets).

    Returns:
        Formatted DataFrame for Gradio display, with two-level columns,
        sorted by overall ROC-AUC (best first) and rounded to MAX_DECIMALS.
    """
    rows = []
    for entry in raw_data:
        config = entry["config"]
        results = entry["results"]
        # Only approved submissions are shown.
        if not config.get("approved", False):
            continue

        pretrained = config.get("pretrained", "") == "Yes"
        zero_shot = config.get("zero_shot", "") == "Yes"
        few_shot = config.get("few_shot", "") == "Yes"

        # Column order matters for display: Type is the 2nd column after Rank.
        row = {
            ("", "Type"): _model_type_emoji(config),
            ("", "Model"): config["model_name"],
            ("", "HF_Space_Tag"): config.get("hf_space_tag", ""),  # Hidden column for links
            ("", "Organization"): config.get("organization", ""),
            ("", "Publication"): config.get("publication_title", ""),
            ("", "Publication Link"): config.get("publication_link", ""),  # Hidden column for links
            ("", "Model Description"): config["model_description"],
            ("", "Avg. AUC"): results["overall_score"]["roc_auc"],
            ("", "Avg. ΔAUC-PR"): results["overall_score"].get("delta_auprc"),
            ("", "# Parameters"): config.get("model_size", ""),
        }

        # Per-task columns come immediately after "# Parameters":
        # all ROC-AUC columns first, then all ΔAUC-PR columns.
        for task_key, task_result in results.items():
            if task_key != "overall_score":
                row[("ROC-AUC", task_key)] = task_result.get("roc_auc", "")
        for task_key, task_result in results.items():
            if task_key != "overall_score":
                row[("ΔAUC-PR", task_key)] = task_result.get("delta_auprc", "")

        # Remaining metadata columns.
        row.update({
            ("", "Pretrained"): pretrained,
            ("", "Pretraining Data"): config.get("pretraining_data", ""),
            ("", "Zero-shot"): zero_shot,
            ("", "Few-shot"): few_shot,
            ("", "N-shot"): config.get("n_shot", ""),
            ("", "Date Added"): _format_submission_date(
                config.get("date_approved", config.get("date_submitted", ""))
            ),
        })
        rows.append(row)

    if not rows:
        # No approved submissions: return an empty frame with the expected
        # columns. This must be checked BEFORE building the MultiIndex —
        # MultiIndex.from_tuples raises on an empty column list.
        print(
            "No approved submissions found. Creating empty DataFrame with proper columns."
        )
        return pd.DataFrame(columns=COLUMN_NAMES)

    df = pd.DataFrame(rows)
    df.columns = pd.MultiIndex.from_tuples(df.columns)
    # Rank according to overall score, best first.
    df = df.sort_values(by=("", "Avg. AUC"), ascending=False).reset_index(drop=True)
    print(f"Created DataFrame with shape: {df.shape}")
    df = df.round(decimals=MAX_DECIMALS)
    return df
def calculate_average_score(task_scores: dict) -> float:
    """Compute the mean ROC-AUC across tasks, skipping missing scores.

    Args:
        task_scores: Mapping of task name -> score (a score may be None).

    Returns:
        float: Mean of the non-None scores, or 0.0 when there are none.
    """
    if not task_scores:
        return 0.0
    scores = [s for s in task_scores.values() if s is not None]
    return sum(scores) / len(scores) if scores else 0.0
def sort_by_performance(leaderboard_data: pd.DataFrame) -> pd.DataFrame:
    """Return the leaderboard ordered by the "Average" column, best first.

    Args:
        leaderboard_data: DataFrame containing an "Average" column.

    Returns:
        pd.DataFrame: Rows in descending "Average" order, with the "Rank"
        column rewritten to 1..N to reflect the new ordering.
    """
    ranked = leaderboard_data.sort_values("Average", ascending=False)
    ranked["Rank"] = range(1, len(ranked) + 1)
    return ranked
def filter_leaderboard(
    data: pd.DataFrame,
    min_score: Optional[float] = None,
    model_type: Optional[str] = None,
    date_range: Optional[tuple] = None,
) -> pd.DataFrame:
    """Filter leaderboard rows by the given criteria.

    Args:
        data: Original leaderboard data.
        min_score: Keep only rows whose "Average" is at least this value.
        model_type: Filter by model type (not yet implemented).
        date_range: Filter by submission date range (not yet implemented).

    Returns:
        pd.DataFrame: A filtered copy of ``data``.
    """
    result = data.copy()
    if min_score is not None:
        result = result[result["Average"] >= min_score]
    # TODO: Add more filtering logic as needed
    return result