tox21_leaderboard

Running

App Files Files Community

tox21_leaderboard / frontend /leaderboard.py

Tschoui

:lipstick: Improve frontend

3ba0b02 about 1 month ago

raw

history blame contribute delete

8.9 kB

	"""
	Leaderboard-specific business logic.
	Handles data processing, backend communication, and state management.
	"""

	import pandas as pd
	from typing import Optional
	from datetime import datetime

	from config.leaderboard import MAX_DECIMALS, COLUMN_NAMES


	def parse_parameter_count(value):
	    """Parse a parameter count from various formats to a raw integer.

	    Accepts:
	    - Raw numbers: 120000000, "120000000"
	    - Human-readable: "120M", "0.12B", "154K" (suffixes K, M, B, T,
	      case-insensitive, optional whitespace before the suffix)
	    - Empty/None/NaN values

	    Args:
	        value: Parameter count in any supported format

	    Returns:
	        int: Raw parameter count (fractions are truncated), or None for
	        empty, non-finite or otherwise unparseable values
	    """
	    import re
	    from decimal import Decimal

	    if value is None or pd.isna(value) or value == "":
	        return None

	    # If already a number, return it
	    if isinstance(value, (int, float)):
	        try:
	            return int(value)
	        except (OverflowError, ValueError):
	            # +/-inf cannot be represented as an int; treat as invalid.
	            return None

	    value_str = str(value).strip().upper()

	    # The numeric part must be one well-formed decimal ("12", "1.5", ".5");
	    # a looser pattern such as [0-9.]+ would let "1.2.3" or "." through and
	    # crash the numeric conversion instead of returning None.
	    match = re.fullmatch(r"(\d+(?:\.\d*)?|\.\d+)\s*([KMBT]?)", value_str)
	    if match is None:
	        return None

	    # Scale with exact decimal arithmetic so values like "1.005K" are not
	    # truncated to 1004 by binary floating-point error.
	    multipliers = {"": 1, "K": 10**3, "M": 10**6, "B": 10**9, "T": 10**12}
	    return int(Decimal(match.group(1)) * multipliers[match.group(2)])


	def format_parameter_count(value):
	    """Format a parameter count as a human-readable string (B, M, K).

	    The value is scaled to the largest unit it reaches and shown with at
	    most one decimal; a trailing ".0" is dropped. Values below 1000 are
	    returned as a plain integer string.

	    Args:
	        value: Raw parameter count (int/float), NaN, or an empty string

	    Returns:
	        Formatted string like '40.1M', '1.9M', '154K', or an empty string
	        for NaN / empty input
	    """
	    if pd.isna(value) or value == "":
	        return ""

	    value = float(value)

	    # Ordered largest to smallest so a rolled-over value can step up a unit.
	    units = ((1e9, "B"), (1e6, "M"), (1e3, "K"))
	    for position, (scale, suffix) in enumerate(units):
	        if value < scale:
	            continue

	        rounded = round(value / scale, 1)
	        # Rounding may reach 1000 of the current unit (e.g. 999_999 -> 1000.0K);
	        # express that in the next larger unit ("1M") when one exists.
	        if rounded >= 1000 and position > 0:
	            scale, suffix = units[position - 1]
	            rounded = round(value / scale, 1)

	        if rounded == int(rounded):
	            return f"{int(rounded)}{suffix}"
	        return f"{rounded}{suffix}"

	    return str(int(value))


	def refresh_leaderboard() -> pd.DataFrame:
	    """
	    Reload the leaderboard and return it in display form.

	    Fetches the raw entries through backend.data_loader.load_leaderboard_data
	    and passes them through format_leaderboard_data.

	    Returns:
	        The formatted DataFrame produced by format_leaderboard_data
	    """
	    print("= Refreshing leaderboard data...")

	    # The backend loader is imported at call time, not at module import time.
	    from backend.data_loader import load_leaderboard_data

	    # NOTE: the resulting columns are not validated against COLUMN_NAMES here.
	    return format_leaderboard_data(load_leaderboard_data())


	def _resolve_model_type(zero_shot: bool, few_shot: bool, pretrained: bool) -> str:
	    """Return the emoji for the Type column; zero-shot wins, then few-shot, then pre-training."""
	    if zero_shot:
	        return "0️⃣"  # Zero-shot
	    if few_shot:
	        return "1️⃣"  # Few-shot
	    if pretrained:
	        return "⤵️"  # Pre-training
	    return "🔼"  # Standard (trained on Tox21 only)


	def _format_date_added(config) -> str:
	    """Return the approval (else submission) date of an entry as 'YYYY-MM-DD'.

	    Falls back to the text before the first 'T' / whitespace when the value
	    is not ISO-parseable, and to an empty string when no date is present.
	    """
	    # `or` (instead of a .get default) also skips a present-but-empty
	    # "date_approved" so that "date_submitted" can still be used.
	    date_raw = config.get("date_approved") or config.get("date_submitted") or ""
	    date_text = str(date_raw).strip()
	    if not date_text:
	        return ""

	    try:
	        # Parse if ISO-like (e.g. "2025-09-11T12:51:33.227003"); the 'Z'
	        # is removed before parsing.
	        parsed = datetime.fromisoformat(date_text.replace("Z", ""))
	    except ValueError:
	        # Best effort: keep whatever precedes the time part.
	        leading = date_text.split("T")[0].split()
	        return leading[0] if leading else date_text
	    return parsed.strftime("%Y-%m-%d")


	def format_leaderboard_data(raw_data: dict) -> pd.DataFrame:
	    """
	    Format raw leaderboard data for display.

	    Only entries whose config has a truthy "approved" flag are kept. Each kept
	    entry becomes one row keyed by (group, column) tuples: general metadata
	    under the "" group, per-task scores under "ROC-AUC" and "ΔAUC-PR".
	    Rows are sorted by ("", "Avg. AUC") in descending order and all numeric
	    values are rounded to MAX_DECIMALS.

	    Args:
	        raw_data: Raw data from backend/datasets. It is iterated entry by
	            entry; each entry must provide "config" and "results" mappings,
	            and "results" must contain "overall_score" with "roc_auc".

	    Returns:
	        Formatted DataFrame for Gradio display. When no entry is approved, an
	        empty DataFrame with COLUMN_NAMES as columns is returned.

	    Raises:
	        KeyError: If an approved entry lacks "model_name",
	            "model_description" or the overall ROC-AUC score.
	    """
	    rows = []
	    for entry in raw_data:
	        config = entry["config"]
	        results = entry["results"]

	        # Only include approved entries
	        if not config.get("approved", False):
	            continue

	        # Flags are stored as "Yes"/other strings in the config
	        pretrained = config.get("pretrained", "") == "Yes"
	        zero_shot = config.get("zero_shot", "") == "Yes"
	        few_shot = config.get("few_shot", "") == "Yes"

	        overall = results["overall_score"]
	        row = {
	            ("", "Type"): _resolve_model_type(zero_shot, few_shot, pretrained),
	            ("", "Model"): config["model_name"],
	            ("", "HF_Space_Tag"): config.get("hf_space_tag", ""),  # Hidden column for links
	            ("", "Organization"): config.get("organization", ""),
	            ("", "Publication"): config.get("publication_title", ""),
	            ("", "Publication Link"): config.get("publication_link", ""),  # Hidden column for links
	            ("", "Model Description"): config["model_description"],
	            ("", "Avg. AUC"): overall["roc_auc"],
	            ("", "Avg. ΔAUC-PR"): overall.get("delta_auprc"),
	            ("", "# Parameters"): config.get("model_size", ""),
	        }

	        # Task columns follow "# Parameters": first every ROC-AUC column,
	        # then every ΔAUC-PR column. Two passes keep each metric grouped,
	        # because dict insertion order defines the column order.
	        task_results = {
	            task_key: task_result
	            for task_key, task_result in results.items()
	            if task_key != "overall_score"
	        }
	        for task_key, task_result in task_results.items():
	            row[("ROC-AUC", task_key)] = task_result.get("roc_auc", "")
	        for task_key, task_result in task_results.items():
	            row[("ΔAUC-PR", task_key)] = task_result.get("delta_auprc", "")

	        # Remaining metadata columns
	        row.update({
	            ("", "Pretrained"): pretrained,
	            ("", "Pretraining Data"): config.get("pretraining_data", ""),
	            ("", "Zero-shot"): zero_shot,
	            ("", "Few-shot"): few_shot,
	            ("", "N-shot"): config.get("n_shot", ""),
	            ("", "Date Added"): _format_date_added(config),
	        })

	        rows.append(row)

	    if not rows:
	        # Must be decided before building the MultiIndex: from_tuples cannot
	        # infer the number of levels from an empty column list and raises.
	        print(
	            "No approved submissions found. Creating empty DataFrame with proper columns."
	        )
	        df = pd.DataFrame(columns=COLUMN_NAMES)
	    else:
	        df = pd.DataFrame(rows)
	        df.columns = pd.MultiIndex.from_tuples(df.columns)
	        # rank according to overall score
	        df = df.sort_values(by=("", "Avg. AUC"), ascending=False).reset_index(
	            drop=True
	        )

	    print(f"Created DataFrame with shape: {df.shape}")
	    return df.round(decimals=MAX_DECIMALS)


	def calculate_average_score(task_scores: dict) -> float:
	    """
	    Average the per-task scores, ignoring tasks without a score.

	    Args:
	        task_scores: Dictionary of task_name -> score; None scores are skipped

	    Returns:
	        Mean of the non-None scores, or 0.0 when the input is empty or holds
	        no usable score
	    """
	    usable = [
	        value for value in (task_scores or {}).values() if value is not None
	    ]
	    return sum(usable) / len(usable) if usable else 0.0


	def sort_by_performance(leaderboard_data: pd.DataFrame) -> pd.DataFrame:
	    """
	    Order the leaderboard from best to worst and number the rows.

	    Args:
	        leaderboard_data: DataFrame with an "Average" column; it is not
	            modified

	    Returns:
	        A new DataFrame sorted by "Average" (descending) whose "Rank" column
	        runs from 1 to the number of rows. The original index labels are kept.

	    NOTE(review): this expects a flat "Average" column, whereas
	    format_leaderboard_data produces ("", "Avg. AUC") -- confirm which frames
	    are passed in here.
	    """
	    ranked = leaderboard_data.sort_values("Average", ascending=False)
	    row_count = len(ranked)
	    return ranked.assign(Rank=range(1, row_count + 1))


	def filter_leaderboard(
	    data: pd.DataFrame,
	    min_score: Optional[float] = None,
	    model_type: Optional[str] = None,
	    date_range: Optional[tuple] = None,
	) -> pd.DataFrame:
	    """
	    Return a filtered copy of the leaderboard.

	    Args:
	        data: Original leaderboard data; never modified
	        min_score: Keep only rows whose "Average" is at least this value;
	            None disables the threshold
	        model_type: Accepted but currently not applied
	        date_range: Accepted but currently not applied

	    Returns:
	        Filtered DataFrame (a copy of the input when no threshold is given)
	    """
	    snapshot = data.copy()

	    # TODO: Add more filtering logic as needed (model_type, date_range)
	    if min_score is None:
	        return snapshot

	    meets_threshold = snapshot["Average"] >= min_score
	    return snapshot[meets_threshold]