"""
Leaderboard-specific business logic.
Handles data processing, backend communication, and state management.
"""

import pandas as pd
from typing import Optional
from datetime import datetime

from config.leaderboard import MAX_DECIMALS, COLUMN_NAMES


def parse_parameter_count(value):
    """Parse parameter count from various formats to raw numeric value.

    Accepts:
        - Raw numbers: 120000000, "120000000"
        - Human-readable: "120M", "0.12B", "154K" (suffix is case-insensitive;
          K, M, B and T are recognised)
        - Empty/None/NaN values

    Args:
        value: Parameter count in any supported format

    Returns:
        int: Raw parameter count (any fractional part is truncated), or None
        for empty/invalid values (including malformed numbers such as "1.2.3"
        and non-finite floats).
    """
    # Function-scope imports keep this helper self-contained.
    import re
    from decimal import Decimal

    if value is None:
        return None

    if not isinstance(value, str):
        if pd.isna(value):
            return None
        # If already a number, return it
        if isinstance(value, (int, float)):
            try:
                return int(value)
            except OverflowError:
                # +/-inf has no integer representation; treat as invalid.
                return None
        value = str(value)

    text = value.strip().upper()
    if not text:
        return None

    # Exactly one well-formed decimal number ("12", "1.5", "1.", ".5")
    # followed by an optional magnitude suffix. Requiring a single decimal
    # point rejects inputs like "1.2.3" or "." up front.
    match = re.fullmatch(r'([0-9]+(?:\.[0-9]*)?|\.[0-9]+)\s*([KMBT]?)', text)
    if match is None:
        return None

    number_text, suffix = match.groups()

    # Integer multipliers combined with Decimal arithmetic keep the product
    # exact, so "1.005K" yields 1005 rather than a truncated binary-float
    # approximation (1004).
    multipliers = {'K': 10**3, 'M': 10**6, 'B': 10**9, 'T': 10**12, '': 1}
    return int(Decimal(number_text) * multipliers[suffix])


def format_parameter_count(value):
    """Format parameter count to human-readable string (B, M, K).

    The value is scaled to the largest unit it reaches and rounded to one
    decimal; a trailing ".0" is dropped. If rounding pushes the scaled value
    up to 1000 of a unit, the next larger unit is used instead, so 999_999 is
    rendered as '1M' rather than '1000K'.

    Args:
        value: Raw parameter count (int/float) or np.nan

    Returns:
        Formatted string like '40.1M', '1.9M', '154K' or empty string for NaN.
        Values below 1000 are returned as a plain integer string.
    """
    if pd.isna(value) or value == "":
        return ""

    value = float(value)

    # Ordered from largest to smallest so the first match is the right unit.
    units = ((1e9, "B"), (1e6, "M"), (1e3, "K"))

    for position, (scale, suffix) in enumerate(units):
        if value < scale:
            continue

        rounded = round(value / scale, 1)
        if rounded >= 1000 and position > 0:
            # Rounding overflowed this unit (e.g. 999.96K); promote to the
            # next larger one. "B" is the largest unit and has no successor.
            scale, suffix = units[position - 1]
            rounded = round(value / scale, 1)

        # Show whole numbers without a decimal part ("2B", not "2.0B").
        if rounded == int(rounded):
            return f"{int(rounded)}{suffix}"
        return f"{rounded}{suffix}"

    return str(int(value))


def refresh_leaderboard() -> pd.DataFrame:
    """
    Reload the leaderboard and return it in display form.

    Fetches the raw entries via ``backend.data_loader.load_leaderboard_data``
    and passes them through ``format_leaderboard_data``.

    Returns:
        The formatted leaderboard DataFrame.
    """
    print("= Refreshing leaderboard data...")

    # The loader is imported at call time rather than at module import time.
    from backend.data_loader import load_leaderboard_data

    formatted = format_leaderboard_data(load_leaderboard_data())

    # Disabled sanity check, kept for reference:
    # assert all(
    #     [c in COLUMN_NAMES for c in formatted.columns]
    # ), "Some required columns not found in dataset!"
    return formatted


def _model_type_symbol(zero_shot: bool, few_shot: bool, pretrained: bool) -> str:
    """Pick the emoji for the Type column (zero-shot > few-shot > pretrained)."""
    if zero_shot:
        return "0️⃣"  # Zero-shot
    if few_shot:
        return "1️⃣"  # Few-shot
    if pretrained:
        return "⤵️"  # Pre-training
    return "🔼"  # Standard (trained on Tox21 only)


def _format_date_added(date_raw) -> str:
    """Reduce a timestamp-like value to its date part; '' when missing."""
    text = "" if date_raw is None else str(date_raw).strip()
    if not text:
        return ""

    try:
        # Parse if ISO-like (e.g. "2025-09-11T12:51:33.227003"). A trailing
        # 'Z' is removed because older fromisoformat versions reject it.
        return datetime.fromisoformat(text.replace("Z", "")).strftime("%Y-%m-%d")
    except ValueError:
        # Not ISO: keep whatever precedes the time separator / first space.
        head = text.split("T")[0].split()
        return head[0] if head else text


def _build_leaderboard_row(entry: dict) -> dict:
    """Flatten one entry into a row keyed by (group, column) tuples."""
    config = entry["config"]
    results = entry["results"]
    overall = results["overall_score"]

    # Flags are stored as "Yes"/other strings in the config.
    pretrained = config.get("pretrained", "") == "Yes"
    zero_shot = config.get("zero_shot", "") == "Yes"
    few_shot = config.get("few_shot", "") == "Yes"

    # Insertion order of the keys defines the column order of the table.
    row = {
        ("", "Type"): _model_type_symbol(zero_shot, few_shot, pretrained),
        ("", "Model"): config["model_name"],
        ("", "HF_Space_Tag"): config.get("hf_space_tag", ""),  # Hidden column for links
        ("", "Organization"): config.get("organization", ""),
        ("", "Publication"): config.get("publication_title", ""),
        ("", "Publication Link"): config.get("publication_link", ""),  # Hidden column for links
        ("", "Model Description"): config["model_description"],
        ("", "Avg. AUC"): overall["roc_auc"],
        ("", "Avg. ΔAUC-PR"): overall.get("delta_auprc"),
        ("", "# Parameters"): config.get("model_size", ""),
    }

    # Per-task columns follow "# Parameters": first every ROC-AUC column,
    # then every ΔAUC-PR column, so each metric forms one contiguous group.
    task_results = {
        task_key: task_result
        for task_key, task_result in results.items()
        if task_key != "overall_score"
    }
    for task_key, task_result in task_results.items():
        row[("ROC-AUC", task_key)] = task_result.get("roc_auc", "")
    for task_key, task_result in task_results.items():
        row[("ΔAUC-PR", task_key)] = task_result.get("delta_auprc", "")

    # Remaining metadata columns.
    row.update({
        ("", "Pretrained"): pretrained,
        ("", "Pretraining Data"): config.get("pretraining_data", ""),
        ("", "Zero-shot"): zero_shot,
        ("", "Few-shot"): few_shot,
        ("", "N-shot"): config.get("n_shot", ""),
    })

    # Prefer the approval date; fall back to the submission date when the
    # approval date is absent or empty.
    date_raw = config.get("date_approved") or config.get("date_submitted") or ""
    row[("", "Date Added")] = _format_date_added(date_raw)

    return row


def format_leaderboard_data(raw_data: list) -> pd.DataFrame:
    """
    Format raw leaderboard data for display.

    Each entry is read as a mapping with a "config" dict (model metadata,
    including the "approved" flag) and a "results" dict holding an
    "overall_score" entry plus one entry per task. Only approved entries are
    kept. Rows are sorted by "Avg. AUC" in descending order and numeric
    columns are rounded to MAX_DECIMALS.

    Args:
        raw_data: Iterable of raw entries from backend/datasets (None is
            treated as empty)

    Returns:
        Formatted DataFrame for Gradio display, with two-level columns. When
        there are no approved entries, an empty DataFrame with COLUMN_NAMES
        as columns is returned.

    Raises:
        KeyError: If an approved entry lacks a required key ("results",
            "model_name", "model_description", "overall_score" or its
            "roc_auc").
    """
    if raw_data is None:
        raw_data = []

    # Only include approved entries
    rows = [
        _build_leaderboard_row(entry)
        for entry in raw_data
        if entry["config"].get("approved", False)
    ]

    if not rows:
        # MultiIndex.from_tuples cannot infer levels from an empty column
        # list, so the empty case is handled before any column conversion.
        print(
            "No approved submissions found. Creating empty DataFrame with proper columns."
        )
        df = pd.DataFrame(columns=COLUMN_NAMES)
    else:
        df = pd.DataFrame(rows)
        df.columns = pd.MultiIndex.from_tuples(df.columns)
        # rank according to overall score
        df = df.sort_values(by=("", "Avg. AUC"), ascending=False).reset_index(
            drop=True
        )

    print(f"Created DataFrame with shape: {df.shape}")
    return df.round(decimals=MAX_DECIMALS)


def calculate_average_score(task_scores: dict) -> float:
    """
    Compute the mean of the per-task scores, skipping missing ones.

    Args:
        task_scores: Dictionary of task_name -> score; a score of None is
            ignored.

    Returns:
        Mean of the non-None scores, or 0.0 when the mapping is empty/falsy
        or contains no usable score.
    """
    if not task_scores:
        return 0.0

    # Drop tasks without a score before averaging.
    present = list(filter(lambda score: score is not None, task_scores.values()))

    return sum(present) / len(present) if present else 0.0


def sort_by_performance(leaderboard_data: pd.DataFrame) -> pd.DataFrame:
    """
    Order the leaderboard by its "Average" column and number the rows.

    Args:
        leaderboard_data: DataFrame with an "Average" column

    Returns:
        A new DataFrame sorted by "Average" (highest first) whose "Rank"
        column holds 1..N in that order. The input frame is left untouched
        and the original index labels are kept.
    """
    row_count = len(leaderboard_data)

    # Best score first, then attach the 1-based position as the rank.
    return leaderboard_data.sort_values(by="Average", ascending=False).assign(
        Rank=range(1, row_count + 1)
    )


def filter_leaderboard(
    data: pd.DataFrame,
    min_score: Optional[float] = None,
    model_type: Optional[str] = None,
    date_range: Optional[tuple] = None,
) -> pd.DataFrame:
    """
    Return a filtered copy of the leaderboard.

    Only the ``min_score`` criterion is applied at present; ``model_type``
    and ``date_range`` are accepted but have no effect.

    Args:
        data: Original leaderboard data (not modified)
        min_score: Keep rows whose "Average" is at least this value
        model_type: Filter by model type (currently ignored)
        date_range: Filter by submission date range (currently ignored)

    Returns:
        Filtered DataFrame
    """
    result = data.copy()

    # Without a threshold the untouched copy is the answer.
    if min_score is None:
        return result

    meets_threshold = result["Average"] >= min_score

    # TODO: Add more filtering logic as needed

    return result[meets_threshold]