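"""Submission processing for the Tox21 leaderboard.

Evaluates a model hosted as a Hugging Face Space on the benchmark SMILES,
computes per-task ROC-AUC and delta-AUPRC, builds a submission record, and
saves it to the leaderboard dataset.
"""
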
import asyncio
from typing import Dict, Any, List
from sklearn.metrics import roc_auc_score, average_precision_score
from .evaluator import evaluate_model
from .schema import create_submission_record
from .dataset_storage import save_submission_to_dataset
from config.tasks import TOX21_TASKS


async def process_submission(
    model_name: str,
    hf_space_tag: str,
    model_description: str,
    organization: str,
    model_size: int,
    publication_title: str,
    publication_link: str,
    pretrained: bool,
    pretraining_data: str,
    zero_shot: bool,
    few_shot: bool,
    n_shot: str,
    smiles_list: List[str],
    true_labels: Dict[str, Dict[str, float]],
) -> Dict[str, Any]:
    """Process a complete submission from evaluation to metrics computation."""

    # Step 1: Evaluate the model
    evaluation_result = await evaluate_model(hf_space_tag, smiles_list)

    # Step 2: Compute metrics
    metrics = compute_metrics(evaluation_result["results"], true_labels)

    # Step 3: Create the submission record
    record = create_submission_record(
        model_name=model_name,
        hf_space_tag=hf_space_tag,
        model_description=model_description,
        organization=organization,
        model_size=model_size,
        publication_title=publication_title,
        publication_link=publication_link,
        pretrained=pretrained,
        pretraining_data=pretraining_data,
        zero_shot=zero_shot,
        few_shot=few_shot,
        n_shot=n_shot,
        raw_predictions=evaluation_result["results"],
        computed_metrics=metrics,
        status="completed",
        approved=False,
    )

    # Step 4: Save to HuggingFace dataset
    save_submission_to_dataset(record)

    return record
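

# Usage sketch (illustrative only): a submission handler would typically run
# the coroutine to completion, for example:
#
#     record = asyncio.run(
#         process_submission(
#             model_name="ExampleModel",        # placeholder values throughout
#             hf_space_tag="org/example-space",
#             ...,
#             smiles_list=benchmark_smiles,
#             true_labels=benchmark_labels,
#         )
#     )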


def compute_metrics(
    predictions: List[Dict[str, Any]], true_labels: Dict[str, Dict[str, float]]
) -> Dict[str, Any]:
    """Compute per-task ROC-AUC and delta-AUPRC from predictions and true labels.

    `predictions` is the per-molecule output of the evaluator; `true_labels`
    maps each SMILES string to its dictionary of task labels.
    """

    # Per-task metrics, keyed by Tox21 task name.
    task_metrics = {}

    # Score each Tox21 task independently.
    if predictions:
        tasks = [task.key for task in TOX21_TASKS]
        for task in tasks:
            # Collect labels and predictions, skipping molecules that have no
            # ground-truth label for this task.
            y_true = []
            y_pred = []
            for sample in predictions:
                smiles = sample["smiles"]
                label = true_labels.get(smiles, {}).get(task)
                if label is None:
                    continue
                y_true.append(int(label))
                y_pred.append(sample["raw_predictions"][task])

            # Both classes must be present for ROC-AUC / AUPRC to be defined.
            if len(set(y_true)) < 2:
                continue

            # A random classifier's AUPRC equals the positive-label fraction,
            # so delta-AUPRC measures the improvement over that baseline.
            random_clf_auprc = sum(y_true) / len(y_true)
            auprc_score = average_precision_score(y_true=y_true, y_score=y_pred)
            roc_auc = roc_auc_score(y_true=y_true, y_score=y_pred)
            task_metrics[task] = {
                "roc_auc": roc_auc,
                "delta_auprc": auprc_score - random_clf_auprc,
            }

    # Overall score (average of task scores)
    if task_metrics:
        overall_roc_auc_score = sum(
            m["roc_auc"] for m in task_metrics.values()
        ) / len(task_metrics)
        overall_delta_auprc_score = sum(
            m["delta_auprc"] for m in task_metrics.values()
        ) / len(task_metrics)

        task_metrics["overall_score"] = {
            "roc_auc": overall_roc_auc_score,
            "delta_auprc": overall_delta_auprc_score,
        }

    return task_metrics
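

if __name__ == "__main__":
    # Minimal smoke test for compute_metrics on synthetic data. The SMILES
    # strings and scores below are illustrative placeholders, not benchmark
    # data; TOX21_TASKS entries are assumed to expose a `.key` attribute, as
    # used above.
    import random

    random.seed(0)
    demo_smiles = ["CCO", "c1ccccc1", "CC(=O)O", "CCN"]
    demo_predictions = [
        {
            "smiles": s,
            "raw_predictions": {t.key: random.random() for t in TOX21_TASKS},
        }
        for s in demo_smiles
    ]
    # Alternate 0/1 labels so every task sees both classes.
    demo_labels = {
        s: {t.key: float(i % 2) for t in TOX21_TASKS}
        for i, s in enumerate(demo_smiles)
    }
    print(compute_metrics(demo_predictions, demo_labels))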