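"""Submission processing pipeline.

Evaluates a model hosted as a HuggingFace Space on the benchmark SMILES,
computes per-task ROC-AUC and delta-AUPRC over the Tox21 tasks, builds a
submission record, and saves it to the HuggingFace results dataset.
"""
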
import asyncio
from typing import Dict, Any, List
from sklearn.metrics import roc_auc_score, average_precision_score
from .evaluator import evaluate_model
from .schema import create_submission_record
from .dataset_storage import save_submission_to_dataset
from config.tasks import TOX21_TASKS


async def process_submission(
    model_name: str,
    hf_space_tag: str,
    model_description: str,
    organization: str,
    model_size: int,  # Changed from str to int
    publication_title: str,
    publication_link: str,
    pretrained: bool,
    pretraining_data: str,
    zero_shot: bool,
    few_shot: bool,
    n_shot: str,
    smiles_list: List[str],
    true_labels: List[Dict[str, float]],  # one task -> label dict per SMILES, parallel to smiles_list
) -> Dict[str, Any]:
"""Process a complete submission from evaluation to metrics computation."""
# Step 1: Evaluate the model
evaluation_result = await evaluate_model(hf_space_tag, smiles_list)
# Step 2: Compute metrics
metrics = compute_metrics(evaluation_result["results"], true_labels)
# Step 3: Create the submission record
record = create_submission_record(
model_name=model_name,
hf_space_tag=hf_space_tag,
model_description=model_description,
organization=organization,
model_size=model_size,
publication_title=publication_title,
publication_link=publication_link,
pretrained=pretrained,
pretraining_data=pretraining_data,
zero_shot=zero_shot,
few_shot=few_shot,
n_shot=n_shot,
raw_predictions=evaluation_result["results"],
computed_metrics=metrics,
status="completed",
approved=False,
)
# Step 4: Save to HuggingFace dataset
save_submission_to_dataset(record)
return record
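
# Example usage (illustrative only; every value below is hypothetical, and the
# call needs a reachable evaluation Space plus write access to the results
# dataset). From an async context:
#
#     record = await process_submission(
#         model_name="MyTox21Model",
#         hf_space_tag="my-org/my-tox21-space",
#         model_description="Graph neural network fine-tuned on Tox21",
#         organization="My Org",
#         model_size=10_000_000,
#         publication_title="N/A",
#         publication_link="N/A",
#         pretrained=True,
#         pretraining_data="ZINC",
#         zero_shot=False,
#         few_shot=False,
#         n_shot="0",
#         smiles_list=benchmark_smiles,
#         true_labels=benchmark_labels,
#     )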


def compute_metrics(
    predictions: List[Dict[str, Any]], true_labels: List[Dict[str, float]]
) -> Dict[str, Any]:
    """Compute per-task ROC-AUC and delta-AUPRC from predictions and true labels."""
    task_metrics = {}

    if predictions:
        # Score every Tox21 task defined in the config
        tasks = [task.key for task in TOX21_TASKS]
        for task in tasks:
            # Align labels and predictions, dropping samples without a label
            # for this task
            y_true = []
            y_pred = []
            for i, sample in enumerate(predictions):
                label = true_labels[i].get(task, None)
                if label is None:
                    continue
                y_true.append(int(label))
                y_pred.append(sample["raw_predictions"][task])

            # ROC-AUC and AUPRC are undefined unless both classes are present
            if len(set(y_true)) < 2:
                continue

            # delta-AUPRC: improvement over the AUPRC of a random classifier,
            # which equals the positive-class prevalence
            random_clf_auprc = sum(y_true) / len(y_true)
            auprc_score = average_precision_score(y_true=y_true, y_score=y_pred)
            delta_auprc_score = auprc_score - random_clf_auprc
            roc_auc_score_ = roc_auc_score(y_true=y_true, y_score=y_pred)
            task_metrics[task] = {
                "roc_auc": roc_auc_score_,
                "delta_auprc": delta_auprc_score,
            }

    # Overall score: average over the tasks that could be scored
    if task_metrics:
        overall_roc_auc_score = sum(
            m["roc_auc"] for m in task_metrics.values()
        ) / len(task_metrics)
        overall_delta_auprc_score = sum(
            m["delta_auprc"] for m in task_metrics.values()
        ) / len(task_metrics)
        task_metrics["overall_score"] = {
            "roc_auc": overall_roc_auc_score,
            "delta_auprc": overall_delta_auprc_score,
        }
    return task_metrics
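

# Minimal sanity check for compute_metrics on toy data (illustrative only).
# It assumes "NR-AR" is one of the task keys in config.tasks.TOX21_TASKS; tasks
# with no labels in the toy data are skipped by the guard above. Because of the
# relative imports, run this file as a module rather than as a script.
if __name__ == "__main__":
    toy_predictions = [
        {"smiles": "CCO", "raw_predictions": {"NR-AR": 0.9}},
        {"smiles": "c1ccccc1", "raw_predictions": {"NR-AR": 0.2}},
        {"smiles": "CC(=O)O", "raw_predictions": {"NR-AR": 0.7}},
        {"smiles": "CCN", "raw_predictions": {"NR-AR": 0.1}},
    ]
    toy_labels = [{"NR-AR": 1.0}, {"NR-AR": 0.0}, {"NR-AR": 1.0}, {"NR-AR": 0.0}]
    print(compute_metrics(toy_predictions, toy_labels))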