import asyncio

from typing import Dict, Any, List

from sklearn.metrics import roc_auc_score, average_precision_score

from .evaluator import evaluate_model
from .schema import create_submission_record
from .dataset_storage import save_submission_to_dataset
from config.tasks import TOX21_TASKS


async def process_submission(
    model_name: str,
    hf_space_tag: str,
    model_description: str,
    organization: str,
    model_size: int,  # changed from str to int
    publication_title: str,
    publication_link: str,
    pretrained: bool,
    pretraining_data: str,
    zero_shot: bool,
    few_shot: bool,
    n_shot: str,
    smiles_list: List[str],
    true_labels: Dict[str, Dict[str, float]],
) -> Dict[str, Any]:
| """Process a complete submission from evaluation to metrics computation.""" | |
| # Step 1: Evaluate the model | |
| evaluation_result = await evaluate_model(hf_space_tag, smiles_list) | |
| # Step 2: Compute metrics | |
| metrics = compute_metrics(evaluation_result["results"], true_labels) | |
| # Step 3: Create the submission record | |
| record = create_submission_record( | |
| model_name=model_name, | |
| hf_space_tag=hf_space_tag, | |
| model_description=model_description, | |
| organization=organization, | |
| model_size=model_size, | |
| publication_title=publication_title, | |
| publication_link=publication_link, | |
| pretrained=pretrained, | |
| pretraining_data=pretraining_data, | |
| zero_shot=zero_shot, | |
| few_shot=few_shot, | |
| n_shot=n_shot, | |
| raw_predictions=evaluation_result["results"], | |
| computed_metrics=metrics, | |
| status="completed", | |
| approved=False, | |
| ) | |
| # Step 4: Save to HuggingFace dataset | |
| save_submission_to_dataset(record) | |
| return record | |
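
# Typical call site (illustrative sketch only, not code from this repo): the
# leaderboard backend would await the coroutine with the submitter's metadata.
# All argument values below are placeholders.
#
#     record = await process_submission(
#         model_name="demo-model",           # placeholder metadata
#         hf_space_tag="user/demo-space",    # placeholder Space tag
#         ...
#         smiles_list=smiles_list,
#         true_labels=true_labels,
#     )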


def compute_metrics(
    predictions: List[Dict[str, Any]], true_labels: Dict[str, Dict[str, float]]
) -> Dict[str, Any]:
    """Compute per-task ROC-AUC and delta-AUPRC from predictions and true labels."""
    task_metrics = {}

    # Get all configured tasks
    if predictions:
        tasks = [task.key for task in TOX21_TASKS]

        for task in tasks:
            # Arrange labels and predictions for evaluation
            y_true = []
            y_pred = []
            for sample in predictions:
                smiles = sample["smiles"]
                label = true_labels.get(smiles, {}).get(task, None)
                if label is None:
                    continue
                y_true.append(int(label))
                y_pred.append(sample["raw_predictions"][task])

            # Skip tasks that lack both a positive and a negative label;
            # ROC-AUC and AUPRC are undefined in that case
            if len(set(y_true)) < 2:
                continue

            # Calculate scores: ROC-AUC and AUPRC improvement over a random classifier
            random_clf_auprc = sum(y_true) / len(y_true)
            auprc_score = average_precision_score(y_true=y_true, y_score=y_pred)
            delta_auprc_score = auprc_score - random_clf_auprc
            roc_auc = roc_auc_score(y_true=y_true, y_score=y_pred)

            task_metrics[task] = {
                "roc_auc": roc_auc,
                "delta_auprc": delta_auprc_score,
            }
    # Overall score (average of per-task scores)
    if task_metrics:
        overall_roc_auc_score = sum(
            m["roc_auc"] for m in task_metrics.values()
        ) / len(task_metrics)
        overall_delta_auprc_score = sum(
            m["delta_auprc"] for m in task_metrics.values()
        ) / len(task_metrics)
        task_metrics["overall_score"] = {
            "roc_auc": overall_roc_auc_score,
            "delta_auprc": overall_delta_auprc_score,
        }

    return task_metrics
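

if __name__ == "__main__":
    # Illustrative self-check on synthetic data (a sketch, not part of the
    # leaderboard pipeline): two placeholder molecules with one negative and one
    # positive label per configured Tox21 task, so every task has both classes.
    # Reuses the module's existing assumption that TOX21_TASKS entries expose .key.
    demo_smiles = ["CCO", "c1ccccc1"]  # placeholder SMILES strings
    demo_predictions = [
        {
            "smiles": smiles,
            "raw_predictions": {task.key: 0.4 + 0.2 * i for task in TOX21_TASKS},
        }
        for i, smiles in enumerate(demo_smiles)
    ]
    demo_labels = {
        "CCO": {task.key: 0.0 for task in TOX21_TASKS},
        "c1ccccc1": {task.key: 1.0 for task in TOX21_TASKS},
    }
    # On this toy data every task should report roc_auc == 1.0 and delta_auprc == 0.5
    print(compute_metrics(demo_predictions, demo_labels))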