Spaces:
Running
Running
File size: 1,595 Bytes
1146e96 fe1a8cf a3311e7 1146e96 fe1a8cf 1146e96 66a9b43 fe1a8cf 68fefb1 1146e96 fe1a8cf 1146e96 fe1a8cf 1146e96 fe1a8cf 1146e96 fe1a8cf a3311e7 d22d443 68fefb1 a3311e7 fe1a8cf a3311e7 d22d443 a3311e7 fe1a8cf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
"""
Data loading functionality for the Tox21 leaderboard.
Handles loading and processing results from HuggingFace datasets.
"""
import pandas as pd
from datasets import load_dataset, Dataset
from config.settings import RESULTS_DATASET, TEST_DATASET, HF_TOKEN
from config.tasks import TOX21_TASKS
def load_leaderboard_data() -> Dataset:
    """
    Fetch the leaderboard results from the HuggingFace results dataset.

    Progress and a preview of the first entry are printed to stdout.

    Returns:
        The "test" split of the dataset named by RESULTS_DATASET.

    Raises:
        ValueError: If the loaded dataset has no "test" split.
    """
    print(f"Loading dataset: {RESULTS_DATASET}")
    token_status = 'Yes' if HF_TOKEN else 'No'
    print(f"Using HF token: {token_status}")

    # No token is passed here; per the original note it is already set
    # globally via login in settings.
    splits = load_dataset(RESULTS_DATASET)
    print(f"Dataset loaded successfully. Keys: {splits.keys()}")

    # Results are expected in the test split; bail out early if it is absent.
    if "test" not in splits:
        raise ValueError("Dataset does not contain a 'test' split.")

    entries = splits["test"]
    entry_count = len(entries)
    print(f"Test split has {entry_count} entries")

    # Preview the first row only when there is one to show.
    if entry_count > 0:
        first_entry = entries[0]
        print(f"First entry keys: {first_entry.keys()}")
        print(f"First entry: {first_entry}")

    return entries
def load_test_dataset() -> tuple[list[str], list[dict[str, float]]]:
    """
    Load the SMILES strings and per-task labels of the test split.

    Reads the "test" split of TEST_DATASET and keeps, for every sample, only
    the columns named by the ``key`` attribute of each entry in TOX21_TASKS.

    Returns:
        A ``(smiles, labels)`` tuple. ``smiles`` is the "smiles" column as a
        list; ``labels`` holds one ``{task_key: value}`` dict per sample, in
        the order the dataset yields its rows.
    """
    # No token is passed here; per the original note it is already set
    # globally via login in settings.
    dset = load_dataset(TEST_DATASET, split="test")
    tasks = [t.key for t in TOX21_TASKS]

    smiles = list(dset["smiles"])
    # Iterate the dataset directly: wrapping it in list() first would build a
    # throwaway copy of every row just to loop over it once.
    labels = [{task: sample[task] for task in tasks} for sample in dset]

    print("Loaded test dataset")
    return smiles, labels
|