import json

import datasets
import pandas as pd
import pandera.pandas as pa

from src.common.paths import DATASET_NAME
from src.common.schema import DatasetSchema


@pa.check_output(DatasetSchema)
def load_dataset() -> pd.DataFrame:
    """Load the ``"test"`` split of ``DATASET_NAME`` as a DataFrame.

    The split is fetched with ``datasets.load_dataset`` and wrapped in a
    ``pandas.DataFrame``. Every value in the ``DatasetSchema.correct_answer``
    column is then passed through ``json.loads``, so that column is assumed
    to arrive as JSON-encoded text (TODO: confirm against the dataset card).

    Returns:
        The DataFrame with the ``correct_answer`` column decoded. The
        ``pa.check_output`` decorator checks the returned frame against
        ``DatasetSchema`` before it reaches the caller.

    Raises:
        json.JSONDecodeError: If a ``correct_answer`` value is a string that
            is not valid JSON.
    """
    frame = pd.DataFrame(datasets.load_dataset(DATASET_NAME, split="test"))

    # Resolve the column label once; it is used for both the read and write.
    answer_column = DatasetSchema.correct_answer
    decoded_answers = frame[answer_column].apply(json.loads)
    frame[answer_column] = decoded_answers

    return frame