d0rj's picture
style: code blacked
3e35a01
raw
history blame contribute delete
438 Bytes
import json
import datasets
import pandas as pd
import pandera.pandas as pa
from src.common.paths import DATASET_NAME
from src.common.schema import DatasetSchema
@pa.check_output(DatasetSchema)
def load_dataset() -> pd.DataFrame:
    """Load the "test" split of ``DATASET_NAME`` as a pandas DataFrame.

    The column named by ``DatasetSchema.correct_answer`` is passed through
    ``json.loads`` element by element, so each cell holds the decoded JSON
    value rather than its serialized text.

    Returns:
        The resulting DataFrame. The ``pa.check_output`` decorator checks it
        against ``DatasetSchema`` before it reaches the caller.
    """
    answer_column = DatasetSchema.correct_answer

    # Materialize the requested split directly into a DataFrame.
    frame = pd.DataFrame(datasets.load_dataset(DATASET_NAME, split="test"))

    # Decode the JSON-encoded answers in place.
    frame[answer_column] = frame[answer_column].apply(json.loads)
    return frame