File size: 1,297 Bytes
25bc9b3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
from pandas import DataFrame, Series, read_csv
from sklearn.model_selection import train_test_split
from src.config import DATASET_FILE_PATH
def get_dataset() -> DataFrame:
    """
    Load the dataset from the configured CSV file.

    Reads the file located at ``DATASET_FILE_PATH`` with ``read_csv``. A
    missing file is treated as "no data" rather than as an error.

    Returns:
        DataFrame: The parsed dataset, or an empty DataFrame when the file
            does not exist.
    """
    try:
        # read_csv already returns a DataFrame; wrapping it again is redundant.
        return read_csv(DATASET_FILE_PATH)
    except FileNotFoundError:
        # Best-effort fallback: hand callers an empty frame instead of raising.
        return DataFrame(data={})
def get_features_target(
    df: DataFrame, target_column: str = "TARGET"
) -> tuple[DataFrame, Series]:
    """
    Split a dataset into its feature columns and its target column.

    The input DataFrame is not modified; ``drop`` returns a new frame.

    Args:
        df (DataFrame): The dataset as a DataFrame
        target_column (str): Label of the column holding the target.
            Defaults to "TARGET".

    Returns:
        tuple[DataFrame, Series]: The features (every column except the
            target) and the target column, in that order

    Raises:
        KeyError: If ``target_column`` is not a column of ``df``
    """
    # `columns=` already selects the axis, so no extra `axis` argument is needed.
    features = df.drop(columns=[target_column])
    target = df[target_column]
    return features, target
def get_train_test_sets(
    X: DataFrame, y: Series, test_size: float = 0.2, random_state: int = 42
) -> tuple[DataFrame, Series, DataFrame, Series]:
    """
    Split the features and target into train and test sets.

    Args:
        X (DataFrame): The features as a DataFrame
        y (Series): The target as a Series
        test_size (float): Value forwarded to ``train_test_split`` as
            ``test_size``. Defaults to 0.2.
        random_state (int): Seed forwarded to ``train_test_split`` as
            ``random_state``. Defaults to 42.

    Returns:
        tuple[DataFrame, Series, DataFrame, Series]: The sets in the order
            (X_train, y_train, X_test, y_test). Note that this differs from
            the (X_train, X_test, y_train, y_test) order returned by
            ``train_test_split`` itself.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    # Regroup so each feature set sits next to its matching target.
    return X_train, y_train, X_test, y_test
|