File size: 1,297 Bytes
25bc9b3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
from pandas import DataFrame, Series, read_csv
from sklearn.model_selection import train_test_split

from src.config import DATASET_FILE_PATH


def get_dataset() -> DataFrame:
    """
    Load the dataset from the CSV file at DATASET_FILE_PATH

    Returns:
        DataFrame: The parsed CSV content, or an empty DataFrame when the
            file does not exist
    """
    try:
        # read_csv already returns a DataFrame, so no re-wrapping is needed.
        dataset = read_csv(DATASET_FILE_PATH)
    except FileNotFoundError:
        # Best-effort fallback: a missing file yields an empty frame rather
        # than an error, so callers must be ready for a frame with no columns.
        return DataFrame(data={})
    return dataset


def get_features_target(df: DataFrame) -> tuple[DataFrame, Series]:
    """
    Split the dataset into its feature columns and its target column

    Args:
        df (DataFrame): The dataset, which must contain a "TARGET" column

    Returns:
        tuple[DataFrame, Series]: Every column except "TARGET", followed by
            the "TARGET" column itself

    Raises:
        KeyError: If the dataset has no "TARGET" column
    """
    # drop() returns a new frame; the caller's DataFrame is left untouched.
    features = df.drop(columns=["TARGET"])
    target = df["TARGET"]
    return features, target


def get_train_test_sets(
    X: DataFrame,
    y: Series,
    *,
    test_size: float = 0.2,
    random_state: int = 42,
) -> tuple[DataFrame, Series, DataFrame, Series]:
    """
    Split the features and target into train and test sets

    Args:
        X (DataFrame): The features as a DataFrame
        y (Series): The target as a Series
        test_size (float): Passed to train_test_split as ``test_size``.
            Defaults to 0.2
        random_state (int): Passed to train_test_split as ``random_state``.
            Defaults to 42

    Returns:
        tuple[DataFrame, Series, DataFrame, Series]: The sets ordered as
            (X_train, y_train, X_test, y_test)
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    # train_test_split yields (X_train, X_test, y_train, y_test); this
    # function returns the train pair first, then the test pair.
    return X_train, y_train, X_test, y_test