Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| from sklearn.preprocessing import StandardScaler | |
def prase_transaction_data(file_path, *, high_risk_countries=("Nigeria", "Russia", "China")):
    """Parse and clean transaction data from a CSV source.

    NOTE: the function name keeps its historical spelling ("prase") so that
    existing callers continue to work.

    Args:
        file_path: Anything accepted by ``pandas.read_csv`` (a path or a
            file-like object). The data must contain the columns
            ``timestamp``, ``amount`` and ``country``.
        high_risk_countries: Country names that set ``is_high_risk_country``
            to 1. Defaults to the list that was previously hard-coded.

    Returns:
        A DataFrame holding only the rows that had no missing value in any
        column, with three derived columns added:

        * ``hour`` - hour of day taken from ``timestamp``.
        * ``amount_log`` - ``log1p(amount)``.
        * ``is_high_risk_country`` - 1 if ``country`` is in
          ``high_risk_countries``, otherwise 0.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    df = pd.read_csv(file_path)

    # dropna() with default arguments removes a row when ANY column is missing,
    # not only the columns used below.
    df = df.dropna()

    df['hour'] = pd.to_datetime(df['timestamp']).dt.hour

    # log1p keeps zero amounts finite. NOTE(review): amounts <= -1 would
    # produce -inf/NaN here; confirm whether such values can occur upstream.
    df['amount_log'] = np.log1p(df['amount'])

    # Vectorised membership test instead of a Python-level lambda per row.
    df['is_high_risk_country'] = (
        df['country'].isin(list(high_risk_countries)).astype(int)
    )
    return df
def preprocess_for_model(df, scaler=None, *, fit=True):
    """Prepare data for the fraud detection model.

    Selects the model features, one-hot encodes ``merchant_category`` and
    standardises every resulting column.

    Args:
        df: DataFrame containing ``amount_log``, ``hour``,
            ``is_high_risk_country`` and ``merchant_category``; it may also
            contain ``fraud_label``.
        scaler: Optional object exposing ``fit_transform`` / ``transform``
            (e.g. a ``StandardScaler``). When omitted, a new
            ``StandardScaler`` is created, which is the original behaviour.
            Pass your own instance to keep the fitted statistics for later.
        fit: When True (default) the scaler is fitted on ``df``. When False
            the already-fitted ``scaler`` is only applied, so inference data
            is scaled with the training statistics.

    Returns:
        ``(X_scaled, y)`` where ``X_scaled`` is the scaler's output and ``y``
        is the ``fraud_label`` column, or ``None`` if ``df`` has no such column.

    Raises:
        KeyError: If a required feature column is missing.
        ValueError: If ``fit=False`` is requested without a ``scaler``.
    """
    features = ['amount_log', 'hour', 'is_high_risk_country', 'merchant_category']
    X = df[features]
    y = df.get('fraud_label')

    if scaler is None:
        if not fit:
            raise ValueError("fit=False requires an already-fitted scaler")
        scaler = StandardScaler()

    if fit:
        # Training path: drop the first category level to avoid a redundant
        # (perfectly collinear) dummy column. dtype=float keeps the matrix
        # purely numeric for the scaler.
        X = pd.get_dummies(
            X, columns=['merchant_category'], drop_first=True, dtype=float
        )
        return scaler.fit_transform(X), y

    # Inference path: the categories present here may differ from training, so
    # "drop the first level" could drop a different category than it did at fit
    # time. When the scaler recorded its input columns, encode every level and
    # align to exactly those columns instead.
    fitted_columns = getattr(scaler, 'feature_names_in_', None)
    if fitted_columns is None:
        # No recorded column names to align with; fall back to the same
        # encoding that is used when fitting.
        X = pd.get_dummies(
            X, columns=['merchant_category'], drop_first=True, dtype=float
        )
    else:
        X = pd.get_dummies(X, columns=['merchant_category'], dtype=float)
        # Categories unseen during fitting are dropped; categories missing
        # from this batch become all-zero columns.
        X = X.reindex(columns=list(fitted_columns), fill_value=0.0)
    return scaler.transform(X), y