Spaces:

iBrokeTheCode
/

Home_Credit_Default_Risk_Prediction

Sleeping

App Files Files Community

Home_Credit_Default_Risk_Prediction / app.py

iBrokeTheCode

refactor: Improve main app layout

f429f8e 4 months ago

raw

history blame

14.6 kB

	import marimo

	# Version string recorded in this notebook file by marimo.
	__generated_with = "0.14.16"
	# Application object; each cell below registers itself through @app.cell.
	app = marimo.App()


	@app.cell
	def _():
	    # Shared imports for the notebook. The names returned here are the ones
	    # other cells receive as parameters.
	    import warnings

	    import joblib
	    import marimo as mo
	    import pandas as pd

	    # Hide the "X does not have valid feature names" warning.
	    _noisy_message = "X does not have valid feature names"
	    warnings.filterwarnings("ignore", message=_noisy_message)
	    return joblib, mo, pd


	@app.cell
	def _(mo):
	    # Page title, centred horizontally.
	    _title = mo.md("# 🏦 Home Credit Default Risk Prediction")
	    mo.center(_title)
	    return


	@app.cell
	def _(mo):
	# Vertical spacer: the cell's only expression is a raw <br> element.
	mo.Html("<br>")
	return


	@app.cell
	def _(joblib, mo):
	    # 📌 [1] Load the saved model pipeline
	    # Resolve the artifact relative to the notebook file so loading does not
	    # depend on the process's current working directory. When marimo cannot
	    # determine the notebook directory, fall back to the original
	    # cwd-relative path.
	    _notebook_dir = mo.notebook_dir()
	    if _notebook_dir is not None:
	        _model_path = _notebook_dir / "model" / "lgbm_model.joblib"
	    else:
	        _model_path = "./model/lgbm_model.joblib"

	    # NOTE: joblib.load unpickles the file, so only trusted model artifacts
	    # must ever be placed at this path.
	    # Anything printed while loading is routed to the cell output.
	    with mo.redirect_stdout():
	        loaded_pipeline = joblib.load(_model_path)
	    return (loaded_pipeline,)


	@app.cell
	def _():
	# 📌 [2] Define the default values for all other features
	# One entry per model input column. The prediction cell copies this dict and
	# overrides the entries the user sets through the sliders, so the key order
	# here is also the column order of the DataFrame passed to the model.
	# Per the app's own footnote: numeric entries are medians, categorical
	# entries are modes.
	default_values = {
	# --- Numeric defaults ---
	"SK_ID_CURR": 277659.5,
	"CNT_CHILDREN": 0.0,
	"AMT_INCOME_TOTAL": 147150.0,
	"AMT_CREDIT": 512997.75,
	"AMT_ANNUITY": 24885.0,
	"AMT_GOODS_PRICE": 450000.0,
	"REGION_POPULATION_RELATIVE": 0.01885,
	"DAYS_BIRTH": -15743.5,
	"DAYS_EMPLOYED": -1219.0,
	"DAYS_REGISTRATION": -4492.0,
	"DAYS_ID_PUBLISH": -3254.0,
	"OWN_CAR_AGE": 9.0,
	"FLAG_MOBIL": 1.0,
	"FLAG_EMP_PHONE": 1.0,
	"FLAG_WORK_PHONE": 0.0,
	"FLAG_CONT_MOBILE": 1.0,
	"FLAG_PHONE": 0.0,
	"FLAG_EMAIL": 0.0,
	"CNT_FAM_MEMBERS": 2.0,
	"REGION_RATING_CLIENT": 2.0,
	"REGION_RATING_CLIENT_W_CITY": 2.0,
	"HOUR_APPR_PROCESS_START": 12.0,
	"REG_REGION_NOT_LIVE_REGION": 0.0,
	"REG_REGION_NOT_WORK_REGION": 0.0,
	"LIVE_REGION_NOT_WORK_REGION": 0.0,
	"REG_CITY_NOT_LIVE_CITY": 0.0,
	"REG_CITY_NOT_WORK_CITY": 0.0,
	"LIVE_CITY_NOT_WORK_CITY": 0.0,
	"EXT_SOURCE_1": 0.5068839442599388,
	"EXT_SOURCE_2": 0.5662837032261614,
	"EXT_SOURCE_3": 0.5370699579791587,
	"APARTMENTS_AVG": 0.0876,
	"BASEMENTAREA_AVG": 0.0764,
	"YEARS_BEGINEXPLUATATION_AVG": 0.9816,
	"YEARS_BUILD_AVG": 0.7552,
	"COMMONAREA_AVG": 0.0211,
	"ELEVATORS_AVG": 0.0,
	"ENTRANCES_AVG": 0.1379,
	"FLOORSMAX_AVG": 0.1667,
	"FLOORSMIN_AVG": 0.2083,
	"LANDAREA_AVG": 0.0483,
	"LIVINGAPARTMENTS_AVG": 0.0756,
	"LIVINGAREA_AVG": 0.0746,
	"NONLIVINGAPARTMENTS_AVG": 0.0,
	"NONLIVINGAREA_AVG": 0.0035,
	"APARTMENTS_MODE": 0.084,
	"BASEMENTAREA_MODE": 0.0748,
	"YEARS_BEGINEXPLUATATION_MODE": 0.9816,
	"YEARS_BUILD_MODE": 0.7648,
	"COMMONAREA_MODE": 0.0191,
	"ELEVATORS_MODE": 0.0,
	"ENTRANCES_MODE": 0.1379,
	"FLOORSMAX_MODE": 0.1667,
	"FLOORSMIN_MODE": 0.2083,
	"LANDAREA_MODE": 0.0459,
	"LIVINGAPARTMENTS_MODE": 0.0771,
	"LIVINGAREA_MODE": 0.0731,
	"NONLIVINGAPARTMENTS_MODE": 0.0,
	"NONLIVINGAREA_MODE": 0.0011,
	"APARTMENTS_MEDI": 0.0864,
	"BASEMENTAREA_MEDI": 0.0761,
	"YEARS_BEGINEXPLUATATION_MEDI": 0.9816,
	"YEARS_BUILD_MEDI": 0.7585,
	"COMMONAREA_MEDI": 0.0209,
	"ELEVATORS_MEDI": 0.0,
	"ENTRANCES_MEDI": 0.1379,
	"FLOORSMAX_MEDI": 0.1667,
	"FLOORSMIN_MEDI": 0.2083,
	"LANDAREA_MEDI": 0.0488,
	"LIVINGAPARTMENTS_MEDI": 0.0765,
	"LIVINGAREA_MEDI": 0.0749,
	"NONLIVINGAPARTMENTS_MEDI": 0.0,
	"NONLIVINGAREA_MEDI": 0.003,
	"TOTALAREA_MODE": 0.0687,
	"OBS_30_CNT_SOCIAL_CIRCLE": 0.0,
	"DEF_30_CNT_SOCIAL_CIRCLE": 0.0,
	"OBS_60_CNT_SOCIAL_CIRCLE": 0.0,
	"DEF_60_CNT_SOCIAL_CIRCLE": 0.0,
	"DAYS_LAST_PHONE_CHANGE": -755.0,
	"FLAG_DOCUMENT_2": 0.0,
	"FLAG_DOCUMENT_3": 1.0,
	"FLAG_DOCUMENT_4": 0.0,
	"FLAG_DOCUMENT_5": 0.0,
	"FLAG_DOCUMENT_6": 0.0,
	"FLAG_DOCUMENT_7": 0.0,
	"FLAG_DOCUMENT_8": 0.0,
	"FLAG_DOCUMENT_9": 0.0,
	"FLAG_DOCUMENT_10": 0.0,
	"FLAG_DOCUMENT_11": 0.0,
	"FLAG_DOCUMENT_12": 0.0,
	"FLAG_DOCUMENT_13": 0.0,
	"FLAG_DOCUMENT_14": 0.0,
	"FLAG_DOCUMENT_15": 0.0,
	"FLAG_DOCUMENT_16": 0.0,
	"FLAG_DOCUMENT_17": 0.0,
	"FLAG_DOCUMENT_18": 0.0,
	"FLAG_DOCUMENT_19": 0.0,
	"FLAG_DOCUMENT_20": 0.0,
	"FLAG_DOCUMENT_21": 0.0,
	"AMT_REQ_CREDIT_BUREAU_HOUR": 0.0,
	"AMT_REQ_CREDIT_BUREAU_DAY": 0.0,
	"AMT_REQ_CREDIT_BUREAU_WEEK": 0.0,
	"AMT_REQ_CREDIT_BUREAU_MON": 0.0,
	"AMT_REQ_CREDIT_BUREAU_QRT": 0.0,
	"AMT_REQ_CREDIT_BUREAU_YEAR": 1.0,
	# --- Categorical defaults ---
	"NAME_CONTRACT_TYPE": "Cash loans",
	"CODE_GENDER": "F",
	"FLAG_OWN_CAR": "N",
	"FLAG_OWN_REALTY": "Y",
	"NAME_TYPE_SUITE": "Unaccompanied",
	"NAME_INCOME_TYPE": "Working",
	"NAME_EDUCATION_TYPE": "Secondary / secondary special",
	"NAME_FAMILY_STATUS": "Married",
	"NAME_HOUSING_TYPE": "House / apartment",
	"OCCUPATION_TYPE": "Laborers",
	"WEEKDAY_APPR_PROCESS_START": "TUESDAY",
	"ORGANIZATION_TYPE": "Business Entity Type 3",
	"FONDKAPREMONT_MODE": "reg oper account",
	"HOUSETYPE_MODE": "block of flats",
	"WALLSMATERIAL_MODE": "Panel",
	"EMERGENCYSTATE_MODE": "No",
	}
	return (default_values,)


	@app.cell
	def _(mo):
	    # 📌 [3] Create widgets for the top 10 features
	    # Slider settings keyed by feature name, in display order. Each key is
	    # used both as the slider label and as the key in `features_widgets`.
	    # Sliders without a "step" entry use the slider's default step.
	    _slider_specs = {
	        "EXT_SOURCE_3": {"start": 0.00, "stop": 0.90, "step": 0.01, "value": 0.5},
	        "EXT_SOURCE_2": {"start": 0.00, "stop": 0.86, "step": 0.01, "value": 0.5},
	        "DAYS_BIRTH": {"start": -25229, "stop": -7673, "value": -15743},
	        "EXT_SOURCE_1": {"start": 0.01, "stop": 0.97, "step": 0.01, "value": 0.5},
	        "AMT_ANNUITY": {"start": 1980, "stop": 258025, "step": 100, "value": 24885},
	        "AMT_CREDIT": {
	            "start": 45000,
	            "stop": 4050000,
	            "step": 50000,
	            "value": 512997,
	        },
	        "DAYS_EMPLOYED": {"start": -17583, "stop": 365243, "value": -1219},
	        "DAYS_ID_PUBLISH": {"start": -7197, "stop": 0, "value": -3254},
	        "DAYS_REGISTRATION": {"start": -24672, "stop": 0, "value": -4492},
	        "SK_ID_CURR": {
	            "start": 100003,
	            "stop": 456253,
	            "step": 100,
	            "value": 277659,
	        },
	    }

	    # Build one slider per spec, preserving the order above.
	    features_widgets = {
	        name: mo.ui.slider(label=name, **spec)
	        for name, spec in _slider_specs.items()
	    }
	    return (features_widgets,)


	@app.cell
	def _(features_widgets, mo):
	    # 📌 [4] Create the form with the sliders
	    # Markdown template: each {NAME} placeholder is filled by the widget
	    # registered under the same key in `features_widgets`.
	    _template = mo.md("""
	###Fill in the Client Profile to see the prediction

	{EXT_SOURCE_3} {EXT_SOURCE_2}
	{DAYS_BIRTH} {EXT_SOURCE_1}
	{AMT_ANNUITY} {AMT_CREDIT}
	{DAYS_EMPLOYED} {DAYS_ID_PUBLISH}
	{DAYS_REGISTRATION} {SK_ID_CURR}
	""")

	    # Bind the widgets to the placeholders, then wrap everything in a form.
	    # The prediction cell treats a `None` form value as "not submitted yet".
	    _batched = _template.batch(**features_widgets)
	    sliders_form = _batched.form(show_clear_button=True, bordered=True)
	    return (sliders_form,)


	@app.cell
	def _(default_values, loaded_pipeline, mo, pd, sliders_form):
	    # 📌 [5] Get prediction from model
	    # `probability` stays None until the form has been submitted; the
	    # downstream cell uses that to decide whether to show a real result.
	    probability = None
	    _submitted = sliders_form.value

	    if _submitted is None:
	        # A bare `mo.md(...)` expression inside a branch is discarded and
	        # never rendered, because only a cell's final top-level expression
	        # becomes its output. Push the hint to the cell output explicitly.
	        mo.output.replace(
	            mo.md("Fill in the form and click Submit to get a prediction.")
	        )
	    else:
	        # Start from the defaults and override with the submitted slider
	        # values. Key order (and so column order) follows `default_values`.
	        _row = {**default_values, **_submitted}
	        _frame = pd.DataFrame([_row])

	        # Column 1 of predict_proba, for the single input row.
	        probability = loaded_pipeline.predict_proba(_frame)[:, 1][0]
	    return (probability,)


	@app.cell
	def _(probability):
	    # 📌 [6] Display prediction results
	    # Maps the probability to a percentage, a risk label and the arrow
	    # direction passed on to the stat card.
	    if probability is None:
	        # No prediction available: use the fixed placeholder values.
	        prob_percent = 70.12
	        risk, direction = "High Risk", "decrease"
	    else:
	        prob_percent = round(probability * 100, 2)

	        # Define risk category
	        if probability < 0.34:
	            risk, direction = "Low Risk", "increase"
	        elif probability < 0.67:
	            risk, direction = "Medium Risk", None
	        else:
	            risk, direction = "High Risk", "decrease"
	    return direction, prob_percent, risk


	@app.cell
	def _(direction, mo, prob_percent, risk):
	    # Builds the two cards shown side by side in the results row:
	    # the headline probability and a plain-language reading of it.
	    result_stat = mo.stat(
	        value=f"{prob_percent}%",
	        label="⚖️ Probability of Payment Difficulties",
	        caption=risk,
	        direction=direction,
	        bordered=True,
	    )

	    interpretation_text = f"""This means there is a {prob_percent}% chance the client will default on their loan.
	Risk level is categorized as {risk}, which can help guide loan approval decisions.
	"""

	    # The explanation goes in the caption; the value slot is left empty.
	    interpretation_stat = mo.stat(
	        value="",
	        label="💡 Interpretation",
	        caption=interpretation_text,
	        bordered=True,
	    )
	    return interpretation_stat, result_stat


	@app.cell
	def _(mo):
	# Section heading for the interactive predictor.
	mo.md("""## 🔮 Credit Risk Predictor — Try It Yourself!""")
	return


	@app.cell
	def _(mo):
	# Horizontal rule followed by a line break, placed under the heading.
	mo.Html("<hr><br>")
	return


	@app.cell
	def _(interpretation_stat, mo, result_stat):
	    # Results row: probability card and interpretation card at equal
	    # widths, wrapped in a vertical stack.
	    _cards_row = mo.hstack(
	        items=[result_stat, interpretation_stat],
	        widths="equal",
	        gap=1,
	    )
	    mo.vstack(items=[_cards_row], gap=1, heights="equal")
	    return


	@app.cell
	def _(mo):
	# Vertical spacer between the results row and the form.
	mo.Html("<br>")
	return


	@app.cell
	def _(sliders_form):
	# Show the slider form (built in an earlier cell) at this position.
	sliders_form
	return


	@app.cell
	def _(mo):
	# Footnote explaining which inputs are user-controlled and how the
	# remaining features are filled in.
	mo.md(
	r"""
	<small>_(*) Predictions are based on the top 10 most important features. Remaining features are assigned default values (median for numeric, mode for categorical)._</small>

	"""
	)
	return


	@app.cell
	def _(mo):
	# Vertical spacer before the "Model Selection" section.
	mo.Html("<br>")
	return


	@app.cell
	def _(mo):
	# Section heading for the model comparison.
	mo.md(r"""## 🚀 Model Selection""")
	return


	@app.cell
	def _(mo):
	# Horizontal rule followed by a line break, placed under the heading.
	mo.Html("<hr><br>")
	return


	@app.cell
	def _(mo):
	    # One summary card per candidate model, laid out as a 2x2 grid.
	    # Each card's value shows two scores (💪🏻 and 📝), labelled train and
	    # test in the score table further down the page.
	    _logreg_card = mo.stat(
	        value="💪🏻 68.7% 📝 68.5%",
	        label="Logistic Regression",
	        caption="Scores are consistent across train and test, indicating no overfitting. However, the overall AUC is low, suggesting underfitting — the model is too simple to capture complex patterns.",
	        direction="decrease",
	        bordered=True,
	    )
	    _forest_card = mo.stat(
	        value="💪🏻 100% 📝 70.7%",
	        label="Random Forest Classifier",
	        caption="Perfect training AUC indicates severe overfitting — the model memorized the training set. While the test score is better than Logistic Regression, the gap is too large for good generalization.",
	        direction="decrease",
	        bordered=True,
	    )
	    _tuned_forest_card = mo.stat(
	        value="💪🏻 82% 📝 73.1%",
	        label="Random Forest with Randomized Search",
	        caption="Hyperparameter tuning greatly reduced overfitting. The smaller train–test gap and improved test AUC show better generalization and a strong performance.",
	        direction="increase",
	        bordered=True,
	    )
	    _lightgbm_card = mo.stat(
	        value="💪🏻 85.2% 📝 75.1%",
	        label="LightGBM",
	        caption="Best overall performance. Small train–test gap and highest test AUC indicate a well-balanced model with strong generalization.",
	        direction="increase",
	        bordered=True,
	    )

	    # Two rows of two equally wide cards.
	    _top_row = mo.hstack(
	        items=[_logreg_card, _forest_card], widths="equal", gap=1
	    )
	    _bottom_row = mo.hstack(
	        items=[_tuned_forest_card, _lightgbm_card], widths="equal", gap=1
	    )
	    mo.vstack(
	        items=[_top_row, _bottom_row],
	        gap=1,
	        heights="equal",
	        align="center",
	        justify="center",
	    )
	    return


	@app.cell
	def _(mo):
	# Vertical spacer after the model cards.
	mo.Html("<br>")
	return


	@app.cell
	def _(mo):
	# Lead-in sentence for the score table that follows.
	mo.md(
	r"""Based on a comparison of all the models _(using AUC ROC metric)_, the final model selection is clear:"""
	)
	return


	@app.cell
	def _(mo):
	# Vertical spacer before the score table.
	mo.Html("<br>")
	return


	@app.cell
	def _(mo):
	    # Train/test score table for the four candidate models, centred.
	    # The pipes must be plain `|`: inside a raw string a `\|` reaches the
	    # Markdown renderer as an escaped (literal) pipe, which leaves the rows
	    # without column separators so no table is rendered.
	    mo.center(
	        mo.md(r"""
	| Model | 💪🏻 Train Score | 📝 Test Score |
	| :--- | :---: | :---: |
	| Logistic Regression | 0.687 | 0.685 |
	| Random Forest Classifier | 1.000 | 0.707 |
	| Randomized Search (Tuned RF) | 0.820 | 0.731 |
	| LightGBM | 0.852 | 0.751 |
	""")
	    )
	    return


	@app.cell
	def _(mo):
	# Vertical spacer after the score table.
	mo.Html("<br>")
	return


	@app.cell
	def _(mo):
	# Bullet-point summary of the model comparison and the final choice.
	mo.md(
	r"""
	* The Logistic Regression model performed poorly due to underfitting.
	* The base Random Forest model, while better, suffered from severe overfitting.
	* The tuned Random Forest model was a significant improvement and a strong contender, achieving a solid `test_score`.
	* However, the LightGBM model ultimately demonstrated the best performance, achieving the highest ROC AUC test score of 0.751. This indicates that it is the most robust and accurate model for predicting loan repayment risk on unseen data.
	"""
	)
	return


	@app.cell
	def _(mo):
	    # Info callout pointing readers to the full tutorial notebook.
	    _pointer = mo.md(
	        """💡 Want to explore the process in detail?

	See the full 👉 [Jupyter notebook](https://huggingface.co/spaces/iBrokeTheCode/Home_Credit_Default_Risk_Prediction/blob/main/tutorial_app.ipynb) 👈️ for an end-to-end walkthrough, including Exploratory Data Analysis, preprocessing, model training, evaluation, model selection, and saving the final model."""
	    )
	    mo.callout(value=_pointer, kind="info")
	    return


	@app.cell
	def _(mo):
	# Separator before the footer: line break, horizontal rule, line break.
	mo.Html("<br><hr><br>")
	return


	@app.cell
	def _(mo):
	    # Footer with the author's profile links, centred.
	    _links = mo.md(
	        "Connect with me: 💼 [Linkedin](https://www.linkedin.com/in/alex-turpo/) • 🐱 [GitHub](https://github.com/iBrokeTheCode) • 🤗 [Hugging Face](https://huggingface.co/iBrokeTheCode)"
	    )
	    mo.center(_links)
	    return


	# Run the app when this file is executed directly as a script.
	if __name__ == "__main__":
	app.run()