Commit
·
f429f8e
1
Parent(s):
d234096
refactor: Improve main app layout
Browse files
app.py
CHANGED
|
@@ -26,7 +26,7 @@ def _(mo):
|
|
| 26 |
|
| 27 |
@app.cell
|
| 28 |
def _(mo):
|
| 29 |
-
mo.Html("<br
|
| 30 |
return
|
| 31 |
|
| 32 |
|
|
@@ -266,18 +266,13 @@ def _(features_widgets, mo):
|
|
| 266 |
# ๐ [4] Create the form with the sliders
|
| 267 |
sliders_form = (
|
| 268 |
mo.md("""
|
| 269 |
-
###
|
| 270 |
-
|
| 271 |
-
{EXT_SOURCE_3}
|
| 272 |
-
{
|
| 273 |
-
{
|
| 274 |
-
{
|
| 275 |
-
{
|
| 276 |
-
{AMT_CREDIT}
|
| 277 |
-
{DAYS_EMPLOYED}
|
| 278 |
-
{DAYS_ID_PUBLISH}
|
| 279 |
-
{DAYS_REGISTRATION}
|
| 280 |
-
{SK_ID_CURR}
|
| 281 |
""")
|
| 282 |
.batch(**features_widgets) # Pass the dict unpacked
|
| 283 |
.form(show_clear_button=True, bordered=True)
|
|
@@ -285,16 +280,9 @@ def _(features_widgets, mo):
|
|
| 285 |
return (sliders_form,)
|
| 286 |
|
| 287 |
|
| 288 |
-
@app.cell
|
| 289 |
-
def _(sliders_form):
|
| 290 |
-
# ๐ [5] Display the form
|
| 291 |
-
sliders_form
|
| 292 |
-
return
|
| 293 |
-
|
| 294 |
-
|
| 295 |
@app.cell
|
| 296 |
def _(default_values, loaded_pipeline, mo, pd, sliders_form):
|
| 297 |
-
# ๐ [
|
| 298 |
probability = None
|
| 299 |
|
| 300 |
# Process form submission
|
|
@@ -317,7 +305,7 @@ def _(default_values, loaded_pipeline, mo, pd, sliders_form):
|
|
| 317 |
|
| 318 |
@app.cell
|
| 319 |
def _(probability):
|
| 320 |
-
# ๐ [
|
| 321 |
prob_percent = 70.12
|
| 322 |
risk = "High Risk"
|
| 323 |
direction = "decrease"
|
|
@@ -338,32 +326,14 @@ def _(probability):
|
|
| 338 |
return direction, prob_percent, risk
|
| 339 |
|
| 340 |
|
| 341 |
-
@app.cell
|
| 342 |
-
def _(mo):
|
| 343 |
-
mo.Html("<br>")
|
| 344 |
-
return
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
@app.cell
|
| 348 |
-
def _(mo):
|
| 349 |
-
mo.md("## 🔮 Credit Risk Prediction")
|
| 350 |
-
return
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
@app.cell
|
| 354 |
-
def _(mo):
|
| 355 |
-
mo.Html("<hr><br>")
|
| 356 |
-
return
|
| 357 |
-
|
| 358 |
-
|
| 359 |
@app.cell
|
| 360 |
def _(direction, mo, prob_percent, risk):
|
| 361 |
-
interpretation_text = f"""This means there is a {prob_percent}% chance the client will
|
| 362 |
-
Risk level is categorized as
|
| 363 |
"""
|
| 364 |
|
| 365 |
result_stat = mo.stat(
|
| 366 |
-
label="
|
| 367 |
bordered=True,
|
| 368 |
value=f"{prob_percent}%",
|
| 369 |
caption=risk,
|
|
@@ -379,6 +349,18 @@ def _(direction, mo, prob_percent, risk):
|
|
| 379 |
return interpretation_stat, result_stat
|
| 380 |
|
| 381 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 382 |
@app.cell
|
| 383 |
def _(interpretation_stat, mo, result_stat):
|
| 384 |
mo.vstack(
|
|
@@ -395,22 +377,33 @@ def _(interpretation_stat, mo, result_stat):
|
|
| 395 |
|
| 396 |
@app.cell
|
| 397 |
def _(mo):
|
| 398 |
-
mo.Html("<br
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 399 |
return
|
| 400 |
|
| 401 |
|
| 402 |
@app.cell
|
| 403 |
def _(mo):
|
| 404 |
-
mo.
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
),
|
| 410 |
)
|
| 411 |
return
|
| 412 |
|
| 413 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 414 |
@app.cell
|
| 415 |
def _(mo):
|
| 416 |
mo.md(r"""## ๐ Model Selection""")
|
|
@@ -428,7 +421,7 @@ def _(mo):
|
|
| 428 |
lg_stat = mo.stat(
|
| 429 |
label="Logistic Regression",
|
| 430 |
bordered=True,
|
| 431 |
-
value="๐ช๐ป
|
| 432 |
caption="Scores are consistent across train and test, indicating no overfitting. However, the overall AUC is low, suggesting underfitting — the model is too simple to capture complex patterns.",
|
| 433 |
direction="decrease",
|
| 434 |
)
|
|
@@ -436,7 +429,7 @@ def _(mo):
|
|
| 436 |
rfc_stat = mo.stat(
|
| 437 |
label="Random Forest Classifier",
|
| 438 |
bordered=True,
|
| 439 |
-
value="๐ช๐ป
|
| 440 |
caption="Perfect training AUC indicates severe overfitting — the model memorized the training set. While the test score is better than Logistic Regression, the gap is too large for good generalization.",
|
| 441 |
direction="decrease",
|
| 442 |
)
|
|
@@ -444,7 +437,7 @@ def _(mo):
|
|
| 444 |
rfo_stat = mo.stat(
|
| 445 |
label="Random Forest with Randomized Search",
|
| 446 |
bordered=True,
|
| 447 |
-
value="๐ช๐ป
|
| 448 |
caption="Hyperparameter tuning greatly reduced overfitting. The smaller train–test gap and improved test AUC show better generalization and a strong performance.",
|
| 449 |
direction="increase",
|
| 450 |
)
|
|
@@ -452,7 +445,7 @@ def _(mo):
|
|
| 452 |
lgbm_stat = mo.stat(
|
| 453 |
label="LightGBM",
|
| 454 |
bordered=True,
|
| 455 |
-
value="๐ช๐ป
|
| 456 |
caption="Best overall performance. Small train–test gap and highest test AUC indicate a well-balanced model with strong generalization.",
|
| 457 |
direction="increase",
|
| 458 |
)
|
|
@@ -479,7 +472,7 @@ def _(mo):
|
|
| 479 |
@app.cell
|
| 480 |
def _(mo):
|
| 481 |
mo.md(
|
| 482 |
-
r"""Based on a comparison of all the models _(using AUC ROC metric)_, the final model selection is clear
|
| 483 |
)
|
| 484 |
return
|
| 485 |
|
|
@@ -499,7 +492,7 @@ def _(mo):
|
|
| 499 |
| Logistic Regression | 0.687 | 0.685 |
|
| 500 |
| Random Forest Classifier | 1.000 | 0.707 |
|
| 501 |
| Randomized Search (Tuned RF) | 0.820 | 0.731 |
|
| 502 |
-
| **LightGBM** | 0.852 | **0.751** |
|
| 503 |
""")
|
| 504 |
)
|
| 505 |
return
|
|
@@ -524,6 +517,19 @@ def _(mo):
|
|
| 524 |
return
|
| 525 |
|
| 526 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 527 |
@app.cell
|
| 528 |
def _(mo):
|
| 529 |
mo.Html("<br><hr><br>")
|
|
|
|
| 26 |
|
| 27 |
@app.cell
|
| 28 |
def _(mo):
|
| 29 |
+
mo.Html("<br>")
|
| 30 |
return
|
| 31 |
|
| 32 |
|
|
|
|
| 266 |
# ๐ [4] Create the form with the sliders
|
| 267 |
sliders_form = (
|
| 268 |
mo.md("""
|
| 269 |
+
###Fill in the Client Profile to see the prediction
|
| 270 |
+
|
| 271 |
+
{EXT_SOURCE_3} {EXT_SOURCE_2}
|
| 272 |
+
{DAYS_BIRTH} {EXT_SOURCE_1}
|
| 273 |
+
{AMT_ANNUITY} {AMT_CREDIT}
|
| 274 |
+
{DAYS_EMPLOYED} {DAYS_ID_PUBLISH}
|
| 275 |
+
{DAYS_REGISTRATION} {SK_ID_CURR}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
""")
|
| 277 |
.batch(**features_widgets) # Pass the dict unpacked
|
| 278 |
.form(show_clear_button=True, bordered=True)
|
|
|
|
| 280 |
return (sliders_form,)
|
| 281 |
|
| 282 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
@app.cell
|
| 284 |
def _(default_values, loaded_pipeline, mo, pd, sliders_form):
|
| 285 |
+
# ๐ [5] Get prediction from model
|
| 286 |
probability = None
|
| 287 |
|
| 288 |
# Process form submission
|
|
|
|
| 305 |
|
| 306 |
@app.cell
|
| 307 |
def _(probability):
|
| 308 |
+
# ๐ [6] Display prediction results
|
| 309 |
prob_percent = 70.12
|
| 310 |
risk = "High Risk"
|
| 311 |
direction = "decrease"
|
|
|
|
| 326 |
return direction, prob_percent, risk
|
| 327 |
|
| 328 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 329 |
@app.cell
|
| 330 |
def _(direction, mo, prob_percent, risk):
|
| 331 |
+
interpretation_text = f"""This means there is a {prob_percent}% chance the client will default on their loan.
|
| 332 |
+
Risk level is categorized as {risk}, which can help guide loan approval decisions.
|
| 333 |
"""
|
| 334 |
|
| 335 |
result_stat = mo.stat(
|
| 336 |
+
label="โ๏ธ Probability of Payment Difficulties",
|
| 337 |
bordered=True,
|
| 338 |
value=f"{prob_percent}%",
|
| 339 |
caption=risk,
|
|
|
|
| 349 |
return interpretation_stat, result_stat
|
| 350 |
|
| 351 |
|
| 352 |
+
@app.cell
|
| 353 |
+
def _(mo):
|
| 354 |
+
mo.md("""## 🔮 Credit Risk Predictor — Try It Yourself!""")
|
| 355 |
+
return
|
| 356 |
+
|
| 357 |
+
|
| 358 |
+
@app.cell
|
| 359 |
+
def _(mo):
|
| 360 |
+
mo.Html("<hr><br>")
|
| 361 |
+
return
|
| 362 |
+
|
| 363 |
+
|
| 364 |
@app.cell
|
| 365 |
def _(interpretation_stat, mo, result_stat):
|
| 366 |
mo.vstack(
|
|
|
|
| 377 |
|
| 378 |
@app.cell
|
| 379 |
def _(mo):
|
| 380 |
+
mo.Html("<br>")
|
| 381 |
+
return
|
| 382 |
+
|
| 383 |
+
|
| 384 |
+
@app.cell
|
| 385 |
+
def _(sliders_form):
|
| 386 |
+
sliders_form
|
| 387 |
return
|
| 388 |
|
| 389 |
|
| 390 |
@app.cell
|
| 391 |
def _(mo):
|
| 392 |
+
mo.md(
|
| 393 |
+
r"""
|
| 394 |
+
<small>_(*) Predictions are based on the top 10 most important features. Remaining features are assigned default values (median for numeric, mode for categorical)._</small>
|
| 395 |
+
|
| 396 |
+
"""
|
|
|
|
| 397 |
)
|
| 398 |
return
|
| 399 |
|
| 400 |
|
| 401 |
+
@app.cell
|
| 402 |
+
def _(mo):
|
| 403 |
+
mo.Html("<br>")
|
| 404 |
+
return
|
| 405 |
+
|
| 406 |
+
|
| 407 |
@app.cell
|
| 408 |
def _(mo):
|
| 409 |
mo.md(r"""## ๐ Model Selection""")
|
|
|
|
| 421 |
lg_stat = mo.stat(
|
| 422 |
label="Logistic Regression",
|
| 423 |
bordered=True,
|
| 424 |
+
value="๐ช๐ป 68.7% ๐ 68.5%",
|
| 425 |
caption="Scores are consistent across train and test, indicating no overfitting. However, the overall AUC is low, suggesting underfitting — the model is too simple to capture complex patterns.",
|
| 426 |
direction="decrease",
|
| 427 |
)
|
|
|
|
| 429 |
rfc_stat = mo.stat(
|
| 430 |
label="Random Forest Classifier",
|
| 431 |
bordered=True,
|
| 432 |
+
value="๐ช๐ป 100% ๐ 70.7%",
|
| 433 |
caption="Perfect training AUC indicates severe overfitting — the model memorized the training set. While the test score is better than Logistic Regression, the gap is too large for good generalization.",
|
| 434 |
direction="decrease",
|
| 435 |
)
|
|
|
|
| 437 |
rfo_stat = mo.stat(
|
| 438 |
label="Random Forest with Randomized Search",
|
| 439 |
bordered=True,
|
| 440 |
+
value="๐ช๐ป 82% ๐ 73.1%",
|
| 441 |
caption="Hyperparameter tuning greatly reduced overfitting. The smaller train–test gap and improved test AUC show better generalization and a strong performance.",
|
| 442 |
direction="increase",
|
| 443 |
)
|
|
|
|
| 445 |
lgbm_stat = mo.stat(
|
| 446 |
label="LightGBM",
|
| 447 |
bordered=True,
|
| 448 |
+
value="๐ช๐ป 85.2% ๐ 75.1%",
|
| 449 |
caption="Best overall performance. Small train–test gap and highest test AUC indicate a well-balanced model with strong generalization.",
|
| 450 |
direction="increase",
|
| 451 |
)
|
|
|
|
| 472 |
@app.cell
|
| 473 |
def _(mo):
|
| 474 |
mo.md(
|
| 475 |
+
r"""Based on a comparison of all the models _(using AUC ROC metric)_, the final model selection is clear:"""
|
| 476 |
)
|
| 477 |
return
|
| 478 |
|
|
|
|
| 492 |
| Logistic Regression | 0.687 | 0.685 |
|
| 493 |
| Random Forest Classifier | 1.000 | 0.707 |
|
| 494 |
| Randomized Search (Tuned RF) | 0.820 | 0.731 |
|
| 495 |
+
| **LightGBM** | **0.852** | **0.751** |
|
| 496 |
""")
|
| 497 |
)
|
| 498 |
return
|
|
|
|
| 517 |
return
|
| 518 |
|
| 519 |
|
| 520 |
+
@app.cell
|
| 521 |
+
def _(mo):
|
| 522 |
+
mo.callout(
|
| 523 |
+
kind="info",
|
| 524 |
+
value=mo.md(
|
| 525 |
+
"""💡 **Want to explore the process in detail?**
|
| 526 |
+
|
| 527 |
+
See the full ๐ [Jupyter notebook](https://huggingface.co/spaces/iBrokeTheCode/Home_Credit_Default_Risk_Prediction/blob/main/tutorial_app.ipynb) ๐๏ธ for an end-to-end walkthrough, including Exploratory Data Analysis, preprocessing, model training, evaluation, model selection, and saving the final model."""
|
| 528 |
+
),
|
| 529 |
+
)
|
| 530 |
+
return
|
| 531 |
+
|
| 532 |
+
|
| 533 |
@app.cell
|
| 534 |
def _(mo):
|
| 535 |
mo.Html("<br><hr><br>")
|