iBrokeTheCode committed on
Commit
25bc9b3
·
1 Parent(s): f5d80de

chore: Add functions to get dataset

Browse files
Files changed (6) hide show
  1. .gitignore +208 -0
  2. app.py +20 -434
  3. requirements.txt +4 -5
  4. src/__init__.py +0 -0
  5. src/config.py +4 -0
  6. src/utils.py +49 -0
.gitignore ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.db
2
+ *.sqlite
3
+ *.csv
4
+ *.xlsx
5
+
6
+ # Byte-compiled / optimized / DLL files
7
+ __pycache__/
8
+ *.py[codz]
9
+ *$py.class
10
+
11
+ # C extensions
12
+ *.so
13
+
14
+ # Distribution / packaging
15
+ .Python
16
+ build/
17
+ develop-eggs/
18
+ dist/
19
+ downloads/
20
+ eggs/
21
+ .eggs/
22
+ lib/
23
+ lib64/
24
+ parts/
25
+ sdist/
26
+ var/
27
+ wheels/
28
+ share/python-wheels/
29
+ *.egg-info/
30
+ .installed.cfg
31
+ *.egg
32
+ MANIFEST
33
+
34
+ # PyInstaller
35
+ # Usually these files are written by a python script from a template
36
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
37
+ *.manifest
38
+ *.spec
39
+
40
+ # Installer logs
41
+ pip-log.txt
42
+ pip-delete-this-directory.txt
43
+
44
+ # Unit test / coverage reports
45
+ htmlcov/
46
+ .tox/
47
+ .nox/
48
+ .coverage
49
+ .coverage.*
50
+ .cache
51
+ nosetests.xml
52
+ coverage.xml
53
+ *.cover
54
+ *.py.cover
55
+ .hypothesis/
56
+ .pytest_cache/
57
+ cover/
58
+
59
+ # Translations
60
+ *.mo
61
+ *.pot
62
+
63
+ # Django stuff:
64
+ *.log
65
+ local_settings.py
66
+ db.sqlite3
67
+ db.sqlite3-journal
68
+
69
+ # Flask stuff:
70
+ instance/
71
+ .webassets-cache
72
+
73
+ # Scrapy stuff:
74
+ .scrapy
75
+
76
+ # Sphinx documentation
77
+ docs/_build/
78
+
79
+ # PyBuilder
80
+ .pybuilder/
81
+ target/
82
+
83
+ # Jupyter Notebook
84
+ .ipynb_checkpoints
85
+
86
+ # IPython
87
+ profile_default/
88
+ ipython_config.py
89
+
90
+ # pyenv
91
+ # For a library or package, you might want to ignore these files since the code is
92
+ # intended to run in multiple environments; otherwise, check them in:
93
+ # .python-version
94
+
95
+ # pipenv
96
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
97
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
98
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
99
+ # install all needed dependencies.
100
+ #Pipfile.lock
101
+
102
+ # UV
103
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
104
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
105
+ # commonly ignored for libraries.
106
+ #uv.lock
107
+
108
+ # poetry
109
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
110
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
111
+ # commonly ignored for libraries.
112
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
113
+ #poetry.lock
114
+ #poetry.toml
115
+
116
+ # pdm
117
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
118
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
119
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
120
+ #pdm.lock
121
+ #pdm.toml
122
+ .pdm-python
123
+ .pdm-build/
124
+
125
+ # pixi
126
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
127
+ #pixi.lock
128
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
129
+ # in the .venv directory. It is recommended not to include this directory in version control.
130
+ .pixi
131
+
132
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
133
+ __pypackages__/
134
+
135
+ # Celery stuff
136
+ celerybeat-schedule
137
+ celerybeat.pid
138
+
139
+ # SageMath parsed files
140
+ *.sage.py
141
+
142
+ # Environments
143
+ .env
144
+ .envrc
145
+ .venv
146
+ env/
147
+ venv/
148
+ ENV/
149
+ env.bak/
150
+ venv.bak/
151
+
152
+ # Spyder project settings
153
+ .spyderproject
154
+ .spyproject
155
+
156
+ # Rope project settings
157
+ .ropeproject
158
+
159
+ # mkdocs documentation
160
+ /site
161
+
162
+ # mypy
163
+ .mypy_cache/
164
+ .dmypy.json
165
+ dmypy.json
166
+
167
+ # Pyre type checker
168
+ .pyre/
169
+
170
+ # pytype static type analyzer
171
+ .pytype/
172
+
173
+ # Cython debug symbols
174
+ cython_debug/
175
+
176
+ # PyCharm
177
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
178
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
179
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
180
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
181
+ #.idea/
182
+
183
+ # Abstra
184
+ # Abstra is an AI-powered process automation framework.
185
+ # Ignore directories containing user credentials, local state, and settings.
186
+ # Learn more at https://abstra.io/docs
187
+ .abstra/
188
+
189
+ # Visual Studio Code
190
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
191
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
192
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
193
+ # you could uncomment the following to ignore the entire vscode folder
194
+ # .vscode/
195
+
196
+ # Ruff stuff:
197
+ .ruff_cache/
198
+
199
+ # PyPI configuration file
200
+ .pypirc
201
+
202
+ # Marimo
203
+ marimo/_static/
204
+ marimo/_lsp/
205
+ __marimo__/
206
+
207
+ # Streamlit
208
+ .streamlit/secrets.toml
app.py CHANGED
@@ -1,470 +1,56 @@
1
  import marimo
2
 
3
- __generated_with = "0.9.2"
4
  app = marimo.App()
5
 
6
 
7
  @app.cell
8
- def __():
9
  import marimo as mo
10
-
11
- mo.md("# Welcome to marimo! 🌊🍃")
12
  return (mo,)
13
 
14
 
15
  @app.cell
16
- def __(mo):
17
- slider = mo.ui.slider(1, 22)
18
- return (slider,)
19
-
20
-
21
- @app.cell
22
- def __(mo, slider):
23
- mo.md(
24
- f"""
25
- marimo is a **reactive** Python notebook.
26
-
27
- This means that unlike traditional notebooks, marimo notebooks **run
28
- automatically** when you modify them or
29
- interact with UI elements, like this slider: {slider}.
30
-
31
- {"##" + "🍃" * slider.value}
32
- """
33
- )
34
- return
35
-
36
-
37
- @app.cell(hide_code=True)
38
- def __(mo):
39
- mo.accordion(
40
- {
41
- "Tip: disabling automatic execution": mo.md(
42
- rf"""
43
- marimo lets you disable automatic execution: just go into the
44
- notebook settings and set
45
-
46
- "Runtime > On Cell Change" to "lazy".
47
-
48
- When the runtime is lazy, after running a cell, marimo marks its
49
- descendants as stale instead of automatically running them. The
50
- lazy runtime puts you in control over when cells are run, while
51
- still giving guarantees about the notebook state.
52
- """
53
- )
54
- }
55
- )
56
- return
57
-
58
-
59
- @app.cell(hide_code=True)
60
- def __(mo):
61
- mo.md(
62
- """
63
- Tip: This is a tutorial notebook. You can create your own notebooks
64
- by entering `marimo edit` at the command line.
65
- """
66
- ).callout()
67
- return
68
-
69
-
70
- @app.cell(hide_code=True)
71
- def __(mo):
72
- mo.md(
73
- """
74
- ## 1. Reactive execution
75
-
76
- A marimo notebook is made up of small blocks of Python code called
77
- cells.
78
-
79
- marimo reads your cells and models the dependencies among them: whenever
80
- a cell that defines a global variable is run, marimo
81
- **automatically runs** all cells that reference that variable.
82
-
83
- Reactivity keeps your program state and outputs in sync with your code,
84
- making for a dynamic programming environment that prevents bugs before they
85
- happen.
86
- """
87
- )
88
- return
89
-
90
-
91
- @app.cell(hide_code=True)
92
- def __(changed, mo):
93
- (
94
- mo.md(
95
- f"""
96
- **✨ Nice!** The value of `changed` is now {changed}.
97
-
98
- When you updated the value of the variable `changed`, marimo
99
- **reacted** by running this cell automatically, because this cell
100
- references the global variable `changed`.
101
-
102
- Reactivity ensures that your notebook state is always
103
- consistent, which is crucial for doing good science; it's also what
104
- enables marimo notebooks to double as tools and apps.
105
- """
106
- )
107
- if changed
108
- else mo.md(
109
- """
110
- **🌊 See it in action.** In the next cell, change the value of the
111
- variable `changed` to `True`, then click the run button.
112
- """
113
- )
114
- )
115
  return
116
 
117
 
118
  @app.cell
119
- def __():
120
- changed = False
121
- return (changed,)
122
-
123
-
124
- @app.cell(hide_code=True)
125
- def __(mo):
126
- mo.accordion(
127
- {
128
- "Tip: execution order": (
129
- """
130
- The order of cells on the page has no bearing on
131
- the order in which cells are executed: marimo knows that a cell
132
- reading a variable must run after the cell that defines it. This
133
- frees you to organize your code in the way that makes the most
134
- sense for you.
135
- """
136
- )
137
- }
138
- )
139
- return
140
-
141
-
142
- @app.cell(hide_code=True)
143
- def __(mo):
144
- mo.md(
145
- """
146
- **Global names must be unique.** To enable reactivity, marimo imposes a
147
- constraint on how names appear in cells: no two cells may define the same
148
- variable.
149
- """
150
- )
151
- return
152
-
153
-
154
- @app.cell(hide_code=True)
155
- def __(mo):
156
- mo.accordion(
157
- {
158
- "Tip: encapsulation": (
159
- """
160
- By encapsulating logic in functions, classes, or Python modules,
161
- you can minimize the number of global variables in your notebook.
162
- """
163
- )
164
- }
165
- )
166
- return
167
-
168
-
169
- @app.cell(hide_code=True)
170
- def __(mo):
171
- mo.accordion(
172
- {
173
- "Tip: private variables": (
174
- """
175
- Variables prefixed with an underscore are "private" to a cell, so
176
- they can be defined by multiple cells.
177
- """
178
- )
179
- }
180
- )
181
- return
182
-
183
-
184
- @app.cell(hide_code=True)
185
- def __(mo):
186
- mo.md(
187
- """
188
- ## 2. UI elements
189
-
190
- Cells can output interactive UI elements. Interacting with a UI
191
- element **automatically triggers notebook execution**: when
192
- you interact with a UI element, its value is sent back to Python, and
193
- every cell that references that element is re-run.
194
-
195
- marimo provides a library of UI elements to choose from under
196
- `marimo.ui`.
197
- """
198
- )
199
  return
200
 
201
 
202
  @app.cell
203
- def __(mo):
204
- mo.md("""**🌊 Some UI elements.** Try interacting with the below elements.""")
205
- return
206
 
207
 
208
  @app.cell
209
- def __(mo):
210
- icon = mo.ui.dropdown(["🍃", "🌊", "✨"], value="🍃")
211
- return (icon,)
212
 
213
 
214
  @app.cell
215
- def __(icon, mo):
216
- repetitions = mo.ui.slider(1, 16, label=f"number of {icon.value}: ")
217
- return (repetitions,)
218
 
219
 
220
  @app.cell
221
- def __(icon, repetitions):
222
- icon, repetitions
223
- return
224
-
225
-
226
- @app.cell
227
- def __(icon, mo, repetitions):
228
- mo.md("# " + icon.value * repetitions.value)
229
- return
230
-
231
-
232
- @app.cell(hide_code=True)
233
- def __(mo):
234
- mo.md(
235
- """
236
- ## 3. marimo is just Python
237
-
238
- marimo cells parse Python (and only Python), and marimo notebooks are
239
- stored as pure Python files — outputs are _not_ included. There's no
240
- magical syntax.
241
-
242
- The Python files generated by marimo are:
243
-
244
- - easily versioned with git, yielding minimal diffs
245
- - legible for both humans and machines
246
- - formattable using your tool of choice,
247
- - usable as Python scripts, with UI elements taking their default
248
- values, and
249
- - importable by other modules (more on that in the future).
250
- """
251
- )
252
- return
253
-
254
-
255
- @app.cell(hide_code=True)
256
- def __(mo):
257
- mo.md(
258
- """
259
- ## 4. Running notebooks as apps
260
-
261
- marimo notebooks can double as apps. Click the app window icon in the
262
- bottom-right to see this notebook in "app view."
263
-
264
- Serve a notebook as an app with `marimo run` at the command-line.
265
- Of course, you can use marimo just to level-up your
266
- notebooking, without ever making apps.
267
- """
268
- )
269
- return
270
-
271
-
272
- @app.cell(hide_code=True)
273
- def __(mo):
274
- mo.md(
275
- """
276
- ## 5. The `marimo` command-line tool
277
-
278
- **Creating and editing notebooks.** Use
279
-
280
- ```
281
- marimo edit
282
- ```
283
-
284
- in a terminal to start the marimo notebook server. From here
285
- you can create a new notebook or edit existing ones.
286
-
287
-
288
- **Running as apps.** Use
289
-
290
- ```
291
- marimo run notebook.py
292
- ```
293
-
294
- to start a webserver that serves your notebook as an app in read-only mode,
295
- with code cells hidden.
296
-
297
- **Convert a Jupyter notebook.** Convert a Jupyter notebook to a marimo
298
- notebook using `marimo convert`:
299
-
300
- ```
301
- marimo convert your_notebook.ipynb > your_app.py
302
- ```
303
-
304
- **Tutorials.** marimo comes packaged with tutorials:
305
-
306
- - `dataflow`: more on marimo's automatic execution
307
- - `ui`: how to use UI elements
308
- - `markdown`: how to write markdown, with interpolated values and
309
- LaTeX
310
- - `plots`: how plotting works in marimo
311
- - `sql`: how to use SQL
312
- - `layout`: layout elements in marimo
313
- - `fileformat`: how marimo's file format works
314
- - `markdown-format`: for using `.md` files in marimo
315
- - `for-jupyter-users`: if you are coming from Jupyter
316
-
317
- Start a tutorial with `marimo tutorial`; for example,
318
-
319
- ```
320
- marimo tutorial dataflow
321
- ```
322
-
323
- In addition to tutorials, we have examples in our
324
- [our GitHub repo](https://www.github.com/marimo-team/marimo/tree/main/examples).
325
- """
326
- )
327
- return
328
-
329
-
330
- @app.cell(hide_code=True)
331
- def __(mo):
332
- mo.md(
333
- """
334
- ## 6. The marimo editor
335
-
336
- Here are some tips to help you get started with the marimo editor.
337
- """
338
- )
339
- return
340
 
341
 
342
  @app.cell
343
- def __(mo, tips):
344
- mo.accordion(tips)
345
- return
346
-
347
-
348
- @app.cell(hide_code=True)
349
- def __(mo):
350
- mo.md("""## Finally, a fun fact""")
351
  return
352
 
353
 
354
- @app.cell(hide_code=True)
355
- def __(mo):
356
- mo.md(
357
- """
358
- The name "marimo" is a reference to a type of algae that, under
359
- the right conditions, clumps together to form a small sphere
360
- called a "marimo moss ball". Made of just strands of algae, these
361
- beloved assemblages are greater than the sum of their parts.
362
- """
363
- )
364
- return
365
-
366
-
367
- @app.cell(hide_code=True)
368
- def __():
369
- tips = {
370
- "Saving": (
371
- """
372
- **Saving**
373
-
374
- - _Name_ your app using the box at the top of the screen, or
375
- with `Ctrl/Cmd+s`. You can also create a named app at the
376
- command line, e.g., `marimo edit app_name.py`.
377
-
378
- - _Save_ by clicking the save icon on the bottom right, or by
379
- inputting `Ctrl/Cmd+s`. By default marimo is configured
380
- to autosave.
381
- """
382
- ),
383
- "Running": (
384
- """
385
- 1. _Run a cell_ by clicking the play ( ▷ ) button on the top
386
- right of a cell, or by inputting `Ctrl/Cmd+Enter`.
387
-
388
- 2. _Run a stale cell_ by clicking the yellow run button on the
389
- right of the cell, or by inputting `Ctrl/Cmd+Enter`. A cell is
390
- stale when its code has been modified but not run.
391
-
392
- 3. _Run all stale cells_ by clicking the play ( ▷ ) button on
393
- the bottom right of the screen, or input `Ctrl/Cmd+Shift+r`.
394
- """
395
- ),
396
- "Console Output": (
397
- """
398
- Console output (e.g., `print()` statements) is shown below a
399
- cell.
400
- """
401
- ),
402
- "Creating, Moving, and Deleting Cells": (
403
- """
404
- 1. _Create_ a new cell above or below a given one by clicking
405
- the plus button to the left of the cell, which appears on
406
- mouse hover.
407
-
408
- 2. _Move_ a cell up or down by dragging on the handle to the
409
- right of the cell, which appears on mouse hover.
410
-
411
- 3. _Delete_ a cell by clicking the trash bin icon. Bring it
412
- back by clicking the undo button on the bottom right of the
413
- screen, or with `Ctrl/Cmd+Shift+z`.
414
- """
415
- ),
416
- "Disabling Automatic Execution": (
417
- """
418
- Via the notebook settings (gear icon) or footer panel, you
419
- can disable automatic execution. This is helpful when
420
- working with expensive notebooks or notebooks that have
421
- side-effects like database transactions.
422
- """
423
- ),
424
- "Disabling Cells": (
425
- """
426
- You can disable a cell via the cell context menu.
427
- marimo will never run a disabled cell or any cells that depend on it.
428
- This can help prevent accidental execution of expensive computations
429
- when editing a notebook.
430
- """
431
- ),
432
- "Code Folding": (
433
- """
434
- You can collapse or fold the code in a cell by clicking the arrow
435
- icons in the line number column to the left, or by using keyboard
436
- shortcuts.
437
-
438
- Use the command palette (`Ctrl/Cmd+k`) or a keyboard shortcut to
439
- quickly fold or unfold all cells.
440
- """
441
- ),
442
- "Code Formatting": (
443
- """
444
- If you have [ruff](https://github.com/astral-sh/ruff) installed,
445
- you can format a cell with the keyboard shortcut `Ctrl/Cmd+b`.
446
- """
447
- ),
448
- "Command Palette": (
449
- """
450
- Use `Ctrl/Cmd+k` to open the command palette.
451
- """
452
- ),
453
- "Keyboard Shortcuts": (
454
- """
455
- Open the notebook menu (top-right) or input `Ctrl/Cmd+Shift+h` to
456
- view a list of all keyboard shortcuts.
457
- """
458
- ),
459
- "Configuration": (
460
- """
461
- Configure the editor by clicking the gears icon near the top-right
462
- of the screen.
463
- """
464
- ),
465
- }
466
- return (tips,)
467
-
468
-
469
  if __name__ == "__main__":
470
  app.run()
 
1
  import marimo
2
 
3
+ __generated_with = "0.14.16"
4
  app = marimo.App()
5
 
6
 
7
  @app.cell
8
+ def _():
9
  import marimo as mo
 
 
10
  return (mo,)
11
 
12
 
13
  @app.cell
14
+ def _(mo):
15
+ mo.center(mo.md("# Home Credit Default Risk Prediction"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  return
17
 
18
 
19
  @app.cell
20
+ def _(mo):
21
+ mo.md("""## Importing Libraries""")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  return
23
 
24
 
25
  @app.cell
26
+ def _():
27
+ from src.utils import get_dataset, get_features_target, get_train_test_sets
28
+ return get_dataset, get_features_target, get_train_test_sets
29
 
30
 
31
  @app.cell
32
+ def _(get_dataset):
33
+ df = get_dataset()
34
+ return (df,)
35
 
36
 
37
  @app.cell
38
+ def _(df, get_features_target):
39
+ X, y = get_features_target(df)
40
+ return X, y
41
 
42
 
43
  @app.cell
44
+ def _(X, get_train_test_sets, y):
45
+ X_train, y_train, X_test, y_test = get_train_test_sets(X, y)
46
+ return X_test, X_train, y_test, y_train
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
 
49
  @app.cell
50
+ def _(X_test, X_train, y_test, y_train):
51
+ X_train.shape, y_train.shape, X_test.shape, y_test.shape
 
 
 
 
 
 
52
  return
53
 
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  if __name__ == "__main__":
56
  app.run()
requirements.txt CHANGED
@@ -1,5 +1,4 @@
1
- marimo
2
- # Or a specific version
3
- # marimo>=0.9.0
4
-
5
- # Add other dependencies as needed
 
1
+ marimo==0.14.16
2
+ pandas==2.3.1
3
+ ruff==0.12.7
4
+ scikit-learn==1.7.1
 
src/__init__.py ADDED
File without changes
src/config.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
from pathlib import Path

# Directory named "dataset" located two levels above this file
# (i.e. next to the package directory that contains this module).
DATASET_FOLDER_PATH = Path(__file__).parent.parent / "dataset"

# Location of the CSV file inside the dataset folder, exposed as a plain
# string. The ``/`` operator already yields a Path, so it is converted
# directly without wrapping it in another Path().
DATASET_FILE_PATH = str(DATASET_FOLDER_PATH / "home_credit_dataset.csv")
src/utils.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pandas import DataFrame, Series, read_csv
2
+ from sklearn.model_selection import train_test_split
3
+
4
+ from src.config import DATASET_FILE_PATH
5
+
6
+
7
def get_dataset() -> DataFrame:
    """
    Load the dataset from the configured CSV file.

    Returns:
        DataFrame: The contents of the CSV at ``DATASET_FILE_PATH``, or an
            empty DataFrame when that file does not exist.
    """
    try:
        # read_csv already returns a DataFrame; no extra wrapping is needed.
        return read_csv(DATASET_FILE_PATH)
    except FileNotFoundError:
        # Keep the best-effort contract (an empty frame is returned), but
        # surface the missing file instead of swallowing it silently.
        import warnings

        warnings.warn(
            f"Dataset file not found: {DATASET_FILE_PATH}; "
            "returning an empty DataFrame",
            stacklevel=2,
        )
        return DataFrame(data={})
18
+
19
+
20
def get_features_target(
    df: DataFrame, target_column: str = "TARGET"
) -> tuple[DataFrame, Series]:
    """
    Split the dataset into features and target.

    Args:
        df (DataFrame): The dataset as a DataFrame
        target_column (str): Name of the column holding the target.
            Defaults to "TARGET".

    Returns:
        tuple[DataFrame, Series]: The features (every column except the
            target column) and the target column, as a tuple

    Raises:
        KeyError: If ``target_column`` is not a column of ``df``
    """
    # ``columns=`` already selects the axis, so no ``axis`` argument is passed.
    # drop() returns a new frame; the input DataFrame is left unchanged.
    return df.drop(columns=[target_column]), df[target_column]
31
+
32
+
33
def get_train_test_sets(
    X: DataFrame,
    y: Series,
    *,
    test_size: float = 0.2,
    random_state: int = 42,
) -> tuple[DataFrame, Series, DataFrame, Series]:
    """
    Split the features and target into train and test sets.

    Args:
        X (DataFrame): The features as a DataFrame
        y (Series): The target as a Series
        test_size (float): Value forwarded to ``train_test_split`` as
            ``test_size``. Defaults to 0.2.
        random_state (int): Value forwarded to ``train_test_split`` as
            ``random_state``. Defaults to 42.

    Returns:
        tuple[DataFrame, Series, DataFrame, Series]: The sets in the order
            ``(X_train, y_train, X_test, y_test)``
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    # NOTE: the return order deliberately differs from train_test_split's
    # (X_train, X_test, y_train, y_test); train pieces come first, then test.
    return X_train, y_train, X_test, y_test