Spaces:

Ahmedik95316
/

Fake-News-Detection-with-MLOps

Running

App Files Community

Ahmedik95316 committed on Aug 29

Commit

c4fbb31

verified ·

1 Parent(s): 44dceca

Update initialize_system.py

Browse files

Files changed (1) hide show

initialize_system.py +65 -5

initialize_system.py CHANGED Viewed

@@ -1,10 +1,11 @@
 import os
 import sys
 import shutil
 import pandas as pd
-import json
 from pathlib import Path
 from datetime import datetime
 # Import the new path manager
 try:
@@ -274,14 +275,73 @@ def run_initial_training():
             ))
         ])
-        # Train model
-        log_step("Training model...")
         pipeline.fit(X_train, y_train)
-        # Evaluate
         y_pred = pipeline.predict(X_test)
         accuracy = accuracy_score(y_test, y_pred)
         f1 = f1_score(y_test, y_pred, average='weighted')
         # Ensure model directory exists
         model_path.parent.mkdir(parents=True, exist_ok=True)

 import os
 import sys
+import json
 import shutil
 import pandas as pd
 from pathlib import Path
 from datetime import datetime
+from sklearn.model_selection import cross_validate
 # Import the new path manager
 try:
             ))
         ])
+        # Train model with cross-validation
+        log_step("Training model with cross-validation...")
+        # Perform cross-validation before final training
+        cv_results = cross_validate(
+            pipeline, X_train, y_train,
+            cv=3,
+            scoring=['accuracy', 'f1_weighted', 'precision_weighted', 'recall_weighted'],
+            return_train_score=True
+        )
+        # Train final model on all training data
         pipeline.fit(X_train, y_train)
+        # Evaluate on test set
         y_pred = pipeline.predict(X_test)
         accuracy = accuracy_score(y_test, y_pred)
         f1 = f1_score(y_test, y_pred, average='weighted')
+        # Save CV results for API access
+        cv_data = {
+            "methodology": {
+                "n_splits": 3,
+                "cv_type": "StratifiedKFold",
+                "random_state": 42
+            },
+            "test_scores": {
+                "accuracy": {
+                    "mean": float(cv_results['test_accuracy'].mean()),
+                    "std": float(cv_results['test_accuracy'].std()),
+                    "scores": cv_results['test_accuracy'].tolist()
+                },
+                "f1": {
+                    "mean": float(cv_results['test_f1_weighted'].mean()),
+                    "std": float(cv_results['test_f1_weighted'].std()),
+                    "scores": cv_results['test_f1_weighted'].tolist()
+                }
+            },
+            "train_scores": {
+                "accuracy": {
+                    "mean": float(cv_results['train_accuracy'].mean()),
+                    "std": float(cv_results['train_accuracy'].std()),
+                    "scores": cv_results['train_accuracy'].tolist()
+                },
+                "f1": {
+                    "mean": float(cv_results['train_f1_weighted'].mean()),
+                    "std": float(cv_results['train_f1_weighted'].std()),
+                    "scores": cv_results['train_f1_weighted'].tolist()
+                }
+            }
+        }
+        # Calculate quality indicators
+        train_acc_mean = cv_data['train_scores']['accuracy']['mean']
+        test_acc_mean = cv_data['test_scores']['accuracy']['mean']
+        test_acc_std = cv_data['test_scores']['accuracy']['std']
+        cv_data['performance_indicators'] = {
+            'overfitting_score': float(train_acc_mean - test_acc_mean),
+            'stability_score': float(1 - (test_acc_std / test_acc_mean)) if test_acc_mean > 0 else 0
+        }
+        # Save CV results to file
+        cv_results_path = path_manager.get_logs_path("cv_results.json")
+        with open(cv_results_path, 'w') as f:
+            json.dump(cv_data, f, indent=2)
+        log_step(f"Saved CV results to: {cv_results_path}")
         # Ensure model directory exists
         model_path.parent.mkdir(parents=True, exist_ok=True)