Update initialize_system.py
Browse files- initialize_system.py +2 -81
initialize_system.py
CHANGED
|
@@ -188,90 +188,11 @@ def create_minimal_dataset():
|
|
| 188 |
return False
|
| 189 |
|
| 190 |
|
| 191 |
-
def run_initial_training():
|
| 192 |
-
"""Run enhanced ensemble model training with LightGBM"""
|
| 193 |
-
log_step("Starting initial model training...")
|
| 194 |
-
|
| 195 |
-
try:
|
| 196 |
-
# Get all the paths
|
| 197 |
-
model_path = path_manager.get_model_file_path()
|
| 198 |
-
vectorizer_path = path_manager.get_vectorizer_path()
|
| 199 |
-
pipeline_path = path_manager.get_pipeline_path()
|
| 200 |
-
|
| 201 |
-
log_step(f"Model path: {model_path}")
|
| 202 |
-
log_step(f"Vectorizer path: {vectorizer_path}")
|
| 203 |
-
log_step(f"Pipeline path: {pipeline_path}")
|
| 204 |
|
| 205 |
-
# Check if model already exists
|
| 206 |
-
if pipeline_path.exists() or (model_path.exists() and vectorizer_path.exists()):
|
| 207 |
-
log_step("✅ Model files already exist, skipping training")
|
| 208 |
-
return True
|
| 209 |
-
|
| 210 |
-
# Import enhanced training components
|
| 211 |
-
import sys
|
| 212 |
-
sys.path.append('/app')
|
| 213 |
-
from model.train import EnhancedModelTrainer
|
| 214 |
-
|
| 215 |
-
log_step("Using Enhanced Model Trainer with ensemble voting...")
|
| 216 |
-
|
| 217 |
-
# Create enhanced trainer with full ensemble configuration
|
| 218 |
-
trainer = EnhancedModelTrainer(
|
| 219 |
-
use_enhanced_features=True, # Enable sentiment, readability, entities, linguistic features
|
| 220 |
-
enable_ensemble=True # Enable LightGBM + Random Forest + Logistic Regression ensemble
|
| 221 |
-
)
|
| 222 |
-
|
| 223 |
-
# Override paths to use the initialization system paths
|
| 224 |
-
trainer.data_path = path_manager.get_combined_dataset_path()
|
| 225 |
-
trainer.pipeline_path = pipeline_path
|
| 226 |
-
trainer.model_path = model_path
|
| 227 |
-
trainer.vectorizer_path = vectorizer_path
|
| 228 |
-
trainer.metadata_path = path_manager.get_metadata_path()
|
| 229 |
-
|
| 230 |
-
log_step("Starting enhanced ensemble training (this may take several minutes)...")
|
| 231 |
-
|
| 232 |
-
# Run the full enhanced training
|
| 233 |
-
success, message = trainer.train_model()
|
| 234 |
-
|
| 235 |
-
if success:
|
| 236 |
-
log_step(f"✅ Enhanced ensemble training completed: {message}")
|
| 237 |
-
|
| 238 |
-
# Verify pipeline was created
|
| 239 |
-
if pipeline_path.exists():
|
| 240 |
-
log_step(f"✅ Enhanced pipeline saved successfully to {pipeline_path}")
|
| 241 |
-
|
| 242 |
-
# Test loading the pipeline
|
| 243 |
-
try:
|
| 244 |
-
import joblib
|
| 245 |
-
test_pipeline = joblib.load(pipeline_path)
|
| 246 |
-
test_pred = test_pipeline.predict(["This is a test article"])
|
| 247 |
-
log_step(f"✅ Enhanced pipeline verification successful: {test_pred}")
|
| 248 |
-
except Exception as e:
|
| 249 |
-
log_step(f"⚠️ Enhanced pipeline verification failed: {e}")
|
| 250 |
-
else:
|
| 251 |
-
log_step(f"❌ Enhanced pipeline was not saved to {pipeline_path}")
|
| 252 |
-
return False
|
| 253 |
-
|
| 254 |
-
return True
|
| 255 |
-
else:
|
| 256 |
-
log_step(f"❌ Enhanced ensemble training failed: {message}")
|
| 257 |
-
# Fall back to basic training if enhanced training fails
|
| 258 |
-
log_step("Falling back to basic training...")
|
| 259 |
-
return run_initial_training()
|
| 260 |
-
|
| 261 |
-
except ImportError as e:
|
| 262 |
-
log_step(f"⚠️ Enhanced training components not available: {e}")
|
| 263 |
-
log_step("Falling back to basic training...")
|
| 264 |
-
return run_basic_training_fallback()
|
| 265 |
-
except Exception as e:
|
| 266 |
-
log_step(f"❌ Enhanced training failed: {str(e)}")
|
| 267 |
-
import traceback
|
| 268 |
-
log_step(f"❌ Traceback: {traceback.format_exc()}")
|
| 269 |
-
log_step("Falling back to basic training...")
|
| 270 |
-
return run_basic_training_fallback()
|
| 271 |
|
| 272 |
|
| 273 |
-
def
|
| 274 |
-
"""
|
| 275 |
log_step("Running basic training fallback...")
|
| 276 |
|
| 277 |
try:
|
|
|
|
| 188 |
return False
|
| 189 |
|
| 190 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
|
| 193 |
|
| 194 |
+
def run_initial_training():
|
| 195 |
+
"""Run basic model training"""
|
| 196 |
log_step("Running basic training fallback...")
|
| 197 |
|
| 198 |
try:
|