Ahmedik95316 committed on
Commit
091b449
·
verified ·
1 Parent(s): 2d38242

Update initialize_system.py

Browse files
Files changed (1) hide show
  1. initialize_system.py +2 -81
initialize_system.py CHANGED
@@ -188,90 +188,11 @@ def create_minimal_dataset():
188
  return False
189
 
190
 
191
- def run_initial_training():
192
- """Run enhanced ensemble model training with LightGBM"""
193
- log_step("Starting initial model training...")
194
-
195
- try:
196
- # Get all the paths
197
- model_path = path_manager.get_model_file_path()
198
- vectorizer_path = path_manager.get_vectorizer_path()
199
- pipeline_path = path_manager.get_pipeline_path()
200
-
201
- log_step(f"Model path: {model_path}")
202
- log_step(f"Vectorizer path: {vectorizer_path}")
203
- log_step(f"Pipeline path: {pipeline_path}")
204
 
205
- # Check if model already exists
206
- if pipeline_path.exists() or (model_path.exists() and vectorizer_path.exists()):
207
- log_step("✅ Model files already exist, skipping training")
208
- return True
209
-
210
- # Import enhanced training components
211
- import sys
212
- sys.path.append('/app')
213
- from model.train import EnhancedModelTrainer
214
-
215
- log_step("Using Enhanced Model Trainer with ensemble voting...")
216
-
217
- # Create enhanced trainer with full ensemble configuration
218
- trainer = EnhancedModelTrainer(
219
- use_enhanced_features=True, # Enable sentiment, readability, entities, linguistic features
220
- enable_ensemble=True # Enable LightGBM + Random Forest + Logistic Regression ensemble
221
- )
222
-
223
- # Override paths to use the initialization system paths
224
- trainer.data_path = path_manager.get_combined_dataset_path()
225
- trainer.pipeline_path = pipeline_path
226
- trainer.model_path = model_path
227
- trainer.vectorizer_path = vectorizer_path
228
- trainer.metadata_path = path_manager.get_metadata_path()
229
-
230
- log_step("Starting enhanced ensemble training (this may take several minutes)...")
231
-
232
- # Run the full enhanced training
233
- success, message = trainer.train_model()
234
-
235
- if success:
236
- log_step(f"✅ Enhanced ensemble training completed: {message}")
237
-
238
- # Verify pipeline was created
239
- if pipeline_path.exists():
240
- log_step(f"✅ Enhanced pipeline saved successfully to {pipeline_path}")
241
-
242
- # Test loading the pipeline
243
- try:
244
- import joblib
245
- test_pipeline = joblib.load(pipeline_path)
246
- test_pred = test_pipeline.predict(["This is a test article"])
247
- log_step(f"✅ Enhanced pipeline verification successful: {test_pred}")
248
- except Exception as e:
249
- log_step(f"⚠️ Enhanced pipeline verification failed: {e}")
250
- else:
251
- log_step(f"❌ Enhanced pipeline was not saved to {pipeline_path}")
252
- return False
253
-
254
- return True
255
- else:
256
- log_step(f"❌ Enhanced ensemble training failed: {message}")
257
- # Fall back to basic training if enhanced training fails
258
- log_step("Falling back to basic training...")
259
- return run_initial_training()
260
-
261
- except ImportError as e:
262
- log_step(f"⚠️ Enhanced training components not available: {e}")
263
- log_step("Falling back to basic training...")
264
- return run_basic_training_fallback()
265
- except Exception as e:
266
- log_step(f"❌ Enhanced training failed: {str(e)}")
267
- import traceback
268
- log_step(f"❌ Traceback: {traceback.format_exc()}")
269
- log_step("Falling back to basic training...")
270
- return run_basic_training_fallback()
271
 
272
 
273
- def run_basic_training_fallback():
274
- """Fallback to basic training if enhanced training fails"""
275
  log_step("Running basic training fallback...")
276
 
277
  try:
 
188
  return False
189
 
190
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
 
193
 
194
+ def run_initial_training():
195
+ """Run basic model training"""
196
  log_step("Running basic training fallback...")
197
 
198
  try: