Ahmedik95316 committed on
Commit 593b44f · 1 Parent(s): dff1572

Update app/fastapi_server.py


Restoring previous working version

Files changed (1)
  1. app/fastapi_server.py +1205 -396
app/fastapi_server.py CHANGED
@@ -1,5 +1,3 @@
- # Enhanced app/fastapi_server.py with LightGBM ensemble support
-
  import json
  import time
  import joblib
@@ -26,13 +24,6 @@ from fastapi.middleware.trustedhost import TrustedHostMiddleware
  from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
  from fastapi import FastAPI, HTTPException, Depends, Request, BackgroundTasks, status

- # LightGBM availability check
- try:
-     import lightgbm as lgb
-     LIGHTGBM_AVAILABLE = True
- except ImportError:
-     LIGHTGBM_AVAILABLE = False
-
  from data.data_validator import (
      DataValidationPipeline, validate_text, validate_articles_list,
      get_validation_stats, generate_quality_report
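The deleted block above is the standard optional-dependency guard: probe the import once at module load and record the outcome in a module-level flag, so later code can branch without re-importing. A minimal, self-contained sketch of the pattern (the `build_estimators` consumer is hypothetical, not part of this file):

```python
# Optional-dependency guard: try the import once, remember whether it worked.
try:
    import lightgbm as lgb
    LIGHTGBM_AVAILABLE = True
except ImportError:
    LIGHTGBM_AVAILABLE = False


def build_estimators():
    """Hypothetical consumer: include a LightGBM member only when available."""
    from sklearn.linear_model import LogisticRegression

    estimators = [("logreg", LogisticRegression(max_iter=1000))]
    if LIGHTGBM_AVAILABLE:
        estimators.append(("lgbm", lgb.LGBMClassifier()))
    return estimators
```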
@@ -48,10 +39,12 @@ from deployment.traffic_router import TrafficRouter
  from deployment.model_registry import ModelRegistry
  from deployment.blue_green_manager import BlueGreenDeploymentManager

- # Import the path manager
  try:
      from path_config import path_manager
  except ImportError:
      import sys
      import os
      sys.path.append(os.path.dirname(os.path.abspath(__file__)))
@@ -60,21 +53,26 @@ except ImportError:
  # Configure logging with fallback for permission issues
  def setup_logging():
      """Setup logging with fallback for environments with restricted file access"""
-     handlers = [logging.StreamHandler()]

      try:
          log_file_path = path_manager.get_logs_path('fastapi_server.log')
          log_file_path.parent.mkdir(parents=True, exist_ok=True)

          test_handler = logging.FileHandler(log_file_path)
          test_handler.close()

          handlers.append(logging.FileHandler(log_file_path))
-         print(f"Logging to file: {log_file_path}")

      except (PermissionError, OSError) as e:
          print(f"Cannot create log file, using console only: {e}")

          try:
              import tempfile
              temp_log = tempfile.NamedTemporaryFile(mode='w', suffix='.log', delete=False, prefix='fastapi_')
@@ -86,7 +84,7 @@ def setup_logging():

      return handlers

- # Setup logging
  logging.basicConfig(
      level=logging.INFO,
      format='%(asctime)s - %(levelname)s - %(message)s',
@@ -94,7 +92,7 @@ logging.basicConfig(
  )
  logger = logging.getLogger(__name__)

- # Log environment info
  try:
      path_manager.log_environment_info()
  except Exception as e:
@@ -107,86 +105,49 @@ security = HTTPBearer(auto_error=False)
  rate_limit_storage = defaultdict(list)


- class EnhancedModelManager:
-     """Enhanced model manager with LightGBM ensemble support"""

      def __init__(self):
          self.model = None
          self.vectorizer = None
          self.pipeline = None
-         self.ensemble = None
          self.model_metadata = {}
-         self.ensemble_metadata = {}
          self.last_health_check = None
          self.health_status = "unknown"
-         self.model_type = "unknown"
-         self.is_ensemble = False
          self.load_model()

      def load_model(self):
-         """Load model with comprehensive error handling and ensemble support"""
          try:
-             logger.info("Loading ML model with ensemble support...")

              # Initialize all to None first
              self.model = None
              self.vectorizer = None
              self.pipeline = None
-             self.ensemble = None
-             self.is_ensemble = False

-             # Check for ensemble model first
-             ensemble_path = Path("/tmp/ensemble.pkl")
-             ensemble_metadata_path = Path("/tmp/ensemble_metadata.json")

-             if ensemble_path.exists():
              try:
-                 self.ensemble = joblib.load(ensemble_path)
-                 self.pipeline = self.ensemble  # Use ensemble as pipeline
-                 self.model_type = "ensemble"
-                 self.is_ensemble = True
-
-                 # Load ensemble metadata
-                 if ensemble_metadata_path.exists():
-                     with open(ensemble_metadata_path, 'r') as f:
-                         self.ensemble_metadata = json.load(f)
-                     logger.info(f"Loaded ensemble metadata: {self.ensemble_metadata.get('ensemble_type', 'unknown')}")
-
-                 logger.info("Loaded ensemble model successfully")
-                 logger.info(f"Ensemble type: {self.ensemble_metadata.get('ensemble_type', 'voting_classifier')}")
-                 logger.info(f"Component models: {self.ensemble_metadata.get('component_models', [])}")
-
              except Exception as e:
-                 logger.warning(f"Failed to load ensemble model: {e}, falling back to individual pipeline")
-                 self.ensemble = None
-
-             # Try to load pipeline if ensemble not available
-             if self.pipeline is None:
-                 pipeline_path = path_manager.get_pipeline_path()
-                 logger.info(f"Checking for pipeline at: {pipeline_path}")
-
-                 if pipeline_path.exists():
-                     try:
-                         self.pipeline = joblib.load(pipeline_path)
-                         # Extract components from pipeline
-                         if hasattr(self.pipeline, 'named_steps'):
-                             self.model = self.pipeline.named_steps.get('model')
-                             self.vectorizer = (self.pipeline.named_steps.get('vectorizer') or
-                                                self.pipeline.named_steps.get('vectorize'))
-
-                         # Check if this is actually an ensemble pipeline
-                         if 'ensemble' in self.pipeline.named_steps:
-                             self.model_type = "ensemble_pipeline"
-                             self.is_ensemble = True
-                             logger.info("Detected ensemble within pipeline")
-
-                         logger.info("Loaded model pipeline successfully")
-                         logger.info(f"Pipeline steps: {list(self.pipeline.named_steps.keys()) if hasattr(self.pipeline, 'named_steps') else 'No named_steps'}")
-                     except Exception as e:
-                         logger.warning(f"Failed to load pipeline: {e}, falling back to individual components")
-                         self.pipeline = None

-         # If pipeline loading failed, load individual components
          if self.pipeline is None:
              model_path = path_manager.get_model_file_path()
              vectorizer_path = path_manager.get_vectorizer_path()
@@ -198,52 +159,35 @@ class EnhancedModelManager:
              try:
                  self.model = joblib.load(model_path)
                  self.vectorizer = joblib.load(vectorizer_path)
-                 self.model_type = "individual_components"
                  logger.info("Loaded model components successfully")
              except Exception as e:
                  logger.error(f"Failed to load individual components: {e}")
                  raise e
              else:
-                 raise FileNotFoundError(f"No model files found")

          # Load metadata
          metadata_path = path_manager.get_metadata_path()
          if metadata_path.exists():
              with open(metadata_path, 'r') as f:
                  self.model_metadata = json.load(f)
-
-             # Update model type and ensemble status from metadata
-             if self.model_metadata.get('is_ensemble', False):
-                 self.is_ensemble = True
-                 if not self.model_type.startswith('ensemble'):
-                     self.model_type = "ensemble_from_metadata"
-
              logger.info(f"Loaded model metadata: {self.model_metadata.get('model_version', 'Unknown')}")
-             logger.info(f"Model type from metadata: {self.model_metadata.get('model_type', 'unknown')}")
-             logger.info(f"Is ensemble: {self.is_ensemble}")
-
-             if self.is_ensemble and 'ensemble_details' in self.model_metadata:
-                 ensemble_details = self.model_metadata['ensemble_details']
-                 logger.info(f"Ensemble details: {ensemble_details}")
          else:
              logger.warning(f"Metadata file not found at: {metadata_path}")
              self.model_metadata = {"model_version": "unknown"}

-         # Verify we have what we need for predictions
-         if self.pipeline is None and (self.model is None or self.vectorizer is None):
-             raise ValueError("Neither complete pipeline nor individual model components are available")
-
          self.health_status = "healthy"
          self.last_health_check = datetime.now()

          # Log what was successfully loaded
          logger.info(f"Model loading summary:")
          logger.info(f"  Pipeline available: {self.pipeline is not None}")
-         logger.info(f"  Individual model available: {self.model is not None}")
          logger.info(f"  Vectorizer available: {self.vectorizer is not None}")
-         logger.info(f"  Ensemble available: {self.ensemble is not None}")
-         logger.info(f"  Model type: {self.model_type}")
-         logger.info(f"  Is ensemble: {self.is_ensemble}")

      except Exception as e:
          logger.error(f"Failed to load model: {e}")
@@ -252,21 +196,15 @@ class EnhancedModelManager:
          self.model = None
          self.vectorizer = None
          self.pipeline = None
-         self.ensemble = None

      def predict(self, text: str) -> tuple[str, float]:
-         """Make prediction with enhanced ensemble support"""
          try:
              if self.pipeline:
-                 # Use pipeline for prediction (works for both ensemble and individual models)
                  prediction = self.pipeline.predict([text])[0]
                  probabilities = self.pipeline.predict_proba([text])[0]
-
-                 if self.is_ensemble:
-                     logger.debug("Used ensemble pipeline for prediction")
-                 else:
-                     logger.debug("Used individual model pipeline for prediction")
-
              elif self.model and self.vectorizer:
                  # Use individual components
                  X = self.vectorizer.transform([text])
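The hunk cuts off before the return value is assembled; with a scikit-learn style pipeline, the usual recipe is to take the argmax probability as the confidence and map the predicted class to a display label. A sketch under that assumption (the `"Fake"`/`"Real"` encoding is illustrative, not confirmed by this diff):

```python
import numpy as np

def label_and_confidence(pipeline, text: str) -> tuple[str, float]:
    probabilities = pipeline.predict_proba([text])[0]  # one row of class probabilities
    prediction = pipeline.predict([text])[0]
    confidence = float(np.max(probabilities))          # probability of the chosen class
    # Assumed class encoding: 1 -> "Fake", 0 -> "Real"
    return ("Fake" if prediction == 1 else "Real", confidence)
```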
@@ -293,7 +231,7 @@ class EnhancedModelManager:
              )

      def health_check(self) -> Dict[str, Any]:
-         """Perform health check with ensemble information"""
          try:
              # Test prediction with sample text
              test_text = "This is a test article for health check purposes."
@@ -302,44 +240,26 @@ class EnhancedModelManager:
              self.health_status = "healthy"
              self.last_health_check = datetime.now()

-             health_info = {
                  "status": "healthy",
                  "last_check": self.last_health_check.isoformat(),
                  "model_available": self.model is not None,
                  "vectorizer_available": self.vectorizer is not None,
                  "pipeline_available": self.pipeline is not None,
-                 "ensemble_available": self.ensemble is not None,
-                 "model_type": self.model_type,
-                 "is_ensemble": self.is_ensemble,
                  "test_prediction": {"label": label, "confidence": confidence},
                  "environment": path_manager.environment,
-                 "lightgbm_available": LIGHTGBM_AVAILABLE,
-                 "model_paths": {
-                     "pipeline": str(path_manager.get_pipeline_path()),
-                     "ensemble": "/tmp/ensemble.pkl",
-                     "model": str(path_manager.get_model_file_path()),
-                     "vectorizer": str(path_manager.get_vectorizer_path())
-                 },
                  "file_exists": {
-                     "pipeline": path_manager.get_pipeline_path().exists(),
-                     "ensemble": Path("/tmp/ensemble.pkl").exists(),
                      "model": path_manager.get_model_file_path().exists(),
                      "vectorizer": path_manager.get_vectorizer_path().exists(),
-                     "metadata": path_manager.get_metadata_path().exists(),
-                     "ensemble_metadata": Path("/tmp/ensemble_metadata.json").exists()
                  }
              }

-             # Add ensemble-specific information
-             if self.is_ensemble:
-                 health_info["ensemble_info"] = {
-                     "ensemble_type": self.ensemble_metadata.get('ensemble_type', 'unknown'),
-                     "component_models": self.ensemble_metadata.get('component_models', []),
-                     "voting_type": self.model_metadata.get('ensemble_details', {}).get('voting_type', 'unknown')
-                 }
-
-             return health_info
-
          except Exception as e:
              self.health_status = "unhealthy"
              self.last_health_check = datetime.now()
@@ -351,15 +271,21 @@ class EnhancedModelManager:
                  "model_available": self.model is not None,
                  "vectorizer_available": self.vectorizer is not None,
                  "pipeline_available": self.pipeline is not None,
-                 "ensemble_available": self.ensemble is not None,
-                 "model_type": self.model_type,
-                 "is_ensemble": self.is_ensemble,
                  "environment": path_manager.environment,
-                 "lightgbm_available": LIGHTGBM_AVAILABLE
              }


- # Background task functions remain the same...
  async def log_prediction(text: str, prediction: str, confidence: float, client_ip: str, processing_time: float):
      """Log prediction details with error handling for file access"""
      try:
@@ -370,9 +296,7 @@ async def log_prediction(text: str, prediction: str, confidence: float, client_i
              "prediction": prediction,
              "confidence": confidence,
              "processing_time": processing_time,
-             "text_hash": hashlib.md5(text.encode()).hexdigest(),
-             "model_type": model_manager.model_type,
-             "is_ensemble": model_manager.is_ensemble
          }

          # Try to save to log file
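Beyond the removed ensemble fields, the `text_hash` entry is worth a note: logging an MD5 digest lets repeated submissions be correlated without persisting article text in the logs. A standalone sketch of the idea:

```python
import hashlib

def text_fingerprint(text: str) -> str:
    # MD5 is acceptable here: the digest is a log-correlation key, not a security boundary.
    return hashlib.md5(text.encode()).hexdigest()

assert text_fingerprint("same text") == text_fingerprint("same text")
```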
@@ -401,6 +325,7 @@ async def log_prediction(text: str, prediction: str, confidence: float, client_i
              await f.write(json.dumps(logs, indent=2))

      except (PermissionError, OSError) as e:
          logger.warning(f"Cannot write prediction log to file: {e}")
          logger.info(f"Prediction logged: {json.dumps(log_entry)}")

@@ -408,8 +333,27 @@ async def log_prediction(text: str, prediction: str, confidence: float, client_i
          logger.error(f"Failed to log prediction: {e}")


  # Global variables
- model_manager = EnhancedModelManager()

  # Initialize automation manager
  automation_manager = None
@@ -419,21 +363,17 @@ deployment_manager = None
  traffic_router = None
  model_registry = None

  @asynccontextmanager
  async def lifespan(app: FastAPI):
-     """Manage application lifespan with enhanced model support"""
      global deployment_manager, traffic_router, model_registry

-     logger.info("Starting Enhanced FastAPI application with ensemble support...")

      # Startup tasks
      model_manager.load_model()

-     # Log model information
-     logger.info(f"Model loaded: {model_manager.model_type}")
-     logger.info(f"Ensemble support: {model_manager.is_ensemble}")
-     logger.info(f"LightGBM available: {LIGHTGBM_AVAILABLE}")
-
      # Initialize deployment components
      try:
          deployment_manager = BlueGreenDeploymentManager()
@@ -443,37 +383,72 @@ async def lifespan(app: FastAPI):
      except Exception as e:
          logger.error(f"Failed to initialize deployment system: {e}")

-     # Initialize monitoring
-     try:
-         prediction_monitor = PredictionMonitor(base_dir=Path("/tmp"))
-         metrics_collector = MetricsCollector(base_dir=Path("/tmp"))
-         alert_system = AlertSystem(base_dir=Path("/tmp"))
-
-         prediction_monitor.start_monitoring()
-         alert_system.add_notification_handler("console", console_notification_handler)
-         logger.info("Monitoring system initialized")
-     except Exception as e:
-         logger.error(f"Failed to initialize monitoring: {e}")

      yield

      # Shutdown tasks
-     logger.info("Shutting down Enhanced FastAPI application...")

  # Create FastAPI app
  app = FastAPI(
-     title="Enhanced Fake News Detection API with Ensemble Support",
-     description="Production-ready API for fake news detection with LightGBM ensemble support and comprehensive monitoring",
-     version="2.1.0",
      docs_url="/docs",
      redoc_url="/redoc",
      lifespan=lifespan
  )

- # Add middleware (same as before)
  app.add_middleware(
      CORSMiddleware,
-     allow_origins=["*"],
      allow_credentials=True,
      allow_methods=["*"],
      allow_headers=["*"],
@@ -481,31 +456,38 @@ app.add_middleware(

  app.add_middleware(
      TrustedHostMiddleware,
-     allowed_hosts=["*"]
  )

- # Enhanced prediction response model
- class EnhancedPredictionResponse(BaseModel):
-     prediction: str = Field(..., description="Prediction result: 'Real' or 'Fake'")
-     confidence: float = Field(..., ge=0.0, le=1.0, description="Confidence score between 0 and 1")
-     model_version: str = Field(..., description="Version of the model used for prediction")
-     model_type: str = Field(..., description="Type of model: individual, ensemble, etc.")
-     is_ensemble: bool = Field(..., description="Whether an ensemble model was used")
-     ensemble_info: Optional[Dict[str, Any]] = Field(None, description="Ensemble-specific information")
-     timestamp: str = Field(..., description="Timestamp of the prediction")
-     processing_time: float = Field(..., description="Time taken for processing in seconds")

- # Enhanced health response model
- class EnhancedHealthResponse(BaseModel):
-     status: str
-     timestamp: str
-     model_health: Dict[str, Any]
-     system_health: Dict[str, Any]
-     api_health: Dict[str, Any]
-     environment_info: Dict[str, Any]
-     ensemble_info: Optional[Dict[str, Any]] = None

- # Request models remain the same...
  class PredictionRequest(BaseModel):
      text: str = Field(..., min_length=1, max_length=10000,
                        description="Text to analyze for fake news detection")
@@ -514,15 +496,67 @@ class PredictionRequest(BaseModel):
      def validate_text(cls, v):
          if not v or not v.strip():
              raise ValueError('Text cannot be empty')
          if len(v.strip()) < 10:
              raise ValueError('Text must be at least 10 characters long')
          suspicious_patterns = ['<script', 'javascript:', 'data:']
          if any(pattern in v.lower() for pattern in suspicious_patterns):
              raise ValueError('Text contains suspicious content')
          return v.strip()


- # Rate limiting and error handlers remain the same...
  async def rate_limit_check(request: Request):
      """Check rate limits"""
      client_ip = request.client.host
@@ -531,7 +565,7 @@ async def rate_limit_check(request: Request):
      # Clean old entries
      rate_limit_storage[client_ip] = [
          timestamp for timestamp in rate_limit_storage[client_ip]
-         if current_time - timestamp < 3600
      ]

      # Check rate limit (100 requests per hour)
@@ -545,11 +579,14 @@
      rate_limit_storage[client_ip].append(current_time)


  @app.middleware("http")
  async def log_requests(request: Request, call_next):
-     """Log all requests with ensemble information"""
      start_time = time.time()
      response = await call_next(request)
      process_time = time.time() - start_time

      log_data = {
@@ -558,42 +595,76 @@ async def log_requests(request: Request, call_next):
          "client_ip": request.client.host,
          "status_code": response.status_code,
          "process_time": process_time,
-         "timestamp": datetime.now().isoformat(),
-         "model_type": model_manager.model_type,
-         "is_ensemble": model_manager.is_ensemble
      }

      logger.info(f"Request: {json.dumps(log_data)}")
      return response


- # Enhanced API Routes
- @app.get("/")
  async def root():
-     """Root endpoint with ensemble information"""
      return {
-         "message": "Enhanced Fake News Detection API with Ensemble Support",
-         "version": "2.1.0",
          "environment": path_manager.environment,
-         "model_type": model_manager.model_type,
-         "ensemble_support": model_manager.is_ensemble,
-         "lightgbm_available": LIGHTGBM_AVAILABLE,
          "documentation": "/docs",
          "health_check": "/health"
      }


- @app.post("/predict", response_model=EnhancedPredictionResponse)
  async def predict(
      request: PredictionRequest,
      background_tasks: BackgroundTasks,
      http_request: Request,
      _: None = Depends(rate_limit_check)
- ):
      """
-     Enhanced prediction with ensemble model support
      - **text**: The news article text to analyze
-     - **returns**: Enhanced prediction result with ensemble information
      """
      start_time = time.time()
      client_ip = http_request.client.host
@@ -607,49 +678,62 @@ async def predict(
              detail="Model is not available. Please try again later."
          )

-         # Make prediction using enhanced model manager
-         label, confidence = model_manager.predict(request.text)
-         processing_time = time.time() - start_time
-
-         # Prepare ensemble information
-         ensemble_info = None
-         if model_manager.is_ensemble:
-             ensemble_info = {
-                 "ensemble_type": model_manager.ensemble_metadata.get('ensemble_type', 'unknown'),
-                 "component_models": model_manager.ensemble_metadata.get('component_models', []),
-                 "voting_type": model_manager.model_metadata.get('ensemble_details', {}).get('voting_type', 'soft')
-             }

          # Record prediction for monitoring
-         if 'prediction_monitor' in globals():
-             prediction_monitor.record_prediction(
-                 prediction=label,
-                 confidence=confidence,
-                 processing_time=processing_time,
-                 text=request.text,
-                 model_version=model_manager.model_metadata.get('model_version', 'unknown'),
-                 client_id=client_ip,
-                 user_agent=user_agent
-             )

          # Record API request metrics
-         if 'metrics_collector' in globals():
-             metrics_collector.record_api_request(
-                 endpoint="/predict",
-                 method="POST",
-                 response_time=processing_time,
-                 status_code=200,
-                 client_ip=client_ip
-             )

-         # Create enhanced response
-         response = EnhancedPredictionResponse(
              prediction=label,
              confidence=confidence,
              model_version=model_manager.model_metadata.get('model_version', 'unknown'),
-             model_type=model_manager.model_type,
-             is_ensemble=model_manager.is_ensemble,
-             ensemble_info=ensemble_info,
              timestamp=datetime.now().isoformat(),
              processing_time=processing_time
          )
@@ -669,40 +753,36 @@ async def predict(
      except HTTPException:
          # Record error for failed requests
          processing_time = time.time() - start_time
-         if 'prediction_monitor' in globals():
-             prediction_monitor.record_error(
-                 error_type="http_error",
-                 error_message="Service unavailable",
-                 context={"status_code": 503}
-             )
-         if 'metrics_collector' in globals():
-             metrics_collector.record_api_request(
-                 endpoint="/predict",
-                 method="POST",
-                 response_time=processing_time,
-                 status_code=503,
-                 client_ip=client_ip
-             )
          raise
      except Exception as e:
          processing_time = time.time() - start_time

          # Record error
-         if 'prediction_monitor' in globals():
-             prediction_monitor.record_error(
-                 error_type="prediction_error",
-                 error_message=str(e),
-                 context={"text_length": len(request.text)}
-             )

-         if 'metrics_collector' in globals():
-             metrics_collector.record_api_request(
-                 endpoint="/predict",
-                 method="POST",
-                 response_time=processing_time,
-                 status_code=500,
-                 client_ip=client_ip
-             )

          logger.error(f"Prediction failed: {e}")
          raise HTTPException(
@@ -711,11 +791,90 @@ async def predict(
          )


- @app.get("/health", response_model=EnhancedHealthResponse)
  async def health_check():
      """
-     Enhanced health check endpoint with ensemble information
-     - **returns**: Detailed health status including ensemble information
      """
      try:
          # Model health
@@ -738,193 +897,843 @@ async def health_check():

          # Environment info
          environment_info = path_manager.get_environment_info()
-         environment_info["lightgbm_available"] = LIGHTGBM_AVAILABLE
-
-         # Ensemble information
-         ensemble_info = None
-         if model_manager.is_ensemble:
-             ensemble_info = {
-                 "is_ensemble": True,
-                 "ensemble_type": model_manager.ensemble_metadata.get('ensemble_type', 'unknown'),
-                 "component_models": model_manager.ensemble_metadata.get('component_models', []),
-                 "ensemble_health": model_health.get('ensemble_info', {}),
-                 "ensemble_metadata_available": Path("/tmp/ensemble_metadata.json").exists()
-             }

          # Overall status
          overall_status = "healthy" if model_health["status"] == "healthy" else "unhealthy"

-         return EnhancedHealthResponse(
              status=overall_status,
              timestamp=datetime.now().isoformat(),
              model_health=model_health,
              system_health=system_health,
              api_health=api_health,
-             environment_info=environment_info,
-             ensemble_info=ensemble_info
          )

      except Exception as e:
          logger.error(f"Health check failed: {e}")
-         return EnhancedHealthResponse(
              status="unhealthy",
              timestamp=datetime.now().isoformat(),
              model_health={"status": "unhealthy", "error": str(e)},
              system_health={"error": str(e)},
              api_health={"error": str(e)},
-             environment_info={"error": str(e)},
-             ensemble_info={"error": str(e)} if model_manager.is_ensemble else None
          )


- @app.get("/model/info")
- async def get_model_info():
      """
-     Get detailed model information including ensemble details
-     - **returns**: Comprehensive model information
      """
      try:
-         model_info = {
-             "model_version": model_manager.model_metadata.get('model_version', 'unknown'),
-             "model_type": model_manager.model_type,
-             "is_ensemble": model_manager.is_ensemble,
-             "lightgbm_available": LIGHTGBM_AVAILABLE,
-             "training_method": model_manager.model_metadata.get('training_method', 'unknown'),
-             "timestamp": model_manager.model_metadata.get('timestamp', 'unknown'),
-             "performance_metrics": {
-                 "test_accuracy": model_manager.model_metadata.get('test_accuracy', 'unknown'),
-                 "test_f1": model_manager.model_metadata.get('test_f1', 'unknown'),
-                 "cv_f1_mean": model_manager.model_metadata.get('cv_f1_mean', 'unknown'),
-                 "cv_f1_std": model_manager.model_metadata.get('cv_f1_std', 'unknown')
              },
-             "feature_engineering": model_manager.model_metadata.get('feature_engineering', {}),
-             "training_config": model_manager.model_metadata.get('training_config', {})
          }

-         # Add ensemble-specific information
-         if model_manager.is_ensemble:
-             ensemble_details = model_manager.model_metadata.get('ensemble_details', {})
-             model_info["ensemble_details"] = {
-                 "ensemble_type": ensemble_details.get('ensemble_type', 'unknown'),
-                 "component_models": ensemble_details.get('component_models', []),
-                 "voting_type": ensemble_details.get('voting_type', 'soft'),
-                 "component_performance": model_manager.model_metadata.get('component_performance', {})
-             }
-
-         # Add ensemble metadata if available
-         if model_manager.ensemble_metadata:
-             model_info["ensemble_metadata"] = model_manager.ensemble_metadata
-
-         return model_info

      except Exception as e:
-         logger.error(f"Model info retrieval failed: {e}")
          raise HTTPException(
              status_code=500,
-             detail=f"Failed to retrieve model info: {str(e)}"
          )


- @app.get("/model/performance")
- async def get_model_performance():
      """
-     Get detailed model performance metrics including ensemble comparison
-     - **returns**: Performance metrics and comparisons
      """
      try:
-         performance_info = {
-             "current_model": {
-                 "model_type": model_manager.model_type,
-                 "is_ensemble": model_manager.is_ensemble,
-                 "test_metrics": {
-                     "accuracy": model_manager.model_metadata.get('test_accuracy', 'unknown'),
-                     "f1": model_manager.model_metadata.get('test_f1', 'unknown'),
-                     "precision": model_manager.model_metadata.get('test_precision', 'unknown'),
-                     "recall": model_manager.model_metadata.get('test_recall', 'unknown'),
-                     "roc_auc": model_manager.model_metadata.get('test_roc_auc', 'unknown')
-                 },
-                 "cross_validation": model_manager.model_metadata.get('cross_validation', {})
              },
-             "training_info": {
-                 "training_method": model_manager.model_metadata.get('training_method', 'unknown'),
-                 "lightgbm_used": model_manager.model_metadata.get('lightgbm_used', False),
-                 "enhanced_features": model_manager.model_metadata.get('feature_engineering', {}).get('enhanced_features_used', False)
              }
          }

-         # Add ensemble-specific performance information
-         if model_manager.is_ensemble:
-             component_performance = model_manager.model_metadata.get('component_performance', {})
-             if component_performance:
-                 performance_info["component_comparison"] = component_performance

-                 # Calculate ensemble advantage
-                 ensemble_f1 = model_manager.model_metadata.get('test_f1', 0)
-                 if isinstance(ensemble_f1, (int, float)):
-                     best_individual_f1 = max([comp.get('f1', 0) for comp in component_performance.values()], default=0)
-                     if best_individual_f1 > 0:
-                         ensemble_advantage = ensemble_f1 - best_individual_f1
-                         performance_info["ensemble_advantage"] = {
-                             "f1_improvement": ensemble_advantage,
-                             "relative_improvement": (ensemble_advantage / best_individual_f1) * 100 if best_individual_f1 > 0 else 0
-                         }

-         return performance_info

      except Exception as e:
-         logger.error(f"Performance info retrieval failed: {e}")
          raise HTTPException(
              status_code=500,
-             detail=f"Failed to retrieve performance info: {str(e)}"
          )


- # Keep all other existing endpoints (cv/results, metrics, etc.) but enhance them with ensemble information where relevant

- @app.get("/ensemble/status")
- async def get_ensemble_status():
-     """
-     Get ensemble-specific status information
-     - **returns**: Ensemble status and configuration
-     """
      try:
-         if not model_manager.is_ensemble:
              return {
-                 "ensemble_active": False,
-                 "message": "Current model is not an ensemble",
-                 "model_type": model_manager.model_type,
-                 "lightgbm_available": LIGHTGBM_AVAILABLE
              }

-         ensemble_status = {
-             "ensemble_active": True,
-             "ensemble_type": model_manager.ensemble_metadata.get('ensemble_type', 'unknown'),
-             "component_models": model_manager.ensemble_metadata.get('component_models', []),
-             "ensemble_health": model_manager.health_status,
-             "lightgbm_available": LIGHTGBM_AVAILABLE,
-             "lightgbm_used": 'lightgbm' in model_manager.ensemble_metadata.get('component_models', []),
-             "voting_type": model_manager.model_metadata.get('ensemble_details', {}).get('voting_type', 'unknown'),
-             "model_version": model_manager.model_metadata.get('model_version', 'unknown'),
-             "training_timestamp": model_manager.model_metadata.get('timestamp', 'unknown')
          }

-         # Add performance comparison if available
-         component_performance = model_manager.model_metadata.get('component_performance', {})
-         if component_performance:
-             ensemble_status["component_performance"] = component_performance

-             # Calculate which model would have been best individually
-             best_individual = max(component_performance.items(), key=lambda x: x[1].get('f1', 0), default=('none', {'f1': 0}))
-             ensemble_status["best_individual_model"] = {
-                 "name": best_individual[0],
-                 "f1_score": best_individual[1].get('f1', 0)
              }

-         return ensemble_status

      except Exception as e:
-         logger.error(f"Ensemble status retrieval failed: {e}")
-         raise HTTPException(
-             status_code=500,
-             detail=f"Failed to retrieve ensemble status: {str(e)}"
-         )
  import json
  import time
  import joblib

  from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
  from fastapi import FastAPI, HTTPException, Depends, Request, BackgroundTasks, status

  from data.data_validator import (
      DataValidationPipeline, validate_text, validate_articles_list,
      get_validation_stats, generate_quality_report

  from deployment.model_registry import ModelRegistry
  from deployment.blue_green_manager import BlueGreenDeploymentManager

+
+ # Import the new path manager
  try:
      from path_config import path_manager
  except ImportError:
+     # Fallback for development environments
      import sys
      import os
      sys.path.append(os.path.dirname(os.path.abspath(__file__)))

  # Configure logging with fallback for permission issues
  def setup_logging():
      """Setup logging with fallback for environments with restricted file access"""
+     handlers = [logging.StreamHandler()]  # Always include console output

      try:
+         # Try to create log file in the logs directory
          log_file_path = path_manager.get_logs_path('fastapi_server.log')
          log_file_path.parent.mkdir(parents=True, exist_ok=True)

+         # Test if we can write to the file
          test_handler = logging.FileHandler(log_file_path)
          test_handler.close()

+         # If successful, add file handler
          handlers.append(logging.FileHandler(log_file_path))
+         print(f"Logging to file: {log_file_path}")  # Use print instead of logger

      except (PermissionError, OSError) as e:
+         # If file logging fails, just use console logging
          print(f"Cannot create log file, using console only: {e}")

+         # Try alternative locations for file logging
          try:
              import tempfile
              temp_log = tempfile.NamedTemporaryFile(mode='w', suffix='.log', delete=False, prefix='fastapi_')

      return handlers

+ # Setup logging with error handling
  logging.basicConfig(
      level=logging.INFO,
      format='%(asctime)s - %(levelname)s - %(message)s',
  )
  logger = logging.getLogger(__name__)

+ # Now that logger is defined, log the environment info
  try:
      path_manager.log_environment_info()
  except Exception as e:

  rate_limit_storage = defaultdict(list)


+ class ModelManager:
+     """Manages model loading and health checks with dynamic paths"""

      def __init__(self):
          self.model = None
          self.vectorizer = None
          self.pipeline = None
          self.model_metadata = {}
          self.last_health_check = None
          self.health_status = "unknown"
          self.load_model()

      def load_model(self):
+         """Load model with comprehensive error handling and dynamic paths"""
          try:
+             logger.info("Loading ML model...")

              # Initialize all to None first
              self.model = None
              self.vectorizer = None
              self.pipeline = None

+             # Try to load pipeline first (preferred)
+             pipeline_path = path_manager.get_pipeline_path()
+             logger.info(f"Checking for pipeline at: {pipeline_path}")

+             if pipeline_path.exists():
                  try:
+                     self.pipeline = joblib.load(pipeline_path)
+                     # Extract components from pipeline
+                     if hasattr(self.pipeline, 'named_steps'):
+                         self.model = self.pipeline.named_steps.get('model')
+                         self.vectorizer = (self.pipeline.named_steps.get('vectorizer') or
+                                            self.pipeline.named_steps.get('vectorize'))
+                     logger.info("Loaded model pipeline successfully")
+                     logger.info(f"Pipeline steps: {list(self.pipeline.named_steps.keys()) if hasattr(self.pipeline, 'named_steps') else 'No named_steps'}")
                  except Exception as e:
+                     logger.warning(f"Failed to load pipeline: {e}, falling back to individual components")
+                     self.pipeline = None
+             else:
+                 logger.info(f"Pipeline file not found at {pipeline_path}")

+             # If pipeline loading failed or doesn't exist, load individual components
              if self.pipeline is None:
                  model_path = path_manager.get_model_file_path()
                  vectorizer_path = path_manager.get_vectorizer_path()

                  try:
                      self.model = joblib.load(model_path)
                      self.vectorizer = joblib.load(vectorizer_path)
                      logger.info("Loaded model components successfully")
                  except Exception as e:
                      logger.error(f"Failed to load individual components: {e}")
                      raise e
                  else:
+                     raise FileNotFoundError(f"No model files found. Checked:\n- {pipeline_path}\n- {model_path}\n- {vectorizer_path}")
+
+             # Verify we have what we need for predictions
+             if self.pipeline is None and (self.model is None or self.vectorizer is None):
+                 raise ValueError("Neither complete pipeline nor individual model components are available")

              # Load metadata
              metadata_path = path_manager.get_metadata_path()
              if metadata_path.exists():
                  with open(metadata_path, 'r') as f:
                      self.model_metadata = json.load(f)
                  logger.info(f"Loaded model metadata: {self.model_metadata.get('model_version', 'Unknown')}")
              else:
                  logger.warning(f"Metadata file not found at: {metadata_path}")
                  self.model_metadata = {"model_version": "unknown"}

              self.health_status = "healthy"
              self.last_health_check = datetime.now()

              # Log what was successfully loaded
              logger.info(f"Model loading summary:")
              logger.info(f"  Pipeline available: {self.pipeline is not None}")
+             logger.info(f"  Model available: {self.model is not None}")
              logger.info(f"  Vectorizer available: {self.vectorizer is not None}")

          except Exception as e:
              logger.error(f"Failed to load model: {e}")

          self.model = None
          self.vectorizer = None
          self.pipeline = None

      def predict(self, text: str) -> tuple[str, float]:
+         """Make prediction with error handling"""
          try:
              if self.pipeline:
+                 # Use pipeline for prediction
                  prediction = self.pipeline.predict([text])[0]
                  probabilities = self.pipeline.predict_proba([text])[0]
+                 logger.debug("Used pipeline for prediction")
              elif self.model and self.vectorizer:
                  # Use individual components
                  X = self.vectorizer.transform([text])
 
              )

      def health_check(self) -> Dict[str, Any]:
+         """Perform health check"""
          try:
              # Test prediction with sample text
              test_text = "This is a test article for health check purposes."

              self.health_status = "healthy"
              self.last_health_check = datetime.now()

+             return {
                  "status": "healthy",
                  "last_check": self.last_health_check.isoformat(),
                  "model_available": self.model is not None,
                  "vectorizer_available": self.vectorizer is not None,
                  "pipeline_available": self.pipeline is not None,
                  "test_prediction": {"label": label, "confidence": confidence},
                  "environment": path_manager.environment,
+                 "model_path": str(path_manager.get_model_file_path()),
+                 "vectorizer_path": str(path_manager.get_vectorizer_path()),
+                 "pipeline_path": str(path_manager.get_pipeline_path()),
+                 "data_path": str(path_manager.get_data_path()),
                  "file_exists": {
                      "model": path_manager.get_model_file_path().exists(),
                      "vectorizer": path_manager.get_vectorizer_path().exists(),
+                     "pipeline": path_manager.get_pipeline_path().exists(),
+                     "metadata": path_manager.get_metadata_path().exists()
                  }
              }

          except Exception as e:
              self.health_status = "unhealthy"
              self.last_health_check = datetime.now()

                  "model_available": self.model is not None,
                  "vectorizer_available": self.vectorizer is not None,
                  "pipeline_available": self.pipeline is not None,
                  "environment": path_manager.environment,
+                 "model_path": str(path_manager.get_model_file_path()),
+                 "vectorizer_path": str(path_manager.get_vectorizer_path()),
+                 "pipeline_path": str(path_manager.get_pipeline_path()),
+                 "data_path": str(path_manager.get_data_path()),
+                 "file_exists": {
+                     "model": path_manager.get_model_file_path().exists(),
+                     "vectorizer": path_manager.get_vectorizer_path().exists(),
+                     "pipeline": path_manager.get_pipeline_path().exists(),
+                     "metadata": path_manager.get_metadata_path().exists()
+                 }
              }


+ # Background task functions
  async def log_prediction(text: str, prediction: str, confidence: float, client_ip: str, processing_time: float):
      """Log prediction details with error handling for file access"""
      try:

              "prediction": prediction,
              "confidence": confidence,
              "processing_time": processing_time,
+             "text_hash": hashlib.md5(text.encode()).hexdigest()
          }

          # Try to save to log file

              await f.write(json.dumps(logs, indent=2))

      except (PermissionError, OSError) as e:
+         # If file logging fails, just log to console
          logger.warning(f"Cannot write prediction log to file: {e}")
          logger.info(f"Prediction logged: {json.dumps(log_entry)}")

          logger.error(f"Failed to log prediction: {e}")


+ async def log_batch_prediction(total_texts: int, successful_predictions: int, client_ip: str, processing_time: float):
+     """Log batch prediction details"""
+     try:
+         log_entry = {
+             "timestamp": datetime.now().isoformat(),
+             "type": "batch_prediction",
+             "client_ip": client_ip,
+             "total_texts": total_texts,
+             "successful_predictions": successful_predictions,
+             "processing_time": processing_time,
+             "success_rate": successful_predictions / total_texts if total_texts > 0 else 0
+         }
+
+         logger.info(f"Batch prediction logged: {json.dumps(log_entry)}")
+
+     except Exception as e:
+         logger.error(f"Failed to log batch prediction: {e}")
+
  # Global variables
+ model_manager = ModelManager()

  # Initialize automation manager
  automation_manager = None

  traffic_router = None
  model_registry = None

+
  @asynccontextmanager
  async def lifespan(app: FastAPI):
+     """Manage application lifespan with deployment system"""
      global deployment_manager, traffic_router, model_registry

+     logger.info("Starting FastAPI application...")

      # Startup tasks
      model_manager.load_model()

      # Initialize deployment components
      try:
          deployment_manager = BlueGreenDeploymentManager()

      except Exception as e:
          logger.error(f"Failed to initialize deployment system: {e}")

+     # Initialize monitoring and automation...

      yield

      # Shutdown tasks
+     logger.info("Shutting down FastAPI application...")
+
+ # Initialize monitoring components
+ prediction_monitor = PredictionMonitor(base_dir=Path("/tmp"))
+ metrics_collector = MetricsCollector(base_dir=Path("/tmp"))
+ alert_system = AlertSystem(base_dir=Path("/tmp"))
+
+ # Start monitoring
+ prediction_monitor.start_monitoring()
+
+ alert_system.add_notification_handler("console", console_notification_handler)
+
+
+ @asynccontextmanager
+ async def lifespan(app: FastAPI):
+     """Manage application lifespan"""
+     logger.info("Starting FastAPI application...")
+
+     # Startup tasks
+     model_manager.load_model()
+
+     # Schedule periodic health checks
+     asyncio.create_task(periodic_health_check())
+
+     yield
+
+     # Shutdown tasks
+     logger.info("Shutting down FastAPI application...")
+
+
+ # Background tasks
+ async def periodic_health_check():
+     """Periodic health check"""
+     while True:
+         try:
+             await asyncio.sleep(300)  # Check every 5 minutes
+             health_status = model_manager.health_check()
+
+             if health_status["status"] == "unhealthy":
+                 logger.warning(
+                     "Model health check failed, attempting to reload...")
+                 model_manager.load_model()
+
+         except Exception as e:
+             logger.error(f"Periodic health check failed: {e}")
+
  # Create FastAPI app
  app = FastAPI(
+     title="Fake News Detection API",
+     description="Production-ready API for fake news detection with comprehensive monitoring and security features",
+     version="2.0.0",
      docs_url="/docs",
      redoc_url="/redoc",
      lifespan=lifespan
  )

+ # Add middleware
  app.add_middleware(
      CORSMiddleware,
+     allow_origins=["*"],  # Configure appropriately for production
      allow_credentials=True,
      allow_methods=["*"],
      allow_headers=["*"],

  app.add_middleware(
      TrustedHostMiddleware,
+     allowed_hosts=["*"]  # Configure appropriately for production
  )

+ # Custom OpenAPI setup - RIGHT AFTER app creation
+ def custom_openapi():
+     if app.openapi_schema:
+         return app.openapi_schema
+
+     openapi_schema = get_openapi(
+         title="Fake News Detection API",
+         version="2.0.0",
+         description="Production-ready API for fake news detection with comprehensive monitoring and security features",
+         routes=app.routes,
+     )
+
+     # Add security definitions
+     openapi_schema["components"]["securitySchemes"] = {
+         "Bearer": {
+             "type": "http",
+             "scheme": "bearer",
+             "bearerFormat": "JWT",
+         }
+     }

+     app.openapi_schema = openapi_schema
+     return app.openapi_schema
+
+ # Set the custom OpenAPI function
+ app.openapi = custom_openapi
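FastAPI caches the generated schema on `app.openapi_schema`, which is why the override returns early when it is already set. A quick way to confirm the Bearer scheme is exposed, assuming the app is served locally with `uvicorn app.fastapi_server:app --port 8000`:

```python
import json
import urllib.request

with urllib.request.urlopen("http://localhost:8000/openapi.json") as resp:
    schema = json.load(resp)

# Expect the http/bearer definition injected by custom_openapi()
print(schema["components"]["securitySchemes"]["Bearer"])
```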
 
 
 
 
+
+ # Request/Response models
  class PredictionRequest(BaseModel):
      text: str = Field(..., min_length=1, max_length=10000,
                        description="Text to analyze for fake news detection")

      def validate_text(cls, v):
          if not v or not v.strip():
              raise ValueError('Text cannot be empty')
+
+         # Basic content validation
          if len(v.strip()) < 10:
              raise ValueError('Text must be at least 10 characters long')
+
+         # Check for suspicious patterns
          suspicious_patterns = ['<script', 'javascript:', 'data:']
          if any(pattern in v.lower() for pattern in suspicious_patterns):
              raise ValueError('Text contains suspicious content')
+
          return v.strip()
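Pydantic runs this validator at model construction time, so malformed input is rejected with a 422 before the endpoint body ever executes. A small demonstration of the rules above (assuming pydantic v1, which matches the `@validator` decorator used in this file):

```python
from pydantic import ValidationError

try:
    PredictionRequest(text="too short")  # trips the 10-character rule
except ValidationError as exc:
    print(exc.errors()[0]["msg"])

ok = PredictionRequest(text="  A sufficiently long news snippet for testing.  ")
print(repr(ok.text))  # leading/trailing whitespace stripped by the validator
```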


+ class PredictionResponse(BaseModel):
+     prediction: str = Field(...,
+                             description="Prediction result: 'Real' or 'Fake'")
+     confidence: float = Field(..., ge=0.0, le=1.0,
+                               description="Confidence score between 0 and 1")
+     model_version: str = Field(...,
+                                description="Version of the model used for prediction")
+     timestamp: str = Field(..., description="Timestamp of the prediction")
+     processing_time: float = Field(...,
+                                    description="Time taken for processing in seconds")
+
+
+ class BatchPredictionRequest(BaseModel):
+     texts: List[str] = Field(..., min_items=1, max_items=10,
+                              description="List of texts to analyze")
+
+     @validator('texts')
+     def validate_texts(cls, v):
+         if not v:
+             raise ValueError('Texts list cannot be empty')
+
+         for text in v:
+             if not text or not text.strip():
+                 raise ValueError('All texts must be non-empty')
+
+             if len(text.strip()) < 10:
+                 raise ValueError(
+                     'All texts must be at least 10 characters long')
+
+         return [text.strip() for text in v]
+
+
+ class BatchPredictionResponse(BaseModel):
+     predictions: List[PredictionResponse]
+     total_count: int
+     processing_time: float
+
+
+ class HealthResponse(BaseModel):
+     status: str
+     timestamp: str
+     model_health: Dict[str, Any]
+     system_health: Dict[str, Any]
+     api_health: Dict[str, Any]
+     environment_info: Dict[str, Any]
+
+
+ # Rate limiting
  async def rate_limit_check(request: Request):
      """Check rate limits"""
      client_ip = request.client.host

      # Clean old entries
      rate_limit_storage[client_ip] = [
          timestamp for timestamp in rate_limit_storage[client_ip]
+         if current_time - timestamp < 3600  # 1 hour window
      ]

      # Check rate limit (100 requests per hour)

      rate_limit_storage[client_ip].append(current_time)
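Taken together, the pruning step and the append implement a per-IP sliding window: keep only timestamps from the last hour, reject when 100 remain, otherwise record the new request. Condensed into one self-contained sketch:

```python
import time
from collections import defaultdict

rate_limit_storage = defaultdict(list)
LIMIT, WINDOW = 100, 3600  # 100 requests per rolling hour

def allow_request(client_ip: str) -> bool:
    now = time.time()
    # Keep only timestamps still inside the one-hour window
    rate_limit_storage[client_ip] = [
        t for t in rate_limit_storage[client_ip] if now - t < WINDOW
    ]
    if len(rate_limit_storage[client_ip]) >= LIMIT:
        return False
    rate_limit_storage[client_ip].append(now)
    return True
```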


+ # Logging middleware
  @app.middleware("http")
  async def log_requests(request: Request, call_next):
+     """Log all requests"""
      start_time = time.time()
+
      response = await call_next(request)
+
      process_time = time.time() - start_time

      log_data = {

          "client_ip": request.client.host,
          "status_code": response.status_code,
          "process_time": process_time,
+         "timestamp": datetime.now().isoformat()
      }

      logger.info(f"Request: {json.dumps(log_data)}")
+
      return response


+ # Error handlers
+ @app.exception_handler(HTTPException)
+ async def http_exception_handler(request: Request, exc: HTTPException):
+     """Handle HTTP exceptions"""
+     error_data = {
+         "error": True,
+         "message": exc.detail,
+         "status_code": exc.status_code,
+         "timestamp": datetime.now().isoformat(),
+         "path": request.url.path
+     }
+
+     logger.error(f"HTTP Exception: {json.dumps(error_data)}")
+
+     return JSONResponse(
+         status_code=exc.status_code,
+         content=error_data
+     )
+
+
+ @app.exception_handler(Exception)
+ async def general_exception_handler(request: Request, exc: Exception):
+     """Handle general exceptions"""
+     error_data = {
+         "error": True,
+         "message": "Internal server error",
+         "timestamp": datetime.now().isoformat(),
+         "path": request.url.path
+     }
+
+     logger.error(f"General Exception: {str(exc)}\n{traceback.format_exc()}")
+
+     return JSONResponse(
+         status_code=500,
+         content=error_data
+     )
+
+
+ # API Routes
+ @app.get("/", response_model=Dict[str, str])
  async def root():
+     """Root endpoint"""
      return {
+         "message": "Fake News Detection API",
+         "version": "2.0.0",
          "environment": path_manager.environment,
          "documentation": "/docs",
          "health_check": "/health"
      }


+ @app.post("/predict", response_model=PredictionResponse)
  async def predict(
      request: PredictionRequest,
      background_tasks: BackgroundTasks,
      http_request: Request,
      _: None = Depends(rate_limit_check)
+ ):
      """
+     Predict whether a news article is fake or real using blue-green deployment routing
      - **text**: The news article text to analyze
+     - **returns**: Prediction result with confidence score
      """
      start_time = time.time()
      client_ip = http_request.client.host

          detail="Model is not available. Please try again later."
          )

+         # Prepare request data for routing
+         request_data = {
+             'client_id': client_ip,
+             'user_agent': user_agent,
+             'timestamp': datetime.now().isoformat()
+         }
+
+         # Use traffic router if available, otherwise fallback to model manager
+         if traffic_router and (traffic_router.blue_model or traffic_router.green_model):
+             try:
+                 environment, result = traffic_router.make_prediction(request.text, request_data)
+
+                 # Extract results from traffic router response
+                 label = result['prediction']
+                 confidence = result['confidence']
+                 processing_time = result['processing_time']
+
+                 logger.debug(f"Used {environment} environment for prediction")
+
+             except Exception as e:
+                 logger.warning(f"Traffic router failed, falling back to model manager: {e}")
+                 # Fallback to original model manager
+                 label, confidence = model_manager.predict(request.text)
+                 processing_time = time.time() - start_time
+                 environment = "blue"  # Default fallback
+         else:
+             # Fallback to original model manager
+             label, confidence = model_manager.predict(request.text)
+             processing_time = time.time() - start_time
+             environment = "blue"  # Default when no traffic router

          # Record prediction for monitoring
+         prediction_monitor.record_prediction(
+             prediction=label,
+             confidence=confidence,
+             processing_time=processing_time,
+             text=request.text,
+             model_version=model_manager.model_metadata.get('model_version', 'unknown'),
+             client_id=client_ip,
+             user_agent=user_agent
+         )

          # Record API request metrics
+         metrics_collector.record_api_request(
+             endpoint="/predict",
+             method="POST",
+             response_time=processing_time,
+             status_code=200,
+             client_ip=client_ip
+         )

+         # Create response
+         response = PredictionResponse(
              prediction=label,
              confidence=confidence,
              model_version=model_manager.model_metadata.get('model_version', 'unknown'),
              timestamp=datetime.now().isoformat(),
              processing_time=processing_time
          )

      except HTTPException:
          # Record error for failed requests
          processing_time = time.time() - start_time
+         prediction_monitor.record_error(
+             error_type="http_error",
+             error_message="Service unavailable",
+             context={"status_code": 503}
+         )
+         metrics_collector.record_api_request(
+             endpoint="/predict",
+             method="POST",
+             response_time=processing_time,
+             status_code=503,
+             client_ip=client_ip
+         )
          raise
      except Exception as e:
          processing_time = time.time() - start_time

          # Record error
+         prediction_monitor.record_error(
+             error_type="prediction_error",
+             error_message=str(e),
+             context={"text_length": len(request.text)}
+         )

+         metrics_collector.record_api_request(
+             endpoint="/predict",
+             method="POST",
+             response_time=processing_time,
+             status_code=500,
+             client_ip=client_ip
+         )

          logger.error(f"Prediction failed: {e}")
          raise HTTPException(

          )
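With the server running, the endpoint can be exercised like this (assumes a local uvicorn instance on port 8000 and the `requests` package; field names follow the PredictionResponse model above):

```python
import requests

resp = requests.post(
    "http://localhost:8000/predict",
    json={"text": "A sufficiently long news article body to satisfy validation."},
)
resp.raise_for_status()
body = resp.json()
print(body["prediction"], body["confidence"], body["model_version"])
```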
792
 
793
 
794
+ @app.post("/predict/batch", response_model=BatchPredictionResponse)
795
+ async def predict_batch(
796
+ request: BatchPredictionRequest,
797
+ background_tasks: BackgroundTasks,
798
+ http_request: Request,
799
+ _: None = Depends(rate_limit_check)
800
+ ):
801
+ """
802
+ Predict multiple news articles in batch
803
+ - **texts**: List of news article texts to analyze
804
+ - **returns**: List of prediction results
805
+ """
806
+ start_time = time.time()
807
+
808
+ try:
809
+ # Check model health
810
+ if model_manager.health_status != "healthy":
811
+ raise HTTPException(
812
+ status_code=503,
813
+ detail="Model is not available. Please try again later."
814
+ )
815
+
816
+ predictions = []
817
+
818
+ for text in request.texts:
819
+ try:
820
+ label, confidence = model_manager.predict(text)
821
+
822
+ prediction = PredictionResponse(
823
+ prediction=label,
824
+ confidence=confidence,
825
+ model_version=model_manager.model_metadata.get(
826
+ 'model_version', 'unknown'),
827
+ timestamp=datetime.now().isoformat(),
828
+ processing_time=0.0 # Will be updated with total time
829
+ )
830
+
831
+ predictions.append(prediction)
832
+
833
+ except Exception as e:
834
+ logger.error(f"Batch prediction failed for text: {e}")
835
+ # Continue with other texts
836
+ continue
837
+
838
+ # Calculate total processing time
839
+ total_processing_time = time.time() - start_time
840
+
841
+ # Update processing time for all predictions
842
+ for prediction in predictions:
843
+ prediction.processing_time = total_processing_time / \
844
+ len(predictions)
845
+
846
+ response = BatchPredictionResponse(
847
+ predictions=predictions,
848
+ total_count=len(predictions),
849
+ processing_time=total_processing_time
850
+ )
851
+
852
+ # Log batch prediction (background task)
853
+ background_tasks.add_task(
854
+ log_batch_prediction,
855
+ len(request.texts),
856
+ len(predictions),
857
+ http_request.client.host,
858
+ total_processing_time
859
+ )
860
+
861
+ return response
862
+
863
+ except HTTPException:
864
+ raise
865
+ except Exception as e:
866
+ logger.error(f"Batch prediction failed: {e}")
867
+ raise HTTPException(
868
+ status_code=500,
869
+ detail=f"Batch prediction failed: {str(e)}"
870
+ )
871
+
872
+
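+ # Illustrative batch client sketch under the same assumptions as the /predict
+ # example: posts a list of texts and unpacks BatchPredictionResponse. Since the
+ # handler skips texts that fail, total_count may be below len(texts).
+ def _example_batch_client(texts, base_url: str = "http://localhost:8000"):
+     import requests  # assumed client-side dependency
+     resp = requests.post(f"{base_url}/predict/batch", json={"texts": texts}, timeout=60)
+     resp.raise_for_status()
+     data = resp.json()
+     return [(p["prediction"], p["confidence"]) for p in data["predictions"]]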
873
+ @app.get("/health", response_model=HealthResponse)
874
  async def health_check():
875
  """
876
+ Comprehensive health check endpoint
877
+ - **returns**: Detailed health status of the API and model
878
  """
879
  try:
880
  # Model health
 
897
 
898
  # Environment info
899
  environment_info = path_manager.get_environment_info()
900
 
901
  # Overall status
902
  overall_status = "healthy" if model_health["status"] == "healthy" else "unhealthy"
903
 
904
+ return HealthResponse(
905
  status=overall_status,
906
  timestamp=datetime.now().isoformat(),
907
  model_health=model_health,
908
  system_health=system_health,
909
  api_health=api_health,
910
+ environment_info=environment_info
 
911
  )
912
 
913
  except Exception as e:
914
  logger.error(f"Health check failed: {e}")
915
+ return HealthResponse(
916
  status="unhealthy",
917
  timestamp=datetime.now().isoformat(),
918
  model_health={"status": "unhealthy", "error": str(e)},
919
  system_health={"error": str(e)},
920
  api_health={"error": str(e)},
921
+ environment_info={"error": str(e)}
 
922
  )
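+ # Illustrative readiness gate for deployment scripts: poll /health until the API
+ # reports "healthy" or the deadline expires. URL and timing values are assumptions.
+ def _wait_until_healthy(base_url: str = "http://localhost:8000", deadline_seconds: int = 120):
+     import requests  # assumed client-side dependency
+     deadline = time.time() + deadline_seconds
+     while time.time() < deadline:
+         try:
+             if requests.get(f"{base_url}/health", timeout=5).json().get("status") == "healthy":
+                 return True
+         except requests.RequestException:
+             pass  # server may not be accepting connections yet
+         time.sleep(2)
+     return False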
923
 
924
 
925
+ @app.get("/health/detailed")
926
+ async def detailed_health_check():
927
  """
928
+ Detailed health check endpoint with comprehensive CV results
929
+ - **returns**: Detailed health status including cross-validation metrics
930
  """
931
  try:
932
+ # Get basic health information
933
+ basic_health = await health_check()
934
+
935
+ # Load metadata to get CV results
936
+ metadata_path = path_manager.get_metadata_path()
937
+ cv_details = {}
938
+
939
+ if metadata_path.exists():
940
+ try:
941
+ with open(metadata_path, 'r') as f:
942
+ metadata = json.load(f)
943
+
944
+ # Extract cross-validation information
945
+ cv_info = metadata.get('cross_validation', {})
946
+ if cv_info:
947
+ cv_details = {
948
+ 'cross_validation_available': True,
949
+ 'n_splits': cv_info.get('n_splits', 'Unknown'),
950
+ 'test_scores': cv_info.get('test_scores', {}),
951
+ 'train_scores': cv_info.get('train_scores', {}),
952
+ 'overfitting_score': cv_info.get('overfitting_score', 'Unknown'),
953
+ 'stability_score': cv_info.get('stability_score', 'Unknown'),
954
+ 'individual_fold_results': cv_info.get('individual_fold_results', [])
955
+ }
956
+
957
+ # Add summary statistics
958
+ test_scores = cv_info.get('test_scores', {})
959
+ if 'f1' in test_scores:
960
+ cv_details['cv_f1_summary'] = {
961
+ 'mean': test_scores['f1'].get('mean', 'Unknown'),
962
+ 'std': test_scores['f1'].get('std', 'Unknown'),
963
+ 'min': test_scores['f1'].get('min', 'Unknown'),
964
+ 'max': test_scores['f1'].get('max', 'Unknown'),
965
+ 'scores': test_scores['f1'].get('scores', [])
966
+ }
967
+
968
+ if 'accuracy' in test_scores:
969
+ cv_details['cv_accuracy_summary'] = {
970
+ 'mean': test_scores['accuracy'].get('mean', 'Unknown'),
971
+ 'std': test_scores['accuracy'].get('std', 'Unknown'),
972
+ 'min': test_scores['accuracy'].get('min', 'Unknown'),
973
+ 'max': test_scores['accuracy'].get('max', 'Unknown'),
974
+ 'scores': test_scores['accuracy'].get('scores', [])
975
+ }
976
+
977
+ # Add model comparison results if available
978
+ statistical_validation = metadata.get('statistical_validation', {})
979
+ if statistical_validation:
980
+ cv_details['statistical_validation'] = statistical_validation
981
+
982
+ promotion_validation = metadata.get('promotion_validation', {})
983
+ if promotion_validation:
984
+ cv_details['promotion_validation'] = promotion_validation
985
+
986
+ # Add model version and training info
987
+ cv_details['model_info'] = {
988
+ 'model_version': metadata.get('model_version', 'Unknown'),
989
+ 'model_type': metadata.get('model_type', 'Unknown'),
990
+ 'training_timestamp': metadata.get('timestamp', 'Unknown'),
991
+ 'promotion_timestamp': metadata.get('promotion_timestamp'),
992
+ 'cv_f1_mean': metadata.get('cv_f1_mean'),
993
+ 'cv_f1_std': metadata.get('cv_f1_std'),
994
+ 'cv_accuracy_mean': metadata.get('cv_accuracy_mean'),
995
+ 'cv_accuracy_std': metadata.get('cv_accuracy_std')
996
+ }
997
+
998
+ except Exception as e:
999
+ cv_details = {
1000
+ 'cross_validation_available': False,
1001
+ 'error': f"Failed to load CV details: {str(e)}"
1002
+ }
1003
+ else:
1004
+ cv_details = {
1005
+ 'cross_validation_available': False,
1006
+ 'error': "No metadata file found"
1007
+ }
1008
+
1009
+ # Combine basic health with detailed CV information
1010
+ detailed_response = {
1011
+ 'basic_health': basic_health,
1012
+ 'cross_validation_details': cv_details,
1013
+ 'detailed_check_timestamp': datetime.now().isoformat()
1014
+ }
1015
+
1016
+ return detailed_response
1017
+
1018
+ except Exception as e:
1019
+ logger.error(f"Detailed health check failed: {e}")
1020
+ return {
1021
+ 'basic_health': {'status': 'unhealthy', 'error': str(e)},
1022
+ 'cross_validation_details': {
1023
+ 'cross_validation_available': False,
1024
+ 'error': f"Detailed health check failed: {str(e)}"
1025
  },
1026
+ 'detailed_check_timestamp': datetime.now().isoformat()
 
1027
  }
1028
 
1029
 
1030
+ @app.get("/cv/results")
1031
+ async def get_cv_results():
1032
+ """
1033
+ Get detailed cross-validation results for the current model
1034
+ - **returns**: Comprehensive CV metrics and fold-by-fold results
1035
+ """
1036
+ try:
1037
+ metadata_path = path_manager.get_metadata_path()
1038
+
1039
+ if not metadata_path.exists():
1040
+ raise HTTPException(
1041
+ status_code=404,
1042
+ detail="Model metadata not found. Train a model first."
1043
+ )
1044
+
1045
+ with open(metadata_path, 'r') as f:
1046
+ metadata = json.load(f)
1047
+
1048
+ cv_info = metadata.get('cross_validation', {})
1049
+
1050
+ if not cv_info:
1051
+ raise HTTPException(
1052
+ status_code=404,
1053
+ detail="No cross-validation results found. Model may not have been trained with CV."
1054
+ )
1055
+
1056
+ # Structure the CV results for API response
1057
+ cv_response = {
1058
+ 'model_version': metadata.get('model_version', 'Unknown'),
1059
+ 'model_type': metadata.get('model_type', 'Unknown'),
1060
+ 'training_timestamp': metadata.get('timestamp', 'Unknown'),
1061
+ 'cross_validation': {
1062
+ 'methodology': {
1063
+ 'n_splits': cv_info.get('n_splits', 'Unknown'),
1064
+ 'cv_type': 'StratifiedKFold',
1065
+ 'random_state': 42
1066
+ },
1067
+ 'test_scores': cv_info.get('test_scores', {}),
1068
+ 'train_scores': cv_info.get('train_scores', {}),
1069
+ 'performance_indicators': {
1070
+ 'overfitting_score': cv_info.get('overfitting_score', 'Unknown'),
1071
+ 'stability_score': cv_info.get('stability_score', 'Unknown')
1072
+ },
1073
+ 'individual_fold_results': cv_info.get('individual_fold_results', [])
1074
+ },
1075
+ 'statistical_validation': metadata.get('statistical_validation', {}),
1076
+ 'promotion_validation': metadata.get('promotion_validation', {}),
1077
+ 'summary_statistics': {
1078
+ 'cv_f1_mean': metadata.get('cv_f1_mean'),
1079
+ 'cv_f1_std': metadata.get('cv_f1_std'),
1080
+ 'cv_accuracy_mean': metadata.get('cv_accuracy_mean'),
1081
+ 'cv_accuracy_std': metadata.get('cv_accuracy_std')
1082
+ }
1083
+ }
1084
+
1085
+ return cv_response
1086
+
1087
+ except HTTPException:
1088
+ raise
1089
  except Exception as e:
1090
+ logger.error(f"CV results retrieval failed: {e}")
1091
  raise HTTPException(
1092
  status_code=500,
1093
+ detail=f"Failed to retrieve CV results: {str(e)}"
1094
  )
1095
 
1096
 
1097
+ @app.get("/cv/comparison")
1098
+ async def get_model_comparison_results():
1099
  """
1100
+ Get latest model comparison results from retraining
1101
+ - **returns**: Statistical comparison results between models
1102
  """
1103
  try:
1104
+ # Load comparison logs
1105
+ comparison_log_path = path_manager.get_logs_path("model_comparison.json")
1106
+
1107
+ if not comparison_log_path.exists():
1108
+ raise HTTPException(
1109
+ status_code=404,
1110
+ detail="No model comparison results found."
1111
+ )
1112
+
1113
+ with open(comparison_log_path, 'r') as f:
1114
+ comparison_logs = json.load(f)
1115
+
1116
+ if not comparison_logs:
1117
+ raise HTTPException(
1118
+ status_code=404,
1119
+ detail="No comparison entries found."
1120
+ )
1121
+
1122
+ # Get the most recent comparison
1123
+ latest_comparison = comparison_logs[-1]
1124
+ comparison_details = latest_comparison.get('comparison_details', {})
1125
+
1126
+ # Structure the response
1127
+ comparison_response = {
1128
+ 'comparison_timestamp': latest_comparison.get('timestamp', 'Unknown'),
1129
+ 'session_id': latest_comparison.get('session_id', 'Unknown'),
1130
+ 'models_compared': {
1131
+ 'model1_name': comparison_details.get('model1_name', 'Production'),
1132
+ 'model2_name': comparison_details.get('model2_name', 'Candidate')
1133
+ },
1134
+ 'cv_methodology': {
1135
+ 'cv_folds': comparison_details.get('cv_folds', 'Unknown')
1136
+ },
1137
+ 'model_performance': {
1138
+ 'production_model': comparison_details.get('model1_cv_results', {}),
1139
+ 'candidate_model': comparison_details.get('model2_cv_results', {})
1140
  },
1141
+ 'metric_comparisons': comparison_details.get('metric_comparisons', {}),
1142
+ 'statistical_tests': comparison_details.get('statistical_tests', {}),
1143
+ 'promotion_decision': comparison_details.get('promotion_decision', {}),
1144
+ 'summary': {
1145
+ 'decision': comparison_details.get('promotion_decision', {}).get('promote_candidate', False),
1146
+ 'reason': comparison_details.get('promotion_decision', {}).get('reason', 'Unknown'),
1147
+ 'confidence': comparison_details.get('promotion_decision', {}).get('confidence', 0)
1148
  }
1149
  }
1150
+
1151
+ return comparison_response
1152
+
1153
+ except HTTPException:
1154
+ raise
1155
+ except Exception as e:
1156
+ logger.error(f"Model comparison results retrieval failed: {e}")
1157
+ raise HTTPException(
1158
+ status_code=500,
1159
+ detail=f"Failed to retrieve model comparison results: {str(e)}"
1160
+ )
1161
+
1162
 
1163
+ @app.get("/metrics")
1164
+ async def get_metrics():
1165
+ """
1166
+ Get comprehensive API metrics including CV results
1167
+ - **returns**: Usage statistics, performance metrics, and CV information
1168
+ """
1169
+ try:
1170
+ # Calculate metrics from rate limiting storage
1171
+ total_requests = sum(len(requests)
1172
+ for requests in rate_limit_storage.values())
1173
+ unique_clients = len(rate_limit_storage)
1174
+
1175
+ # Load metadata for CV information
1176
+ metadata_path = path_manager.get_metadata_path()
1177
+ cv_summary = {}
1178
+
1179
+ if metadata_path.exists():
1180
+ try:
1181
+ with open(metadata_path, 'r') as f:
1182
+ metadata = json.load(f)
1183
 
1184
+ # Extract CV summary
1185
+ cv_info = metadata.get('cross_validation', {})
1186
+ if cv_info:
1187
+ test_scores = cv_info.get('test_scores', {})
1188
+ cv_summary = {
1189
+ 'cv_available': True,
1190
+ 'cv_folds': cv_info.get('n_splits', 'Unknown'),
1191
+ 'cv_f1_mean': test_scores.get('f1', {}).get('mean'),
1192
+ 'cv_f1_std': test_scores.get('f1', {}).get('std'),
1193
+ 'cv_accuracy_mean': test_scores.get('accuracy', {}).get('mean'),
1194
+ 'cv_accuracy_std': test_scores.get('accuracy', {}).get('std'),
1195
+ 'overfitting_score': cv_info.get('overfitting_score'),
1196
+ 'stability_score': cv_info.get('stability_score')
1197
+ }
1198
+ else:
1199
+ cv_summary = {'cv_available': False}
1200
+
1201
+ except Exception as e:
1202
+ cv_summary = {'cv_available': False, 'cv_error': str(e)}
1203
+ else:
1204
+ cv_summary = {'cv_available': False, 'cv_error': 'No metadata file'}
1205
+
1206
+ metrics = {
1207
+ 'api_metrics': {
1208
+ 'total_requests': total_requests,
1209
+ 'unique_clients': unique_clients,
1210
+ 'timestamp': datetime.now().isoformat()
1211
+ },
1212
+ 'model_info': {
1213
+ 'model_version': model_manager.model_metadata.get('model_version', 'unknown'),
1214
+ 'model_health': model_manager.health_status,
1215
+ 'last_health_check': model_manager.last_health_check.isoformat() if model_manager.last_health_check else None
1216
+ },
1217
+ 'cross_validation_summary': cv_summary,
1218
+ 'environment_info': {
1219
+ 'environment': path_manager.environment,
1220
+ 'available_datasets': path_manager.list_available_datasets(),
1221
+ 'available_models': path_manager.list_available_models()
1222
+ }
1223
+ }
1224
+
1225
+ return metrics
1226
+
1227
+ except Exception as e:
1228
+ logger.error(f"Metrics retrieval failed: {e}")
1229
+ raise HTTPException(
1230
+ status_code=500,
1231
+ detail=f"Metrics retrieval failed: {str(e)}"
1232
+ )
1233
 
1234
+ @app.get("/validation/statistics")
1235
+ async def get_validation_statistics():
1236
+ """Get comprehensive validation statistics"""
1237
+ try:
1238
+ stats = get_validation_stats()
1239
+
1240
+ if not stats:
1241
+ return {
1242
+ 'statistics_available': False,
1243
+ 'message': 'No validation statistics available yet',
1244
+ 'timestamp': datetime.now().isoformat()
1245
+ }
1246
+
1247
+ enhanced_stats = {
1248
+ 'statistics_available': True,
1249
+ 'last_updated': stats.get('last_updated'),
1250
+ 'overall_metrics': {
1251
+ 'total_validations': stats.get('total_validations', 0),
1252
+ 'total_articles_processed': stats.get('total_articles', 0),
1253
+ 'overall_success_rate': (stats.get('total_valid_articles', 0) /
1254
+ max(stats.get('total_articles', 1), 1)),
1255
+ 'average_quality_score': stats.get('average_quality_score', 0.0)
1256
+ },
1257
+ 'source_breakdown': stats.get('source_statistics', {}),
1258
+ 'recent_performance': {
1259
+ 'validation_history': stats.get('validation_history', [])[-10:],
1260
+ 'quality_trends': stats.get('quality_trends', [])[-10:]
1261
+ },
1262
+ 'timestamp': datetime.now().isoformat()
1263
+ }
1264
+
1265
+ return enhanced_stats
1266
+
1267
+ except Exception as e:
1268
+ logger.error(f"Failed to get validation statistics: {e}")
1269
+ raise HTTPException(
1270
+ status_code=500,
1271
+ detail=f"Failed to retrieve validation statistics: {str(e)}"
1272
+ )
1273
 
1274
+ @app.get("/validation/quality-report")
1275
+ async def get_quality_report():
1276
+ """Get comprehensive data quality report"""
1277
+ try:
1278
+ report = generate_quality_report()
1279
+
1280
+ if 'error' in report:
1281
+ raise HTTPException(
1282
+ status_code=404,
1283
+ detail=report['error']
1284
+ )
1285
+
1286
+ return report
1287
+
1288
+ except HTTPException:
1289
+ raise
1290
  except Exception as e:
1291
+ logger.error(f"Failed to generate quality report: {e}")
1292
  raise HTTPException(
1293
  status_code=500,
1294
+ detail=f"Failed to generate quality report: {str(e)}"
1295
+ )
1296
+
1297
+ @app.get("/validation/health")
1298
+ async def get_validation_health():
1299
+ """Get validation system health status"""
1300
+ try:
1301
+ stats = get_validation_stats()
1302
+
1303
+ health_indicators = {
1304
+ 'validation_system_active': True,
1305
+ 'statistics_available': bool(stats),
1306
+ 'recent_activity': False,
1307
+ 'quality_status': 'unknown'
1308
+ }
1309
+
1310
+ if stats:
1311
+ last_updated = stats.get('last_updated')
1312
+ if last_updated:
1313
+ try:
1314
+ last_update_time = datetime.fromisoformat(last_updated)
1315
+ hours_since_update = (datetime.now() - last_update_time).total_seconds() / 3600
1316
+ health_indicators['recent_activity'] = hours_since_update <= 24
1317
+ health_indicators['hours_since_last_validation'] = hours_since_update
1318
+ except (ValueError, TypeError):
1319
+ pass # tolerate malformed timestamps
1320
+
1321
+ avg_quality = stats.get('average_quality_score', 0)
1322
+ success_rate = stats.get('total_valid_articles', 0) / max(stats.get('total_articles', 1), 1)
1323
+
1324
+ if avg_quality >= 0.7 and success_rate >= 0.8:
1325
+ health_indicators['quality_status'] = 'excellent'
1326
+ elif avg_quality >= 0.5 and success_rate >= 0.6:
1327
+ health_indicators['quality_status'] = 'good'
1328
+ elif avg_quality >= 0.3 and success_rate >= 0.4:
1329
+ health_indicators['quality_status'] = 'fair'
1330
+ else:
1331
+ health_indicators['quality_status'] = 'poor'
1332
+
1333
+ health_indicators['average_quality_score'] = avg_quality
1334
+ health_indicators['validation_success_rate'] = success_rate
1335
+
1336
+ overall_healthy = (
1337
+ health_indicators['validation_system_active'] and
1338
+ health_indicators['statistics_available'] and
1339
+ health_indicators['quality_status'] not in ['poor', 'unknown']
1340
  )
1341
+
1342
+ return {
1343
+ 'validation_health': {
1344
+ 'overall_status': 'healthy' if overall_healthy else 'degraded',
1345
+ 'health_indicators': health_indicators,
1346
+ 'last_check': datetime.now().isoformat()
1347
+ }
1348
+ }
1349
+
1350
+ except Exception as e:
1351
+ logger.error(f"Validation health check failed: {e}")
1352
+ return {
1353
+ 'validation_health': {
1354
+ 'overall_status': 'unhealthy',
1355
+ 'error': str(e),
1356
+ 'last_check': datetime.now().isoformat()
1357
+ }
1358
+ }
1359
+
1360
+
1361
+ # New monitoring endpoints
1362
+ @app.get("/monitor/metrics/current")
1363
+ async def get_current_metrics():
1364
+ """Get current real-time metrics"""
1365
+ try:
1366
+ prediction_metrics = prediction_monitor.get_current_metrics()
1367
+ system_metrics = metrics_collector.collect_system_metrics()
1368
+ api_metrics = metrics_collector.collect_api_metrics()
1369
+
1370
+ return {
1371
+ "timestamp": datetime.now().isoformat(),
1372
+ "prediction_metrics": asdict(prediction_metrics),
1373
+ "system_metrics": asdict(system_metrics),
1374
+ "api_metrics": asdict(api_metrics)
1375
+ }
1376
+ except Exception as e:
1377
+ logger.error(f"Failed to get current metrics: {e}")
1378
+ raise HTTPException(status_code=500, detail=str(e))
1379
 
1380
+ @app.get("/monitor/metrics/historical")
1381
+ async def get_historical_metrics(hours: int = 24):
1382
+ """Get historical metrics"""
1383
+ try:
1384
+ return {
1385
+ "prediction_metrics": [asdict(m) for m in prediction_monitor.get_historical_metrics(hours)],
1386
+ "aggregated_metrics": metrics_collector.get_aggregated_metrics(hours)
1387
+ }
1388
+ except Exception as e:
1389
+ logger.error(f"Failed to get historical metrics: {e}")
1390
+ raise HTTPException(status_code=500, detail=str(e))
1391
 
1392
+ @app.get("/monitor/alerts")
1393
+ async def get_alerts():
1394
+ """Get active alerts and statistics"""
1395
+ try:
1396
+ return {
1397
+ "active_alerts": [asdict(alert) for alert in alert_system.get_active_alerts()],
1398
+ "alert_statistics": alert_system.get_alert_statistics()
1399
+ }
1400
+ except Exception as e:
1401
+ logger.error(f"Failed to get alerts: {e}")
1402
+ raise HTTPException(status_code=500, detail=str(e))
1403
 
1404
+ @app.get("/monitor/health")
1405
+ async def get_monitoring_health():
1406
+ """Get monitoring system health"""
1407
  try:
1408
+ dashboard_data = metrics_collector.get_real_time_dashboard_data()
1409
+ confidence_analysis = prediction_monitor.get_confidence_analysis()
1410
+
1411
+ return {
1412
+ "monitoring_status": "active",
1413
+ "dashboard_data": dashboard_data,
1414
+ "confidence_analysis": confidence_analysis,
1415
+ "total_predictions": prediction_monitor.total_predictions
1416
+ }
1417
+ except Exception as e:
1418
+ logger.error(f"Failed to get monitoring health: {e}")
1419
+ raise HTTPException(status_code=500, detail=str(e))
1420
+
1421
+ @app.get("/monitor/patterns")
1422
+ async def get_prediction_patterns(hours: int = 24):
1423
+ """Get prediction patterns and anomaly analysis"""
1424
+ try:
1425
+ return prediction_monitor.get_prediction_patterns(hours)
1426
+ except Exception as e:
1427
+ logger.error(f"Failed to get prediction patterns: {e}")
1428
+ raise HTTPException(status_code=500, detail=str(e))
1429
+
1430
+ @app.post("/monitor/alerts/{alert_id}/acknowledge")
1431
+ async def acknowledge_alert(alert_id: str):
1432
+ """Acknowledge an alert"""
1433
+ try:
1434
+ success = alert_system.acknowledge_alert(alert_id, "api_user")
1435
+ if success:
1436
+ return {"message": f"Alert {alert_id} acknowledged"}
1437
+ else:
1438
+ raise HTTPException(status_code=404, detail="Alert not found")
1439
+ except HTTPException:
1440
+ raise
1441
+ except Exception as e:
1442
+ logger.error(f"Failed to acknowledge alert: {e}")
1443
+ raise HTTPException(status_code=500, detail=str(e))
1444
+
1445
+ @app.post("/monitor/alerts/{alert_id}/resolve")
1446
+ async def resolve_alert(alert_id: str, resolution_note: str = ""):
1447
+ """Resolve an alert"""
1448
+ try:
1449
+ success = alert_system.resolve_alert(alert_id, "api_user", resolution_note)
1450
+ if success:
1451
+ return {"message": f"Alert {alert_id} resolved"}
1452
+ else:
1453
+ raise HTTPException(status_code=404, detail="Alert not found")
1454
+ except HTTPException:
1455
+ raise
1456
+ except Exception as e:
1457
+ logger.error(f"Failed to resolve alert: {e}")
1458
+ raise HTTPException(status_code=500, detail=str(e))
1459
+
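+ # Illustrative alert triage sketch: list active alerts, then acknowledge each via
+ # the endpoints above. Assumes each serialized alert exposes an `alert_id` field
+ # matching the path parameter; URL and field name are assumptions.
+ def _example_acknowledge_all_alerts(base_url: str = "http://localhost:8000"):
+     import requests  # assumed client-side dependency
+     alerts = requests.get(f"{base_url}/monitor/alerts", timeout=10).json()["active_alerts"]
+     for alert in alerts:
+         requests.post(f"{base_url}/monitor/alerts/{alert['alert_id']}/acknowledge", timeout=10)
+     return len(alerts)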
1460
+
1461
+ @app.get("/automation/status")
1462
+ async def get_automation_status():
1463
+ """Get automation system status"""
1464
+ try:
1465
+ if automation_manager is None:
1466
+ raise HTTPException(status_code=503, detail="Automation system not available")
1467
+
1468
+ # Get automation status
1469
+ automation_status = automation_manager.get_automation_status()
1470
+
1471
+ # Get drift monitoring status
1472
+ drift_status = automation_manager.drift_monitor.get_automation_status()
1473
+
1474
+ return {
1475
+ "timestamp": datetime.now().isoformat(),
1476
+ "automation_system": automation_status,
1477
+ "drift_monitoring": drift_status,
1478
+ "system_health": "active" if automation_manager.retraining_active else "inactive"
1479
+ }
1480
+
1481
+ except HTTPException:
+ raise
+ except Exception as e:
1482
+ logger.error(f"Failed to get automation status: {e}")
1483
+ raise HTTPException(status_code=500, detail=str(e))
1484
+
1485
+ @app.get("/automation/triggers/check")
1486
+ async def check_retraining_triggers():
1487
+ """Check current retraining triggers"""
1488
+ try:
1489
+ if automation_manager is None:
1490
+ raise HTTPException(status_code=503, detail="Automation system not available")
1491
+
1492
+ trigger_results = automation_manager.drift_monitor.check_retraining_triggers()
1493
+
1494
+ return {
1495
+ "timestamp": datetime.now().isoformat(),
1496
+ "trigger_evaluation": trigger_results,
1497
+ "recommendation": "Retraining recommended" if trigger_results.get('should_retrain') else "No retraining needed"
1498
+ }
1499
+
1500
+ except HTTPException:
+ raise
+ except Exception as e:
1501
+ logger.error(f"Failed to check triggers: {e}")
1502
+ raise HTTPException(status_code=500, detail=str(e))
1503
+
1504
+ @app.post("/automation/retrain/trigger")
1505
+ async def trigger_manual_retraining(reason: str = "manual_api_trigger"):
1506
+ """Manually trigger retraining"""
1507
+ try:
1508
+ if automation_manager is None:
1509
+ raise HTTPException(status_code=503, detail="Automation system not available")
1510
+
1511
+ result = automation_manager.trigger_manual_retraining(reason)
1512
+
1513
+ if result['success']:
1514
  return {
1515
+ "message": "Retraining triggered successfully",
1516
+ "timestamp": datetime.now().isoformat(),
1517
+ "reason": reason
 
1518
  }
1519
+ else:
1520
+ raise HTTPException(status_code=500, detail=result.get('error', 'Unknown error'))
1521
+
1522
+ except HTTPException:
1523
+ raise
1524
+ except Exception as e:
1525
+ logger.error(f"Failed to trigger retraining: {e}")
1526
+ raise HTTPException(status_code=500, detail=str(e))
1527
+
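+ # Illustrative automation flow: consult the trigger evaluation first, then request
+ # retraining only when recommended, chaining the two endpoints above. The URL and
+ # the reason string are assumptions.
+ def _example_conditional_retrain(base_url: str = "http://localhost:8000"):
+     import requests  # assumed client-side dependency
+     check = requests.get(f"{base_url}/automation/triggers/check", timeout=10).json()
+     if check["trigger_evaluation"].get("should_retrain"):
+         return requests.post(f"{base_url}/automation/retrain/trigger",
+                              params={"reason": "drift_detected_via_api"}, timeout=30).json()
+     return {"skipped": True, "recommendation": check["recommendation"]}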
1528
+ @app.get("/automation/queue")
1529
+ async def get_retraining_queue():
1530
+ """Get current retraining queue"""
1531
+ try:
1532
+ if automation_manager is None:
1533
+ raise HTTPException(status_code=503, detail="Automation system not available")
1534
+
1535
+ queue = automation_manager.load_retraining_queue()
1536
+ recent_logs = automation_manager.get_recent_automation_logs(hours=24)
1537
+
1538
+ return {
1539
+ "timestamp": datetime.now().isoformat(),
1540
+ "queued_jobs": queue,
1541
+ "recent_activity": recent_logs,
1542
+ "queue_length": len(queue)
1543
+ }
1544
+
1545
+ except HTTPException:
+ raise
+ except Exception as e:
1546
+ logger.error(f"Failed to get retraining queue: {e}")
1547
+ raise HTTPException(status_code=500, detail=str(e))
1548
 
1549
+ @app.get("/automation/drift/status")
1550
+ async def get_drift_monitoring_status():
1551
+ """Get drift monitoring status"""
1552
+ try:
1553
+ if automation_manager is None:
1554
+ raise HTTPException(status_code=503, detail="Automation system not available")
1555
+
1556
+ # Get recent drift results
1557
+ drift_logs = automation_manager.get_recent_automation_logs(hours=48)
1558
+ drift_checks = [log for log in drift_logs if 'drift' in log.get('event', '')]
1559
+
1560
+ # Get current drift status
1561
+ drift_status = automation_manager.drift_monitor.get_automation_status()
1562
+
1563
+ return {
1564
+ "timestamp": datetime.now().isoformat(),
1565
+ "drift_monitoring_active": True,
1566
+ "recent_drift_checks": drift_checks[-10:], # Last 10 checks
1567
+ "drift_status": drift_status
1568
  }
1569
+
1570
+ except HTTPException:
+ raise
+ except Exception as e:
1571
+ logger.error(f"Failed to get drift status: {e}")
1572
+ raise HTTPException(status_code=500, detail=str(e))
1573
 
1574
+ @app.post("/automation/settings/update")
1575
+ async def update_automation_settings(settings: Dict[str, Any]):
1576
+ """Update automation settings"""
1577
+ try:
1578
+ if automation_manager is None:
1579
+ raise HTTPException(status_code=503, detail="Automation system not available")
1580
+
1581
+ # Update settings
1582
+ automation_manager.automation_config.update(settings)
1583
+ automation_manager.save_automation_config()
1584
+
1585
+ return {
1586
+ "message": "Automation settings updated",
1587
+ "timestamp": datetime.now().isoformat(),
1588
+ "updated_settings": settings
1589
+ }
1590
+
1591
+ except HTTPException:
+ raise
+ except Exception as e:
1592
+ logger.error(f"Failed to update automation settings: {e}")
1593
+ raise HTTPException(status_code=500, detail=str(e))
1594
+
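+ # Illustrative settings update: the endpoint merges whatever keys it receives into
+ # automation_config, so the payload schema is whatever the automation manager
+ # understands. The keys below are hypothetical placeholders.
+ def _example_update_settings(base_url: str = "http://localhost:8000"):
+     import requests  # assumed client-side dependency
+     payload = {"check_interval_minutes": 30, "auto_retrain_enabled": True}  # hypothetical keys
+     return requests.post(f"{base_url}/automation/settings/update", json=payload, timeout=10).json()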
1595
+
1596
+ # Deployment endpoints
1597
+ @app.get("/deployment/status")
1598
+ async def get_deployment_status():
1599
+ """Get deployment system status"""
1600
+ try:
1601
+ if not deployment_manager:
1602
+ raise HTTPException(status_code=503, detail="Deployment system not available")
1603
+
1604
+ return deployment_manager.get_deployment_status()
1605
+
1606
+ except HTTPException:
+ raise
+ except Exception as e:
1607
+ logger.error(f"Failed to get deployment status: {e}")
1608
+ raise HTTPException(status_code=500, detail=str(e))
1609
+
1610
+ @app.post("/deployment/prepare")
1611
+ async def prepare_deployment(target_version: str, strategy: str = "blue_green"):
1612
+ """Prepare a new deployment"""
1613
+ try:
1614
+ if not deployment_manager:
1615
+ raise HTTPException(status_code=503, detail="Deployment system not available")
1616
+
1617
+ deployment_id = deployment_manager.prepare_deployment(target_version, strategy)
1618
+
1619
+ return {
1620
+ "message": "Deployment prepared",
1621
+ "deployment_id": deployment_id,
1622
+ "target_version": target_version,
1623
+ "strategy": strategy
1624
+ }
1625
+
1626
+ except HTTPException:
+ raise
+ except Exception as e:
1627
+ logger.error(f"Failed to prepare deployment: {e}")
1628
+ raise HTTPException(status_code=500, detail=str(e))
1629
+
1630
+ @app.post("/deployment/start/{deployment_id}")
1631
+ async def start_deployment(deployment_id: str):
1632
+ """Start a prepared deployment"""
1633
+ try:
1634
+ if not deployment_manager:
1635
+ raise HTTPException(status_code=503, detail="Deployment system not available")
1636
+
1637
+ success = deployment_manager.start_deployment(deployment_id)
1638
+
1639
+ if success:
1640
+ return {"message": "Deployment started successfully", "deployment_id": deployment_id}
1641
+ else:
1642
+ raise HTTPException(status_code=500, detail="Deployment failed to start")
1643
+
1644
+ except HTTPException:
+ raise
+ except Exception as e:
1645
+ logger.error(f"Failed to start deployment: {e}")
1646
+ raise HTTPException(status_code=500, detail=str(e))
1647
+
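+ # Illustrative two-step deployment flow: /deployment/prepare returns a
+ # deployment_id, which is then passed to /deployment/start. The version string
+ # and base URL are assumptions.
+ def _example_blue_green_deploy(target_version: str, base_url: str = "http://localhost:8000"):
+     import requests  # assumed client-side dependency
+     prep = requests.post(f"{base_url}/deployment/prepare",
+                          params={"target_version": target_version, "strategy": "blue_green"},
+                          timeout=30).json()
+     deployment_id = prep["deployment_id"]
+     return requests.post(f"{base_url}/deployment/start/{deployment_id}", timeout=60).json()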
1648
+ @app.post("/deployment/rollback")
1649
+ async def rollback_deployment(reason: str = "Manual rollback"):
1650
+ """Rollback current deployment"""
1651
+ try:
1652
+ if not deployment_manager:
1653
+ raise HTTPException(status_code=503, detail="Deployment system not available")
1654
+
1655
+ success = deployment_manager.initiate_rollback(reason)
1656
+
1657
+ if success:
1658
+ return {"message": "Rollback initiated successfully", "reason": reason}
1659
+ else:
1660
+ raise HTTPException(status_code=500, detail="Rollback failed")
1661
 
1662
+ except HTTPException:
+ raise
+ except Exception as e:
1663
+ logger.error(f"Failed to rollback deployment: {e}")
1664
+ raise HTTPException(status_code=500, detail=str(e))
1665
+
1666
+ @app.get("/deployment/traffic")
1667
+ async def get_traffic_status():
1668
+ """Get traffic routing status"""
1669
+ try:
1670
+ if not traffic_router:
1671
+ raise HTTPException(status_code=503, detail="Traffic router not available")
1672
+
1673
+ return traffic_router.get_routing_status()
1674
+
1675
+ except HTTPException:
+ raise
+ except Exception as e:
1676
+ logger.error(f"Failed to get traffic status: {e}")
1677
+ raise HTTPException(status_code=500, detail=str(e))
1678
+
1679
+ @app.post("/deployment/traffic/weights")
1680
+ async def set_traffic_weights(blue_weight: int, green_weight: int):
1681
+ """Set traffic routing weights"""
1682
+ try:
1683
+ if not traffic_router:
1684
+ raise HTTPException(status_code=503, detail="Traffic router not available")
1685
+
1686
+ success = traffic_router.set_routing_weights(blue_weight, green_weight)
1687
+
1688
+ if success:
1689
+ return {
1690
+ "message": "Traffic weights updated",
1691
+ "blue_weight": blue_weight,
1692
+ "green_weight": green_weight
1693
  }
1694
+ else:
1695
+ raise HTTPException(status_code=500, detail="Failed to update traffic weights")
1696
+
1697
+ except HTTPException:
+ raise
+ except Exception as e:
1698
+ logger.error(f"Failed to set traffic weights: {e}")
1699
+ raise HTTPException(status_code=500, detail=str(e))
1700
 
1701
+ @app.get("/deployment/performance")
1702
+ async def get_deployment_performance(window_minutes: int = 60):
1703
+ """Get deployment performance comparison"""
1704
+ try:
1705
+ if not traffic_router:
1706
+ raise HTTPException(status_code=503, detail="Traffic router not available")
1707
+
1708
+ return traffic_router.compare_environment_performance(window_minutes)
1709
+
1710
+ except HTTPException:
+ raise
+ except Exception as e:
1711
+ logger.error(f"Failed to get deployment performance: {e}")
1712
+ raise HTTPException(status_code=500, detail=str(e))
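+ # Illustrative canary ramp using the traffic endpoints above: shift weight toward
+ # green in steps and fetch the performance comparison between steps. Step sizes,
+ # pause length, and how to judge the comparison payload are all assumptions.
+ def _example_canary_ramp(base_url: str = "http://localhost:8000"):
+     import requests  # assumed client-side dependency
+     comparison = None
+     for green in (10, 25, 50, 100):
+         requests.post(f"{base_url}/deployment/traffic/weights",
+                       params={"blue_weight": 100 - green, "green_weight": green}, timeout=10)
+         time.sleep(60)  # let metrics accumulate before comparing
+         comparison = requests.get(f"{base_url}/deployment/performance",
+                                   params={"window_minutes": 5}, timeout=10).json()
+         # a real script would inspect `comparison` and POST /deployment/rollback on regression
+     return comparison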
1713
 
1714
+ @app.get("/registry/models")
1715
+ async def list_registry_models(status: str = None, limit: int = 10):
1716
+ """List models in registry"""
1717
+ try:
1718
+ if not model_registry:
1719
+ raise HTTPException(status_code=503, detail="Model registry not available")
1720
+
1721
+ models = model_registry.list_models(status=status, limit=limit)
1722
+ return {"models": [asdict(model) for model in models]}
1723
+
1724
+ except HTTPException:
+ raise
  except Exception as e:
1725
+ logger.error(f"Failed to list registry models: {e}")
1726
+ raise HTTPException(status_code=500, detail=str(e))
1727
+
1728
+ @app.get("/registry/stats")
1729
+ async def get_registry_stats():
1730
+ """Get model registry statistics"""
1731
+ try:
1732
+ if not model_registry:
1733
+ raise HTTPException(status_code=503, detail="Model registry not available")
1734
+
1735
+ return model_registry.get_registry_stats()
1736
+
1737
+ except HTTPException:
+ raise
+ except Exception as e:
1738
+ logger.error(f"Failed to get registry stats: {e}")
1739
+ raise HTTPException(status_code=500, detail=str(e))